I needed a Python equivalent of a sed/awk script: something that walks a directory tree, renames certain subdirectories, and, along the way, opens every XML file it finds and replaces or modifies certain strings in it.
It would be nicer if someone could suggest an enhanced re.sub(regex, replacement, subject) in which every string matching one pattern is replaced by a string derived from that match. In other words, the second argument, 'replacement', would itself be a pattern and would produce a different string for each different string in 'subject' that matches 'regex'. For example, 'arthinternational-d' would be replaced by 'arthinternational-r' and 'arthfmt-d' by 'arthfmt-r', but 'a-d' would remain unmodified.
from __future__ import generators
import fileinput, glob, string, sys, os, re
from os.path import join
def checkdirname(name):
    """Tell whether a directory name is one this script should rename.

    A name qualifies when it starts with 'arth', ends with '-d', and has
    only non-digit characters (possibly none) in between; for example
    'arthfmt-d' qualifies while 'a-d' and 'arth2-d' do not.

    Returns True for a qualifying name, False otherwise.
    """
    matched = re.search(r'^arth(\D*)-d$', name)
    return matched is not None
def checkfilename(name):
    """Tell whether a file name carries the '.xml' extension.

    Only names ending in a literal '.xml' qualify; the comparison is
    case-sensitive.

    Returns True for a qualifying name, False otherwise.
    """
    # A plain suffix test is used rather than a regular expression: an
    # unescaped '.' in a pattern matches any character, which would also
    # accept names such as 'fooxml'.
    return name.endswith('.xml')
			
def renamedir(dirname, newname):
    """Rename the directory at path *dirname* to path *newname*.

    Thin wrapper around os.rename; any OSError raised by the rename
    propagates to the caller.
    """
    os.rename(dirname, newname)
	
def replacestrs(filename):
    """Rewrite files in place, replacing every '-d0' with '-r0'.

    *filename* is expanded with glob.glob, so it may be a single path or
    a wildcard pattern; every matching file is rewritten in place.  If
    nothing matches, the function returns without doing anything.
    """
    files = glob.glob(filename)
    if not files:
        # fileinput treats an empty file list as "read standard input",
        # which would block instead of being a no-op.
        return

    stext = '-d0'
    rtext = '-r0'

    # With inplace enabled, fileinput redirects sys.stdout into the file
    # being rewritten, so writing each line back out replaces its contents.
    reader = fileinput.input(files, inplace=1)
    try:
        for line in reader:
            # str.replace leaves lines without the marker unchanged and also
            # handles a marker sitting at the very start of the line.
            sys.stdout.write(line.replace(stext, rtext))
    finally:
        # Closing restores sys.stdout even if rewriting fails part-way.
        reader.close()
	
	
def dirwalk(dir):
    """Walk a directory tree as a generator, renaming and rewriting as it goes.

    For every entry of *dir*:

    * a real (non-symlink) directory whose name passes checkdirname() is
      renamed so that its trailing '-d' becomes '-r'; the directory is
      then walked recursively and its entries are yielded first;
    * a regular file is reported on standard output and, if its name
      passes checkfilename(), handed to replacestrs().

    Each entry is yielded once as a ``(name, fullpath)`` tuple; a
    directory is yielded after its contents.  For a renamed directory
    both items of the tuple refer to the new name.

    Because this is a generator, nothing happens until it is iterated.
    """
    for f in os.listdir(dir):
        fullpath = os.path.join(dir, f)

        if os.path.isdir(fullpath) and not os.path.islink(fullpath):
            if checkdirname(f):
                # Swap the trailing '-d' for '-r' and carry on under the new
                # name so the recursion and the yielded pair stay consistent.
                f = f[:-2] + '-r'
                newpath = os.path.join(dir, f)
                renamedir(fullpath, newpath)
                fullpath = newpath

            for x in dirwalk(fullpath):
                yield x
        elif os.path.isfile(fullpath):
            print('Saw file %s' % fullpath)

            if checkfilename(f):
                replacestrs(fullpath)

        # Every entry (file, directory, or anything else) is reported once.
        yield f, fullpath
      
        
        	
def main():
    """Command-line entry point: process the tree named by the first argument.

    Prints a usage line and exits with status 1 when no directory argument
    is given; otherwise iterates dirwalk() over that directory to
    completion.
    """
    if len(sys.argv) < 2:
        print('Usage: Python dirwalkren.py directoryname.')
        sys.exit(1)

    # The yielded entries are not used here; iterating is what drives the walk.
    for _entry in dirwalk(sys.argv[1]):
        pass


if __name__ == '__main__':
    main()
        	
        	
        	
        	
        	
        
 | 
Suggestions welcome!
Download
Copy to clipboard
replacement can be a function. From the library reference:
"If repl is a function, it is called for every non-overlapping occurrence of pattern. The function takes a single match object argument, and returns the replacement string."
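Applied to your problem (if I understood correctly what you meant), for example: re.sub(r'\b(arth[^\W\d]*)-d\b', lambda m: m.group(1) + '-r', subject) turns 'arthfmt-d' into 'arthfmt-r' and leaves 'a-d' untouched.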
Thanks Chris! This is what I'd add to the code, I only realized later. Thank you!