├── README └── filecomp.py /README: -------------------------------------------------------------------------------- 1 | /*********************************\ 2 | filecomp.py 3 | Will compare files in two 4 | directories for missing 5 | and/or mismatched files 6 | (based upon md5sums). 7 | 8 | Currently a bit sloppy 9 | and lacks recursion. 10 | 11 | Written by Twitch(Ben) 12 | \*********************************/ 13 | 14 | This is, as yet, a very small utility to compare the contents of a pair of directories. It became necessary when managing archived files alongside their originals, and is likely quite useful for managing files/data across multiple systems (without using something overly complex like git/svn/cvs). 15 | 16 | Recursion is now in place, though it is not yet well tested. 17 | 18 | 2010-07-21 19 | 20 | Picked this up again for some touch-ups. It now handles UNIX-style paths (i.e. '/' rather than '\') instead of FAT-style paths. I should fix this to handle both soon; I can borrow some code from another project in which I did something similar. 21 | 22 | 2010-07-23 23 | Fixed the shebang to run in both environments and added os.name checks to choose the path separator, so that this runs in both *nix and Windows environments. 24 | -------------------------------------------------------------------------------- /filecomp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os,sys,hashlib,re 3 | 4 | """ 5 | /*********************************\ 6 | filecomp.py 7 | Will compare files in two 8 | directories for missing 9 | and/or mismatched files 10 | (based on md5sums). 11 | 12 | Currently a bit sloppy. 
#
# Written by Twitch(Ben)
# \*********************************/
# """
# NOTE: the triple quote on the previous line terminates the header docstring
# that is opened on the preceding physical line of this dump; taken on its
# own, this section starts with ordinary comments and is importable as a
# module.

# Marker returned by md5sum() when a path cannot be opened for reading.
OPEN_FAILED = 'Failed to open file'

# Kept under its historical name.  os.sep is "/" on POSIX and "\\" on
# Windows (the two values that used to be hard-coded here) and is also
# defined on every other platform, so no OS needs to be rejected.
pathsep = os.sep

# Default directories for outp(); main() fills them in from the command line.
onedir = None
twodir = None


def sumfile(fobj):
    """Return the hex MD5 digest of everything readable from *fobj*.

    fobj -- any object with a read(size) method that yields bytes.

    The data is consumed in 8096-byte chunks so that large files are hashed
    without being loaded into memory in one piece.
    """
    digest = hashlib.md5()
    while True:
        chunk = fobj.read(8096)
        if not chunk:
            break
        digest.update(chunk)
    return digest.hexdigest()


def md5sum(fname):
    """Return the hex MD5 digest of the file *fname*, or of stdin for "-".

    Returns the OPEN_FAILED marker string instead of a digest when the path
    cannot be opened (missing, unreadable, or a directory).  Errors raised
    while reading an already opened file are not swallowed.
    """
    if fname == '-':
        # Python 3 exposes the raw byte stream as sys.stdin.buffer; on
        # Python 2 sys.stdin itself already yields byte strings.
        return sumfile(getattr(sys.stdin, 'buffer', sys.stdin))
    try:
        fobj = open(fname, 'rb')
    except (IOError, OSError):
        return OPEN_FAILED
    # The with-block guarantees the handle is closed even if hashing fails.
    with fobj:
        return sumfile(fobj)


def _relative_subdirs(root):
    """Return every directory below *root* as pathsep + its relative path."""
    found = set()
    for dirpath, _dirnames, _filenames in os.walk(root):
        # relpath() compares real path components, so the result does not
        # depend on regex metacharacters in *root* or on a trailing separator.
        relative = os.path.relpath(dirpath, root)
        if relative != os.curdir:
            found.add(pathsep + relative)
    return found


def walkdirs(dir1, dir2):
    """Return the set of sub-directories that exist under both roots.

    dir1, dir2 -- the two directory trees to walk.

    Every entry is the path relative to its root prefixed with pathsep
    (for example "/sub" or "/sub/deeper" on POSIX).  The roots themselves
    are never included.
    """
    return _relative_subdirs(dir1) & _relative_subdirs(dir2)


def complists(dir1, dir2):
    """Compare the immediate entries of two directories.

    Prints a banner naming both directories and returns a 3-tuple of sets:
    (names only in dir1, names only in dir2, names present in both).
    """
    print("#######################################")
    print("Comparing \n%s \n%s" % (dir1, dir2))
    print("#######################################")
    entries1 = set(os.listdir(dir1))
    entries2 = set(os.listdir(dir2))
    return (entries1 - entries2, entries2 - entries1, entries1 & entries2)


def comfiles(files, onedir, twodir):
    """Compare, by MD5, the entries that appear in both directories.

    files  -- iterable of names that exist in both directories.
    onedir -- first directory.
    twodir -- second directory.

    Prints one line per problem followed by a blank separator, and returns
    the sorted list of names that did not compare equal.
    """
    mismatched = []
    for name in sorted(files):
        first = os.path.join(onedir, name)
        second = os.path.join(twodir, name)
        if os.path.isdir(first) and os.path.isdir(second):
            # A directory has no content hash of its own; main() compares
            # the contents of shared sub-directories in a separate pass.
            continue
        first_sum = md5sum(first)
        second_sum = md5sum(second)
        if first_sum == OPEN_FAILED and second_sum == OPEN_FAILED:
            # Two failures used to look like two equal "digests".
            print("File %s in both targets but could not be read in either!" % name)
            mismatched.append(name)
        elif first_sum != second_sum:
            print("File %s in both targets but does not match!" % name)
            mismatched.append(name)
    print("\n")
    return mismatched


def _report_unique(items, here, there):
    """Print the names found in *here* that are missing from *there*."""
    if items:
        print("Items in \"%s\" and not in \"%s\":" % (here, there))
        for item in sorted(items):
            print(" " + item)
        print("\n------------------------------")
    else:
        print("No unique items in \"%s\"" % (here))


def outp(datum, fdir=None, sdir=None):
    """Print the report for one complists() result.

    datum -- the (only-in-first, only-in-second, in-both) tuple.
    fdir  -- first directory; defaults to the command-line dir1.
    sdir  -- second directory; defaults to the command-line dir2.
    """
    # Resolve the defaults at call time instead of freezing sys.argv values
    # into the signature when the function is defined.
    if fdir is None:
        fdir = onedir
    if sdir is None:
        sdir = twodir

    _report_unique(datum[0], fdir, sdir)
    _report_unique(datum[1], sdir, fdir)

    if datum[2]:
        comfiles(datum[2], fdir, sdir)


def main(argv=None):
    """Run the comparison for "prog dir1 dir2"; return the exit status.

    Returns 2 on a usage error or when an argument is not a directory,
    0 otherwise.
    """
    global onedir, twodir

    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        print("You're doing something wrong.\nUsage: %s dir1 dir2" % argv[0])
        return 2

    onedir, twodir = argv[1], argv[2]
    for target in (onedir, twodir):
        if not os.path.isdir(target):
            print("\"%s\" is not a directory." % target)
            return 2

    subdirs = walkdirs(onedir, twodir)
    outp(complists(onedir, twodir))

    # Repeat the comparison for every sub-directory both trees share.
    for subdir in sorted(subdirs):
        relative = subdir.lstrip(pathsep)
        first = os.path.join(onedir, relative)
        second = os.path.join(twodir, relative)
        outp(complists(first, second), first, second)
    return 0


if __name__ == "__main__":
    sys.exit(main())