Alpha version of a directory comparison tool. It can be run from the CLI with two directory paths as arguments:
compare.py /foo/bar/dir1/ /foo/bar/dir2
Or imported as a module.
TODO:
Add an option for recursive searches; at the moment subdirectories are ignored.
Handle permission errors and similar failures more gracefully.
#!/usr/bin/python
import hashlib
import os
import sys
class Comparison:
    """Compare the entries found directly inside two directories.

    The comparison is not recursive: the contents of sub-directories are
    never examined.  Entries are matched by name; entries present in both
    directories are compared by content when both are regular files.

    The code sticks to constructs that behave the same on Python 2 and
    Python 3 (for example ``print(...)`` with a single argument).
    """

    # Separator line printed above and below every section heading.
    _RULE = "******************************************"

    def __init__(self):
        """Create a comparison helper; the object holds no state."""
        pass

    @staticmethod
    def _hashFile(path, chunkSize=65536):
        """Return the MD5 digest of the raw bytes stored at ``path``.

        The file is opened in binary mode so text and binary files are
        hashed byte-for-byte, and it is read in chunks so large files do
        not have to fit in memory.  The handle is always closed.
        """
        digest = hashlib.md5()
        with open(path, 'rb') as handle:
            for chunk in iter(lambda: handle.read(chunkSize), b''):
                digest.update(chunk)
        return digest.digest()

    def compareFiles(self, file1, file2):
        """Compare the content of two files by matching MD5 hashes.

        Returns 1 if the files differ, 0 if they are identical, and None
        when either path is not a regular file (e.g. a sub-directory), in
        which case no comparison is performed.

        Raises IOError/OSError if a file cannot be read.
        """
        if not (os.path.isfile(file1) and os.path.isfile(file2)):
            return None
        if self._hashFile(file1) == self._hashFile(file2):
            return 0
        return 1

    def compareDirectories(self, dir1, dir2):
        """Compare the direct entries of ``dir1`` and ``dir2``.

        Returns a four-item list ``[changed, only1, only2, identical]``:

        * ``changed``   - sorted names of common files whose content differs
        * ``only1``     - sorted names that exist only in ``dir1``
        * ``only2``     - sorted names that exist only in ``dir2``
        * ``identical`` - number of common files with identical content

        Common names that are not regular files on both sides (such as
        sub-directories) are skipped: they are neither reported as changed
        nor counted as identical.

        Raises OSError if either directory cannot be listed.
        """
        names1 = set(os.listdir(dir1))
        names2 = set(os.listdir(dir2))

        changed = []
        identical = 0
        # Sorted iteration keeps the reported order deterministic.
        for name in sorted(names1 & names2):
            # os.path.join copes with paths given with or without a
            # trailing separator.
            outcome = self.compareFiles(os.path.join(dir1, name),
                                        os.path.join(dir2, name))
            if outcome == 1:
                changed.append(name)
            elif outcome == 0:
                identical += 1
            # outcome is None: not a pair of regular files, so ignore it.

        only1 = sorted(names1 - names2)
        only2 = sorted(names2 - names1)
        return [changed, only1, only2, identical]

    def _printSection(self, title, names=()):
        """Print ``title`` framed by separator lines, then one name per line."""
        print(self._RULE)
        print(title)
        print(self._RULE)
        for name in names:
            print(name)

    def compare(self, dir1, dir2):
        """Print a report of ``compareDirectories(dir1, dir2)`` to stdout.

        Sections with nothing to report are omitted.  Returns the same
        four-item list that ``compareDirectories`` produces.
        """
        r = self.compareDirectories(dir1, dir2)
        changed, only1, only2, identical = r

        if changed:
            self._printSection("Common file(s) with different content", changed)
        if only1:
            self._printSection(dir1 + " unique files", only1)
        if only2:
            self._printSection(dir2 + " unique files", only2)
        if identical:
            self._printSection(str(identical) + " identical files found")
        return r
if __name__ == "__main__":
    # Guarded so that importing this file as a module neither prints
    # anything nor touches sys.argv; the report runs only from the CLI.
    if len(sys.argv) < 3:
        sys.stderr.write("usage: %s DIR1 DIR2\n" % sys.argv[0])
        sys.exit(2)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("\n")
    c = Comparison()
    c.compare(sys.argv[1], sys.argv[2])
    print("\n")
0 Responses
Stay in touch with the conversation, subscribe to the RSS feed for comments on this post.
You must be logged in to post a comment.