Skip to content


python file comparison tool

An alpha version of a directory comparison tool. It can be run from the CLI with two directory paths as arguments:


compare.py /foo/bar/dir1/ /foo/bar/dir2

Or imported as a module.

TODO:
Add an option for recursive searches; at the moment subdirectories are ignored.
Handle permission errors and similar failures more gracefully.

#!/usr/bin/python
import hashlib
import os
import sys

class Comparison:
    """Compare the entries found directly inside two directories.

    The comparison is not recursive: the contents of sub-directories are
    never inspected.  Results are available both as return values and, via
    ``compare``, as a report printed to standard output.
    """

    def __init__(self):
        pass

    def compareFiles(self, file1, file2):
        """Compare the content of two files by matching their MD5 hashes.

        Files are read as raw bytes, so text and binary files are handled
        the same way.

        Returns 1 if the files differ, 0 if they are identical, and None
        if either path is not a regular file (for example a directory).
        """
        if not (os.path.isfile(file1) and os.path.isfile(file2)):
            return None
        digests = []
        for path in (file1, file2):
            md5 = hashlib.md5()
            # Binary mode avoids newline/encoding translation; chunked reads
            # keep memory use flat for large files, and ``with`` guarantees
            # the handle is closed.
            with open(path, 'rb') as handle:
                for chunk in iter(lambda: handle.read(65536), b''):
                    md5.update(chunk)
            digests.append(md5.digest())
        return 0 if digests[0] == digests[1] else 1

    def compareDirectories(self, dir1, dir2):
        """Compare the entries of two directories.

        Names present in both directories are passed to ``compareFiles``.
        Common names that are not regular files on both sides (such as
        sub-directories) are neither reported as changed nor counted as
        identical.

        Returns a list ``[changed, only_in_dir1, only_in_dir2, identical]``:
        three sorted lists of entry names followed by the number of
        identical common files.  The two "only in" lists are built from the
        raw directory listings, so they may contain sub-directory names.
        """
        entries1 = set(os.listdir(dir1))
        entries2 = set(os.listdir(dir2))
        changed = []
        identical = 0
        # Sorted iteration makes the order of ``changed`` deterministic.
        for name in sorted(entries1 & entries2):
            # os.path.join copes with paths given with or without a
            # trailing separator.
            outcome = self.compareFiles(os.path.join(dir1, name),
                                        os.path.join(dir2, name))
            if outcome == 1:
                changed.append(name)
            elif outcome == 0:
                identical += 1
            # outcome is None: not a pair of regular files, so skip it.
        only_in_dir1 = sorted(entries1 - entries2)
        only_in_dir2 = sorted(entries2 - entries1)
        return [changed, only_in_dir1, only_in_dir2, identical]

    def compare(self, dir1, dir2):
        """Print a report of ``compareDirectories(dir1, dir2)`` to stdout.

        Sections with nothing to report are omitted.  Returns the same
        list that ``compareDirectories`` returns.
        """
        r = self.compareDirectories(dir1, dir2)
        banner = "******************************************"
        # Single-argument print(...) behaves the same on Python 2 and 3.
        if r[0]:
            print(banner)
            print("Common file(s) with different content")
            print(banner)
            for f in r[0]:
                print(f)
        if r[1]:
            print(banner)
            print(dir1 + " unique files")
            print(banner)
            for f in r[1]:
                print(f)
        if r[2]:
            print(banner)
            print(dir2 + " unique files")
            print(banner)
            for f in r[2]:
                print(f)
        if r[3]:
            print(banner)
            print(str(r[3]) + " identical files found")
            print(banner)
        return r
# Run the comparison only when executed as a script, so that importing this
# file as a module neither reads sys.argv nor prints anything.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.stderr.write("usage: compare.py DIR1 DIR2\n")
        sys.exit(2)
    print("\n")
    c = Comparison()
    c.compare(sys.argv[1], sys.argv[2])
    print("\n")

Posted in geek, python.

Tagged with , , .


4 Responses

Stay in touch with the conversation, subscribe to the RSS feed for comments on this post.

Continuing the Discussion

  1. CURTIS linked to this post on December 21, 2010

    buy viagra canada

    Buygeneric drugs…

  2. ARMANDO linked to this post on December 25, 2010

    abilify and siezures

    Buynow…

  3. DAVE linked to this post on December 27, 2010

    .

    Buywithout prescription…

  4. JOSHUA linked to this post on December 29, 2010

    .

    Buygeneric drugs…

You must be logged in to post a comment.