"""Find files with identical content under a directory by comparing MD5 digests.

Duplicates can optionally be removed; removal only moves the file to the
recycle bin (through ``send2trash``), it never deletes anything permanently.
"""
import functools
import hashlib
import os
import time

try:
    import send2trash
except ImportError:
    # Only deleting duplicates needs send2trash; a report-only scan must keep
    # working when the package is not installed.
    send2trash = None

# Bytes read per chunk while hashing, so a large file is never loaded into
# memory in one piece.
_CHUNK_SIZE = 40960


def _hash_file(path, digest):
    """Feed the content of ``path`` into ``digest`` and return its hex digest."""
    # A single ``with`` owns the only handle, so it is closed even when a
    # read fails part-way through.
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(_CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()


def getMD5(path):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is read in fixed-size chunks. Raises ``OSError`` when the file
    cannot be opened or read.
    """
    return _hash_file(path, hashlib.md5())


def getSha512(path):
    """Return the hexadecimal SHA-512 digest of the file at ``path``.

    The file is read in fixed-size chunks. Raises ``OSError`` when the file
    cannot be opened or read.
    """
    return _hash_file(path, hashlib.sha512())


def timer(func):
    """Decorator that prints how long each call to ``func`` took.

    The elapsed time is printed in seconds, rounded to one decimal place,
    after the call returns. The wrapped function's return value is passed
    through unchanged.
    """
    @functools.wraps(func)  # keep the decorated function's name and docstring
    def deco(*args, **kwargs):
        started = time.perf_counter()
        result = func(*args, **kwargs)
        use_time = round(time.perf_counter() - started, 1)
        print('Elapsed %ss' % use_time)
        return result
    return deco


def _iter_files(path):
    """Yield the full path of every non-symlink file found below ``path``."""
    for folder, _subfolders, filenames in os.walk(path):
        for filename in filenames:
            full_path = os.path.join(folder, filename)
            # A symlink hashes like its target; treating it as a duplicate
            # could send the real file to the trash and leave the link
            # dangling, so links are not considered at all.
            if os.path.islink(full_path):
                continue
            yield full_path


@timer
def walk(path, delete=None):
    """Scan ``path`` recursively and report files whose MD5 digests match.

    Files are matched by MD5 digest only. For every match the current file
    and the previously seen file with the same digest are printed. When
    deletion is enabled the previously seen copy is sent to the recycle bin
    and the current file becomes the kept copy.

    Args:
        path: Directory to scan.
        delete: ``True`` to trash duplicates, ``False`` to only report them.
            When ``None`` (the default) the user is asked interactively.

    Returns:
        The total size in bytes of the duplicate files that were found.

    Raises:
        ImportError: deletion was requested but ``send2trash`` is missing.
    """
    if delete is None:
        answer = input('Want to delete duplicate file? y/n ')
        delete = answer.lower() == 'y'
    if delete and send2trash is None:
        raise ImportError('send2trash is required to delete duplicate files')

    kept = {}  # MD5 hex digest -> path of the copy currently being kept
    duplicate_bytes = 0
    for n, current in enumerate(_iter_files(path), start=1):
        print(' Has scanned %s files' % n, end='')
        try:
            md5 = getMD5(current)
        except OSError as err:
            # One unreadable file (permissions, vanished mid-scan) should not
            # abort the whole scan.
            print(' Skipped %s (%s)' % (current, err))
            continue
        previous = kept.get(md5)
        if previous is not None:
            if delete:
                # Send the earlier copy to the recycle bin.
                send2trash.send2trash(previous)
            print(' %s %s ' % (current, previous))
            duplicate_bytes += os.path.getsize(current)
        kept[md5] = current

    # Report the total in megabytes, rounded to two decimals.
    size = round(duplicate_bytes / float(1024 * 1024), 2)
    if delete:
        print(' Saved %sM space.' % size)
    else:
        print(" Wasted %sM sapce." % size)
    return duplicate_bytes


if __name__ == "__main__":
    x = input('Input path:')
    walk(x)