• python查找并删除相同文件-UNIQ File-wxPython版本


    今天用wxPython做了一个GUI程序,我称之为UNIQ File,用于查找(并可删除)指定目录内的相同文件。主要原理是比较文件的md5值:计算前先找出文件大小相同的文件,只对这些文件计算md5值,而不是所有文件都计算,从而大大减少了md5的计算量。程序还加入了多线程功能,耗时的查找和删除操作在工作线程中执行。

    运行该程序需要安装wxPython。

    界面图

    源代码:

    UNIQFile-wxPython.py

      1 # -*- coding: gbk -*-
      2 
      3 '''
      4 Author:@DoNotSpyOnMe
      5 Blog: http://www.cnblogs.com/aaronhoo
      6 '''
      7 
      8 import wx
      9 import hashlib
     10 import os
     11 import threading
     12 
     13 class WorkerThread(threading.Thread):
     14     def __init__(self, frame,dir,operation,msg):
     15         """初始化工作线程: 把主窗口传进来"""
     16         threading.Thread.__init__(self)
     17         self.frame = frame
     18         self.dir=dir
     19         self.operation=operation
     20         self.msg=msg
     21         self.setDaemon(True)#设置子线程随UI主线程结束而结束
     22         self.start() 
     23 
     24     #----------------------------------------------------------------------
     25     def run(self):
     26         """执行工作线程"""
     27         self.frame.SetButtons('operating')
     28         try:
     29             if self.operation=='list':
     30                 self.listSameFile(self.dir)
     31                 self.frame.btnList.Enable()
     32             elif self.operation=='remove':
     33                 self.removeSameFile(self.dir)
     34                 self.frame.btnRemove.Enable()            
     35         except:
     36             pass
     37         finally:
     38             self.frame.SetButtons('completed')
     39 #         
     40 #     def stop(self):
     41 #         self.keepRunning=False
     42     def appendMsg(self,msg):
     43         if self.frame:
     44             #以下方式可以实现终端式的刷新:自动滚动到最新行
     45             self.frame.txtContent.AppendText(msg+'
    ')
     46             #废弃的方式
     47 #             currentMsg=self.frame.txtContent.GetValue()
     48 #             currentMsg=currentMsg+'
    '+msg
     49 #             self.frame.txtContent.SetValue(currentMsg)
     50             
     51     def getFileSize(self,filePath):
     52         return os.path.getsize(filePath)        
     53     
     54     ''' 一般文件的md5计算方法,一次读取文件的全部内容'''           
     55     def CalcMD5(self,filepath):
     56         with open(filepath,'rb') as f:
     57             md5obj = hashlib.md5()
     58             md5obj.update(f.read())
     59             hash = md5obj.hexdigest()
     60             return hash    
     61     '''大文件计算md5的方法,分批读取文件内容,防止内存爆掉'''    
     62     def GetFileMd5(self,filename):
     63         if not os.path.isfile(filename):
     64             return
     65         myhash = hashlib.md5()
     66         f = open(filename,'rb')
     67         while True:
     68             b = f.read(8*1024)
     69             if not b :
     70                 break
     71             myhash.update(b)
     72         f.close()
     73         return myhash.hexdigest()
     74     
     75     def GetAllFiles(self,directory):
     76         files=[]
     77         for dirpath, dirnames,filenames in os.walk(directory):
     78             if filenames!=[]:
     79                 for file in filenames:
     80                     files.append(dirpath+'\'+file)
           files.sort(key=len)#按照文件名的长度排序 
    81 return files 82 83 def findSameSizeFiles(self,files): 84 dicSize={} 85 for f in files: 86 size=self.getFileSize(f) 87 if not dicSize.has_key(size): 88 dicSize[size]=f 89 else: 90 dicSize[size]=dicSize[size]+';'+f 91 dicCopy=dicSize.copy() 92 for k in dicSize.iterkeys(): 93 if dicSize[k].find(';')==-1: 94 dicCopy.pop(k) 95 del dicSize 96 return dicCopy 97 98 def findSameMD5Files(self,files): 99 dicMD5={} 100 for f in files: 101 self.appendMsg('calculating the md5 value of file %s'%f) 102 md5=self.GetFileMd5(f) 103 if not dicMD5.has_key(md5): 104 dicMD5[md5]=f 105 else: 106 dicMD5[md5]=dicMD5[md5]+';'+f 107 dicCopy=dicMD5.copy() 108 for k in dicMD5.iterkeys(): 109 if dicMD5[k].find(';')==-1: 110 dicCopy.pop(k) 111 del dicMD5 112 return dicCopy 113 114 def removeSameFile(self,mydir): 115 msg='' 116 msgUniq='Congratulations,no file is removed since they are all uniq.' 117 try: 118 existsFlag=False 119 files=self.GetAllFiles(mydir) 120 self.appendMsg('%s files found in directory %s '%(len(files),mydir)) 121 dicFileOfSameSize=self.findSameSizeFiles(files) 122 if dicFileOfSameSize=={}: 123 self.appendMsg(msgUniq) 124 return 125 else: 126 #list the duplicated files first: 127 dicFiltered={} 128 for k in dicFileOfSameSize.iterkeys(): 129 filesOfSameSize=dicFileOfSameSize[k].split(';') 130 dicSameMD5file=self.findSameMD5Files(filesOfSameSize) 131 if dicSameMD5file!={}: 132 existsFlag=True 133 for k in dicSameMD5file.iterkeys(): 134 msg=msg+'md5 %s: %s'%(k,dicSameMD5file[k])+' ' 135 dicFiltered[k]=dicSameMD5file[k] 136 if not existsFlag: 137 msg=msgUniq 138 return 139 else: 140 msg='Duplicated files: '+msg+' ' 141 #then remove the duplicated files: 142 removeCount=0 143 for k in dicFiltered.iterkeys(): 144 sameFiles=dicFiltered[k].split(';') 145 flagRemove=False 146 for f in sameFiles: 147 if not flagRemove: 148 flagRemove=True 149 else: 150 msg=msg+'Removing file: %s'%f+' ' 151 os.remove(f) 152 removeCount=removeCount+1 153 msg=msg+'%s files are removed. 
'%removeCount 154 except Exception,e: 155 # print e 156 msg='Exception occured.' 157 finally: 158 self.appendMsg(msg+' '+'Operation finished.') 159 160 161 def listSameFile(self,mydir): 162 msg='' 163 msgUniq='Congratulations,all files are uniq.' 164 try: 165 existsFlag=False 166 files=self.GetAllFiles(mydir) 167 self.appendMsg('%s files found in directory %s '%(len(files),mydir)) 168 dicFileOfSameSize=self.findSameSizeFiles(files) 169 if dicFileOfSameSize=={}: 170 self.appendMsg(msgUniq) 171 return 172 else: 173 for k in dicFileOfSameSize.iterkeys(): 174 filesOfSameSize=dicFileOfSameSize[k].split(';') 175 dicSameMD5file=self.findSameMD5Files(filesOfSameSize) 176 if dicSameMD5file!={}: 177 existsFlag=True 178 for k in dicSameMD5file.iterkeys(): 179 msg=msg+'md5 %s: %s'%(k,dicSameMD5file[k])+' ' 180 if not existsFlag: 181 msg=msgUniq 182 else: 183 msg='Duplicated files: '+msg 184 except Exception,e: 185 # print e 186 msg='Exception occured.' 187 finally: 188 self.appendMsg(msg+' '+'Operation finished.') 189 190 191 class MyFrame(wx.Frame): 192 def __init__(self): 193 super(MyFrame,self).__init__(None,title='UNIQ File-wxPython',size=(780,450)) 194 pan=wx.Panel(self) 195 self.lblDir=wx.StaticText(pan,-1,'Dir:',style=wx.ALIGN_LEFT) 196 self.txtFile=wx.TextCtrl(pan,size=(380,30)) 197 # self.txtFile.Disable() 198 self.btnOpen=wx.Button(pan,label='Pick Directory') 199 self.btnOpen.Bind(wx.EVT_BUTTON, self.BtnOpenHandler) 200 self.btnList=wx.Button(pan,label='Find Same') 201 self.btnList.Bind(wx.EVT_BUTTON, self.BtnListHandler) 202 self.btnRemove=wx.Button(pan,label='Remove duplicated') 203 self.btnRemove.Bind(wx.EVT_BUTTON, self.BtnRemoveHandler) 204 # self.btnStop=wx.Button(pan,label='Stop') 205 # self.btnStop.Bind(wx.EVT_BUTTON, self.BtnStopHandler) 206 207 hbox=wx.BoxSizer() 208 hbox.Add(self.lblDir,proportion=0,flag=wx.LEFT,border=5) 209 hbox.Add(self.txtFile,proportion=0,flag=wx.LEFT,border=5) 210 hbox.Add(self.btnOpen,proportion=0,flag=wx.LEFT,border=5) 211 
hbox.Add(self.btnList,proportion=0,flag=wx.LEFT,border=5) 212 hbox.Add(self.btnRemove,proportion=0,flag=wx.LEFT,border=5) 213 # hbox.Add(self.btnStop,proportion=0,flag=wx.LEFT,border=5) 214 215 self.txtContent=wx.TextCtrl(pan,style=wx.TE_MULTILINE|wx.HSCROLL) 216 vbox=wx.BoxSizer(wx.VERTICAL) 217 vbox.Add(hbox,proportion=0,flag=wx.EXPAND|wx.ALL,border=5) 218 vbox.Add(self.txtContent,proportion=1,flag=wx.EXPAND,border=5) 219 pan.SetSizer(vbox) 220 # self.SetButtons('init') 221 222 def BtnOpenHandler(self,event): 223 dlg = wx.DirDialog(None,u"选择文件夹",style=wx.DD_DEFAULT_STYLE) 224 if dlg.ShowModal() == wx.ID_OK: 225 dlg.Destroy() 226 if dlg.GetPath(): 227 self.dirSelected=dlg.GetPath() #文件夹路径 228 self.txtFile.SetValue(self.dirSelected) 229 230 self.SetButtons('selected') 231 self.txtContent.SetValue('Selected dirctory: %s '%self.dirSelected) 232 233 def BtnListHandler(self,event): 234 if not self.txtFile.GetValue() or not os.path.isdir(self.txtFile.GetValue()): 235 wx.MessageBox('please select a valid directory first.','Tip Message',wx.YES_DEFAULT|wx.ICON_INFORMATION) 236 return 237 self.dirSelected=self.txtFile.GetValue() 238 self.txtContent.SetValue('') 239 msg='Listing same files in %s '%self.dirSelected 240 self.txtContent.SetValue(msg) 241 workThread=WorkerThread(self,self.dirSelected,'list',msg) 242 243 def BtnRemoveHandler(self,event): 244 if not self.txtFile.GetValue() or not os.path.isdir(self.txtFile.GetValue()): 245 wx.MessageBox('please select a valid directory first.','Tip Message',wx.YES_DEFAULT|wx.ICON_INFORMATION) 246 return 247 self.dirSelected=self.txtFile.GetValue() 248 self.txtContent.SetValue('') 249 msg='Removing duplicated files in %s '%self.dirSelected 250 self.txtContent.SetValue(msg) 251 WorkerThread(self,self.dirSelected,'remove',msg) 252 253 def BtnStopHandler(self,event): 254 pass 255 256 def SetButtons(self,status): 257 if status=='init': 258 self.btnOpen.Enable() 259 self.btnList.Disable() 260 self.btnRemove.Disable() 261 # 
self.btnStop.Disable() 262 elif status=='operating': 263 self.btnOpen.Disable() 264 self.btnList.Disable() 265 self.btnRemove.Disable() 266 # self.btnStop.Enable() 267 elif status=='completed': 268 self.btnOpen.Enable() 269 self.btnList.Enable() 270 self.btnRemove.Enable() 271 # self.btnStop.Disable() 272 elif status=='selected': 273 self.btnOpen.Enable() 274 self.btnList.Enable() 275 self.btnRemove.Enable() 276 # self.btnStop.Disable() 277 278 if __name__=="__main__": 279 app=wx.App() 280 MyFrame().Show() 281 app.MainLoop()
  • 相关阅读:
    生病了,难受啊
    2005年1月31号随笔一篇
    [转]永远的Beyond
    关于IE问题,请教和求救
    中国财富排行榜
    这两天简直倒霉透顶了
    Asp.net(C#)给图片加上水印效果
    我不爱的那个女人
    555,我的hotmail从2G变回2M的了
    怀念永远的战神
  • 原文地址:https://www.cnblogs.com/aaronhoo/p/5401968.html
Copyright © 2020-2023  润新知