1. 简述
递归解压文件夹中的所有压缩包到指定文件夹,方便快速搜索文件和整理移动文件。
2. 环境配置
python解压rar文件需要安装依赖库 (python-unrar)
Windows:
- 在 RARLab 官方下载安装库文件 http://www.rarlab.com/rar/UnRARDLL.exe
- 默认路径为 C:\Program Files (x86)\UnrarDLL\
- 添加环境变量 UNRAR_LIB_PATH,键值为 C:\Program Files (x86)\UnrarDLL\x64\UnRAR64.dll;如果是 32 位系统,键值为 C:\Program Files (x86)\UnrarDLL\UnRAR.dll
Linux:
- 下载库文件 https://www.rarlab.com/rar/unrarsrc-5.6.8.tar.gz
- 编译并安装库文件:
$ make lib
$ make install-lib
- 添加环境变量 export UNRAR_LIB_PATH=/usr/lib/libunrar.so
3. 实现
代码实现
#!/usr/bin/env python3
"""Mirror a directory tree into ./unzip/, extracting every archive on the way.

Handled suffixes: .zip .rar .tar .tgz .tar.gz .tar.bz2 .tar.bz .tar.tgz

Archives found in the source tree are extracted into the mirror (the source
tree is never modified); every other file is copied.  Archives that were
themselves packed inside another archive are then extracted in place inside
the mirror and deleted once extraction succeeded.
"""
import os
import zlib
import shutil
import zipfile
import tarfile
from time import sleep

try:
    # python-unrar needs the native UnRAR library (see UNRAR_LIB_PATH).  Keep
    # zip/tar support usable when it is missing; NOTE(review): the package is
    # believed to raise LookupError when the library cannot be located.
    import unrar
    from unrar import rarfile
except (ImportError, LookupError):
    unrar = None
    rarfile = None

filepath = "./dirname"  # relative path


class BaseTool(object):
    """Suffix helpers, error logging and one extractor per archive family."""

    TAR_SUFFIXES = (".tar.gz", ".tar.bz2", ".tar.bz", ".tar.tgz", ".tar", ".tgz")

    def __init__(self, path):
        self.path = path
        # Longer suffixes come first so ".tar.tgz" wins over ".tgz".
        self.compress = list(self.TAR_SUFFIXES) + [".zip", ".rar"]

    def iszip(self, file):
        """Return the known archive suffix *file* ends with, else None.

        Matching is case-insensitive; the returned suffix is lower-case.
        """
        lowered = file.lower()
        for suffix in self.compress:
            if lowered.endswith(suffix):
                return suffix
        return None

    def zip_to_path(self, file):
        """Return *file* without its trailing archive suffix.

        Only the end of the path is touched, so directory names that happen
        to contain ".zip", ".tar", ... are left alone.  Paths without a known
        suffix are returned unchanged.
        """
        suffix = self.iszip(file)
        if suffix is None:
            return file
        return file[:-len(suffix)]

    def error_record(self, info):
        """Append *info* as one line to error.txt in the working directory."""
        with open("error.txt", "a", encoding="utf-8") as log:
            log.write(info + "\n")

    def un_zip(self, src, dst):
        """Extract zip archive *src* into the directory ``zip_to_path(dst)``.

        src : aa/asdf.zip
        dst : unzip/aa/asdf.zip

        Returns True on success.  Failures are written to the error log and
        reported as False instead of being raised.
        """
        uz_path = self.zip_to_path(dst)
        try:
            with zipfile.ZipFile(src) as zip_file:
                os.makedirs(uz_path, exist_ok=True)
                zip_file.extractall(uz_path)
        except zipfile.BadZipFile:
            self.error_record("bad zip file : " + src)
        except zlib.error:
            print("zlib error : " + src)
            self.error_record("zlib error : " + src)
        except Exception as e:
            # Includes the RuntimeError zipfile raises for encrypted members.
            print(e)
            self.error_record(str(e) + src)
        else:
            return True
        return False

    def un_rar(self, src, dst):
        """Extract rar archive *src* into ``zip_to_path(dst)``.

        Returns True on success, False (after logging) otherwise, including
        when the unrar package could not be imported.
        """
        if rarfile is None:
            self.error_record("unrar library unavailable : " + src)
            return False
        try:
            rar = rarfile.RarFile(src)
            rar.extractall(self.zip_to_path(dst))
        except rarfile.BadRarFile:
            self.error_record("bad rar file : " + src)
        except Exception as e:
            print(e)
            self.error_record(str(e) + src)
        else:
            return True
        return False

    def un_tar(self, src, dst):
        """Extract tar archive *src* (any compression) into ``zip_to_path(dst)``.

        Returns True on success, False (after logging) otherwise.
        """
        uz_path = self.zip_to_path(dst)
        try:
            with tarfile.open(src) as tar:
                if hasattr(tarfile, "data_filter"):
                    # Archives are untrusted input: refuse members that would
                    # escape uz_path where the interpreter supports filters.
                    tar.extractall(path=uz_path, filter="data")
                else:
                    tar.extractall(path=uz_path)
        except tarfile.ReadError:
            self.error_record("bad tar file : " + src)
        except Exception as e:
            print(e)
            self.error_record(str(e) + src)
        else:
            return True
        return False


class UnZip(BaseTool):
    """Extract the tree at *path* (a relative path) into ./unzip/."""

    # Progress prefixes, keyed by (stage, archive family).
    _PROGRESS = {
        ("main", ".zip"): "[+] main unzip : ",
        ("main", ".rar"): "[+] main unrar : ",
        ("main", "tar"): "[+] main untar : ",
        ("child", ".zip"): "[+] child unzip: ",
        ("child", ".rar"): "[+] child unrar : ",
        ("child", "tar"): "[+] child untar : ",
    }

    def __init__(self, path):
        super(UnZip, self).__init__(path)
        self.output = "./unzip/"
        self.current_path = os.getcwd() + "/"

    def _extract(self, src, dst, stage):
        """Extract *src* if it is an archive.

        Returns None when *src* has no archive suffix, otherwise the
        True/False result of the matching ``un_*`` method.
        """
        suffix = self.iszip(src)
        if suffix is None:
            return None
        family = suffix if suffix in (".zip", ".rar") else "tar"
        print(self._PROGRESS[(stage, family)] + src)
        if family == ".zip":
            return self.un_zip(src, dst)
        if family == ".rar":
            return self.un_rar(src, dst)
        return self.un_tar(src, dst)

    def recursive_unzip(self, repath):
        """Extract, in place, every archive below *repath*, depth first.

        An archive is deleted only after it was extracted successfully; the
        freshly created directory is then scanned for further archives.
        """
        for root, _dirs, files in os.walk(repath):
            for filename in files:
                src = os.path.join(root, filename)
                if self._extract(src, src, "child"):
                    os.remove(src)
                    # os.walk listed this directory before the new folder
                    # existed, so it has to be visited explicitly.
                    self.recursive_unzip(self.zip_to_path(src))

    def main_unzip(self):
        """Mirror ``self.path`` under ``self.output``, then unpack nested archives.

        Archives are extracted into the mirror, other files are copied.
        Archives that fail to extract are logged and not copied.
        """
        for root, _dirs, files in os.walk(self.path):
            if not files:
                continue
            mirror = os.path.join(self.output, root)
            os.makedirs(mirror, exist_ok=True)
            for filename in files:
                src = os.path.join(root, filename)
                dst = os.path.join(mirror, filename)
                if self._extract(src, dst, "main") is not None:
                    continue
                try:
                    shutil.copyfile(src, dst)
                except OSError as e:
                    print(str(e))
                    self.error_record(str(e))

        self.recursive_unzip(self.output + self.path)


def main():
    z = UnZip(filepath)  # relative path
    z.main_unzip()


if __name__ == '__main__':
    main()
4. 多线程
#!/usr/bin/env python3
"""Multithreaded variant: mirror a tree into ./unzip/, extracting archives.

Handled suffixes: .zip .rar .tar .tgz .tar.gz .tar.bz2 .tar.bz .tar.tgz

Work is collected into a task list first and then consumed by a pool of
worker threads through a lock-protected iterator.  Archives nested inside
other archives are handled in rounds: each round extracts the archives found
in the directories produced by the previous round.
"""
import os
import zlib
import shutil
import zipfile
import tarfile
import argparse
import time
import threading
from time import sleep
from itertools import chain
from typing import Any, Callable

try:
    # python-unrar needs the native UnRAR library (see UNRAR_LIB_PATH).  Keep
    # zip/tar support usable when it is missing; NOTE(review): the package is
    # believed to raise LookupError when the library cannot be located.
    import unrar
    from unrar import rarfile
except (ImportError, LookupError):
    unrar = None
    rarfile = None


filepath = "./filepath"  # relative path
thread_num = 1


def _build_parser():
    """Return the command-line parser of the script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose", action="store_true", help="./zipperpro.py -v")
    return parser


class BaseTool(object):
    """Thread runner, suffix helpers, error logging and archive extractors."""

    TAR_SUFFIXES = (".tar.gz", ".tar.bz2", ".tar.bz", ".tar.tgz", ".tar", ".tgz")

    def __init__(self):
        super(BaseTool, self).__init__()
        # Longer suffixes come first so ".tar.tgz" wins over ".tgz".
        self.compress = list(self.TAR_SUFFIXES) + [".zip", ".rar"]

    def run_threads(self, threads_number: int, target_function: Callable[..., Any], *args, **kwargs) -> None:
        """Run *target_function* in *threads_number* threads and wait for them.

        :param int threads_number: number of threads to start
        :param target_function: callable executed in every thread; it receives
            a ``threading.Event`` as first argument and should stop working
            once that event is cleared
        :param args: extra positional arguments for target_function
        :param kwargs: keyword arguments for target_function
        :return None

        A KeyboardInterrupt while waiting clears the event and then waits for
        the workers to wind down.
        """
        threads = []
        threads_running = threading.Event()
        threads_running.set()

        start = time.time()
        for thread_id in range(int(threads_number)):
            thread = threading.Thread(
                target=target_function,
                args=(threads_running,) + args,
                kwargs=kwargs,
                name="thread-{}".format(thread_id),
            )
            threads.append(thread)
            thread.start()

        try:
            # Poll with a timeout so Ctrl-C is noticed while waiting.
            for thread in threads:
                while thread.is_alive():
                    thread.join(1)
        except KeyboardInterrupt:
            threads_running.clear()

        for thread in threads:
            thread.join()

        print("Elapsed time: {} seconds".format(time.time() - start))

    def iszip(self, file):
        """Return the known archive suffix *file* ends with, else None.

        Matching is case-insensitive; the returned suffix is lower-case.
        """
        lowered = file.lower()
        for suffix in self.compress:
            if lowered.endswith(suffix):
                return suffix
        return None

    def zip_to_path(self, file):
        """Return *file* without its trailing archive suffix.

        Only the end of the path is touched; paths without a known suffix are
        returned unchanged.
        """
        suffix = self.iszip(file)
        if suffix is None:
            return file
        return file[:-len(suffix)]

    def error_record(self, info):
        """Append *info* as one line to error.txt in the working directory."""
        with open("error.txt", "a", encoding="utf-8") as log:
            log.write(info + "\n")

    def remove(self, filepath):
        """Delete archive *filepath*, but only if its extraction folder exists."""
        if os.path.exists(self.zip_to_path(filepath)) and os.path.exists(filepath):
            os.remove(filepath)

    def un_zip(self, src, dst):
        """Extract zip archive *src* into the directory ``zip_to_path(dst)``.

        src : aa/asdf.zip
        dst : unzip/aa/asdf.zip

        Returns True on success, the string "PassRequired" when zipfile
        raised RuntimeError (password-protected archive), and False for any
        other failure.  Failures are written to the error log.
        """
        uz_path = self.zip_to_path(dst)
        try:
            with zipfile.ZipFile(src) as zip_file:
                os.makedirs(uz_path, exist_ok=True)
                zip_file.extractall(uz_path)
        except zipfile.BadZipFile:
            self.error_record("bad zip file : " + src)
        except NotImplementedError as e:
            # Subclass of RuntimeError (unsupported compression method); must
            # not be mistaken for a missing password.
            print(e)
            self.error_record(str(e) + src)
        except RuntimeError:
            self.error_record("pass required : " + src)
            return "PassRequired"
        except zlib.error:
            print("zlib error : " + src)
            self.error_record("zlib error : " + src)
        except Exception as e:
            print(e)
            self.error_record(str(e) + src)
        else:
            return True
        return False

    def un_rar(self, src, dst):
        """Extract rar archive *src* into ``zip_to_path(dst)``.

        Returns True on success, False (after logging) otherwise, including
        when the unrar package could not be imported.
        """
        if rarfile is None:
            self.error_record("unrar library unavailable : " + src)
            return False
        try:
            rar = rarfile.RarFile(src)
            rar.extractall(self.zip_to_path(dst))
        except rarfile.BadRarFile:
            self.error_record("bad rar file : " + src)
        except Exception as e:
            print(e)
            self.error_record(str(e) + src)
        else:
            return True
        return False

    def un_tar(self, src, dst):
        """Extract tar archive *src* (any compression) into ``zip_to_path(dst)``.

        Returns True on success, False (after logging) otherwise.
        """
        uz_path = self.zip_to_path(dst)
        try:
            with tarfile.open(src) as tar:
                if hasattr(tarfile, "data_filter"):
                    # Archives are untrusted input: refuse members that would
                    # escape uz_path where the interpreter supports filters.
                    tar.extractall(path=uz_path, filter="data")
                else:
                    tar.extractall(path=uz_path)
        except tarfile.ReadError:
            self.error_record("bad tar file : " + src)
        except Exception as e:
            print(e)
            self.error_record(str(e) + src)
        else:
            return True
        return False


class LockedIterator(object):
    """Iterator wrapper that hands out items to several threads under a lock.

    Items are returned unchanged.  Both ``next(obj)`` and the legacy
    ``obj.next()`` spelling are supported.
    """

    def __init__(self, it):
        self.lock = threading.Lock()
        self.it = iter(it)

    def __iter__(self):
        return self

    def __next__(self):
        with self.lock:
            return next(self.it)

    next = __next__


class UnZip(BaseTool):
    """Extract the tree at *path* (a relative path) into ./unzip/.

    :param path: source directory, relative to the working directory
    :param verbose: print one progress line per archive
    """

    # Progress prefixes, keyed by (stage, archive family).
    _PROGRESS = {
        ("main", ".zip"): "[+] main unzip : ",
        ("main", ".rar"): "[+] main unrar : ",
        ("main", "tar"): "[+] main untar : ",
        ("child", ".zip"): "[+] child unzip: ",
        ("child", ".rar"): "[+] child unrar : ",
        ("child", "tar"): "[+] child untar : ",
    }

    def __init__(self, path, verbose=False):
        super(UnZip, self).__init__()
        self.path = path
        self.threads = thread_num
        self.output = "./unzip/"
        self.current_path = os.getcwd() + "/"
        # The command line is parsed in main(); the constructor no longer
        # reads sys.argv.  The attributes are kept for existing users.
        self.parser = _build_parser()
        self.args = argparse.Namespace(verbose=verbose)

    def run(self):
        """Process ``self.path``."""
        self.main_unzip(self.path)

    def _extract(self, src, dst, stage):
        """Extract *src* if it is an archive.

        Returns None when *src* has no archive suffix, otherwise the result
        of the matching ``un_*`` method.
        """
        suffix = self.iszip(src)
        if suffix is None:
            return None
        family = suffix if suffix in (".zip", ".rar") else "tar"
        if self.args.verbose:
            print(self._PROGRESS[(stage, family)] + src)
        if family == ".zip":
            return self.un_zip(src, dst)
        if family == ".rar":
            return self.un_rar(src, dst)
        return self.un_tar(src, dst)

    def recursive_unzip(self, repath):
        """Extract, in place, every archive below *repath*, round by round.

        Each round scans the directories created by the previous one, so no
        thread pool is ever started from inside a worker thread.  Every
        archive path is attempted at most once.
        """
        pending = [repath]
        seen = set()
        while pending:
            task_list = []
            for top in pending:
                for root, _dirs, files in os.walk(top):
                    for filename in files:
                        src = os.path.join(root, filename)
                        if src in seen or self.iszip(src) is None:
                            continue
                        seen.add(src)
                        task_list.append((src, src, "child"))
            if not task_list:
                break
            extracted = []  # filled by the workers with new directories
            print("[+] child unzip ...")
            self.run_threads(self.threads, self.do_unzip, LockedIterator(task_list), extracted)
            pending = extracted

    def main_unzip(self, mainpath):
        """Mirror *mainpath* under ``self.output``, then unpack nested archives.

        Files whose destination already exists are skipped, so an interrupted
        run can be resumed.  Archives that fail to extract are logged and not
        copied.
        """
        task_list = []
        print("Initialization......")
        for root, _dirs, files in os.walk(mainpath):
            if not files:
                continue
            mirror = os.path.join(self.output, root)
            os.makedirs(mirror, exist_ok=True)
            for filename in files:
                src = os.path.join(root, filename)
                dst = os.path.join(mirror, filename)
                if not os.path.exists(self.zip_to_path(dst)):
                    task_list.append((src, dst, "main"))
        print("[+] main unzip ...")
        self.run_threads(self.threads, self.do_unzip, LockedIterator(task_list))
        self.recursive_unzip(self.output + self.path)

    def do_unzip(self, running, data, extracted=None):
        """Worker loop: consume ``(src, dst, flag)`` tasks from *data*.

        :param running: event; the loop stops once it is cleared
        :param data: iterator of tasks shared between the workers
        :param extracted: optional list; directories created by successful
            "child" extractions are appended to it.  When omitted, each such
            directory is scanned immediately via ``recursive_unzip``.

        "main" tasks extract archives into *dst* and copy other files.
        "child" tasks extract *src* in place and delete it on success.
        """
        while running.is_set():
            try:
                src, dst, flag = next(data)
            except StopIteration:
                break

            if flag == "main":
                if self._extract(src, dst, "main") is None:
                    try:
                        shutil.copyfile(src, dst)
                    except OSError as e:
                        print(str(e))
                        self.error_record(str(e))
            elif flag == "child":
                # "PassRequired" is truthy, hence the identity check.
                if self._extract(src, src, "child") is True:
                    self.remove(src)
                    target = self.zip_to_path(src)
                    if extracted is None:
                        self.recursive_unzip(target)
                    else:
                        extracted.append(target)


def main():
    args = _build_parser().parse_args()
    z = UnZip(filepath, verbose=args.verbose)
    z.run()


if __name__ == '__main__':
    main()
5. 问题
- 中文压缩包乱码,中文路径解压出错
- rar解压遇加密文件卡死
https://github.com/Gitmaninc/SmallTooools/tree/master/unzip-tool