"""Extract base64-encoded JPEG images embedded in a text file (e.g. .mht).

Usage: run the script with the path of the file to scan as first argument.
The images are written as 1.jpg, 2.jpg, ... into a directory created next
to the input file.
"""
import os
import sys
import re
import time
import base64

# Path processed when the script is started without any argument.
DEFAULT_FILE_PATH = "E:\\Scripts\\test.mht"

# A run of text that starts with "/9j" (base64 text of a JPEG begins with
# these characters) and stops right before the next blank line.
_JPEG_BLOCK = re.compile(r"/9j.*?(?=\n\n)", re.DOTALL)


def extract_data(filename, dir):
    """Decode every "/9j..." base64 block found in *filename* into *dir*.

    Each block runs from "/9j" up to the next blank line. Blocks are
    written, in order of appearance, to "<dir>/1.jpg", "<dir>/2.jpg", ...
    and every target path is printed before it is written.

    Args:
        filename: Path of the text file to scan.
        dir: Existing directory that receives the decoded images. (The
            name shadows the builtin but is kept so keyword callers do
            not break.)

    Raises:
        IOError/OSError: if the input cannot be read or an output file
            cannot be written.
        binascii.Error: if a block cannot be decoded as base64.
    """
    with open(filename, "r") as source:
        content = source.read()

    for index, match in enumerate(_JPEG_BLOCK.finditer(content), 1):
        target = "%s/%s.jpg" % (dir, index)
        print(target)

        encoded = match.group().replace("\n", "")
        # Best-effort repair for text whose length is not a multiple of
        # four: append one "A===" filler per missing character so that
        # b64decode does not reject the block for incorrect padding.
        missing = (4 - len(encoded) % 4) % 4
        encoded += "A===" * missing

        # Decode before opening the target so a decoding failure does not
        # leave an empty image file behind.
        data = base64.b64decode(encoded)
        with open(target, "wb") as image:
            image.write(data)


def export_file(file_path):
    """Create the output directory for *file_path* and extract its images.

    The directory lives next to the input file. Its name is the file name
    up to the first dot; when the name has no dot, or nothing precedes the
    dot, "_dir" is appended to the full file name instead so the directory
    cannot collide with the file or its parent. Both "/" and "\\" are
    treated as path separators regardless of the platform.

    If the directory already exists a notice is printed and extraction
    proceeds into it.

    Args:
        file_path: Path of the file to scan.
    """
    # Split at the last separator of either kind. Plain string slicing is
    # used (instead of building a regex from the file name) so that names
    # containing regex metacharacters such as "+" or "$" are handled, and
    # so that a parent directory sharing the file's name cannot be
    # mistaken for the file itself.
    cut = max(file_path.rfind("/"), file_path.rfind("\\")) + 1
    parent = file_path[:cut]
    file_name_ext = file_path[cut:]

    stem, dot, _ = file_name_ext.partition(".")
    if dot and stem:
        dir_name = stem
    else:
        dir_name = file_name_ext + "_dir"

    full_path = parent + dir_name
    if os.path.exists(full_path):
        print("The Directory \"%s\" Exists!\n" % full_path)
    else:
        os.mkdir(full_path)

    extract_data(file_path, full_path)


def main():
    """Process the first command-line argument, or the default path.

    Prints ``os.name`` first. Arguments after the first one are ignored.
    """
    print(os.name)
    arguments = sys.argv[1:]
    if arguments:
        export_file(arguments[0])
    else:
        export_file(DEFAULT_FILE_PATH)


if __name__ == "__main__":
    main()
    # Short pause after the run, announced to the user beforehand.
    print("just wait for 2 seconds!\n")
    time.sleep(2)