• python 将文件 批量转换编码


    文件批量转换编码：原始基础代码来自互联网，我在其基础上做了一些修改和适配，在此记录一下。实际只用到了 code2code 这个方法，其他方法没有用到，也就没有去改动。

    # -*- coding:utf-8 -*-
    # @Time : 2022/3/22 20:04
    # @Author: zhcode
    # @File : convert_file_encode.py
    import chardet
    import codecs
    import os
    import sys
    import time
     
    # Absolute path of the working directory, resolved once when the module is
    # loaded; used as the default folder to scan when no directory is given.
    CURRENT_PATH = os.path.abspath('.')
     
    def code2code(filename, encode_out):
        """
        Re-encode one file in place from its detected encoding to ``encode_out``.

        The source encoding is guessed with ``chardet``. The new content is
        encoded in memory *before* the file is reopened for writing, so a
        character that cannot be represented in ``encode_out`` raises without
        truncating the original file.

        :param filename: path of the file to convert (rewritten in place)
        :param encode_out: target codec name, e.g. ``"utf-8"``; ``None`` writes
            the text with the platform's default text encoding
        :return: ``(base file name, detected encoding)`` when the file was
            rewritten; ``None`` when it was skipped (already in the target
            encoding, or the encoding could not be detected)
        :raises UnicodeError: when the content cannot be decoded with the
            detected encoding or encoded with ``encode_out``
        :raises LookupError: when either codec name is unknown to Python
        """
        print("transfer filename " + filename + " ... ")

        # Read the raw bytes once; the context manager closes the handle.
        with open(filename, "rb") as raw_file:
            raw = raw_file.read()
        fileencode = chardet.detect(raw)['encoding']

        # chardet reports names such as "UTF-8" or "GB2312", so the comparison
        # with the requested codec has to ignore case.
        already_target = fileencode == encode_out or bool(
            fileencode and encode_out
            and fileencode.lower() == encode_out.lower()
        )
        if already_target:
            print("encode is no need to transfer...")
            return None

        if not fileencode:
            # Detection failed (e.g. empty or binary file): leave it untouched.
            print(fileencode)
            return None
        print("transfer detect codetype = " + fileencode)

        data = raw.decode(fileencode)
        if encode_out is None:
            with open(filename, mode='w') as fo:
                fo.write(data)
        else:
            # Encode first: a failure here must not leave an emptied file.
            payload = data.encode(encode_out)
            with open(filename, mode='wb') as fo:
                fo.write(payload)
        return os.path.basename(filename), fileencode
     
     
    def main():
        """
        Command-line entry point: parse ``sys.argv`` and run the conversion.

        Invocations:

        * no arguments        -- convert the current folder to gb2312
        * ``-h``              -- print the usage text and stop
        * ``-f <type>``       -- target encoding (default ``gb2312``)
        * ``-d <dirname>``    -- folder to convert (default: current folder)

        Options are read as ``flag value`` pairs; when a flag is repeated the
        first occurrence wins. Any error is printed and the process exits with
        status 1; Ctrl-C prints a farewell instead.
        """
        default_encoding = "gb2312"
        try:
            argv = sys.argv[1:]
            if not argv:
                # The usage text promises gb2312 for a bare invocation.
                convert(f=default_encoding)
            elif argv[0] == '-h':
                helper()
            else:
                if len(argv) % 2:
                    raise ValueError(
                        "option {} requires a value".format(argv[-1]))

                options = {}
                for flag, value in zip(argv[0::2], argv[1::2]):
                    options.setdefault(flag, value)

                if '-h' in options:
                    # Asking for help must not also trigger a conversion.
                    helper()
                    return

                encode_out = options.get('-f', default_encoding)
                dir_path = options.get('-d')
                convert(f=encode_out, d=dir_path)

                print('Finish 转换完毕')

        except KeyboardInterrupt:
            print("\ngoodbye.")
        except Exception as ex:
            print(ex)
            sys.exit(1)
     
     
    def time_format(time_diff):
        """
        Render a duration given in seconds as an ``H:M:S`` string.

        Each field is prefixed with a single ``0`` when its value is below 10,
        so hours of 100 or more simply use more digits.

        :param time_diff: elapsed time in seconds
        :return: formatted string such as ``"01:01:01"``
        """
        def two_digits(value):
            if value < 10:
                return "0{}".format(value)
            return str(value)

        hours = int(time_diff / 3600)
        minutes = int((time_diff % 3600) / 60)
        seconds = int(time_diff % 60)
        return ":".join(two_digits(part) for part in (hours, minutes, seconds))
     
     
    def traverse_dir(file_dir):
        """
        Walk a folder recursively and collect the files eligible for conversion.

        Files whose name ends with ``.py``, ``.class``, ``.gif``, ``.png``,
        ``.jpg`` or ``.project`` are skipped. The comparison ignores case, and
        it is done on the whole file name so that a dot-file called exactly
        ``.project`` is skipped too (``os.path.splitext`` reports no extension
        for such a name).

        :param file_dir: folder to traverse
        :return: list of file paths (walk root joined with the file name);
            empty when ``file_dir`` is not an existing folder
        """
        skipped_suffixes = (".py", ".class", ".gif", ".png", ".jpg", ".project")

        # os.walk silently yields nothing for a bad path, so the path has to be
        # validated explicitly for the warning below to ever be shown.
        try:
            is_folder = os.path.isdir(file_dir)
        except (TypeError, ValueError):
            is_folder = False
        if not is_folder:
            print("文件路径不正确!")
            return []

        file_path_list = []
        for root, _dirs, files in os.walk(file_dir):
            for file in files:
                if not file.lower().endswith(skipped_suffixes):
                    file_path_list.append(os.path.join(root, file))
        return file_path_list
     
     
    def convert(f=None, d=None):
        """
        Convert every eligible file under a folder and report progress.

        The files come from ``traverse_dir``; each one is handed to
        ``code2code`` together with ``f``, and ``progressbar`` is updated after
        every file.

        :param f: target encoding name, passed to ``code2code`` unchanged
        :param d: folder to process; a falsy value means ``CURRENT_PATH``
        :raises Exception: when ``d`` is given but is not an existing folder
        """
        if not d:
            dir_path = CURRENT_PATH
        elif os.path.isdir(d):
            dir_path = d
        else:
            raise Exception("该路径不是一个文件夹.")

        start_time = time.time()
        file_path_list = traverse_dir(dir_path)
        total = len(file_path_list)
        for done, path in enumerate(file_path_list, start=1):
            result = code2code(path, f)
            if result is None:
                # code2code returns None for skipped files; unpacking that
                # directly would abort the whole batch with a TypeError.
                file_name, file_encode = os.path.basename(path), None
            else:
                file_name, file_encode = result

            time_eat = time_format(int(time.time() - start_time))
            progressbar(done, total, 50,
                        "{} {} {}".format(time_eat, file_name, file_encode))
     
     
    def progressbar(curr, total, duration=10, extra=''):
        """
        Draw a one-line text progress bar on stdout.

        The line starts with a carriage return so that successive calls
        overwrite each other; a newline is only emitted once ``curr`` reaches
        ``total``.

        :param curr: number of items finished so far
        :param total: total number of items; ``0`` or less draws an empty bar
        :param duration: width of the bar in characters
        :param extra: optional text appended after the bar
        :return: None
        """
        # Guard against division by zero and keep the bar inside its width.
        frac = float(curr) / total if total > 0 else 0.0
        frac = min(max(frac, 0.0), 1.0)
        filled = int(round(frac * duration))

        bar = '#' * filled + ' ' * (duration - filled)
        line = '\r' + bar + '[0~{0:.0f}]'.format(frac * duration)
        if extra:
            line += ' ' + extra
        if curr >= total:
            line += '\n'

        sys.stdout.write(line)
        sys.stdout.flush()
     
     
    def helper():
        """
        Print the command-line usage text.

        The script name shown in each usage line is the base name of
        ``sys.argv[0]`` prefixed with ``./``.
        """
        app_name = "./{}".format(os.path.split(sys.argv[0])[-1])
        usage_lines = (
            '{app_name}                  # 将当前文件夹下文件格式转换为gb2312格式.',
            '{app_name} -f <type>        # 设置转换的编码格式',
            '{app_name} -d <dirname>     # 设置转换文件的路径',
            '{app_name} -h               # 帮助',
        )

        print("====== Image format conversion ======")
        for line in usage_lines:
            # Substitute the placeholder; printing the template as-is would
            # show a literal "{app_name}".
            print(line.format(app_name=app_name))
     
     
    if __name__ == '__main__':
        # The argument-parsing entry point main() is not used here: the script
        # converts every eligible file under the current folder to UTF-8 and
        # prints what code2code returns for each one.
        targets = traverse_dir(CURRENT_PATH)

        print("file_name_list = ", len(targets))
        for path in targets:
            print(code2code(path, "utf-8"))
     
  • 相关阅读:
    通过http.client解析url返回的数据时为什么中文变成了unicode码
    爬取微博的数据时别人用的是FM.view方法传递html标签那么jsoup怎么解析呢
    [Functional Programming] Function modelling -- 4. Reader Monad example
    [Angular] Understanding the Angular Component providers property
    [HTML5] Correctly Define Heading Levels of a Web Page
    [Functional Programming] Function modelling -- 3. Reader Monad
    [Functional Programming] 1. Function modelling -- Combine functions
    [React] When to useReducer instead of useState
    [Web component] Using Custom Events as a web component API
    [Functional Programming] 2. Function modelling -- Validation
  • 原文地址:https://www.cnblogs.com/lesten/p/16385354.html
Copyright © 2020-2023  润新知