• Python转码&解压&多进程


    Python批量转换文件编码格式

    Eclipse中看ANSI编码的文件有乱码,所以希望通过python将相关文件转换成utf-8编码。

    源:https://www.cnblogs.com/tsbc/p/4450675.html

    '''

    遍历文件夹

    如果文件扩展名是 .cpp 或 .h

        如果原来的编码不是utf-8,将文件编码格式改成utf-8

    '''

    import os,sys

    import chardet

    def convert(filename, out_enc="UTF8"):
        """Re-encode ``filename`` in place as ``out_enc`` unless it is already UTF-8.

        The current encoding is guessed with ``chardet``. Bytes that cannot be
        decoded with the guessed encoding are dropped (``errors="ignore"``).

        Args:
            filename: Path of the file to convert; it is rewritten in place.
            out_enc: Name of the target encoding.

        I/O errors are reported on stdout and otherwise swallowed, so a caller
        walking a directory tree can carry on with the next file.
        """
        try:
            # The context manager closes the handle on every path, including
            # the "already UTF-8" case and failures part-way through.
            with open(filename, 'rb+') as fp:
                content = fp.read()
                coding = chardet.detect(content)['encoding']  # guessed encoding name
                # chardet yields None when it cannot make a guess (e.g. for an
                # empty file); there is nothing to decode with in that case.
                if coding is None or coding == 'utf-8':
                    return
                new_content = content.decode(coding, "ignore").encode(out_enc)
                fp.seek(0)
                fp.write(new_content)
                # Cut off leftover bytes when the re-encoded text is shorter
                # than the original content.
                fp.truncate()
        except IOError:
            print( " error")

    def explore(dir):
        """Walk ``dir`` recursively and convert every ``.cpp`` / ``.h`` file found.

        Args:
            dir: Root directory to traverse.
        """
        for root, _dirs, files in os.walk(dir):
            for name in files:
                # Compare the actual suffix: a substring test would also pick
                # up names such as "page.html" or "main.cpp.bak".
                if name.endswith(('.cpp', '.h')):
                    convert(os.path.join(root, name))

    # Root directory handed to explore(). The separator after the drive letter
    # matters: 'E:Code' would mean "Code relative to the current directory of
    # drive E:", not the folder E:\Code.
    fiePath = r'E:\Code'

    def main():
        """Convert the source tree rooted at ``fiePath``."""
        explore(fiePath)

    if __name__ == "__main__":
        main()

    Python解压

    https://www.cnblogs.com/Oliva/p/8824040.html 多线程字典破解加密zip

    https://www.cnblogs.com/fyqq0403/p/9710420.html 解压加密的zip

    https://www.cnblogs.com/flyhigh1860/p/3884842.html 解压zip

    Python多线程&多进程

    https://www.cnblogs.com/yeayee/p/4952022.html 基础介绍

    https://www.cnblogs.com/kellyseeme/p/5525017.html 锁的应用

    https://www.cnblogs.com/znicy/p/6234522.html  通过多进程的方式解决了解压缩的性能问题

    https://www.cnblogs.com/xybaby/p/6510941.html#undefined  python性能优化,介绍了GIL导致多线程的问题

    https://www.cnblogs.com/SuKiWX/p/8804974.html   python GIL解释

    用python解压多个压缩文件(环境中有6000个左右压缩文件)时遇到瓶颈,解压过程非常慢。尝试用多线程解压,处理时间不仅没有减少,反而增加了。参考上述博客后,改用多进程解压以缩短处理时间。

    import zipfile

    import tarfile

    import gzip

    import os

    from time import ctime

    from multiprocessing import Pool

    from multiprocessing import cpu_count

    # Directory scanned by unzipDayFile() and untarDayFile() for .zip / .tar.gz archives.
    dayZipsPath = r'.'

    # Directory those archives are extracted into; unzip() reads its .zip files from here.
    quarterZipsPath = r'./tmp'

    # Password passed as ``pwd`` to every ZipFile.extractall() call.
    zipPassWord = b'password'

    # Directory unzip() extracts the archives from quarterZipsPath into.
    mrFilePath = r'./data'

    def unzipDayFile():
        """Extract every ``.zip`` archive in ``dayZipsPath`` into ``quarterZipsPath``.

        Archives are opened with the password ``zipPassWord``. The source
        archives are left in place after extraction.
        """
        for file_name in os.listdir(dayZipsPath):
            if os.path.splitext(file_name)[1] == '.zip':
                print( file_name)
                # listdir() yields bare names, so anchor them to the listed
                # directory instead of relying on the current working directory.
                zip_path = os.path.join(dayZipsPath, file_name)
                # The context manager closes the archive even if extraction fails.
                with zipfile.ZipFile(zip_path, 'r') as file_zip:
                    file_zip.extractall(path = quarterZipsPath, pwd = zipPassWord)

    def untarDayFile():
        """Extract every ``.tar.gz`` archive in ``dayZipsPath`` into ``quarterZipsPath``.

        The source archives are left in place after extraction.
        """
        for file_name in os.listdir(dayZipsPath):
            # Match the suffix only; a substring test would also accept names
            # like "data.tar.gz.md5".
            if file_name.endswith('.tar.gz'):
                print( file_name)
                # listdir() yields bare names, so anchor them to the listed
                # directory instead of relying on the current working directory.
                tar_path = os.path.join(dayZipsPath, file_name)
                # NOTE(review): extractall() trusts the member paths stored in
                # the archive; only feed it archives from a trusted source.
                with tarfile.open(tar_path) as file_tar:
                    file_tar.extractall(path = quarterZipsPath)

    def unzip(zipsList):
        """Extract the ``.zip`` archives named in ``zipsList`` into ``mrFilePath``.

        Each name is resolved inside ``quarterZipsPath`` and opened with the
        password ``zipPassWord``. An archive is deleted once it has been
        extracted successfully. An archive that fails to extract is reported
        on stdout and kept, and the remaining names are still processed.

        Args:
            zipsList: Iterable of bare file names located in
                ``quarterZipsPath``; names without a ``.zip`` suffix are skipped.
        """
        for file_name in zipsList:
            if os.path.splitext(file_name)[1] != '.zip':
                continue
            zipFileName = os.path.join(quarterZipsPath, file_name)
            try:
                # The context manager closes the archive even when extraction
                # raises, so the handle is not leaked.
                with zipfile.ZipFile(zipFileName, 'r') as file_zip:
                    file_zip.extractall(path = mrFilePath, pwd = zipPassWord)
            except (zipfile.BadZipFile, RuntimeError, OSError) as err:
                # One corrupt or wrongly-encrypted archive must not abort the
                # rest of this batch; report it and move on.
                print("unzip failed: %s (%s)" % (zipFileName, err))
                continue
            os.remove(zipFileName)

    if __name__ == '__main__':
        # Script entry point: unpack the daily archives into quarterZipsPath,
        # then extract the archives found there in parallel worker processes.
        print('Begin:%s' % ctime())
        # Number of CPU cores; one batch of archives is dispatched per core.
        cpuNum = cpu_count()
        print(cpuNum)

        unzipDayFile()
        untarDayFile()

        # Make sure the staging directory exists even when no daily archive
        # was extracted, so the listing below cannot fail.
        os.makedirs(quarterZipsPath, exist_ok=True)

        # Extract with multiple processes, which greatly shortens processing time.
        quarterZipsList = os.listdir(quarterZipsPath)
        zipFileNum = len(quarterZipsList)
        print("total zip files num:%d" % (zipFileNum))
        print("begin unzip:%s" % ctime())
        p = Pool()
        pending = []
        for i in range(cpuNum):
            # Split the listing into cpuNum contiguous, near-equal slices.
            beginPos = i * zipFileNum // cpuNum
            endPos = min((i + 1) * zipFileNum // cpuNum, zipFileNum)
            print("proc %d - %d" % (beginPos, endPos))
            pending.append(
                p.apply_async(unzip, args=(quarterZipsList[beginPos:endPos],)))
        print("waiting for unzip quarter mr data ...")
        p.close()
        p.join()
        # apply_async() keeps a worker's exception inside its AsyncResult;
        # get() is the only place it surfaces, so check every result.
        for result in pending:
            try:
                result.get()
            except Exception as err:
                print("unzip worker failed: %s" % err)
        print("end unzip:%s" % ctime())
        print( "End:%s" % ctime())

  • 相关阅读:
    03.redis集群
    02.redis数据同步
    01.redis数据类型
    06.MySQL主从同步
    05.MySQL优化
    04.MySQL慢查询
    lamp服务器站点目录被植入广告代码
    tar命令简单用法
    linux基础优化
    Linux 思想与法则
  • 原文地址:https://www.cnblogs.com/sunnypoem/p/10123142.html
Copyright © 2020-2023  润新知