• python 大文件读写


    1. 使用
    
    import smbclient  # pip install smbprotocol
    import zlib
    import re
    import os
    
    smbclient.register_session("10.x.x.x", username="123", password="123")
    # A UNC path needs the leading double backslash and a separator before the
    # share name; the raw string keeps every backslash literal.
    img_dir = r"\\10.x.x.x\img"
    # listdir() returns the entry names inside the directory, not a single path.
    img_path = smbclient.listdir(img_dir)

    # The archive name is expected to embed the checksum as a hex literal
    # (presumably something like "image_0x1a2b3c4d.tar" -- confirm against the
    # real share contents).
    # Option 1: plain string splitting
    #   file_checksum = file_name.rsplit('.', 1)[0].split('_')[1]
    # Option 2: regular expression (used below)
    path = None
    file_checksum = None
    for file_name in img_path:
        res = re.search(r'(0x\w+)\.tar', file_name)
        if res:
            # Full remote path of the archive, consumed by the download step.
            path = img_dir + "\\" + file_name
            file_checksum = res.group(1)
            break
    if path is None:
        # Fail with a clear message instead of an AttributeError on res.group().
        raise FileNotFoundError("no '*0x<checksum>.tar' archive found in " + img_dir)
    
    
    def read_in_chunks(filePath, chunk_size=1024*1024):
        """
        Lazily read a remote SMB file piece by piece (generator).

        :param filePath: UNC path of the remote file, passed to smbclient.open_file.
        :param chunk_size: maximum number of bytes per piece (default 1 MiB).
        :return: yields successive ``bytes`` chunks until the file is exhausted.
        """
        # The with-block guarantees the remote handle is released when the
        # generator finishes, is closed early, or the consumer raises.
        with smbclient.open_file(filePath, mode='rb') as file_object:
            while True:
                chunk_data = file_object.read(chunk_size)
                if not chunk_data:
                    break
                yield chunk_data
    
    
    def verify_file_checksum(file, checksum, chunk_size=1024*1024):
        """
        Check a local file against an expected Adler-32 checksum.

        :param file: path of the local file to verify.
        :param checksum: expected checksum as a hexadecimal string (an optional
            "0x" prefix is accepted by ``int(..., 16)``).
        :param chunk_size: number of bytes read per iteration (default 1 MiB).
        :return: True when the checksums match, False otherwise.
        """
        # Feed the file through adler32 incrementally so a large file is never
        # held in memory at once; 1 is the documented Adler-32 start value, so
        # the result equals zlib.adler32() over the whole content.
        running = 1
        with open(file, 'rb') as f:
            while True:
                data = f.read(chunk_size)
                if not data:
                    break
                running = zlib.adler32(data, running)
        if running == int(checksum, 16):
            print ('checksum verification pass')
            return True
        print ('checksum verification fail')
        return False
    
    if __name__ == "__main__":
        # Open the destination once in 'wb' mode: this truncates any stale copy
        # (so no os.remove() is needed, which raised FileNotFoundError when the
        # file did not exist yet) and avoids re-opening the file for every chunk.
        with open('1.img', mode='wb') as fw:
            for chunk in read_in_chunks(path):
                fw.write(chunk)

        # Compare the downloaded copy against the checksum parsed from the name.
        verify_file_checksum('1.img', file_checksum)
    
    
    1. 模板
    def read_in_chunks(filePath, chunk_size=1024*1024):
        """
        Lazily read a file piece by piece (generator).

        :param filePath: path of the file to read; it is opened in text mode.
        :param chunk_size: maximum number of characters per piece
            (default 1024*1024).
        :return: yields successive chunks until the file is exhausted.
        """
        # The with-block closes the file when the generator is exhausted,
        # closed early, or garbage-collected, instead of leaking the handle.
        with open(filePath) as file_object:
            while True:
                chunk_data = file_object.read(chunk_size)
                if not chunk_data:
                    break
                yield chunk_data
    
    if __name__ == "__main__":
        # Stream the file and hand each piece to the consumer as it arrives.
        filePath = 'filename'
        chunks = read_in_chunks(filePath)
        for chunk in chunks:
            # Placeholder: substitute the real per-chunk work here.
            process(chunk)
    
    
    1. fileinput()
    
    fileinput模块可以对一个或多个文件中的内容进行迭代、遍历等操作。
    该模块的input()函数有点类似文件的readlines()方法,但它返回的是一个可迭代对象,每次只生成一行,需要用for循环迭代;在读取大文件时不必一次性把全部内容载入内存,因此效率更高。
    用fileinput对文件进行循环遍历,格式化输出,查找、替换等操作,非常方便。
    
    import fileinput
    # The context manager closes the underlying file(s) once iteration ends.
    with fileinput.input(files=['sum.log']) as lines:
        for line in lines:
            # print() is a function in Python 3; each line already ends with
            # its own newline, so suppress the extra one print() would add.
            print(line, end='')
    
    
    
  • 相关阅读:
    DjangoORM更新操作、查询方式与优化
    uniapp 打电话功能的几种方式
    liveserver 安装和运行:
    django的ORM中get方法异常,**(双星号)
    Django的DRF
    java中枚举类常见用法
    Oracle数据库备份和还原
    node.js安装教程
    node.js安装下载
    java中数据匹配
  • 原文地址:https://www.cnblogs.com/amize/p/14266472.html
Copyright © 2020-2023  润新知