• Python3之常用模块


    大纲>>

    1. time &datetime模块
    2. random模块
    3. OS模块
    4. sys模块
    5. shelve模块
    6. shutil模块
    7. xml模块
    8. configparser模块
    9. Hashlib、Hmac模块
    10. zipfile&tarfile模块
    11. PyYAML模块
    12. re正则表达式

    time & datetime模块

    # !/usr/bin/env python
    """Demo of the `time` and `datetime` standard-library modules.

    `time` exposes three representations of a point in time:
      1. timestamp      - seconds elapsed since the epoch 1970-01-01 00:00:00 UTC
      2. format string  - produced with strftime() format codes
      3. struct_time    - a tuple of 9 named fields (tm_year, tm_mon, ...)

    Common strftime format codes:
        %a / %A   abbreviated / full weekday name (locale)
        %b / %B   abbreviated / full month name (locale)
        %c        locale date-and-time representation
        %d        day of the month (01 - 31)
        %H / %I   hour, 24-hour (00 - 23) / 12-hour (01 - 12)
        %j        day of the year (001 - 366)
        %m        month (01 - 12)
        %M        minute (00 - 59)
        %p        locale AM/PM designator
        %S        second (00 - 61, allowing leap seconds)
        %U        week of the year (00 - 53), weeks start on Sunday;
                  days before the first Sunday are week 0
        %w        weekday as a number (0 - 6, 0 is Sunday)
        %W        like %U but weeks start on Monday
        %x / %X   locale date / time representation
        %y / %Y   year without / with century
        %Z        time-zone name (empty string if no zone)
        %%        a literal '%' character
    """
    import time
    import datetime

    # print(help(time))
    # print(help(time.ctime))  # inspect the docs of one specific function

    # Current time as a timestamp (float seconds since the epoch).
    print(time.time())
    # High-resolution performance counter for timing code.
    # BUG FIX: time.clock() was deprecated in 3.3 and *removed* in
    # Python 3.8; time.perf_counter() is the documented replacement.
    print(time.perf_counter())

    # Sleep for the given number of seconds.
    # print(time.sleep(1))

    # struct_time in UTC; time.gmtime(x) also accepts a timestamp x.
    print(time.gmtime())

    # struct_time in local time (UTC+8 for the original author);
    # time.localtime(x) also accepts a timestamp x.
    print(time.localtime())


    x = time.localtime()
    print("x:", x)
    # Format a struct_time with a custom format: time.strftime(format, struct_time).
    print(time.strftime("%Y-%m-%d %H:%M:%S", x))

    # Timestamp -> fixed human-readable string, e.g. "Tue Jun 16 11:53:31 2009".
    print(time.ctime(1245124411))

    # Individual fields of a struct_time: year / month / day ...
    print(x.tm_year, x.tm_mon, x.tm_mday, x.tm_hour, x.tm_min, x.tm_sec)

    # struct_time -> timestamp.
    print(time.mktime(x))

    # Timestamp -> formatted string, via an intermediate struct_time.
    print(time.gmtime(time.time()-86640))   # UTC timestamp -> struct_time
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))   # UTC struct_time -> string


    """
        datetime module: date/time arithmetic.
    """
    print("时间加减datetime模块".center(50, "~"))

    # e.g. 2018-01-20 23:20:49.418354
    print(datetime.datetime.now())

    # Timestamp -> date, e.g. 2018-01-20
    print(datetime.date.fromtimestamp(time.time()))

    # Current time + 3 days (the first positional argument is days).
    print(datetime.datetime.now() + datetime.timedelta(3))

    # Current time - 3 days
    print(datetime.datetime.now() + datetime.timedelta(-3))

    # Current time + 3 hours
    print(datetime.datetime.now() + datetime.timedelta(hours=3))

    # Current time + 30 minutes
    print(datetime.datetime.now() + datetime.timedelta(minutes=30))

    c_time = datetime.datetime.now()
    # Replace selected fields of a datetime, leaving the rest untouched.
    print(c_time.replace(minute=54, hour=5))
    

      

    时间关系转换图:

    random模块

    # Author:Allister.Liu
    # !/usr/bin/env python
    # Demo of the `random` standard-library module.
    # NOTE: every call below is non-deterministic; values in the trailing
    # comments are sample outputs from one particular run, for illustration.
    import random
    
    """
        The random module: pseudo-random numbers, choices and shuffles.
    """
    
    # Random float n with 0 <= n < 1.0
    print(random.random())
    
    # random.randint(a, b): random integer n in the *inclusive* range a <= n <= b
    print(random.randint(1, 10))
    
    
    # random.randrange([start], stop[, step]):
    # pick one element from the arithmetic progression range(start, stop, step),
    # e.g. random.randrange(10, 100, 2) picks from [10, 12, 14, ... 96, 98];
    # random.randrange(10, 100, 2) is equivalent in result to
    # random.choice(range(10, 100, 2)).
    print(random.randrange(1, 10))
    print(random.choice(range(10, 100, 2)))
    
    # random.choice(sequence): one random element from any sequence --
    # "sequence" is not one specific type; list, tuple and str all qualify.
    print(random.choice("abcdef"))
    
    print(random.choice("学习Python的小伙伴"))  # sample output: 伙
    print(random.choice(["JGood", "is", "a", "handsome", "boy"]))  # sample: boy (list)
    print(random.choice(("Tuple","List","Dict")))   # sample: Tuple
    
    # random.sample(sequence, k): k distinct elements picked from the
    # sequence; the original sequence is left unmodified.
    print(random.sample([1, 2, 3, 4, 5, 6, 7, 8, 9], 5))  # sample: [2, 1, 9, 5, 7]
    
    
    # Random integer:
    print(random.randint(0, 99))  # sample: 70
    
    # Random even number between 0 and 100:
    print(random.randrange(0, 101, 2))  # sample: 4
    
    # Random floats:
    print(random.random())  # sample: 0.2746445568079129
    print(random.uniform(1, 10))  # sample: 9.887001463194844
    
    # Random character:
    print(random.choice('abcdefg&#%^*f'))  # sample: e
    
    # A fixed number of characters drawn from a string:
    print(random.sample('abcdefghij123', 3))  # sample: ['3', 'j', 'i']
    
    # Random string picked from a list:
    print(random.choice(['apple', 'pear', 'peach', 'orange', 'lemon']))  # sample: peach
    
    # Shuffle a list in place:
    items = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    print(items)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]
    
    random.shuffle(items)
    print(items)  # sample: [8, 3, 6, 1, 4, 9, 5, 7, 2]
    
    
    """
     生成6为验证码:由数字, 大写字母, 小写字母组成的6位随机验证码
    """
    
    
    def produce_check_code(scope = 6):
        check_code = ""
        for i in range(scope):
            tmp = random.randint(0, 10)
    
            if tmp < 6:
                tmp = random.randint(0, 9)
            elif tmp > 8:
                tmp = chr(random.randint(65, 90))
            else:
                tmp = chr(random.randint(97, 122))
    
            check_code += str(tmp)
        return check_code
    
    
    print(produce_check_code(8))
    

      

    0.21786963196954112
    3
    2
    34
    b
    的
    JGood
    List
    [7, 2, 6, 4, 8]
    12
    14
    0.5355914470942843
    3.3065568721321013
    %
    ['2', 'g', 'f']
    pear
    [1, 2, 3, 4, 5, 6, 7, 8, 9]
    [6, 7, 5, 9, 1, 2, 3, 4, 8]
    D626EbYt
    

     

    OS模块

    提供对操作系统进行调用的接口:

    # Author:Allister.Liu
    # !/usr/bin/env python
    # Demo of the `os` module: an interface to operating-system services.
    # NOTE(review): the hard-coded Windows paths (E:/logo/...) and the
    # "dir" shell command assume the author's Windows machine -- the
    # script raises elsewhere; adapt the paths before running.
    import os
    
    """
        OS模块:
    """
    path = "E:/logo/ic2c/logo.png"
    
    # Current working directory of this script -- like `pwd` on Linux.
    print(os.getcwd())
    
    # Change the working directory -- like `cd` in a shell.
    # os.chdir("dirname")
    
    # The string for the current directory: '.'
    print(os.curdir)
    
    # The string for the parent directory: '..'
    print(os.pardir)
    
    # Create a directory tree recursively.
    # os.makedirs('dirname1/dirname2')
    
    # Remove a directory, then each empty parent, recursing upwards.
    # os.removedirs('dirname1')
    
    # Create a single directory -- like `mkdir dirname`.
    # os.mkdir('dirname')
    
    # Remove a single *empty* directory (error if non-empty) -- like `rmdir dirname`.
    # os.rmdir('dirname')
    
    # List all entries of a directory (including hidden ones) as a list.
    print(os.listdir('E:/logo'))
    
    # Delete a file.
    # os.remove()
    
    # Rename a file or directory.
    # os.rename("oldname","newname")
    
    # Stat a file or directory.
    # os.stat('path/filename')
    
    # OS-specific path separator: "\\" on Windows, "/" on Linux.
    os.sep
    
    # OS-specific line terminator: "\r\n" on Windows, "\n" on Linux.
    os.linesep
    
    # Separator used inside path-list environment variables such as PATH.
    os.pathsep
    
    # Name of the current platform: Windows -> 'nt'; Linux -> 'posix'.
    os.name
    
    # Run a shell command; its output goes straight to stdout.
    os.system("dir")
    
    # Mapping of the process environment variables.
    print(os.environ)
    
    # Normalized absolute version of `path`.
    print(os.path.abspath(path))
    
    # Split `path` into a (directory, filename) 2-tuple.
    print(os.path.split(path))
    
    # Directory part -- the first element of os.path.split(path).
    print(os.path.dirname(path))
    
    # Final path component -- the second element of os.path.split(path);
    # empty string if `path` ends with a separator.
    print(os.path.basename(path))
    
    # True if `path` exists, else False.
    print(os.path.exists(path))
    
    # True if `path` is an absolute path.
    print(os.path.isabs(path))
    
    # True if `path` is an existing file, else False.
    print(os.path.isfile(path))
    
    # True if `path` is an existing directory, else False.
    print(os.path.isdir(path))
    
    # Join path components; components before the last absolute one are dropped.
    # os.path.join(path1[, path2[, ...]])
    
    # Last access time of the file/directory `path` points to.
    print(os.path.getatime(path))
    
    # Last modification time of the file/directory `path` points to.
    print(os.path.getmtime(path))
    

      

    sys模块

    # Author:Allister.Liu
    # !/usr/bin/env python
    # Demo of the `sys` module: interpreter-level state and services.
    
    import sys
    
    
    print(help(sys))
    # Command-line argument list; argv[0] is the path of the script itself.
    sys.argv
    
    # Exit the program; exit(0) means a normal, successful exit.
    # sys.exit(0)
    
    # Version string of this Python interpreter.
    print(sys.version)
    
    # Largest value a platform size type can hold (NOTE: not a limit on
    # Python 3 ints, which have arbitrary precision).
    print(sys.maxsize)
    
    # Module search path, seeded from the PYTHONPATH environment variable.
    print(sys.path)
    
    # Name of the operating-system platform.
    print(sys.platform)
    
    # write() appends no newline -- handy for progress bars; readline()
    # blocks for interactive input ([:-1] strips the trailing "\n").
    sys.stdout.write('please:')
    val = sys.stdin.readline()[:-1]
    print(val)
    

      

    shelve模块

    # Author:Allister.Liu
    # !/usr/bin/env python
    """Demo of the `shelve` module.

    shelve is a simple persistent key/value store backed by a file; any
    value picklable by `pickle` can be stored under a string key.
    """
    import shelve
    import os, datetime


    file_path = "datas"
    # Create the data directory on first run.
    if not os.path.exists(file_path):
        os.mkdir(file_path)


    # Open the shelf file (created if it does not exist).
    d = shelve.open(file_path + "/shelve_file.data")


    class Test(object):
        """Trivial picklable class, to show that instances persist too."""
        def __init__(self, n):
            self.n = n

    t1 = Test(123)
    t2 = Test(123334)

    names = ["Allister", "Linde", "Heddy", "Daty"]

    # Persist a list under the key "names".
    d["names"] = names

    # Persist class instances.
    d["t1"] = t1

    d["t2"] = t2

    d["date"] = datetime.datetime.now()


    """
        Read entries back.
    """
    # Fetch values by key.
    print(d.get("names"))
    print(d.get("t1"))
    print(d.get("date"))

    print(d.items())

    # BUG FIX: the original never closed the shelf, so buffered writes
    # were only flushed at interpreter shutdown (if at all).  Close it
    # explicitly to guarantee the data reaches disk.
    d.close()
    

      

    shutil模块

    # Author:Allister.Liu
    # !/usr/bin/env python
    # Demo of the `shutil` module: high-level file operations.
    # NOTE(review): requires a file named "笔记.data" next to this script.
    
    import shutil
    
    """
        The shutil module:
        
        
        shutil.copyfileobj(fsrc, fdst[, length]): copy the content of one open
            file object into another; `length` allows copying only part of it;
        
        shutil.copyfile(src, dst): copy a file by path;
        
        shutil.copymode(src, dst): copy only the permission bits; content,
            group and owner are untouched;
        
        shutil.copystat(src, dst): copy the stat info: mode bits, atime,
            mtime, flags;
        
        shutil.copy(src, dst): copy file content and permissions;
        
        shutil.copy2(src, dst): copy file content plus stat info and permissions;
        
        shutil.rmtree(path[, ignore_errors[, onerror]]): recursively delete
            a directory tree;
        
        shutil.move(src, dst): recursively move a file/directory;
        
        shutil.copytree(src, dst, symlinks=False, ignore=None): recursively
            copy a directory tree;
        
        shutil.make_archive(base_name, format, ...): create an archive (e.g.
            zip or tar) and return its path;
            base_name: archive name, or a path -- a bare name like "ic2c"
                saves to the current directory, while
                "/Users/Allister/ic2c" saves to /Users/Allister/;
            format:   archive kind: "zip", "tar", "bztar", "gztar";
            root_dir: directory to archive (defaults to the current one);
            owner:    user, defaults to the current user;
            group:    group, defaults to the current group;
            logger:   used for logging, usually a logging.Logger object;
    """
    
    """
    Copy "笔记.data" to "笔记1.data" via two open file objects.
    """
    with open("笔记.data", "r", encoding= "utf-8") as f1:
        with open("笔记1.data", "w", encoding="utf-8") as f2:
            shutil.copyfileobj(f1, f2)
    
    
    # copyfile opens and copies by path -- no need to open the files yourself.
    # shutil.copyfile("笔记.data", "笔记2.data")
    
    # Recursively copy every file under a directory tree.
    # shutil.copytree("../day4", "../day5/copys")
    
    # Delete the tree copied above.
    # shutil.rmtree("copys")
    
    # Create an archive and print its resulting path.
    # print(shutil.make_archive("H:/wx/432", "zip" ,root_dir="H:/PycharmProjects/python_tutorial/"))
    

      

    xml模块

     1 <data>
     2     <country name="Liechtenstein">
     3         <rank updated="yes">2</rank>
     4         <year updated="yes">2009</year>
     5         <gdppc>141100</gdppc>
     6         <neighbor direction="E" name="Austria" />
     7         <neighbor direction="W" name="Switzerland" />
     8     </country>
     9     <country name="Singapore">
    10         <rank updated="yes">5</rank>
    11         <year updated="yes">2012</year>
    12         <gdppc>59900</gdppc>
    13         <neighbor direction="N" name="Malaysia" />
    14     </country>
    15     <country name="Panama">
    16         <rank updated="yes">69</rank>
    17         <year updated="yes">2012</year>
    18         <gdppc>13600</gdppc>
    19         <neighbor direction="W" name="Costa Rica" />
    20         <neighbor direction="E" name="Colombia" />
    21     </country>
    22 </data>
    xml_test.xml
    # Author:Allister.Liu
    # !/usr/bin/env python
    import xml.etree.ElementTree as ET
    
    
    """
    The xml module: XML is a protocol for exchanging data between different
    languages/programs, much like JSON but older and more verbose; many
    legacy systems (e.g. in finance) still expose XML interfaces.
    """
    
    # XML is supported in every mainstream language; in Python the module
    # below parses a document into an element tree.
    # NOTE(review): expects the sample file datas/xml_test.xml shown above.
    tree = ET.parse("datas/xml_test.xml")
    
    # Root element of the parsed tree.
    root = tree.getroot()
    print("父节点:", root.tag)
    
    # print("遍历xml文档".center(50, "~"))
    # # Walk the whole document: children and grandchildren.
    # for child in root:
    #     print(child.tag, child.attrib)
    #     for i in child:
    #         print(i.tag, i.text)
    #
    # print("year节点".center(50, "~"))
    # # Iterate over the <year> nodes only.
    # for node in root.iter('year'):
    #     print(node.tag, node.text)
    
    
    """
        Modify and delete document content.
    """
    # Modify: bump every <year> by one and mark it updated.
    for node in root.iter('year'):
        new_year = int(node.text) + 1
        node.text = str(new_year)
        node.set("updated", "yes")
    
    tree.write("datas/xmltest.xml")
    
    # Delete: drop every <country> whose <rank> exceeds 50.
    # (findall() returns a list, so removing while looping is safe.)
    for country in root.findall('country'):
        rank = int(country.find('rank').text)
        if rank > 50:
            root.remove(country)
    
    tree.write('datas/output.xml')
    
    
    
    
    """
        Build a document from scratch.
    """
    new_xml = ET.Element("namelist")
    name = ET.SubElement(new_xml, "name", attrib={"enrolled": "yes"})
    age = ET.SubElement(name, "age", attrib={"checked": "no"})
    sex = ET.SubElement(name, "sex")
    sex.text = '33'
    # NOTE(review): `age` is rebound below, shadowing the first <age> element.
    name2 = ET.SubElement(new_xml, "name", attrib={"enrolled": "no"})
    age = ET.SubElement(name2, "age")
    age.text = '19'
    
    et = ET.ElementTree(new_xml)  # wrap the root element in a tree object
    et.write("datas/test.xml", encoding="utf-8", xml_declaration=True)
    
    ET.dump(new_xml)  # print the generated XML to stdout
    

      

    configparser模块

    • 文件的生成:
    # Author:Allister.Liu
    # !/usr/bin/env python
    import configparser
    
    """
        Write a MySQL-style ini configuration file with configparser.
    """
    config = configparser.ConfigParser()
    # Assignment style 1: a whole section from a dict.
    config["client"] = {'port': '3306',
                        'default-character-set': 'utf8'}
    
    # Assignment style 2: create an empty section, then set keys one by one.
    config['mysqld'] = {}
    config['mysqld']['port'] = '3306'
    config['mysqld']['character_set_server'] = 'utf8'
    config['mysqld']['collation-server'] = 'utf8_general_ci'
    config['mysqld']['lower_case_table_names'] = '1'
    config['mysqld']['max_connections'] = '200'
    
    # Assignment style 3: keep a reference to the section proxy and
    # assign through it.
    config['mysqld_safe'] = {}
    topsecret = config['mysqld_safe']
    topsecret['log-error'] = '/usr/local/mysql/error.log'
    
    config['mysqld']['datadir'] = '/usr/local/mysql/data'
    
    # Serialize every section to disk (datas/ must exist).
    with open('datas/my.ini', 'w') as configfile:
        config.write(configfile)
    
    • 文件的读取:
    # Author:Allister.Liu
    # !/usr/bin/env python
    """Read (and tweak) an ini file with configparser.

    Expects the datas/my.ini produced by the companion "write" example.
    """
    import configparser
    
    config = configparser.ConfigParser()
    
    # Parse the file.  A missing file is silently ignored and yields an
    # empty parser, so the lookups below assume the write demo ran first.
    config.read('datas/my.ini')
    
    # List of section names.
    print(config.sections())  # ['client', 'mysqld', 'mysqld_safe', 'logs']
    
    # Membership test for a section.
    print("mysqld" in config)  # True
    
    # Read single values from a section.
    print(config["mysqld"]["port"])  # 3306
    print(config["mysqld_safe"]["log-error"])   # /usr/local/mysql/error.log
    
    # A section proxy can be kept and indexed like a dict.
    topsecret = config["mysqld_safe"]
    print(topsecret["log-error"])   # /usr/local/mysql/error.log
    
    print("遍历配置文件".center(50, "~"))
    for key in config["mysqld"]:
        print(key)
    
    # All (option, value) pairs of a section, as tuples:
    # [('port', '3306'), ('character_set_server', 'utf8'), ('collation-server', 'utf8_general_ci'), ('lower_case_table_names', '1'), ('max_connections', '200'), ('datadir', '/usr/local/mysql/data')]
    print(config.items("mysqld"))
    
    
    print(" 改写 ".center(50, "#"))
    # Remove the mysqld section, then rewrite the file.
    # sec = config.remove_section('mysqld') # section to delete
    # config.write(open('datas/my.ini', "w"))
    
    # # Does a section exist?
    # sec = config.has_section('mysqld')
    # print(sec)
    # # Add a section (raises if it already exists).
    # sec = config.add_section('logs')
    # config.write(open('datas/my.ini', "w"))
    
    
    # BUG FIX: config.set() raises NoSectionError when the section is
    # missing -- and the add_section('logs') call above is commented out.
    # Create the section on demand before setting logs/log_path.
    if not config.has_section('logs'):
        config.add_section('logs')
    config.set('logs', 'log_path', "/usr/logs")
    # BUG FIX: use a context manager instead of a bare open() so the file
    # handle is flushed and closed deterministically.
    with open('datas/my.ini', "w") as configfile:
        config.write(configfile)
    

      

    Hashlib、Hmac模块

    # Author:Allister.Liu
    # !/usr/bin/env python
    """Demo of `hashlib` (message digests) and `hmac` (keyed digests).

    hashlib replaced the old md5/sha modules in Python 3; it mainly
    provides the SHA1, SHA224, SHA256, SHA384, SHA512 and MD5 algorithms.
    """

    import hashlib
    import hmac

    m1 = hashlib.md5()
    m1.update("asdfghjkl".encode("utf-8"))

    # Digest as raw bytes.
    print(m1.digest())
    # Digest as a hex string.
    print(m1.hexdigest())

    # NOTE: the original bound these objects to `hash`, shadowing the
    # built-in hash(); renamed to `hasher`.

    # ######## md5 ########
    print(" md5 ".center(50, "#"))
    hasher = hashlib.md5()
    hasher.update('admin'.encode("utf-8"))
    print(hasher.hexdigest())

    # ######## sha1 ########
    print(" sha1 ".center(50, "#"))
    hasher = hashlib.sha1()
    hasher.update('admin'.encode("utf-8"))
    print(hasher.hexdigest())

    # ######## sha256 ########
    print(" sha256 ".center(50, "#"))
    hasher = hashlib.sha256()
    hasher.update('admin'.encode("utf-8"))
    print(hasher.hexdigest())

    # ######## sha384 ########
    print(" sha384 ".center(50, "#"))
    hasher = hashlib.sha384()
    hasher.update('admin'.encode("utf-8"))
    print(hasher.hexdigest())

    # ######## sha512 ########
    print(" sha512 ".center(50, "#"))
    hasher = hashlib.sha512()
    hasher.update('admin'.encode("utf-8"))
    print(hasher.hexdigest())


    """
        hmac: keyed-hash message authentication (HMAC).

        Both parties agree on a secret key in advance; the sender computes
        HMAC(key, message) and the receiver recomputes it over the received
        plaintext and compares -- equal values authenticate both the
        message and the sender.
    """

    # BUG FIX: digestmod is mandatory since Python 3.8 -- the historical
    # implicit MD5 default was removed, so the original call raises
    # TypeError on modern interpreters.
    h = hmac.new('中华好儿女'.encode("utf-8"), '美丽的山河'.encode("utf-8"), digestmod="md5")
    print(h.hexdigest())
    

      

    zipfile&tarfile模块

    # Author:Allister.Liu
    # !/usr/bin/env python
    
    """
        zip compression / extraction.
    """
    import zipfile
    # Create an archive and add two files to it.
    # NOTE(review): the files added below must already exist in the CWD.
    z = zipfile.ZipFile('Allister.zip', 'w')
    z.write('笔记.data')
    z.write('sys_test.py')
    z.close()
    
    # Extract everything from the archive.
    z = zipfile.ZipFile('Allister.zip', 'r')
    z.extractall()
    z.close()
    
    
    """
        tar compression / extraction.
    """
    
    import tarfile
    
    # Create a tar archive; arcname stores each member under a new name.
    # NOTE(review): the absolute paths below are the author's machines.
    tar = tarfile.open('your.tar', 'w')
    tar.add('/home/dsa.tools/mysql.zip', arcname='mysql.zip')
    tar.add('/Users/wupeiqi/PycharmProjects/cmdb.zip', arcname='cmdb.zip')
    tar.close()
    
    # Extract; extractall() accepts an optional target path.
    tar = tarfile.open('your.tar', 'r')
    tar.extractall()
    tar.close()
    

      

    a、zipfile

       1 """
       2 Read and write ZIP files.
       3 
       4 XXX references to utf-8 need further investigation.
       5 """
       6 import io
       7 import os
       8 import re
       9 import importlib.util
      10 import sys
      11 import time
      12 import stat
      13 import shutil
      14 import struct
      15 import binascii
      16 
      17 try:
      18     import threading
      19 except ImportError:
      20     import dummy_threading as threading
      21 
      22 try:
      23     import zlib # We may need its compression method
      24     crc32 = zlib.crc32
      25 except ImportError:
      26     zlib = None
      27     crc32 = binascii.crc32
      28 
      29 try:
      30     import bz2 # We may need its compression method
      31 except ImportError:
      32     bz2 = None
      33 
      34 try:
      35     import lzma # We may need its compression method
      36 except ImportError:
      37     lzma = None
      38 
      39 __all__ = ["BadZipFile", "BadZipfile", "error",
      40            "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
      41            "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
      42 
      43 class BadZipFile(Exception):
      44     pass
      45 
      46 
      47 class LargeZipFile(Exception):
      48     """
      49     Raised when writing a zipfile, the zipfile requires ZIP64 extensions
      50     and those extensions are disabled.
      51     """
      52 
      53 error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
      54 
      55 
      56 ZIP64_LIMIT = (1 << 31) - 1
      57 ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
      58 ZIP_MAX_COMMENT = (1 << 16) - 1
      59 
      60 # constants for Zip file compression methods
      61 ZIP_STORED = 0
      62 ZIP_DEFLATED = 8
      63 ZIP_BZIP2 = 12
      64 ZIP_LZMA = 14
      65 # Other ZIP compression methods not supported
      66 
      67 DEFAULT_VERSION = 20
      68 ZIP64_VERSION = 45
      69 BZIP2_VERSION = 46
      70 LZMA_VERSION = 63
      71 # we recognize (but not necessarily support) all features up to that version
      72 MAX_EXTRACT_VERSION = 63
      73 
      74 # Below are some formats and associated data for reading/writing headers using
      75 # the struct module.  The names and structures of headers/records are those used
      76 # in the PKWARE description of the ZIP file format:
      77 #     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
      78 # (URL valid as of January 2008)
      79 
      80 # The "end of central directory" structure, magic number, size, and indices
      81 # (section V.I in the format document)
      82 structEndArchive = b"<4s4H2LH"
      83 stringEndArchive = b"PK\005\006"
      84 sizeEndCentDir = struct.calcsize(structEndArchive)
      85 
      86 _ECD_SIGNATURE = 0
      87 _ECD_DISK_NUMBER = 1
      88 _ECD_DISK_START = 2
      89 _ECD_ENTRIES_THIS_DISK = 3
      90 _ECD_ENTRIES_TOTAL = 4
      91 _ECD_SIZE = 5
      92 _ECD_OFFSET = 6
      93 _ECD_COMMENT_SIZE = 7
      94 # These last two indices are not part of the structure as defined in the
      95 # spec, but they are used internally by this module as a convenience
      96 _ECD_COMMENT = 8
      97 _ECD_LOCATION = 9
      98 
      99 # The "central directory" structure, magic number, size, and indices
     100 # of entries in the structure (section V.F in the format document)
     101 structCentralDir = "<4s4B4HL2L5H2L"
     102 stringCentralDir = b"PK\001\002"
     103 sizeCentralDir = struct.calcsize(structCentralDir)
     104 
     105 # indexes of entries in the central directory structure
     106 _CD_SIGNATURE = 0
     107 _CD_CREATE_VERSION = 1
     108 _CD_CREATE_SYSTEM = 2
     109 _CD_EXTRACT_VERSION = 3
     110 _CD_EXTRACT_SYSTEM = 4
     111 _CD_FLAG_BITS = 5
     112 _CD_COMPRESS_TYPE = 6
     113 _CD_TIME = 7
     114 _CD_DATE = 8
     115 _CD_CRC = 9
     116 _CD_COMPRESSED_SIZE = 10
     117 _CD_UNCOMPRESSED_SIZE = 11
     118 _CD_FILENAME_LENGTH = 12
     119 _CD_EXTRA_FIELD_LENGTH = 13
     120 _CD_COMMENT_LENGTH = 14
     121 _CD_DISK_NUMBER_START = 15
     122 _CD_INTERNAL_FILE_ATTRIBUTES = 16
     123 _CD_EXTERNAL_FILE_ATTRIBUTES = 17
     124 _CD_LOCAL_HEADER_OFFSET = 18
     125 
     126 # The "local file header" structure, magic number, size, and indices
     127 # (section V.A in the format document)
     128 structFileHeader = "<4s2B4HL2L2H"
     129 stringFileHeader = b"PK\003\004"
     130 sizeFileHeader = struct.calcsize(structFileHeader)
     131 
     132 _FH_SIGNATURE = 0
     133 _FH_EXTRACT_VERSION = 1
     134 _FH_EXTRACT_SYSTEM = 2
     135 _FH_GENERAL_PURPOSE_FLAG_BITS = 3
     136 _FH_COMPRESSION_METHOD = 4
     137 _FH_LAST_MOD_TIME = 5
     138 _FH_LAST_MOD_DATE = 6
     139 _FH_CRC = 7
     140 _FH_COMPRESSED_SIZE = 8
     141 _FH_UNCOMPRESSED_SIZE = 9
     142 _FH_FILENAME_LENGTH = 10
     143 _FH_EXTRA_FIELD_LENGTH = 11
     144 
     145 # The "Zip64 end of central directory locator" structure, magic number, and size
     146 structEndArchive64Locator = "<4sLQL"
     147 stringEndArchive64Locator = b"PK\x06\x07"
     148 sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
     149 
     150 # The "Zip64 end of central directory" record, magic number, size, and indices
     151 # (section V.G in the format document)
     152 structEndArchive64 = "<4sQ2H2L4Q"
     153 stringEndArchive64 = b"PK\x06\x06"
     154 sizeEndCentDir64 = struct.calcsize(structEndArchive64)
     155 
     156 _CD64_SIGNATURE = 0
     157 _CD64_DIRECTORY_RECSIZE = 1
     158 _CD64_CREATE_VERSION = 2
     159 _CD64_EXTRACT_VERSION = 3
     160 _CD64_DISK_NUMBER = 4
     161 _CD64_DISK_NUMBER_START = 5
     162 _CD64_NUMBER_ENTRIES_THIS_DISK = 6
     163 _CD64_NUMBER_ENTRIES_TOTAL = 7
     164 _CD64_DIRECTORY_SIZE = 8
     165 _CD64_OFFSET_START_CENTDIR = 9
     166 
     167 def _check_zipfile(fp):
     168     try:
     169         if _EndRecData(fp):
     170             return True         # file has correct magic number
     171     except OSError:
     172         pass
     173     return False
     174 
     175 def is_zipfile(filename):
     176     """Quickly see if a file is a ZIP file by checking the magic number.
     177 
     178     The filename argument may be a file or file-like object too.
     179     """
     180     result = False
     181     try:
     182         if hasattr(filename, "read"):
     183             result = _check_zipfile(fp=filename)
     184         else:
     185             with open(filename, "rb") as fp:
     186                 result = _check_zipfile(fp)
     187     except OSError:
     188         pass
     189     return result
     190 
     191 def _EndRecData64(fpin, offset, endrec):
     192     """
     193     Read the ZIP64 end-of-archive records and use that to update endrec
     194     """
     195     try:
     196         fpin.seek(offset - sizeEndCentDir64Locator, 2)
     197     except OSError:
     198         # If the seek fails, the file is not large enough to contain a ZIP64
     199         # end-of-archive record, so just return the end record we were given.
     200         return endrec
     201 
     202     data = fpin.read(sizeEndCentDir64Locator)
     203     if len(data) != sizeEndCentDir64Locator:
     204         return endrec
     205     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
     206     if sig != stringEndArchive64Locator:
     207         return endrec
     208 
     209     if diskno != 0 or disks != 1:
     210         raise BadZipFile("zipfiles that span multiple disks are not supported")
     211 
     212     # Assume no 'zip64 extensible data'
     213     fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
     214     data = fpin.read(sizeEndCentDir64)
     215     if len(data) != sizeEndCentDir64:
     216         return endrec
     217     sig, sz, create_version, read_version, disk_num, disk_dir, \
     218         dircount, dircount2, dirsize, diroffset = \
     219         struct.unpack(structEndArchive64, data)
     220     if sig != stringEndArchive64:
     221         return endrec
     222 
     223     # Update the original endrec using data from the ZIP64 record
     224     endrec[_ECD_SIGNATURE] = sig
     225     endrec[_ECD_DISK_NUMBER] = disk_num
     226     endrec[_ECD_DISK_START] = disk_dir
     227     endrec[_ECD_ENTRIES_THIS_DISK] = dircount
     228     endrec[_ECD_ENTRIES_TOTAL] = dircount2
     229     endrec[_ECD_SIZE] = dirsize
     230     endrec[_ECD_OFFSET] = diroffset
     231     return endrec
     232 
     233 
     234 def _EndRecData(fpin):
     235     """Return data from the "End of Central Directory" record, or None.
     236 
     237     The data is a list of the nine items in the ZIP "End of central dir"
     238     record followed by a tenth item, the file seek offset of this record."""
     239 
     240     # Determine file size
     241     fpin.seek(0, 2)
     242     filesize = fpin.tell()
     243 
     244     # Check to see if this is ZIP file with no archive comment (the
     245     # "end of central directory" structure should be the last item in the
     246     # file if this is the case).
     247     try:
     248         fpin.seek(-sizeEndCentDir, 2)
     249     except OSError:
     250         return None
     251     data = fpin.read()
     252     if (len(data) == sizeEndCentDir and
     253         data[0:4] == stringEndArchive and
     254         data[-2:] == b"\000\000"):
     255         # the signature is correct and there's no comment, unpack structure
     256         endrec = struct.unpack(structEndArchive, data)
     257         endrec=list(endrec)
     258 
     259         # Append a blank comment and record start offset
     260         endrec.append(b"")
     261         endrec.append(filesize - sizeEndCentDir)
     262 
     263         # Try to read the "Zip64 end of central directory" structure
     264         return _EndRecData64(fpin, -sizeEndCentDir, endrec)
     265 
     266     # Either this is not a ZIP file, or it is a ZIP file with an archive
     267     # comment.  Search the end of the file for the "end of central directory"
     268     # record signature. The comment is the last item in the ZIP file and may be
     269     # up to 64K long.  It is assumed that the "end of central directory" magic
     270     # number does not appear in the comment.
     271     maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
     272     fpin.seek(maxCommentStart, 0)
     273     data = fpin.read()
     274     start = data.rfind(stringEndArchive)
     275     if start >= 0:
     276         # found the magic number; attempt to unpack and interpret
     277         recData = data[start:start+sizeEndCentDir]
     278         if len(recData) != sizeEndCentDir:
     279             # Zip file is corrupted.
     280             return None
     281         endrec = list(struct.unpack(structEndArchive, recData))
     282         commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
     283         comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
     284         endrec.append(comment)
     285         endrec.append(maxCommentStart + start)
     286 
     287         # Try to read the "Zip64 end of central directory" structure
     288         return _EndRecData64(fpin, maxCommentStart + start - filesize,
     289                              endrec)
     290 
     291     # Unable to find a valid end of central directory structure
     292     return None
     293 
     294 
     295 class ZipInfo (object):
     296     """Class with attributes describing each file in the ZIP archive."""
     297 
     298     __slots__ = (
     299         'orig_filename',
     300         'filename',
     301         'date_time',
     302         'compress_type',
     303         'comment',
     304         'extra',
     305         'create_system',
     306         'create_version',
     307         'extract_version',
     308         'reserved',
     309         'flag_bits',
     310         'volume',
     311         'internal_attr',
     312         'external_attr',
     313         'header_offset',
     314         'CRC',
     315         'compress_size',
     316         'file_size',
     317         '_raw_time',
     318     )
     319 
     320     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
     321         self.orig_filename = filename   # Original file name in archive
     322 
     323         # Terminate the file name at the first null byte.  Null bytes in file
     324         # names are used as tricks by viruses in archives.
     325         null_byte = filename.find(chr(0))
     326         if null_byte >= 0:
     327             filename = filename[0:null_byte]
     328         # This is used to ensure paths in generated ZIP files always use
     329         # forward slashes as the directory separator, as required by the
     330         # ZIP format specification.
     331         if os.sep != "/" and os.sep in filename:
     332             filename = filename.replace(os.sep, "/")
     333 
     334         self.filename = filename        # Normalized file name
     335         self.date_time = date_time      # year, month, day, hour, min, sec
     336 
     337         if date_time[0] < 1980:
     338             raise ValueError('ZIP does not support timestamps before 1980')
     339 
     340         # Standard values:
     341         self.compress_type = ZIP_STORED # Type of compression for the file
     342         self.comment = b""              # Comment for each file
     343         self.extra = b""                # ZIP extra data
     344         if sys.platform == 'win32':
     345             self.create_system = 0          # System which created ZIP archive
     346         else:
     347             # Assume everything else is unix-y
     348             self.create_system = 3          # System which created ZIP archive
     349         self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
     350         self.extract_version = DEFAULT_VERSION # Version needed to extract archive
     351         self.reserved = 0               # Must be zero
     352         self.flag_bits = 0              # ZIP flag bits
     353         self.volume = 0                 # Volume number of file header
     354         self.internal_attr = 0          # Internal attributes
     355         self.external_attr = 0          # External file attributes
     356         # Other attributes are set by class ZipFile:
     357         # header_offset         Byte offset to the file header
     358         # CRC                   CRC-32 of the uncompressed file
     359         # compress_size         Size of the compressed file
     360         # file_size             Size of the uncompressed file
     361 
     362     def __repr__(self):
     363         result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
     364         if self.compress_type != ZIP_STORED:
     365             result.append(' compress_type=%s' %
     366                           compressor_names.get(self.compress_type,
     367                                                self.compress_type))
     368         hi = self.external_attr >> 16
     369         lo = self.external_attr & 0xFFFF
     370         if hi:
     371             result.append(' filemode=%r' % stat.filemode(hi))
     372         if lo:
     373             result.append(' external_attr=%#x' % lo)
     374         isdir = self.filename[-1:] == '/'
     375         if not isdir or self.file_size:
     376             result.append(' file_size=%r' % self.file_size)
     377         if ((not isdir or self.compress_size) and
     378             (self.compress_type != ZIP_STORED or
     379              self.file_size != self.compress_size)):
     380             result.append(' compress_size=%r' % self.compress_size)
     381         result.append('>')
     382         return ''.join(result)
     383 
     384     def FileHeader(self, zip64=None):
     385         """Return the per-file header as a string."""
     386         dt = self.date_time
     387         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
     388         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
     389         if self.flag_bits & 0x08:
     390             # Set these to zero because we write them after the file data
     391             CRC = compress_size = file_size = 0
     392         else:
     393             CRC = self.CRC
     394             compress_size = self.compress_size
     395             file_size = self.file_size
     396 
     397         extra = self.extra
     398 
     399         min_version = 0
     400         if zip64 is None:
     401             zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
     402         if zip64:
     403             fmt = '<HHQQ'
     404             extra = extra + struct.pack(fmt,
     405                                         1, struct.calcsize(fmt)-4, file_size, compress_size)
     406         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
     407             if not zip64:
     408                 raise LargeZipFile("Filesize would require ZIP64 extensions")
     409             # File is larger than what fits into a 4 byte integer,
     410             # fall back to the ZIP64 extension
     411             file_size = 0xffffffff
     412             compress_size = 0xffffffff
     413             min_version = ZIP64_VERSION
     414 
     415         if self.compress_type == ZIP_BZIP2:
     416             min_version = max(BZIP2_VERSION, min_version)
     417         elif self.compress_type == ZIP_LZMA:
     418             min_version = max(LZMA_VERSION, min_version)
     419 
     420         self.extract_version = max(min_version, self.extract_version)
     421         self.create_version = max(min_version, self.create_version)
     422         filename, flag_bits = self._encodeFilenameFlags()
     423         header = struct.pack(structFileHeader, stringFileHeader,
     424                              self.extract_version, self.reserved, flag_bits,
     425                              self.compress_type, dostime, dosdate, CRC,
     426                              compress_size, file_size,
     427                              len(filename), len(extra))
     428         return header + filename + extra
     429 
     430     def _encodeFilenameFlags(self):
     431         try:
     432             return self.filename.encode('ascii'), self.flag_bits
     433         except UnicodeEncodeError:
     434             return self.filename.encode('utf-8'), self.flag_bits | 0x800
     435 
     436     def _decodeExtra(self):
     437         # Try to decode the extra field.
     438         extra = self.extra
     439         unpack = struct.unpack
     440         while len(extra) >= 4:
     441             tp, ln = unpack('<HH', extra[:4])
     442             if tp == 1:
     443                 if ln >= 24:
     444                     counts = unpack('<QQQ', extra[4:28])
     445                 elif ln == 16:
     446                     counts = unpack('<QQ', extra[4:20])
     447                 elif ln == 8:
     448                     counts = unpack('<Q', extra[4:12])
     449                 elif ln == 0:
     450                     counts = ()
     451                 else:
     452                     raise RuntimeError("Corrupt extra field %s"%(ln,))
     453 
     454                 idx = 0
     455 
     456                 # ZIP64 extension (large files and/or large archives)
     457                 if self.file_size in (0xffffffffffffffff, 0xffffffff):
     458                     self.file_size = counts[idx]
     459                     idx += 1
     460 
     461                 if self.compress_size == 0xFFFFFFFF:
     462                     self.compress_size = counts[idx]
     463                     idx += 1
     464 
     465                 if self.header_offset == 0xffffffff:
     466                     old = self.header_offset
     467                     self.header_offset = counts[idx]
     468                     idx+=1
     469 
     470             extra = extra[ln+4:]
     471 
     472 
     473 class _ZipDecrypter:
     474     """Class to handle decryption of files stored within a ZIP archive.
     475 
     476     ZIP supports a password-based form of encryption. Even though known
     477     plaintext attacks have been found against it, it is still useful
     478     to be able to get data out of such a file.
     479 
     480     Usage:
     481         zd = _ZipDecrypter(mypwd)
     482         plain_char = zd(cypher_char)
     483         plain_text = map(zd, cypher_text)
     484     """
     485 
     486     def _GenerateCRCTable():
     487         """Generate a CRC-32 table.
     488 
     489         ZIP encryption uses the CRC32 one-byte primitive for scrambling some
     490         internal keys. We noticed that a direct implementation is faster than
     491         relying on binascii.crc32().
     492         """
     493         poly = 0xedb88320
     494         table = [0] * 256
     495         for i in range(256):
     496             crc = i
     497             for j in range(8):
     498                 if crc & 1:
     499                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
     500                 else:
     501                     crc = ((crc >> 1) & 0x7FFFFFFF)
     502             table[i] = crc
     503         return table
     504     crctable = None
     505 
     506     def _crc32(self, ch, crc):
     507         """Compute the CRC32 primitive on one byte."""
     508         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
     509 
     510     def __init__(self, pwd):
     511         if _ZipDecrypter.crctable is None:
     512             _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
     513         self.key0 = 305419896
     514         self.key1 = 591751049
     515         self.key2 = 878082192
     516         for p in pwd:
     517             self._UpdateKeys(p)
     518 
     519     def _UpdateKeys(self, c):
     520         self.key0 = self._crc32(c, self.key0)
     521         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
     522         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
     523         self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
     524 
     525     def __call__(self, c):
     526         """Decrypt a single character."""
     527         assert isinstance(c, int)
     528         k = self.key2 | 2
     529         c = c ^ (((k * (k^1)) >> 8) & 255)
     530         self._UpdateKeys(c)
     531         return c
     532 
     533 
     534 class LZMACompressor:
     535 
     536     def __init__(self):
     537         self._comp = None
     538 
     539     def _init(self):
     540         props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
     541         self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
     542             lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
     543         ])
     544         return struct.pack('<BBH', 9, 4, len(props)) + props
     545 
     546     def compress(self, data):
     547         if self._comp is None:
     548             return self._init() + self._comp.compress(data)
     549         return self._comp.compress(data)
     550 
     551     def flush(self):
     552         if self._comp is None:
     553             return self._init() + self._comp.flush()
     554         return self._comp.flush()
     555 
     556 
     557 class LZMADecompressor:
     558 
     559     def __init__(self):
     560         self._decomp = None
     561         self._unconsumed = b''
     562         self.eof = False
     563 
     564     def decompress(self, data):
     565         if self._decomp is None:
     566             self._unconsumed += data
     567             if len(self._unconsumed) <= 4:
     568                 return b''
     569             psize, = struct.unpack('<H', self._unconsumed[2:4])
     570             if len(self._unconsumed) <= 4 + psize:
     571                 return b''
     572 
     573             self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
     574                 lzma._decode_filter_properties(lzma.FILTER_LZMA1,
     575                                                self._unconsumed[4:4 + psize])
     576             ])
     577             data = self._unconsumed[4 + psize:]
     578             del self._unconsumed
     579 
     580         result = self._decomp.decompress(data)
     581         self.eof = self._decomp.eof
     582         return result
     583 
     584 
# Map of compression-method codes (from the PKWARE ZIP "APPNOTE"
# specification) to short human-readable names; used for __repr__ output
# and for the NotImplementedError message in _get_decompressor().
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
     604 
     605 def _check_compression(compression):
     606     if compression == ZIP_STORED:
     607         pass
     608     elif compression == ZIP_DEFLATED:
     609         if not zlib:
     610             raise RuntimeError(
     611                 "Compression requires the (missing) zlib module")
     612     elif compression == ZIP_BZIP2:
     613         if not bz2:
     614             raise RuntimeError(
     615                 "Compression requires the (missing) bz2 module")
     616     elif compression == ZIP_LZMA:
     617         if not lzma:
     618             raise RuntimeError(
     619                 "Compression requires the (missing) lzma module")
     620     else:
     621         raise RuntimeError("That compression method is not supported")
     622 
     623 
     624 def _get_compressor(compress_type):
     625     if compress_type == ZIP_DEFLATED:
     626         return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
     627                                 zlib.DEFLATED, -15)
     628     elif compress_type == ZIP_BZIP2:
     629         return bz2.BZ2Compressor()
     630     elif compress_type == ZIP_LZMA:
     631         return LZMACompressor()
     632     else:
     633         return None
     634 
     635 
     636 def _get_decompressor(compress_type):
     637     if compress_type == ZIP_STORED:
     638         return None
     639     elif compress_type == ZIP_DEFLATED:
     640         return zlib.decompressobj(-15)
     641     elif compress_type == ZIP_BZIP2:
     642         return bz2.BZ2Decompressor()
     643     elif compress_type == ZIP_LZMA:
     644         return LZMADecompressor()
     645     else:
     646         descr = compressor_names.get(compress_type)
     647         if descr:
     648             raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
     649         else:
     650             raise NotImplementedError("compression type %d" % (compress_type,))
     651 
     652 
     653 class _SharedFile:
     654     def __init__(self, file, pos, close, lock):
     655         self._file = file
     656         self._pos = pos
     657         self._close = close
     658         self._lock = lock
     659 
     660     def read(self, n=-1):
     661         with self._lock:
     662             self._file.seek(self._pos)
     663             data = self._file.read(n)
     664             self._pos = self._file.tell()
     665             return data
     666 
     667     def close(self):
     668         if self._file is not None:
     669             fileobj = self._file
     670             self._file = None
     671             self._close(fileobj)
     672 
     673 # Provide the tell method for unseekable stream
     674 class _Tellable:
     675     def __init__(self, fp):
     676         self.fp = fp
     677         self.offset = 0
     678 
     679     def write(self, data):
     680         n = self.fp.write(data)
     681         self.offset += n
     682         return n
     683 
     684     def tell(self):
     685         return self.offset
     686 
     687     def flush(self):
     688         self.fp.flush()
     689 
     690     def close(self):
     691         self.fp.close()
     692 
     693 
     694 class ZipExtFile(io.BufferedIOBase):
     695     """File-like object for reading an archive member.
     696        Is returned by ZipFile.open().
     697     """
     698 
     699     # Max size supported by decompressor.
     700     MAX_N = 1 << 31 - 1
     701 
     702     # Read from compressed files in 4k blocks.
     703     MIN_READ_SIZE = 4096
     704 
     705     # Search for universal newlines or line chunks.
     706     PATTERN = re.compile(br'^(?P<chunk>[^
    ]+)|(?P<newline>
    |
    ?)')
     707 
     708     def __init__(self, fileobj, mode, zipinfo, decrypter=None,
     709                  close_fileobj=False):
     710         self._fileobj = fileobj
     711         self._decrypter = decrypter
     712         self._close_fileobj = close_fileobj
     713 
     714         self._compress_type = zipinfo.compress_type
     715         self._compress_left = zipinfo.compress_size
     716         self._left = zipinfo.file_size
     717 
     718         self._decompressor = _get_decompressor(self._compress_type)
     719 
     720         self._eof = False
     721         self._readbuffer = b''
     722         self._offset = 0
     723 
     724         self._universal = 'U' in mode
     725         self.newlines = None
     726 
     727         # Adjust read size for encrypted files since the first 12 bytes
     728         # are for the encryption/password information.
     729         if self._decrypter is not None:
     730             self._compress_left -= 12
     731 
     732         self.mode = mode
     733         self.name = zipinfo.filename
     734 
     735         if hasattr(zipinfo, 'CRC'):
     736             self._expected_crc = zipinfo.CRC
     737             self._running_crc = crc32(b'')
     738         else:
     739             self._expected_crc = None
     740 
     741     def __repr__(self):
     742         result = ['<%s.%s' % (self.__class__.__module__,
     743                               self.__class__.__qualname__)]
     744         if not self.closed:
     745             result.append(' name=%r mode=%r' % (self.name, self.mode))
     746             if self._compress_type != ZIP_STORED:
     747                 result.append(' compress_type=%s' %
     748                               compressor_names.get(self._compress_type,
     749                                                    self._compress_type))
     750         else:
     751             result.append(' [closed]')
     752         result.append('>')
     753         return ''.join(result)
     754 
     755     def readline(self, limit=-1):
     756         """Read and return a line from the stream.
     757 
     758         If limit is specified, at most limit bytes will be read.
     759         """
     760 
     761         if not self._universal and limit < 0:
     762             # Shortcut common case - newline found in buffer.
     763             i = self._readbuffer.find(b'
    ', self._offset) + 1
     764             if i > 0:
     765                 line = self._readbuffer[self._offset: i]
     766                 self._offset = i
     767                 return line
     768 
     769         if not self._universal:
     770             return io.BufferedIOBase.readline(self, limit)
     771 
     772         line = b''
     773         while limit < 0 or len(line) < limit:
     774             readahead = self.peek(2)
     775             if readahead == b'':
     776                 return line
     777 
     778             #
     779             # Search for universal newlines or line chunks.
     780             #
     781             # The pattern returns either a line chunk or a newline, but not
     782             # both. Combined with peek(2), we are assured that the sequence
     783             # '
    ' is always retrieved completely and never split into
     784             # separate newlines - '
    ', '
    ' due to coincidental readaheads.
     785             #
     786             match = self.PATTERN.search(readahead)
     787             newline = match.group('newline')
     788             if newline is not None:
     789                 if self.newlines is None:
     790                     self.newlines = []
     791                 if newline not in self.newlines:
     792                     self.newlines.append(newline)
     793                 self._offset += len(newline)
     794                 return line + b'
    '
     795 
     796             chunk = match.group('chunk')
     797             if limit >= 0:
     798                 chunk = chunk[: limit - len(line)]
     799 
     800             self._offset += len(chunk)
     801             line += chunk
     802 
     803         return line
     804 
     805     def peek(self, n=1):
     806         """Returns buffered bytes without advancing the position."""
     807         if n > len(self._readbuffer) - self._offset:
     808             chunk = self.read(n)
     809             if len(chunk) > self._offset:
     810                 self._readbuffer = chunk + self._readbuffer[self._offset:]
     811                 self._offset = 0
     812             else:
     813                 self._offset -= len(chunk)
     814 
     815         # Return up to 512 bytes to reduce allocation overhead for tight loops.
     816         return self._readbuffer[self._offset: self._offset + 512]
     817 
     818     def readable(self):
     819         return True
     820 
     821     def read(self, n=-1):
     822         """Read and return up to n bytes.
     823         If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
     824         """
     825         if n is None or n < 0:
     826             buf = self._readbuffer[self._offset:]
     827             self._readbuffer = b''
     828             self._offset = 0
     829             while not self._eof:
     830                 buf += self._read1(self.MAX_N)
     831             return buf
     832 
     833         end = n + self._offset
     834         if end < len(self._readbuffer):
     835             buf = self._readbuffer[self._offset:end]
     836             self._offset = end
     837             return buf
     838 
     839         n = end - len(self._readbuffer)
     840         buf = self._readbuffer[self._offset:]
     841         self._readbuffer = b''
     842         self._offset = 0
     843         while n > 0 and not self._eof:
     844             data = self._read1(n)
     845             if n < len(data):
     846                 self._readbuffer = data
     847                 self._offset = n
     848                 buf += data[:n]
     849                 break
     850             buf += data
     851             n -= len(data)
     852         return buf
     853 
     854     def _update_crc(self, newdata):
     855         # Update the CRC using the given data.
     856         if self._expected_crc is None:
     857             # No need to compute the CRC if we don't have a reference value
     858             return
     859         self._running_crc = crc32(newdata, self._running_crc)
     860         # Check the CRC if we're at the end of the file
     861         if self._eof and self._running_crc != self._expected_crc:
     862             raise BadZipFile("Bad CRC-32 for file %r" % self.name)
     863 
     864     def read1(self, n):
     865         """Read up to n bytes with at most one read() system call."""
     866 
     867         if n is None or n < 0:
     868             buf = self._readbuffer[self._offset:]
     869             self._readbuffer = b''
     870             self._offset = 0
     871             while not self._eof:
     872                 data = self._read1(self.MAX_N)
     873                 if data:
     874                     buf += data
     875                     break
     876             return buf
     877 
     878         end = n + self._offset
     879         if end < len(self._readbuffer):
     880             buf = self._readbuffer[self._offset:end]
     881             self._offset = end
     882             return buf
     883 
     884         n = end - len(self._readbuffer)
     885         buf = self._readbuffer[self._offset:]
     886         self._readbuffer = b''
     887         self._offset = 0
     888         if n > 0:
     889             while not self._eof:
     890                 data = self._read1(n)
     891                 if n < len(data):
     892                     self._readbuffer = data
     893                     self._offset = n
     894                     buf += data[:n]
     895                     break
     896                 if data:
     897                     buf += data
     898                     break
     899         return buf
     900 
     901     def _read1(self, n):
     902         # Read up to n compressed bytes with at most one read() system call,
     903         # decrypt and decompress them.
     904         if self._eof or n <= 0:
     905             return b''
     906 
     907         # Read from file.
     908         if self._compress_type == ZIP_DEFLATED:
     909             ## Handle unconsumed data.
     910             data = self._decompressor.unconsumed_tail
     911             if n > len(data):
     912                 data += self._read2(n - len(data))
     913         else:
     914             data = self._read2(n)
     915 
     916         if self._compress_type == ZIP_STORED:
     917             self._eof = self._compress_left <= 0
     918         elif self._compress_type == ZIP_DEFLATED:
     919             n = max(n, self.MIN_READ_SIZE)
     920             data = self._decompressor.decompress(data, n)
     921             self._eof = (self._decompressor.eof or
     922                          self._compress_left <= 0 and
     923                          not self._decompressor.unconsumed_tail)
     924             if self._eof:
     925                 data += self._decompressor.flush()
     926         else:
     927             data = self._decompressor.decompress(data)
     928             self._eof = self._decompressor.eof or self._compress_left <= 0
     929 
     930         data = data[:self._left]
     931         self._left -= len(data)
     932         if self._left <= 0:
     933             self._eof = True
     934         self._update_crc(data)
     935         return data
     936 
     937     def _read2(self, n):
     938         if self._compress_left <= 0:
     939             return b''
     940 
     941         n = max(n, self.MIN_READ_SIZE)
     942         n = min(n, self._compress_left)
     943 
     944         data = self._fileobj.read(n)
     945         self._compress_left -= len(data)
     946         if not data:
     947             raise EOFError
     948 
     949         if self._decrypter is not None:
     950             data = bytes(map(self._decrypter, data))
     951         return data
     952 
     953     def close(self):
     954         try:
     955             if self._close_fileobj:
     956                 self._fileobj.close()
     957         finally:
     958             super().close()
     959 
     960 
     961 class ZipFile:
     962     """ Class with methods to open, read, write, close, list zip files.
     963 
     964     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
     965 
     966     file: Either the path to the file, or a file-like object.
     967           If it is a path, the file will be opened and closed by ZipFile.
     968     mode: The mode can be either read 'r', write 'w', exclusive create 'x',
     969           or append 'a'.
     970     compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
     971                  ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
     972     allowZip64: if True ZipFile will create files with ZIP64 extensions when
     973                 needed, otherwise it will raise an exception when this would
     974                 be necessary.
     975 
     976     """
     977 
     978     fp = None                   # Set here since __del__ checks it
     979     _windows_illegal_name_trans_table = None
     980 
     981     def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
     982         """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
     983         or append 'a'."""
     984         if mode not in ('r', 'w', 'x', 'a'):
     985             raise RuntimeError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
     986 
     987         _check_compression(compression)
     988 
     989         self._allowZip64 = allowZip64
     990         self._didModify = False
     991         self.debug = 0  # Level of printing: 0 through 3
     992         self.NameToInfo = {}    # Find file info given name
     993         self.filelist = []      # List of ZipInfo instances for archive
     994         self.compression = compression  # Method of compression
     995         self.mode = mode
     996         self.pwd = None
     997         self._comment = b''
     998 
     999         # Check if we were passed a file-like object
    1000         if isinstance(file, str):
    1001             # No, it's a filename
    1002             self._filePassed = 0
    1003             self.filename = file
    1004             modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
    1005                         'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
    1006             filemode = modeDict[mode]
    1007             while True:
    1008                 try:
    1009                     self.fp = io.open(file, filemode)
    1010                 except OSError:
    1011                     if filemode in modeDict:
    1012                         filemode = modeDict[filemode]
    1013                         continue
    1014                     raise
    1015                 break
    1016         else:
    1017             self._filePassed = 1
    1018             self.fp = file
    1019             self.filename = getattr(file, 'name', None)
    1020         self._fileRefCnt = 1
    1021         self._lock = threading.RLock()
    1022         self._seekable = True
    1023 
    1024         try:
    1025             if mode == 'r':
    1026                 self._RealGetContents()
    1027             elif mode in ('w', 'x'):
    1028                 # set the modified flag so central directory gets written
    1029                 # even if no files are added to the archive
    1030                 self._didModify = True
    1031                 try:
    1032                     self.start_dir = self.fp.tell()
    1033                 except (AttributeError, OSError):
    1034                     self.fp = _Tellable(self.fp)
    1035                     self.start_dir = 0
    1036                     self._seekable = False
    1037                 else:
    1038                     # Some file-like objects can provide tell() but not seek()
    1039                     try:
    1040                         self.fp.seek(self.start_dir)
    1041                     except (AttributeError, OSError):
    1042                         self._seekable = False
    1043             elif mode == 'a':
    1044                 try:
    1045                     # See if file is a zip file
    1046                     self._RealGetContents()
    1047                     # seek to start of directory and overwrite
    1048                     self.fp.seek(self.start_dir)
    1049                 except BadZipFile:
    1050                     # file is not a zip file, just append
    1051                     self.fp.seek(0, 2)
    1052 
    1053                     # set the modified flag so central directory gets written
    1054                     # even if no files are added to the archive
    1055                     self._didModify = True
    1056                     self.start_dir = self.fp.tell()
    1057             else:
    1058                 raise RuntimeError("Mode must be 'r', 'w', 'x', or 'a'")
    1059         except:
    1060             fp = self.fp
    1061             self.fp = None
    1062             self._fpclose(fp)
    1063             raise
    1064 
    1065     def __enter__(self):
    1066         return self
    1067 
    1068     def __exit__(self, type, value, traceback):
    1069         self.close()
    1070 
    1071     def __repr__(self):
    1072         result = ['<%s.%s' % (self.__class__.__module__,
    1073                               self.__class__.__qualname__)]
    1074         if self.fp is not None:
    1075             if self._filePassed:
    1076                 result.append(' file=%r' % self.fp)
    1077             elif self.filename is not None:
    1078                 result.append(' filename=%r' % self.filename)
    1079             result.append(' mode=%r' % self.mode)
    1080         else:
    1081             result.append(' [closed]')
    1082         result.append('>')
    1083         return ''.join(result)
    1084 
    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Locates the end-of-archive record, then walks the central directory,
        appending one ZipInfo per member to self.filelist and indexing it in
        self.NameToInfo.  Also records self.start_dir, the file offset of the
        central directory.  Raises BadZipFile when no valid end record or
        directory entry is found.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        # (e.g. a self-extracting stub prepended to the archive).
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # Parse the directory from an in-memory copy so the per-entry reads
        # below do not disturb the underlying file object.
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            # Shift the local-header offset by any prepended data.
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total", total)
    1161 
    1162 
    1163     def namelist(self):
    1164         """Return a list of file names in the archive."""
    1165         return [data.filename for data in self.filelist]
    1166 
    1167     def infolist(self):
    1168         """Return a list of class ZipInfo instances for files in the
    1169         archive."""
    1170         return self.filelist
    1171 
    1172     def printdir(self, file=None):
    1173         """Print a table of contents for the zip file."""
    1174         print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
    1175               file=file)
    1176         for zinfo in self.filelist:
    1177             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
    1178             print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
    1179                   file=file)
    1180 
    def testzip(self):
        """Read every member and verify its CRC; return the name of the
        first corrupt member, or None when all members check out."""
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read in bounded chunks so very large embedded files do
                # not trigger an OverflowError or MemoryError; the CRC-32
                # check happens inside read().
                with self.open(zinfo.filename, "r") as member:
                    while member.read(chunk_size):
                        pass
            except BadZipFile:
                return zinfo.filename
    1193 
    1194     def getinfo(self, name):
    1195         """Return the instance of ZipInfo given 'name'."""
    1196         info = self.NameToInfo.get(name)
    1197         if info is None:
    1198             raise KeyError(
    1199                 'There is no item named %r in the archive' % name)
    1200 
    1201         return info
    1202 
    1203     def setpassword(self, pwd):
    1204         """Set default password for encrypted files."""
    1205         if pwd and not isinstance(pwd, bytes):
    1206             raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    1207         if pwd:
    1208             self.pwd = pwd
    1209         else:
    1210             self.pwd = None
    1211 
    1212     @property
    1213     def comment(self):
    1214         """The comment text associated with the ZIP file."""
    1215         return self._comment
    1216 
    @comment.setter
    def comment(self, comment):
        """Replace the archive comment; bytes only, truncated with a
        warning when longer than the format allows."""
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment))
        if len(comment) > ZIP_MAX_COMMENT:
            # The end-of-archive record stores the comment length in only
            # 16 bits, so longer comments cannot be represented.
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._didModify = True
        self._comment = comment
    1229 
    def read(self, name, pwd=None):
        """Return the decompressed bytes of member 'name', optionally
        using password 'pwd' for encrypted members."""
        with self.open(name, "r", pwd) as member:
            return member.read()
    1234 
    def open(self, name, mode="r", pwd=None):
        """Return a file-like object (ZipExtFile) for reading member 'name'.

        name may be a member name or a ZipInfo object.  mode must be "r"
        ("U"/"rU" are accepted but deprecated).  pwd is the password for
        encrypted members (bytes).  Raises BadZipFile on a corrupt local
        header and RuntimeError on a missing or wrong password.
        """
        if mode not in ("r", "U", "rU"):
            raise RuntimeError('open() requires mode "r", "U", or "rU"')
        if 'U' in mode:
            import warnings
            warnings.warn("'U' mode is deprecated",
                          DeprecationWarning, 2)
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd))
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        # The underlying file object is shared by all open members, so reads
        # go through a _SharedFile wrapper that keeps its own offset; the
        # refcount lets _fpclose close the real file only when the last
        # reference goes away.
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if zinfo.flag_bits & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                # Historical ZIP filename encoding
                fname_str = fname.decode("cp437")

            # The local header's name must agree with the central directory.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %s is encrypted, password "
                                       "required for extraction" % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption header
                #  used to strengthen the algorithm. The first 11 bytes are
                #  completely random, while the 12th contains the MSB of the CRC,
                #  or the MSB of the file time depending on the header type
                #  and is used to check the correctness of the password.
                header = zef_file.read(12)
                h = list(map(zd, header[0:12]))
                if zinfo.flag_bits & 0x8:
                    # compare against the file type from extended local headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if h[11] != check_byte:
                    raise RuntimeError("Bad password for file", name)

            return ZipExtFile(zef_file, mode, zinfo, zd, True)
        except:
            # Any failure must release the shared-file reference taken above.
            zef_file.close()
            raise
    1322 
    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.
        """
        info = member if isinstance(member, ZipInfo) else self.getinfo(member)
        destination = os.getcwd() if path is None else path
        return self._extract_member(info, destination, pwd)
    1336 
    1337     def extractall(self, path=None, members=None, pwd=None):
    1338         """Extract all members from the archive to the current working
    1339            directory. `path' specifies a different directory to extract to.
    1340            `members' is optional and must be a subset of the list returned
    1341            by namelist().
    1342         """
    1343         if members is None:
    1344             members = self.namelist()
    1345 
    1346         for zipinfo in members:
    1347             self.extract(zipinfo, path, pwd)
    1348 
    1349     @classmethod
    1350     def _sanitize_windows_name(cls, arcname, pathsep):
    1351         """Replace bad characters and remove trailing dots from parts."""
    1352         table = cls._windows_illegal_name_trans_table
    1353         if not table:
    1354             illegal = ':<>|"?*'
    1355             table = str.maketrans(illegal, '_' * len(illegal))
    1356             cls._windows_illegal_name_trans_table = table
    1357         arcname = arcname.translate(table)
    1358         # remove trailing dots
    1359         arcname = (x.rstrip('.') for x in arcname.split(pathsep))
    1360         # rejoin, removing empty parts.
    1361         arcname = pathsep.join(x for x in arcname if x)
    1362         return arcname
    1363 
    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.

        Returns the normalized path actually written.  Directory members
        (names ending in '/') only create the directory.
        """
        # Build the destination pathname, replacing forward slashes with
        # platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)

        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # Interpret absolute pathname as relative, remove drive letter or
        # UNC path, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                                   if x not in invalid_path_parts)
        # NOTE: the separator literal must be an escaped backslash ('\\');
        # the unescaped form is a syntax error.
        if os.path.sep == '\\':
            # filter illegal characters on Windows
            arcname = self._sanitize_windows_name(arcname, os.path.sep)

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        if member.filename[-1] == '/':
            # Directory member: just ensure the directory exists.
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        # Stream the member's bytes straight to disk.
        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath
    1402 
    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            import warnings
            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
        if self.mode not in ('w', 'x', 'a'):
            raise RuntimeError("write() requires mode 'w', 'x', or 'a'")
        if not self.fp:
            raise RuntimeError(
                "Attempt to write ZIP archive that was already closed")
        _check_compression(zinfo.compress_type)
        if self._allowZip64:
            return
        # Without ZIP64 each of these limits is a hard cap.
        if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif zinfo.file_size > ZIP64_LIMIT:
            requires_zip64 = "Filesize"
        elif zinfo.header_offset > ZIP64_LIMIT:
            requires_zip64 = "Zipfile size"
        else:
            requires_zip64 = None
        if requires_zip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
    1425 
    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        filename      -- path of the file or directory on disk.
        arcname       -- archive name; defaults to filename with the drive
                         letter and leading separators removed.
        compress_type -- per-member override of the archive's default
                         compression (ignored for directories, which are
                         always stored).
        """
        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if isdir:
            zinfo.compress_type = ZIP_STORED
        elif compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        with self._lock:
            if self._seekable:
                self.fp.seek(self.start_dir)
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            if zinfo.compress_type == ZIP_LZMA:
                # Compressed data includes an end-of-stream (EOS) marker
                zinfo.flag_bits |= 0x02

            self._writecheck(zinfo)
            self._didModify = True

            if isdir:
                # Directories carry no data: write the header only.
                zinfo.file_size = 0
                zinfo.compress_size = 0
                zinfo.CRC = 0
                zinfo.external_attr |= 0x10  # MS-DOS directory flag
                self.filelist.append(zinfo)
                self.NameToInfo[zinfo.filename] = zinfo
                self.fp.write(zinfo.FileHeader(False))
                self.start_dir = self.fp.tell()
                return

            cmpr = _get_compressor(zinfo.compress_type)
            if not self._seekable:
                # Can't rewind to patch the header later, so sizes and CRC
                # will be appended in a data descriptor (flag bit 3).
                zinfo.flag_bits |= 0x08
            with open(filename, "rb") as fp:
                # Must overwrite CRC and sizes with correct data later
                zinfo.CRC = CRC = 0
                zinfo.compress_size = compress_size = 0
                # Compressed size can be larger than uncompressed size;
                # the 1.05 factor leaves headroom for that case.
                zip64 = self._allowZip64 and \
                    zinfo.file_size * 1.05 > ZIP64_LIMIT
                self.fp.write(zinfo.FileHeader(zip64))
                file_size = 0
                while 1:
                    buf = fp.read(1024 * 8)
                    if not buf:
                        break
                    file_size = file_size + len(buf)
                    CRC = crc32(buf, CRC)
                    if cmpr:
                        buf = cmpr.compress(buf)
                        compress_size = compress_size + len(buf)
                    self.fp.write(buf)
            if cmpr:
                buf = cmpr.flush()
                compress_size = compress_size + len(buf)
                self.fp.write(buf)
                zinfo.compress_size = compress_size
            else:
                zinfo.compress_size = file_size
            zinfo.CRC = CRC
            zinfo.file_size = file_size
            if zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LQQ' if zip64 else '<LLL'
                self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                          zinfo.file_size))
                self.start_dir = self.fp.tell()
            else:
                if not zip64 and self._allowZip64:
                    if file_size > ZIP64_LIMIT:
                        raise RuntimeError('File size has increased during compressing')
                    if compress_size > ZIP64_LIMIT:
                        raise RuntimeError('Compressed size larger than uncompressed size')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)
                self.start_dir = self.fp.tell() # Preserve current position in file
                self.fp.seek(zinfo.header_offset)
                self.fp.write(zinfo.FileHeader(zip64))
                self.fp.seek(self.start_dir)
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
    1529 
    def writestr(self, zinfo_or_arcname, data, compress_type=None):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if isinstance(data, str):
            data = data.encode("utf-8")
        if not isinstance(zinfo_or_arcname, ZipInfo):
            # Synthesize a ZipInfo for a bare name, stamped with "now".
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            if zinfo.filename[-1] == '/':
                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
                zinfo.external_attr |= 0x10           # MS-DOS directory flag
            else:
                zinfo.external_attr = 0o600 << 16     # ?rw-------
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        zinfo.file_size = len(data)            # Uncompressed size
        with self._lock:
            if self._seekable:
                self.fp.seek(self.start_dir)
            zinfo.header_offset = self.fp.tell()    # Start of header data
            if compress_type is not None:
                zinfo.compress_type = compress_type
            if zinfo.compress_type == ZIP_LZMA:
                # Compressed data includes an end-of-stream (EOS) marker
                zinfo.flag_bits |= 0x02

            self._writecheck(zinfo)
            self._didModify = True
            zinfo.CRC = crc32(data)       # CRC-32 checksum
            co = _get_compressor(zinfo.compress_type)
            if co:
                data = co.compress(data) + co.flush()
                zinfo.compress_size = len(data)    # Compressed size
            else:
                zinfo.compress_size = zinfo.file_size
            zip64 = (zinfo.file_size > ZIP64_LIMIT or
                     zinfo.compress_size > ZIP64_LIMIT)
            if zip64 and not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            self.fp.write(zinfo.FileHeader(zip64))
            self.fp.write(data)
            if zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LQQ' if zip64 else '<LLL'
                self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                          zinfo.file_size))
            self.fp.flush()
            self.start_dir = self.fp.tell()
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
    1592 
    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        # close() is idempotent (it returns immediately when self.fp is
        # None), so this is safe even after an explicit close.
        self.close()
    1596 
    def close(self):
        """Close the file, and for mode 'w', 'x' and 'a' write the ending
        records.

        Idempotent: a second call is a no-op.  The underlying file object
        is released even if writing the end records fails.
        """
        if self.fp is None:
            # Already closed (or never successfully opened): nothing to do.
            return

        try:
            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
                with self._lock:
                    if self._seekable:
                        # Rewind to the central directory's start so the end
                        # records overwrite any stale directory data.
                        self.fp.seek(self.start_dir)
                    self._write_end_record()
        finally:
            # Always release the file object, even on failure above.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
    1613 
    def _write_end_record(self):
        """Write the central directory, any required ZIP64 records, and the
        end-of-archive record starting at the current file position."""
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if (zinfo.file_size > ZIP64_LIMIT
                    or zinfo.compress_size > ZIP64_LIMIT):
                # Oversized fields go into a ZIP64 extra record; the 32-bit
                # slots hold the 0xffffffff sentinel instead.
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = struct.pack(
                    '<HH' + 'Q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(structCentralDir,
                                      stringCentralDir, create_version,
                                      zinfo.create_system, extract_version, zinfo.reserved,
                                      flag_bits, zinfo.compress_type, dostime, dosdate,
                                      zinfo.CRC, compress_size, file_size,
                                      len(filename), len(extra_data), len(zinfo.comment),
                                      0, zinfo.internal_attr, zinfo.external_attr,
                                      header_offset)
            except DeprecationWarning:
                # Dump the offending record for diagnosis, then re-raise.
                print((structCentralDir, stringCentralDir, create_version,
                       zinfo.create_system, extract_version, zinfo.reserved,
                       zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                       zinfo.CRC, compress_size, file_size,
                       len(zinfo.filename), len(extra_data), len(zinfo.comment),
                       0, zinfo.internal_attr, zinfo.external_attr,
                       header_offset), file=sys.stderr)
                raise
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # Clamp the classic record's 16/32-bit fields; readers fall
            # back to the ZIP64 records written above.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        self.fp.flush()
    1714 
    1715     def _fpclose(self, fp):
    1716         assert self._fileRefCnt > 0
    1717         self._fileRefCnt -= 1
    1718         if not self._fileRefCnt and not self._filePassed:
    1719             fp.close()
    1720 
    1721 
    1722 class PyZipFile(ZipFile):
    1723     """Class to create ZIP archives with Python library files and packages."""
    1724 
    1725     def __init__(self, file, mode="r", compression=ZIP_STORED,
    1726                  allowZip64=True, optimize=-1):
    1727         ZipFile.__init__(self, file, mode=mode, compression=compression,
    1728                          allowZip64=allowZip64)
    1729         self._optimize = optimize
    1730 
    1731     def writepy(self, pathname, basename="", filterfunc=None):
    1732         """Add all files from "pathname" to the ZIP archive.
    1733 
    1734         If pathname is a package directory, search the directory and
    1735         all package subdirectories recursively for all *.py and enter
    1736         the modules into the archive.  If pathname is a plain
    1737         directory, listdir *.py and enter all modules.  Else, pathname
    1738         must be a Python *.py file and the module will be put into the
    1739         archive.  Added modules are always module.pyc.
    1740         This method will compile the module.py into module.pyc if
    1741         necessary.
    1742         If filterfunc(pathname) is given, it is called with every argument.
    1743         When it is False, the file or directory is skipped.
    1744         """
    1745         if filterfunc and not filterfunc(pathname):
    1746             if self.debug:
    1747                 label = 'path' if os.path.isdir(pathname) else 'file'
    1748                 print('%s "%s" skipped by filterfunc' % (label, pathname))
    1749             return
    1750         dir, name = os.path.split(pathname)
    1751         if os.path.isdir(pathname):
    1752             initname = os.path.join(pathname, "__init__.py")
    1753             if os.path.isfile(initname):
    1754                 # This is a package directory, add it
    1755                 if basename:
    1756                     basename = "%s/%s" % (basename, name)
    1757                 else:
    1758                     basename = name
    1759                 if self.debug:
    1760                     print("Adding package in", pathname, "as", basename)
    1761                 fname, arcname = self._get_codename(initname[0:-3], basename)
    1762                 if self.debug:
    1763                     print("Adding", arcname)
    1764                 self.write(fname, arcname)
    1765                 dirlist = os.listdir(pathname)
    1766                 dirlist.remove("__init__.py")
    1767                 # Add all *.py files and package subdirectories
    1768                 for filename in dirlist:
    1769                     path = os.path.join(pathname, filename)
    1770                     root, ext = os.path.splitext(filename)
    1771                     if os.path.isdir(path):
    1772                         if os.path.isfile(os.path.join(path, "__init__.py")):
    1773                             # This is a package directory, add it
    1774                             self.writepy(path, basename,
    1775                                          filterfunc=filterfunc)  # Recursive call
    1776                     elif ext == ".py":
    1777                         if filterfunc and not filterfunc(path):
    1778                             if self.debug:
    1779                                 print('file "%s" skipped by filterfunc' % path)
    1780                             continue
    1781                         fname, arcname = self._get_codename(path[0:-3],
    1782                                                             basename)
    1783                         if self.debug:
    1784                             print("Adding", arcname)
    1785                         self.write(fname, arcname)
    1786             else:
    1787                 # This is NOT a package directory, add its files at top level
    1788                 if self.debug:
    1789                     print("Adding files from directory", pathname)
    1790                 for filename in os.listdir(pathname):
    1791                     path = os.path.join(pathname, filename)
    1792                     root, ext = os.path.splitext(filename)
    1793                     if ext == ".py":
    1794                         if filterfunc and not filterfunc(path):
    1795                             if self.debug:
    1796                                 print('file "%s" skipped by filterfunc' % path)
    1797                             continue
    1798                         fname, arcname = self._get_codename(path[0:-3],
    1799                                                             basename)
    1800                         if self.debug:
    1801                             print("Adding", arcname)
    1802                         self.write(fname, arcname)
    1803         else:
    1804             if pathname[-3:] != ".py":
    1805                 raise RuntimeError(
    1806                     'Files added with writepy() must end with ".py"')
    1807             fname, arcname = self._get_codename(pathname[0:-3], basename)
    1808             if self.debug:
    1809                 print("Adding file", arcname)
    1810             self.write(fname, arcname)
    1811 
    1812     def _get_codename(self, pathname, basename):
    1813         """Return (filename, archivename) for the path.
    1814 
    1815         Given a module name path, return the correct file path and
    1816         archive name, compiling if necessary.  For example, given
    1817         /python/lib/string, return (/python/lib/string.pyc, string).
    1818         """
    1819         def _compile(file, optimize=-1):
    1820             import py_compile
    1821             if self.debug:
    1822                 print("Compiling", file)
    1823             try:
    1824                 py_compile.compile(file, doraise=True, optimize=optimize)
    1825             except py_compile.PyCompileError as err:
    1826                 print(err.msg)
    1827                 return False
    1828             return True
    1829 
    1830         file_py  = pathname + ".py"
    1831         file_pyc = pathname + ".pyc"
    1832         pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
    1833         pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
    1834         pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
    1835         if self._optimize == -1:
    1836             # legacy mode: use whatever file is present
    1837             if (os.path.isfile(file_pyc) and
    1838                   os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
    1839                 # Use .pyc file.
    1840                 arcname = fname = file_pyc
    1841             elif (os.path.isfile(pycache_opt0) and
    1842                   os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
    1843                 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
    1844                 # file name in the archive.
    1845                 fname = pycache_opt0
    1846                 arcname = file_pyc
    1847             elif (os.path.isfile(pycache_opt1) and
    1848                   os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
    1849                 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
    1850                 # file name in the archive.
    1851                 fname = pycache_opt1
    1852                 arcname = file_pyc
    1853             elif (os.path.isfile(pycache_opt2) and
    1854                   os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
    1855                 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
    1856                 # file name in the archive.
    1857                 fname = pycache_opt2
    1858                 arcname = file_pyc
    1859             else:
    1860                 # Compile py into PEP 3147 pyc file.
    1861                 if _compile(file_py):
    1862                     if sys.flags.optimize == 0:
    1863                         fname = pycache_opt0
    1864                     elif sys.flags.optimize == 1:
    1865                         fname = pycache_opt1
    1866                     else:
    1867                         fname = pycache_opt2
    1868                     arcname = file_pyc
    1869                 else:
    1870                     fname = arcname = file_py
    1871         else:
    1872             # new mode: use given optimization level
    1873             if self._optimize == 0:
    1874                 fname = pycache_opt0
    1875                 arcname = file_pyc
    1876             else:
    1877                 arcname = file_pyc
    1878                 if self._optimize == 1:
    1879                     fname = pycache_opt1
    1880                 elif self._optimize == 2:
    1881                     fname = pycache_opt2
    1882                 else:
    1883                     msg = "invalid value for 'optimize': {!r}".format(self._optimize)
    1884                     raise ValueError(msg)
    1885             if not (os.path.isfile(fname) and
    1886                     os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
    1887                 if not _compile(file_py, optimize=self._optimize):
    1888                     fname = arcname = file_py
    1889         archivename = os.path.split(arcname)[1]
    1890         if basename:
    1891             archivename = "%s/%s" % (basename, archivename)
    1892         return (fname, archivename)
    1893 
    1894 
    1895 def main(args = None):
    1896     import textwrap
    1897     USAGE=textwrap.dedent("""
    1898         Usage:
    1899             zipfile.py -l zipfile.zip        # Show listing of a zipfile
    1900             zipfile.py -t zipfile.zip        # Test if a zipfile is valid
    1901             zipfile.py -e zipfile.zip target # Extract zipfile into target dir
    1902             zipfile.py -c zipfile.zip src ... # Create zipfile from sources
    1903         """)
    1904     if args is None:
    1905         args = sys.argv[1:]
    1906 
    1907     if not args or args[0] not in ('-l', '-c', '-e', '-t'):
    1908         print(USAGE)
    1909         sys.exit(1)
    1910 
    1911     if args[0] == '-l':
    1912         if len(args) != 2:
    1913             print(USAGE)
    1914             sys.exit(1)
    1915         with ZipFile(args[1], 'r') as zf:
    1916             zf.printdir()
    1917 
    1918     elif args[0] == '-t':
    1919         if len(args) != 2:
    1920             print(USAGE)
    1921             sys.exit(1)
    1922         with ZipFile(args[1], 'r') as zf:
    1923             badfile = zf.testzip()
    1924         if badfile:
    1925             print("The following enclosed file is corrupted: {!r}".format(badfile))
    1926         print("Done testing")
    1927 
    1928     elif args[0] == '-e':
    1929         if len(args) != 3:
    1930             print(USAGE)
    1931             sys.exit(1)
    1932 
    1933         with ZipFile(args[1], 'r') as zf:
    1934             zf.extractall(args[2])
    1935 
    1936     elif args[0] == '-c':
    1937         if len(args) < 3:
    1938             print(USAGE)
    1939             sys.exit(1)
    1940 
    1941         def addToZip(zf, path, zippath):
    1942             if os.path.isfile(path):
    1943                 zf.write(path, zippath, ZIP_DEFLATED)
    1944             elif os.path.isdir(path):
    1945                 if zippath:
    1946                     zf.write(path, zippath)
    1947                 for nm in os.listdir(path):
    1948                     addToZip(zf,
    1949                              os.path.join(path, nm), os.path.join(zippath, nm))
    1950             # else: ignore
    1951 
    1952         with ZipFile(args[1], 'w') as zf:
    1953             for path in args[2:]:
    1954                 zippath = os.path.basename(path)
    1955                 if not zippath:
    1956                     zippath = os.path.basename(os.path.dirname(path))
    1957                 if zippath in ('', os.curdir, os.pardir):
    1958                     zippath = ''
    1959                 addToZip(zf, path, zippath)
    1960 
    1961 if __name__ == "__main__":
    1962     main()
    View zipfile Code

    b、tarfile

       1 #!/usr/bin/env python3
       2 #-------------------------------------------------------------------
       3 # tarfile.py
       4 #-------------------------------------------------------------------
       5 # Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
       6 # All rights reserved.
       7 #
       8 # Permission  is  hereby granted,  free  of charge,  to  any person
       9 # obtaining a  copy of  this software  and associated documentation
      10 # files  (the  "Software"),  to   deal  in  the  Software   without
      11 # restriction,  including  without limitation  the  rights to  use,
      12 # copy, modify, merge, publish, distribute, sublicense, and/or sell
      13 # copies  of  the  Software,  and to  permit  persons  to  whom the
      14 # Software  is  furnished  to  do  so,  subject  to  the  following
      15 # conditions:
      16 #
      17 # The above copyright  notice and this  permission notice shall  be
      18 # included in all copies or substantial portions of the Software.
      19 #
      20 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
      21 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
      22 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
      23 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
      24 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
      25 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
      26 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
      27 # OTHER DEALINGS IN THE SOFTWARE.
      28 #
      29 """Read from and write to tar format archives.
      30 """
      31 
      32 version     = "0.9.0"
      33 __author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
      34 __date__    = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
      35 __cvsid__   = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
      36 __credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
      37 
      38 #---------
      39 # Imports
      40 #---------
      41 from builtins import open as bltn_open
      42 import sys
      43 import os
      44 import io
      45 import shutil
      46 import stat
      47 import time
      48 import struct
      49 import copy
      50 import re
      51 
      52 try:
      53     import grp, pwd
      54 except ImportError:
      55     grp = pwd = None
      56 
      57 # os.symlink on Windows prior to 6.0 raises NotImplementedError
      58 symlink_exception = (AttributeError, NotImplementedError)
      59 try:
      60     # OSError (winerror=1314) will be raised if the caller does not hold the
      61     # SeCreateSymbolicLinkPrivilege privilege
      62     symlink_exception += (OSError,)
      63 except NameError:
      64     pass
      65 
      66 # from tarfile import *
      67 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
      68 
      69 #---------------------------------------------------------
      70 # tar constants
      71 #---------------------------------------------------------
      72 NUL = b"\0"                   # the null character
      73 BLOCKSIZE = 512                 # length of processing blocks
      74 RECORDSIZE = BLOCKSIZE * 20     # length of records
      75 GNU_MAGIC = b"ustar  \0"      # magic gnu tar string
      76 POSIX_MAGIC = b"ustar\x0000"  # magic posix tar string
      77 
      78 LENGTH_NAME = 100               # maximum length of a filename
      79 LENGTH_LINK = 100               # maximum length of a linkname
      80 LENGTH_PREFIX = 155             # maximum length of the prefix field
      81 
      82 REGTYPE = b"0"                  # regular file
      83 AREGTYPE = b"\0"              # regular file
      84 LNKTYPE = b"1"                  # link (inside tarfile)
      85 SYMTYPE = b"2"                  # symbolic link
      86 CHRTYPE = b"3"                  # character special device
      87 BLKTYPE = b"4"                  # block special device
      88 DIRTYPE = b"5"                  # directory
      89 FIFOTYPE = b"6"                 # fifo special device
      90 CONTTYPE = b"7"                 # contiguous file
      91 
      92 GNUTYPE_LONGNAME = b"L"         # GNU tar longname
      93 GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
      94 GNUTYPE_SPARSE = b"S"           # GNU tar sparse file
      95 
      96 XHDTYPE = b"x"                  # POSIX.1-2001 extended header
      97 XGLTYPE = b"g"                  # POSIX.1-2001 global header
      98 SOLARIS_XHDTYPE = b"X"          # Solaris extended header
      99 
     100 USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
     101 GNU_FORMAT = 1                  # GNU tar format
     102 PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
     103 DEFAULT_FORMAT = GNU_FORMAT
     104 
     105 #---------------------------------------------------------
     106 # tarfile constants
     107 #---------------------------------------------------------
     108 # File types that tarfile supports:
     109 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
     110                    SYMTYPE, DIRTYPE, FIFOTYPE,
     111                    CONTTYPE, CHRTYPE, BLKTYPE,
     112                    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
     113                    GNUTYPE_SPARSE)
     114 
     115 # File types that will be treated as a regular file.
     116 REGULAR_TYPES = (REGTYPE, AREGTYPE,
     117                  CONTTYPE, GNUTYPE_SPARSE)
     118 
     119 # File types that are part of the GNU tar format.
     120 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
     121              GNUTYPE_SPARSE)
     122 
     123 # Fields from a pax header that override a TarInfo attribute.
     124 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
     125               "uid", "gid", "uname", "gname")
     126 
     127 # Fields from a pax header that are affected by hdrcharset.
     128 PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
     129 
     130 # Fields in a pax header that are numbers, all other fields
     131 # are treated as strings.
     132 PAX_NUMBER_FIELDS = {
     133     "atime": float,
     134     "ctime": float,
     135     "mtime": float,
     136     "uid": int,
     137     "gid": int,
     138     "size": int
     139 }
     140 
     141 #---------------------------------------------------------
     142 # initialization
     143 #---------------------------------------------------------
     144 if os.name in ("nt", "ce"):
     145     ENCODING = "utf-8"
     146 else:
     147     ENCODING = sys.getfilesystemencoding()
     148 
     149 #---------------------------------------------------------
     150 # Some useful functions
     151 #---------------------------------------------------------
     152 
     153 def stn(s, length, encoding, errors):
     154     """Convert a string to a null-terminated bytes object.
     155     """
     156     s = s.encode(encoding, errors)
     157     return s[:length] + (length - len(s)) * NUL
     158 
     159 def nts(s, encoding, errors):
     160     """Convert a null-terminated bytes object to a string.
     161     """
     162     p = s.find(b"\0")
     163     if p != -1:
     164         s = s[:p]
     165     return s.decode(encoding, errors)
     166 
     167 def nti(s):
     168     """Convert a number field to a python number.
     169     """
     170     # There are two possible encodings for a number field, see
     171     # itn() below.
     172     if s[0] in (0o200, 0o377):
     173         n = 0
     174         for i in range(len(s) - 1):
     175             n <<= 8
     176             n += s[i + 1]
     177         if s[0] == 0o377:
     178             n = -(256 ** (len(s) - 1) - n)
     179     else:
     180         try:
     181             s = nts(s, "ascii", "strict")
     182             n = int(s.strip() or "0", 8)
     183         except ValueError:
     184             raise InvalidHeaderError("invalid header")
     185     return n
     186 
     187 def itn(n, digits=8, format=DEFAULT_FORMAT):
     188     """Convert a python number to a number field.
     189     """
     190     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
     191     # octal digits followed by a null-byte, this allows values up to
     192     # (8**(digits-1))-1. GNU tar allows storing numbers greater than
     193     # that if necessary. A leading 0o200 or 0o377 byte indicate this
     194     # particular encoding, the following digits-1 bytes are a big-endian
     195     # base-256 representation. This allows values up to (256**(digits-1))-1.
     196     # A 0o200 byte indicates a positive number, a 0o377 byte a negative
     197     # number.
     198     if 0 <= n < 8 ** (digits - 1):
     199         s = bytes("%0*o" % (digits - 1, int(n)), "ascii") + NUL
     200     elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
     201         if n >= 0:
     202             s = bytearray([0o200])
     203         else:
     204             s = bytearray([0o377])
     205             n = 256 ** digits + n
     206 
     207         for i in range(digits - 1):
     208             s.insert(1, n & 0o377)
     209             n >>= 8
     210     else:
     211         raise ValueError("overflow in number field")
     212 
     213     return s
     214 
     215 def calc_chksums(buf):
     216     """Calculate the checksum for a member's header by summing up all
     217        characters except for the chksum field which is treated as if
     218        it was filled with spaces. According to the GNU tar sources,
     219        some tars (Sun and NeXT) calculate chksum with signed char,
     220        which will be different if there are chars in the buffer with
     221        the high bit set. So we calculate two checksums, unsigned and
     222        signed.
     223     """
     224     unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
     225     signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
     226     return unsigned_chksum, signed_chksum
     227 
     228 def copyfileobj(src, dst, length=None, exception=OSError):
     229     """Copy length bytes from fileobj src to fileobj dst.
     230        If length is None, copy the entire content.
     231     """
     232     if length == 0:
     233         return
     234     if length is None:
     235         shutil.copyfileobj(src, dst)
     236         return
     237 
     238     BUFSIZE = 16 * 1024
     239     blocks, remainder = divmod(length, BUFSIZE)
     240     for b in range(blocks):
     241         buf = src.read(BUFSIZE)
     242         if len(buf) < BUFSIZE:
     243             raise exception("unexpected end of data")
     244         dst.write(buf)
     245 
     246     if remainder != 0:
     247         buf = src.read(remainder)
     248         if len(buf) < remainder:
     249             raise exception("unexpected end of data")
     250         dst.write(buf)
     251     return
     252 
     253 def filemode(mode):
     254     """Deprecated in this location; use stat.filemode."""
     255     import warnings
     256     warnings.warn("deprecated in favor of stat.filemode",
     257                   DeprecationWarning, 2)
     258     return stat.filemode(mode)
     259 
     260 def _safe_print(s):
     261     encoding = getattr(sys.stdout, 'encoding', None)
     262     if encoding is not None:
     263         s = s.encode(encoding, 'backslashreplace').decode(encoding)
     264     print(s, end=' ')
     265 
     266 
     267 class TarError(Exception):
     268     """Base exception."""
     269     pass
     270 class ExtractError(TarError):
     271     """General exception for extract errors."""
     272     pass
     273 class ReadError(TarError):
     274     """Exception for unreadable tar archives."""
     275     pass
     276 class CompressionError(TarError):
     277     """Exception for unavailable compression methods."""
     278     pass
     279 class StreamError(TarError):
     280     """Exception for unsupported operations on stream-like TarFiles."""
     281     pass
     282 class HeaderError(TarError):
     283     """Base exception for header errors."""
     284     pass
     285 class EmptyHeaderError(HeaderError):
     286     """Exception for empty headers."""
     287     pass
     288 class TruncatedHeaderError(HeaderError):
     289     """Exception for truncated headers."""
     290     pass
     291 class EOFHeaderError(HeaderError):
     292     """Exception for end of file headers."""
     293     pass
     294 class InvalidHeaderError(HeaderError):
     295     """Exception for invalid headers."""
     296     pass
     297 class SubsequentHeaderError(HeaderError):
     298     """Exception for missing and invalid extended headers."""
     299     pass
     300 
     301 #---------------------------
     302 # internal stream interface
     303 #---------------------------
     304 class _LowLevelFile:
     305     """Low-level file object. Supports reading and writing.
     306        It is used instead of a regular file object for streaming
     307        access.
     308     """
     309 
     310     def __init__(self, name, mode):
     311         mode = {
     312             "r": os.O_RDONLY,
     313             "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
     314         }[mode]
     315         if hasattr(os, "O_BINARY"):
     316             mode |= os.O_BINARY
     317         self.fd = os.open(name, mode, 0o666)
     318 
     319     def close(self):
     320         os.close(self.fd)
     321 
     322     def read(self, size):
     323         return os.read(self.fd, size)
     324 
     325     def write(self, s):
     326         os.write(self.fd, s)
     327 
     328 class _Stream:
     329     """Class that serves as an adapter between TarFile and
     330        a stream-like object.  The stream-like object only
     331        needs to have a read() or write() method and is accessed
     332        blockwise.  Use of gzip or bzip2 compression is possible.
     333        A stream-like object could be for example: sys.stdin,
     334        sys.stdout, a socket, a tape device etc.
     335 
     336        _Stream is intended to be used only internally.
     337     """
     338 
     339     def __init__(self, name, mode, comptype, fileobj, bufsize):
     340         """Construct a _Stream object.
     341         """
     342         self._extfileobj = True
     343         if fileobj is None:
     344             fileobj = _LowLevelFile(name, mode)
     345             self._extfileobj = False
     346 
     347         if comptype == '*':
     348             # Enable transparent compression detection for the
     349             # stream interface
     350             fileobj = _StreamProxy(fileobj)
     351             comptype = fileobj.getcomptype()
     352 
     353         self.name     = name or ""
     354         self.mode     = mode
     355         self.comptype = comptype
     356         self.fileobj  = fileobj
     357         self.bufsize  = bufsize
     358         self.buf      = b""
     359         self.pos      = 0
     360         self.closed   = False
     361 
     362         try:
     363             if comptype == "gz":
     364                 try:
     365                     import zlib
     366                 except ImportError:
     367                     raise CompressionError("zlib module is not available")
     368                 self.zlib = zlib
     369                 self.crc = zlib.crc32(b"")
     370                 if mode == "r":
     371                     self._init_read_gz()
     372                     self.exception = zlib.error
     373                 else:
     374                     self._init_write_gz()
     375 
     376             elif comptype == "bz2":
     377                 try:
     378                     import bz2
     379                 except ImportError:
     380                     raise CompressionError("bz2 module is not available")
     381                 if mode == "r":
     382                     self.dbuf = b""
     383                     self.cmp = bz2.BZ2Decompressor()
     384                     self.exception = OSError
     385                 else:
     386                     self.cmp = bz2.BZ2Compressor()
     387 
     388             elif comptype == "xz":
     389                 try:
     390                     import lzma
     391                 except ImportError:
     392                     raise CompressionError("lzma module is not available")
     393                 if mode == "r":
     394                     self.dbuf = b""
     395                     self.cmp = lzma.LZMADecompressor()
     396                     self.exception = lzma.LZMAError
     397                 else:
     398                     self.cmp = lzma.LZMACompressor()
     399 
     400             elif comptype != "tar":
     401                 raise CompressionError("unknown compression type %r" % comptype)
     402 
     403         except:
     404             if not self._extfileobj:
     405                 self.fileobj.close()
     406             self.closed = True
     407             raise
     408 
     409     def __del__(self):
     410         if hasattr(self, "closed") and not self.closed:
     411             self.close()
     412 
     413     def _init_write_gz(self):
     414         """Initialize for writing with gzip compression.
     415         """
     416         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
     417                                             -self.zlib.MAX_WBITS,
     418                                             self.zlib.DEF_MEM_LEVEL,
     419                                             0)
     420         timestamp = struct.pack("<L", int(time.time()))
     421         self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
     422         if self.name.endswith(".gz"):
     423             self.name = self.name[:-3]
     424         # RFC1952 says we must use ISO-8859-1 for the FNAME field.
     425         self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
     426 
     427     def write(self, s):
     428         """Write string s to the stream.
     429         """
     430         if self.comptype == "gz":
     431             self.crc = self.zlib.crc32(s, self.crc)
     432         self.pos += len(s)
     433         if self.comptype != "tar":
     434             s = self.cmp.compress(s)
     435         self.__write(s)
     436 
     437     def __write(self, s):
     438         """Write string s to the stream if a whole new block
     439            is ready to be written.
     440         """
     441         self.buf += s
     442         while len(self.buf) > self.bufsize:
     443             self.fileobj.write(self.buf[:self.bufsize])
     444             self.buf = self.buf[self.bufsize:]
     445 
     446     def close(self):
     447         """Close the _Stream object. No operation should be
     448            done on it afterwards.
     449         """
     450         if self.closed:
     451             return
     452 
     453         self.closed = True
     454         try:
     455             if self.mode == "w" and self.comptype != "tar":
     456                 self.buf += self.cmp.flush()
     457 
     458             if self.mode == "w" and self.buf:
     459                 self.fileobj.write(self.buf)
     460                 self.buf = b""
     461                 if self.comptype == "gz":
     462                     self.fileobj.write(struct.pack("<L", self.crc))
     463                     self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
     464         finally:
     465             if not self._extfileobj:
     466                 self.fileobj.close()
     467 
     468     def _init_read_gz(self):
     469         """Initialize for reading a gzip compressed fileobj.
     470         """
     471         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
     472         self.dbuf = b""
     473 
     474         # taken from gzip.GzipFile with some alterations
     475         if self.__read(2) != b"37213":
     476             raise ReadError("not a gzip file")
     477         if self.__read(1) != b"10":
     478             raise CompressionError("unsupported compression method")
     479 
     480         flag = ord(self.__read(1))
     481         self.__read(6)
     482 
     483         if flag & 4:
     484             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
     485             self.read(xlen)
     486         if flag & 8:
     487             while True:
     488                 s = self.__read(1)
     489                 if not s or s == NUL:
     490                     break
     491         if flag & 16:
     492             while True:
     493                 s = self.__read(1)
     494                 if not s or s == NUL:
     495                     break
     496         if flag & 2:
     497             self.__read(2)
     498 
     499     def tell(self):
     500         """Return the stream's file pointer position.
     501         """
     502         return self.pos
     503 
     504     def seek(self, pos=0):
     505         """Set the stream's file pointer to pos. Negative seeking
     506            is forbidden.
     507         """
     508         if pos - self.pos >= 0:
     509             blocks, remainder = divmod(pos - self.pos, self.bufsize)
     510             for i in range(blocks):
     511                 self.read(self.bufsize)
     512             self.read(remainder)
     513         else:
     514             raise StreamError("seeking backwards is not allowed")
     515         return self.pos
     516 
     517     def read(self, size=None):
     518         """Return the next size number of bytes from the stream.
     519            If size is not defined, return all bytes of the stream
     520            up to EOF.
     521         """
     522         if size is None:
     523             t = []
     524             while True:
     525                 buf = self._read(self.bufsize)
     526                 if not buf:
     527                     break
     528                 t.append(buf)
     529             buf = "".join(t)
     530         else:
     531             buf = self._read(size)
     532         self.pos += len(buf)
     533         return buf
     534 
     535     def _read(self, size):
     536         """Return size bytes from the stream.
     537         """
     538         if self.comptype == "tar":
     539             return self.__read(size)
     540 
     541         c = len(self.dbuf)
     542         while c < size:
     543             buf = self.__read(self.bufsize)
     544             if not buf:
     545                 break
     546             try:
     547                 buf = self.cmp.decompress(buf)
     548             except self.exception:
     549                 raise ReadError("invalid compressed data")
     550             self.dbuf += buf
     551             c += len(buf)
     552         buf = self.dbuf[:size]
     553         self.dbuf = self.dbuf[size:]
     554         return buf
     555 
     556     def __read(self, size):
     557         """Return size bytes from stream. If internal buffer is empty,
     558            read another block from the stream.
     559         """
     560         c = len(self.buf)
     561         while c < size:
     562             buf = self.fileobj.read(self.bufsize)
     563             if not buf:
     564                 break
     565             self.buf += buf
     566             c += len(buf)
     567         buf = self.buf[:size]
     568         self.buf = self.buf[size:]
     569         return buf
     570 # class _Stream
     571 
     572 class _StreamProxy(object):
     573     """Small proxy class that enables transparent compression
     574        detection for the Stream interface (mode 'r|*').
     575     """
     576 
     577     def __init__(self, fileobj):
     578         self.fileobj = fileobj
     579         self.buf = self.fileobj.read(BLOCKSIZE)
     580 
     581     def read(self, size):
     582         self.read = self.fileobj.read
     583         return self.buf
     584 
     585     def getcomptype(self):
     586         if self.buf.startswith(b"x1fx8bx08"):
     587             return "gz"
     588         elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
     589             return "bz2"
     590         elif self.buf.startswith((b"x5dx00x00x80", b"xfd7zXZ")):
     591             return "xz"
     592         else:
     593             return "tar"
     594 
     595     def close(self):
     596         self.fileobj.close()
     597 # class StreamProxy
     598 
     599 #------------------------
     600 # Extraction file object
     601 #------------------------
     602 class _FileInFile(object):
     603     """A thin wrapper around an existing file object that
     604        provides a part of its data as an individual file
     605        object.
     606     """
     607 
     608     def __init__(self, fileobj, offset, size, blockinfo=None):
     609         self.fileobj = fileobj
     610         self.offset = offset
     611         self.size = size
     612         self.position = 0
     613         self.name = getattr(fileobj, "name", None)
     614         self.closed = False
     615 
     616         if blockinfo is None:
     617             blockinfo = [(0, size)]
     618 
     619         # Construct a map with data and zero blocks.
     620         self.map_index = 0
     621         self.map = []
     622         lastpos = 0
     623         realpos = self.offset
     624         for offset, size in blockinfo:
     625             if offset > lastpos:
     626                 self.map.append((False, lastpos, offset, None))
     627             self.map.append((True, offset, offset + size, realpos))
     628             realpos += size
     629             lastpos = offset + size
     630         if lastpos < self.size:
     631             self.map.append((False, lastpos, self.size, None))
     632 
     633     def flush(self):
     634         pass
     635 
     636     def readable(self):
     637         return True
     638 
     639     def writable(self):
     640         return False
     641 
     642     def seekable(self):
     643         return self.fileobj.seekable()
     644 
     645     def tell(self):
     646         """Return the current file position.
     647         """
     648         return self.position
     649 
     650     def seek(self, position, whence=io.SEEK_SET):
     651         """Seek to a position in the file.
     652         """
     653         if whence == io.SEEK_SET:
     654             self.position = min(max(position, 0), self.size)
     655         elif whence == io.SEEK_CUR:
     656             if position < 0:
     657                 self.position = max(self.position + position, 0)
     658             else:
     659                 self.position = min(self.position + position, self.size)
     660         elif whence == io.SEEK_END:
     661             self.position = max(min(self.size + position, self.size), 0)
     662         else:
     663             raise ValueError("Invalid argument")
     664         return self.position
     665 
     666     def read(self, size=None):
     667         """Read data from the file.
     668         """
     669         if size is None:
     670             size = self.size - self.position
     671         else:
     672             size = min(size, self.size - self.position)
     673 
     674         buf = b""
     675         while size > 0:
     676             while True:
     677                 data, start, stop, offset = self.map[self.map_index]
     678                 if start <= self.position < stop:
     679                     break
     680                 else:
     681                     self.map_index += 1
     682                     if self.map_index == len(self.map):
     683                         self.map_index = 0
     684             length = min(size, stop - self.position)
     685             if data:
     686                 self.fileobj.seek(offset + (self.position - start))
     687                 b = self.fileobj.read(length)
     688                 if len(b) != length:
     689                     raise ReadError("unexpected end of data")
     690                 buf += b
     691             else:
     692                 buf += NUL * length
     693             size -= length
     694             self.position += length
     695         return buf
     696 
     697     def readinto(self, b):
     698         buf = self.read(len(b))
     699         b[:len(buf)] = buf
     700         return len(buf)
     701 
     702     def close(self):
     703         self.closed = True
     704 #class _FileInFile
     705 
     706 class ExFileObject(io.BufferedReader):
     707 
     708     def __init__(self, tarfile, tarinfo):
     709         fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
     710                 tarinfo.size, tarinfo.sparse)
     711         super().__init__(fileobj)
     712 #class ExFileObject
     713 
     714 #------------------
     715 # Exported Classes
     716 #------------------
     717 class TarInfo(object):
     718     """Informational class which holds the details about an
     719        archive member given by a tar header block.
     720        TarInfo objects are returned by TarFile.getmember(),
     721        TarFile.getmembers() and TarFile.gettarinfo() and are
     722        usually created internally.
     723     """
     724 
     725     __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
     726                  "chksum", "type", "linkname", "uname", "gname",
     727                  "devmajor", "devminor",
     728                  "offset", "offset_data", "pax_headers", "sparse",
     729                  "tarfile", "_sparse_structs", "_link_target")
     730 
     731     def __init__(self, name=""):
     732         """Construct a TarInfo object. name is the optional name
     733            of the member.
     734         """
     735         self.name = name        # member name
     736         self.mode = 0o644       # file permissions
     737         self.uid = 0            # user id
     738         self.gid = 0            # group id
     739         self.size = 0           # file size
     740         self.mtime = 0          # modification time
     741         self.chksum = 0         # header checksum
     742         self.type = REGTYPE     # member type
     743         self.linkname = ""      # link name
     744         self.uname = ""         # user name
     745         self.gname = ""         # group name
     746         self.devmajor = 0       # device major number
     747         self.devminor = 0       # device minor number
     748 
     749         self.offset = 0         # the tar header starts here
     750         self.offset_data = 0    # the file's data starts here
     751 
     752         self.sparse = None      # sparse member information
     753         self.pax_headers = {}   # pax header information
     754 
     755     # In pax headers the "name" and "linkname" field are called
     756     # "path" and "linkpath".
     757     def _getpath(self):
     758         return self.name
     759     def _setpath(self, name):
     760         self.name = name
     761     path = property(_getpath, _setpath)
     762 
     763     def _getlinkpath(self):
     764         return self.linkname
     765     def _setlinkpath(self, linkname):
     766         self.linkname = linkname
     767     linkpath = property(_getlinkpath, _setlinkpath)
     768 
     769     def __repr__(self):
     770         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
     771 
     772     def get_info(self):
     773         """Return the TarInfo's attributes as a dictionary.
     774         """
     775         info = {
     776             "name":     self.name,
     777             "mode":     self.mode & 0o7777,
     778             "uid":      self.uid,
     779             "gid":      self.gid,
     780             "size":     self.size,
     781             "mtime":    self.mtime,
     782             "chksum":   self.chksum,
     783             "type":     self.type,
     784             "linkname": self.linkname,
     785             "uname":    self.uname,
     786             "gname":    self.gname,
     787             "devmajor": self.devmajor,
     788             "devminor": self.devminor
     789         }
     790 
     791         if info["type"] == DIRTYPE and not info["name"].endswith("/"):
     792             info["name"] += "/"
     793 
     794         return info
     795 
     796     def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
     797         """Return a tar header as a string of 512 byte blocks.
     798         """
     799         info = self.get_info()
     800 
     801         if format == USTAR_FORMAT:
     802             return self.create_ustar_header(info, encoding, errors)
     803         elif format == GNU_FORMAT:
     804             return self.create_gnu_header(info, encoding, errors)
     805         elif format == PAX_FORMAT:
     806             return self.create_pax_header(info, encoding)
     807         else:
     808             raise ValueError("invalid format")
     809 
     810     def create_ustar_header(self, info, encoding, errors):
     811         """Return the object as a ustar header block.
     812         """
     813         info["magic"] = POSIX_MAGIC
     814 
     815         if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
     816             raise ValueError("linkname is too long")
     817 
     818         if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
     819             info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
     820 
     821         return self._create_header(info, USTAR_FORMAT, encoding, errors)
     822 
     823     def create_gnu_header(self, info, encoding, errors):
     824         """Return the object as a GNU header block sequence.
     825         """
     826         info["magic"] = GNU_MAGIC
     827 
     828         buf = b""
     829         if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
     830             buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
     831 
     832         if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
     833             buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
     834 
     835         return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
     836 
     837     def create_pax_header(self, info, encoding):
     838         """Return the object as a ustar header block. If it cannot be
     839            represented this way, prepend a pax extended header sequence
     840            with supplement information.
     841         """
     842         info["magic"] = POSIX_MAGIC
     843         pax_headers = self.pax_headers.copy()
     844 
     845         # Test string fields for values that exceed the field length or cannot
     846         # be represented in ASCII encoding.
     847         for name, hname, length in (
     848                 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
     849                 ("uname", "uname", 32), ("gname", "gname", 32)):
     850 
     851             if hname in pax_headers:
     852                 # The pax header has priority.
     853                 continue
     854 
     855             # Try to encode the string as ASCII.
     856             try:
     857                 info[name].encode("ascii", "strict")
     858             except UnicodeEncodeError:
     859                 pax_headers[hname] = info[name]
     860                 continue
     861 
     862             if len(info[name]) > length:
     863                 pax_headers[hname] = info[name]
     864 
     865         # Test number fields for values that exceed the field limit or values
     866         # that like to be stored as float.
     867         for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
     868             if name in pax_headers:
     869                 # The pax header has priority. Avoid overflow.
     870                 info[name] = 0
     871                 continue
     872 
     873             val = info[name]
     874             if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
     875                 pax_headers[name] = str(val)
     876                 info[name] = 0
     877 
     878         # Create a pax extended header if necessary.
     879         if pax_headers:
     880             buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
     881         else:
     882             buf = b""
     883 
     884         return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
     885 
     886     @classmethod
     887     def create_pax_global_header(cls, pax_headers):
     888         """Return the object as a pax global header block sequence.
     889         """
     890         return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
     891 
     892     def _posix_split_name(self, name, encoding, errors):
     893         """Split a name longer than 100 chars into a prefix
     894            and a name part.
     895         """
     896         components = name.split("/")
     897         for i in range(1, len(components)):
     898             prefix = "/".join(components[:i])
     899             name = "/".join(components[i:])
     900             if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and 
     901                     len(name.encode(encoding, errors)) <= LENGTH_NAME:
     902                 break
     903         else:
     904             raise ValueError("name is too long")
     905 
     906         return prefix, name
     907 
     908     @staticmethod
     909     def _create_header(info, format, encoding, errors):
     910         """Return a header block. info is a dictionary with file
     911            information, format must be one of the *_FORMAT constants.
     912         """
     913         parts = [
     914             stn(info.get("name", ""), 100, encoding, errors),
     915             itn(info.get("mode", 0) & 0o7777, 8, format),
     916             itn(info.get("uid", 0), 8, format),
     917             itn(info.get("gid", 0), 8, format),
     918             itn(info.get("size", 0), 12, format),
     919             itn(info.get("mtime", 0), 12, format),
     920             b"        ", # checksum field
     921             info.get("type", REGTYPE),
     922             stn(info.get("linkname", ""), 100, encoding, errors),
     923             info.get("magic", POSIX_MAGIC),
     924             stn(info.get("uname", ""), 32, encoding, errors),
     925             stn(info.get("gname", ""), 32, encoding, errors),
     926             itn(info.get("devmajor", 0), 8, format),
     927             itn(info.get("devminor", 0), 8, format),
     928             stn(info.get("prefix", ""), 155, encoding, errors)
     929         ]
     930 
     931         buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
     932         chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
     933         buf = buf[:-364] + bytes("%06o" % chksum, "ascii") + buf[-357:]
     934         return buf
     935 
     936     @staticmethod
     937     def _create_payload(payload):
     938         """Return the string payload filled with zero bytes
     939            up to the next 512 byte border.
     940         """
     941         blocks, remainder = divmod(len(payload), BLOCKSIZE)
     942         if remainder > 0:
     943             payload += (BLOCKSIZE - remainder) * NUL
     944         return payload
     945 
     946     @classmethod
     947     def _create_gnu_long_header(cls, name, type, encoding, errors):
     948         """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
     949            for name.
     950         """
     951         name = name.encode(encoding, errors) + NUL
     952 
     953         info = {}
     954         info["name"] = "././@LongLink"
     955         info["type"] = type
     956         info["size"] = len(name)
     957         info["magic"] = GNU_MAGIC
     958 
     959         # create extended header + name blocks.
     960         return cls._create_header(info, USTAR_FORMAT, encoding, errors) + 
     961                 cls._create_payload(name)
     962 
     963     @classmethod
     964     def _create_pax_generic_header(cls, pax_headers, type, encoding):
     965         """Return a POSIX.1-2008 extended or global header sequence
     966            that contains a list of keyword, value pairs. The values
     967            must be strings.
     968         """
     969         # Check if one of the fields contains surrogate characters and thereby
     970         # forces hdrcharset=BINARY, see _proc_pax() for more information.
     971         binary = False
     972         for keyword, value in pax_headers.items():
     973             try:
     974                 value.encode("utf-8", "strict")
     975             except UnicodeEncodeError:
     976                 binary = True
     977                 break
     978 
     979         records = b""
     980         if binary:
     981             # Put the hdrcharset field at the beginning of the header.
     982             records += b"21 hdrcharset=BINARY
    "
     983 
     984         for keyword, value in pax_headers.items():
     985             keyword = keyword.encode("utf-8")
     986             if binary:
     987                 # Try to restore the original byte representation of `value'.
     988                 # Needless to say, that the encoding must match the string.
     989                 value = value.encode(encoding, "surrogateescape")
     990             else:
     991                 value = value.encode("utf-8")
     992 
     993             l = len(keyword) + len(value) + 3   # ' ' + '=' + '
    '
     994             n = p = 0
     995             while True:
     996                 n = l + len(str(p))
     997                 if n == p:
     998                     break
     999                 p = n
    1000             records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"
    "
    1001 
    1002         # We use a hardcoded "././@PaxHeader" name like star does
    1003         # instead of the one that POSIX recommends.
    1004         info = {}
    1005         info["name"] = "././@PaxHeader"
    1006         info["type"] = type
    1007         info["size"] = len(records)
    1008         info["magic"] = POSIX_MAGIC
    1009 
    1010         # Create pax header + record blocks.
    1011         return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + 
    1012                 cls._create_payload(records)
    1013 
    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        # Field offsets follow the ustar header layout.
        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj
    1076 
    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        # The header we just consumed starts one block back.
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)
    1086 
    1087     #--------------------------------------------------------------------------
    1088     # The following are methods that are called depending on the type of a
    1089     # member. The entry point is _proc_member() which can be overridden in a
    1090     # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    1091     # implement the following
    1092     # operations:
    1093     # 1. Set self.offset_data to the position where the data blocks begin,
    1094     #    if there is data that follows.
    1095     # 2. Set tarfile.offset to the position where the next member's header will
    1096     #    begin.
    1097     # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)
    1110 
    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self
    1127 
    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next
    1149 
    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # frombuf() already collected the structures stored in the
        # header block itself.
        entries, has_more, real_size = self._sparse_structs
        del self._sparse_structs

        # Keep consuming 512-byte extension blocks while the previous
        # block's "extended" flag is set. Each block holds up to 21 pairs
        # of 12-digit octal offset/numbytes fields (21 * 24 = 504 bytes),
        # followed by the flag byte at position 504.
        while has_more:
            block = tarfile.fileobj.read(BLOCKSIZE)
            cursor = 0
            for _ in range(21):
                try:
                    off = nti(block[cursor:cursor + 12])
                    length = nti(block[cursor + 12:cursor + 24])
                except ValueError:
                    break
                if off and length:
                    entries.append((off, length))
                cursor += 24
            has_more = bool(block[504])
        self.sparse = entries

        # Data follows the extension blocks; skip it to the next header.
        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        # Report the real (expanded) file size, not the stored size.
        self.size = real_size
        return self
    1177 
    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2008.

           Returns the TarInfo object of the member that follows this
           pax header, patched with the supplemental information.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        # NOTE: the escaped \d and \n byte-pattern is the correct form; the
        # unescaped version never matches a pax record.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            # Value spans from just past '=' to just before the record's
            # trailing newline.
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next
    1279 
    1280     def _proc_gnusparse_00(self, next, pax_headers, buf):
    1281         """Process a GNU tar extended sparse header, version 0.0.
    1282         """
    1283         offsets = []
    1284         for match in re.finditer(br"d+ GNU.sparse.offset=(d+)
    ", buf):
    1285             offsets.append(int(match.group(1)))
    1286         numbytes = []
    1287         for match in re.finditer(br"d+ GNU.sparse.numbytes=(d+)
    ", buf):
    1288             numbytes.append(int(match.group(1)))
    1289         next.sparse = list(zip(offsets, numbytes))
    1290 
    1291     def _proc_gnusparse_01(self, next, pax_headers):
    1292         """Process a GNU tar extended sparse header, version 0.1.
    1293         """
    1294         sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
    1295         next.sparse = list(zip(sparse[::2], sparse[1::2]))
    1296 
    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.

           The sparse map lives in the member's data area: a decimal entry
           count on the first line, then newline-terminated offset/length
           numbers, padded to whole 512-byte blocks.
        """
        fields = None
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        # First line is the number of (offset, length) pairs. The split
        # delimiter must be the newline byte b"\n".
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            # Refill the buffer block-by-block until a full number
            # (terminated by a newline) is available.
            if b"\n" not in buf:
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        # The actual file data follows the map blocks just consumed.
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))
    1312 
    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                self.path = value
            elif keyword in ("GNU.sparse.size", "GNU.sparse.realsize"):
                # Both sparse size keywords carry the expanded file size.
                self.size = int(value)
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    # Numeric fields get converted; garbage falls back to 0.
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        # Remember which headers this object was patched with.
        self.pax_headers = pax_headers.copy()
    1335 
    1336     def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
    1337         """Decode a single field from a pax record.
    1338         """
    1339         try:
    1340             return value.decode(encoding, "strict")
    1341         except UnicodeDecodeError:
    1342             return value.decode(fallback_encoding, fallback_errors)
    1343 
    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        # Ceiling division on whole blocks, then scale back to bytes.
        full, partial = divmod(count, BLOCKSIZE)
        return (full + 1) * BLOCKSIZE if partial else full * BLOCKSIZE
    1352 
    def isreg(self):
        # True if this member is (any flavor of) a regular file.
        return self.type in REGULAR_TYPES
    def isfile(self):
        # Alias of isreg().
        return self.isreg()
    def isdir(self):
        # True if this member is a directory.
        return self.type == DIRTYPE
    def issym(self):
        # True if this member is a symbolic link.
        return self.type == SYMTYPE
    def islnk(self):
        # True if this member is a hard link.
        return self.type == LNKTYPE
    def ischr(self):
        # True if this member is a character device node.
        return self.type == CHRTYPE
    def isblk(self):
        # True if this member is a block device node.
        return self.type == BLKTYPE
    def isfifo(self):
        # True if this member is a FIFO (named pipe).
        return self.type == FIFOTYPE
    def issparse(self):
        # True if a sparse map was collected for this member.
        return self.sparse is not None
    def isdev(self):
        # True if this member is a device node or FIFO.
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
    1373 # class TarInfo
    1374 
    1375 class TarFile(object):
    1376     """The TarFile Class provides an interface to tar archives.
    1377     """
    1378 
    # Class-level defaults; __init__ overrides them on the instance when
    # the corresponding keyword argument is given.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().
    1400 
    def __init__(self, name=None, mode="r", fileobj=None, format=None,
            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
            errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
           read from an existing archive, 'a' to append data to an existing
           file or 'w' to create a new file overwriting an existing one. `mode'
           defaults to 'r'.
           If `fileobj' is given, it is used for reading or writing data. If it
           can be determined, `mode' is overridden by `fileobj's mode.
           `fileobj' is not closed, when TarFile is closed.
        """
        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
        if mode not in modes:
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        self.mode = mode
        self._mode = modes[mode]    # the underlying binary file mode

        if not fileobj:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode = "w"
                self._mode = "wb"
            fileobj = bltn_open(name, self._mode)
            self._extfileobj = False    # we opened fileobj; close() must close it
        else:
            # Borrow name/mode from the caller's file object when available.
            if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                self._mode = fileobj.mode
            self._extfileobj = True     # caller keeps ownership of fileobj
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        # Init attributes. Only explicitly-given keywords override the
        # class-level defaults.
        if format is not None:
            self.format = format
        if tarinfo is not None:
            self.tarinfo = tarinfo
        if dereference is not None:
            self.dereference = dereference
        if ignore_zeros is not None:
            self.ignore_zeros = ignore_zeros
        if encoding is not None:
            self.encoding = encoding
        self.errors = errors

        # User-supplied pax_headers only apply when writing PAX format.
        if pax_headers is not None and self.format == PAX_FORMAT:
            self.pax_headers = pax_headers
        else:
            self.pax_headers = {}

        if debug is not None:
            self.debug = debug
        if errorlevel is not None:
            self.errorlevel = errorlevel

        # Init datastructures.
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        try:
            if self.mode == "r":
                # Prime the read side: remember the first member so
                # next() can hand it out later.
                self.firstmember = None
                self.firstmember = self.next()

            if self.mode == "a":
                # Move to the end of the archive,
                # before the first empty block.
                while True:
                    self.fileobj.seek(self.offset)
                    try:
                        tarinfo = self.tarinfo.fromtarfile(self)
                        self.members.append(tarinfo)
                    except EOFHeaderError:
                        self.fileobj.seek(self.offset)
                        break
                    except HeaderError as e:
                        raise ReadError(str(e))

            if self.mode in ("a", "w", "x"):
                self._loaded = True

                if self.pax_headers:
                    # Emit a pax global header before any members.
                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                    self.fileobj.write(buf)
                    self.offset += len(buf)
        except:
            # Undo partial construction: close the file only if we opened it.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise
    1498 
    1499     #--------------------------------------------------------------------------
    1500     # Below are the classmethods which act as alternate constructors to the
    1501     # TarFile class. The open() method is the only one that is needed for
    1502     # public use; it is the "super"-constructor and is able to select an
    1503     # adequate "sub"-constructor for a particular compression using the mapping
    1504     # from OPEN_METH.
    1505     #
    1506     # This concept allows one to subclass TarFile without losing the comfort of
    1507     # the super-constructor. A sub-constructor is registered and made available
    1508     # by adding it to the mapping in OPEN_METH.
    1509 
    @classmethod
    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
        """Open a tar archive for reading, writing or appending. Return
           an appropriate TarFile class.

           mode:
           'r' or 'r:*' open for reading with transparent compression
           'r:'         open for reading exclusively uncompressed
           'r:gz'       open for reading with gzip compression
           'r:bz2'      open for reading with bzip2 compression
           'r:xz'       open for reading with lzma compression
           'a' or 'a:'  open for appending, creating the file if necessary
           'w' or 'w:'  open for writing without compression
           'w:gz'       open for writing with gzip compression
           'w:bz2'      open for writing with bzip2 compression
           'w:xz'       open for writing with lzma compression

           'x' or 'x:'  create a tarfile exclusively without compression, raise
                        an exception if the file is already created
           'x:gz'       create a gzip compressed tarfile, raise an exception
                        if the file is already created
           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                        if the file is already created
           'x:xz'       create an lzma compressed tarfile, raise an exception
                        if the file is already created

           'r|*'        open a stream of tar blocks with transparent compression
           'r|'         open an uncompressed stream of tar blocks for reading
           'r|gz'       open a gzip compressed stream of tar blocks
           'r|bz2'      open a bzip2 compressed stream of tar blocks
           'r|xz'       open an lzma compressed stream of tar blocks
           'w|'         open an uncompressed stream for writing
           'w|gz'       open a gzip compressed stream for writing
           'w|bz2'      open a bzip2 compressed stream for writing
           'w|xz'       open an lzma compressed stream for writing
        """

        if not name and not fileobj:
            raise ValueError("nothing to open")

        if mode in ("r", "r:*"):
            # Find out which *open() is appropriate for opening the file.
            # Each registered opener is tried in turn; the file object is
            # rewound between failed attempts so every opener sees the
            # same data.
            for comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
                if fileobj is not None:
                    saved_pos = fileobj.tell()
                try:
                    return func(name, "r", fileobj, **kwargs)
                except (ReadError, CompressionError) as e:
                    if fileobj is not None:
                        fileobj.seek(saved_pos)
                    continue
            raise ReadError("file could not be opened successfully")

        elif ":" in mode:
            # Random-access mode with an explicit compression, e.g. "w:gz".
            filemode, comptype = mode.split(":", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            # Select the *open() function according to
            # given compression.
            if comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
            else:
                raise CompressionError("unknown compression type %r" % comptype)
            return func(name, filemode, fileobj, **kwargs)

        elif "|" in mode:
            # Non-seekable stream mode, e.g. "r|gz".
            filemode, comptype = mode.split("|", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            if filemode not in ("r", "w"):
                raise ValueError("mode must be 'r' or 'w'")

            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
            try:
                t = cls(name, filemode, stream, **kwargs)
            except:
                stream.close()
                raise
            # The TarFile owns the stream wrapper and must close it.
            t._extfileobj = False
            return t

        elif mode in ("a", "w", "x"):
            return cls.taropen(name, mode, fileobj, **kwargs)

        raise ValueError("undiscernible mode")
    1598 
    @classmethod
    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
        """Open uncompressed tar archive name for reading or writing.
        """
        # Streaming ('|') modes are handled by open(); only plain modes
        # are accepted here.
        if mode in ("r", "a", "w", "x"):
            return cls(name, mode, fileobj, **kwargs)
        raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
    1606 
    @classmethod
    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open gzip compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        # Probe for a usable gzip module up front.
        try:
            import gzip
            gzip.GzipFile
        except (ImportError, AttributeError):
            raise CompressionError("gzip module is not available")

        try:
            fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
        except OSError:
            # An OSError while opening a caller-supplied file object for
            # reading is reported as "not a gzip file".
            if fileobj is not None and mode == 'r':
                raise ReadError("not a gzip file")
            raise

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except OSError:
            # A broken gzip stream may only surface while the first tar
            # header is read; translate that into ReadError for 'r' mode.
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a gzip file")
            raise
        except:
            fileobj.close()
            raise
        t._extfileobj = False   # the GzipFile wrapper is ours to close
        return t
    1640 
    @classmethod
    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        # BZ2File accepts either a file object or a filename.
        fileobj = bz2.BZ2File(fileobj or name, mode,
                              compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (OSError, EOFError):
            # Decompression failures surface as OSError/EOFError while the
            # first tar header is read; translate for 'r' mode.
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a bzip2 file")
            raise
        except:
            fileobj.close()
            raise
        t._extfileobj = False   # the BZ2File wrapper is ours to close
        return t
    1669 
    @classmethod
    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
        """Open lzma compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            import lzma
        except ImportError:
            raise CompressionError("lzma module is not available")

        # LZMAFile accepts either a file object or a filename.
        fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (lzma.LZMAError, EOFError):
            # Corrupt or non-xz data surfaces as LZMAError/EOFError while
            # the first tar header is read; translate for 'r' mode.
            fileobj.close()
            if mode == 'r':
                raise ReadError("not an lzma file")
            raise
        except:
            fileobj.close()
            raise
        t._extfileobj = False   # the LZMAFile wrapper is ours to close
        return t
    1697 
    # All *open() methods are registered here.
    # Maps the compression suffix used in mode strings (e.g. "r:gz")
    # to the name of the classmethod that implements it.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open",   # bzip2 compressed tar
        "xz":  "xzopen"     # lzma compressed tar
    }
    1705 
    1706     #--------------------------------------------------------------------------
    1707     # The public methods which TarFile provides:
    1708 
    def close(self):
        """Close the TarFile. In write-mode, two finishing zero blocks are
           appended to the archive.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode in ("a", "w", "x"):
                # Terminate the archive with two zero-filled blocks.
                self.fileobj.write(NUL * (BLOCKSIZE * 2))
                self.offset += (BLOCKSIZE * 2)
                # Pad out to a full record boundary with zero bytes
                # (like option -b20 for tar does).
                tail = self.offset % RECORDSIZE
                if tail > 0:
                    self.fileobj.write(NUL * (RECORDSIZE - tail))
        finally:
            # Only close file objects that we opened ourselves.
            if not self._extfileobj:
                self.fileobj.close()
    1729 
    def getmember(self, name):
        """Return a TarInfo object for member `name'. If `name' can not be
           found in the archive, KeyError is raised. If a member occurs more
           than once in the archive, its last occurrence is assumed to be the
           most up-to-date version.
        """
        member = self._getmember(name)
        if member is None:
            raise KeyError("filename %r not found" % name)
        return member
    1740 
    def getmembers(self):
        """Return the members of the archive as a list of TarInfo objects. The
           list has the same order as the members in the archive.
        """
        self._check()
        if not self._loaded:
            # A complete listing requires scanning the whole archive once.
            self._load()
        return self.members
    1750 
    def getnames(self):
        """Return the members of the archive as a list of their names. It has
           the same order as the list returned by getmembers().
        """
        members = self.getmembers()
        return [member.name for member in members]
    1756 
    def gettarinfo(self, name=None, arcname=None, fileobj=None):
        """Create a TarInfo object from the result of os.stat or equivalent
           on an existing file. The file is either named by `name', or
           specified as a file object `fileobj' with a file descriptor. If
           given, `arcname' specifies an alternative name for the file in the
           archive, otherwise, the name is taken from the 'name' attribute of
           'fileobj', or the 'name' argument. The name should be a text
           string.

           Returns None for file types that cannot be represented in a tar
           archive (e.g. sockets).
        """
        self._check("awx")

        # When fileobj is given, replace name by
        # fileobj's real name.
        if fileobj is not None:
            name = fileobj.name

        # Building the name of the member in the archive.
        # Backward slashes are converted to forward slashes,
        # Absolute paths are turned to relative paths.
        if arcname is None:
            arcname = name
        drv, arcname = os.path.splitdrive(arcname)
        arcname = arcname.replace(os.sep, "/")
        arcname = arcname.lstrip("/")

        # Now, fill the TarInfo object with
        # information specific for the file.
        tarinfo = self.tarinfo()
        tarinfo.tarfile = self  # Not needed

        # Use os.stat or os.lstat, depending on platform
        # and if symlinks shall be resolved.
        if fileobj is None:
            if hasattr(os, "lstat") and not self.dereference:
                statres = os.lstat(name)
            else:
                statres = os.stat(name)
        else:
            statres = os.fstat(fileobj.fileno())
        linkname = ""

        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            inode = (statres.st_ino, statres.st_dev)
            # The lost line-continuation is restored here via parentheses:
            # the multi-clause hardlink test is one condition.
            if (not self.dereference and statres.st_nlink > 1 and
                    inode in self.inodes and arcname != self.inodes[inode]):
                # Is it a hardlink to an already
                # archived file?
                type = LNKTYPE
                linkname = self.inodes[inode]
            else:
                # The inode is added only if it is valid.
                # For win32 it is always 0.
                type = REGTYPE
                if inode[0]:
                    self.inodes[inode] = arcname
        elif stat.S_ISDIR(stmd):
            type = DIRTYPE
        elif stat.S_ISFIFO(stmd):
            type = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            type = SYMTYPE
            linkname = os.readlink(name)
        elif stat.S_ISCHR(stmd):
            type = CHRTYPE
        elif stat.S_ISBLK(stmd):
            type = BLKTYPE
        else:
            # Sockets and other exotic file types cannot be archived.
            return None

        # Fill the TarInfo object with all
        # information we can get.
        tarinfo.name = arcname
        tarinfo.mode = stmd
        tarinfo.uid = statres.st_uid
        tarinfo.gid = statres.st_gid
        if type == REGTYPE:
            tarinfo.size = statres.st_size
        else:
            tarinfo.size = 0
        tarinfo.mtime = statres.st_mtime
        tarinfo.type = type
        tarinfo.linkname = linkname
        # Resolve numeric ids to names where the platform provides
        # pwd/grp databases; unknown ids are simply left unnamed.
        if pwd:
            try:
                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
            except KeyError:
                pass
        if grp:
            try:
                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
            except KeyError:
                pass

        if type in (CHRTYPE, BLKTYPE):
            if hasattr(os, "major") and hasattr(os, "minor"):
                tarinfo.devmajor = os.major(statres.st_rdev)
                tarinfo.devminor = os.minor(statres.st_rdev)
        return tarinfo
    1856 
    def list(self, verbose=True, *, members=None):
        """Print a table of contents to sys.stdout.

           If `verbose' is False only member names are printed; otherwise
           an `ls -l'-like listing is produced. `members' is optional and
           must be a subset of the list returned by getmembers().
        """
        self._check()

        if members is None:
            members = self
        for member in members:
            if verbose:
                # Permission bits and owner/group columns.
                _safe_print(stat.filemode(member.mode))
                _safe_print("%s/%s" % (member.uname or member.uid,
                                       member.gname or member.gid))
                # Size column: device numbers for char/block devices.
                if member.ischr() or member.isblk():
                    _safe_print("%10s" %
                            ("%d,%d" % (member.devmajor, member.devminor)))
                else:
                    _safe_print("%10d" % member.size)
                _safe_print("%d-%02d-%02d %02d:%02d:%02d"
                            % time.localtime(member.mtime)[:6])

            # Directories get a trailing slash after their name.
            _safe_print(member.name + ("/" if member.isdir() else ""))

            if verbose:
                if member.issym():
                    _safe_print("-> " + member.linkname)
                if member.islnk():
                    _safe_print("link to " + member.linkname)
            print()
    1888 
    def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
        """Add the file `name' to the archive. `name' may be any type of
           file (directory, fifo, symbolic link, etc.). `arcname', if given,
           is the alternative name the file gets inside the archive.
           Directories are added recursively unless `recursive' is False.
           `exclude' (deprecated) is a predicate returning True for names
           to skip; `filter' receives each TarInfo and may return a
           modified one, or None to drop the member.
        """
        self._check("awx")

        if arcname is None:
            arcname = name

        # Deprecated exclusion hook; `filter' supersedes it.
        if exclude is not None:
            import warnings
            warnings.warn("use the filter argument instead",
                    DeprecationWarning, 2)
            if exclude(name):
                self._dbg(2, "tarfile: Excluded %r" % name)
                return

        # Refuse to add the archive file to itself.
        if self.name is not None and os.path.abspath(name) == self.name:
            self._dbg(2, "tarfile: Skipped %r" % name)
            return

        self._dbg(1, name)

        # Build a TarInfo from the file's metadata.
        tarinfo = self.gettarinfo(name, arcname)
        if tarinfo is None:
            self._dbg(1, "tarfile: Unsupported type %r" % name)
            return

        # Let the caller rewrite or veto the member.
        if filter is not None:
            tarinfo = filter(tarinfo)
            if tarinfo is None:
                self._dbg(2, "tarfile: Excluded %r" % name)
                return

        if tarinfo.isreg():
            # Regular file: header plus data.
            with bltn_open(name, "rb") as fobj:
                self.addfile(tarinfo, fobj)
        elif tarinfo.isdir():
            # Directory: header now, children afterwards if requested.
            self.addfile(tarinfo)
            if recursive:
                for entry in os.listdir(name):
                    self.add(os.path.join(name, entry),
                             os.path.join(arcname, entry),
                             recursive, exclude, filter=filter)
        else:
            # Fifo, device, link, ...: header only.
            self.addfile(tarinfo)
    1949 
    def addfile(self, tarinfo, fileobj=None):
        """Write the TarInfo object `tarinfo' to the archive. If `fileobj'
           is given it must be a binary file, and tarinfo.size bytes are
           copied from it into the archive. TarInfo objects can come from
           gettarinfo() or be created directly.
        """
        self._check("awx")

        # Work on a copy so the caller's object stays untouched.
        tarinfo = copy.copy(tarinfo)

        # Emit the header block(s).
        header = tarinfo.tobuf(self.format, self.encoding, self.errors)
        self.fileobj.write(header)
        self.offset += len(header)

        # Emit the payload, NUL-padded up to a full record.
        if fileobj is not None:
            copyfileobj(fileobj, self.fileobj, tarinfo.size)
            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
            if remainder:
                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
                blocks += 1
            self.offset += blocks * BLOCKSIZE

        self.members.append(tarinfo)
    1974 
    def extractall(self, path=".", members=None, *, numeric_owner=False):
        """Extract all members (or the subset `members', which must come
           from getmembers()) below `path', then fix owner, mtime and
           permissions on the directories afterwards. If `numeric_owner`
           is True, uid/gid numbers are used instead of user/group names.
        """
        directories = []

        if members is None:
            members = self

        for tarinfo in members:
            is_directory = tarinfo.isdir()
            if is_directory:
                # Remember the directory; extract it with a safe mode and
                # restore the real attributes once it is fully populated.
                directories.append(tarinfo)
                tarinfo = copy.copy(tarinfo)
                tarinfo.mode = 0o700
            # Directories get their attributes set further down.
            self.extract(tarinfo, path, set_attrs=not is_directory,
                         numeric_owner=numeric_owner)

        # Process nested directories depth-first.
        directories.sort(key=lambda a: a.name)
        directories.reverse()

        # Now apply the real owner, mtime and mode.
        for tarinfo in directories:
            dirpath = os.path.join(path, tarinfo.name)
            try:
                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
                self.utime(tarinfo, dirpath)
                self.chmod(tarinfo, dirpath)
            except ExtractError as e:
                if self.errorlevel > 1:
                    raise
                else:
                    self._dbg(1, "tarfile: %s" % e)
    2014 
    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
        """Extract one member below `path' using its full archive name.
           `member' may be a filename or a TarInfo object. Owner, mtime
           and mode are applied unless `set_attrs' is False; with
           `numeric_owner` True, uid/gid numbers are used rather than
           user/group names.
        """
        self._check("r")

        tarinfo = self.getmember(member) if isinstance(member, str) else member

        # makelink() needs the resolved target of a hard link.
        if tarinfo.islnk():
            tarinfo._link_target = os.path.join(path, tarinfo.linkname)

        try:
            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                                 set_attrs=set_attrs,
                                 numeric_owner=numeric_owner)
        except OSError as e:
            if self.errorlevel > 0:
                raise
            elif e.filename is None:
                self._dbg(1, "tarfile: %s" % e.strerror)
            else:
                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
    2052 
    def extractfile(self, member):
        """Return a file object for `member' (a filename or a TarInfo).
           Regular files and links yield an io.BufferedReader; members
           without data (directories, devices, ...) yield None.
        """
        self._check("r")

        if isinstance(member, str):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            # Members with unknown types are read like regular files.
            return self.fileobject(self, tarinfo)

        if tarinfo.islnk() or tarinfo.issym():
            if isinstance(self.fileobj, _Stream):
                # Cannot seek back to a link's target on a raw,
                # non-seekable stream of tar blocks.
                raise StreamError("cannot extract (sym)link as file object")
            # A (sym)link is read through its target's file object.
            return self.extractfile(self._find_link_target(tarinfo))

        # No data associated with this member.
        return None
    2083 
    def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                        numeric_owner=False):
        """Write the member described by `tarinfo' to the physical file
           `targetpath', creating missing parent directories.
        """
        # Normalize: drop a trailing slash and switch forward slashes to
        # the platform's path separator.
        targetpath = targetpath.rstrip("/").replace("/", os.sep)

        # Parent directories that are not members of the archive are
        # created with default permissions.
        parent = os.path.dirname(targetpath)
        if parent and not os.path.exists(parent):
            os.makedirs(parent)

        if tarinfo.islnk() or tarinfo.issym():
            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
        else:
            self._dbg(1, tarinfo.name)

        # Dispatch on the member type; unrecognized types fall back to
        # regular-file extraction via makeunknown().
        if tarinfo.isreg():
            self.makefile(tarinfo, targetpath)
        elif tarinfo.isdir():
            self.makedir(tarinfo, targetpath)
        elif tarinfo.isfifo():
            self.makefifo(tarinfo, targetpath)
        elif tarinfo.ischr() or tarinfo.isblk():
            self.makedev(tarinfo, targetpath)
        elif tarinfo.islnk() or tarinfo.issym():
            self.makelink(tarinfo, targetpath)
        elif tarinfo.type not in SUPPORTED_TYPES:
            self.makeunknown(tarinfo, targetpath)
        else:
            self.makefile(tarinfo, targetpath)

        if set_attrs:
            self.chown(tarinfo, targetpath, numeric_owner)
            if not tarinfo.issym():
                # A symlink's mode/time belong to its target, not the link.
                self.chmod(tarinfo, targetpath)
                self.utime(tarinfo, targetpath)
    2127 
    2128     #--------------------------------------------------------------------------
    2129     # Below are the different file methods. They are called via
    2130     # _extract_member() when extract() is called. They can be replaced in a
    2131     # subclass to implement other functionality.
    2132 
    def makedir(self, tarinfo, targetpath):
        """Create the directory targetpath; an already existing
           directory is fine.
        """
        try:
            # Restrictive mode for now — _extract_member() installs the
            # real permissions afterwards.
            os.mkdir(targetpath, 0o700)
        except FileExistsError:
            pass
    2142 
    def makefile(self, tarinfo, targetpath):
        """Copy the member's data out of the archive into the regular
           file targetpath.
        """
        source = self.fileobj
        source.seek(tarinfo.offset_data)
        with bltn_open(targetpath, "wb") as target:
            if tarinfo.sparse is None:
                copyfileobj(source, target, tarinfo.size, ReadError)
            else:
                # Sparse member: write each data run at its offset, then
                # size the file with truncate().
                for offset, size in tarinfo.sparse:
                    target.seek(offset)
                    copyfileobj(source, target, size, ReadError)
                target.seek(tarinfo.size)
                target.truncate()
    2157 
    def makeunknown(self, tarinfo, targetpath):
        """Fallback for members of unknown type: extract them like
           regular files and log a debug warning.
        """
        self.makefile(tarinfo, targetpath)
        self._dbg(1, "tarfile: Unknown file type %r, "
                     "extracted as regular file." % tarinfo.type)
    2165 
    def makefifo(self, tarinfo, targetpath):
        """Create a FIFO at targetpath, if the platform supports it.
        """
        if not hasattr(os, "mkfifo"):
            raise ExtractError("fifo not supported by system")
        os.mkfifo(targetpath)
    2173 
    def makedev(self, tarinfo, targetpath):
        """Create a character or block device node at targetpath.
        """
        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
            raise ExtractError("special devices not supported by system")

        # Merge the device flavour into the permission bits.
        kind = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
        os.mknod(targetpath, tarinfo.mode | kind,
                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
    2188 
    def makelink(self, tarinfo, targetpath):
        """Create a symbolic or hard link at targetpath, falling back to
           copying the referenced file when the platform cannot create
           links.
        """
        try:
            if tarinfo.issym():
                os.symlink(tarinfo.linkname, targetpath)
            elif os.path.exists(tarinfo._link_target):
                # Hard link to a file extracted earlier (see extract()).
                os.link(tarinfo._link_target, targetpath)
            else:
                # Target not on disk yet: copy it out of the archive.
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
        except symlink_exception:
            # Platform refused the link; extract the target instead.
            try:
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
            except KeyError:
                raise ExtractError("unable to resolve link inside archive")
    2211 
    def chown(self, tarinfo, targetpath, numeric_owner):
        """Set owner of targetpath according to tarinfo. If numeric_owner
           is True, use .gid/.uid instead of .gname/.uname. Only effective
           when running as root on a platform that provides pwd/grp.

           Raises ExtractError if the ownership cannot be changed.
        """
        if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
            # We have to be root to do so.
            if numeric_owner:
                g = tarinfo.gid
                u = tarinfo.uid
            else:
                # Prefer symbolic names; fall back to the stored numeric
                # ids for names unknown on this system.
                try:
                    g = grp.getgrnam(tarinfo.gname)[2]
                except KeyError:
                    g = tarinfo.gid
                try:
                    u = pwd.getpwnam(tarinfo.uname)[2]
                except KeyError:
                    u = tarinfo.uid
            try:
                if tarinfo.issym() and hasattr(os, "lchown"):
                    # Change the link itself, not its target.
                    os.lchown(targetpath, u, g)
                else:
                    os.chown(targetpath, u, g)
            except OSError as e:
                # Chain the original OSError so the real cause (EPERM,
                # ENOENT, ...) is preserved instead of silently dropped.
                raise ExtractError("could not change owner") from e
    2237 
    def chmod(self, tarinfo, targetpath):
        """Set file permissions of targetpath according to tarinfo.mode.

           Raises ExtractError if the permissions cannot be changed.
        """
        if hasattr(os, 'chmod'):
            try:
                os.chmod(targetpath, tarinfo.mode)
            except OSError as e:
                # Chain the original OSError so the real cause is
                # preserved instead of silently dropped.
                raise ExtractError("could not change mode") from e
    2246 
    def utime(self, tarinfo, targetpath):
        """Set access and modification time of targetpath to
           tarinfo.mtime.

           Raises ExtractError if the timestamps cannot be changed.
        """
        if not hasattr(os, 'utime'):
            return
        try:
            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
        except OSError as e:
            # Chain the original OSError so the real cause is preserved
            # instead of silently dropped.
            raise ExtractError("could not change modification time") from e
    2256 
    2257     #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.
        """
        self._check("ra")
        if self.firstmember is not None:
            # A member was already read ahead (e.g. while opening the
            # archive); hand it out before parsing further headers.
            m = self.firstmember
            self.firstmember = None
            return m

        # Advance the file pointer.
        if self.offset != self.fileobj.tell():
            # Seek to one byte before the expected header and read that
            # byte back: a failed read means the data ends prematurely.
            self.fileobj.seek(self.offset - 1)
            if not self.fileobj.read(1):
                raise ReadError("unexpected end of data")

        # Read the next block.
        tarinfo = None
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError as e:
                # All-zero block: normally the end-of-archive marker, but
                # with ignore_zeros we skip it and keep scanning.
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError as e:
                # Corrupt header: skip it with ignore_zeros; fail hard only
                # when the very first header is already invalid.
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    raise ReadError(str(e))
            except EmptyHeaderError:
                # Nothing at all at offset 0 means an empty file.
                if self.offset == 0:
                    raise ReadError("empty file")
            except TruncatedHeaderError as e:
                if self.offset == 0:
                    raise ReadError(str(e))
            except SubsequentHeaderError as e:
                raise ReadError(str(e))
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            # End of archive reached; all members are now cached.
            self._loaded = True

        return tarinfo
    2308 
    2309     #--------------------------------------------------------------------------
    2310     # Little helper methods:
    2311 
    def _getmember(self, name, tarinfo=None, normalize=False):
        """Find an archive member by name, searching newest-first.
           If `tarinfo' is given, only members archived before it are
           considered. With `normalize' the comparison uses normalized
           path names. Returns None when nothing matches.
        """
        # getmembers() forces the archive to be read to the end.
        members = self.getmembers()

        # Restrict the search to members preceding `tarinfo'.
        if tarinfo is not None:
            members = members[:members.index(tarinfo)]

        if normalize:
            name = os.path.normpath(name)

        for candidate in reversed(members):
            candidate_name = (os.path.normpath(candidate.name)
                              if normalize else candidate.name)
            if name == candidate_name:
                return candidate
    2334 
    def _load(self):
        """Read the archive through to its end so that every readable
           member is recorded in self.members.
        """
        while self.next() is not None:
            pass
        self._loaded = True
    2344 
    def _check(self, mode=None):
        """Raise OSError if the TarFile is closed or if, when `mode' is
           given, self.mode is not one of its characters.
        """
        if self.closed:
            raise OSError("%s is closed" % self.__class__.__name__)
        if mode is None:
            return
        if self.mode not in mode:
            raise OSError("bad operation for mode %r" % self.mode)
    2353 
    def _find_link_target(self, tarinfo):
        """Return the member a symlink or hardlink member refers to,
           raising KeyError if it cannot be found in the archive.
        """
        if tarinfo.issym():
            # Symlinks are resolved relative to their own directory and
            # may point anywhere in the archive.
            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name),
                                              tarinfo.linkname)))
            limit = None
        else:
            # A hard link can only reference an earlier member, so only
            # search the archive up to the link itself.
            linkname = tarinfo.linkname
            limit = tarinfo

        member = self._getmember(linkname, tarinfo=limit, normalize=True)
        if member is None:
            raise KeyError("linkname %r not found" % linkname)
        return member
    2372 
    def __iter__(self):
        """Iterate over the cached members once fully loaded, otherwise
           stream them lazily through a TarIter.
        """
        return iter(self.members) if self._loaded else TarIter(self)
    2380 
    def _dbg(self, level, msg):
        """Write debugging output to sys.stderr.

           `msg' is printed only when `level' does not exceed the
           verbosity configured in self.debug.
        """
        if level <= self.debug:
            print(msg, file=sys.stderr)
    2386 
    def __enter__(self):
        # Context-manager entry: verify the archive is still open.
        self._check()
        return self
    2390 
    def __exit__(self, type, value, traceback):
        # Context-manager exit: finish the archive normally on success.
        if type is None:
            self.close()
        else:
            # An exception occurred. We must not call close() because
            # it would try to write end-of-archive blocks and padding.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
    2400 # class TarFile
    2401 
    2402 class TarIter:
    2403     """Iterator Class.
    2404 
    2405        for tarinfo in TarFile(...):
    2406            suite...
    2407     """
    2408 
    2409     def __init__(self, tarfile):
    2410         """Construct a TarIter object.
    2411         """
    2412         self.tarfile = tarfile
    2413         self.index = 0
    2414     def __iter__(self):
    2415         """Return iterator object.
    2416         """
    2417         return self
    2418     def __next__(self):
    2419         """Return the next item using TarFile's next() method.
    2420            When all members have been read, set TarFile as _loaded.
    2421         """
    2422         # Fix for SF #1100429: Under rare circumstances it can
    2423         # happen that getmembers() is called during iteration,
    2424         # which will cause TarIter to stop prematurely.
    2425 
    2426         if self.index == 0 and self.tarfile.firstmember is not None:
    2427             tarinfo = self.tarfile.next()
    2428         elif self.index < len(self.tarfile.members):
    2429             tarinfo = self.tarfile.members[self.index]
    2430         elif not self.tarfile._loaded:
    2431             tarinfo = self.tarfile.next()
    2432             if not tarinfo:
    2433                 self.tarfile._loaded = True
    2434                 raise StopIteration
    2435         else:
    2436             raise StopIteration
    2437         self.index += 1
    2438         return tarinfo
    2439 
    2440 #--------------------
    2441 # exported functions
    2442 #--------------------
    2443 def is_tarfile(name):
    2444     """Return True if name points to a tar archive that we
    2445        are able to handle, else return False.
    2446     """
    2447     try:
    2448         t = open(name)
    2449         t.close()
    2450         return True
    2451     except TarError:
    2452         return False
    2453 
    2454 open = TarFile.open
    2455 
    2456 
    2457 def main():
    2458     import argparse
    2459 
    2460     description = 'A simple command line interface for tarfile module.'
    2461     parser = argparse.ArgumentParser(description=description)
    2462     parser.add_argument('-v', '--verbose', action='store_true', default=False,
    2463                         help='Verbose output')
    2464     group = parser.add_mutually_exclusive_group()
    2465     group.add_argument('-l', '--list', metavar='<tarfile>',
    2466                        help='Show listing of a tarfile')
    2467     group.add_argument('-e', '--extract', nargs='+',
    2468                        metavar=('<tarfile>', '<output_dir>'),
    2469                        help='Extract tarfile into target dir')
    2470     group.add_argument('-c', '--create', nargs='+',
    2471                        metavar=('<name>', '<file>'),
    2472                        help='Create tarfile from sources')
    2473     group.add_argument('-t', '--test', metavar='<tarfile>',
    2474                        help='Test if a tarfile is valid')
    2475     args = parser.parse_args()
    2476 
    2477     if args.test:
    2478         src = args.test
    2479         if is_tarfile(src):
    2480             with open(src, 'r') as tar:
    2481                 tar.getmembers()
    2482                 print(tar.getmembers(), file=sys.stderr)
    2483             if args.verbose:
    2484                 print('{!r} is a tar archive.'.format(src))
    2485         else:
    2486             parser.exit(1, '{!r} is not a tar archive.
    '.format(src))
    2487 
    2488     elif args.list:
    2489         src = args.list
    2490         if is_tarfile(src):
    2491             with TarFile.open(src, 'r:*') as tf:
    2492                 tf.list(verbose=args.verbose)
    2493         else:
    2494             parser.exit(1, '{!r} is not a tar archive.
    '.format(src))
    2495 
    2496     elif args.extract:
    2497         if len(args.extract) == 1:
    2498             src = args.extract[0]
    2499             curdir = os.curdir
    2500         elif len(args.extract) == 2:
    2501             src, curdir = args.extract
    2502         else:
    2503             parser.exit(1, parser.format_help())
    2504 
    2505         if is_tarfile(src):
    2506             with TarFile.open(src, 'r:*') as tf:
    2507                 tf.extractall(path=curdir)
    2508             if args.verbose:
    2509                 if curdir == '.':
    2510                     msg = '{!r} file is extracted.'.format(src)
    2511                 else:
    2512                     msg = ('{!r} file is extracted '
    2513                            'into {!r} directory.').format(src, curdir)
    2514                 print(msg)
    2515         else:
    2516             parser.exit(1, '{!r} is not a tar archive.
    '.format(src))
    2517 
    2518     elif args.create:
    2519         tar_name = args.create.pop(0)
    2520         _, ext = os.path.splitext(tar_name)
    2521         compressions = {
    2522             # gz
    2523             '.gz': 'gz',
    2524             '.tgz': 'gz',
    2525             # xz
    2526             '.xz': 'xz',
    2527             '.txz': 'xz',
    2528             # bz2
    2529             '.bz2': 'bz2',
    2530             '.tbz': 'bz2',
    2531             '.tbz2': 'bz2',
    2532             '.tb2': 'bz2',
    2533         }
    2534         tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
    2535         tar_files = args.create
    2536 
    2537         with TarFile.open(tar_name, tar_mode) as tf:
    2538             for file_name in tar_files:
    2539                 tf.add(file_name)
    2540 
    2541         if args.verbose:
    2542             print('{!r} file created.'.format(tar_name))
    2543 
    2544     else:
    2545         parser.exit(1, parser.format_help())
    2546 
    2547 if __name__ == '__main__':
    2548     main()
    View tarfile Code

    PyYAML模块 

    Python也可以很容易的处理ymal文档格式,只不过需要安装一个模块,参考文档:http://pyyaml.org/wiki/PyYAMLDocumentation。

    re正则表达式

    正则表达式:
        
        '.'     默认匹配除\n之外的任意一个字符,若指定flag DOTALL,则匹配任意字符,包括换行
        '^'     匹配字符开头,若指定flags MULTILINE,这种也可以匹配上 re.search(r"^a","\nabc\neee",flags=re.MULTILINE)
        '$'     匹配字符结尾,或re.search("foo$","bfoo\nsdfsf",flags=re.MULTILINE).group()也可以
        '*'     匹配*号前的字符0次或多次,re.findall("ab*","cabb3abcbbac")  结果为['abb', 'ab', 'a']
        '+'     匹配前一个字符1次或多次,re.findall("ab+","ab+cd+abb+bba") 结果['ab', 'abb']
        '?'     匹配前一个字符1次或0次
        '{m}'   匹配前一个字符m次
        '{n,m}' 匹配前一个字符n到m次,re.findall("ab{1,3}","abb abc abbcbbb") 结果['abb', 'ab', 'abb']
        '|'     匹配|左或|右的字符,re.search("abc|ABC","ABCBabcCD").group() 结果'ABC'
        '(...)' 分组匹配,re.search("(abc){2}a(123|456)c", "abcabca456c").group() 结果 abcabca456c
         
         
        '\A'    只从字符开头匹配,re.search(r"\Aabc","alexabc") 是匹配不到的
        '\Z'    匹配字符结尾,同$
        '\d'    匹配数字0-9
        '\D'    匹配非数字
        '\w'    匹配[A-Za-z0-9_]
        '\W'    匹配非[A-Za-z0-9_]
        '\s'    匹配空白字符、\t、\n、\r,re.search(r"\s+","ab\tc1\n3").group() 结果 '\t'
        
        
    
        re.match 从头开始匹配
        re.search 匹配包含
        re.findall 把所有匹配到的字符放到以列表中的元素返回
        re.split    以匹配到的字符当做列表分隔符
        re.sub      匹配字符并替换
        
        
        匹配模式:
            re.I:忽略大小写
            re.M;多行模式,改变'^'和'$'的行为
            re.S:点任意匹配模式,改变'.'的行为
    

      

    # Starts with "A", then 1-7 word chars, one or more digits, more word chars, ends in "n"
print(re.match(r"^A\w{1,7}\d+\w*n$", "Allister12365HaoSen"))

# 18-digit ID number: 17 digits followed by a digit, 'x' or 'X'
print(re.match(r"^\d{17}(\d|x|X){1}$", "42210319630213275X"))
# 15-digit ID number: 15 digits from the start
print(re.match(r"^\d{15}", "422103196302132"))


# Starts with "A", then one or more letters, then an "r"
print(re.search("^A[a-zA-Z]+r", "Allister123Allister&ds"))

# '?' matches the preceding character 0 or 1 times
print(re.search("aaa?", "aaEEEEaaa"))  # aa


print(re.findall("abf?.", "abf%dafsgaabfterftw"))

# Split into a list on each run of digits
print(re.split("[0-9]+", "rf123Allister89ljp"))  # ['rf', 'Allister', 'ljp']

# sub(pattern, repl, string, count=0, flags=0): replace matches; count caps the replacements
print(re.sub("[0-9]+", "|", "rf123Allister89ljp", 5))  # rf|Allister|ljp


"""
    Split an ID-card number into province, city, county, year, month and day.
"""
# {'city': '09', 'county': '21', 'year': '1990', 'province': '51', 'day': '06', 'month': '08'}
print(re.search("(?P<province>[0-9]{2})(?P<city>[0-9]{2})(?P<county>[0-9]{2})(?P<year>[0-9]{4})(?P<month>[0-9]{2})(?P<day>[0-9]{2})",
                "51092119900806181X").groupdict())


"""
    Flags:
        re.I: ignore case
        re.M: multi-line mode, changes '^' and '$'
        re.S: dot-all mode, changes '.'
"""

# re.I makes the character class case-insensitive
print(re.search("[a-z]+", "abcdEFg", re.I))  # abcdEFg
    

      

    # !/usr/bin/env python
# -*- coding: utf-8 -*-
# @File  : re_test.py
# @Author: Allister.Liu
# @Date  : 2018/1/22
# @Desc  : Regular-expression examples

import re

r"""
Regular expression quick reference (backslashes restored — the scraped copy
had stripped every '\'):

    '.'     any character except '\n'; with re.DOTALL it also matches newlines
    '^'     start of string; with re.MULTILINE also right after each newline,
            e.g. re.search(r"^a", "\nabc\neee", flags=re.MULTILINE)
    '$'     end of string, e.g. re.search("foo$", "bfoo\nsdfsf", flags=re.MULTILINE).group()
    '*'     0+ repeats of the preceding item: re.findall("ab*", "cabb3abcbbac") -> ['abb', 'ab', 'a']
    '+'     1+ repeats: re.findall("ab+", "ab+cd+abb+bba") -> ['ab', 'abb']
    '?'     0 or 1 repeat of the preceding item
    '{m}'   exactly m repeats
    '{n,m}' n to m repeats: re.findall("ab{1,3}", "abb abc abbcbbb") -> ['abb', 'ab', 'abb']
    '|'     alternation: re.search("abc|ABC", "ABCBabcCD").group() -> 'ABC'
    '(...)' grouping: re.search("(abc){2}a(123|456)c", "abcabca456c").group() -> 'abcabca456c'

    '\A'    only at the very start: re.search(r"\Aabc", "alexabc") matches nothing
    '\Z'    at the very end, like '$'
    '\d'    a digit 0-9
    '\D'    a non-digit
    '\w'    a word character [A-Za-z0-9_]
    '\W'    a non-word character
    '\s'    whitespace (space, \t, \n, \r): re.search(r"\s+", "ab\tc1\n3").group() -> '\t'

    re.match    match from the beginning of the string
    re.search   scan anywhere in the string
    re.findall  return all matches as a list
    re.split    split the string on each match
    re.sub      replace matches

    Flags:
        re.I: ignore case
        re.M: multi-line mode, changes '^' and '$'
        re.S: dot-all mode, changes '.'
"""

# Starts with "A", then 1-7 word chars, one or more digits, more word chars, ends in "n"
print(re.match(r"^A\w{1,7}\d+\w*n$", "Allister12365HaoSen"))

# 18-digit ID number: 17 digits followed by a digit, 'x' or 'X'
print(re.match(r"^\d{17}(\d|x|X){1}$", "42210319630213275X"))
# 15-digit ID number: 15 digits from the start
print(re.match(r"^\d{15}", "422103196302132"))


# Starts with "A", then one or more letters, then an "r"
print(re.search("^A[a-zA-Z]+r", "Allister123Allister&ds"))

# '?' matches the preceding character 0 or 1 times
print(re.search("aaa?", "aaEEEEaaa"))  # aa


print(re.findall("abf?.", "abf%dafsgaabfterftw"))

# Split into a list on each run of digits
print(re.split("[0-9]+", "rf123Allister89ljp"))  # ['rf', 'Allister', 'ljp']

# sub(pattern, repl, string, count=0, flags=0): replace matches; count caps the replacements
print(re.sub("[0-9]+", "|", "rf123Allister89ljp", 5))  # rf|Allister|ljp


"""
    Split an ID-card number into province, city, county, year, month and day.
"""
# {'city': '09', 'county': '21', 'year': '1990', 'province': '51', 'day': '06', 'month': '08'}
print(re.search("(?P<province>[0-9]{2})(?P<city>[0-9]{2})(?P<county>[0-9]{2})(?P<year>[0-9]{4})(?P<month>[0-9]{2})(?P<day>[0-9]{2})",
                "51092119900806181X").groupdict())


"""
    Flags:
        re.I: ignore case
        re.M: multi-line mode, changes '^' and '$'
        re.S: dot-all mode, changes '.'
"""

# re.I makes the character class case-insensitive
print(re.search("[a-z]+", "abcdEFg", re.I))  # abcdEFg
    

      

  • 相关阅读:
    easyui控件写法造成的错误
    外部访问服务器数据库被防火墙屏蔽报错
    云服务器Windows Server2012 配置http服务器(又称Web服务器,IIS)
    mysql五:索引原理与慢查询优化
    mysql四:数据操作
    mysql四-2:多表查询
    sql查询作业答案
    mysql四-1:单表查询
    mysql五补充部分:SQL逻辑查询语句执行顺序
    第三篇:表操作
  • 原文地址:https://www.cnblogs.com/allister/p/8322355.html
Copyright © 2020-2023  润新知