• python3 通过文件头(魔数)判断文件类型


    # Maps the lowercase hex encoding of a file's leading bytes to a file-type
    # label. Insertion order matters: file_type() returns the first entry that
    # matches, both for the exact pass and for the 5-character prefix fallback.
    #
    # All keys are lowercase because file_type() lowercases the hex string it
    # reads from the file before comparing; uppercase keys could never match
    # exactly.
    #
    # Each key appears once. The original literal repeated some keys, and a
    # dict literal keeps only the last value for a repeated key, so the values
    # below are the ones lookups have always produced.
    CODE_TYPE_DIC = {
        '68746d6c3e': 'html',
        # Originally listed four times (xls, doc, vsd, wps); 'wps' was last.
        'd0cf11e0a1b11ae10000': 'wps',
        '44656c69766572792d64': 'eml',
        'ffd8ffe000104a464946': 'jpg',
        '89504e470d0a1a0a0000': 'png',
        '47494638396126026f01': 'gif',
        '49492a00227105008037': 'tif',
        '424d228c010000000000': 'bmp',
        '424d8240090000000000': 'bmp',
        '424d8e1b030000000000': 'bmp',
        '41433130313500000000': 'dwg',
        '3c21444f435459504520': 'html',
        '3c21646f637479706520': 'htm',
        '48544d4c207b0d0a0942': 'css',
        '696b2e71623d696b2e71': 'js',
        '7b5c727466315c616e73': 'rtf',
        '38425053000100000000': 'psd',
        '46726f6d3a203d3f6762': 'eml',
        '5374616e64617264204a': 'mdb',
        '252150532d41646f6265': 'ps',
        '255044462d312e350d0a': 'pdf',
        '2e524d46000000120001': 'rmvb',
        '464c5601050000000900': 'flv',
        '00000020667479706d70': 'mp4',
        '49443303000000002176': 'mp3',
        '000001ba210001000180': 'mpg',
        '3026b2758e66cf11a6d9': 'wmv',
        '52494646e27807005741': 'wav',
        '52494646d07d60074156': 'avi',
        '4d546864000000060001': 'mid',
        '504b0304140000080044': 'zip',
        '504b03040a0000080000': 'zip',
        # Originally listed twice (zip, jar); 'jar' was last.
        '504b03040a0000000000': 'jar',
        '526172211a0700cf9073': 'rar',
        '235468697320636f6e66': 'ini',
        '4d5a9000030000000400': 'exe',
        '3c25402070616765206c': 'jsp',
        '4d616e69666573742d56': 'mf',
        '3c3f786d6c2076657273': 'xml',
        '494e5345525420494e54': 'sql',
        '7061636b616765207765': 'java',
        '406563686f206f66660d': 'bat',
        '1f8b0800000000000000': 'gz',
        '6c6f67346a2e726f6f74': 'properties',
        'cafebabe0000002e0041': 'class',
        '49545346030000006000': 'chm',
        '04000000010000001300': 'mxp',
        '504b0304140006000800': 'docx',
        '6431303a637265617465': 'torrent',
    }
    
    
    # Convert a byte sequence to a hexadecimal string
    def bytes2hex(b):
        """Return the uppercase hexadecimal representation of *b*.

        Each item of *b* is formatted with ``%x``; when the resulting digit
        string has odd length it is left-padded with a single ``0`` so that
        every item contributes an even number of hex digits.

        Args:
            b: A sequence of integers, e.g. a Python 3 ``bytes`` object.

        Returns:
            The concatenated hex digits, uppercased. Empty input yields ``''``.
        """
        pieces = []
        for value in b:
            digits = u'%x' % value
            if len(digits) % 2 == 1:
                digits = u'0' + digits
            pieces.append(digits)
        return u''.join(pieces).upper()
    
    
    # Detect a file's type from its leading bytes
    def file_type(filename, n=20):
        """Guess the type of *filename* by matching its leading bytes.

        The first *n* bytes are read, hex-encoded, and compared against the
        signatures in ``CODE_TYPE_DIC`` in table order:

        1. Exact pass: the first signature that the file's hex string starts
           with wins.
        2. Fallback pass (only when the exact pass finds nothing): the first
           signature longer than 5 hex characters whose first 5 characters
           equal the file's first 5 hex characters wins.

        Signature keys are compared case-insensitively, so table entries
        written in uppercase hex can match in the exact pass as well.

        Args:
            filename: Path of the file to inspect.
            n: Number of leading bytes to read (default 20).

        Returns:
            The matching type label from ``CODE_TYPE_DIC``, or ``'unknown'``.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(filename, 'rb') as f:
            head = f.read(n)
        hex_head = bytes2hex(head).lower()

        # Normalise key case once; the file's hex string is lowercase, so an
        # uppercase key would otherwise never match.
        signatures = [(code.lower(), label)
                      for code, label in CODE_TYPE_DIC.items()]

        for code, label in signatures:
            if hex_head.startswith(code):
                return label

        # No full signature matched: relax to a 5-hex-character prefix.
        # Equality (not startswith) is deliberate, so a file with fewer than
        # 5 hex characters never matches.
        short_head = hex_head[0:5]
        for code, label in signatures:
            if len(code) > 5 and short_head == code[0:5]:
                return label

        return 'unknown'
    
    
    if __name__ == '__main__':
        # Demo: print the detected type of a sample file in the working directory.
        sample_path = '01.jpg'
        detected = file_type(sample_path)
        print(detected)
    

    参考链接:https://blog.csdn.net/privateobject/article/details/78069500

  • 相关阅读:
    eclipse恢复界面默认设置
    文件夹的拷贝
    文件的输入输出
    十进制转二进制,八进制,十六进制(查表法)
    数组元素的查找(折半查找)
    C++几个小函数
    C++ 内部排序(一)
    C++实现链表
    C++输出IP地址段内的合法地址
    我看软件工程师的职业规划
  • 原文地址:https://www.cnblogs.com/wuyongqiang/p/13891586.html
Copyright © 2020-2023  润新知