文件批量转换编码:原始基础代码来自互联网,在其基础上做了一些修改和适配,在此记录一下。实际只用到了 code2code 这个方法,其他方法没有用到,也就没有改动。
# -*- coding:utf-8 -*-
# @Time : 2022/3/22 20:04
# @Author: zhcode
# @File : convert_file_encode.py
import chardet
import codecs
import os
import sys
import time
# Absolute path of the working directory at import time; used as the default
# conversion root.
CURRENT_PATH = os.path.abspath(os.curdir)
def code2code(filename, encode_out):
    """
    Re-encode a single file in place.

    The source encoding is guessed with ``chardet`` from the raw file bytes.

    :param filename: path of the file to convert
    :param encode_out: name of the target codec (for example ``"utf-8"``)
    :return: ``(basename, detected_encoding)`` when the file was rewritten;
             ``None`` when the detected encoding already equals ``encode_out``
             or when no encoding could be detected
    """
    print("transfer filename " + filename + " ... ")
    # Read the bytes once, inside a context manager so the handle is closed;
    # the same bytes are used for detection and for decoding.
    with open(filename, "rb") as fi:
        raw = fi.read()
    fileencode = chardet.detect(raw)['encoding']

    if fileencode == encode_out:
        print("encode is no need to transfer...")
        return None
    if not fileencode:
        # Detection failed: leave the file untouched.
        print(fileencode)
        return None
    print("transfer detect codetype = " + fileencode)

    text = raw.decode(fileencode)
    if encode_out is None:
        # No explicit target codec: keep the historical behaviour of
        # codecs.open(..., encoding=None), which writes through a plain
        # text-mode file object.
        with codecs.open(filename, mode='w', encoding=None) as fo:
            fo.write(text)
    else:
        # Encode completely in memory BEFORE opening the file for writing.
        # Opening with 'w' truncates, so an encoding error raised afterwards
        # would leave the file emptied.
        payload = text.encode(encode_out)
        with open(filename, "wb") as fo:
            fo.write(payload)
    return os.path.basename(filename), fileencode
def main():
    """
    Command-line entry point.

    Arguments are read from ``sys.argv`` as ``<option> <value>`` pairs:

    * ``-f <type>``    target encoding (default ``"gb2312"``)
    * ``-d <dirname>`` directory to convert (default: left to ``convert``)
    * ``-h``           print the usage text and do nothing else

    Exits with status 1 when an option has no value or when the conversion
    raises; Ctrl-C prints a farewell instead.
    """
    try:
        argv = sys.argv[1:]
        encode_out = "gb2312"
        dir_path = None
        if argv:
            # Options sit at even positions, their values at odd positions.
            keys = argv[0::2]
            values = argv[1::2]
            if '-h' in keys:
                # Asking for help must not also start a conversion.
                helper()
                return
            if len(keys) != len(values):
                # An unpaired trailing option would otherwise surface as an
                # opaque "list index out of range".
                print("option {} requires a value".format(keys[-1]))
                sys.exit(1)
            if '-f' in keys:
                # The chosen encoding has to land in encode_out, otherwise
                # the -f option is silently ignored.
                encode_out = values[keys.index('-f')]
            if '-d' in keys:
                dir_path = values[keys.index('-d')]
        convert(f=encode_out, d=dir_path)
        print('Finish 转换完毕')
    except KeyboardInterrupt:
        print("\ngoodbye.")
    except Exception as ex:
        print(ex)
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit(1)
def time_format(time_diff):
    """
    Format a duration given in seconds as ``hours:minutes:seconds``.

    :param time_diff: elapsed time in seconds
    :return: string whose three fields are zero-padded when below 10
    """
    def pad(value):
        # Anything below 10 gets a literal "0" prefix.
        return str(value) if value >= 10 else "0" + str(value)

    hours = int(time_diff / 3600)
    minutes = int((time_diff % 3600) / 60)
    seconds = int(time_diff % 60)
    return ":".join(pad(part) for part in (hours, minutes, seconds))
def traverse_dir(file_dir):
    """
    Walk a directory tree and collect the files that should be converted.

    Files whose suffix is one of ``.py``, ``.class``, ``.gif``, ``.png``,
    ``.jpg`` or ``.project`` are left out; the comparison ignores case.

    :param file_dir: root directory to walk
    :return: list of full paths of the files that were kept; empty when the
             walk raises (a message is printed) or nothing matches
    """
    skipped_suffixes = {".py", ".class", ".gif", ".png", ".jpg", ".project"}
    file_path_list = []
    try:
        for root, _dirs, files in os.walk(file_dir):
            for name in files:
                stem, ext = os.path.splitext(name)
                # splitext() reports NO extension for a leading-dot name such
                # as ".project", so fall back to the whole name; otherwise
                # that exclusion can never match.
                suffix = ext or stem
                if suffix.lower() not in skipped_suffixes:
                    file_path_list.append(os.path.join(root, name))
    except Exception:
        print("文件路径不正确!")
    return file_path_list
def convert(f=None, d=None):
    """
    Convert every file found under a directory to the target encoding.

    :param f: name of the target codec; ``"gb2312"`` is used when omitted,
              matching the default announced by the usage text
    :param d: directory to process; ``CURRENT_PATH`` is used when falsy
    :raises Exception: when ``d`` is given but is not a directory
    """
    encode_out = f or "gb2312"
    if not d:
        dir_path = CURRENT_PATH
    elif os.path.isdir(d):
        dir_path = d
    else:
        raise Exception("该路径不是一个文件夹.")

    start_time = time.time()
    file_path_list = traverse_dir(dir_path)
    total = len(file_path_list)
    for done, path in enumerate(file_path_list, 1):
        time_eat = time_format(int(time.time() - start_time))
        result = code2code(path, encode_out)
        if result is None:
            # code2code returns None for files it leaves untouched;
            # unpacking that directly would raise TypeError and abort the
            # whole batch.
            file_name, file_encode = os.path.basename(path), "skipped"
        else:
            file_name, file_encode = result
        # Build the status text explicitly: a plain string containing
        # "{time_eat}" is never interpolated on its own.
        extra = "{} {} {}".format(time_eat, file_name, file_encode)
        progressbar(done, total, 50, extra)
def progressbar(curr, total, duration=10, extra=''):
    """
    Print a one-line text progress bar.

    The bar is ``duration`` characters wide: ``#`` for the completed share,
    spaces for the rest, followed by ``[0~N]`` where ``N`` is the completed
    share scaled to ``duration``.

    :param curr: amount of work done so far
    :param total: total amount of work; 0 renders an empty bar instead of
                  raising ZeroDivisionError
    :param duration: width of the bar in characters
    :param extra: accepted for call compatibility; currently not rendered
    :return: None
    """
    # float() keeps the division fractional on Python 2 as well.
    frac = float(curr) / total if total else 0.0
    # Clamp so an out-of-range curr cannot produce a bar of the wrong width.
    filled = max(0, min(duration, int(round(frac * duration))))
    # String repetition replaces the xrange(1, n) loops, which were one
    # character short each and fail on Python 3 where xrange is undefined.
    bar = '#' * filled + ' ' * (duration - filled)
    print('\r' + bar + '[0~{0:.0f}]'.format(frac * duration))
    sys.stdout.flush()
def helper():
    """
    Print the command-line usage text.

    Each usage line starts with ``./<script name>``, where the script name
    is the last path component of ``sys.argv[0]``.
    """
    app_name = "./{}".format(os.path.split(sys.argv[0])[-1])
    # NOTE(review): the banner still says "Image format conversion",
    # presumably inherited from the script this tool was adapted from.
    print("====== Image format conversion ======")
    usage_lines = (
        '{app_name} # 将当前文件夹下文件格式转换为gb2312格式.',
        '{app_name} -f <type> # 设置转换的编码格式',
        '{app_name} -d <dirname> # 设置转换文件的路径',
        '{app_name} -h # 帮助',
    )
    for line in usage_lines:
        # The placeholder must be filled explicitly; printing the template
        # as-is would show a literal "{app_name}".
        print(line.format(app_name=app_name))
if __name__ == '__main__':
    # The main() command-line entry point is bypassed here: every file that
    # traverse_dir() finds under CURRENT_PATH is converted to UTF-8, and the
    # result of each code2code() call is printed.
    pending_files = traverse_dir(CURRENT_PATH)
    print("file_name_list = ", len(pending_files))
    for index, name in enumerate(pending_files, 1):
        print(code2code(name, "utf-8"))