查看文件的编码格式
import chardet

# Inspect the encoding of a single file by letting chardet sniff its raw bytes.
# NOTE(review): the path separators were lost in this snippet; the path below
# is a best-guess reconstruction -- confirm it against the real file location.
# Read-only binary mode is sufficient: the bytes are examined, never written.
with open(r"C:\Users\Administrator\Desktop\111\2.1 (1)\smile.txt", 'rb') as fp:
    content = fp.read()
print(chardet.detect(content))  # e.g. {'encoding': 'UTF-8-SIG', 'confidence': 1.0, 'language': ''}
修改文件的编码格式
import chardet

# Re-encode a single file in place as UTF-8 (without BOM).
# NOTE(review): the path separators were lost in this snippet; the path below
# is a best-guess reconstruction -- confirm it against the real file location.
with open(r"C:\Users\Administrator\Desktop\111\2.1 (1)\smile.txt", 'rb+') as fp:
    content = fp.read()
    encoding = chardet.detect(content)['encoding']
    # chardet reports None when it cannot determine an encoding (for example
    # for an empty file); in that case the file is left untouched.
    if encoding is not None:
        content = content.decode(encoding).encode('utf8')  # convert to UTF-8
        fp.seek(0)
        fp.write(content)
        # The re-encoded bytes can be shorter than the original (dropping a
        # UTF-8 BOM removes 3 bytes); without truncating, the tail of the old
        # content would remain at the end of the file.
        fp.truncate()
print(chardet.detect(content))  # e.g. {'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}
案例:修改编码格式并且把所有的txt都整合成一个txt
import os
import chardet
def saveFile(filename: str, content: str) -> None:
    """Write ``content`` to ``filename`` as UTF-8 text.

    Any existing file at ``filename`` is overwritten.

    Args:
        filename: Path of the file to create or overwrite.
        content: Text to write.
    """
    # The context manager closes (and therefore flushes) the handle even if
    # the write raises, instead of relying on garbage collection to do it.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)
def readFile(filename: str) -> str:
    """Return the entire content of ``filename`` decoded as UTF-8 text.

    Args:
        filename: Path of the file to read.

    Returns:
        The file's full text.

    Raises:
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # The context manager releases the handle as soon as the read finishes
    # rather than leaving it open until the object is garbage collected.
    with open(filename, "r", encoding="utf-8") as f:
        return f.read()
def file_extension(path: str) -> str:
    """Return the extension of ``path``, including the leading dot.

    Args:
        path: File name or path to inspect.

    Returns:
        The last extension as split off by ``os.path.splitext`` (for example
        ``".txt"``), or an empty string when the name has no extension.
    """
    return os.path.splitext(path)[1]
def combinetxt(rootdir, filename):
    """Concatenate every ``.txt`` file directly inside ``rootdir`` into ``filename``.

    Only regular files whose extension is exactly ``.txt`` are merged, in the
    order returned by ``os.listdir``; subdirectories are not descended into.
    Each source file is read as UTF-8 through ``readFile`` and appended to the
    output with no separator between files.

    Args:
        rootdir: Directory whose ``.txt`` files are merged.
        filename: Path of the combined output file; it is created or
            truncated and written as UTF-8.
    """
    print(rootdir, filename)
    # The context manager guarantees the output is closed even when reading
    # one of the source files fails part-way through.
    with open(filename, "w", encoding="utf-8") as out:
        for entry in os.listdir(rootdir):
            path = os.path.join(rootdir, entry)
            if not os.path.isfile(path) or file_extension(path) != ".txt":
                continue
            # The output file may itself live in rootdir; reading it back
            # while it is being written would copy partial output into itself.
            if os.path.samefile(path, filename):
                continue
            out.write(readFile(path))
if __name__ == '__main__':
    # Directory holding the text files to normalise and merge.
    path = r"C:\Users\Administrator\Desktop\111\弄好的"
    dirs = os.listdir(path)
    # Step 1: re-encode every .txt file in the directory in place as UTF-8.
    for file in dirs:
        filepath = os.path.join(path, file)
        # Only .txt files are merged afterwards, so only they are converted.
        # Subdirectories cannot be opened as files, and re-encoding arbitrary
        # non-text files in place could corrupt them.
        if not os.path.isfile(filepath) or file_extension(filepath) != ".txt":
            continue
        with open(filepath, 'rb+') as fp:
            content = fp.read()
            encoding = chardet.detect(content)['encoding']
            # chardet reports None when it cannot determine an encoding (for
            # example for an empty file); leave such files untouched.
            if encoding is None:
                continue
            content = content.decode(encoding).encode('utf8')
            fp.seek(0)
            fp.write(content)
            # The UTF-8 bytes can be shorter than the original content
            # (e.g. a dropped BOM); cut off the stale tail of the old data.
            fp.truncate()
    # Step 2: merge the converted files into a single output file, created
    # relative to the current working directory.
    outfile = "rest.txt"
    combinetxt(path, outfile)