# 声明:这个思路来自一位同学,他原来的实现有 bug,
# 下面的代码是我重写的版本。
# 统计目录下指定后缀的文本文件:字符总数、非空行数,以及每个字符出现的次数
import csv
from datetime import datetime
import os
import re
class Statistician(object):
    """Collect character statistics for all files with a given extension.

    The directory tree under ``path`` is scanned recursively; every file
    whose name ends with ``.<postfix>`` contributes to three figures:
    the total number of characters, the number of non-blank lines, and a
    per-character occurrence table.
    """

    def __init__(self, path, postfix="py"):
        """
        :param path: root directory to scan
        :param postfix: file extension to match, with or without a leading
            dot; an empty string matches every file
        """
        self.path = path
        self.postfix = postfix
        self.file_path = []
        self.size = 0
        self.row = 0
        self.char_table = dict()

    def _suffix(self):
        """Return the literal file-name suffix to match (``""`` = match all)."""
        bare = str(self.postfix).lstrip(".")
        return "." + bare if bare else ""

    def _collect(self, path, suffix, found):
        """Append to ``found`` every file under ``path`` ending in ``suffix``."""
        # Sorted so the scan order (and therefore the order of the character
        # table and of the CSV rows) is reproducible across runs/platforms.
        for name in sorted(os.listdir(path)):
            full_path = os.path.join(path, name)
            if os.path.isdir(full_path):
                self._collect(full_path, suffix, found)
            elif name.endswith(suffix):
                # Plain suffix comparison: the extension is matched literally,
                # so "happy" or a file named "py" no longer count as ".py".
                found.append(full_path)

    def out_file_path(self, path):
        """Recursively list the matching files below ``path``.

        The result replaces ``self.file_path`` instead of being appended to
        it, so calling this method repeatedly never yields duplicates.

        :param path: directory to scan
        :return: list of matching file paths (also stored in ``self.file_path``)
        :raises OSError: if a directory cannot be listed
        """
        found = []
        self._collect(path, self._suffix(), found)
        self.file_path = found
        return self.file_path

    def get_params(self):
        """Scan ``self.path`` and compute the statistics from scratch.

        Lines are stripped of leading/trailing ASCII whitespace; lines that
        are empty after stripping are ignored. Counters are reset on every
        call, so the result always describes exactly one scan.

        :return: tuple ``(size, row, char_table)`` - number of characters,
            number of non-blank lines, and a dict mapping each character to
            its number of occurrences
        :raises UnicodeDecodeError: if a matched file is not valid UTF-8
        """
        self.size = 0
        self.row = 0
        self.char_table = dict()
        table = self.char_table
        for path in self.out_file_path(self.path):
            # Binary mode keeps line splitting independent of the platform's
            # newline translation; each line is decoded individually.
            with open(path, "rb") as f:
                for raw_line in f:
                    stripped = raw_line.strip()
                    if not stripped:
                        continue
                    content = stripped.decode("utf-8")
                    self.size += len(content)
                    self.row += 1
                    for c in content:
                        table[c] = table.get(c, 0) + 1
        return self.size, self.row, self.char_table

    def create_table(self, out_path='example.csv', encoding='utf-8'):
        """Write the character table as CSV rows of ``character,count``.

        :param out_path: destination file; defaults to ``example.csv`` in
            the current working directory
        :param encoding: text encoding of the CSV file. UTF-8 by default
            because the table may hold any character decoded from the
            scanned files, which a locale-specific codec could fail to encode.
        """
        # newline='' is required by the csv module to avoid doubled line
        # endings on Windows.
        with open(out_path, 'w', newline='', encoding=encoding) as f:
            csv.writer(f).writerows(self.char_table.items())
if __name__ == '__main__':
    # Script entry point: scan a directory, dump the character table to CSV
    # and print the elapsed time plus the collected statistics.
    import sys  # local import: only the command-line entry point needs argv

    # The directory can be given as the first command-line argument; without
    # one, the original hard-coded location is used.
    target_dir = sys.argv[1] if len(sys.argv) > 1 else "C:/Users/gym/Desktop/dir_test"

    statistician = Statistician(target_dir)
    begin_time = datetime.now()
    size, row, table = statistician.get_params()
    statistician.create_table()
    end_time = datetime.now()
    print(end_time - begin_time)
    # `size` is the number of decoded characters (len() of str), not bytes,
    # so it is labelled as a character count.
    print("字符数:" + str(size))
    print("行数:" + str(row))
    print(table)