import asyncio
import csv
import os
import time

import aiofiles


def _chunk_path(out_path, basename, file_idx):
    """Return the path of the ``file_idx``-th (1-based) split file."""
    return os.path.join(out_path, f"{basename}_{file_idx}.csv")


def _write_rows(path, header, rows):
    """Write ``header`` followed by ``rows`` to ``path`` as CSV and print the row count."""
    print(len(rows))
    # newline='' lets the csv module control line endings itself.
    with open(path, "w", newline='', encoding='utf-8') as fw:
        writer = csv.writer(fw)
        writer.writerow(header)
        writer.writerows(rows)


async def main(out_path, infile, num=1000000):
    """Split a CSV file into numbered chunk files using aiofiles.

    The input is handled as plain text lines, not parsed as CSV: the first
    line is taken as the header and repeated at the top of every chunk, and
    each chunk receives at most ``num`` of the following lines. A quoted
    field containing a line break would therefore be cut at that break.
    Chunks are written to ``<out_path>/<basename>_<n>.csv`` with ``n``
    starting at 1. An empty or header-only input produces no chunk files.

    :param out_path: directory that receives the chunk files (created if missing)
    :param infile: path of the CSV file to split
    :param num: maximum number of data lines per chunk file
    :return: None
    :raises ValueError: if ``num`` is not a positive integer
    """
    if num <= 0:
        raise ValueError("num must be a positive integer")
    # Everything before the first dot of the file name, e.g. "test" for "test.csv".
    basename = os.path.basename(infile).split('.')[0]
    os.makedirs(out_path, exist_ok=True)
    start = time.time()
    async with aiofiles.open(infile, "r", encoding="utf-8") as fp:
        # A single bulk read keeps the number of awaited calls small; the
        # trade-off is that the whole input is held in memory.
        lines = await fp.readlines()
    if lines:
        head = lines[0]
        # Data lines start at index 1; step through them ``num`` at a time.
        for file_idx, begin in enumerate(range(1, len(lines), num), start=1):
            chunk = lines[begin:begin + num]
            print(len(chunk))
            target = _chunk_path(out_path, basename, file_idx)
            async with aiofiles.open(target, "w", encoding="utf-8") as fw:
                await fw.write(head)
                await fw.writelines(chunk)
    print(f"end1 {time.time() - start}")
    # Total number of lines read, header included.
    print(len(lines))


def read_csv_sync(out_path, infile, num=1000000):
    """Split a CSV file into numbered chunk files using the csv module.

    The input is parsed with ``csv.reader`` and streamed, so at most ``num``
    rows are held in memory at a time. The first row is taken as the header
    and repeated at the top of every chunk. Chunks are written to
    ``<out_path>/<basename>_<n>.csv`` with ``n`` starting at 1. An empty or
    header-only input produces no chunk files.

    :param out_path: directory that receives the chunk files (created if missing)
    :param infile: path of the CSV file to split
    :param num: maximum number of data rows per chunk file
    :return: None
    :raises ValueError: if ``num`` is not a positive integer
    """
    if num <= 0:
        raise ValueError("num must be a positive integer")
    basename = os.path.basename(infile).split('.')[0]
    os.makedirs(out_path, exist_ok=True)
    start = time.time()
    with open(infile, newline='', encoding='utf-8') as f:
        # NUL characters are removed before parsing, as in the original code.
        reader = csv.reader(e.replace('\0', '') for e in f)
        header = next(reader, None)
        if header is not None:
            file_idx = 0
            rows = []
            for row in reader:
                rows.append(row)
                if len(rows) == num:
                    file_idx += 1
                    _write_rows(_chunk_path(out_path, basename, file_idx), header, rows)
                    rows = []
            # Flush the final, possibly shorter, chunk.
            if rows:
                file_idx += 1
                _write_rows(_chunk_path(out_path, basename, file_idx), header, rows)
    print(f"end2 {time.time() - start}")


if __name__ == '__main__':
    o_path = r'D:\常用保存文件\split'
    o_path2 = r'D:\常用保存文件\split2'
    in_file = r'D:\临时文件\test.csv'
    # asyncio.run creates, runs and closes the event loop in one call.
    asyncio.run(main(o_path, in_file))
    read_csv_sync(o_path2, in_file)
# Test run on an input file of 3,852,733 lines: timing comparison for the async split.
# Program output:
#   1000000
#   1000000
#   1000000
#   852733
#   end1 5.248189210891724
#   3852733
#   end2 9.213284492492676