# -*-encoding:utf-8-*-
import os
import re
import random
def find_line_exist_num(file_dir, save_path, pattern, encoding="utf-16"):
    """Copy every line under *file_dir* that matches *pattern* into *save_path*.

    Walks *file_dir* recursively, reads each file as text in *encoding*, and
    writes each line for which ``pattern.search(line)`` succeeds to
    *save_path*, one matching line per output line.

    Args:
        file_dir: Root directory to scan recursively.
        save_path: File that receives the matching lines (overwritten).
        pattern: Compiled regular expression (anything with ``.search``).
        encoding: Text encoding used for both the input files and the
            output file. Defaults to ``"utf-16"``.

    Returns:
        The number of matching lines written, which equals the number of
        lines in *save_path*.

    Raises:
        OSError: If *save_path* or an input file cannot be opened.
        UnicodeError: If an input file is not valid *encoding* text.
    """
    count = 0
    save_abs = os.path.abspath(save_path)
    with open(save_path, "w", encoding=encoding) as result_f:
        for root, _dirs, files in os.walk(file_dir):
            for name in files:
                file_path = os.path.join(root, name)
                # The output file may live inside file_dir; never feed it
                # back into itself while it is being written.
                if os.path.abspath(file_path) == save_abs:
                    continue
                # Decode at the file level: splitting raw UTF-16 bytes on
                # b"\n" cuts code units in half and corrupts the text.
                with open(file_path, encoding=encoding) as f:
                    for line in f:
                        if pattern.search(line):
                            # Normalise the terminator so each match takes
                            # exactly one output line (the returned count is
                            # later used as the line count of save_path).
                            result_f.write(line.rstrip("\r\n") + "\n")
                            count += 1
    return count
def get_random_line(file_path, num, result_file_path, sample_size=2000,
                    encoding="utf-16"):
    """Write a random sample of lines from *file_path* to *result_file_path*.

    Picks up to *sample_size* distinct line indices from ``range(num)``, then
    streams *file_path* once and copies the selected lines, in file order,
    to *result_file_path*. Each selected line is also printed with its
    zero-based line number.

    Args:
        file_path: Text file to sample from.
        num: Number of lines in *file_path* to sample among (indices
            ``0 .. num - 1``).
        result_file_path: File that receives the sampled lines (overwritten).
        sample_size: Maximum number of lines to pick. Defaults to 2000.
        encoding: Text encoding for both files. Defaults to ``"utf-16"``.

    Returns:
        The number of lines actually written.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeError: If *file_path* is not valid *encoding* text.
    """
    population = range(max(num, 0))
    # Clamp the sample size: random.sample raises ValueError when asked for
    # more items than the population holds.
    wanted = min(max(sample_size, 0), len(population))
    # A set gives O(1) membership tests inside the line loop.
    selected = set(random.sample(population, wanted))
    written = 0
    with open(result_file_path, "w", encoding=encoding) as result:
        with open(file_path, encoding=encoding) as f:
            for cur_num, line in enumerate(f):
                if written == len(selected):
                    # Every chosen line has been copied; skip the rest.
                    break
                if cur_num in selected:
                    print(r"行号:%s, 内容:%s" % (str(cur_num), line))
                    result.write(line)
                    written += 1
    return written
if __name__ == "__main__":
    # Directory to scan recursively.
    file_dir = ""
    # Where every line that contains a digit is saved.
    save_path = ""
    # Where the random sample of 2000 lines is saved.
    result_file_path = ""
    # Match one or more digits. Without the backslash the pattern "d+" would
    # match the letter "d" instead.
    pattern = re.compile(r'\d+')
    count = find_line_exist_num(file_dir, save_path, pattern)
    get_random_line(save_path, count, result_file_path)