"""Helpers for renaming dotted ``.txt`` file names and dumping HTML as text."""
import os

# The third-party imports are kept, but guarded: the renaming helpers below
# only need ``os``, so a missing optional package should not stop them from
# being imported. Nothing in this module references ``nltk``.
try:
    import nltk  # noqa: F401
except ImportError:
    nltk = None

try:
    from bs4 import BeautifulSoup as bs
except ImportError:
    bs = None


def get_txt_name_from_bak_name(bak_name):
    """Rebuild a ``.txt`` name from a dotted backup file name.

    The result is ``<second-to-last part>.<first part>.txt``, where the parts
    come from splitting ``bak_name`` on ``'.'``; for example
    ``'name.txt.20200101.bak'`` becomes ``'20200101.name.txt'``.

    Returns an empty string when ``bak_name`` does not contain ``'.txt'``.
    """
    if '.txt' not in bak_name:
        return ''
    parts = bak_name.split('.')
    return parts[-2] + '.' + parts[0] + '.txt'


def get_txt_name_no_time(file_name):
    """Drop the first dot-separated part of a ``.txt`` file name.

    The result is the second and third parts joined by ``'.'``; for example
    ``'20200101.name.txt'`` becomes ``'name.txt'``. The leading part is
    presumably a timestamp (per the function name) -- nothing here checks it.

    Returns an empty string when ``file_name`` does not contain ``'.txt'`` or
    has fewer than three dot-separated parts (for example ``'name.txt'``),
    so callers can treat such files as "nothing to rename".
    """
    if '.txt' not in file_name:
        return ''
    parts = file_name.split('.')
    # Without this guard a two-part name such as 'name.txt' raised IndexError.
    if len(parts) < 3:
        return ''
    return parts[1] + '.' + parts[2]


def track_files_under_folder(folder_name):
    """Rename every matching file under ``folder_name``, recursively.

    Each file whose name yields a non-empty result from
    ``get_txt_name_no_time`` is renamed to that result inside its own
    directory. All other files are left untouched. Does nothing when
    ``folder_name`` does not exist.
    """
    if not os.path.exists(folder_name):
        return
    for root, _dirs, files in os.walk(folder_name):
        for filename in files:
            short_name = get_txt_name_no_time(filename)
            if not short_name:
                continue
            # os.path.join picks the right separator for the platform; the
            # hand-written backslash separator it replaces was not portable.
            # NOTE: os.rename replaces an existing target on POSIX and raises
            # on Windows, so two files mapping to one name will collide.
            os.rename(os.path.join(root, filename),
                      os.path.join(root, short_name))


def html_to_txt(file_name):
    """Write the text content of an HTML file to ``file_name + '.txt'``.

    The file is parsed with BeautifulSoup using the ``'lxml'`` parser and the
    result of ``get_text()`` is written out.

    Raises:
        ImportError: if the ``bs4`` package is not installed.
    """
    if bs is None:
        raise ImportError("html_to_txt requires the 'bs4' package")
    with open(file_name, 'r') as html_file, \
            open(file_name + '.txt', 'w') as txt_file:
        soup = bs(html_file, 'lxml')
        txt_file.write(soup.get_text())


if __name__ == '__main__':
    # Built from components because a plain '\Files\tmp' literal turns '\t'
    # into a TAB character.
    track_files_under_folder(os.path.join(os.sep, 'Files', 'tmp'))