本来想使用Python读取源文件,并使用正则表达式的方式过滤。过程中发现文件编码的问题老是搞不定,就先写个简易版本用于过滤日志文件中感兴趣的行,然后将结果输出到另一个文件中。
功能:1、输入源文件路径;2、输入希望保存结果的目录路径(必须是已存在的目录,输出文件名由程序按当前时间自动生成);3、输入希望过滤的词组(输入 :q 结束输入),一行中只要包含这些词组中的任何一个就视作匹配。
源码如下:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import os
#import re
import time
'readTextFile.py--read and display text file'
# Example: format the current local time as e.g. 20160320114539_log
#print time.strftime("%Y%m%d%H%M%S_log", time.localtime())
#get source file name
def read_input(message):
    """Prompt on stdout and return one line typed by the user.

    Uses ``raw_input`` when it exists (Python 2) and falls back to
    ``input`` (Python 3), so the script runs on either interpreter.
    """
    try:
        reader = raw_input  # Python 2
    except NameError:
        reader = input  # Python 3
    return reader(message)


def ask_source_file():
    """Keep prompting until the user enters a path that exists; return it."""
    while True:
        fsrcname = read_input('Please input the file to read:')
        if os.path.exists(fsrcname):
            print("success : '%s' is exists." % fsrcname)
            return fsrcname
        print("Error : '%s' isn't exists." % fsrcname)


def ask_dest_dir():
    """Keep prompting until the user enters an existing directory; return it."""
    while True:
        fdestpath = read_input('Please input the dest file path to output:')
        if os.path.isdir(fdestpath):
            print("success : '%s' is ok." % fdestpath)
            return fdestpath
        print("Error : '%s' isn't exists." % fdestpath)


def ask_filter_words():
    """Collect the words to match, one per prompt, until the user types ':q'.

    Empty entries are rejected with a message and not stored.
    Returns the list of collected words (possibly empty).
    """
    filterstr = []
    while True:
        restr = read_input('Please input the word to match for the lines in log,input :q to quit:')
        if restr == ':q':
            print(" matching ...,please wait")
            return filterstr
        if restr == '':
            print("Please input valid word to match")
        else:
            filterstr.append(restr)


def filter_lines(lines, words):
    """Return the lines that contain at least one of ``words`` as a substring.

    Input order is preserved and a line is returned once even when several
    words match it. An empty ``words`` list matches nothing.
    """
    return [line for line in lines if any(word in line for word in words)]


def read_matching_lines(src_path, words):
    """Open ``src_path`` for reading and return its lines matching ``words``.

    The file is closed even if reading fails part-way. Errors raised by
    ``open`` or by reading propagate to the caller.
    """
    with open(src_path, 'r') as fobj:
        return filter_lines(fobj, words)


def build_dest_path(dest_dir, when=None):
    """Return ``dest_dir`` joined with a ``YYYYmmddHHMMSS.log`` file name.

    ``when`` is a ``time.struct_time`` (or equivalent 9-tuple); the current
    local time is used when it is None. ``os.path.join`` supplies the
    separator only when ``dest_dir`` does not already end with one, and
    uses the separator of the running platform.
    """
    if when is None:
        when = time.localtime()
    destfilename = time.strftime("%Y%m%d%H%M%S", when) + '.log'
    return os.path.join(dest_dir, destfilename)


def write_lines(dest_path, lines):
    """Write ``lines`` to ``dest_path``, replacing any existing file.

    Lines are written as given; no newline is appended.
    """
    with open(dest_path, 'w') as fobj:
        fobj.writelines(lines)


def main():
    """Ask for source file, output directory and words, then filter the file.

    Lines of the source file containing any of the words are written to a
    new timestamped ``.log`` file in the output directory. Nothing is
    written when the source cannot be read or when no line matches.
    """
    fsrcname = ask_source_file()
    fdestpath = ask_dest_dir()
    filterstr = ask_filter_words()

    try:
        matched = read_matching_lines(fsrcname, filterstr)
    except (IOError, OSError, UnicodeError):
        # UnicodeError covers decode failures when reading in text mode on
        # Python 3; they are reported like any other read failure.
        print("*** file open error")
        return

    # An empty list means no match; it is never None, so test truthiness.
    if not matched:
        print("filter file fail, no match ")
        return

    fdestpath = build_dest_path(fdestpath)
    print("filter file complete, now the output the result to the '%s' " % fdestpath)
    try:
        write_lines(fdestpath, matched)
    except (IOError, OSError):
        print("*** file open error")
    else:
        print("file output success, filepath is '%s' " % fdestpath)


# Run the interactive flow only when executed as a script, so the helpers
# above can be imported without prompting.
if __name__ == "__main__":
    main()