import sys
import os
import re
# E-mail address pattern.
# The first local-part segment accepts ASCII letters, digits, "_" and "-";
# a letters-only class here silently dropped addresses such as
# "user1@example.com" or "12345@qq.com".
# Intermediate domain labels may contain digits ("vip.163.com"); only the final
# label is restricted to letters and hyphens.
_MAIL_RE = re.compile(
    r"([a-zA-Z0-9_-]+(?:\.[\w-]+)*@[\w-]+(?:\.[\w-]+)*\.[a-zA-Z-]+)"
)


def analysis_file(path):
    """Extract the e-mail addresses found in one file.

    The file is read as text in full, every distinct address is collected, and
    the result is written to ``path + ".mail.txt"`` with a comma after each
    address. No output file is created when nothing is found.

    Args:
        path: Path of the file to scan.

    Returns:
        None. Progress and the number of distinct addresses are printed.

    Raises:
        OSError: If the input cannot be opened or the output cannot be written.
    """
    print("analysis file: %s." % path)
    # errors="replace" keeps one undecodable byte from aborting the scan;
    # the replacement character cannot be part of a match.
    with open(path, "r", errors="replace") as fi:
        all_text = fi.read()

    mails = set(_MAIL_RE.findall(all_text))
    print("results: %d" % len(mails))
    if not mails:
        return

    with open(path + ".mail.txt", "wt") as fo:
        # Sorted so the output is reproducible between runs. The comma after
        # the last address is kept so the file format stays unchanged.
        fo.write("".join(mail + "," for mail in sorted(mails)))
def analysis_dir(path):
    """Run ``analysis_file`` on every regular file directly inside *path*.

    Entries that are not regular files (for example sub-directories) are
    skipped, so the scan is not recursive. Names ending in ".mail.txt" are
    skipped as well, so result files from an earlier run are not scanned.

    Args:
        path: Directory whose entries are scanned.

    Raises:
        OSError: If *path* cannot be listed.
    """
    for name in os.listdir(path):
        if name.endswith(".mail.txt"):
            continue
        # os.listdir returns bare names; they must be joined with the
        # directory before testing, otherwise isfile() resolves them against
        # the current working directory.
        full_path = os.path.join(path, name)
        if not os.path.isfile(full_path):
            continue
        analysis_file(full_path)
def main():
    """Command-line entry point: scan the path given as ``sys.argv[1]``.

    If the argument names a regular file it is passed to ``analysis_file``.
    Otherwise, if the path exists, it is passed to ``analysis_dir``.
    A missing argument or a non-existent path prints a message and returns.

    Returns:
        None.
    """
    print("analysis is working... ...")
    print("current direcotry: %s." % os.getcwd())
    if len(sys.argv) < 2:
        print("set the directory to serach")
        return

    path = sys.argv[1]
    if os.path.isfile(path):
        print("searching file: %s." % path)
        analysis_file(path)
        return

    if not os.path.exists(path):
        print("there isn't exist direcoty: %s" % path)
        return

    # Anything that exists and is not a regular file is handled as a directory.
    print("searching alll files in directory: %s." % path)
    analysis_dir(path)
# Run the scan only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
# 这是我在实际过程中常碰到的问题：大量的邮件地址分散在若干文件中。本脚本可以处理单个文件或目录，提取所有的邮件地址，并拼接成邮件地址列表，可以直接用于批量邮件的发送。