imaplib 获取邮件,email解析邮件
config文件中存有路径
# config.py
import os

# Directory (with trailing slash) under which uploaded files are written.
# The trailing slash is required: callers build paths by plain string
# concatenation (e.g. FILE_PATH_PREFIX + 'annex/').
FILE_PATH_PREFIX = os.getcwd() + '/static/'
# URL path segment that corresponds to FILE_PATH_PREFIX when building file URLs.
FILE_PATH_PREFIX_ALIAS = "/static/"
# Scheme, host and port prepended to every generated file URL.
FILE_DOMAIN_PREFIX = 'http://0.0.0.0:8090'
utils.py中的 file_path 方法
def file_path(file_path):
    """Return today's dated sub-directory of ``file_path``, creating it if needed.

    Args:
        file_path: Base directory as a string ending with a path separator;
            the date folder name is appended by plain concatenation.

    Returns:
        ``file_path`` + ``YYYY-MM-DD`` + ``'/'``.
    """
    pwd = file_path + datetime.now().strftime('%Y-%m-%d') + '/'
    # exist_ok avoids the check-then-create race when two workers save
    # files for the same day at the same moment.
    os.makedirs(pwd, exist_ok=True)
    return pwd
imaplib 获取邮件,email解析邮件
get_email()函数的参数根据项目实际情况传。
旧版内容:
import imaplib
import email
import re
import time
from email.errors import HeaderParseError
from email.header import decode_header, make_header, Header
from datetime import datetime, timedelta, timezone
from email.utils import parseaddr, parsedate_to_datetime
import util
from config import FILE_PATH_PREFIX, FILE_DOMAIN_PREFIX, FILE_PATH_PREFIX_ALIAS
import os

# Register the non-standard "ID" command so imaplib accepts it after login.
# The value must be a tuple of allowed connection states.
imaplib.Commands['ID'] = ('AUTH',)

# Fixed UTC+8 offset used to express mail dates as Beijing wall-clock time.
_BEIJING_TZ = timezone(timedelta(hours=8))


def _header_to_str(value):
    """Decode an RFC 2047 encoded header value into text ('' when missing)."""
    if value is None:
        return ''
    try:
        return str(make_header(decode_header(value)))
    except (HeaderParseError, LookupError, UnicodeError):
        # Unknown charset or malformed encoded word: keep the raw text.
        return str(value)


def _to_beijing_time(date_str):
    """Parse an RFC 2822 date string into a naive UTC+8 datetime, or None."""
    if not date_str:
        return None
    try:
        parsed = parsedate_to_datetime(str(date_str).strip())
    except (TypeError, ValueError):
        return None
    if parsed.tzinfo is None:
        # "-0000" or an unrecognised zone name: treat the value as UTC.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(_BEIJING_TZ).replace(tzinfo=None)


def decode_data(content, added_encode=None):
    """Decode ``content`` to text, trying several charsets in turn.

    Args:
        content: Bytes to decode. Non-bytes values are stringified.
        added_encode: Optional charset tried before the built-in candidates
            (UTF-8, GBK, GB2312).

    Returns:
        The decoded text, or None when every candidate fails.
    """

    def _decode(bytes_, msg_charset):
        try:
            if isinstance(bytes_, bytes):
                return str(bytes_, encoding=msg_charset)
            # NOTE(review): non-bytes input is cut at the first space, exactly
            # as before; confirm callers rely on this before changing it.
            return str(bytes_).split(' ')[0]
        except Exception:
            return None

    encodes = ['UTF-8', 'GBK', 'GB2312']
    if added_encode:
        encodes = [str(added_encode)] + encodes
    # Payloads that carry literal "\uXXXX" escapes are unescaped first. The
    # check is for a backslash followed by "u", not for any letter "u".
    if '\\u' in str(content):
        str_data = _decode(content, 'unicode-escape')
        if str_data is not None:
            return str_data
    for encoding in encodes:
        str_data = _decode(content, encoding)
        if str_data is not None:
            return str_data
    return None


def get_local_time_stamp(msg, date):
    """Convert a mail date to Beijing time.

    Args:
        msg: Parsed message; its first ``Received`` header is the fallback
            source when ``date`` is missing or cannot be parsed.
        date: Value of the ``Date`` header, or None.

    Returns:
        A naive ``datetime`` expressed in UTC+8, or None when no usable date
        is found. Calling ``.timestamp()`` on it interprets it in the system
        time zone.
    """
    local_time = _to_beijing_time(date)
    if local_time is None and msg['Received']:
        # The date is the part after the last ';' of a Received header.
        local_time = _to_beijing_time(str(msg['Received']).split(';')[-1])
    return local_time


def parse_email_body(message):
    """Return the message body text, preferring the HTML part.

    Returns:
        The first decodable ``text/html`` part, otherwise the first decodable
        ``text/plain`` part, otherwise ``''``.
    """
    first_text = None
    for part in message.walk():
        if part.is_multipart():
            continue
        content_type = part.get_content_type()
        if content_type not in ('text/plain', 'text/html'):
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        # The charset is a Content-Type parameter; get_charset() is normally
        # None on parsed messages.
        text = decode_data(payload, part.get_content_charset())
        if text is None:
            continue
        if content_type == 'text/html':
            return text
        if first_text is None:
            first_text = text
    return first_text if first_text is not None else ''


def parse_email_annex(message, client_id, from_email, send_email, mail_subject, mail_content, email_time):
    """Save every attachment of ``message`` and describe where it was stored.

    Only ``message`` is used; the remaining parameters are kept for caller
    compatibility.

    Returns:
        A list of dicts with keys ``annexName``, ``annexUrl`` and ``annexPath``.
    """
    annex_list = []
    for part in message.walk():
        if part.is_multipart():
            continue
        raw_name = part.get_filename()
        if not raw_name:
            continue
        # The name comes from the sender: decode it and keep only the final
        # path component so it cannot escape the storage directory.
        annex_name = os.path.basename(_header_to_str(raw_name).replace('\\', '/'))
        payload = part.get_payload(decode=True)
        if not annex_name or payload is None:
            continue
        pwd = util.file_path(FILE_PATH_PREFIX + 'annex/')
        id_ = str(int(time.time()))
        stored_name = id_ + '_' + annex_name
        url_ = (FILE_DOMAIN_PREFIX + FILE_PATH_PREFIX_ALIAS + 'annex/'
                + pwd.split('/')[-2] + '/' + stored_name)
        path_ = pwd + stored_name
        # Do not overwrite an attachment that is already stored.
        if not os.path.isfile(path_):
            with open(path_, 'wb') as fp:
                fp.write(payload)
        annex_list.append({'annexName': annex_name, 'annexUrl': url_, 'annexPath': path_})
    return annex_list


def get_email(server, username, password, send_email, client_id, created_by):
    """Fetch and parse the latest messages of an IMAP mailbox.

    https://www.docs4dev.com/docs/zh/python/3.7.2rc1/all/library-imaplib.html

    The parsed fields are held in local variables; persisting them is
    project-specific and not part of this listing.

    Args:
        server: IMAP host name (an SSL connection is opened).
        username: Account name.
        password: Account password or authorisation code.
        send_email, client_id, created_by: Passed through for the caller's
            persistence step; unused here.

    Returns:
        None. Connection, login and processing failures are printed.
    """
    try:
        imap = imaplib.IMAP4_SSL(server)
    except Exception as e:
        print('IMAP connection to {} failed: {}'.format(server, e))
        return
    try:
        try:
            imap.login(username, password)
        except Exception as e:
            print('账号:{} 登录失败:{}'.format(username, e))
            return
        # Mailboxes: INBOX / Drafts / Junk / Trash / Sent. Use imap.list()
        # to see which names a given server offers.
        mail_box = ['INBOX']
        # NetEase mailboxes reject clients that do not identify themselves:
        # https://blog.csdn.net/jony_online/article/details/108638571
        args = ("name", username, "contact", username, "version", "1.0.0", "vendor", "myclient")
        imap._simple_command('ID', '("' + '" "'.join(args) + '")')
        for mailbox in mail_box:
            try:
                imap.select(mailbox=mailbox)
                typ, found = imap.search(None, 'All')  # 'UnSeen' for unread mail only
            except imaplib.IMAP4.error:
                # Fall back to the server's default mailbox.
                imap.select()
                typ, found = imap.search(None, 'All')
            for num in found[0].split()[-10:]:
                typ, fetched = imap.fetch(num, '(RFC822)')
                # Parse the raw bytes; the parser handles per-part charsets.
                message = email.message_from_bytes(fetched[0][1])
                mail_subject = _header_to_str(message.get('subject'))
                raw_name, from_email = parseaddr(str(message.get('from', '')))
                from_name = _header_to_str(raw_name)
                date_ = get_local_time_stamp(message, message.get('date'))
                if date_ is not None:
                    email_time = date_.timestamp()
                else:
                    email_time = time.time()
                mail_content = parse_email_body(message)
                # To mark the message as read on the server:
                # imap.store(num, '+FLAGS', '(\\Seen)')
        imap.close()
    except Exception as e:
        print('IMAP processing for {} failed: {}'.format(username, e))
    finally:
        try:
            imap.logout()
        except Exception:
            # Best-effort cleanup; the connection may already be gone.
            pass
更新后的内容:
import imaplib
import email
import re
import time
from email.errors import HeaderParseError
from email.header import decode_header, make_header, Header
from datetime import datetime, timedelta, timezone
from email.utils import parseaddr, parsedate_to_datetime
import util
from config import FILE_PATH_PREFIX, FILE_DOMAIN_PREFIX, FILE_PATH_PREFIX_ALIAS
from model import EmailContentModel
from util import id_generator
import os
from func_timeout import func_set_timeout


# Register the non-standard "ID" command so imaplib accepts it after login.
# The value must be a tuple of allowed connection states.
imaplib.Commands['ID'] = ('AUTH',)

# Fixed UTC+8 offset used to express mail dates as Beijing wall-clock time.
_BEIJING_TZ = timezone(timedelta(hours=8))


def _header_to_str(value):
    """Decode an RFC 2047 encoded header value into text ('' when missing)."""
    if value is None:
        return ''
    try:
        return str(make_header(decode_header(value)))
    except (HeaderParseError, LookupError, UnicodeError):
        # Unknown charset or malformed encoded word: keep the raw text.
        return str(value)


def decode_data(content, added_encode=None):
    """Decode ``content`` to text, trying several charsets in turn.

    Args:
        content: Bytes to decode. Non-bytes values are stringified.
        added_encode: Optional charset tried before the built-in candidates
            (UTF-8, GBK, GB2312).

    Returns:
        The decoded text, or None when every candidate fails.
    """

    def _decode(bytes_, msg_charset):
        try:
            if isinstance(bytes_, bytes):
                return str(bytes_, encoding=msg_charset)
            # NOTE(review): non-bytes input is cut at the first space, exactly
            # as before; confirm callers rely on this before changing it.
            return str(bytes_).split(' ')[0]
        except Exception:
            return None

    encodes = ['UTF-8', 'GBK', 'GB2312']
    if added_encode:
        encodes = [str(added_encode)] + encodes
    # Payloads that carry literal "\uXXXX" escapes are unescaped first. The
    # check is for a backslash followed by "u", not for any letter "u".
    if '\\u' in str(content):
        str_data = _decode(content, 'unicode-escape')
        if str_data is not None:
            return str_data
    for encoding in encodes:
        str_data = _decode(content, encoding)
        if str_data is not None:
            return str_data
    return None


def parse_email_body(message):
    """Return the concatenated text of all text/plain and text/html parts.

    Duplicate parts are dropped while keeping their original order.

    Returns:
        The joined body text, or ``''`` when the message has no decodable
        text part.
    """
    texts = []
    for part in message.walk():
        if part.is_multipart():
            continue
        if part.get_content_type() not in ('text/plain', 'text/html'):
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        # The charset is a Content-Type parameter; get_charset() is normally
        # None on parsed messages.
        text = decode_data(payload, part.get_content_charset())
        if text is not None:
            texts.append(text)
    # dict.fromkeys de-duplicates deterministically; set() would shuffle parts.
    return ''.join(dict.fromkeys(texts))


def local_time(time_):
    """Convert an RFC 2822 mail date to Beijing time.

    Args:
        time_: The ``Date`` header value (any object whose ``str()`` is the
            date text).

    Returns:
        A naive ``datetime`` expressed in UTC+8.

    Raises:
        ValueError: If the value cannot be parsed as a mail date.
    """
    date = str(time_).strip()
    try:
        parsed = parsedate_to_datetime(date)
    except TypeError:
        # Older Python versions signal an unparsable date with TypeError.
        raise ValueError('unparsable mail date: {!r}'.format(date)) from None
    if parsed.tzinfo is None:
        # "-0000" or an unrecognised zone name: treat the value as UTC.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(_BEIJING_TZ).replace(tzinfo=None)


# # search('FROM','abc@outlook.com',conn) looks up mail matching a condition
# def search(key, value, conn):
#     result, data = conn.search(None, key, '"()"'.format(value))
#     return data


def get_attachements(msg):
    """Save every attachment of ``msg`` and describe where it was stored.

    Returns:
        A list of dicts with keys ``annexName``, ``annexUrl`` and ``annexPath``.
    """
    annex_list = []
    for part in msg.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        raw_name = part.get_filename()
        if not raw_name:
            continue
        # The name comes from the sender: decode it and keep only the final
        # path component so it cannot escape the storage directory.
        filename = os.path.basename(_header_to_str(raw_name).replace('\\', '/'))
        payload = part.get_payload(decode=True)
        if not filename or payload is None:
            continue
        pwd = util.file_path(FILE_PATH_PREFIX + 'annex/')
        date_time = str(int(time.time()))
        stored_name = date_time + '_' + filename
        url_ = (FILE_DOMAIN_PREFIX + FILE_PATH_PREFIX_ALIAS + 'annex/'
                + pwd.split('/')[-2] + '/' + stored_name)
        path_ = pwd + stored_name
        # Do not overwrite an attachment that is already stored.
        if not os.path.isfile(path_):
            with open(path_, 'wb') as f:
                f.write(payload)
        annex_list.append({'annexName': filename, 'annexUrl': url_, 'annexPath': path_})
    return annex_list


@func_set_timeout(200)
def get_email(server, username, password, send_email, client_id, created_by, mail_status, mail_num):
    """Fetch and parse messages from the INBOX that are not stored yet.

    https://www.docs4dev.com/docs/zh/python/3.7.2rc1/all/library-imaplib.html

    The parsed fields are held in local variables; persisting them is
    project-specific and not part of this listing.

    Args:
        server: IMAP host name (an SSL connection is opened).
        username: Account name.
        password: Account password or authorisation code.
        send_email: Address of this mailbox; used as the recipient when the
            ``To`` header does not list several addresses.
        client_id, created_by: Passed through for the caller's persistence
            step; unused here.
        mail_status: IMAP search criterion, e.g. ``'All'`` or ``'UnSeen'``.
        mail_num: Slice start applied to the matching message numbers,
            e.g. ``-10`` for the ten most recent.

    Returns:
        None. Connection, login and processing failures are printed.
    """
    try:
        imap = imaplib.IMAP4_SSL(server)
    except Exception as e:
        print('IMAP connection to {} failed: {}'.format(server, e))
        return None
    # Only ``Exception`` is caught below. NOTE(review): func_timeout's
    # FunctionTimedOut is understood to derive from BaseException, so a bare
    # ``except`` would swallow the timeout; confirm against the installed version.
    try:
        try:
            imap.login(username, password)
        except Exception as e:
            print('账号:{} 登录失败:{}'.format(username, e))
            return None
        # Mailboxes: INBOX / Drafts / Junk / Trash / Sent. Use imap.list()
        # to see which names a given server offers.
        # NetEase mailboxes reject clients that do not identify themselves:
        # https://blog.csdn.net/jony_online/article/details/108638571
        args = ("name", username, "contact", username, "version", "1.0.0", "vendor", "myclient")
        imap._simple_command('ID', '("' + '" "'.join(args) + '")')
        # readonly=True keeps the server from flagging fetched mail as seen.
        imap.select(mailbox='INBOX', readonly=True)
        typ, found = imap.search(None, mail_status)
        for num in found[0].split()[mail_num:]:
            typ, fetched = imap.fetch(num, '(RFC822)')
            message = email.message_from_bytes(fetched[0][1])
            id_match = re.search('<(.*)>', _header_to_str(message['Message-Id']))
            if id_match is None:
                # Without a Message-Id the mail cannot be de-duplicated; skip
                # it instead of aborting the whole run.
                continue
            message_id = id_match.group(1)
            if EmailContentModel.find_by_message_id(message_id):
                continue
            mail_subject = _header_to_str(message['Subject'])
            from_name, from_email = parseaddr(_header_to_str(message['From']))
            to_text = _header_to_str(message['To'])
            if to_text.startswith('<') and to_text.endswith('>'):
                to_text = to_text[1:-1]
            if ',' in to_text:
                to_email = to_text.split(',')
            else:
                to_email = [send_email]
            email_time = local_time(_header_to_str(message['Date']))
            mail_content = parse_email_body(message)
            annex_list = get_attachements(message)
        imap.close()
    except Exception as e:
        print('IMAP processing for {} failed: {}'.format(username, e))
    finally:
        try:
            imap.logout()
        except Exception:
            # Best-effort cleanup; the connection may already be gone.
            pass
    return None
在 flask 项目中使用调度器使 get_email() 函数自动后台运行获取邮件
# app.py
import atexit

from resource.imap_receive_email import save_email_task
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor

app = Flask(__name__)
...
api.add_resource(Login, '/login')

# Scheduler setup
executors = {
    # Worker pool sizes (threads / processes)
    'default': ThreadPoolExecutor(10),
    'processpool': ProcessPoolExecutor(10)
}
job_defaults = {
    # Seconds after its scheduled time that a job may still be started
    'misfire_grace_time': 10,
    # Maximum number of concurrently running instances of one job
    'max_instances': 10,
    # Run a job once instead of once per missed run when runs have piled up
    'coalesce': True
}

scheduler = BackgroundScheduler(executors=executors, job_defaults=job_defaults)
# NOTE(review): max_instances=10 lets a new fetch start while earlier ones are
# still running; lower it if overlapping fetches of one mailbox are unwanted.
scheduler.add_job(func=save_email_task, trigger='interval', seconds=30, replace_existing=True, max_instances=10)
# BackgroundScheduler.start() returns immediately, so wrapping it in
# try/except never shuts the scheduler down. Stop it at interpreter exit.
scheduler.start()
atexit.register(lambda: scheduler.shutdown(wait=False))

# NOTE(review): with debug=True Flask's reloader presumably imports this module
# twice, which would start two schedulers; verify or pass use_reloader=False.
if __name__ == '__main__':
    app.run(
        host='0.0.0.0',
        port=8090,
        debug=DEBUG
    )
任务 save_email_task:
def save_email_task():
    """Scheduled job: load a mailbox account and fetch its mail.

    This is a template; the query and the remaining ``get_email`` arguments
    depend on the project.
    """
    # Load the mailbox account settings from the database first.
    # (Named ``account`` so the stdlib ``email`` module is not shadowed.)
    account = EmailModel.object(...)
    # Fetch the mail of that mailbox.
    get_email(
        account['imapServer'],
        account['Username'],
        account['password'],
        # ... add the remaining get_email() arguments your project requires
    )
poplib 获取邮件
参考:廖雪峰pop3收取邮件
(以下代码未测试,不可用)
# -*- coding: utf-8 -*-

import os
import poplib
import email
import time
from email.parser import Parser
from email.header import decode_header, make_header
from email.utils import parseaddr, parsedate_to_datetime


def decode_str(str_in):
    """Decode an RFC 2047 encoded header value into text.

    All encoded chunks are decoded, not only the first one.

    Returns:
        The decoded text, or ``str_in`` unchanged when it cannot be decoded
        (including ``None``).
    """
    try:
        return str(make_header(decode_header(str_in)))
    except Exception:
        # Best effort: any malformed header is returned as received.
        return str_in


def guess_charset(msg):
    """Return the charset name of a message part, or None if undeclared."""
    charset = msg.get_charset()
    if charset is not None:
        return str(charset)
    # Reads the Content-Type "charset" parameter without quotes or trailing
    # parameters such as "; format=flowed".
    return msg.get_content_charset()


def get_email(pop_server, username, password, send_email, client_id):
    """Print the ten most recent messages of a POP3 mailbox and save attachments.

    Args:
        pop_server: POP3 host; ``'pop3.qq.com'`` is used when empty.
        username: Account name.
        password: Account password or authorisation code.
        send_email, client_id: Unused here; kept for caller compatibility.

    Returns:
        False when the connection, the login or a retrieval fails,
        otherwise None.
    """
    # NOTE(review): plain POP3 on port 110 sends the password unencrypted;
    # consider poplib.POP3_SSL (port 995) if the provider supports it.
    try:
        server = poplib.POP3(pop_server or 'pop3.qq.com', 110, timeout=50)
    except (OSError, poplib.error_proto):
        print("登陆失败")
        return False
    try:
        try:
            server.user(username)
            server.pass_(password)
        except (OSError, poplib.error_proto):
            print("登陆失败")
            return False
        resp, mails, octets = server.list()  # one entry per message
        # POP3 message numbers start at 1; walk from the newest, at most ten.
        for index in range(len(mails), 0, -1)[:10]:
            try:
                resp, lines, octets = server.retr(index)
            except (OSError, poplib.error_proto):
                try:
                    # Retrieval failed: authenticate again and retry once.
                    server.user(username)
                    server.pass_(password)
                    resp, lines, octets = server.retr(index)
                except (OSError, poplib.error_proto):
                    return False

            # Re-join the raw lines with CRLF and let the parser handle the
            # per-part charsets instead of guessing one for the whole mail.
            msg = email.message_from_bytes(b'\r\n'.join(lines))

            subject = decode_str(msg.get("Subject"))

            try:
                # Epoch seconds, honouring the zone offset in the header.
                date = parsedate_to_datetime(decode_str(msg.get("Date"))).timestamp()
            except (TypeError, ValueError, OverflowError):
                date = "NONE"
                print('5:错误')

            try:
                sender = decode_str(msg.get("From")).split(' ')[-1]
            except AttributeError:
                sender = '6:<None>'

            mail_msg = {
                'From': sender,
                'Date': date,
                'Subject': subject,
                'Msg': msg,
            }
            print('mail_msg:', mail_msg)
            print_info(mail_msg['Msg'])  # print the message content
            get_att(mail_msg['Msg'])  # download the attachments
    finally:
        try:
            server.quit()
        except (OSError, poplib.error_proto):
            # Best-effort cleanup; the connection may already be gone.
            pass


def print_info(msg, indent=0):
    """Print headers and text content of ``msg``; ``indent`` is the nesting level."""
    if indent == 0:
        for header in ['From', 'To', 'Subject']:
            value = msg.get(header, '')
            if value:
                if header == 'Subject':
                    value = decode_str(value)
                else:
                    hdr, addr = parseaddr(value)
                    name = decode_str(hdr)
                    value = u'%s <%s>' % (name, addr)
            print('%s%s: %s' % (' ' * indent, header, value))
    if msg.is_multipart():
        parts = msg.get_payload()
        for n, part in enumerate(parts):
            print('%spart %s' % (' ' * indent, n))
            print('%s--------------------' % (' ' * indent))
            print_info(part, indent + 1)
    else:
        content_type = msg.get_content_type()
        if content_type == 'text/plain' or content_type == 'text/html':
            content = msg.get_payload(decode=True) or b''
            # Always decode to text; concatenating raw bytes with '...' fails.
            try:
                content = content.decode(guess_charset(msg) or 'utf-8', errors='replace')
            except LookupError:
                # Declared charset is unknown to Python.
                content = content.decode('utf-8', errors='replace')
            print('%sText: %s' % (' ' * indent, content + '...'))
        else:
            print('%sAttachment: %s' % (' ' * indent, content_type))


def get_att(msg_in):
    """Save the attachments of ``msg_in`` and return their file names.

    Files are written to the ``附件`` directory below the current working
    directory, which is created on demand.
    """
    attachment_files = []
    i = 1
    path = r"附件"
    for part in msg_in.walk():
        file_name = part.get_filename()
        print('file_name', file_name)
        if not file_name:
            continue
        # The name comes from the sender: decode it and keep only the final
        # path component so it cannot escape the target directory.
        filename = os.path.basename(str(decode_str(file_name)).replace('\\', '/'))
        data = part.get_payload(decode=True)
        if not filename or data is None:
            continue
        os.makedirs(path, exist_ok=True)
        with open(os.path.join(path, filename), 'wb') as att_file:
            att_file.write(data)
        attachment_files.append(filename)

        print(f'附件({i}): {filename}')
        i += 1
    return attachment_files