最近看简书文章时关注了几个专题作者,他们的文章都不错,爬虫和数据分析方面都写得挺好,因此想到能不能获取他们最新的文章并推送到 iPad 上的网易邮箱大师。邮件发送代码封装成了一个函数,是从廖雪峰大神那里学的。
网页源码的获取,以及从中解析出文章标题和 URL,用的依然是 requests 和 BeautifulSoup4 模块,也封装成了一个函数。
# coding: utf-8
"""Email a notice about the first article listed on a Jianshu notebook page.

The script downloads the page at ``rooturl``, extracts the notebook name
plus the title and link of the first listed article, and mails them through
an SMTP server.  It was written for Python 2; the Python 2-only pieces are
guarded so the module also runs on Python 3.
"""
import datetime
import smtplib
import sys
from email.header import Header
from email.mime.text import MIMEText
from email.utils import formataddr, parseaddr

import requests
from bs4 import BeautifulSoup as bs

_PY2 = sys.version_info[0] == 2

if _PY2:
    # Force the implicit str<->unicode codec to UTF-8.  The non-ASCII byte
    # strings below (sender/receiver placeholders) are %-formatted into
    # unicode templates and would otherwise raise UnicodeDecodeError.
    reload(sys)
    sys.setdefaultencoding('utf-8')

# Entry page of the followed author's Jianshu notebook.
rooturl = 'http://www.jianshu.com/notebooks/4204686/latest'


def gethtml(url, timeout=10):
    """Download *url* and return it parsed as a BeautifulSoup object.

    ``timeout`` (seconds) is handed to ``requests.get`` so a stalled server
    cannot block the script indefinitely.  A 4xx/5xx response raises
    ``requests.HTTPError`` instead of being parsed as if it were the page.
    """
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    res.encoding = 'utf-8'
    return bs(res.text, 'html.parser')


def _first_match(soup, selector):
    """Return the first element matching CSS *selector*.

    Raises IndexError (the type a bare ``[0]`` lookup raises) with a message
    naming the selector when nothing matches.
    """
    found = soup.select(selector)
    if not found:
        raise IndexError('no element matches %r on the fetched page' % selector)
    return found[0]


def _format_addr(s):
    """Re-format ``Name <addr>`` with the display name encoded as a UTF-8 header."""
    name, addr = parseaddr(s)
    if _PY2 and isinstance(addr, unicode):
        # On Python 2 formataddr expects a byte string for the address part.
        addr = addr.encode('utf-8')
    return formataddr((Header(name, 'utf-8').encode(), addr))


def sendmail(url):
    """Fetch *url*, read the first article entry on it and email a notice.

    The notebook name comes from the first ``h3.title a`` element; the
    article title and relative link come from the first ``h4.title a``
    element.  Sender, receiver and credentials are the placeholders below
    and must be filled in before use.

    Returns None.  Raises IndexError if either selector matches nothing,
    plus whatever ``gethtml`` and ``smtplib`` raise.
    """
    soup = gethtml(url)
    catename = _first_match(soup, 'h3.title a').text
    article = _first_match(soup, 'h4.title a')  # select once, reuse for text + href
    titlename = article.text
    titleurl = 'http://www.jianshu.com' + article['href']

    sender = '发件人邮箱'
    receiver = '收件人邮箱'
    subject = u'简书作者最新文章'
    smtpserver = 'smtp.163.com'
    username = '发件人邮箱'
    password = '发件人邮箱密码'
    date = datetime.date.today()

    # A unicode template, so formatting it with the unicode page text does
    # not depend on the interpreter's default encoding.
    body = u'''
    早安! Mr_Cxy,今天是%s!
    简书作者"向右奔跑"在【%s】目录下最新发表的文章是: %s.
    文章链接:%s
    ''' % (date, catename, titlename, titleurl)

    msg = MIMEText(body, 'plain', 'utf-8')
    msg['From'] = _format_addr(u'简书App <%s>' % sender)
    msg['To'] = _format_addr(u'yourself~ <%s>' % receiver)
    msg['Subject'] = Header(subject, 'utf-8').encode()

    # NOTE(review): port 25 without STARTTLS sends the login unencrypted;
    # consider smtplib.SMTP_SSL if the provider offers it.
    smtp = smtplib.SMTP(smtpserver, 25)
    try:
        smtp.login(username, password)
        smtp.sendmail(sender, receiver, msg.as_string())
    finally:
        # Always release the connection, even when login/sendmail fails.
        smtp.quit()


# sendmail() returns None, so this also prints "None" once the mail is sent;
# kept so the script's output stays what it was.
print(sendmail(rooturl))