"""Scrape the hk49 results pages of obaiwan.com into ``history.txt``.

For every year from 2003 through 2015 the results page is requested with a
form POST, each table row matching ``p`` is extracted, and the captured
cells of that row are written to ``history.txt`` as one comma-joined record
followed by a single space.
"""
import re

import requests
from lxml import etree

url = 'http://www.obaiwan.com/hk49/results/'

# Building blocks of one result row. A row is made of, in order:
# 1 ignored cell, 2 captured plain cells, 6 captured bold cells,
# 7 ignored cells and 1 final captured bold cell (9 captures in total).
_SKIPPED_CELL = r'<td >.+?</td>'
_TEXT_CELL = r'<td >(.+?)</td>'
_BOLD_CELL = r'<td ><b style=".+?">(.+?)</b></td>'

# Tags are joined with \s+ rather than one literal whitespace character so
# the pattern matches whether the markup separates them with a space, a
# newline or CRLF.
p = re.compile(r'\s+'.join(
    ['<tr >', _SKIPPED_CELL]
    + [_TEXT_CELL] * 2
    + [_BOLD_CELL] * 6
    + [_SKIPPED_CELL] * 7
    + [_BOLD_CELL, '</tr>']
))

# The context manager closes the file even if a request or write fails.
with open('history.txt', 'w') as f:
    for year in range(2003, 2016):
        # NOTE(review): the 'submit' value looks like percent-encoded GBK
        # bytes of the form's button label; requests will percent-encode the
        # string again when building the body -- confirm the server ignores it.
        data = {'qinum': year, 'submit': '%CC%E1%BD%BB%B2%E9%D1%AF'}
        # A timeout keeps one stalled connection from hanging the whole run.
        r = requests.post(url, data=data, timeout=30)
        # Force the decoding used for r.text instead of relying on detection.
        r.encoding = 'gb2312'
        # Write this year's rows exactly once; nothing is carried over to the
        # next iteration, so earlier years can never be emitted twice.
        f.write(''.join(','.join(row) + ' ' for row in p.findall(r.text)))