import requests
from lxml import etree
import csv

url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china&channelFidStr'
res = requests.get(url)
res.encoding = res.apparent_encoding          # guess the page encoding so the Chinese text decodes correctly
data = etree.HTML(res.text)

# XPath template for the target cell of each table row; the {} placeholder takes the row index.
# With only a few rows you could also just write the XPaths out explicitly and skip the loop.
copy = '//*[@id="settlementList"]/table/tbody/tr/td/table/tbody/tr[{}]/td[2]/p/span//text()'

data_dict = []                                # actually a list of cell strings, one per row
f = open(r'e:\shuju\3.csv', 'w', newline='')  # raw string so the backslashes are not treated as escapes
writer = csv.writer(f)
writer.writerow(('riqi', 'xinzeng', 'qimo'))  # header row (pinyin: date, newly added, period-end)

for i in range(1, 4):
    das = data.xpath(copy.format(i))
    das = ''.join(das).replace('[', '').replace(']', '').replace("'", '')  # strip any stray brackets/quotes
    data_dict.append(das)

writer.writerow(data_dict)
f.close()
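As the comment above notes, with only three rows the loop is optional. A minimal sketch of that variant, assuming the same page structure: the lines below would replace the for loop and the writer.writerow(data_dict) call, reusing the data tree and writer from the script.

row_1 = ''.join(data.xpath('//*[@id="settlementList"]/table/tbody/tr/td/table/tbody/tr[1]/td[2]/p/span//text()'))
row_2 = ''.join(data.xpath('//*[@id="settlementList"]/table/tbody/tr/td/table/tbody/tr[2]/td[2]/p/span//text()'))
row_3 = ''.join(data.xpath('//*[@id="settlementList"]/table/tbody/tr/td/table/tbody/tr[3]/td[2]/p/span//text()'))
writer.writerow([row_1, row_2, row_3])  # one CSV row with the three scraped cells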