# coding=gbk
"""Scrape the text of the pages linked from one 17k.com list page.

The script downloads the list page, collects the hrefs matched by the list
XPath, downloads each linked page in turn, and appends every text node found
under the ``readArea`` paragraphs to ``17kk.txt`` (UTF-8), each followed by a
single space.

The script runs at import time; it has no entry-point guard.
"""
from urllib.parse import urljoin

import requests
from lxml import etree

SITE_ROOT = 'https://www.17k.com'
LIST_URL = 'https://www.17k.com/list/3357123.html'
OUTPUT_PATH = "17kk.txt"

# Seconds to wait on the server before giving up; without a timeout
# requests would block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36',
}

LINK_XPATH = "/html/body/div[5]/dl/dd/a/@href"
TEXT_XPATH = "//*[@id='readArea']/div[1]/div[2]/p//text()"


def _fetch_tree(url, encoding=None):
    """Download *url* and return its parsed lxml HTML tree.

    Args:
        url: Absolute URL of the page to download.
        encoding: When given, overrides the encoding requests would
            otherwise use to decode the response body.

    Returns:
        The root element produced by ``etree.HTML`` for the page text.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the server does not answer within ``REQUEST_TIMEOUT`` seconds.
    """
    response = requests.get(
        url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT
    )
    if encoding is not None:
        response.encoding = encoding
    return etree.HTML(response.text)


page_hrefs = _fetch_tree(LIST_URL).xpath(LINK_XPATH)

# Open the output once for the whole run instead of once per text fragment;
# the with-block also guarantees the file is closed if a request fails.
with open(OUTPUT_PATH, "a", encoding='utf-8') as out_file:
    for href in page_hrefs:
        # urljoin copes with hrefs that lack a leading slash or are already
        # absolute, where plain concatenation would build a broken URL.
        page_tree = _fetch_tree(urljoin(SITE_ROOT, href), encoding='utf-8')
        for fragment in page_tree.xpath(TEXT_XPATH):
            out_file.write(fragment)
            out_file.write(' ')