爬虫-某游戏交易网站商品信息爬取
本站所有与爬虫相关的技术仅限于学习交流,请勿商用!切勿给对方服务器造成压力!
from requests_html import HTMLSession
# Request headers passed to every session.get() call in this script; only a
# desktop Chrome user-agent string is set.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/75.0.3770.142 Safari/537.36"
    ),
}

# Search-results URL used as the script's entry point. The query-string IDs
# are opaque values taken from the site; the keyword parameter is left empty.
url_api = (
    "http://s.5173.com/search/20c8bbc1b9794fc98bd96859624d4769.shtml"
    "?gp=&op="
    "&ga=9ae12dfc4ab641baacdb406f6d0df692"
    "&gs=39621d91f05c48dab33f15a5cb335894"
    "&cate=243ab3c1e7614ba2b8e96a3a43754603"
    "&keyword="
)
# Module-level HTMLSession instance; get_url and get_shop_info both issue
# their GET requests through it.
session = HTMLSession()
def get_url(url_api):
    """Fetch a page and return the URL held in its canonical link tag.

    Args:
        url_api: URL of the page to request.

    Returns:
        The href value captured from the page's
        ``<link rel="canonical" href=... />`` tag.

    Raises:
        ValueError: If the response HTML contains no matching canonical
            link tag.
    """
    r = session.get(url=url_api, headers=headers)
    # search() returns None when the template does not match; check before
    # indexing so the failure is reported clearly instead of surfacing as
    # "'NoneType' object is not subscriptable".
    match = r.html.search('<link rel="canonical" href="{}" />')
    if match is None:
        raise ValueError(f"no canonical link tag found in page: {url_api!r}")
    return match[0]
def _first_text(node, selector):
    """Return the text of the first match for selector under node, or 'N/A' if none."""
    found = node.find(selector, first=True)
    # find(..., first=True) yields None when nothing matches the selector.
    return found.text if found is not None else "N/A"


def get_shop_info(url):
    """Fetch a listing page and print name, ratio and total price per item.

    Every element matching ``.sin_pdlbox`` is treated as one item. For each
    item three labelled lines are printed, followed by a line of 100
    asterisks as a separator. A field whose sub-element is missing is
    printed as ``N/A`` rather than aborting the whole listing.

    Args:
        url: URL of the listing page to request.

    Returns:
        None. Results are written to standard output.
    """
    r = session.get(url=url, headers=headers)
    for item in r.html.find('.sin_pdlbox'):
        # Labels: item name, ratio, total price.
        print("商品名:", _first_text(item, '.pdlist_info .tt h2'))
        print("比例:", _first_text(item, '.pdlist_unitprice > li'))
        print("总价:", _first_text(item, '.pdlist_price .pr'))
        print('*' * 100)
if __name__ == "__main__":
    # Resolve the canonical URL of the search page first, then print the
    # items listed on that page.
    url_api = get_url(url_api)
    get_shop_info(url_api)