获取淘宝特定商品信息

import json
import re
from urllib.parse import quote

import bs4
import numpy as np
import requests
from bs4 import BeautifulSoup
def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The request carries a fixed ``User-Agent`` header plus the cookies parsed
    from the hard-coded ``coo`` string. Replace the placeholder value of
    ``coo`` with a cookie string copied from a browser session
    (``name=value; name=value`` form).

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response text, or ``""`` when the request fails, times
        out, or the server answers with an error status.
    """
    headers = {
        'User-Agent': 'Chorme'}
    coo = "这里输入你们自己的cookie"

    # Turn the "name=value; name=value" string into a dict for requests.
    cookies = {}
    for segment in coo.split(';'):
        name, sep, value = segment.strip().partition('=')
        # Skip empty or malformed segments (e.g. a trailing ';') instead of
        # letting one bad piece abort the whole download.
        if sep and name:
            cookies[name] = value

    try:
        r = requests.get(url, cookies=cookies, headers=headers, timeout=30)
        r.raise_for_status()
        # Use the encoding detected from the body rather than the header.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best effort: callers treat an empty string as "no page".
        return ""


def parasePage(ilt, html):
    """Extract price/title pairs from a search page and append them to ``ilt``.

    The page text is scanned for ``"view_price":"..."`` and
    ``"raw_title":"..."`` fields. The n-th price is paired with the n-th
    title; surplus entries on either side are ignored.

    Args:
        ilt: List that receives one ``[price, title]`` entry (both strings)
            per matched item. It is modified in place.
        html: Page source to scan.

    Returns:
        None. Prints ``"2"`` and leaves ``ilt`` untouched when ``html`` is
        not a string.
    """
    try:
        # Capture only the quoted values so that no later splitting on ':'
        # is needed (titles may themselves contain colons).
        prices = re.findall(r'"view_price":"([\d.]*)"', html)
        raw_titles = re.findall(r'"raw_title":"(.*?)"', html)
    except TypeError:
        print("2")
        return

    for price, raw_title in zip(prices, raw_titles):
        try:
            # Decode escape sequences such as \uXXXX without resorting to
            # eval() on text that came from the network.
            title = json.loads('"' + raw_title + '"')
        except ValueError:
            # Not a valid JSON string body; keep the text as scraped.
            title = raw_title
        ilt.append([price, title])

def printGoodList(ilt):
    """Print the collected goods as a tab-separated table on stdout.

    A header row is printed first, followed by one row per entry with a
    1-based running number, the price and the title.

    Args:
        ilt: Iterable of ``[price, title]`` entries.

    Returns:
        None. Prints ``'3'`` and stops when an entry cannot be formatted
        (for example when it has fewer than two elements).
    """
    tplt = "{:4}\t{:8}\t{:16}"
    print(tplt.format("序号", "价格", "商品名称"))
    try:
        for count, g in enumerate(ilt, start=1):
            print(tplt.format(count, g[0], g[1]))
    except (IndexError, KeyError, TypeError, ValueError):
        print('3')

def main():
    """Prompt for a search term, scrape the result pages and print the goods.

    Reads the term from stdin, fetches ``depth`` result pages, collects the
    price/title pairs found on each page and prints them as a table. A final
    ``input()`` keeps the console window open until Enter is pressed.
    """
    goods = input('请输入你想要查询的商品:\n')

    depth = 2  # number of result pages to fetch
    # Percent-encode the term so characters such as '&', '#' or spaces
    # cannot corrupt the query string.
    start_url = "https://s.taobao.com/search?q=" + quote(goods, safe="")
    infoList = []
    for i in range(depth):
        # The "s" parameter is the item offset; the original author notes
        # that each result page shows 44 items.
        url = start_url + "&s=" + str(44 * i)
        try:
            html = getHTMLText(url)
            parasePage(infoList, html)
        except Exception:
            # Best effort: a failing page must not abort the remaining ones.
            continue
    printGoodList(infoList)
    input()


# Run the interactive search only when executed as a script, so importing
# this module does not block on input().
if __name__ == "__main__":
    main()

使用方式:

先将 getHTMLText 函数中 coo 变量的占位内容替换为自己登录淘宝后浏览器中的 cookie
运行代码
输入想要查询的商品信息
显示出所有相关商品信息

相关阅读:
动态列 Excel 导出
Smart Thread Pool （智能线程池）
Nuget Server 搭建
hadoop 分布式集群安装
DRF 基本功能梳理 demo
docker 相关梳理
Python 开发面试梳理
结合 element-ui 对 Vue 相关知识点整理 (router,axios,Vuex )
VUE 相关工具 vue-cli/webpack/vue-router
Vue 基础语法相关特性

原文地址：https://www.cnblogs.com/Xiong-Jun/p/13515049.html