• 爬虫之爬取淘宝主题市场主要产品信息


    一、爬取主题市场中大分类对应的中分类

    1. 分析网页源码,构造中分类的url地址:

    class ThememarketSpider(scrapy.Spider):
        """Crawl the Taobao home page theme market.

        Flow: ``parse`` reads the top-level categories from the home page,
        ``get_m_category`` fetches the sub-categories of each one from the
        tce.alicdn.com JSON API and appends them to a local file, and
        ``m_category_list`` receives the listing page of every sub-category.
        """

        name = 'thememarket'
        allowed_domains = ['taobao.com', 'tce.alicdn.com']
        start_urls = ['https://www.taobao.com/']

        def parse(self, response):
            """Yield one API request per top-level category on the home page.

            Each request carries a fresh ``item`` dict in ``meta`` holding the
            category name (``b_category``) and its ``dataid``.
            """
            # Top-level categories of the theme market.
            li_list = response.xpath('//div[@class="service J_Service"]/ul/li')
            for li in li_list:
                for a in li.xpath('./a'):
                    item = {}
                    # Category display name.
                    item['b_category'] = a.xpath('./text()').extract_first()
                    # The data id is used to build the sub-category API url.
                    item['dataid'] = a.xpath('./@data-dataid').extract_first()
                    if not item['dataid']:
                        # Without an id the API url would be "...?ids=None".
                        continue
                    url = 'https://tce.alicdn.com/api/data.htm?ids={}'.format(item['dataid'])
                    yield scrapy.Request(
                        url,
                        callback=self.get_m_category,
                        meta={'item': item}
                    )

        def get_m_category(self, response):
            """Record every sub-category of one category and request its page.

            The response body is JSON keyed by the category's ``dataid``; the
            sub-categories are read from ``[dataid]['value']['list']``, each
            entry providing ``name`` and ``link``.  Every sub-category is
            appended as one JSON line to ``middle_category.py`` and then
            requested with the login cookies.
            """
            base_item = response.meta['item']
            dataid = base_item['dataid']
            # The body is a JSON string; the sub-categories are a list of dicts.
            payload = json.loads(response.body.decode('utf-8'))
            m_category_list = payload[dataid]['value']['list']
            for m_category_dic in m_category_list:
                # Copy per sub-category: the requests are processed after this
                # loop has moved on, so sharing one dict would make every
                # callback see the values of the last sub-category.
                item = deepcopy(base_item)
                item['m_category_name'] = m_category_dic['name']
                item['m_category_url'] = m_category_dic['link']
                # Example record:
                # {"b_category": "内衣", "dataid": "222889", "m_category_name": "卡通睡衣",
                #  "m_category_url": "https://s.taobao.com/list?q=...&cat=1625&style=grid&seller_type=taobao&spm=a219r.lm5734.1000187.1"}
                with open('middle_category.py', 'a', encoding='utf-8') as fp:
                    fp.write(json.dumps(item, ensure_ascii=False))
                    # Text mode already translates '\n' to the platform line
                    # ending; writing os.linesep would give '\r\r\n' on Windows.
                    fp.write('\n')
                # Request the listing page of the sub-category.  Some links are
                # protocol-relative ("//s.taobao.com/..."), which scrapy.Request
                # rejects, so resolve them against the response url first.
                yield scrapy.Request(
                    response.urljoin(item['m_category_url']),
                    callback=self.m_category_list,
                    meta={'item': item},
                    cookies=cookies
                )

        # This step requires a logged-in session (the hard part).
        def m_category_list(self, response):
            """Receive the listing page of one sub-category.

            Extraction is not implemented yet; only the item passed through
            ``meta`` is picked up.
            """
            item = deepcopy(response.meta['item'])
            # Inspecting response.text shows that the data used to render the
            # page sits inside a script tag under "auctions"; it can be pulled
            # out with a regular expression, deserialised into a list and then
            # iterated to extract the fields.
    获取中等分类代码

    2. 得到数据结构如下:

    {"b_category": "女装", "dataid": "222887", "m_category_name": "夏上新", "m_category_url": "https://s.taobao.com/list?q=%E5%A4%8F%E4%B8%8A%E6%96%B0&cat=16&style=grid&seller_type=taobao&spm=a219r.lm874.1000187.1"}
    {"b_category": "工具", "dataid": "222914", "m_category_name": "电钻", "m_category_url": "https://s.taobao.com/search?spm=a21ka.8063459.320001.8.470e5602N4a38c&q=%E7%94%B5%E9%92%BB&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20160602&ie=utf8"}
    {"b_category": "宠物", "dataid": "222894", "m_category_name": "进口狗粮", "m_category_url": "https://s.taobao.com/search?q=%E8%BF%9B%E5%8F%A3%E7%8B%97%E7%B2%AE&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.2017.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306"}
    {"b_category": "农资", "dataid": "222920", "m_category_name": "农药", "m_category_url": "//s.taobao.com/list?&mid=5868&cps=yes&cat=56176009"}
    {"b_category": "美食", "dataid": "222899", "m_category_name": "牛奶", "m_category_url": "https://s.taobao.com/search?q=%E7%89%9B%E5%A5%B6&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20180724&ie=utf8"}
    {"b_category": "零食", "dataid": "222881", "m_category_name": "冰皮月饼", "m_category_url": "https://s.taobao.com/search?q=bingpiyuebing&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.2017.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306"}
    {"b_category": "生鲜", "dataid": "222905", "m_category_name": "荔枝", "m_category_url": "https://s.taobao.com/search?q=%E8%8D%94%E6%9E%9D&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20180724&ie=utf8"}
    {"b_category": "鲜花", "dataid": "222911", "m_category_name": "鲜花速递", "m_category_url": "https://s.taobao.com/list?q=%E9%B2%9C%E8%8A%B1&style=grid&seller_type=taobao&spm=a217z.7279617.1000187.1&cps=yes&cat=290501"}
    {"b_category": "动漫", "dataid": "222883", "m_category_name": "手办", "m_category_url": "https://s.taobao.com/search?q=%E6%89%8B%E5%8A%9E&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20190221&ie=utf8&cps=yes&cat=25&filter=reserve_price%5B200%2C%5D"}
    {"b_category": "游戏", "dataid": "222882", "m_category_name": "DNF", "m_category_url": "//s.taobao.com/search?initiative_id=tbindexz_20150615&spm=1.7274553.1997520841.1&sourceId=tb.index&search_type=item&ssid=s5-e&commend=all&q=%E5%9C%B0%E4%B8%8B%E5%9F%8E%E4%B8%8E%E5%8B%87%E5%A3%AB&suggest=history_1&_input_charset=utf-8&wq=%E5%9C%B0%E4%B8%8B&suggest_query=%E5%9C%B0%E4%B8%8B&source=suggest"}
    {"b_category": "影视", "dataid": "222921", "m_category_name": "你的名字", "m_category_url": "https://s.taobao.com/search?q=%E4%BD%A0%E7%9A%84%E5%90%8D%E5%AD%97+%E5%91%A8%E8%BE%B9&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20190221&ie=utf8&cps=yes&cat=25&filter=reserve_price%5B%2C%5D"}
    {"b_category": "乐器", "dataid": "222910", "m_category_name": "全新钢琴", "m_category_url": "https://s.taobao.com/search?q=%E9%92%A2%E7%90%B4&fs=1"}
    {"b_category": "户外", "dataid": "222913", "m_category_name": "鱼线", "m_category_url": "https://s.taobao.com/list?q=%E9%B1%BC%E7%BA%BF&cat=50016756%2C50010728%2C50484015%2C50010388%2C2203%2C54418001&style=grid&seller_type=taobao&spm=a217w.1099561.1000187.1"}
    {"b_category": "运动", "dataid": "222880", "m_category_name": "Yeezy 350", "m_category_url": "//s.taobao.com/search?q=Yeezy+350+%E9%85%B7%E5%8A%A8%E5%9F%8E&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20160523&ie=utf8"}
    {"b_category": "手表", "dataid": "222902", "m_category_name": "运动表", "m_category_url": "https://s.taobao.com/list?spm=a217x.7282709.2167341.2.jO1AYS&q=运动表&cat=50015926%2C1705%2C50005700%2C28&style=grid&seller_type=taobao"}
    {"b_category": "眼镜", "dataid": "222888", "m_category_name": "眼镜架", "m_category_url": "https://s.taobao.com/list?spm=a217x.7278569.2167351.2.ssMZ9w&q=眼镜架&cat=50015926%2C1705%2C50005700%2C28&style=grid&seller_type=taobao"}
    {"b_category": "五金电子", "dataid": "222897", "m_category_name": "物联网市场", "m_category_url": "https://www.taobao.com/markets/dz/iot-mart"}
    {"b_category": "本地服务", "dataid": "222924", "m_category_name": "婚纱摄影", "m_category_url": "http://s.taobao.com/list?q=婚纱摄影&cat=50970014"}
    {"b_category": "卡券", "dataid": "222916", "m_category_name": "劳动节福利", "m_category_url": "//www.taobao.com/markets/quan/51kqkh"}
    {"b_category": "学习", "dataid": "222904", "m_category_name": "英语四级", "m_category_url": "https://www.taobao.com/markets/xue/cet4444444"}
    {"b_category": "家庭保健", "dataid": "222901", "m_category_name": "创可贴", "m_category_url": "https://s.taobao.com/search?spm=a21bt.186725.976177.2.ff26649RmBiyB&initiative_id=staobaoz_20150210&tab=all&q=%E5%88%9B%E5%8F%AF%E8%B4%B4&cps=yes&stats_click=search_radio_all%253A1&cat=55510001"}
    {"b_category": "餐厨", "dataid": "222903", "m_category_name": "收纳整理", "m_category_url": "https://s.taobao.com/list?source=youjia&cat=55098010"}
    {"b_category": "百货", "dataid": "222891", "m_category_name": "保温杯", "m_category_url": "https://s.taobao.com/list?source=youjia&q=%E4%BF%9D%E6%B8%A9%E6%9D%AF"}
    {"b_category": "DIY", "dataid": "222909", "m_category_name": "定制T恤", "m_category_url": "https://s.taobao.com/search?q=%E5%AE%9A%E5%88%B6T%E6%81%A4&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20180725&ie=utf8"}
    {"b_category": "用品", "dataid": "222900", "m_category_name": "车载空气净化器", "m_category_url": "https://s.taobao.com/search?q=%E8%BD%A6%E8%BD%BD%E7%A9%BA%E6%B0%94%E5%87%80%E5%8C%96%E5%99%A8&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20171031&ie=utf8"}
    {"b_category": "二手车", "dataid": "222892", "m_category_name": "司法车拍卖", "m_category_url": "//sf.taobao.com/item_list.htm?category=50025972"}
    {"b_category": "汽车", "dataid": "222912", "m_category_name": "买车送油卡", "m_category_url": "https://list.tmall.com/search_product.htm?q=%B4%F3%CB%D1%B3%B5&type=p&spm=a220m.1000858.a2227oh.d100&from=.list.pc_1_searchbutton"}
    {"b_category": "家纺", "dataid": "222884", "m_category_name": "夏凉被", "m_category_url": "//s.taobao.com/list?source=youjia&q=夏凉被"}
    {"b_category": "家饰", "dataid": "222922", "m_category_name": "窗帘", "m_category_url": "//s.taobao.com/list?source=youjia&q=窗帘"}
    {"b_category": "家具", "dataid": "222915", "m_category_name": "沙发", "m_category_url": "//s.taobao.com/list?source=youjia&q=沙发"}
    {"b_category": "办公", "dataid": "222923", "m_category_name": "打印机", "m_category_url": "https://s.taobao.com/search?q=%E6%89%93%E5%8D%B0%E6%9C%BA"}
    {"b_category": "建材", "dataid": "222919", "m_category_name": "建材优品", "m_category_url": "https://market.m.taobao.com/apps/abs/10/350/214270?wh_weex=true&psId=1902014&data_prefetch=true"}
    {"b_category": "装修", "dataid": "222877", "m_category_name": "全包", "m_category_url": "https://www.taobao.com/markets/youjia/jxzjpc?spm=a21bo.50862.201867-links-10.25.psFZT5&wh_ttid=pc"}
    {"b_category": "珠宝", "dataid": "222917", "m_category_name": "琥珀蜜蜡", "m_category_url": "https://s.taobao.com/list?spm=a21bo.7724922.8409-line-1.2.1LEJ8o&q=%E8%9C%9C%E8%9C%A1&cat=50015926%2C1705%2C50005700%2C28&style=grid&seller_type=taobao&scm=1007.12013.16568.300000000000000"}
    {"b_category": "保健品", "dataid": "222918", "m_category_name": "B族维生素", "m_category_url": "https://s.taobao.com/search?q=B%E6%97%8F%E7%BB%B4%E7%94%9F%E7%B4%A0%E5%85%A8%E7%90%83%E8%B4%AD&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20160314&ie=utf8"}
    {"b_category": "洗护", "dataid": "222896", "m_category_name": "洗发水", "m_category_url": "https://s.taobao.com/list?q=%E6%B4%97%E5%8F%91%E6%B0%B4&cat=1801%2C50071436%2C50010788&style=grid&seller_type=taobao&spm=a219r.lm843.1000187.1"}
    {"b_category": "手机", "dataid": "222879", "m_category_name": "iPhone xs", "m_category_url": "https://www.taobao.com/markets/3c/tbdc?spm=a21bo.2017.201867-links-3.35.483211d9JtR4Cw"}
    {"b_category": "数码", "dataid": "222908", "m_category_name": "游戏主机", "m_category_url": "https://s.taobao.com/list?spm=a217h.9580640.831011.57.3b0025aalRtynl&q=%E6%B8%B8%E6%88%8F%E4%B8%BB%E6%9C%BA&style=grid&seller_type=taobao&cat=&cps=yes&ppath=5409757%3A37174758&filter=reserve_price%5B1100%2C4000%5D&sort=default"}
    {"b_category": "家电", "dataid": "222878", "m_category_name": "淘宝速达", "m_category_url": "https://www.taobao.com/markets/3c/tbdc?spm=a21bo.2017.201867-main.11.223011d9iPVHG3"}
    {"b_category": "美妆", "dataid": "222893", "m_category_name": "面膜", "m_category_url": "https://s.taobao.com/list?spm=a21bo.7724922.8383.3.4YQNZR&seller_type=taobao&q=%E9%9D%A2%E8%86%9C"}
    {"b_category": "孕产", "dataid": "222885", "m_category_name": "美妈大衣", "m_category_url": "https://s.taobao.com/search?initiative_id=tbindexz_20170306&ie=utf8&spm=a21bo.2017.201856-taobao-item.2&sourceId=tb.index&search_type=item&ssid=s5-e&commend=all&imgfile=&q=%E5%A4%A7%E8%A1%A3%E5%A5%B3%E5%86%AC+%E5%8A%A0%E5%8E%9A&suggest=0_4&_input_charset=utf-8&wq=%E5%A4%A7%E8%A1%A3&suggest_query=%E5%A4%A7%E8%A1%A3&seller_type=taobao&source=suggest&cps=yes&cat=50067081"}
    {"b_category": "童装玩具", "dataid": "222907", "m_category_name": "连衣裙", "m_category_url": "https://s.taobao.com/list?q=女童+连衣裙+春&mid=869&style=grid&seller_type=taobao&spm=a219r.lm869.1000187.1&cps=yes&cat=50008165"}
    {"b_category": "配件", "dataid": "222898", "m_category_name": "帽子", "m_category_url": "https://www.taobao.com/market/fspj/new.php"}
    {"b_category": "男装", "dataid": "222890", "m_category_name": "春夏新品", "m_category_url": "https://s.taobao.com/list?q=%E6%96%B0%E5%93%81&cat=50344007&style=grid&seller_type=taobao&spm=a217m.8316598.1000187.1"}
    {"b_category": "箱包", "dataid": "222906", "m_category_name": "女包", "m_category_url": "https://s.taobao.com/list?q=%E5%A5%B3%E5%8C%85&cat=50006842%2C50072688%2C50072689%2C50072686&style=grid&seller_type=taobao&spm=a217q.8031046.1000187.1"}
    {"b_category": "鞋靴", "dataid": "222886", "m_category_name": "流行女鞋", "m_category_url": "https://s.taobao.com/search?q=%E6%B5%81%E8%A1%8C%E5%A5%B3%E9%9E%8B&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20190320&ie=utf8"}
    {"b_category": "内衣", "dataid": "222889", "m_category_name": "法式内衣", "m_category_url": "https://s.taobao.com/list?q=%E6%B3%95%E5%BC%8F%E5%86%85%E8%A1%A3&cat=1625&style=grid&seller_type=taobao&spm=a219r.lm5734.1000187.1"}
    爬取的大分类及中等分类数据结构(部分数据)

     二、爬取各中等分类对应小分类详细信息

    女装中裤子:

    https://s.taobao.com/list?q=%E8%A3%A4%E5%AD%90&cat=16&style=grid&seller_type=taobao&spm=a219r.lm874.1000187.1

    生鲜中牛排:

    https://s.taobao.com/search?q=%E7%89%9B%E6%8E%92&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20180724&ie=utf8

    通过分析发现:
      向中等分类的url地址发送请求得到response对象,页面渲染所需的数据存放在script标签中的"auctions"字段下面,可以先通过正则提取,再进行反序列化得到列表并提取数据

    script标签中数据结构如下(注:示例中的 u003d、u0026、u003c、u003e 原本是转义序列 \u003d、\u0026、\u003c、\u003e,即 =、&、<、>,反斜杠在转载时丢失):

    "auctions": [{
                    "i2iTags": {
                        "samestyle": {"url": ""},
                        "similar": {"url": "/list?typeu003dsimilaru0026appu003di2iu0026rec_typeu003d1u0026uniqpidu003d-1827796196u0026nidu003d601691950527"}
                    },
                    "sku": [{
                        "picUrl": "//g-search1.alicdn.com/img/bao/uploaded/i4/i1/886407894/O1CN01e24HYM28BV6A2ZdOL_!!886407894.jpg",
                        "skuParam": "skuu003d1627207:5502761856#detail"
                    }, {
                        "picUrl": "//g-search2.alicdn.com/img/bao/uploaded/i4/i4/886407894/O1CN01k88CMK28BV68prcdf_!!886407894.jpg",
                        "skuParam": "skuu003d1627207:5502761857#detail"
                    }],
                    "p4pTags": [],
                    "nid": "601691950527",
                    "category": "162201",
                    "pid": "-1827796196",
                    "title": "女装-舒适简约风!通勤加分款绑带九分西装感阔腿裤 2色",
                    "raw_title": "女装-舒适简约风!通勤加分款绑带九分西装感阔腿裤 2色",
                    "pic_url": "//g-search3.alicdn.com/img/bao/uploaded/i4/i1/886407894/O1CN01e24HYM28BV6A2ZdOL_!!886407894.jpg",
                    "detail_url": "//item.taobao.com/item.htm?idu003d601691950527u0026nsu003d1u0026abbucketu003d14",
                    "view_price": "178.00",
                    "view_fee": "7.00",
                    "item_loc": "广东 广州",
                    "view_sales": "260人付款",
                    "comment_count": "",
                    "user_id": "886407894",
                    "nick": "苹果的鞋店",
                    "shopcard": {
                        "levelClasses": [{"levelClass": "icon-supple-level-guan"}, {"levelClass": "icon-supple-level-guan"}, {"levelClass": "icon-supple-level-guan"}, {"levelClass": "icon-supple-level-guan"}, {"levelClass": "icon-supple-level-guan"}],
                        "isTmall": false,
                        "delivery": [494, 1, 6223],
                        "description": [492, 1, 5911],
                        "service": [494, 1, 6476],
                        "encryptedUserId": "UOmgLMmNuOmk0",
                        "sellerCredit": 15,
                        "totalRate": 9988
                    },
                    "icon": [{
                        "title": "金牌卖家从千万卖家中脱颖而出,会为您的购物体验带来更多信任和安心",
                        "dom_class": "icon-service-jinpaimaijia",
                        "position": "1",
                        "show_type": "0",
                        "icon_category": "shop",
                        "outer_text": "0",
                        "html": "",
                        "icon_key": "icon-service-jinpaimaijia",
                        "trace": "srpservice",
                        "traceIdx": 0,
                        "innerText": "金牌卖家",
                        "url": "//www.taobao.com/go/act/jpmj.php",
                        "iconPopupNormal": {"dom_class": "icon-service-jinpaimaijia-l"}
                    }, {
                        "title": "年轻人的潮流穿搭社区,海量时尚新款,趣味互动体验。",
                        "dom_class": "icon-fest-ifashion",
                        "position": "1",
                        "show_type": "0",
                        "icon_category": "baobei",
                        "outer_text": "0",
                        "html": "",
                        "icon_key": "icon-fest-ifashion",
                        "trace": "srpservice",
                        "traceIdx": 1,
                        "innerText": "ifashion",
                        "iconPopupNormal": {"dom_class": "icon-fest-ifashionfuceng"}
                    }, {
                        "title": "当季新品",
                        "dom_class": "icon-service-xinpin",
                        "position": "1",
                        "show_type": "0",
                        "icon_category": "baobei",
                        "outer_text": "0",
                        "html": "",
                        "icon_key": "icon-service-xinpin",
                        "trace": "srpservice",
                        "traceIdx": 2,
                        "innerText": "新品",
                        "url": "//service.taobao.com/support/knowledge-1138476.htm"
                    }],
                    "comment_url": "//item.taobao.com/item.htm?idu003d601691950527u0026nsu003d1u0026abbucketu003d14u0026on_commentu003d1",
                    "shopLink": "//store.taobao.com/shop/view_shop.htm?user_number_idu003d886407894"
                }, {
                    "i2iTags": {
                        "samestyle": {"url": "/list?typeu003dsamestyleu0026appu003di2iu0026rec_typeu003d1u0026uniqpidu003d-995945099u0026nidu003d599067007410"},
                        "similar": {"url": "/list?typeu003dsimilaru0026appu003di2iu0026rec_typeu003d1u0026uniqpidu003d-995945099u0026nidu003d599067007410"}
                    },
                    "sku": [{
                        "picUrl": "//g-search3.alicdn.com/img/bao/uploaded/i4/i2/2303991755/O1CN01ScmHLa1Oppdkydtke_!!2303991755.jpg",
                        "skuParam": "skuu003d1627207:4911458640#detail"
                    }, {
                        "picUrl": "//g-search2.alicdn.com/img/bao/uploaded/i4/i1/2303991755/O1CN01ecN5Dh1Oppdit2yBe_!!2303991755.jpg",
                        "skuParam": "skuu003d1627207:4911458641#detail"
                    }, {
                        "picUrl": "//g-search2.alicdn.com/img/bao/uploaded/i4/i2/2303991755/O1CN016zUWD11Oppdhzs06o_!!2303991755.jpg",
                        "skuParam": "skuu003d1627207:4913441111#detail"
                    }, {
                        "picUrl": "//g-search1.alicdn.com/img/bao/uploaded/i4/i1/2303991755/O1CN01TKC34c1OppdkcETzE_!!2303991755.jpg",
                        "skuParam": "skuu003d1627207:4913441112#detail"
                    }],
                    "p4pTags": [],
                    "nid": "599067007410",
                    "category": "162205",
                    "pid": "-995945099",
                    "title": "韩国泫雅牛仔裤女2019夏季薄款高腰宽松长裤阔腿垂感直筒老爹u003cspan classu003dHu003e裤子u003c/spanu003e",
                    "raw_title": "韩国泫雅牛仔裤女2019夏季薄款高腰宽松长裤阔腿垂感直筒老爹裤子",
                    "pic_url": "//g-search3.alicdn.com/img/bao/uploaded/i4/i3/2303991755/O1CN01iD8ErS1OppdmDECAD_!!0-item_pic.jpg",
                    "detail_url": "//item.taobao.com/item.htm?idu003d599067007410u0026nsu003d1u0026abbucketu003d14",
                    "view_price": "438.00",
                    "view_fee": "0.00",
                    "item_loc": "上海",
                    "view_sales": "320人付款",
                    "comment_count": "73",
                    "user_id": "2303991755",
                    "nick": "时尚名品团购88",
                    "shopcard": {
                        "levelClasses": [{"levelClass": "icon-supple-level-guan"}, {"levelClass": "icon-supple-level-guan"}],
                        "isTmall": false,
                        "delivery": [499, 1, 9235],
                        "description": [499, 1, 9475],
                        "service": [499, 1, 9327],
                        "encryptedUserId": "UvCvWvGkSvFcbMQTT",
                        "sellerCredit": 12,
                        "totalRate": 9969
                    },
                    "icon": [{
                        "title": "金牌卖家从千万卖家中脱颖而出,会为您的购物体验带来更多信任和安心",
                        "dom_class": "icon-service-jinpaimaijia",
                        "position": "1",
                        "show_type": "0",
                        "icon_category": "shop",
                        "outer_text": "0",
                        "html": "",
                        "icon_key": "icon-service-jinpaimaijia",
                        "trace": "srpservice",
                        "traceIdx": 3,
                        "innerText": "金牌卖家",
                        "url": "//www.taobao.com/go/act/jpmj.php",
                        "iconPopupNormal": {"dom_class": "icon-service-jinpaimaijia-l"}
                    }, {
                        "title": "当季新品",
                        "dom_class": "icon-service-xinpin",
                        "position": "1",
                        "show_type": "0",
                        "icon_category": "baobei",
                        "outer_text": "0",
                        "html": "",
                        "icon_key": "icon-service-xinpin",
                        "trace": "srpservice",
                        "traceIdx": 4,
                        "innerText": "新品",
                        "url": "//service.taobao.com/support/knowledge-1138476.htm"
                    }],
                    "comment_url": "//item.taobao.com/item.htm?idu003d599067007410u0026nsu003d1u0026abbucketu003d14u0026on_commentu003d1",
                    "shopLink": "//store.taobao.com/shop/view_shop.htm?user_number_idu003d2303991755"
                }]
    数据主要部分

    分析发现:商品的标题、价格、销量、店铺名、商品详细信息等均可以在上面的数据中找到 

    三、最难点:登录

    登录可以通过selenium模拟浏览器完成,也可以直接构造cookies;本文采用构造cookies的方式登录

    构造的cookies如下:

    # Cookies sent with the sub-category listing requests so that they are
    # treated as coming from a logged-in session.
    # NOTE(review): the values look like a capture from one browser session and
    # "用户名" ("user name") appears to be a placeholder - replace both with
    # your own before running.
    # NOTE(review): the first entry maps what looks like a cookie value to a
    # domain; it was probably mis-split when the cookies were copied - confirm.
    cookies = {
        'UtASsssmfA%3D%3D': '.taobao.com',
        '_l_g_': 'Ug%3D%3D',
        '_mw_us_time_': '1567091473860',
        '_nk_': '用户名',
        '_tb_token_': '7e33e55e53b13',
        'atpsida': '55fd9900b78c23b8f6a27337_1567093867_2',
        'atpsidas': 'b0472228ad0f01e87b72f3a0_1567093867_2',
        'aui': '685677682',
        'cna': 'mzvsFYGJ+2sCAT2rp/6P69lK',
        'cnaui': '685677682',
        'cookie1': 'BqVqOkCmn10hbUhhIn%2BLf1Et7P%2Bwfnm6A1OlwKDtQXI%3D',
        'cookie17': 'VWeUPHKJc8pe',
        'cookie2': '150e39a32e88c0391860ea6a2ef4d118',
        'csg': '8404aba9',
        'dnk': '用户名',
        'existShop': 'MTU2NzA5Mzg2Ng%3D%3D',
        'isg': 'BAQE8j7JBZBuX7FlmcnRr9FD1YI2tSmC-rL7UB6ljU-SSaQTRi7YF7XjiaE01WDf',
        'l': 'cBgnzyqRqzGPE7QQBOCZIuI8Ly7TQIRYmuPRwCcMi_5I86L66lQOkuXDmFp6cjWd9SLB4k6UXwe9-etks3lD1kd8E5vP.',
        'lgc': '用户名',
        'mt': 'ci=103_1',
        'publishItemObj': 'Ng%3D%3D',
        'sca': 'eeb68204',
        'sg': '92d',
        'skt': 'c7ecc2d4ad69b396',
        't': '5a8cf2eb574c569b113486d5f115b115',
        'tbsa': '5f33fe694305a4915be0feca_1567093867_2',
        'tg': '0',
        'thw': 'cn',
        'tracknick': '用户名',
        'ubn': 'p',
        'uc1': 'cookie16=UIHiLt3xCS3yM2h4eKHS9lpEOw%3D%3D&cookie21=UtASsssmfaNq04quL1TvFQ%3D%3D&cookie15=VFC%2FuZ9ayeYq2g%3D%3D&existShop=false&pas=0&cookie14=UoTaH0e4SsPaNA%3D%3D&tag=8&lng=zh_CN',
        'uc3': 'vt3=F8dBy3MJk0cl4p%2FVwSw%3D&lg2=WqG3DMC9VAQiUQ%3D%3D&nk2=F5RCZsMkKx4R&id2=VWeUPHKJc8pe',
        'uc4': 'id4=0%40V8Zvd9Pp%2FltGX%2FCTcQaWyUbxtJ0%3D&nk4=0%40FY4JiMyz7hjz0yGy77nHfHvacbs%3D',
        'ucn': 'center',
        'unb': '685677682',
        'v': '0',
    }
    cookies

    未完待续……

  • 相关阅读:
    获取当前时间并格式化,CTime类
    疑问:VS在调试的过程中,总是会提示正在加载picface.dll的符号,然后卡死在那
    Markup解析XML——文档,说明
    .net Core 获取当前程序路径
    Excel中的细节
    心血来潮尝试一个小项目(WinForm)
    bat文件以管理员运行
    DataGridView一些总结
    常见辅助类、方法
    向txt文件中添加或者追加文字字符串
  • 原文地址:https://www.cnblogs.com/wuyongquan/p/11432872.html
Copyright © 2020-2023  润新知