• 唯品会爬取


    爬取首页轮播图的链接和名称，并将图片下载到本地：

    # -*- coding: utf-8 -*-
    import urllib.request
    import ssl
    import json
    # Scrape the carousel ads: fetch the JSONP ad feed, collect each item's
    # name and image URL, print them, then download every image to D:\test.

    # Unverified SSL context. NOTE(review): it is created but never passed to
    # urlopen below, so it has no effect on the request as written.
    context = ssl._create_unverified_context()
    list_name=[]
    list_img=[]
    url='http://pcapi.vip.com/ads/index.php?callback=shopAds&type=ADSEC56K%2CADSIR7IX%2CADSX7W3G%2CADSNNLS7%2CADS7JI3F%2CADS2B669%2CADSITG64%2CADS45AV4%2CADS44T33&warehouse=VIP_NH&areaid=104104&preview=0&date_from=&time_from=&user_class=&channelId=0'
    url_data = urllib.request.urlopen(url).read().decode("utf-8")

    # The request asks for callback=shopAds, so the body is expected to be
    # JSONP: shopAds(<json>). Cut out only the text between the wrapper's
    # opening '(' and the LAST ')' so that parentheses occurring inside the
    # JSON values themselves are preserved. If no wrapper is found, the text
    # is passed to the JSON parser unchanged.
    jsonp_prefix = 'shopAds('
    wrapper_start = url_data.find(jsonp_prefix)
    wrapper_end = url_data.rfind(')')
    if wrapper_start != -1 and wrapper_end > wrapper_start:
        url_data = url_data[wrapper_start + len(jsonp_prefix):wrapper_end]

    jsDict = json.loads(url_data)
    print(jsDict)
    # NOTE(review): this key is 'ADADSEC56K' while the requested type id in
    # the URL is 'ADSEC56K' -- confirm against a live response.
    jsdata = jsDict['ADADSEC56K']
    jsdatas = jsdata['items']
    for each in jsdatas:
        list_name.append(each['name'])
        list_img.append(each['img'])
    print(list_img)
    print(list_name)

    # Save the images as 0.jpg, 1.jpg, ... The path is a raw string so that
    # the backslash in "\test" is not interpreted as a TAB escape.
    # The target directory D:\test must already exist.
    for x, j in enumerate(list_img):
        urllib.request.urlretrieve(j, r'D:\test\%s.jpg' % x)

    获取“精选”栏目里的名称、链接和图片的源代码：

    # -*- coding: utf-8 -*-
    import urllib.request
    import ssl
    import json
    # Fetch the selling-brand list, then collect and print the name and the
    # 'mobile_image_one' value of every item on floor '1'.

    # Unverified SSL context; created here but not passed to urlopen below.
    context = ssl._create_unverified_context()
    list_name = []
    list_img = []
    url = 'http://www.vip.com/index-ajax.php?act=getSellingBrandListV5&warehouse=VIP_NH&areaCode=104104&channelId=0&pagecode=b&sortType=1&province_name=%E5%B9%BF%E4%B8%9C&city_name=%E5%B9%BF%E5%B7%9E%E5%B8%82&preview=&sell_time_from=&time_from=&ids=1'

    # Download the response body and decode it as UTF-8 text.
    response = urllib.request.urlopen(url)
    url_data = response.read().decode("utf-8")
    print(url_data)

    # Parse the JSON text and walk data -> floors -> '1' -> items.
    jsDict = json.loads(url_data)
    print(jsDict)
    jsitems = jsDict['data']['floors']['1']['items']

    # Read the image field first, then the name, for each item in turn.
    pairs = [(item['mobile_image_one'], item['name']) for item in jsitems]
    list_img.extend(image for image, _ in pairs)
    list_name.extend(name for _, name in pairs)

    # Report the counts first, then the collected values.
    for collected in (list_img, list_name):
        print(len(collected))
    print(list_name)
    print(list_img)
    # url_data=url_data.replace('shopAds(','')
    # url_data=url_data.replace(')','')
    # jsDict = json.loads(url_data)
    # print(jsDict)
    # jsdata = jsDict['ADADSEC56K']
    # jsdatas = jsdata['items']
    # for each in jsdatas:
    #     list_name.append(each['name'])
    #     list_img.append(each['img'])
    # print(list_img)
    # print(list_name)
    # x=0
    # for j in list_img:
    #     urllib.request.urlretrieve(j,'D:\test\%s.jpg'%x)
    #     x=
  • 相关阅读:
    Boost for Android
    揭秘Facebook官方底层C++底层函数Folly
    ZT 将sublime text的tab改为四个空格
    ZT Linux可用的最新版本的sublime text注册
    http/ftp等的URL匹配正则表达式 ZT
    国内163的Ubuntu更新源
    oracle11g的监听配置文件中的program和env两个配置,必须干掉,客户端才能正常连接
    ubuntu下安装php7
    oracle密码过期的修改
    oracle 查看字段说明
  • 原文地址:https://www.cnblogs.com/caicaihong/p/5922668.html
Copyright © 2020-2023  润新知