• 京东


    # -*- coding: utf-8 -*-
    import requests
    import urllib.request
    import json
    import xlwt
    import time
    import datetime
    # list_url=[]
    # list_title=[]
    # list_img=[]
    # wb = xlwt.Workbook()
    # ws = wb.add_sheet('电脑数码')
    # url='http://f.3.cn/index-floor?argv=basic_3'
    # url_data = urllib.request.urlopen(url).read().decode("utf-8")
    # # url_data=requests.get(url)
    # # url_data=url_data.text
    # url_data=url_data.replace('jsonCallBackbasic_6(','')
    # url_data=url_data.replace(')','')
    # jsDict = json.loads(url_data)
    # jsdata=jsDict['data']
    def one_floor(jsdata, wb, ws):
        """Write one floor's links to a worksheet and download its images.

        Reads ``jsdata['cols'][0]`` and collects, in this order: the cover
        items of the first two ``content`` entries (interleaved left/right),
        the ``brand`` items, the ``bottom`` items of each content entry, and
        the ``tag`` items.  Every collected item becomes one worksheet row;
        every item that has an image is then downloaded.

        Args:
            jsdata: Decoded JSON mapping with a ``cols`` list whose first
                element has the keys ``brand``, ``content`` and ``tag``.
            wb: Workbook-like object; only ``wb.save(path)`` is called
                (presumably an ``xlwt.Workbook`` -- confirm with the caller).
            ws: Worksheet-like object; only ``ws.write(row, col, value)`` is
                called.

        Raises:
            KeyError, IndexError: If ``jsdata`` lacks an expected key or has
                fewer than two ``content`` entries.
        """
        # Raw strings keep the Windows backslashes literal.  The original
        # image path ended in ``\'``, which escaped the closing quote and
        # made the whole file a syntax error.
        workbook_path = r'D:\jd\jd_pc.xls'
        picture_dir = 'D:\\jd_pc_pic\\'
        # Characters that are not allowed in Windows file names; titles come
        # straight from the JSON payload and may contain any of them.
        unsafe_chars = str.maketrans({char: '_' for char in '\\/:*?"<>|'})

        column = jsdata['cols'][0]
        brands = column['brand']
        tags = column['tag']
        left = column['content'][0]
        right = column['content'][1]

        # One (url, image, title) tuple per worksheet row, in output order.
        entries = []

        # Interleave left/right covers.  Each list is bounds-checked on its
        # own so a length mismatch neither raises nor drops trailing items.
        covers = (('SL21', left['cover']), ('SR21', right['cover']))
        for index in range(max(len(left['cover']), len(right['cover']))):
            for prefix, cover in covers:
                if index < len(cover):
                    item = cover[index]
                    entries.append((item['url'],
                                    'http:' + item['imgUrl'],
                                    prefix + item['title']))

        for position, brand in enumerate(brands, 1):
            entries.append((brand['href'],
                            'http:' + brand['src'],
                            'S4' + str(position)))

        # NOTE(review): numbering restarts for every group, so titles (and
        # therefore image file names) repeat if a ``bottom`` list holds more
        # than one group -- confirm against real data.
        bottoms = (('SL3', left['bottom']), ('SR3', right['bottom']))
        for prefix, bottom in bottoms:
            for group in bottom:
                for position, item in enumerate(group, 1):
                    entries.append((item['url'],
                                    'http:' + item['imgUrl'],
                                    prefix + str(position)))

        # Tags carry no picture; 'no img' marks rows to skip when downloading.
        for position, tag in enumerate(tags, 1):
            entries.append((tag['url'],
                            'no img',
                            'S1' + str(position) + tag['title']))

        headers = ('地点', '位置', 'URL', '照片', '主题', '时间')
        for col, header in enumerate(headers):
            ws.write(0, col, header)

        for row, (url, img, title) in enumerate(entries, 1):
            add_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            ws.write(row, 0, '广州')
            ws.write(row, 1, '京东pc楼层')
            ws.write(row, 2, url)
            ws.write(row, 3, img)
            ws.write(row, 4, title)
            ws.write(row, 5, add_time)
        wb.save(workbook_path)

        print([title for _, _, title in entries])
        print([img for _, img, _ in entries])
        print([url for url, _, _ in entries])

        for _, img, title in entries:
            print(img)
            if img == 'no img':
                continue
            file_name = title.translate(unsafe_chars) + '.jpg'
            urllib.request.urlretrieve(img, picture_dir + file_name)
    
    # print(jscols_brand)
    # # print(len(jscols))
    # print(jscols)
  • 相关阅读:
    离线计算框架 MapReduce
    Hadoop概述
    Linux之rpm/yum
    Linux之磁盘分区
    利用CMD合并多个VOB文件
    android中的simple_list_item
    jquery中的跨域-jsonp格式
    安卓代码中设置ImageView属性
    Android中常用的Adapter的种类和用法
    C#导入excel文件到oracle的方法
  • 原文地址:https://www.cnblogs.com/caicaihong/p/6030861.html
Copyright © 2020-2023  润新知