• E-commerce work code


    from selenium import webdriver
    from scrapy.selector import Selector
    import time
    import random
    import pymysql
    from urllib import parse
    import re
    import os


    class spider(object):

        # builds the absolute URL of the next result page from a shop-search page source
        def chul3(self, dates):
            a = Selector(text=dates)
            next_url = a.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div[10]/a[11]/@href').extract_first("")
            return 'https:' + next_url

    chuli = spider()

    # MySQL connection; the 商品id table holds one row per product
    conection = pymysql.connect(host='localhost', user='root', password='123', db='7.25',
                                charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)
    with conection.cursor() as cursor:
        # ids of the products that are already in the table
        sql1 = "select * from 商品id"
        cursor.execute(sql1)
        shop_id = cursor.fetchall()
        shop_oldid = [i['id'] for i in shop_id]
        # products that went on sale exactly 1, 7 or 30 days ago, tagged with the
        # sales column (1天销量 / 7天销量 / 30天销量) that should be filled in today
        sql1 = '''
            SELECT `商品id`.id, `上架时间`, '1天销量' as 日期
            FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 1
            UNION
            SELECT `商品id`.id, `上架时间`, '7天销量' as 日期
            FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 7
            UNION
            SELECT `商品id`.id, `上架时间`, '30天销量' as 日期
            FROM `商品id` WHERE TIMESTAMPDIFF(DAY, `上架时间`, CURDATE()) = 30'''
        cursor.execute(sql1)
        shop_id = cursor.fetchall()
        shop_olxx = [i for i in shop_id]
        conection.commit()

    # a fresh cursor for the crawl itself (the one above is closed when the with block exits)
    cursor = conection.cursor()
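    # Assumed layout of the 商品id table, inferred from the queries and updates in this
    # script (the original post does not show the DDL; the names are real, the types are guesses):
    #
    #   CREATE TABLE `商品id` (
    #       `品牌`      VARCHAR(64),
    #       `id`        VARCHAR(32) PRIMARY KEY,
    #       `图片链接`  VARCHAR(255),
    #       `价格`      VARCHAR(32),
    #       `标题`      VARCHAR(255),
    #       `商品地址`  VARCHAR(255),
    #       `类目`      VARCHAR(64),
    #       `上架时间`  DATE,
    #       `尺码`      VARCHAR(255),
    #       `颜色`      VARCHAR(255),
    #       `sku量`     INT,
    #       `1天销量`   VARCHAR(32),
    #       `7天销量`   VARCHAR(32),
    #       `30天销量`  VARCHAR(32)
    #   ) DEFAULT CHARSET = utf8mb4;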
    # one IE session is reused for every request; the driver instance shadows the
    # selenium webdriver module name from here on
    webdriver = webdriver.Ie()
    url = 'https://login.taobao.com/member/login.jhtml?spm=a21bo.50862.754894437.1.5dcec6f76Oq9Wh&f=top&redirectURL=https%3A%2F%2Fwww.taobao.com%2F%3Fspm%3Da1z10.1-c-s.1581860521.1.559a715a3EnsHq'
    webdriver.get(url)
    time.sleep(20)   # pause long enough to log in to Taobao by hand


    def lll(url):
        # crawl one shop-search result page, then recurse into the next page
        webdriver.implicitly_wait(200)
        webdriver.get(url)
        # blocks (via the implicit wait) until the pagination bar has rendered
        myDynamicElement = webdriver.find_element_by_class_name('pagination')
        a = webdriver.page_source
        time.sleep(random.randrange(2, 6))
        selects = Selector(text=a)
        for i in selects.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div/dl'):
            bd_pig = i.xpath("./dt/a/img/@src").re_first('(.*)_', '')   # picture URL without the size suffix
            bd_name = ''.join(re.findall('[\u4e00-\u9fa5]', i.xpath('./dd[1]/a/text()').extract_first('')))   # Chinese characters of the title
            bd_id = ''.join(re.findall(r'\d', i.xpath('./dd[1]/a/@href').extract_first('')))   # numeric item id from the href
            bd_much = i.xpath('./dd[1]/div/div[1]/span[2]/text()').extract_first('')   # price
            bd_idlian = 'http://item.taobao.com/item.htm?id=' + bd_id
            bd_liang = i.xpath('./dd[1]/div/div[last()]/span[last()]/text()').extract_first('')   # sales figure shown on the list page
            if bd_id not in shop_oldid:
                # new product: insert the basic fields, then open the item page for the details
                sql = "INSERT INTO 商品id (`品牌`, `id`, 图片链接, 价格, 标题, 商品地址) VALUES (%s,%s,%s,%s,%s,%s)"
                cursor.execute(sql,
                               (shop.split(",")[0], bd_id, bd_pig, bd_much, bd_name, bd_idlian))
                conection.commit()
                webdriver.implicitly_wait(200)
                webdriver.get('http://item.taobao.com/item.htm?id=' + bd_id)
                # wait for the price block so the item page is fully rendered
                myDynamicElement = webdriver.find_element_by_class_name('tb-price-spec')
                time.sleep(random.randrange(2, 6))
                date = webdriver.page_source
                select_xixi = Selector(text=date)
                liem = select_xixi.xpath('//*[@id="J_TMySize"]/@data-value').extract_first("")    # category
                pinjia = select_xixi.xpath('//*[@id="J_RateCounter"]/text()').extract_first("")   # number of ratings
                if int(pinjia) == 0:
                    # no ratings yet: treat the item as freshly listed, pull the listing
                    # timestamp (dbst) out of the inline script blocks and take a screenshot
                    time_id = select_xixi.xpath('//script').extract()
                    a = [i for i in time_id if len(str(i)) > 1000]
                    new_time = re.findall(r".*dbst:(\d*)", str(a[0]).replace(" ", ""))[0][0:10]
                    timeTuple = time.strftime("%Y-%m-%d", time.localtime(int(new_time)))
                    sql = 'update `商品id` set `商品id`.`类目` = %s, `商品id`.`上架时间` = %s where id = %s'
                    cursor.execute(sql,
                                   (liem, timeTuple, bd_id))
                    conection.commit()
                    # screenshot path: <cwd>\<brand>\<cleaned title + id>.jpg (the brand folder must exist)
                    title = path + '\\' + shop.split(",")[0] + '\\' + re.sub(r"\W", "", webdriver.title + bd_id)
                    capture(webdriver, title + '.jpg')
                else:
                    sql = 'update `商品id` set `商品id`.`类目` = %s where id = %s'
                    cursor.execute(sql,
                                   (liem, bd_id))
                    conection.commit()

                # count sizes and colours to derive the total sku count
                c = 1
                ee = 1
                for i in select_xixi.xpath('//*[@id="J_isku"]/div/dl'):
                    b = i.xpath('./dt/text()').extract_first("")
                    if '尺码' in b:
                        aa = i.xpath('./dd/ul/li/a/span/text()').extract()
                        ee = len(aa)
                        dd = ' '.join(aa)
                        sql = 'update `商品id` set `商品id`.`尺码` = %s where id = %s'
                        cursor.execute(sql,
                                       (dd, bd_id))
                        conection.commit()
                    if '颜色' in b:
                        a = i.xpath('./dd/ul/li/a/span/text()').extract()
                        c = len(a)
                        d = ' '.join(a)
                        sql = 'update `商品id` set `商品id`.`颜色` = %s where id = %s'
                        cursor.execute(sql,
                                       (d, bd_id))
                        conection.commit()
                w = c * ee
                sql = 'update `商品id` set `商品id`.`sku量` = %s where id = %s'
                cursor.execute(sql,
                               (w, bd_id))
                conection.commit()





            # for products listed exactly 1, 7 or 30 days ago, record today's sales
            # figure in the matching 1天销量 / 7天销量 / 30天销量 column
            for i in shop_olxx:
                if i['id'] == bd_id:
                    sql = "UPDATE 商品id set " + i['日期'] + " = (%s) where id = %s"
                    cursor.execute(sql,
                                   (bd_liang, i['id']))
                    conection.commit()
        # follow the "next page" link, if there is one, and crawl it the same way
        if selects.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div[last()]/a[last()]/@href').extract_first(""):
            lll('https:' + selects.xpath('//*[@id="J_ShopSearchResult"]/div/div[2]/div[last()]/a[last()]/@href').extract_first(""))


    path = os.getcwd()   # root folder for the per-brand screenshot sub-folders


    def capture(webder, save_fn="capture.png"):
        # scroll the page to the bottom in small steps so lazy-loaded images render,
        # then tag the title so the Python side knows the scrolling has finished
        webder.execute_script("""
        (function () {
            var y = 0;
            var step = 100;
            window.scroll(0, 0);

            function f() {
                if (y < document.body.scrollHeight) {
                    y += step;
                    window.scroll(0, y);
                    setTimeout(f, 50);
                } else {
                    window.scroll(0, 0);
                    document.title += "scroll-done";
                }
            }

            setTimeout(f, 1000);
        })();
        """)

        # wait up to 30 seconds for the scroll to finish, then save the screenshot
        for i in range(30):
            if "scroll-done" in webder.title:
                break
            time.sleep(1)

        webder.save_screenshot(save_fn)


    # read the shop list from 1.csv and crawl each shop's search pages
    with open(os.path.join(os.getcwd(), '1.csv'), 'r') as c:
        for shop in c.readlines():
            url = shop.split(",")[2]
            lll(url)
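    # A hypothetical 1.csv line (not shown in the original post), matching the
    # shop.split(",") indexing used above; the middle column is never read:
    #
    #     某品牌,旗舰店,https://xxxxx.taobao.com/search.htm?search=y
    #
    # column 0 feeds the 品牌 field and names the screenshot sub-folder,
    # column 2 is the first shop-search page handed to lll()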
  • Original post: https://www.cnblogs.com/gao-xiang/p/7228194.html