• 去哪儿网store更新


    import re
    import json
    from odps import ODPS
    from threading import Thread
    import threading
    from urllib import parse
    import datetime
    from lxml import etree

    import random 
    import requests
    import time

    from models import *

    # Store home pages excluded from scraping: get_nodes_json() ignores any
    # queued URL that appears in this set.
    store_urls_data = {
        'https://aaag9.package.qunar.com',
        "https://fh.dujia.qunar.com/",
    }

    def write_txt(html_data):
        """Append *html_data* followed by a single space to ``a.txt``.

        The file is opened relative to the current working directory and is
        created if it does not exist.

        Args:
            html_data: Text to append; must be a ``str``.
        """
        # The context manager closes the handle even if a write raises, which
        # the previous manual open()/close() pair did not guarantee.
        with open("a.txt", 'a+') as f:
            f.write(html_data)
            f.write(" ")

    def _first_match(html, expression, default=None):
        """Return the first result of an XPath query, or *default* if nothing matched."""
        matches = html.xpath(expression)
        return matches[0] if matches else default


    def _rank_trend(up_down):
        """Translate the rank-trend marker into ``(store_rankline, store_percent)``.

        Args:
            up_down: Either the attribute mapping of the matched trend ``<i>``
                element or the string ``"NULL"`` when no such element exists.

        Returns:
            ``(1, pct)`` for an upward trend, ``(-1, pct)`` for a downward
            trend, ``(0, 0.0)`` for an unchanged rank, ``(2, 0.0)`` when the
            page carried no trend data, or ``None`` if the marker contains
            none of the known keywords.
        """
        text = str(up_down)
        if text == "NULL":
            return 2, 0.0  # 2 marks "no trend data available for this store"
        # Precedence equal > down > up mirrors the result of the original
        # sequence of independent ``if`` statements (later checks won).
        if 'equal' in text:
            return 0, 0.0
        if 'down' in text:
            sign = -1
        elif 'up' in text:
            sign = 1
        else:
            return None
        # The pattern must be ``\d+`` (digits); the former ``d+`` matched the
        # letter "d" and made int() fail. The first run of digits found in the
        # attribute text is treated as a percentage.
        digits = re.search(r'\d+', text)
        percent = int(digits.group(0)) / 100 if digits else 0.0
        return sign, percent


    def process_nodes_list(url):
        """Scrape one store page and insert a ``qunar_Store`` row for it.

        The page is downloaded and parsed, then the score, rank trend, store
        name and the entries of the ``business`` list are extracted. A row is
        saved only when that list has at least ten text entries; otherwise the
        function returns without writing anything.

        Args:
            url: Address of the store page to fetch.

        Raises:
            Whatever ``requests.get``, the lxml parser or ``store.save`` raise;
            nothing is caught here.
        """
        # NOTE(review): no timeout is passed, so a stalled server blocks the
        # calling thread indefinitely - consider adding one.
        response = requests.get(url)
        html = etree.HTML(response.text)

        # Missing score falls back to 0.0.
        score = _first_match(html, "//var[@class='score']/text()", 0.0)

        # The trend icon is looked up as "up", then "down", then "equal"; the
        # first one present supplies the attributes that are interpreted below.
        up_down = "NULL"  # placeholder for "no trend element on the page"
        for direction in ("up", "down", "equal"):
            icon = _first_match(
                html,
                '//div[@class="rankline"]//i[contains(@class,"%s")]' % direction,
            )
            if icon is not None:
                up_down = icon.attrib
                break
        else:
            print(url)  # report pages that have no trend element at all
        print(up_down)

        store_name = _first_match(html, "//div[@class='shop-rank']/strong/text()")
        if store_name is None:
            store_name = "NULL"
            print(url)  # report pages that have no store name

        res_li = html.xpath("//div[@class='business']//li/text()")
        if len(res_li) <= 9:
            # Indexes up to 9 are read below; shorter lists are skipped.
            return

        store = qunar_Store()
        store.store_name = store_name  # store name
        store.store_score = score  # store rating
        trend = _rank_trend(up_down)
        if trend is not None:
            store.store_rankline, store.store_percent = trend
        # Only the odd-indexed entries are stored - presumably the even ones
        # are field labels; verify against the page markup.
        store.company_name = res_li[1]  # company name
        store.company_legal_person = res_li[3]  # company legal representative
        store.licence_num = res_li[5]
        store.trading_certificate = res_li[7]
        store.business_scope = res_li[9]
        store.create_time = datetime.datetime.now()

        store.save(force_insert=True)

    def get_nodes_json():
        """Pop one store URL from the ``test.com:store_url`` queue and scrape it.

        URLs listed in ``store_urls_data`` are popped but not scraped.

        Returns:
            ``True`` if a URL was taken from the queue (whether or not it was
            scraped), ``False`` if the queue yielded nothing.
        """
        url = r.lpop('test.com:store_url')
        if not url:
            return False
        if url not in store_urls_data:
            process_nodes_list(url)
        return True


    class parse_qunar_url_Thread(Thread):
        """Worker thread that keeps draining the store-URL queue forever."""

        # Pause between polls while the queue is empty, in seconds.
        idle_sleep_seconds = 1.0

        def run(self):
            """Process queued URLs in an endless loop, sleeping when idle."""
            while True:
                if not get_nodes_json():
                    # Without this pause every idle worker would spin at full
                    # speed, issuing back-to-back lpop calls on an empty queue.
                    time.sleep(self.idle_sleep_seconds)


    if __name__ == "__main__":
        # Start 50 scraper threads; each is created and started in turn.
        worker_count = 50
        for _ in range(worker_count):
            parse_qunar_url_Thread().start()
        
  • 相关阅读:
    作男人 一定要有品位
    如何管理“人”
    Facebook怎样开发软件:工程师驱动的文化(转)
    为人处事100条——修身养性,经典收藏!
    抽空看看这些电影
    曹重英:技术人员也要打造人脉竞争力(转)
    动态分段统计SQL
    不成熟男人与成熟男人的区别
    Ubuntu11.10国内更新源地址汇总以及添加方法(目前最全最快的源)
    ubuntu11.10 64bits机器安装flash方法
  • 原文地址:https://www.cnblogs.com/dog-and-cat/p/13615469.html
Copyright © 2020-2023  润新知