• Downloading image, audio, and video files with Python

  The two functions below use Selenium and PyQuery to scrape JD.com: catch_data collects product details (including the large-image URLs) from a search-results page, and catch_category1 walks the three-level category menu. Both write their results to an Excel file via xlwt.


    import time

    import xlwt
    from pyquery import PyQuery as pq
    from selenium.webdriver.common.action_chains import ActionChains

    import commonMethod  # the author's helper module (proxy IP, user agent, driver setup)


    def catch_data(url, FileName):
        # Scrape one JD search-results page and save the product data to Excel.
        ip = commonMethod.getIP()
        userAgent = commonMethod.get_userAgent()
        driver = commonMethod.get_driver(ip, userAgent, False)
        try:
            driver.get(url)
            time.sleep(5)
            # Scroll down in steps so lazily loaded product images and
            # list items are rendered into the DOM before parsing.
            for i in range(1, 12):
                wid = 500 * i + 500
                js = "var q=document.documentElement.scrollTop=" + str(wid)
                driver.execute_script(js)
                time.sleep(2)
    
            # Hand the fully rendered HTML to PyQuery; each li.gl-item is one product card.
            selenium_html = driver.execute_script("return document.documentElement.outerHTML")
            doc = pq(selenium_html)
            spans = doc("div[class='ml-wrap']").find("div[id='J_goodsList']").find("ul[class='gl-warp clearfix']").find("li[class^='gl-item']")
            data_list = []
            headList = ['大图链接', '价格', '商品名称', '评价数', '店铺名称']  # image URL, price, product name, review count, store name
            for span in spans.items():
                list1 = []
                picture_url = 'https:' + span.find("div[class='gl-i-wrap']").find("div[class='p-img']").find("a").find("img").attr('src')
                list1.append(picture_url)
                price = span.find("div[class='gl-i-wrap']").find("div[class='p-price']").text()
                list1.append(price)
                name = span.find("div[class='gl-i-wrap']").find("div[class='p-name p-name-type-3']").find("a").find("em").text()
                list1.append(name)
                comment_count = span.find("div[class='gl-i-wrap']").find("div[class='p-commit']").find("strong").find("a").text()
                list1.append(comment_count)
                store = span.find("div[class='gl-i-wrap']").find("div[class='p-shop']").find("span[class='J_im_icon']").find("a").attr('title')
                list1.append(store)
                print(picture_url, price, name, comment_count, store)
                data_list.append(list1)
    
            wbk = xlwt.Workbook()
            sheet1 = wbk.add_sheet('sheet', cell_overwrite_ok=True)
    
            # Write the header row, then one data row per product.
            rowIndex = 0
            commonMethod.WriteSheetRow(sheet1, headList, rowIndex, True)
            for lst in data_list:
                rowIndex += 1
                commonMethod.WriteSheetRow(sheet1, lst, rowIndex, False)
            wbk.save(FileName)
    
            time.sleep(1)
    
        except Exception as ex:
            print(ex)
        finally:
            driver.quit()  # release the browser even if scraping fails

    def catch_category1(FileName, url):
        # Walk JD's three-level category menu and save it to Excel.
        ip = commonMethod.getIP()
        userAgent = commonMethod.get_userAgent()
        driver = commonMethod.get_driver(ip, userAgent, False)
        try:
            # url = 'https://www.jd.com/'
            driver.get(url)
            time.sleep(10)
            elements = driver.find_elements_by_xpath('//div[@class="fs_col1"]/div[@id="J_cate"]/ul[@class="JS_navCtn cate_menu"]/li[@class="cate_menu_item"]')
            category_one_list = []
            # Hover over each top-level menu item so its fly-out submenu
            # is rendered into the DOM before the page source is captured.
            for element in elements:
                print(element.text)
                txt = str(element.text).replace(' / ', '/')
                category_one_list.append(txt)
                ActionChains(driver).move_to_element(element).perform()
                time.sleep(1)
    
            selenium_html = driver.execute_script("return document.documentElement.outerHTML")
            doc = pq(selenium_html)
            spans = doc("div[class='fs_col1']").find("div[id='J_cate']").find("div[id='J_popCtn']").find("div[class='cate_part clearfix']")
            category_two = ''
            category_two_link = ''
            headList = ['序号', '一级分类', '二级分类', '三级分类', '三级分类链接']  # index, level-1 category, level-2 category, level-3 category, level-3 link
            data_list = []
            index = 0
            count = 1
            for span in spans.items():
                category_one = category_one_list[index]
                index += 1
                subSpans = span.find("div[class='cate_part_col1']").find("div[class='cate_detail']").find("dl[class^='cate_detail_item cate_detail_item']")
                for item in subSpans.items():
                    category_two = item.find("dt[class='cate_detail_tit']").find("a[class='cate_detail_tit_lk']").text()
                    category_two_link = item.find("dt[class='cate_detail_tit']").find("a[class='cate_detail_tit_lk']").attr('href')
                    sub_spans = item.find("dd[class='cate_detail_con']").find("a[class='cate_detail_con_lk']")
                    for sub_span in sub_spans.items():
                        col_list = []
                        category_three = sub_span.text()
                        category_three_link = 'https:' + sub_span.attr('href')
                        print(category_one, category_two, category_three, category_three_link)
                        col_list.append(count)
                        count += 1
                        col_list.append(category_one)
                        col_list.append(category_two)
                        col_list.append(category_three)
                        col_list.append(category_three_link)
                        data_list.append(col_list)
    
    
            wbk = xlwt.Workbook()
            sheet1 = wbk.add_sheet('sheet', cell_overwrite_ok=True)
    
            # Write the header row, then one data row per category entry.
            rowIndex = 0
            commonMethod.WriteSheetRow(sheet1, headList, rowIndex, True)
            for lst in data_list:
                rowIndex += 1
                commonMethod.WriteSheetRow(sheet1, lst, rowIndex, False)
            wbk.save(FileName)
    
            time.sleep(1)
    
        except Exception as ex:
            print(ex)
        finally:
            driver.quit()  # release the browser even if scraping fails
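
  The functions above only collect the media URLs; the actual file download the title refers to can be done with a plain HTTP client. Below is a minimal sketch, assuming the third-party requests package is installed; the download_file helper and its save_dir default are illustrative names, not part of the original script.

    import os

    import requests


    def download_file(url, save_dir='downloads'):
        # Stream a remote file (image, audio, or video) to disk in chunks
        # so large files do not have to fit in memory.
        os.makedirs(save_dir, exist_ok=True)
        local_name = os.path.join(save_dir, url.split('/')[-1] or 'unnamed')
        resp = requests.get(url, stream=True, timeout=30)
        resp.raise_for_status()
        with open(local_name, 'wb') as f:
            for chunk in resp.iter_content(chunk_size=8192):
                f.write(chunk)
        return local_name

    # Example (hypothetical URL): fetch one of the picture_url values
    # collected by catch_data.
    # download_file('https://img14.360buyimg.com/n7/example.jpg')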
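  For completeness, a possible entry point calling the two scrapers; the search URL, keyword, and output file names here are hypothetical examples, not from the original post.

    if __name__ == '__main__':
        # Hypothetical targets; substitute your own search URL and output paths.
        catch_category1('jd_category.xls', 'https://www.jd.com/')
        catch_data('https://search.jd.com/Search?keyword=phone', 'jd_goods.xls')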
• Original post: https://www.cnblogs.com/shaosks/p/16141343.html