def _absolute_url(link):
    """Return *link* as an absolute URL, or '' when the attribute is missing.

    Links that already carry an http(s) scheme are returned unchanged;
    anything else (typically a protocol-relative '//host/path' value) gets
    the 'https:' prefix the original code always prepended.
    """
    if not link:
        return ''
    if link.startswith(('http://', 'https://')):
        return link
    return 'https:' + link


def _save_rows(FileName, headList, data_list):
    """Write *headList* as row 0 and every entry of *data_list* below it,
    then save the workbook to *FileName*."""
    wbk = xlwt.Workbook()
    sheet1 = wbk.add_sheet('sheet', cell_overwrite_ok=True)
    commonMethod.WriteSheetRow(sheet1, headList, 0, True)
    for rowIndex, lst in enumerate(data_list, start=1):
        commonMethod.WriteSheetRow(sheet1, lst, rowIndex, False)
    wbk.save(FileName)


def catch_data(url, FileName):
    """Scrape the product list rendered at *url* and save it as a spreadsheet.

    The page is opened in a browser obtained from ``commonMethod``, scrolled
    step by step, and the resulting HTML is parsed with pyquery. One row per
    ``li.gl-item`` element is written: image link, price, product name,
    review count and shop name.

    Args:
        url: Address of the listing page to load.
        FileName: Path the xlwt workbook is saved to.

    Returns:
        None. Any exception raised while scraping or saving is printed and
        swallowed (best-effort behaviour of the original code).
    """
    ip = commonMethod.getIP()
    userAgent = commonMethod.get_userAgent()
    driver = commonMethod.get_driver(ip, userAgent, False)
    try:
        driver.get(url)
        time.sleep(5)
        # Scroll from 1000px to 6000px in 500px steps, pausing after each one;
        # presumably this lets content that loads on scroll render before
        # the HTML snapshot is taken.
        for i in range(1, 12):
            wid = 500 * i + 500
            js = "var q=document.documentElement.scrollTop=" + str(wid)
            driver.execute_script(js)
            time.sleep(2)
        selenium_html = driver.execute_script("return document.documentElement.outerHTML")
        doc = pq(selenium_html)
        spans = doc("div[class='ml-wrap']").find("div[id='J_goodsList']").find(
            "ul[class='gl-warp clearfix']").find("li[class^='gl-item']")
        # Column headers: image link, price, product name, review count, shop name.
        headList = ['大图链接', '价格', '商品名称', '评价数', '店铺名称']
        data_list = []
        for span in spans.items():
            wrap = span.find("div[class='gl-i-wrap']")
            # attr() returns None when the <img> has no src; _absolute_url
            # turns that into '' instead of letting the concatenation raise
            # and abort the whole scrape.
            picture_url = _absolute_url(
                wrap.find("div[class='p-img']").find("a").find("img").attr('src'))
            price = wrap.find("div[class='p-price']").text()
            name = wrap.find("div[class='p-name p-name-type-3']").find("a").find("em").text()
            comment_count = wrap.find("div[class='p-commit']").find("strong").find("a").text()
            store = wrap.find("div[class='p-shop']").find(
                "span[class='J_im_icon']").find("a").attr('title')
            print(picture_url, price, name, comment_count, store)
            data_list.append([picture_url, price, name, comment_count, store])
        _save_rows(FileName, headList, data_list)
        time.sleep(1)
    except Exception as ex:
        print(ex)
    finally:
        # Always release the browser; the original leaked one per call.
        driver.quit()


def catch_category1(FileName, url):
    """Scrape the three-level category menu at *url* into a spreadsheet.

    Every first-level menu item is hovered so its pop-up panel is present in
    the DOM, then the page HTML is parsed. One row is written per third-level
    link: running number, first-level, second-level and third-level category
    names, and the third-level link.

    Args:
        FileName: Path the xlwt workbook is saved to.
        url: Address of the page holding the category menu
            (NOTE(review): the original carried a commented-out
            'https://www.jd.com/' default — confirm with callers).

    Returns:
        None. Any exception raised while scraping or saving is printed and
        swallowed (best-effort behaviour of the original code).
    """
    ip = commonMethod.getIP()
    userAgent = commonMethod.get_userAgent()
    driver = commonMethod.get_driver(ip, userAgent, False)
    try:
        driver.get(url)
        time.sleep(10)
        elements = driver.find_elements_by_xpath(
            '//div[@class="fs_col1"]/div[@id="J_cate"]/ul[@class="JS_navCtn cate_menu"]/li[@class="cate_menu_item"]')
        category_one_list = []
        for element in elements:
            print(element.text)
            txt = str(element.text).replace(' / ', '/')
            category_one_list.append(txt)
            # Hover the menu entry before snapshotting the page.
            ActionChains(driver).move_to_element(element).perform()
            time.sleep(1)
        selenium_html = driver.execute_script("return document.documentElement.outerHTML")
        doc = pq(selenium_html)
        spans = doc("div[class='fs_col1']").find("div[id='J_cate']").find(
            "div[id='J_popCtn']").find("div[class='cate_part clearfix']")
        # Column headers: serial number, level-1 / level-2 / level-3 category,
        # level-3 category link.
        headList = ['序号', '一级分类', '二级分类', '三级分类', '三级分类链接']
        data_list = []
        count = 1
        for index, span in enumerate(spans.items()):
            # Panels are matched to menu entries by position; fall back to ''
            # rather than raising IndexError if there are more panels than
            # hovered entries.
            category_one = category_one_list[index] if index < len(category_one_list) else ''
            subSpans = span.find("div[class='cate_part_col1']").find(
                "div[class='cate_detail']").find("dl[class^='cate_detail_item cate_detail_item']")
            for item in subSpans.items():
                category_two = item.find("dt[class='cate_detail_tit']").find(
                    "a[class='cate_detail_tit_lk']").text()
                sub_spans = item.find("dd[class='cate_detail_con']").find(
                    "a[class='cate_detail_con_lk']")
                for sub_span in sub_spans.items():
                    category_three = sub_span.text()
                    # A missing href no longer raises TypeError.
                    category_three_link = _absolute_url(sub_span.attr('href'))
                    print(category_one, category_two, category_three, category_three_link)
                    data_list.append(
                        [count, category_one, category_two, category_three, category_three_link])
                    count += 1
        _save_rows(FileName, headList, data_list)
        time.sleep(1)
    except Exception as ex:
        print(ex)
    finally:
        # Always release the browser; the original leaked one per call.
        driver.quit()