"""Scrape job postings for a keyword from lagou.com and store them in MySQL.

The script drives a Chrome browser through the paginated search results,
reads the job cards of each page and inserts one row per card into the
``lagou`` table.
"""

import time
from urllib import request

import pymysql
import requests
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

START_URL = 'https://www.lagou.com/'
SEARCH_KEYWORD = 'python'
MAX_PAGES = 30
NEXT_PAGE_TIMEOUT = 10  # seconds to wait for the "next page" button

# The target table must exist before the first run, e.g.:
#   create table lagou(name varchar(50), salary varchar(10), tiao varchar(10),
#                      company varchar(20), skill varchar(30), daiyu varchar(30))
INSERT_SQL = ('insert into lagou(name,salary,tiao,company,skill,daiyu) '
              'values(%s,%s,%s,%s,%s,%s)')


def _split_salary_line(text):
    """Return ``(salary, requirement)`` taken from a job card's salary line.

    The line is split on single spaces; the first token is the salary and the
    second one the requirement.  A line without a second token yields an empty
    requirement instead of raising ``IndexError``.
    """
    parts = text.split(' ')
    requirement = parts[1] if len(parts) > 1 else ''
    return parts[0], requirement


def _open_search_results(browser):
    """Open the start page, dismiss the popup and search for the keyword."""
    browser.get(START_URL)
    browser.find_element(By.ID, 'cboxClose').click()
    time.sleep(1)
    search_input = browser.find_element(By.XPATH, '//input[@id="search_input"]')
    # Click through JavaScript, as the original script did for both controls.
    browser.execute_script("arguments[0].click();", search_input)
    search_input.send_keys(SEARCH_KEYWORD)
    search_button = browser.find_element(By.ID, 'search_button')
    browser.execute_script("arguments[0].click();", search_button)


def _scrape_page(browser):
    """Return one row tuple per job card on the current result page.

    Each row is ``(name, salary, requirement, company, skill, benefits)``,
    matching the column order of ``INSERT_SQL``.
    """
    names = browser.find_elements(By.XPATH, '//a[@class="position_link"]/h3')
    salary_lines = browser.find_elements(
        By.XPATH, '//div[@class="p_bot"]/div[@class="li_b_l"]')
    companies = browser.find_elements(
        By.XPATH, '//div[@class="company_name"]/a')
    skills = browser.find_elements(
        By.XPATH, '//div[@class="list_item_bot"]/div[@class="li_b_l"]')
    benefits = browser.find_elements(
        By.XPATH, '//div[@class="list_item_bot"]/div[@class="li_b_r"]')

    rows = []
    # zip() stops at the shortest list, so a card with a missing field cannot
    # trigger an IndexError the way positional indexing did.
    for name, salary_line, company, skill, benefit in zip(
            names, salary_lines, companies, skills, benefits):
        salary, requirement = _split_salary_line(salary_line.text)
        rows.append((name.text, salary, requirement,
                     company.text, skill.text, benefit.text))
    return rows


def _go_to_next_page(browser):
    """Scroll to the pager and click "next page".

    Returns ``True`` when the button was clicked and ``False`` when it did not
    become clickable within ``NEXT_PAGE_TIMEOUT`` seconds.
    """
    # Scroll down so that the pager is rendered inside the viewport.
    browser.execute_script("var q=document.documentElement.scrollTop=3000")
    time.sleep(1)
    pager = browser.find_element(
        By.XPATH, '//*[@id="s_position_list"]/div[2]/div/span[6]')
    ActionChains(browser).move_to_element(pager).perform()
    # Clicking "next page" is the critical step: per the original author, the
    # ordinary click approach fails here and jumps from page 3 straight to the
    # last page, hence the explicit wait for the button to become clickable.
    try:
        next_button = WebDriverWait(browser, NEXT_PAGE_TIMEOUT).until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'pager_next'))
        )
    except TimeoutException:
        return False
    next_button.click()
    return True


def main():
    """Run the scraper and always release the browser and the DB connection."""
    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration or environment variables.
    # Keyword arguments are required by PyMySQL >= 1.0.  The scraped text is
    # Chinese, so a Unicode-capable connection charset is requested explicitly
    # instead of relying on the driver default.
    db = pymysql.connect(host='localhost', user='root', password='123qwe',
                         database='ok', charset='utf8mb4')
    try:
        cur = db.cursor()
        # NOTE: pass a webdriver.ChromeOptions() instance with a headless
        # argument here to run without a visible browser window.
        browser = webdriver.Chrome()
        try:
            _open_search_results(browser)
            for page in range(1, MAX_PAGES + 1):
                print(page)
                time.sleep(2)
                rows = _scrape_page(browser)
                if rows:
                    cur.executemany(INSERT_SQL, rows)
                    db.commit()
                time.sleep(1)
                if not _go_to_next_page(browser):
                    # The original called an undefined next_page() here, which
                    # raised NameError; stop paging cleanly instead.
                    print('next page button not clickable, stopping')
                    break
        finally:
            cur.close()
            # quit() also terminates the chromedriver process; close() only
            # closes the current window.
            browser.quit()
    finally:
        db.close()


# Executed at module level, exactly like the original flat script.
main()