python爬虫 python3+selenium+chrome

1、准备

　　安装 selenium：在命令行中执行 pip install selenium

　　下载浏览器驱动：谷歌浏览器驱动下载地址：http://chromedriver.storage.googleapis.com/index.html

　　　　　　　　　　驱动程序和浏览器的映射关系：https://blog.csdn.net/mcfnhm/article/details/85339414

将下载后的浏览器驱动程序解压，并将 chromedriver.exe 复制到 Python 安装目录下的 Scripts 文件夹中（该目录需在系统 PATH 中）。

2、设置浏览器无头模式

from  selenium import webdriver
from time import sleep
#无头模式
from selenium.webdriver.chrome.options import Options
#实现回避检测(此方式已弃用)
#from selenium.webdriver import ChromeOptions

# Headless mode: run Chrome without opening a visible browser window.
chrom_option = Options()
chrom_option.add_argument('--headless')
chrom_option.add_argument('--disable-gpu')

# Detection avoidance: exclude Chrome's "enable-automation" switch.
# The older approach of building a separate ChromeOptions object is deprecated;
# the experimental option is set on the same Options instance instead.
chrom_option.add_experimental_option('excludeSwitches', ['enable-automation'])

chrom = webdriver.Chrome(options=chrom_option)
try:
    chrom.get("https://www.baidu.com")
    print(chrom.page_source)
finally:
    # A headless browser has no window to close by hand, so always quit the
    # driver explicitly; otherwise the Chrome/chromedriver processes are leaked.
    chrom.quit()

3、动作链示例

from selenium import webdriver
from time import sleep
#导入动作链
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import  By
from selenium.webdriver.chrome.options import Options


url='https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
chrom = webdriver.Chrome()
try:
    chrom.get(url)
    chrom.maximize_window()

    # The target element lives inside an <iframe>, so the driver must switch
    # into that frame before the element can be located.
    chrom.switch_to.frame("iframeResult")
    div_ele = chrom.find_element(By.ID,'draggable')

    # Queue "press and hold the left mouse button on the element"; it is sent
    # to the browser together with the first move when perform() is called.
    action = ActionChains(chrom)
    action.click_and_hold(div_ele)

    for _ in range(5):
        # move_by_offset(x, y) queues a relative mouse move;
        # perform() executes the queued actions immediately.
        action.move_by_offset(17,0).perform()
        sleep(1)

    # Release the mouse button. release() only queues the action, so it must
    # be followed by perform() or the button is never actually released.
    action.release().perform()
finally:
    # Close the browser even if locating or dragging the element failed.
    chrom.quit()

4、读取 Excel 中的账号和密码，逐个尝试登录后将结果写入 txt

import xlrd
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep



def _cell_text(value):
    """Return a spreadsheet cell value as the text to type into a form field.

    Whole-number floats are rendered without the trailing ".0" (assumes the
    reader hands numeric cells back as floats -- see the xlrd documentation);
    every other value is converted with str().
    """
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def read_excel(url,chrome_url):
    """Try every account listed in a workbook against a login page.

    For each row of the first sheet, starting at row index 2, the user name is
    read from column index 5 and the password from column index 6. Rows where
    either value is empty are skipped. For every remaining row a fresh Chrome
    instance opens ``chrome_url``, fills the ``userName`` and ``password``
    inputs, clicks ``login`` and inspects the resulting page source.

    Results are written to ``./reData/succ.txt`` and ``./reData/error.txt``
    (both are overwritten on every call). An account goes to ``error.txt``
    when the page reports a wrong user name/password, or when the login form
    could not be driven at all; otherwise it goes to ``succ.txt``.

    Args:
        url: Path of the workbook, passed to ``xlrd.open_workbook``.
        chrome_url: Address of the login page to open in Chrome.
    """
    # Imported locally so the block does not depend on the file's import header.
    from selenium.common.exceptions import WebDriverException

    excel = xlrd.open_workbook(url)
    sheet = excel.sheets()[0]

    txt_path = './reData'
    os.makedirs(txt_path, exist_ok=True)
    error_file = os.path.join(txt_path, 'error.txt')
    succ_file = os.path.join(txt_path, 'succ.txt')

    # "with" guarantees both report files are flushed and closed even if a
    # row raises part-way through the sheet.
    with open(error_file,'w',encoding='utf-8') as fp, \
            open(succ_file,'w',encoding='utf-8') as fs:
        for row in range(2,sheet.nrows):
            name = _cell_text(sheet.cell_value(row,5))
            pwd = _cell_text(sheet.cell_value(row,6))
            if not name or not pwd:
                # No browser was started for this row, so there is nothing to quit.
                continue

            chrom = webdriver.Chrome()
            try:
                chrom.get(chrome_url)
                chrom.maximize_window()
                sleep(1)
                try:
                    name_input_ele = chrom.find_element(By.ID, 'userName')
                    pwd_input_ele = chrom.find_element(By.ID, 'password')
                    btn = chrom.find_element(By.ID, 'login')
                    name_input_ele.send_keys(name)
                    pwd_input_ele.send_keys(pwd)
                    btn.click()
                    sleep(1)

                    page_text = chrom.page_source
                except WebDriverException:
                    # The form could not be driven; None marks "no result" so
                    # the account is not mistaken for a successful login.
                    page_text = None
            finally:
                # Exactly one quit per browser, on success and failure alike.
                chrom.quit()

            # "in" also matches at index 0, which "find(...) > 0" would miss.
            if page_text is None or '用户名或密码错误' in page_text:
                fp.write('%10s—%10s\n' % (name, pwd))
            else:
                fs.write('%10s—%10s\n' % (name, pwd))



# Script entry point. It is currently a placeholder: running this file directly
# does nothing. Call read_excel(<workbook path>, <login page URL>) here to run
# the batch login check.
if __name__ == '__main__':
    pass

相关阅读:
BIND_MISMATCH导致过多VERSION COUNT的问题
 Using dbms_shared_pool.purge to remove a single task from the library cache
SQL Server 2012 新的分页函数 OFFSET & FETCH NEXT
How to delete expired archive log files using rman?
Oracle利用external table 查看trace文件
 全栈开发经验
 ASP.NET Core教程：使用Supervisor做ASP.NET Core应用程序守护进程
 ASP.NET Core教程：ASP.NET Core程序部署到Linux
ASP.NET Core教程：ASP.NET Core 程序部署到Windows系统
 C#：窗体传值
原文地址：https://www.cnblogs.com/GOOGnine/p/15934587.html