• python爬虫 python3+selenium+chrome


    1、准备

      安装selenium   使用命令安装selenium: pip install selenium

      下载浏览器驱动:谷歌浏览器驱动下载地址:http://chromedriver.storage.googleapis.com/index.html

              驱动程序和浏览器的映射关系:https://blog.csdn.net/mcfnhm/article/details/85339414

            将下载后的浏览器驱动程序解压,然后将chromedriver.exe复制到Python安装目录下的Scripts文件夹中

     2、设置浏览器无头模式

    from  selenium import webdriver
    from time import sleep
    #无头模式
    from selenium.webdriver.chrome.options import Options
    #实现回避检测(此方式已弃用)
    #from selenium.webdriver import ChromeOptions
    
    # Configure Chrome to run headless (no visible browser window).
    chrom_option = Options()
    chrom_option.add_argument('--headless')
    chrom_option.add_argument('--disable-gpu')

    # Evade automation detection: exclude the "enable-automation" switch.
    # The experimental option is set directly on Options; the separate
    # ChromeOptions import used by older tutorials is deprecated.
    chrom_option.add_experimental_option('excludeSwitches', ['enable-automation'])

    chrom = webdriver.Chrome(options=chrom_option)
    try:
        chrom.get("https://www.baidu.com")
        print(chrom.page_source)
    finally:
        # Always shut the driver down; a headless browser has no window to
        # close by hand, so a skipped quit() leaves the process running.
        chrom.quit()

    3、动作链示例

    from selenium import webdriver
    from time import sleep
    #导入动作链
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import  By
    from selenium.webdriver.chrome.options import Options
    
    
    url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
    chrom = webdriver.Chrome()
    try:
        chrom.get(url)
        chrom.maximize_window()

        # The target element lives inside an <iframe>, so switch into that
        # frame before trying to locate it.
        chrom.switch_to.frame("iframeResult")
        div_ele = chrom.find_element(By.ID, 'draggable')

        action = ActionChains(chrom)
        # Queued only; it is sent to the browser by the first perform() below.
        action.click_and_hold(div_ele)

        for _ in range(5):
            # move_by_offset(x, y) queues a relative mouse move;
            # perform() executes the queued actions right away.
            action.move_by_offset(17, 0).perform()
            sleep(1)

        # release() merely queues the mouse-up; without perform() the button
        # would never actually be released.
        action.release().perform()
    finally:
        # Close the browser even if locating or dragging the element failed.
        chrom.quit()

     4、读取excel中的账号密码,逐个尝试登录后将结果写入txt

    import xlrd
    import os
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from time import sleep
    
    
    
    def _cell_text(value):
        """Return an Excel cell value as text.

        xlrd returns numeric cells as floats, so a whole-number cell such as
        123456 arrives as 123456.0; render it without the trailing ".0".
        String cells are returned unchanged.
        """
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value)


    def read_excel(url, chrome_url):
        """Try every account listed in an Excel sheet against a login page.

        The first sheet of the workbook is read starting at row index 2;
        column index 5 holds the user name and column index 6 the password.
        Rows where either value is empty are skipped. For every other row a
        fresh Chrome instance opens ``chrome_url``, fills in the elements with
        ids ``userName`` and ``password``, clicks the element with id
        ``login`` and inspects the resulting page source.

        Results are written to ``./reData`` (created if missing):

        * ``error.txt`` - the page reported a wrong user name/password, or the
          attempt raised a WebDriver error and therefore could not be
          confirmed as a success.
        * ``succ.txt`` - every other attempt.

        Args:
            url: Path of the Excel workbook to read.
            chrome_url: Address of the login page to open for each account.

        Returns:
            None.
        """
        # Imported locally so the function does not depend on an extra
        # module-level import being present.
        from selenium.common.exceptions import WebDriverException

        excel = xlrd.open_workbook(url)
        sheet = excel.sheets()[0]

        txt_path = './reData'
        os.makedirs(txt_path, exist_ok=True)
        error_file = os.path.join(txt_path, 'error.txt')
        succ_file = os.path.join(txt_path, 'succ.txt')

        # "with" guarantees both result files are flushed and closed even if
        # a row fails unexpectedly.
        with open(error_file, 'w', encoding='utf-8') as fp, \
                open(succ_file, 'w', encoding='utf-8') as fs:
            for row in range(2, sheet.nrows):
                name = _cell_text(sheet.cell_value(row, 5))
                pwd = _cell_text(sheet.cell_value(row, 6))
                if not name or not pwd:
                    # No browser was started for this row, so there is
                    # nothing to quit.
                    continue

                chrom = webdriver.Chrome()
                try:
                    chrom.get(chrome_url)
                    chrom.maximize_window()
                    sleep(1)

                    name_input_ele = chrom.find_element(By.ID, 'userName')
                    pwd_input_ele = chrom.find_element(By.ID, 'password')
                    btn = chrom.find_element(By.ID, 'login')
                    name_input_ele.send_keys(name)
                    pwd_input_ele.send_keys(pwd)
                    btn.click()
                    sleep(1)

                    # "in" also matches at index 0, which find(...) > 0 missed.
                    login_failed = '用户名或密码错误' in chrom.page_source
                except WebDriverException:
                    # The attempt could not be completed, so it must not be
                    # recorded as a successful login.
                    login_failed = True
                finally:
                    # Exactly one quit per browser instance, on every path.
                    chrom.quit()

                target = fp if login_failed else fs
                target.write('%10s—%10s\n' % (name, pwd))
    
    
    
    # Script entry point. It currently does nothing: read_excel() is defined
    # above but is not called from here.
    if __name__ == '__main__':
        pass
  • 相关阅读:
    BIND_MISMATCH导致过多VERSION COUNT的问题
    Using dbms_shared_pool.purge to remove a single task from the library cache
    SQL Server 2012 新的分页函数 OFFSET & FETCH NEXT
    How to delete expired archive log files using rman?
    Oracle利用external table 查看trace文件
    全栈开发经验
    ASP.NET Core教程:使用Supervisor做ASP.NET Core应用程序守护进程
    ASP.NET Core教程:ASP.NET Core程序部署到Linux
    ASP.NET Core教程:ASP.NET Core 程序部署到Windows系统
    C#:窗体传值
  • 原文地址:https://www.cnblogs.com/GOOGnine/p/15934587.html
Copyright © 2020-2023  润新知