• 请求库-selenium 模块


    # -*- coding: utf-8 -*-
    
    # 安装:pip3 install selenium
    
    # 下载chromedriver.exe放到python安装路径的scripts目录中即可,注意最新版本是3.5
    # 国内镜像网站地址:http://npm.taobao.org/mirrors/chromedriver/3.5
    # 最新的版本去官网找:https://sites.google.com/a/chromium.org/chromedriver/downloads
    
    # 验证安装
    from selenium import webdriver
    # driver = webdriver.Chrome()       #弹出浏览器
    # driver.get("https://www.baidu.com")    #浏览器自动访问该url
    # print(driver.page_source)              #终端打印获取到的urltext文件
    
    # 安装:pip3 install selenium
    # 下载phantomjs,解压后把phantomjs.exe所在的bin目录放到环境变量
    # 下载链接:http://phantomjs.org/download.html
    
    # drivers = webdriver.PhantomJS(executable_path=r"E:\python\phantomjs-2.1.1-windows\bin\phantomjs.exe") #无界面浏览器
    # 环境变量配置之后就可以不用加里面的参数,但是不知道什么原因pycharm识别不了,只能手动把目录填写进去
    # drivers.get('https://www.baidu.com')
    # print(drivers.page_source)

    # 基本使用
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By #查找方式:ID,class
    from selenium.webdriver.common.keys import Keys #键盘操作,enter
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素

    browser = webdriver.Chrome()
    try:
        browser.get("https://www.baidu.com") #访问这个页面
        input_tag = browser.find_element_by_id("kw") #找到搜索框ID
        input_tag.send_keys("极致诱惑") #给搜索框添加搜索条件
        input_tag.send_keys(Keys.ENTER) #模仿人手动敲击回车键
        wait = WebDriverWait(browser,10) #等待10秒
        wait.until(EC.presence_of_element_located((By.ID,"content_left--"))) #等待十秒,直到content_left--加载出来
        print("browser.page_source",browser.page_source)
        print("browser.current_url",browser.current_url)
        print("browser.get_cookie()",browser.get_cookie)
    finally:
        browser.close() #最后得关闭
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait
    import time
    
    # Log in to Baidu automatically. The script demonstrates the
    # find_element_by_* locator family together with explicit waits.
    #
    # Locator methods offered by the driver:
    #   1. find_element_by_id
    #   2. find_element_by_link_text
    #   3. find_element_by_partial_link_text
    #   4. find_element_by_tag_name
    #   5. find_element_by_class_name
    #   6. find_element_by_name
    #   7. find_element_by_css_selector
    #   8. find_element_by_xpath
    driver = webdriver.Chrome()
    wait = WebDriverWait(driver, 5)  # every wait.until() below gives up after 5 seconds

    try:
        # Navigate inside the try block so that a failed page load still
        # reaches the cleanup in the finally clause.
        driver.get("https://www.baidu.com")

        # 1. find_element_by_id
        # print(driver.find_element_by_id("kw"))

        # 2. find_element_by_link_text (exact text match)
        # login = driver.find_elements_by_link_text("登录")[0]
        # login.click()

        # 3. find_element_by_partial_link_text (substring match).
        # The original passed an empty string here, which does not single out
        # the login link; use the same text as the exact-match example above.
        login = driver.find_elements_by_partial_link_text("登录")[0]
        login.click()

        # 4. find_element_by_tag_name
        # print(driver.find_element_by_tag_name("a"))

        # 5. Class-name lookup: wait until the "log in with user name" footer
        # entry of the login dialog is clickable, then click it.
        button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'tang-pass-footerBarULogin')))
        button.click()
        print("============")

        # 6. Name lookup: wait for the credential fields and the submit button.
        input_user = wait.until(EC.presence_of_element_located((By.NAME, "userName")))
        input_pwd = wait.until(EC.presence_of_element_located((By.NAME, "password")))
        commit = wait.until(EC.element_to_be_clickable((By.ID, "TANGRAM__PSP_10__submit")))

        input_user.send_keys("xxxxxxxxx")       # type the user name (placeholder)
        input_pwd.send_keys("xxxxxxxxx")        # type the password (placeholder)
        commit.click()
        time.sleep(4)                           # leave the result on screen briefly

    finally:
        # quit() ends the WebDriver session; close() would only close the
        # current window.
        driver.quit()
    自动登录百度账号
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait
    import time
    
    # Basic XPath lookups against the Scrapy selector sample page.
    driver = webdriver.Chrome()
    driver.get('https://doc.scrapy.org/en/latest/_static/selectors-sample1.html')
    driver.implicitly_wait(3)         # element lookups retry for up to 3 seconds

    try:
        # "//a" searches the whole document for <a> elements at any depth;
        # find_element_* raises when nothing matches.
        driver.find_element_by_xpath("//a")
        # "//body/a" would only match <a> elements that are direct children of <body>.
        # "//body//a" matches <a> elements at any depth below <body>.
        driver.find_element_by_xpath("//body//a")

        # XPath positions are 1-based, so "a[3]" selects the THIRD <a> among
        # its siblings. The original author notes that these three
        # expressions print the same text on this page.
        for third_link_xpath in ("//body//a[3]", "//a[3]", '//*[@id="images"]/a[3]'):
            print(driver.find_element_by_xpath(third_link_xpath).text)

        # find_elements_* returns a Python list, which is 0-based: index 3 is
        # the FOURTH match, not the same element as a[3] above.
        body_div_links = driver.find_elements_by_xpath("/html/body/div/a")
        print(body_div_links[3].text)

        # Select the <a> whose child <img> has the given src attribute.
        print(driver.find_element_by_xpath('//a[img/@src="image3_thumb.jpg"]').text)

    finally:
        driver.close()
    xpath的基本使用
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
    from selenium.webdriver.common.keys import Keys #键盘按键操作
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
    # Read attributes and geometry of an element on the Amazon China home page.
    browser = webdriver.Chrome()
    try:
        browser.get("https://www.amazon.cn")
        wait = WebDriverWait(browser, 10)
        # Block (for at most 10 seconds) until the image container is in the DOM.
        wait.until(EC.presence_of_all_elements_located((By.ID, "cc-lm-tcgShowImgContainer")))
        tag = browser.find_element(By.CSS_SELECTOR, "#cc-lm-tcgShowImgContainer img")
        print(tag.get_attribute("src"))   # value of the HTML "src" attribute
        print(tag.id)                     # WebDriver's internal element id, not the HTML id attribute
        print(tag.location)               # position of the element on the page
        print(tag.tag_name)               # tag name of the element
        print(tag.size)                   # rendered width and height
    finally:
        # The original had no cleanup, so a wait timeout left the browser
        # running. quit() ends the whole WebDriver session.
        browser.quit()
    获取标签属性
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By  # 按照什么方式查找,By.ID,By.CSS_SELECTOR
    from selenium.webdriver.common.keys import Keys  # 键盘按键操作
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait  # 等待页面加载某些元素
    import time
    # Drag the "draggable" square onto the "droppable" square in small steps.
    driver = webdriver.Chrome()
    driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    driver.implicitly_wait(3)   # element lookups retry for up to 3 seconds
    try:
        # The demo widgets live inside the result iframe, so enter it first.
        driver.switch_to.frame("iframeResult")
        target_box = driver.find_element_by_id("droppable")
        moving_box = driver.find_element_by_id("draggable")

        # Alternative (one call): build a single chain with
        # ActionChains(driver).drag_and_drop(source, target) and perform() it.

        # Step-wise variant: press and hold, move 2px at a time, then release.
        ActionChains(driver).click_and_hold(moving_box).perform()
        gap = target_box.location["x"] - moving_box.location["x"]   # horizontal distance between the two boxes
        moved = 0
        while moved < gap:
            ActionChains(driver).move_by_offset(xoffset=2,yoffset=0).perform()
            moved += 2
        ActionChains(driver).release().perform()
        time.sleep(10)   # keep the result visible before closing
    finally:
        driver.close()
    actionchains
    等待元素加载
    #1、selenium只是模拟浏览器的行为,而浏览器解析页面是需要时间的(执行css,js),一些元素可能需要过一段时间才能加载出来,为了保证能查找到元素,必须等待
    
    #2、等待的方式分两种:
    隐式等待:在browser.get('xxx')前就设置,针对所有元素有效
    显式等待:在browser.get('xxx')之后设置,只针对某个元素有效
    
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
    from selenium.webdriver.common.keys import Keys #键盘按键操作
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
    
    # Implicit wait demo: search Baidu and fetch the result container.
    browser = webdriver.Chrome()

    # Implicit wait: every element lookup keeps retrying for up to 10 seconds
    # when the element has not been loaded yet.
    browser.implicitly_wait(10)

    browser.get('https://www.baidu.com')

    search_box = browser.find_element_by_id('kw')
    # Type the query, then press Enter to submit it.
    for keys in ('美女', Keys.ENTER):
        search_box.send_keys(keys)

    # No explicit wait here; the lookup relies on the implicit wait configured
    # above and raises if the element still cannot be found.
    results = browser.find_element_by_id('content_left')
    print(results)

    browser.close()
    隐式等待
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
    from selenium.webdriver.common.keys import Keys #键盘按键操作
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
    
    # Explicit wait demo: search Baidu and wait for one specific element.
    browser = webdriver.Chrome()
    browser.get('https://www.baidu.com')

    search_box = browser.find_element_by_id('kw')
    # Type the query, then press Enter to submit it.
    for keys in ('美女', Keys.ENTER):
        search_box.send_keys(keys)

    # Explicit wait: block for at most 10 seconds until the result container
    # with id "content_left" is present in the DOM.
    results_ready = EC.presence_of_element_located((By.ID,'content_left'))
    WebDriverWait(browser,10).until(results_ready)

    results = browser.find_element(By.CSS_SELECTOR,'#content_left')
    print(results)

    browser.close()
    显式等待
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
    from selenium.webdriver.common.keys import Keys #键盘按键操作
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
    
    # Search Taobao twice in a row: submit a first query, then replace it with
    # a second one on the result page.
    import time

    browser = webdriver.Chrome()
    try:
        browser.get("https://www.taobao.com/")
        wait = WebDriverWait(browser,10)

        # First search: wait for the search box and button instead of looking
        # them up immediately (equivalent direct lookups would be
        # find_element_by_id("q") and find_element_by_class_name("btn-search")).
        input_tag = wait.until(EC.presence_of_element_located((By.ID,"q")))
        input_tag.send_keys("情趣用品")
        button = wait.until(EC.presence_of_element_located((By.CLASS_NAME,"btn-search")))
        button.click()

        # Give the result page time to load before touching its search box;
        # the elements found above belong to the previous page.
        time.sleep(3)

        # Second search on the result page.
        input_tag = browser.find_element_by_id("q")
        input_tag.clear()
        input_tag.send_keys("iphone9")
        button = browser.find_element_by_class_name("btn-search")
        button.click()

        # Leave the second result on screen briefly before shutting down.
        time.sleep(3)
    finally:
        # The original never closed the browser; quit() ends the whole session
        # even when one of the steps above fails.
        browser.quit()
    自动搜索,二次跳转
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
    from selenium.webdriver.common.keys import Keys #键盘按键操作
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
    
    # Run hand-written JavaScript in the page.
    import time

    # Create the driver before the try block: if Chrome fails to start there
    # is nothing to clean up, and the finally clause must not reference an
    # unbound name.
    browser = webdriver.Chrome()
    try:
        browser.get("https://www.baidu.com")
        browser.execute_script("alert(6666)")   # pops up a JavaScript alert
        time.sleep(3)                           # keep the alert visible for a moment
    finally:
        # quit() ends the whole WebDriver session, not just the current window.
        browser.quit()
    手动写js
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
    from selenium.webdriver.common.keys import Keys #键盘按键操作
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
    
    # A frame behaves like a separate page: elements inside a child frame are
    # not visible from the parent, so switch into the frame with
    # browser.switch_to.frame(...) before searching inside it.
    #
    # Create the driver before the try block: if Chrome fails to start there
    # is nothing to clean up, and the finally clause must not reference an
    # unbound name.
    browser = webdriver.Chrome()
    try:
        browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

        # Look up an element inside the iframe first ...
        browser.switch_to.frame("iframeResult")
        tag1 = browser.find_element_by_id("droppable")
        print(tag1)

        # ... then return to the parent document and look up an element there.
        browser.switch_to.parent_frame()
        tag2 = browser.find_element_by_id("textareaCode")
        print(tag2)
    finally:
        # quit() ends the whole WebDriver session, not just the current window.
        browser.quit()
    iframe切换问题
    import time
    from selenium import webdriver
    # Build up a browsing history of three pages, then step back and forward.
    browser = webdriver.Chrome()
    for page_url in ("https://www.baidu.com", "https://www.taobao.com", "https://www.sina.com.cn/"):
        browser.get(page_url)

    pause_seconds = 12
    browser.back()              # go back one entry in the history
    time.sleep(pause_seconds)
    browser.forward()           # go forward again
    time.sleep(pause_seconds)
    browser.close()
    
    # 访问顺序;首先访问百度,淘宝,新浪,然后返回淘宝,12秒后前进到新浪,12秒后关闭浏览器
    模拟浏览器前进后退
    #cookies
    # from selenium import webdriver
    # 
    # browser=webdriver.Chrome()
    # browser.get('https://www.zhihu.com/explore')
    # print(browser.get_cookies())
    # browser.add_cookie({'name':'k1','value':'xxx'})
    # print(browser.get_cookies())
    # 
    # # browser.delete_all_cookies()
    cookie操作
    import time
    from selenium import webdriver
    # Open a second tab and switch back and forth between the two tabs.
    browser = webdriver.Chrome()
    try:
        browser.get("https://www.baidu.com")
        browser.execute_script("window.open()")     # open a new, empty tab
        print(browser.window_handles)               # one handle per open tab

        # Work in the new tab.
        browser.switch_to.window(browser.window_handles[1])
        browser.get("https://www.taobao.com")
        time.sleep(10)

        # Return to the first tab.
        browser.switch_to.window(browser.window_handles[0])
        browser.get("https://www.sina.com.cn")
    finally:
        # close() only closes the tab that currently has focus, which would
        # leave the second tab and the driver session alive. quit() closes
        # every window and ends the session.
        browser.quit()
    # 先访问百度页面,然后打开新的选项卡,跳转到新的选项卡,访问淘宝,10秒后跳转到第一个选项卡,访问新浪,关闭
    选项卡切换操作
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException,NoSuchElementException,NoSuchFrameException
    
    # Exception handling demo: switching to a frame that does not exist.
    #
    # Create the driver before the try block: if Chrome fails to start there
    # is nothing to clean up, and the finally clause must not reference an
    # unbound name.
    browser = webdriver.Chrome()
    try:
        browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
        # The frame name is deliberately misspelled so the lookup fails.
        browser.switch_to.frame('iframssseResult')

    except TimeoutException as e:
        print(e)
    except NoSuchFrameException as e:
        print(e)
    finally:
        # quit() ends the whole WebDriver session, not just the current window.
        browser.quit()
    异常处理

    # -*- coding: utf-8 -*-
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
    from selenium.webdriver.common.keys import Keys #键盘按键操作
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
    import time
    
    def get_good(driver):
        """Print every product on the current result page, then follow "next page".

        For each element with class ``gl-item`` the detail link, price, name
        and comment count are read and printed. Afterwards the link whose text
        contains ``下一页`` is clicked and the next page is processed the same
        way, until no such link is found or it cannot be clicked.

        Pages are walked with a loop rather than recursion, so the number of
        pages is not limited by the interpreter's recursion depth. An item
        whose fields cannot be read is reported and skipped instead of
        silently ending the whole crawl.

        :param driver: a Selenium WebDriver already showing a result page.
        :return: None
        """
        while True:
            for good in driver.find_elements_by_class_name("gl-item"):
                try:
                    detail_url = good.find_element_by_tag_name("a").get_attribute("href")     # detail page link
                    detail_price = good.find_element_by_css_selector(".p-price i").text       # price
                    detail_name = good.find_element_by_css_selector(".p-name em").text        # product name
                    detail_com = good.find_element_by_css_selector(".p-commit a").text        # comment count
                except Exception as exc:
                    # Best effort: one malformed tile must not stop the crawl.
                    print("skipping item that could not be parsed: %r" % (exc,))
                    continue
                msg = """
                    商品名:%s
                    详情链接:%s
                    商品价格:%s
                    评论量:%s
                """%(detail_name,detail_url,detail_price,detail_com)
                print(msg)
                # Results could be written to a file here.

            # find_elements_* returns an empty list when nothing matches, so
            # the end of the pagination needs no exception handling.
            next_buttons = driver.find_elements_by_partial_link_text('下一页')
            if not next_buttons:
                return
            try:
                next_buttons[0].click()
            except Exception as exc:
                print("stopping: next-page link could not be clicked: %r" % (exc,))
                return
            # Pause between pages; per the original author, crawling too fast
            # is easily detected by the server.
            time.sleep(2)
    
    def spilder(url,keyword):
        """Open *url* in Chrome, search for *keyword* and print all result pages.

        The keyword is typed into the element with id ``key`` and submitted
        with Enter; the result pages are then handed to ``get_good``.

        :param url: address of the shop's start page.
        :param keyword: search term to type into the search box.
        :return: None
        """
        driver = webdriver.Chrome()
        try:
            # Element lookups retry for up to 7 seconds before failing.
            driver.implicitly_wait(7)
            # Navigate inside the try block so a failed page load still shuts
            # the browser down.
            driver.get(url)
            input_tag = driver.find_element_by_id("key")
            input_tag.send_keys(keyword)
            input_tag.send_keys(Keys.ENTER)
            get_good(driver)
        finally:
            # quit() ends the whole WebDriver session; close() would only
            # close the current window.
            driver.quit()
    
    if __name__ == '__main__':
        # The original URL was "https:www.jd.com", without the "//" after the
        # scheme; use the well-formed address instead of relying on the
        # browser to repair it.
        spilder("https://www.jd.com","情趣")
    爬取京东商品
    # -*- coding: utf-8 -*-
    
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By #按照什么方式查找,By.ID,By.CSS_SELECTOR
    from selenium.webdriver.common.keys import Keys #键盘按键操作
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait #等待页面加载某些元素
    from PIL import Image #pip3 install pillow
    
    import time
    
    def get_snap(driver):
        """Save a screenshot of the browser to ``snap.png`` and open it.

        :param driver: Selenium WebDriver to take the screenshot from.
        :return: the screenshot as an image object opened with ``Image.open``.
        """
        screenshot_path = "snap.png"
        driver.save_screenshot(screenshot_path)
        # Call .show() on the returned image to inspect it while debugging.
        return Image.open(screenshot_path)
    
    
    def get_image(driver):
        """Return the captcha picture cut out of a fresh screenshot.

        The element with class ``geetest_canvas_img`` is located, its size and
        location are read after a two-second pause, and that rectangle is
        cropped from the screenshot produced by ``get_snap``.

        :param driver: Selenium WebDriver showing the captcha.
        :return: the cropped image of the captcha picture.
        """
        captcha = driver.find_element_by_class_name("geetest_canvas_img")
        time.sleep(2)
        dims = captcha.size          # width and height of the picture
        origin = captcha.location    # position of the picture on the page

        # Crop box in (left, top, right, bottom) order.
        box = (
            origin["x"],
            origin["y"],
            origin["x"] + dims["width"],
            origin["y"] + dims["height"],
        )
        return get_snap(driver).crop(box)
    
    
    def get_distance(image1, image2, start_x=58, threshold=60, offset=7):
        """Return the x position where *image2* first differs from *image1*.

        Columns are scanned left to right starting at *start_x*; within a
        column, rows are scanned top to bottom. The first pixel whose red,
        green or blue channel differs by *threshold* or more ends the scan.

        :param image1: reference image; needs ``size`` and ``load()`` as PIL
            images provide them.
        :param image2: image to compare against the reference.
        :param start_x: first column to inspect.
        :param threshold: per-channel difference at or above which two pixels
            count as different.
        :param offset: value subtracted from the matching column before it is
            returned.
        :return: ``x - offset`` for the first differing column, or ``None``
            when no pixel differs by the threshold.
        """
        # Load the pixel accessors once; the original called load() for every
        # single pixel inside the inner loop.
        pixels1 = image1.load()
        pixels2 = image2.load()
        # Stay inside both images in case their sizes differ.
        width = min(image1.size[0], image2.size[0])
        height = min(image1.size[1], image2.size[1])
        for x in range(start_x, width):
            for y in range(height):
                rgb1 = pixels1[x, y]
                rgb2 = pixels2[x, y]
                # Only the first three channels (R, G, B) are compared.
                if any(abs(c1 - c2) >= threshold for c1, c2 in zip(rgb1[:3], rgb2[:3])):
                    return x - offset
        return None
    
    
    def get_tracks(distance):
        """Split a slide of *distance* pixels into small per-step offsets.

        The forward part deliberately aims 20 pixels past *distance*: it
        speeds up (acceleration 2) until three fifths of that target is
        covered and slows down (acceleration -3) afterwards, in time steps of
        0.2. Each step is rounded to a whole number. The backward part is a
        fixed list that adds up to -20 and undoes the overshoot.

        :param distance: number of pixels the slider has to travel.
        :return: dict with the keys ``"forward_tracks"`` and ``"back_tracks"``,
            each a list of per-step x offsets.
        """
        target = distance + 20          # overshoot on purpose, corrected by the back tracks
        switch_point = target*3/5       # where acceleration turns into deceleration
        step_time = 0.2
        velocity = 0
        travelled = 0
        forward_steps = []
        while travelled < target:
            accel = 2 if travelled < switch_point else -3
            # Displacement for this time step: v*t + a*t^2/2, rounded.
            step = round(velocity*step_time+0.5*accel*(step_time**2))
            velocity = velocity + accel*step_time
            travelled += step
            forward_steps.append(step)
        return {
            "forward_tracks": forward_steps,
            "back_tracks": [-1, -1, -1, -2, -2, -2, -3, -3, -2, -2, -1],
        }
    
    
    
    def slice(url,username,password):
        """Log in at *url* and try to solve the slider captcha that follows.

        Steps: fill in the credentials and submit, open the captcha, take a
        picture of it before and after the gap appears, locate the gap by
        comparing the two pictures, then drag the slider along a track
        produced by ``get_tracks``.

        Failures are reported on stdout rather than raised (the original
        swallowed them silently), and the browser session is always shut down.

        NOTE: the function name shadows the built-in ``slice``; it is kept so
        existing callers continue to work.

        :param url: address of the sign-in page.
        :param username: account name typed into the ``input1`` field.
        :param password: password typed into the ``input2`` field.
        :return: None
        """
        # Create the driver before the try block: if Chrome fails to start
        # there is nothing to clean up, and the finally clause must not
        # reference an unbound name.
        driver = webdriver.Chrome()
        try:
            driver.get(url)
            driver.implicitly_wait(3)   # element lookups retry for up to 3 seconds

            # 1. Enter account and password, then click the sign-in button.
            input_user = driver.find_element_by_id("input1")
            input_pwd = driver.find_element_by_id("input2")
            login_user = driver.find_element_by_id("signin")
            input_user.send_keys(username)
            input_pwd.send_keys(password)
            login_user.click()

            # 2. Click the verification button so the complete picture shows up.
            geetest_radar_tip = driver.find_element_by_class_name("geetest_radar_tip")
            geetest_radar_tip.click()

            # 3. Capture the picture without the gap.
            image1 = get_image(driver)

            # 4. Click the slider button so the picture with the gap shows up.
            slider_button = driver.find_element_by_class_name("geetest_slider_button")
            slider_button.click()

            # 5. Capture the picture with the gap.
            image2 = get_image(driver)

            # 6. Compare both pictures; the gap position is the slide distance.
            distance = get_distance(image1, image2)
            if distance is None:
                # Without this check the failure would surface as an obscure
                # TypeError inside get_tracks.
                raise ValueError("no difference found between the two captcha pictures")

            # 7. Cut the total distance into small, human-like steps.
            track_dic = get_tracks(distance)

            # 8. Press and hold the slider, then move it along the track.
            slider_button = driver.find_element_by_class_name("geetest_slider_button")
            ActionChains(driver).click_and_hold(slider_button).perform()

            forward_tracks = track_dic["forward_tracks"]  # steps to the right (overshoots by 20)
            back_tracks = track_dic["back_tracks"]        # steps back to the left (undo the overshoot)
            print(forward_tracks, back_tracks)
            for forward_track in forward_tracks:
                ActionChains(driver).move_by_offset(xoffset=forward_track, yoffset=0).perform()
            time.sleep(0.3)  # short pause to imitate a human hesitation

            for back_track in back_tracks:
                ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=0).perform()

            # Small wiggle left and right before letting go.
            ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()
            ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()
            time.sleep(0.3)
            ActionChains(driver).release().perform()
            time.sleep(2)

        except Exception as exc:
            # Still best effort, but no longer silent.
            print("slider login failed: %r" % (exc,))

        finally:
            # quit() ends the whole WebDriver session; close() would only
            # close the current window.
            driver.quit()
    
    
    
    
    if __name__ == '__main__':
        # Placeholder credentials for the cnblogs sign-in page; replace them
        # with a real account before running.
        url, username, password = (
            "https://passport.cnblogs.com/user/signin",
            "username",
            "password",
        )
        slice(url,username,password)
    自动登录博客园

  • 相关阅读:
    ASP.NET 学习笔记(一)ASP.NET 概览
    JSP基础
    算法
    TestNG基础教程
    TestNG基础教程
    TestNG基础教程
    Jira
    Jira
    Jira
    Jira
  • 原文地址:https://www.cnblogs.com/52-qq/p/8303336.html
Copyright © 2020-2023  润新知