• 爬虫基础 selenium 基础


    # -*- coding:utf8 -*-
    # 工程路径:selenium驱动浏览器详解.py
    # 工程日期:10/6/2019
    # 工程目标:selenium 自动化测试库
    
    #%% 用来驱动浏览器模拟人的操作
    # 主要用于解析JS渲染的页面
    
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import  WebDriverWait
    
    # Start a Chrome session; it is shut down in the `finally` clause below.
    browser = webdriver.Chrome()
    try:
        browser.get('https://www.baidu.com')  # load the start page
        # `find_element_by_id` was removed in Selenium 4.3; the generic
        # `find_element(By.ID, ...)` form works on Selenium 3 and 4 alike.
        # The element is not called `input`, to avoid shadowing the builtin.
        search_box = browser.find_element(By.ID, 'kw')
        search_box.send_keys('美女图片')   # type the search keyword
        search_box.send_keys(Keys.ENTER)  # submit by pressing Enter
        # Block for up to 10 seconds until an element with id "content_left"
        # is present in the DOM (presumably the results column -- confirm).
        wait = WebDriverWait(browser, 10)
        wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
        print(browser.current_url)    # URL of the page now loaded
        print(browser.get_cookies())  # cookies visible to the session
    finally:
        print("ok")
        # quit() ends the whole WebDriver session and its driver process;
        # close() would only close the current window.
        browser.quit()
    
    #%%  选择元素
    # 普通选择
    # css选择
    # xpath选择
    
    # 选取单个元素
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    # Locate the same single element (id "q") with three locator strategies.
    # The `find_element_by_*` helpers were removed in Selenium 4.3, so the
    # generic `find_element(By.<strategy>, value)` form is used instead.
    browser = webdriver.Chrome()
    browser.get('http://www.taobao.com')
    # Use the singular `find_element` here: it yields one WebElement, whereas
    # the plural `find_elements` variant yields a list.
    find_nom = browser.find_element(By.ID, 'q')  # lookup by id
    find_css = browser.find_element(By.CSS_SELECTOR, '#q')  # lookup by CSS selector
    find_xpath = browser.find_element(By.XPATH, '//*[@id="q"]')  # lookup by XPath
    print(find_css, find_nom, find_xpath)
    """
    * find_element_by_name
    * find_element_by_xpath
    * find_element_by_link_text
    * find_element_by_partial_link_text
    * find_element_by_tag_name
    * find_element_by_class_name
    * find_element_by_css_selector
    """
    
    
    # Generic lookup: hand find_element the locator strategy and its value,
    # e.g. browser.find_element(By.ID, 'q').
    find_us = browser.find_element(by=By.ID, value='q')
    print(find_us)
    
    
    # Look up several elements at once with the plural `find_elements`.
    print("----查找多个元素----")
    try:
        # The `find_elements_by_*` helpers were removed in Selenium 4.3; the
        # generic `find_elements(By.<strategy>, value)` form replaces them.
        find_more = browser.find_elements(By.ID, 'q')
        find_more_css = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
        # Same CSS query written with keyword arguments. Note the plural
        # method name compared with the singular `find_element`.
        find_more_nom = browser.find_elements(by=By.CSS_SELECTOR, value='.service-bd li')
        print(find_more)
        print(find_more_css)
        print("# 注意普通参数和CSS参数, 以及选择的那个元素和多个元素的复数的区别")
        print(find_more_nom)
    finally:
        # This is the last use of the browser in this cell, so end the
        # session here instead of leaving the window and driver running.
        browser.quit()
    
    """ 多个元素的返回是列表的形式
    * find_elements_by_name
    * find_elements_by_xpath
    * find_elements_by_link_text
    * find_elements_by_partial_link_text
    * find_elements_by_tag_name
    * find_elements_by_class_name
    * find_elements_by_css_selector
    """
    
    #%% 元素的交互操作
    # 对浏览器中的元素进行操作, 模拟人的交互:
    # 获取浏览器的文本框、按钮、滑动条等元素, 并输入文字、点击等
    import time

    from selenium import webdriver
    from selenium.webdriver.common.by import By

    # Type into a text box, clear it, type again, then click a button.
    # The browser is not closed here, so the resulting page stays visible;
    # call browser.quit() when finished with it.
    browser = webdriver.Chrome()
    browser.get('https://www.taobao.com')
    # `find_element_by_id` / `find_element_by_class_name` were removed in
    # Selenium 4.3; `find_element(By.<strategy>, value)` works on 3 and 4.
    # The element is not called `input`, to avoid shadowing the builtin.
    search_box = browser.find_element(By.ID, 'q')
    search_box.send_keys('iPhone')  # type a first keyword
    time.sleep(1)                   # pause for one second
    search_box.clear()              # wipe the text that was typed
    search_box.send_keys('iPad')    # type the replacement keyword
    button = browser.find_element(By.CLASS_NAME, 'btn-search')
    button.click()                  # click the element with class "btn-search"
    #%% 更多操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webelement
    
    
    
    #%% 交互的动作 将元素的动作加到动作链中串行执行, action chains 动作链
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By

    # Drag one element onto another using an action chain.
    # The browser is not closed here, so the outcome stays visible;
    # call browser.quit() when finished with it.
    browser = webdriver.Chrome()
    url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
    browser.get(url)
    # The elements live inside the frame named "iframeResult", so switch the
    # driver's context into that frame before searching.
    browser.switch_to.frame('iframeResult')
    # `find_element_by_css_selector` was removed in Selenium 4.3; use the
    # generic `find_element(By.CSS_SELECTOR, ...)` form instead.
    source = browser.find_element(By.CSS_SELECTOR, '#draggable')  # element to drag
    target = browser.find_element(By.CSS_SELECTOR, '#droppable')  # element to drop onto
    actions = ActionChains(browser)
    # drag_and_drop only stores the action in the chain ...
    actions.drag_and_drop(source, target)
    # ... and perform() is what carries out all stored actions.
    actions.perform()
    
    #其他交互操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.action_chains
    """
    click_and_hold(on_element=None)
        Holds down the left mouse button on an element.
        Args:
        on_element: The element to mouse down. If None, clicks on current mouse position.
    
    context_click(on_element=None)
        Performs a context-click (right click) on an element.
        Args:
        on_element: The element to context-click. If None, clicks on current mouse position.
    
    double_click(on_element=None)
        Double-clicks an element.
        Args:
        on_element: The element to double-click. If None, clicks on current mouse position.
    
    drag_and_drop(source, target)
        Holds down the left mouse button on the source element,
        then moves to the target element and releases the mouse button.
        Args:
        source: The element to mouse down.
        target: The element to mouse up.
    
    drag_and_drop_by_offset(source, xoffset, yoffset)
        Holds down the left mouse button on the source element,
        then moves to the target offset and releases the mouse button.
        Args:
        source: The element to mouse down.
        xoffset: X offset to move to.
        yoffset: Y offset to move to.
    
    key_down(value, element=None)
        Sends a key press only, without releasing it.
        Should only be used with modifier keys (Control, Alt and Shift).
        Args:
        value: The modifier key to send. Values are defined in Keys class.
        element: The element to send keys. If None, sends a key to current focused element.
        Example, pressing ctrl+c:
            ActionChains(driver).key_down(Keys.CONTROL).send_keys('c').key_up(Keys.CONTROL).perform()
    
    key_up(value, element=None)
        Releases a modifier key.
        Args:
        value: The modifier key to send. Values are defined in Keys class.
        element: The element to send keys. If None, sends a key to current focused element.
        Example, pressing ctrl+c:
            ActionChains(driver).key_down(Keys.CONTROL).send_keys('c').key_up(Keys.CONTROL).perform()
    
    move_by_offset(xoffset, yoffset)
        Moving the mouse to an offset from current mouse position.
        Args:
        xoffset: X offset to move to, as a positive or negative integer.
        yoffset: Y offset to move to, as a positive or negative integer.
    
    move_to_element(to_element)
        Moving the mouse to the middle of an element.
        Args:
        to_element: The WebElement to move to.
    
    move_to_element_with_offset(to_element, xoffset, yoffset)
        Move the mouse by an offset of the specified element.
        Offsets are relative to the top-left corner of the element.
        Args:
        to_element: The WebElement to move to.
        xoffset: X offset to move to.
        yoffset: Y offset to move to.
    
    pause(seconds)
        Pause all inputs for the specified duration in seconds
    
    perform()
        Performs all stored actions.
    
    release(on_element=None)
        Releasing a held mouse button on an element.
        Args:
        on_element: The element to mouse up. If None, releases on current mouse position.
    
    reset_actions()
        Clears actions that are already stored locally and on the remote end
    
    send_keys(*keys_to_send)
        Sends keys to current focused element.
        Args:
        keys_to_send: The keys to send. Modifier keys constants can be found in the ‘Keys’ class.
    
    send_keys_to_element(element, *keys_to_send)
        Sends keys to an element.
        Args:
        element: The element to send keys.
        keys_to_send: The keys to send. Modifier keys constants can be found in the ‘Keys’ class
    """
    
    #%% 执行javascript
    # 通过execute_script 来执行javascript交互
    # 万能方法
    from selenium import webdriver

    # Run JavaScript snippets in the loaded page through execute_script:
    # first scroll to the bottom of the document, then raise an alert.
    # The browser is not closed in this cell.
    browser = webdriver.Chrome()
    browser.get('https://www.zhihu.com/explore')
    for script in (
        'window.scrollTo(0, document.body.scrollHeight)',
        'alert("To Bottom")',
    ):
        browser.execute_script(script)
    
    
    #%%  获取页面的节点的属性信息, 文本信息
    
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    # Read the visible text of two elements located by class name.
    browser = webdriver.Chrome()
    try:
        browser.get('https://www.zhihu.com/explore')
        # `find_element_by_class_name` was removed in Selenium 4.3; use the
        # generic `find_element(By.CLASS_NAME, ...)` form instead. The names
        # avoid `input`, which would shadow the builtin.
        # NOTE(review): these class names depend on the site's current
        # markup -- confirm they still exist.
        add_question = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
        nav_link = browser.find_element(By.CLASS_NAME, 'zu-top-nav-link')
        print(add_question.text)
        print(nav_link.text)
    finally:
        # Only text is printed here, so end the session once done.
        browser.quit()
    
    #%% 浏览器的前进与后退操作 (历史记录导航)
    import time
    from selenium import webdriver

    # Visit three pages in a row to build up history, then step back one
    # page and forward again.
    browser = webdriver.Chrome()
    for page_url in (
        'https://www.baidu.com/',
        'https://www.taobao.com/',
        'https://www.python.org/',
    ):
        browser.get(page_url)
    browser.back()     # go to the previous page in history
    time.sleep(1)      # pause for one second before moving forward
    browser.forward()  # go forward again
    browser.close()
    
    #%% cookies 使用和管理
    from selenium import webdriver

    # Show the session cookies three times: as loaded, after adding one
    # cookie, and after deleting them all. The browser is not closed here.
    extra_cookie = {'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'}

    browser = webdriver.Chrome()
    browser.get('https://www.zhihu.com/explore')
    print(browser.get_cookies())   # cookies right after the page load
    browser.add_cookie(extra_cookie)
    print(browser.get_cookies())   # cookies after adding one
    browser.delete_all_cookies()
    print(browser.get_cookies())   # cookies after deleting all of them
    
    #%% 异常的处理
    # 查看官方文档详细的异常的情况
    
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException, NoSuchElementException
    from selenium.webdriver.common.by import By

    # Catch two common Selenium exceptions: a page-load timeout and a lookup
    # for an element that does not exist.
    browser = webdriver.Chrome()
    try:
        try:
            browser.get('https://www.baidu.com')
        except TimeoutException:
            print('Time Out')
        try:
            # Use the generic `find_element(By.ID, ...)`: on Selenium 4.3+ the
            # removed `find_element_by_id` would raise AttributeError instead
            # of the NoSuchElementException this demo is meant to show.
            browser.find_element(By.ID, 'hello')
        except NoSuchElementException:
            print('No Element')
    finally:
        # The outer `finally` covers both steps, so the session is ended even
        # if `get` fails with something other than a timeout. quit() ends the
        # whole session; close() would only close the current window.
        browser.quit()
    # 官方文档:http://selenium-python.readthedocs.io/api.html#module-selenium.common.exceptions
    
    
    
    
    
    
    
    
    
  • 相关阅读:
    JAVA多线程2 锁
    IE8标准模式下VML不能显示问题
    JAVA多线程1
    JAVA判断32位还是64位,调用不同的DLL
    JNA调用DLL
    如何提高执行力
    httpClient多线程请求
    【NodeJS】安装
    [转载]一个项目涉及到的50个Sql语句(整理版)
    resultMap中的collection集合出现只能读取一条数据的解决方法
  • 原文地址:https://www.cnblogs.com/binyang/p/10998419.html
Copyright © 2020-2023  润新知