# 爬虫基础 selenium 基础

# -*- coding:utf8 -*-
# 工程路径：selenium驱动浏览器详解.py
# 工程日期：10/6/2019
# 工程目标：selenium 自动化测试库

#%% 用来驱动浏览器模拟人的操作
# 主要用于解析JS渲染的页面

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import  WebDriverWait

browser = webdriver.Chrome()  # start a Chrome session
try:
    browser.get('https://www.baidu.com')  # load the search page
    # Use the generic find_element(By, value) API: the find_element_by_*
    # helpers no longer exist in Selenium 4.  The name search_box also avoids
    # shadowing the builtin input().
    search_box = browser.find_element(By.ID, 'kw')
    search_box.send_keys('美女图片')   # type the search keyword
    search_box.send_keys(Keys.ENTER)   # submit with the Enter key
    wait = WebDriverWait(browser, 10)  # wait for at most 10 seconds
    # Block until the results container is present in the DOM.
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    print(browser.current_url)    # URL of the results page
    print(browser.get_cookies())  # cookies visible to the current page
finally:
    print("ok")
    # quit() ends the WebDriver session and closes every associated window;
    # close() would only close the current window.
    browser.quit()

#%%  选择元素
# 普通选择
# css选择
# xpath选择

# 选取单个元素
from selenium import webdriver
browser = webdriver.Chrome()
browser.get('http://www.taobao.com')
# Look up the same search box (#q) with three different locator strategies.
# The singular find_element returns one WebElement (find_elements would
# return a list), so all three results are directly comparable.
find_nom = browser.find_element(By.ID, 'q')  # by element id
find_css = browser.find_element(By.CSS_SELECTOR, '#q')  # by CSS selector
find_xpath = browser.find_element(By.XPATH, '//*[@id="q"]')  # by XPath expression
print(find_css, find_nom, find_xpath)
"""
* find_element_by_name
* find_element_by_xpath
* find_element_by_link_text
* find_element_by_partial_link_text
* find_element_by_tag_name
* find_element_by_class_name
* find_element_by_css_selector
"""


# Generic lookup: browser.find_element(<By strategy>, <value>)
locator = (By.ID, 'q')
find_us = browser.find_element(*locator)
print(find_us)


# 多个元素查找
print("----查找多个元素----")
# The plural find_elements returns a list of WebElements.  It takes the same
# (By strategy, value) pair as find_element; the find_elements_by_* helpers
# no longer exist in Selenium 4.
find_more = browser.find_elements(By.ID, 'q')
find_more_css = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
# Same CSS query again: find_more_css and find_more_nom are expected to match.
find_more_nom = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
print(find_more)
print(find_more_css)
print("# 注意普通参数和CSS参数， 以及选择的那个元素和多个元素的复数的区别")
print(find_more_nom)

""" 多个元素的的返回是列表的形式
* find_elements_by_name
* find_elements_by_xpath
* find_elements_by_link_text
* find_elements_by_partial_link_text
* find_elements_by_tag_name
* find_elements_by_class_name
* find_elements_by_css_selector
"""

#%% 元素的交互操作
# 对浏览器中的
# 获取浏览器的文本框，按钮，滑动条，交互 输入文字，对浏览器的元素进行操作模拟人交互
from selenium import webdriver
import time

browser = webdriver.Chrome()   # start a Chrome session
browser.get('https://www.taobao.com')  # load the page
# Locate the search box with the generic find_element(By, value) API (the
# find_element_by_* helpers no longer exist in Selenium 4); the name
# search_box avoids shadowing the builtin input().
search_box = browser.find_element(By.ID, 'q')
search_box.send_keys('iPhone')    # type a first keyword
time.sleep(1)  # pause so the typed text is visible before it is cleared
search_box.clear()  # remove the typed keyword
search_box.send_keys('iPad')  # type the replacement keyword
button = browser.find_element(By.CLASS_NAME, 'btn-search')  # the search button
button.click()    # simulate a click on the button
#%% 更多操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webelement



#%% 交互的动作 将元素的动作加到动作链中串行执行， action chains 动作链
from selenium import webdriver
from selenium.webdriver import ActionChains

browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
browser.switch_to.frame('iframeResult')   # the demo lives inside this iframe
# Generic find_element(By, value) lookups; the find_element_by_* helpers no
# longer exist in Selenium 4.
source = browser.find_element(By.CSS_SELECTOR, '#draggable')     # element to drag
target = browser.find_element(By.CSS_SELECTOR, '#droppable')     # element to drop onto
actions = ActionChains(browser)  # action chain bound to this browser
actions.drag_and_drop(source, target)  # queue the drag-and-drop; nothing runs yet
actions.perform()  # execute every queued action in order

#其他交互操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.action_chains
"""
click_and_hold(on_element=None)
    Holds down the left mouse button on an element.
    Args:
    on_element: The element to mouse down. If None, clicks on current mouse position.

context_click(on_element=None)
    Performs a context-click (right click) on an element.
    Args:
    on_element: The element to context-click. If None, clicks on current mouse position.

double_click(on_element=None)
    Double-clicks an element.
    Args:
    on_element: The element to double-click. If None, clicks on current mouse position.

drag_and_drop(source, target)
    Holds down the left mouse button on the source element,
    then moves to the target element and releases the mouse button.
    Args:
    source: The element to mouse down.
    target: The element to mouse up.

drag_and_drop_by_offset(source, xoffset, yoffset)
    Holds down the left mouse button on the source element,
    then moves to the target offset and releases the mouse button.
    Args:
    source: The element to mouse down.
    xoffset: X offset to move to.
    yoffset: Y offset to move to.

key_down(value, element=None)
    Sends a key press only, without releasing it.
    Should only be used with modifier keys (Control, Alt and Shift).
    Args:
    value: The modifier key to send. Values are defined in Keys class.
    element: The element to send keys. If None, sends a key to current focused element.
    Example, pressing ctrl+c:
        ActionChains(driver).key_down(Keys.CONTROL).send_keys('c').key_up(Keys.CONTROL).perform()

key_up(value, element=None)
    Releases a modifier key.
    Args:
    value: The modifier key to send. Values are defined in Keys class.
    element: The element to send keys. If None, sends a key to current focused element.
    Example, pressing ctrl+c:
        ActionChains(driver).key_down(Keys.CONTROL).send_keys('c').key_up(Keys.CONTROL).perform()

move_by_offset(xoffset, yoffset)
    Moving the mouse to an offset from current mouse position.
    Args:
    xoffset: X offset to move to, as a positive or negative integer.
    yoffset: Y offset to move to, as a positive or negative integer.

move_to_element(to_element)
    Moving the mouse to the middle of an element.
    Args:
    to_element: The WebElement to move to.

move_to_element_with_offset(to_element, xoffset, yoffset)
    Move the mouse by an offset of the specified element.
    Offsets are relative to the top-left corner of the element.
    Args:
    to_element: The WebElement to move to.
    xoffset: X offset to move to.
    yoffset: Y offset to move to.

pause(seconds)
    Pause all inputs for the specified duration in seconds

perform()
    Performs all stored actions.

release(on_element=None)
    Releasing a held mouse button on an element.
    Args:
    on_element: The element to mouse up. If None, releases on current mouse position.

reset_actions()
    Clears actions that are already stored locally and on the remote end

send_keys(*keys_to_send)
    Sends keys to current focused element.
    Args:
    keys_to_send: The keys to send. Modifier keys constants can be found in the ‘Keys’ class.

send_keys_to_element(element, *keys_to_send)
    Sends keys to an element.
    Args:
    element: The element to send keys.
    keys_to_send: The keys to send. Modifier keys constants can be found in the ‘Keys’ class
"""

#%% 执行javascript
# 通过execute_script 来执行javascript交互
# 万能方法
from  selenium import  webdriver
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
# Run each JavaScript snippet in the page, in order: first scroll to the
# bottom of the document, then raise an alert dialog.
for script in (
    'window.scrollTo(0, document.body.scrollHeight)',
    'alert("To Bottom")',
):
    browser.execute_script(script)


#%%  获取页面的节点的属性信息， 文本信息

from selenium import webdriver
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
# Generic find_element(By, value) lookups; the find_element_by_* helpers no
# longer exist in Selenium 4.  The names follow the CSS classes being queried
# and avoid shadowing the builtin input().
add_question = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
nav_link = browser.find_element(By.CLASS_NAME, 'zu-top-nav-link')
# .text is the text content of the located element.
print(add_question.text)
print(nav_link.text)

#%% 翻页操作
import time
from selenium import webdriver

browser = webdriver.Chrome()
try:
    # Visit three pages in a row to build up a browsing history.
    browser.get('https://www.baidu.com/')
    browser.get('https://www.taobao.com/')
    browser.get('https://www.python.org/')
    browser.back()      # one step back in the history
    time.sleep(1)       # pause so the navigation is visible
    browser.forward()   # and forward again
finally:
    # quit() ends the WebDriver session and closes every associated window,
    # even when one of the navigations above raised.
    browser.quit()

#%% cookies 使用和管理
from selenium import webdriver

def _show_cookies(driver):
    """Print the cookies the driver currently reports for the loaded page."""
    print(driver.get_cookies())


browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
_show_cookies(browser)  # cookies present right after the page load

# Add one extra cookie, then list the cookies again.
extra_cookie = {'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'}
browser.add_cookie(extra_cookie)
_show_cookies(browser)

# Remove every cookie and list what is left.
browser.delete_all_cookies()
_show_cookies(browser)

#%% 异常的处理
# 查看官方文档详细的异常的情况

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException

browser = webdriver.Chrome()
# One outer try/finally guards the whole demo, so the session is shut down
# even if a step raises something other than the two exceptions handled below.
try:
    try:
        browser.get('https://www.baidu.com')
    except TimeoutException:
        print('Time Out')
    try:
        # Generic find_element(By, value) lookup; the find_element_by_*
        # helpers no longer exist in Selenium 4.
        browser.find_element(By.ID, 'hello')
    except NoSuchElementException:
        print('No Element')
finally:
    # quit() ends the WebDriver session and closes every associated window.
    browser.quit()
# 官方文档：http://selenium-python.readthedocs.io/api.html#module-selenium.common.exceptions
# 相关阅读:
# JAVA多线程2 锁
# IE8标准模式下VML不能显示问题
# JAVA多线程1
# JAVA判断32位还是64位，调用不同的DLL
# JNA调用DLL
# 如何提高执行力
# httpClient多线程请求
# 【NodeJS】安装
# [转载]一个项目涉及到的50个Sql语句(整理版)
# resultMap中的collection集合出现只能读取一条数据的解决方法
# 原文地址：https://www.cnblogs.com/binyang/p/10998419.html