• Python Selenium 网页截全图


    Python Selenium 网页截全图


    代码如下:

    from selenium import webdriver
    from selenium.webdriver.support.expected_conditions import _find_element
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.common.exceptions import StaleElementReferenceException
    
    
    class text_to_be_present_in_element(object):
        """Expected condition: the located element's text contains a substring.

        Instances are callables taking a driver, so they can be handed to
        ``WebDriverWait.until``.

        Args:
            locator: ``(by, value)`` pair; unpacked into ``driver.find_element``.
            text_: substring looked for in the element's ``text``.
        """

        def __init__(self, locator, text_):
            self.locator = locator
            self.text_ = text_

        def __call__(self, driver):
            """Return True when ``text_`` occurs in the located element's text.

            A stale element reference yields False instead of propagating, so
            a polling caller can simply evaluate the condition again.
            """
            try:
                # Go through the public ``find_element`` API instead of the
                # underscore-private ``expected_conditions._find_element``.
                element_text = driver.find_element(*self.locator).text
            except StaleElementReferenceException:
                return False
            return self.text_ in element_text
    
    
    class text_to_be_present_in_element_value(object):
        """Expected condition: an attribute of the located element contains text.

        Instances are callables taking a driver, so they can be handed to
        ``WebDriverWait.until``.

        Args:
            locator: ``(by, value)`` pair; unpacked into ``driver.find_element``.
            key: name of the attribute read through ``get_attribute``.
            text_: substring looked for in that attribute's value.
        """

        def __init__(self, locator, key, text_):
            self.locator = locator
            self.text = text_
            self.key = key

        def __call__(self, driver):
            """Return True when the attribute value contains the expected text.

            Returns False when the attribute value is empty or missing, and
            also when the element reference has gone stale.
            """
            try:
                # Go through the public ``find_element`` API instead of the
                # underscore-private ``expected_conditions._find_element``.
                element = driver.find_element(*self.locator)
                attribute_value = element.get_attribute(self.key)
            except StaleElementReferenceException:
                return False
            if not attribute_value:
                return False
            return self.text in attribute_value
    
    
    def chrome_headless(
            executable_path='/Users/dengjiajie/Desktop/mark_book/my_awesome_book/my_tools/chromedriver'):
        """Create a Chrome driver configured with headless command-line flags.

        Args:
            executable_path: path of the chromedriver binary handed to
                ``webdriver.Chrome``. The default is the location the script
                was originally written against; pass your own path to run it
                on another machine.

        Returns:
            The ``webdriver.Chrome`` instance that was created.
        """
        options = webdriver.ChromeOptions()
        for argument in ('--headless', '--disable-gpu', '--no-sandbox', 'window-size=1920x1080'):
            options.add_argument(argument)
        return webdriver.Chrome(options=options, executable_path=executable_path)
    
    
    def select_driver(name='chrome_headless'):
        """Build and return the driver registered under ``name``.

        Args:
            name: key of the driver factory to use.

        Raises:
            ValueError: if no factory is registered under ``name``.
        """
        # Registry of driver factories, keyed by the name callers pass in.
        factories = {'chrome_headless': chrome_headless}
        if name not in factories:
            raise ValueError('driver is None, please check driver exist ')
        factory = factories[name]
        return factory()
    
    
    class SeHandler():
        """Wrapper around a Selenium driver that saves a screenshot of a URL."""

        def __init__(self, driver=None):
            """Use ``driver`` when one is supplied, otherwise call ``select_driver()``."""
            self.driver = driver or select_driver()

        def save_img_from_url(self, url, abs_file_path, width=None, height=None, locator=None, text=None, attribute=None):
            """Load ``url``, optionally wait on an element, then save a screenshot.

            Args:
                url: page to load.
                abs_file_path: path passed to ``driver.save_screenshot``.
                width: window width; falls back to 1920 when falsy.
                height: window height; when falsy it is read from
                    ``document.body.scrollHeight`` of the loaded page.
                locator: ``(by, value)`` pair; when given, wait for that
                    element to become visible before taking the screenshot.
                text: when given, additionally wait until the located
                    element's text contains it.
                attribute: ``(name, value)`` tuple or list; when given,
                    additionally wait until the located element's attribute
                    ``name`` contains ``value``. Other types are ignored.

            Returns:
                True once the screenshot call has been made.

            Raises:
                ValueError: if ``text`` or ``attribute`` is given without a
                    ``locator`` (both waits need an element to inspect).
            """
            wait_for_attribute = bool(attribute) and isinstance(attribute, (tuple, list))
            # Fail fast, before loading the page: without a locator the text and
            # attribute waits would crash while unpacking ``None``.
            if (text or wait_for_attribute) and not locator:
                raise ValueError('locator is required when text or attribute is given')

            self.driver.get(url)
            if locator:
                # Only build the wait helper when there is something to wait for.
                wait = WebDriverWait(self.driver, 5, 0.5)
                print('进入元素显现并可定位')
                wait.until(EC.visibility_of_element_located(locator=locator))
                if text:
                    print('进入文本显式等待')
                    wait.until(text_to_be_present_in_element(locator=locator, text_=text))
                if wait_for_attribute:
                    print('进入属性值等待')
                    attribute_name, attribute_value = attribute
                    wait.until(
                        text_to_be_present_in_element_value(locator=locator, key=attribute_name, text_=attribute_value))

            # Resolve the window width.
            width = width or 1920
            print(f'{width}')
            # Resolve the window height from the page when not supplied.
            if not height:
                height = self.driver.execute_script('return document.body.scrollHeight')
            print(f'height:{height}')
            # Resize the window to the resolved dimensions before capturing.
            self.driver.set_window_size(width=width, height=height)
            # Take the screenshot.
            self.driver.save_screenshot(abs_file_path)
            return True

        def quit(self):
            """Quit the driver and drop the reference; repeated calls are no-ops."""
            if self.driver is None:
                return
            self.driver.quit()
            self.driver = None
    
    
    if __name__ == '__main__':
        # Demo: capture one page into a timestamped PNG in the working directory.
        import time
        from selenium.webdriver.common.by import By

        se_handler = SeHandler()
        try:
            url = 'https://debugtalk.com/post/use-pyenv-manage-multiple-python-virtualenvs/'
            file_name = f'{int(time.time())}_test-canvas.png'
            print(f'filename:{file_name}')
            locator = (By.ID, '背景')
            ret = se_handler.save_img_from_url(url, file_name, locator=locator, text='背景', attribute=('id', '背景'))
        finally:
            # Always shut the browser down, even when a wait times out, so the
            # driver process is not left running.
            se_handler.quit()
    
    

  • 相关阅读:
    uploadify控件在QQ、TT、firefox浏览器中不工作以及在updatecontrol中不工作的解决办法
    记202235日钓鱼 那个人
    Subtask Gated Networks for NonIntrusive Load Monitoring
    C#反射的应用
    activiti7实现流程撤回的两种思路
    antd pro V5从服务端请求菜单
    mysql复制一个表的数据到已存在的表中(可跨数据库实例)
    elasticsearch索引、文档、映射等概念
    vue图片查看(放大、缩小、旋转)
    spring事务传播机制之《REQUIRED》
  • 原文地址:https://www.cnblogs.com/snailrunning/p/15377553.html
Copyright © 2020-2023  润新知