• Python selenium PIL 全网页滚动截屏 && headless全网页截屏


    1. 思路

      先截取当前屏幕的图片,获取其高度作为基准高度 h;再获取整个网页 body 的总高度 H;然后按 h 逐屏滚动并循环截图,最后通过 PIL 将各屏图片拼接成一张完整的长图。

    2. 代码

      # -*- coding:utf-8 -*-
      # author: donttouchkeyboard@gmail.com
      # software: PyCharm
      import os
      
      from PIL import Image
      from time import sleep
      
      
      class ScreenShot:
          """Capture a full-page screenshot by scrolling the page and stitching with PIL.

          The page is captured one viewport at a time; every new capture is appended
          below the previous ones, and the final capture (taken after scrolling to the
          bottom) is pasted with an overlap so the tail of the page is not duplicated.
          """
          __JS__ = {
              'scroll_to_bottom': "window.scroll({top:document.body.clientHeight,left:0,behavior:'auto'});",
              'scroll_to_y': "window.scroll({top:%d,left:0,behavior:'auto'});",
              'viewport_height': "return window.innerHeight;",
          }
          __base_end__ = 'tmp_end.png'
          __scroll_bottom__ = 'scroll_to_bottom'
          __scroll_y__ = 'scroll_to_y'
          __viewport_h__ = 'viewport_height'
          __body__ = '//body'
          __height__ = 'height'
          # Retained for backward compatibility only. It is no longer executed:
          # it would delete every PNG in the working directory, not just ours.
          __clear_shell__ = 'rm -rf *.png'
          __RGB__ = 'RGB'

          @classmethod
          def screen_shot(cls, driver, title, uploader_url='', delete=False):
              """
              Take a scrolling screenshot of the whole page and stitch it into one image.

              :param driver: webdriver instance with the target page already loaded
              :param title: name of the final image (written to ``<title>.png``)
              :param uploader_url: upload URL; currently unused (upload is still a TODO)
              :param delete: when true, remove every image file written by this call,
                  including the final ``<title>.png``
              :return: URL of the uploaded image; always ``''`` until upload is implemented
              """
              base_image = '{}.png'.format(title)
              created = [base_image]
              driver.save_screenshot(base_image)

              # find_element(by, value) exists in Selenium 3 and 4, whereas
              # find_element_by_xpath was removed in Selenium 4.3.
              body = driver.find_element('xpath', cls.__body__)
              body_h = int(body.size[cls.__height__])

              with Image.open(base_image) as first_shot:
                  shot_px_h = first_shot.size[1]

              # Derive the CSS-pixel -> image-pixel ratio from the real viewport
              # instead of assuming a 2x (Retina) display.
              viewport_h = int(driver.execute_script(cls.__JS__[cls.__viewport_h__]) or 0)
              if viewport_h > 0:
                  scale = shot_px_h / viewport_h
              else:
                  # Viewport height unavailable: fall back to the historical 2x assumption.
                  scale = 2
                  viewport_h = max(1, shot_px_h // 2)

              # A page that fits in a single viewport is already fully captured;
              # stitching the "bottom" capture onto it would duplicate the content.
              if body_h > viewport_h:
                  for i in range(1, body_h // viewport_h):
                      driver.execute_script(cls.__JS__[cls.__scroll_y__] % (viewport_h * i))
                      sleep(.5)
                      tmp_image = f'tmp_{i}.png'
                      created.append(tmp_image)
                      driver.save_screenshot(tmp_image)
                      cls.__join_images__(base_image, tmp_image, 0, base_image, scale)
                  driver.execute_script(cls.__JS__[cls.__scroll_bottom__])
                  created.append(cls.__base_end__)
                  driver.save_screenshot(cls.__base_end__)
                  # The last capture repeats everything except the remainder of the page.
                  overlap = viewport_h - body_h % viewport_h
                  cls.__join_images__(base_image, cls.__base_end__, overlap, base_image, scale)

              # TODO: upload the image to uploader_url
              url = ''
              if delete:
                  # Remove only the files written above, never unrelated PNGs.
                  for path in created:
                      try:
                          os.remove(path)
                      except FileNotFoundError:
                          # Already removed (e.g. title collided with a temp name).
                          pass
              return url

          @classmethod
          def __join_images__(cls, png1, png2, size=0, output='result.png', scale=2):
              """
              Stack two images vertically, with the second overlapping the first.

              :param png1: path of the upper image
              :param png2: path of the lower image
              :param size: overlap between the two images, in CSS pixels
              :param output: path of the stitched image (may be the same as ``png1``)
              :param scale: image pixels per CSS pixel; defaults to 2, the ratio
                  that was previously hard-coded
              :return: None
              """
              overlap_px = int(round(size * scale))
              with Image.open(png1) as upper, Image.open(png2) as lower:
                  width, upper_h = upper.size
                  lower_h = lower.size[1]
                  joint = Image.new(cls.__RGB__, (width, upper_h + lower_h - overlap_px))
                  joint.paste(upper, (0, 0))
                  joint.paste(lower, (0, upper_h - overlap_px))
              # Save after the inputs are closed, since output may overwrite png1.
              joint.save(output)
      
      
      if __name__ == '__main__':
          # Demo: capture the whole blog page into worldline.png.
          from selenium import webdriver
          driver = webdriver.Chrome()
          try:
              driver.get("https://www.cnblogs.com/worldline/")
              ScreenShot.screen_shot(driver, 'worldline')
          finally:
              # Always shut the browser down, even if the capture fails.
              driver.quit()
      
      
    3. 其他

      如果是在 headless(无头)模式下,可以直接把浏览器窗口尺寸设置为整个页面的宽高,然后一次性截取全网页:

      
      def get_image(url, pic_name):
          """
          Take a full-page screenshot with headless Chrome.

          The window is resized to the document's scroll width and height so that
          a single screenshot covers the whole page.

          :param url: URL of the page to capture
          :param pic_name: path of the image file to write
          :return: None
          """
          # Imported locally so the function is self-contained: the surrounding
          # file imports none of these names at module level.
          import time
          from selenium import webdriver
          from selenium.webdriver.chrome.options import Options

          chrome_options = Options()
          chrome_options.add_argument('headless')
          driver = webdriver.Chrome(options=chrome_options)
          try:
              driver.get(url)
              time.sleep(.5)
              width = driver.execute_script("return document.documentElement.scrollWidth")
              height = driver.execute_script("return document.documentElement.scrollHeight")
              print(width, height)
              driver.set_window_size(width, height)
              # Give the page a moment to re-layout at the new window size.
              time.sleep(.5)
              driver.save_screenshot(pic_name)
          finally:
              # quit() ends the session and the chromedriver process;
              # close() would only close the current window.
              driver.quit()
      
  • 相关阅读:
    SSL 数据加密原理简述
    MQTT 协议 部分细节
    ARM汇编--汇编中符号和变量
    Kconfig 配置文件编码规则
    ARM汇编指令-STMFD/LDMFD
    python类属性和对象属性、类的普通方法和静态方法
    ARM汇编---程序获取符号的物理地址
    Spring源码分析:非懒加载的单例Bean初始化前后的一些操作
    Spring源码分析:非懒加载的单例Bean初始化过程(下)
    Spring源码分析:非懒加载的单例Bean初始化过程(上)
  • 原文地址:https://www.cnblogs.com/worldline/p/15430085.html
Copyright © 2020-2023  润新知