• 用Python五步实现网页截图


    方案说明

    功能要求:实现网页加载后将页面截取成长图片
    涉及模块:PyQt5(含 QtWebEngineWidgets)、PIL(Pillow)

    逻辑说明:

    1:完成窗口设置,利用PyQt5的QWebEngineView加载网页地址,待网页加载完成后,调用check_page;

    class MainWindow(QMainWindow):
        """Frameless main window that hosts the web view to be captured."""

        def __init__(self, parent=None):
            """Set the window title and window flags.

            :param parent: optional parent widget passed to ``QMainWindow``.
            """
            super().__init__(parent)
            self.setWindowTitle('易哈佛')
            self.temp_height = 0
            # Qt.WindowStaysOnTopHint is deliberately not set here.
            window_flags = (
                (Qt.WindowMinMaxButtonsHint, False),  # no minimize/maximize buttons
                (Qt.FramelessWindowHint, True),  # no window frame
            )
            for flag, enabled in window_flags:
                self.setWindowFlag(flag, enabled)

        def urlScreenShot(self, url):
            """Load ``url`` in a new web view and arrange for ``check_page``
            to run when loading finishes.

            :param url: address of the page to load.
            """
            view = QWebEngineView()
            self.browser = view
            view.load(QUrl(url))
            self.setGeometry(self.chose_screen())
            view.loadFinished.connect(self.check_page)
            self.setCentralWidget(view)

        def get_page_size(self):
            """Read the page's contents size, cache it on the instance and
            return it.

            :return: ``(width, height)`` as reported by ``contentsSize()``.
            """
            contents = self.browser.page().contentsSize()
            page_width, page_height = contents.width(), contents.height()
            self.set_width, self.set_height = page_width, page_height
            return page_width, page_height

        def chose_screen(self):
            """Pick the window geometry: a fixed 750x1370 rectangle.

            The rectangle is anchored at the top-left of the first screen
            whose available area is strictly larger than the target in both
            dimensions; if no screen qualifies it is anchored at (0, 0).

            :return: the ``QRect`` to use as window geometry.
            """
            target_w, target_h = 750, 1370
            desktop = QApplication.desktop()
            for index in range(desktop.screenCount()):
                available = desktop.availableGeometry(index)
                if available.width() <= target_w or available.height() <= target_h:
                    continue  # this screen cannot hold the whole window
                return QRect(available.left(), available.top(), target_w, target_h)
            return QRect(0, 0, target_w, target_h)
    
    if __name__ == '__main__':
        # Create the application and show the main window.
        app = QApplication(sys.argv)
        win = MainWindow()
        win.show()
        # Propagate the event loop's return code as the process exit status.
        # The previous app.exit(app.exec_()) ran after the loop had already
        # finished, so the return code was dropped.
        sys.exit(app.exec_())
    

    2:收集页面高度,并计算分次截屏的次数和余量高度;实例化图片合并工具,设置定时器,超时信号发出后,执行exe_command;

    def check_page(self):
        """Plan the capture once the page has loaded and start the timer.

        Splits the page height into whole window-height captures plus a
        remainder, creates the ``ScreenShotMerge`` helper with those numbers
        and starts a 400 ms timer that drives ``exe_command``.
        """
        _, page_height = self.get_page_size()
        full_pages, remainder = divmod(page_height, self.height())
        if full_pages == 0:
            # Page shorter than the window: still take one capture.
            full_pages = 1
        self.page = full_pages
        self.over_flow_size = remainder
        self.ssm = ScreenShotMerge(self.page, self.over_flow_size)

        ticker = QTimer(self)
        self.timer = ticker
        ticker.timeout.connect(self.exe_command)
        ticker.setInterval(400)
        ticker.start()
    

    3:exe_command用来控制截图次数,并在每次截图完成后控制网页向下滑屏幕的高度;所有的页面都已截取时,完成图片合并。

    def exe_command(self):
        """Timer slot: perform one capture step, then decrement ``self.page``.

        * ``page > 0``: capture the window and scroll down one window height.
        * ``page == 0``: capture once more if there is a leftover strip.
        * ``page < 0``: stop the timer, merge the captures, close the window.
        """
        remaining = self.page
        if remaining < 0:
            self.timer.stop()
            self.ssm.image_merge()
            self.close()
        elif remaining > 0:
            self.screen_shot()
            self.run_js()
        elif self.over_flow_size > 0:
            self.screen_shot()
        # The counter is decremented on every tick, whichever branch ran.
        self.page = remaining - 1
            
        def run_js(self):
            """Scroll the loaded page down by one window height via JavaScript.

            Defines a ``scroll(dHeight)`` function in the page and calls it
            with the current window height.
            """
            script = """
                var scroll = function (dHeight) {
                var t = document.documentElement.scrollTop
                var h = document.documentElement.scrollHeight
                dHeight = dHeight || 0
                var current = t + dHeight
                if (current > h) {
                    window.scrollTo(0, document.documentElement.clientHeight)
                  } else {
                    window.scrollTo(0, current)
                  }
                }
            """
            # The call is appended by concatenation rather than formatted into
            # the template, because the JavaScript braces would clash with
            # str.format placeholders. The newline must be the escape
            # sequence: a literal line break inside the quotes is a syntax error.
            command = script + '\n scroll({})'.format(self.height())
            self.browser.page().runJavaScript(command)
    

    4:screen_shot在每次截图完成后将图片保存为临时文件,并将图片对象交给图片合并工具保存到列表中。

    def screen_shot(self):
        """Grab the web view, save it as ``temp.png`` and hand it to the merger."""
        temp_file = '{}/temp.png'.format(self.ssm.root_path)
        window_handle = int(self.browser.winId())
        snapshot = QApplication.primaryScreen().grabWindow(window_handle)
        snapshot.save(temp_file)
        # The merger opens the file and keeps the resulting image object.
        self.ssm.add_im(temp_file)
    

    5:截图合并工具,在每次截图完成后将图片对象保存,完成余量截图的重绘和截图的合并。

    class ScreenShotMerge():
        """Collect window captures and stitch them vertically into one image.

        ``page`` is the number of full-window captures expected and
        ``over_flow_size`` the height (in pixels) of the leftover strip: the
        capture added after ``page`` full ones is cropped to its bottom
        ``over_flow_size`` rows.
        """

        def __init__(self, page, over_flow_size):
            """Store the capture plan and prepare the output directory.

            :param page: number of full-window captures expected.
            :param over_flow_size: height of the final partial strip.
            """
            self.im_list = []
            self.page = page
            self.over_flow_size = over_flow_size
            self.get_path()

        def get_path(self):
            """Create the ``temp`` directory next to this file and set
            ``root_path`` / ``save_path`` (``temp/merge.png``)."""
            self.root_path = Path(__file__).parent.joinpath('temp')
            self.root_path.mkdir(parents=True, exist_ok=True)
            self.save_path = self.root_path.joinpath('merge.png')

        def add_im(self, path):
            """Open the capture at ``path``, save a numbered copy and keep it.

            Once ``page`` images are already stored, the next one is treated
            as the overflow capture and cropped via ``reedit_image``.

            :param path: file path of the capture just written.
            """
            if len(self.im_list) == self.page:
                im = self.reedit_image(path)
            else:
                im = Image.open(path)
            im.save('{}/{}.png'.format(self.root_path, len(self.im_list) + 1))
            self.im_list.append(im)

        def get_new_size(self):
            """Return ``(max_width, total_height)`` over the stored images.

            The width is that of the widest image and the height is the sum
            of all heights; an empty list yields ``(0, 0)``.
            """
            max_width = max((img.size[0] for img in self.im_list), default=0)
            total_height = sum(img.size[1] for img in self.im_list)
            return max_width, total_height

        def image_merge(self, ):
            """Paste all stored images top-to-bottom and save ``merge.png``.

            Raises ``IndexError`` if no image has been added.
            """
            if len(self.im_list) > 1:
                max_width, total_height = self.get_new_size()
                # NOTE(review): the 15 px cut from the width presumably
                # removes the vertical scrollbar - confirm.
                new_img = Image.new('RGB', (max_width - 15, total_height), 255)
                y = 0
                for img in self.im_list:
                    new_img.paste(img, (0, y))
                    y += img.size[1]
            else:
                obj = self.im_list[0]
                width, height = obj.size
                new_img = Image.new('RGB', (width, height), 255)
                new_img.paste(obj, (0, 0))
            new_img.save(self.save_path)
            print('截图成功:', self.save_path)

        def reedit_image(self, path):
            """Return the bottom ``over_flow_size`` rows of the image at ``path``.

            :param path: file path of the capture to crop.
            :return: the cropped image region.
            """
            obj = Image.open(path)
            width, height = obj.size
            return obj.crop((0, height - self.over_flow_size, width, height))
    

    截图功能完整代码

    #!/usr/bin/env python
    # -*- coding:UTF-8 -*-
    #Python学习交流群:778463939
    
    import sys
    from PyQt5.QtCore import *
    from PyQt5.QtWidgets import *
    from PyQt5.QtWebEngineWidgets import *
    from PIL import Image
    from pathlib import Path
    
    
    class ScreenShotMerge():
        """Collect window captures and stitch them vertically into one image.

        ``page`` is the number of full-window captures expected and
        ``over_flow_size`` the height (in pixels) of the leftover strip: the
        capture added after ``page`` full ones is cropped to its bottom
        ``over_flow_size`` rows.
        """

        def __init__(self, page, over_flow_size):
            """Store the capture plan and prepare the output directory.

            :param page: number of full-window captures expected.
            :param over_flow_size: height of the final partial strip.
            """
            self.im_list = []
            self.page = page
            self.over_flow_size = over_flow_size
            self.get_path()

        def get_path(self):
            """Create the ``temp`` directory next to this file and set
            ``root_path`` / ``save_path`` (``temp/merge.png``)."""
            self.root_path = Path(__file__).parent.joinpath('temp')
            self.root_path.mkdir(parents=True, exist_ok=True)
            self.save_path = self.root_path.joinpath('merge.png')

        def add_im(self, path):
            """Open the capture at ``path``, save a numbered copy and keep it.

            Once ``page`` images are already stored, the next one is treated
            as the overflow capture and cropped via ``reedit_image``.

            :param path: file path of the capture just written.
            """
            if len(self.im_list) == self.page:
                im = self.reedit_image(path)
            else:
                im = Image.open(path)
            im.save('{}/{}.png'.format(self.root_path, len(self.im_list) + 1))
            self.im_list.append(im)

        def get_new_size(self):
            """Return ``(max_width, total_height)`` over the stored images.

            The width is that of the widest image and the height is the sum
            of all heights; an empty list yields ``(0, 0)``.
            """
            max_width = max((img.size[0] for img in self.im_list), default=0)
            total_height = sum(img.size[1] for img in self.im_list)
            return max_width, total_height

        def image_merge(self, ):
            """Paste all stored images top-to-bottom and save ``merge.png``.

            Raises ``IndexError`` if no image has been added.
            """
            if len(self.im_list) > 1:
                max_width, total_height = self.get_new_size()
                # NOTE(review): the 15 px cut from the width presumably
                # removes the vertical scrollbar - confirm.
                new_img = Image.new('RGB', (max_width - 15, total_height), 255)
                y = 0
                for img in self.im_list:
                    new_img.paste(img, (0, y))
                    y += img.size[1]
            else:
                obj = self.im_list[0]
                width, height = obj.size
                new_img = Image.new('RGB', (width, height), 255)
                new_img.paste(obj, (0, 0))
            new_img.save(self.save_path)
            print('截图成功:', self.save_path)

        def reedit_image(self, path):
            """Return the bottom ``over_flow_size`` rows of the image at ``path``.

            :param path: file path of the capture to crop.
            :return: the cropped image region.
            """
            obj = Image.open(path)
            width, height = obj.size
            return obj.crop((0, height - self.over_flow_size, width, height))
    
    
    class MainWindow(QMainWindow):
        """Frameless window that loads a page and captures it strip by strip.

        After the page has loaded, a timer repeatedly grabs the web view and
        scrolls it down by one window height; the captures are handed to
        ``ScreenShotMerge``, which stitches them into one image.
        """

        def __init__(self, parent=None):
            """Set the window title and window flags.

            :param parent: optional parent widget passed to ``QMainWindow``.
            """
            super().__init__(parent)
            self.setWindowTitle('易哈佛')
            self.temp_height = 0
            # Qt.WindowStaysOnTopHint is deliberately not set here.
            self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False)  # no min/max buttons
            self.setWindowFlag(Qt.FramelessWindowHint, True)  # no window frame

        def urlScreenShot(self, url):
            """Load ``url`` in a new web view and arrange for ``check_page``
            to run when loading finishes.

            :param url: address of the page to load.
            """
            view = QWebEngineView()
            self.browser = view
            view.load(QUrl(url))
            self.setGeometry(self.chose_screen())
            view.loadFinished.connect(self.check_page)
            self.setCentralWidget(view)

        def get_page_size(self):
            """Read the page's contents size, cache it on the instance and
            return it.

            :return: ``(width, height)`` as reported by ``contentsSize()``.
            """
            contents = self.browser.page().contentsSize()
            page_width, page_height = contents.width(), contents.height()
            self.set_width, self.set_height = page_width, page_height
            return page_width, page_height

        def chose_screen(self):
            """Pick the window geometry: a fixed 750x1370 rectangle.

            The rectangle is anchored at the top-left of the first screen
            whose available area is strictly larger than the target in both
            dimensions; if no screen qualifies it is anchored at (0, 0).

            :return: the ``QRect`` to use as window geometry.
            """
            target_w, target_h = 750, 1370
            desktop = QApplication.desktop()
            for index in range(desktop.screenCount()):
                available = desktop.availableGeometry(index)
                if available.width() > target_w and available.height() > target_h:
                    return QRect(available.left(), available.top(), target_w, target_h)
            return QRect(0, 0, target_w, target_h)

        def check_page(self):
            """Plan the capture once the page has loaded and start the timer.

            Splits the page height into whole window-height captures plus a
            remainder, creates the ``ScreenShotMerge`` helper with those
            numbers and starts a 400 ms timer that drives ``exe_command``.
            """
            _, page_height = self.get_page_size()
            self.page, self.over_flow_size = divmod(page_height, self.height())
            if self.page == 0:
                # Page shorter than the window: still take one capture.
                self.page = 1
            self.ssm = ScreenShotMerge(self.page, self.over_flow_size)
            self.timer = QTimer(self)
            self.timer.timeout.connect(self.exe_command)
            self.timer.setInterval(400)
            self.timer.start()

        def exe_command(self):
            """Timer slot: perform one capture step, then decrement ``self.page``.

            * ``page > 0``: capture the window and scroll down one window height.
            * ``page == 0``: capture once more if there is a leftover strip.
            * ``page < 0``: stop the timer, merge the captures, close the window.
            """
            if self.page > 0:
                self.screen_shot()
                self.run_js()
            elif self.page < 0:
                self.timer.stop()
                self.ssm.image_merge()
                self.close()
            elif self.over_flow_size > 0:
                self.screen_shot()
            # The counter is decremented on every tick, whichever branch ran.
            self.page -= 1

        def run_js(self):
            """Scroll the loaded page down by one window height via JavaScript.

            Defines a ``scroll(dHeight)`` function in the page and calls it
            with the current window height.
            """
            script = """
                var scroll = function (dHeight) {
                var t = document.documentElement.scrollTop
                var h = document.documentElement.scrollHeight
                dHeight = dHeight || 0
                var current = t + dHeight
                if (current > h) {
                    window.scrollTo(0, document.documentElement.clientHeight)
                  } else {
                    window.scrollTo(0, current)
                  }
                }
            """
            # The call is appended by concatenation rather than formatted into
            # the template, because the JavaScript braces would clash with
            # str.format placeholders. The newline must be the escape
            # sequence: a literal line break inside the quotes is a syntax error.
            command = script + '\n scroll({})'.format(self.height())
            self.browser.page().runJavaScript(command)

        def screen_shot(self):
            """Grab the web view, save it as ``temp.png`` and hand it to the merger."""
            temp_file = '{}/temp.png'.format(self.ssm.root_path)
            window_handle = int(self.browser.winId())
            snapshot = QApplication.primaryScreen().grabWindow(window_handle)
            snapshot.save(temp_file)
            # The merger opens the file and keeps the resulting image object.
            self.ssm.add_im(temp_file)
    
    
    if __name__ == '__main__':
        # Page to capture.
        url = 'http://blog.sina.com.cn/lm/rank/focusbang//'
        app = QApplication(sys.argv)
        win = MainWindow()
        win.urlScreenShot(url)
        win.show()
        # Propagate the event loop's return code as the process exit status.
        # The previous app.exit(app.exec_()) ran after the loop had already
        # finished, so the return code was dropped.
        sys.exit(app.exec_())
    
  • 相关阅读:
    MyEclipe 配置 ivy 插件
    PHP 向 MySql 中数据修改操作时,只对数字操作有效,非数字操作无效,怎么办?
    Hadoop 中 Eclipse 的配置
    Hadoop 配置好hive,第一次在conf能进入,第二次就不行了,怎么办?
    7系列FPGA远程更新方案-QuickBoot(转)
    Serial interface (RS-232)
    Linux下安装微信(转)
    《图解HTTP》读书笔记(转)
    《图解TCP/IP》读书笔记(转)
    7 Serial Configuration 理解(三)
  • 原文地址:https://www.cnblogs.com/djdjdj123/p/13887924.html
Copyright © 2020-2023  润新知