• Python PDF 转 JPG 推荐使用


    我和同事分别用.net 和 python来实现这个功能。

    做好后发现 .NET 版本转换很慢,而 Python 版本代码少、速度快,最终采用了我写的 Python 代码。

    比较特殊的是 pdf2image 依赖 poppler,需要另外安装(我用的是 poppler-0.68.0),安装方法大家可以百度一下。

    具体代码如下:

    from pdf2image import convert_from_path
    from pathlib import Path 
    
    from os import listdir
    from PIL import Image
    
    import os,time
    from shutil import copyfile
    import shutil
    
    def _pdf_stem(pdf_filename):
        """Return *pdf_filename* without its final extension.

        Only the last extension is removed, so ``report.2021.01.pdf`` maps to
        ``report.2021.01`` instead of being truncated at the first dot.
        """
        return os.path.splitext(pdf_filename)[0]

    def pdf_to_image(pdf_filename):
        """Convert one PDF inside ``path_to_watch`` into a single merged JPEG.

        Every page is rendered into a scratch directory named after the PDF
        (without extension), ``contact_image`` then merges the pages into
        ``<scratch directory>.jpg``, and the scratch directory is removed.

        Args:
            pdf_filename: File name of the PDF, relative to ``path_to_watch``.
                Names that do not end in ``.pdf`` (any case) are ignored.

        Returns:
            None.

        Raises:
            Any exception raised while rendering, saving or merging the pages;
            the scratch directory is removed before it propagates.
        """
        if not pdf_filename.upper().endswith(".PDF"):
            return
        print('处理 pdf_filename:', pdf_filename)

        out_path_full = os.path.join(path_to_watch, _pdf_stem(pdf_filename))
        print('完整路径:', out_path_full)
        if not os.path.isdir(out_path_full):
            print('创建目录:', out_path_full)
            os.makedirs(out_path_full)

        try:
            print('开始转换')
            pdf_path = os.path.join(path_to_watch, pdf_filename)
            print('filename:', pdf_path)
            pages = convert_from_path(pdf_path, dpi=400, output_folder=None,
                                      fmt="JPEG", thread_count=5)
            for page_number, page in enumerate(pages, start=1):
                # Zero-padded names keep the pages in order when the directory
                # is listed alphabetically (otherwise 10.jpg precedes 2.jpg).
                page.save(os.path.join(out_path_full, '%04d.jpg' % page_number))
            print('转换完成')

            contact_image(out_path_full)
            print('合并完成')
        finally:
            # Remove exactly the directory created above. Re-deriving it by
            # splitting the full path on '.' breaks when the watched path
            # itself contains a dot.
            print('删除目录', out_path_full)
            shutil.rmtree(out_path_full)

    def watch():
        """Poll ``path_to_watch`` forever and convert PDFs that have no JPEG yet.

        Every three seconds the directory is listed; each ``*.pdf`` file whose
        sibling ``<name>.jpg`` does not exist is passed to ``pdf_to_image``.
        This function never returns.
        """
        while True:
            time.sleep(3)
            pdf_files = [f for f in os.listdir(path_to_watch)
                         if f.upper().endswith('.PDF')]
            for f in pdf_files:
                f_jpg_full = os.path.join(path_to_watch, _pdf_stem(f) + '.jpg')
                if os.path.exists(f_jpg_full):
                    continue  # already converted

                print('文件名:', os.path.join(path_to_watch, f))
                # NOTE(review): presumably a grace period so a PDF that is
                # still being copied into the directory can finish - confirm.
                time.sleep(1)
                try:
                    pdf_to_image(f)
                except Exception as exc:
                    # Top-level boundary: one unreadable PDF must not stop the
                    # watcher. The file is retried on the next scan.
                    print('转换失败:', f, exc)
    
    def open_image(out_path_full,fn):
        """Open the image file *fn* located in *out_path_full* and return it.

        The resolved path is printed before the file is opened.
        """
        target = os.path.join(out_path_full, fn)
        print('打开图片路径', target)
        picture = Image.open(target)
        return picture
    
    def _page_sort_key(fn):
        """Sort key that orders page files numerically, then by name."""
        stem = os.path.splitext(fn)[0]
        try:
            return (0, int(stem), fn)
        except ValueError:
            # Non-numeric names go after the numbered pages, alphabetically.
            return (1, 0, fn)

    def contact_image(out_path_full):
        """Stack all ``.jpg`` pages in *out_path_full* into one tall JPEG.

        Each page is resized to a fixed width of 1102 pixels (1564 high for
        portrait pages, 776 high for landscape pages), the pages are pasted
        top to bottom on a white canvas in page order, and the result is saved
        next to the directory as ``<out_path_full>.jpg``.

        Args:
            out_path_full: Directory holding the per-page ``.jpg`` files.

        Returns:
            None. Nothing is written when the directory contains no ``.jpg``.
        """
        portrait_size = (1102, 1564)
        landscape_size = (1102, 776)

        print('开始合并')
        print('合并路径:',out_path_full)
        # listdir() order is arbitrary; sort so 2.jpg comes before 10.jpg.
        names = sorted((fn for fn in listdir(out_path_full) if fn.endswith('.jpg')),
                       key=_page_sort_key)
        print('图片数量:',len(names))
        if not names:
            return

        pages = []
        total_height = 0
        for fn in names:
            # The context manager closes the source file; resize() returns an
            # independent in-memory copy.
            with open_image(out_path_full, fn) as source:
                width, height = source.size
                print('width %d,height %d ' % (width, height))
                # NOTE(review): the original condition was truncated
                # ("if height>"); portrait-vs-landscape is assumed from the
                # two target sizes - confirm.
                target_size = portrait_size if height > width else landscape_size
                pages.append(source.resize(target_size, Image.BILINEAR))
            total_height += target_size[1]

        print('total_height:', total_height)
        save_path = out_path_full+".jpg"
        print('save path:',save_path)

        # Build the canvas once, now that the final height is known.
        result = Image.new(pages[0].mode, (portrait_size[0], total_height), "white")
        offset = 0
        for page in pages:
            page_height = page.size[1]
            print('height_im %d' % page_height)
            result.paste(page, box=(0, offset))
            offset += page_height
        # Save once after every page has been pasted.
        result.save(save_path)
    
    # Directory polled for incoming PDF files. Written as a raw string because
    # "\P" is not a valid escape sequence; the resulting value is unchanged.
    path_to_watch = r"D:\PDFS"
    print('监听目录:', path_to_watch)
    if __name__=='__main__':
        watch()
    

      

  • 相关阅读:
    iOS--异步下载
    linux搭建ftp服务器
    hexo常用命令
    Markdown入门
    Markdown 语法和 MWeb 写作使用说明
    vi/vim
    微信聊天机器人
    .vimrc
    配置双机
    python学习笔记(一)
  • 原文地址:https://www.cnblogs.com/xiaoyichong/p/14301059.html
Copyright © 2020-2023  润新知