• 利用jpedal进行pdf转换成jpeg,jpg,png,tiff,tif等格式的图片


             项目中需要把pdf文件转换成image图片,开始时使用pdfbox开源库进行图片转换,但是转换出来的图片中存在部分乱码的情况。下面是使用pdfBox把pdf转换成图片的代码示例。

    try{
        String password = null;
        int startPage = 1;
        String imageType = "jpg";
        // Backslashes must be doubled inside Java string literals; a bare "\u"
        // would be parsed as the start of a Unicode escape and fail to compile.
        File imageFile = new File("E:\\upload\\pdf\\20140424\\Servlet." + imageType);
        File pdfFile = new File("E:\\upload\\pdf\\20140424\\Servlet.pdf");
        PDDocument document = PDDocument.load(pdfFile);
        try {
            // Render every page, from startPage through the last page of the document.
            int endPage = document.getPageCount();
            PDFImageWriter imageWriter = new PDFImageWriter();
            imageWriter.writeImage(document, imageType, password, startPage, endPage, imageFile.getAbsolutePath());
        } finally {
            // Release the document even when rendering throws.
            document.close();
        }
    }catch(IOException  e){
        e.printStackTrace();
    }

            比较了其他的开源库之后,准备采用jpedal。但是jpedal的资料非常少,除了官方网站外,即使是英文资料也很少。而且官方提供的代码示例中的一些方法在lgpl授权的
    jpedal代码库中并不存在。下面是收集到的一些资料:

    1、jpedal文档:http://javadoc.idrsolutions.com/org/jpedal/PdfDecoder.html

    2、简单调用示例:http://www.idrsolutions.com/java-pdf-code-faq/#pdf2img
    3、lgpl授权的jpedal库的下载地址:http://sourceforge.net/projects/jpedal/
    4、转换示例地址:http://files.idrsolutions.com/samplecode/org/jpedal/examples/images/ConvertPagesToImages.java.html

    5、高清图片转换示例地址:http://files.idrsolutions.com/samplecode/org/jpedal/examples/images/ConvertPagesToHiResImages.java.html


            于是稍微修改了官方的转换示例,下面是经过测试可以使用的转换代码

    import cn.com.pujiConvert.util.Common;
    
    import com.sun.imageio.plugins.jpeg.JPEGImageWriter;
    import org.jpedal.*;
    import org.jpedal.color.ColorSpaces;
    import org.jpedal.constants.PageInfo;
    import org.jpedal.exception.PdfException;
    import org.jpedal.external.Options;
    import org.jpedal.fonts.FontMappings;
    import org.jpedal.objects.PdfFileInformation;
    import org.jpedal.utils.LogWriter;
    import org.w3c.dom.Element;
    
    import javax.imageio.IIOImage;
    import javax.imageio.ImageIO;
    import javax.imageio.ImageTypeSpecifier;
    import javax.imageio.metadata.IIOMetadata;
    import javax.imageio.plugins.jpeg.JPEGImageWriteParam;
    import javax.imageio.stream.ImageOutputStream;
    import java.awt.*;
    import java.awt.image.BufferedImage;
    import java.io.*;
    import java.util.Iterator;
    
    /**
     * Converts the pages of a PDF file, or of every PDF file in a directory,
     * into image files using a JPedal {@code PdfDecoder}.
     * Configure and start a conversion through {@code init(...)}.
     */
    public class ConvertPagesToImages{
        
        /**
         * When true, pages are rendered with getPageAsTransparentImage
         * instead of getPageAsImage.
         */
        boolean isTransparent=false;
        
        /** Working directory ("user.dir"); base of the default "thumbnails" output directory. */
        private String user_dir = System.getProperty("user.dir");
        
        /**
         * Scaling passed to PdfDecoder.setExtractionMode.
         * Use 96 dpi as default so pages are the correct size (72 will be smaller).
         */
        private float pageScaling =1.33f;
        
        /** Flag controlling whether progress messages are printed to System.out. */
        public static boolean outputMessages = false;
        
        /** Directory the images are written to; null selects the default "thumbnails" directory. */
        String output_dir=null;
        
        /** File separator of the current OS ("file.separator"). */
        String separator = System.getProperty("file.separator");
        
        /** The decoder object which opens the PDF and renders its pages; created lazily in decodeFile. */
        PdfDecoder decode_pdf = null;
        
        // Image format (file extension without the dot) used when saving pages.
        private String format = "png";
        
        /**
         * Values compared against the PDF's "Creator" field; on a match the decoder
         * is switched to PdfDecoder.RENDERIMAGES (creators that produce OCR PDFs).
         */
        private String[] ocr = {"TeleForm"};
        
        /** Scaling percentage applied to the rendered page - default is 100 percent. */
        private int scaling=100;
        
        /** File password or null. */
        private String password=null;
        
        // JPEG compression quality; only applied if between 0 and 1.
        private float JPEGcompression=-1f;
        
        /** Number of pages to convert; 0 means use the page count reported by the decoder. */
        private int pageCount = 0;
        
        /** Creates a converter with the default settings; call init to run a conversion. */
        public ConvertPagesToImages() { 
    
        }
        
        /**
         * Stores the conversion settings on this instance and, if the input path
         * exists, starts the conversion.
         *
         * @param file_name  path of a PDF file or of a directory containing PDF files
         * @param scaling    scaling percentage applied to each rendered page
         * @param format     image format / file extension without the dot
         * @param output_dir directory the images are written to
         * @param password   PDF password, or null when the file has none
         * @param pageCount  number of pages to convert; 0 converts all pages
         */
        public void init(String file_name, int scaling, String format, String output_dir, String password, int pageCount){
            // Remember every setting before any work starts.
            this.pageCount = pageCount;
            this.password = password;
            this.output_dir = output_dir;
            this.format = format;
            this.scaling = scaling;

            // Only convert when the input path actually exists.
            File source = new File(file_name);
            if (source.exists()) {
                extraction(file_name, output_dir);
                return;
            }

            System.out.println("File " + source + " not found");
            System.out.println("May need full path");
        }
        
        /**
         * Converts either a single PDF file or every PDF file directly inside a directory.
         * A path ending in ".pdf" (case-insensitive) is treated as one file; anything
         * else is treated as a directory.
         *
         * @param file_name  PDF file path or directory path
         * @param output_dir directory for the images; for a single file, null selects
         *                   "thumbnails" under the working directory
         */
        private void extraction(String file_name, String output_dir) {
            this.output_dir = output_dir;

            // Make sure the working directory can be used as a path prefix.
            if (!user_dir.endsWith(separator)) {
                user_dir += separator;
            }

            final boolean singlePdf = file_name.toLowerCase().endsWith(".pdf");

            if (singlePdf) {
                String target = (output_dir != null) ? output_dir : user_dir + "thumbnails" + separator;
                decodeFile(file_name, target);
            } else {
                // Directory mode: normalise the path so entry names can be appended directly.
                String dirPath = file_name.endsWith(separator) ? file_name : file_name + separator;
                String[] entries = null;

                try {
                    File dir = new File(dirPath);
                    if (dir.isDirectory()) {
                        entries = dir.list();
                    } else {
                        System.err.println(dirPath + " is not a directory. Exiting program");
                    }
                } catch (Exception ex) {
                    LogWriter.writeLog("Exception trying to access file " + ex.getMessage());
                }

                if (entries != null) {
                    for (int i = 0; i < entries.length; i++) {
                        String entry = entries[i];

                        // Skip everything that is not a PDF.
                        if (!entry.toLowerCase().endsWith(".pdf")) {
                            continue;
                        }

                        if (outputMessages) {
                            System.out.println(dirPath + entry);
                        }

                        decodeFile(dirPath + entry, output_dir);
                    }
                }
            }

            if (outputMessages) {
                System.out.println("Thumbnails created");
            }
        }
        
        /**
         * Opens one PDF file, checks that it may be rendered, writes its pages as
         * images and closes the file again.
         *
         * If the file cannot be opened, the error is reported on System.err and the
         * method returns without rendering anything.
         *
         * @param file_name  path of the PDF file; expected to end in ".pdf"
         * @param output_dir directory for the images, or null to use "thumbnails"
         *                   under the working directory
         * @throws RuntimeException if the file is encrypted and not viewable with the
         *                          configured password, or if extraction is not allowed
         */
        private void decodeFile(String file_name,String output_dir) {
            String name = "demo"; //set a default just in case

            // Derive the image base name: the file name without directory and ".pdf".
            int pointer = file_name.lastIndexOf(separator);
            if (pointer == -1) {
                pointer = file_name.lastIndexOf('/');
            }

            if (pointer != -1) {
                name = file_name.substring(pointer + 1, file_name.length() - 4);
            } else if (file_name.toLowerCase().endsWith(".pdf")) {
                name = file_name.substring(0, file_name.length() - 4);
            }

            //fix for odd files on Linux created when you view pages
            if (name.startsWith(".")) {
                return;
            }

            //create output dir for images
            if (output_dir == null) {
                output_dir = user_dir + "thumbnails" + separator;
            }

            //PdfDecoder returns a PdfException if there is a problem
            try {
                if (decode_pdf == null) {
                    decode_pdf = new PdfDecoder(true);
                }

                // Register JPedal's example image handler with the decoder.
                org.jpedal.external.ImageHandler myExampleImageHandler = new org.jpedal.examples.handlers.ExampleImageDrawOnScreenHandler();
                decode_pdf.addExternalHandler(myExampleImageHandler, Options.ImageHandler);

                //mappings for non-embedded fonts to use
                FontMappings.setFontReplacements();

                // Mode 0 with the page scaling: pages are only rendered,
                // text and images are not extracted.
                decode_pdf.setExtractionMode(0, pageScaling);

                if (outputMessages) {
                    System.out.println("Opening file :" + file_name);
                }

                // Compare string content, not references, to detect an empty password.
                if (password != null && !password.isEmpty()) {
                    decode_pdf.openPdfFile(file_name, password);
                } else {
                    decode_pdf.openPdfFile(file_name);
                }

            } catch (Exception e) {
                System.err.println("8.Exception " + e + " in pdf code in "+file_name);

                // The file could not be opened, so there is nothing to render;
                // release whatever the decoder holds and stop here.
                if (decode_pdf != null) {
                    decode_pdf.closePdfFile();
                }
                return;
            }

            // Refuse to render when the password is wrong or extraction is forbidden.
            // The file is closed before throwing so it is not left open.
            if (decode_pdf.isEncrypted() && !decode_pdf.isFileViewable()) {
                decode_pdf.closePdfFile();
                throw new RuntimeException("Wrong password password used=>"+password+ '<');
            }

            if (decode_pdf.isEncrypted() && !decode_pdf.isPasswordSupplied() && !decode_pdf.isExtractionAllowed()) {
                decode_pdf.closePdfFile();
                throw new RuntimeException("Extraction not allowed");
            }

            extractPageAsImage(file_name, output_dir, name, isTransparent);

            /**close the pdf file */
            decode_pdf.closePdfFile();
        }
        
        /**
         * Renders the requested range of pages of the currently open PDF into the
         * output directory, creating that directory if it does not exist.
         *
         * Pages 1..pageCount are rendered; a pageCount of 0 or less, or one larger
         * than the document, renders all pages.
         *
         * @param file_name     path of the PDF, used only in error messages
         * @param output_dir    directory the images are written to
         * @param name          base name for the image files
         * @param isTransparent true to render pages with a transparent background
         * @throws RuntimeException if rendering or saving any page fails; the PDF is
         *                          closed first and the original exception is the cause
         */
        private void extractPageAsImage(String file_name, String output_dir, String name, boolean isTransparent) {
            //create a directory if it doesn't exist
            File output_path = new File(output_dir);
            if (!output_path.exists()) {
                output_path.mkdirs();
            }

            // Fixed options handed through to getPage.
            boolean isSingleOutputFile = false;
            boolean compressTiffs = false;
            String rawJPEGComp = null;
            String jpgFlag = "96";

            //page range
            final int start = 1;
            final int total = decode_pdf.getPageCount();

            // Never ask for pages past the end of the document, and treat a
            // non-positive pageCount as "all pages" instead of producing a
            // negative range.
            final int end = (pageCount <= 0) ? total : Math.min(pageCount, total);

            if (outputMessages) {
                System.out.println("Thumbnails will be in  " + output_dir);
            }

            try {
                BufferedImage[] multiPages = new BufferedImage[1 + (end - start)];

                for (int page = start; page <= end; page++) {
                    getPage(output_dir, name, isTransparent, isSingleOutputFile, rawJPEGComp, jpgFlag, compressTiffs, start, end, multiPages, page);
                }
            } catch (Exception e) {
                decode_pdf.closePdfFile();
                // Keep the original exception as the cause so the stack trace is not lost.
                throw new RuntimeException("Exception " + e.getMessage()+" with thumbnails on File="+file_name, e);
            }
        }
        
        private void getPage(
        		String output_dir, 
        		String name, 
        		boolean isTransparent,
                boolean isSingleOutputFile, 
                String rawJPEGComp, 
                String jpgFlag,
                boolean compressTiffs, 
                int start, 
                int end,
                BufferedImage[] multiPages, 
                int page
    	) throws PdfException, IOException, FileNotFoundException {
            if (outputMessages ){
                System.out.println("Page " + page);
            }
            
            /**
             * 补0操作
             */
            String pageAsString	= String.valueOf(page);
            String maxPageSize	= String.valueOf(end);
            int padding			= maxPageSize.length()-pageAsString.length();
                
            for(int ii = 0; ii < padding; ii++){
                pageAsString = '0' + pageAsString;
            }
            
            String image_name;
            if(isSingleOutputFile){
                image_name =name;
            }else{
                image_name =name+"_page_" + pageAsString;
            }
            
            /**
             * get PRODUCER and if OCR disable text printing
             */
            PdfFileInformation currentFileInformation = decode_pdf.getFileInformationData();
            
            String[] values=currentFileInformation.getFieldValues();
            String[] fields=PdfFileInformation.getFieldNames();
                
            for(int i=0;i<fields.length;i++){
                if(fields[i].equals("Creator")){      
                    for (String anOcr : ocr) {  
                        if (values[i].equals(anOcr)) {                           
                            decode_pdf.setRenderMode(PdfDecoder.RENDERIMAGES);                            
                        }
                    }
                }
            }
                
            BufferedImage image_to_save;
            if(!isTransparent){
                image_to_save=decode_pdf.getPageAsImage(page);
            }else{ 
            	//use this if you want a transparent image 
                image_to_save =decode_pdf.getPageAsTransparentImage(page);
                
                //java adds odd tint if you save this as JPEG which does not have transparency
                // so put as RGB on white background
                // (or save as PNG or TIFF which has transparency)
                // or just call decode_pdf.getPageAsImage(page)
                if(image_to_save!=null && format.toLowerCase().startsWith("jp")){
                    
                    BufferedImage rawVersion=image_to_save;
                    
                    int w=rawVersion.getWidth(), h=rawVersion.getHeight();
                    //blank canvas
                    image_to_save = new BufferedImage(w,h , BufferedImage.TYPE_INT_RGB);
                    
                    //
                    Graphics2D g2 = image_to_save.createGraphics();
                    //white background
                    g2.setPaint(Color.WHITE);
                    g2.fillRect(0,0,w,h);
                    //paint on image
                    g2.drawImage(rawVersion, 0, 0,null);
                }
            }
            
            /*if just gray we can reduce memory usage by converting image to Grayscale
    
            
            @SuppressWarnings("rawtypes")
    		Iterator colorspacesUsed = decode_pdf.getPageInfo(PageInfo.COLORSPACES);
            
            int nextID;
            boolean isGrayOnly=colorspacesUsed!=null; //assume true and disprove
            while(colorspacesUsed!=null && colorspacesUsed.hasNext()){
                nextID= (Integer) (colorspacesUsed.next());
                
                if(nextID!= ColorSpaces.DeviceGray && nextID!=ColorSpaces.CalGray){
                    isGrayOnly=false;
                }
            }
            
            //draw onto GRAY image to reduce colour depth
            if(isGrayOnly){
                BufferedImage image_to_save2=new BufferedImage(image_to_save.getWidth(),image_to_save.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
                image_to_save2.getGraphics().drawImage(image_to_save,0,0,null);
                image_to_save = image_to_save2;
            }
            
            //put image in array if multi-images
            if(isSingleOutputFile){
                multiPages[page-start] = image_to_save;
            }
            
            if (image_to_save != null) {
                
                /**BufferedImage does not support any dpi concept. A higher dpi can be created
                 * using JAI to convert to a higher dpi image*/
                
                //shrink the page to 50% with graphics2D transformation
                //- add your own parameters as needed
                //you may want to replace null with a hints object if you
                //want to fine tune quality.
                
                /** example 1 biliniear scaling
                 AffineTransform scale = new AffineTransform();
                 scale.scale(.5, .5); //50% as a decimal
                 AffineTransformOp scalingOp =new AffineTransformOp(scale, null);
                 image_to_save =scalingOp.filter(image_to_save, null);
    
                 */
                
                /** example 2 bicubic scaling - better quality but slower
                 to preserve aspect ratio set newWidth or newHeight to -1*/
                
                /**allow user to specify maximum dimension for thumbnail*/
                int maxDimension = -1;
                
                if(scaling!=100 || maxDimension != -1){
                    int newWidth=image_to_save.getWidth()*scaling/100;
                    int newHeight=image_to_save.getHeight()*scaling/100;
                    
                    Image scaledImage;
                    if(maxDimension != -1 && (newWidth > maxDimension || newHeight > maxDimension)){
                        if(newWidth > newHeight){
                            newWidth = maxDimension;
                            scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);
                        } else {
                            newHeight = maxDimension;
                            scaledImage= image_to_save.getScaledInstance(-1,newHeight,BufferedImage.SCALE_SMOOTH);
                        }
                    } else {
                        scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);
                    }
                    
                    if(format.toLowerCase().startsWith("jp")){
                        image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_RGB);
                    }else{
                        image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_ARGB);
                    }
                    
                    Graphics2D g2 = image_to_save.createGraphics();
                    
                    g2.drawImage(scaledImage, 0, 0,null);
                }
    
                if (format.startsWith("jp")) {
                	saveAsJPEG(jpgFlag, image_to_save, JPEGcompression, new BufferedOutputStream(new FileOutputStream(output_dir + pageAsString + image_name + '.' + format)));                
                } else {
                    //save image
                    decode_pdf.getObjectStore().saveStoredImage(
                            output_dir + pageAsString + image_name,
                            image_to_save,
                            true,
                            false,
                            format);
                }   
            }
            
            //flush images in case we do more than 1 page so only contains
            //images from current page
            decode_pdf.flushObjectValues(true);            
        }
        
        /**
         * Writes an image as a JPEG to the given stream, optionally recording a DPI
         * value in the JFIF header and applying an explicit compression quality.
         * The stream is closed before this method returns, on success or failure.
         *
         * @param jpgFlag         DPI to record; ignored unless Common.isInteger accepts it
         * @param image_to_save   image to encode
         * @param JPEGcompression compression quality; only applied if between 0 and 1
         * @param fos             destination stream
         * @throws IOException if the metadata cannot be updated or the image cannot be written
         */
        private static void saveAsJPEG(String jpgFlag,BufferedImage image_to_save, float JPEGcompression, BufferedOutputStream fos) throws IOException {
            // Use the public ImageWriter type rather than casting to the JDK-internal
            // com.sun implementation class.
            javax.imageio.ImageWriter imageWriter = ImageIO.getImageWritersBySuffix("jpeg").next();
            ImageOutputStream ios = null;

            try {
                ios = ImageIO.createImageOutputStream(fos);
                imageWriter.setOutput(ios);

                IIOMetadata imageMetaData = imageWriter.getDefaultImageMetadata(new ImageTypeSpecifier(image_to_save), null);

                if (Common.isInteger(jpgFlag)) {

                    int dpi = 96;

                    try {
                        dpi = Integer.parseInt(jpgFlag);
                    } catch (NumberFormatException ignored) {
                        // Value does not fit an int: keep the 96 dpi default.
                    }

                    Element tree = (Element) imageMetaData.getAsTree("javax_imageio_jpeg_image_1.0");
                    Element jfif = (Element) tree.getElementsByTagName("app0JFIF").item(0);

                    // The default metadata may carry no JFIF segment for some image types.
                    if (jfif != null) {
                        // resUnits=1 marks the densities as dots per inch.
                        jfif.setAttribute("resUnits", "1");
                        jfif.setAttribute("Xdensity", Integer.toString(dpi));
                        jfif.setAttribute("Ydensity", Integer.toString(dpi));

                        // getAsTree returns a copy, so the edited tree must be written
                        // back or the DPI is silently dropped.
                        imageMetaData.setFromTree("javax_imageio_jpeg_image_1.0", tree);
                    }
                }

                javax.imageio.ImageWriteParam jpegParams = imageWriter.getDefaultWriteParam();
                if (JPEGcompression >= 0 && JPEGcompression <= 1f) {
                    jpegParams.setCompressionMode(javax.imageio.ImageWriteParam.MODE_EXPLICIT);
                    jpegParams.setCompressionQuality(JPEGcompression);
                }

                imageWriter.write(imageMetaData, new IIOImage(image_to_save, null, null), jpegParams);
            } finally {
                try {
                    if (ios != null) {
                        // Flushes the encoded data into fos.
                        ios.close();
                    }
                } finally {
                    imageWriter.dispose();
                    fos.close();
                }
            }
        }
        
         /**
          * Example entry point: converts the first pages of a sample PDF to JPEG
          * files and prints the elapsed time in seconds.
          *
          * @param args not used
          */
         public static void main(String[] args) {
             long start = System.currentTimeMillis();

             // Backslashes must be doubled inside Java string literals; a bare "\u"
             // is an invalid Unicode escape and a trailing "\" would escape the quote.
             String pdfPath = "E:\\upload\\pdf\\20140424\\Servlet.pdf";
             // NOTE(review): the article's parameter notes describe -1 as "keep high quality" - confirm.
             int scaling = -1;
             String format = "jpg";
             String output_dir = "E:\\upload\\pdf\\20140424\\jpg\\";
             String password = null;
             int pageCount = 10;

             ConvertPagesToImages convertPagesToImages = new ConvertPagesToImages();
             convertPagesToImages.init(pdfPath, scaling, format, output_dir, password, pageCount);

             System.out.println("花费时间为="+(System.currentTimeMillis()-start)/1000 + "秒");
         }
    }
    

    功能说明:

           1、支持对文件夹下的所有pdf转换成图片,同时也支持对单个pdf进行转换操作。

           2、支持转换成jpg,jpeg,tiff,tif,png格式的图片

           3、支持指定转换的图片数。

           4、支持指定图片的存储位置


    传入参数说明

     1、pdfPath pdf文件绝对路径,可以是pdf所在的目录也可以是pdf文件路径 
     2、format  图片格式 (支持jpg,jpeg,tiff,png) ,传参时不能带有点号
     3、scaling 图片比率从1到100(100 = 全尺寸) 支持设置为-1 将保持高质量
     4、output_dir 输出路径,输出路径为绝对路径
     5、password 文件密码,若没有密码则传入null值
     6、pageCount 需要转换的页数(从第1页开始),传入0表示转换全部页

  • 相关阅读:
    Django1.11框架开发流程简述
    Python之Django框架执行流程简介
    Python之Django框架笔记
    Redis数据库学习笔记
    python之MiniWeb框架
    python之正则表达式
    python之with语句结合上下文管理器
    Python之闭包and装饰器
    Ajax之调用一言网站API接口
    python之pymysql模块简单应用
  • 原文地址:https://www.cnblogs.com/wala-wo/p/5119252.html
Copyright © 2020-2023  润新知