• Java实现pdf转html


    引入 pdf2dom 依赖(Maven):

            <dependency>
                <groupId>net.sf.cssbox</groupId>
                <artifactId>pdf2dom</artifactId>
                <version>1.8</version>
            </dependency>
    

    测试代码:

    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.StringWriter;
    import java.io.Writer;
    
    import javax.xml.parsers.ParserConfigurationException;
    
    import org.apache.commons.io.FileUtils;
    import org.apache.pdfbox.pdmodel.PDDocument;
    import org.fit.pdfdom.PDFDomTree;
    import org.fit.pdfdom.PDFDomTreeConfig;
    import org.junit.Test;
    
    /**
     * Example test showing how to convert a PDF document to HTML with pdf2dom's
     * {@link PDFDomTree}.
     */
    public class TestPDFDomTree {

        /**
         * Renders a page range of a PDF, read from the given stream, to an HTML string.
         *
         * <p>The stream itself is not closed here; the caller remains responsible for it.
         *
         * @param is        stream the PDF document is loaded from
         * @param startPage value passed unchanged to {@code PDFDomTree.setStartPage}
         * @param endPage   value passed unchanged to {@code PDFDomTree.setEndPage}
         * @param config    configuration handed to the {@link PDFDomTree} constructor
         * @return the text the parser wrote for the selected pages
         * @throws IOException                  if loading or processing the document fails
         * @throws ParserConfigurationException if the parser cannot be created
         */
        public static String parseWithPdfDomTree(InputStream is, int startPage, int endPage, PDFDomTreeConfig config)
                throws IOException, ParserConfigurationException {
            // try-with-resources guarantees the document is closed even when writeText throws.
            try (PDDocument pdf = PDDocument.load(is)) {
                PDFDomTree parser = new PDFDomTree(config);
                parser.setStartPage(startPage);
                parser.setEndPage(endPage);
                StringWriter output = new StringWriter();
                parser.writeText(pdf, output);
                return output.toString();
            }
        }

        /**
         * Converts a sample PDF to HTML and writes the result to {@code /mnt/test.html}.
         * The image handler is created via {@code PDFDomTreeConfig.saveToDirectory} for
         * {@code /mnt/res/} and reused as the font handler.
         */
        @Test
        public void test_convert_pdf_to_html() throws Exception {
            PDFDomTreeConfig config = PDFDomTreeConfig.createDefaultConfig();
            config.setImageHandler(PDFDomTreeConfig.saveToDirectory(new File("/mnt/res/")));
            config.setFontHandler(config.getImageHandler());

            String html;
            // Close the input file once parsing is finished (or has failed).
            try (InputStream in = new FileInputStream("/mnt/电子版.pdf")) {
                html = parseWithPdfDomTree(in, 0, 10, config);
            }
            // The original referenced an undefined variable here; the result is held in `html`.
            FileUtils.write(new File("/mnt/test.html"), html, "utf-8");
        }
    }
    

    感谢您的认真阅读。

    如果你觉得有帮助,欢迎点赞支持!

    不定期分享软件开发经验,欢迎关注作者,一起交流软件开发。

  • 相关阅读:
    STM32 变量无法赋值问题
    ROS 多台计算机联网控制机器人
    Content-Disposition
    Arrays.asList()与toArray()
    length与size()
    computeIfAbsent
    共享锁、排他锁与意向锁
    行锁、页面锁与表锁
    classpath是什么
    start、run、join
  • 原文地址:https://www.cnblogs.com/xiaoqi/p/pdfdom.html
Copyright © 2020-2023  润新知