• Java 把 Word 文件转成 HTML 字符串并返回


    1、需求:把前端上传的 Word 文件解析出来,生成 HTML 字符串返回给前端展示。Word 里面的图片可以忽略不显示,所以这段代码去掉了解析图片的代码。

    package com.lieni.core.util;
    
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.transform.OutputKeys;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerException;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;
    
    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.converter.WordToHtmlConverter;
    import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
    import org.apache.poi.xwpf.usermodel.XWPFDocument;
    import org.springframework.web.multipart.MultipartFile;
    import org.w3c.dom.Document;
    
    import com.itextpdf.text.log.Logger;
    import com.itextpdf.text.log.LoggerFactory;
    
    /**
     * Converts uploaded Word documents ({@code .docx} and {@code .doc}) into HTML strings.
     *
     * <p>Neither conversion configures any image handling: the XWPF converter is called with
     * {@code null} options and no picture manager is set on the HWPF converter.
     *
     * <p>NOTE(review): the {@code Logger}/{@code LoggerFactory} used here are imported from
     * {@code com.itextpdf.text.log}, not from the application's logging framework. Confirm that
     * these messages actually reach the application log; switching to the project's logger may
     * be preferable.
     *
     * Created by LTmei on 2018/10/10 10:00
     */
    public class Word2HtmlUtil {

        /**
         * logger
         */
        private static final Logger logger = LoggerFactory.getLogger(Word2HtmlUtil.class);

        /**
         * Encoding used both to serialize the HTML produced from {@code .doc} files and to decode
         * the serialized bytes back into a String. The two must match or non-ASCII text is garbled.
         */
        private static final String HTML_ENCODING = "utf-8";

        /**
         * Converts an uploaded Word 2007+ ({@code .docx}) file to an XHTML string.
         *
         * @param file the uploaded file; its original file name must end with {@code .docx}
         *             (compared case-insensitively)
         * @return the generated markup, or {@code null} if the upload is missing/empty or the
         *         file name does not have the {@code .docx} extension
         * @throws IOException if the upload cannot be read or the conversion fails
         */
        public static String Word2007ToHtml(MultipartFile file) throws IOException {
            if (file == null || file.isEmpty() || file.getSize() <= 0) {
                logger.error("Sorry File does not Exists!");
                return null;
            }
            if (!hasExtension(file.getOriginalFilename(), ".docx")) {
                logger.error("Enter only MS Office 2007+ files");
                return null;
            }

            // try-with-resources: the upload stream was previously never closed.
            try (InputStream in = file.getInputStream()) {
                // 1) Load the Word document into an XWPFDocument.
                XWPFDocument document = new XWPFDocument(in);

                // 2) Render into an in-memory buffer; null options = converter defaults.
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                XHTMLConverter.getInstance().convert(document, baos, null);

                // NOTE(review): decoded with the platform default charset, as before. The
                // encoding XHTMLConverter writes is not visible here - confirm and pass an
                // explicit charset if it is fixed (e.g. UTF-8).
                return baos.toString();
            }
        }

        /**
         * Converts an uploaded Word 97-2003 ({@code .doc}) file to an HTML string.
         *
         * @param file the uploaded file; its original file name must end with {@code .doc}
         *             (compared case-insensitively)
         * @return the generated markup, or {@code null} if the upload is missing/empty or the
         *         file name does not have the {@code .doc} extension
         * @throws IOException                  if the upload cannot be read
         * @throws ParserConfigurationException if no DOM document builder can be created
         * @throws TransformerException         if serializing the HTML DOM fails
         */
        public static String Word2003ToHtml(MultipartFile file)
                throws IOException, ParserConfigurationException, TransformerException {
            if (file == null || file.isEmpty() || file.getSize() <= 0) {
                logger.error("Sorry File does not Exists!");
                return null;
            }
            if (!hasExtension(file.getOriginalFilename(), ".doc")) {
                logger.error("Enter only MS Office 2003 files");
                return null;
            }

            // try-with-resources: the upload stream was previously never closed.
            try (InputStream input = file.getInputStream()) {
                HWPFDocument wordDocument = new HWPFDocument(input);
                WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                        DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

                // Walk the Word document and build the HTML DOM.
                wordToHtmlConverter.processDocument(wordDocument);
                Document htmlDocument = wordToHtmlConverter.getDocument();

                // Serialize the DOM into an in-memory buffer.
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                Transformer serializer = TransformerFactory.newInstance().newTransformer();
                serializer.setOutputProperty(OutputKeys.ENCODING, HTML_ENCODING);
                serializer.setOutputProperty(OutputKeys.INDENT, "yes");
                serializer.setOutputProperty(OutputKeys.METHOD, "html");
                serializer.transform(new DOMSource(htmlDocument), new StreamResult(baos));

                // Decode with the same encoding the serializer wrote. The previous
                // new String(byte[]) used the platform default charset and corrupted
                // non-ASCII text wherever that default is not UTF-8.
                return baos.toString(HTML_ENCODING);
            }
        }

        /**
         * Tells whether {@code fileName} ends with {@code extension}, ignoring case.
         *
         * @param fileName  the name to test; may be {@code null}
         * @param extension the extension including the leading dot, e.g. {@code ".docx"}
         * @return {@code true} if the name ends with the extension; {@code false} otherwise,
         *         including when {@code fileName} is {@code null}
         */
        private static boolean hasExtension(String fileName, String extension) {
            if (fileName == null || fileName.length() < extension.length()) {
                return false;
            }
            int offset = fileName.length() - extension.length();
            return fileName.regionMatches(true, offset, extension, 0, extension.length());
        }

    }
  • 相关阅读:
    Redis(window版本)安装及使用
    springMVC转发与重定向
    java集合的实现细节--ArrayList和LinkedList
    VMware(虚拟机) 12版安装深度linux系统
    java中String创建对象分析(转)
    java面试之谈
    java中堆与栈的区别
    sql百万级查询优化(转)
    解决Maven下载依赖慢的问题(转)
    SpringMVC的底层实现
  • 原文地址:https://www.cnblogs.com/LTmei/p/9779275.html
Copyright © 2020-2023  润新知