• 使用POI把Word Excel转为HTML


    此方法针对 Office 2003 格式(.doc / .xls)。Word 中如果有图片,图片能够被解析并导出,但在生成的 HTML 文件中不显示;Excel 中的图片则不支持解析。

    所需jar包如下:

    1.PoiUtil.java

    package com.wzh.poi;
    
    import java.io.BufferedWriter;
    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStreamWriter;
    import java.io.UnsupportedEncodingException;
    import java.util.List;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.transform.OutputKeys;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerConfigurationException;
    import javax.xml.transform.TransformerException;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;
    import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
    import org.apache.poi.hssf.usermodel.HSSFWorkbook;
    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.converter.PicturesManager;
    import org.apache.poi.hwpf.converter.WordToHtmlConverter;
    import org.apache.poi.hwpf.usermodel.Picture;
    import org.apache.poi.hwpf.usermodel.PictureType;
    import org.w3c.dom.Document;
    
    /**
     * Converts legacy Office binary documents (.xls workbooks and .doc documents)
     * to HTML files using the Apache POI HTML converters.
     *
     * @date 2015-3-16 17:22:05
     * @author y
     */
    public class PoiUtil {

        /** Encoding used by the HTML serializer, for decoding its output and for the written file. */
        private static final String ENCODING = "UTF-8";

        private static final Logger LOGGER = Logger.getLogger(PoiUtil.class.getName());

        /**
         * Converts an Excel workbook to an HTML file.
         *
         * <p>Column headers and row numbers are switched off on the converter, and the
         * default sheet headings {@code <h2>Sheet1</h2>} .. {@code <h2>Sheet5</h2>} are
         * stripped from the generated markup. Headings of sheets with other names are
         * left untouched.
         *
         * @param fileName path of the workbook to read
         * @param outputFile path of the HTML file to write
         * @throws FileNotFoundException if {@code fileName} cannot be opened
         * @throws IOException if reading the workbook fails
         * @throws ParserConfigurationException if no DOM document builder can be created
         * @throws TransformerConfigurationException if no serializer can be created
         * @throws TransformerException if serializing the HTML document fails
         */
        public static void excelToHtml(String fileName, String outputFile)
                throws FileNotFoundException, IOException, ParserConfigurationException,
                    TransformerConfigurationException, TransformerException {
            HSSFWorkbook excelBook;
            InputStream is = new FileInputStream(fileName);
            try {
                excelBook = new HSSFWorkbook(is);
            } finally {
                // The stream is not used after the workbook is constructed; release the handle.
                is.close();
            }

            ExcelToHtmlConverter ethc = new ExcelToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            ethc.setOutputColumnHeaders(false);
            ethc.setOutputRowNumbers(false);

            ethc.processWorkbook(excelBook);

            String htmlStr = serializeToHtml(ethc.getDocument());

            // Drop the headings generated for the default sheet names Sheet1..Sheet5.
            for (int i = 1; i <= 5; i++) {
                htmlStr = htmlStr.replace("<h2>Sheet" + i + "</h2>", "");
            }

            writeFile(htmlStr, outputFile);
        }

        /**
         * Converts a Word document to an HTML file and extracts its pictures.
         *
         * <p>Every picture in the document's pictures table is written to a file named
         * by {@code Picture.suggestFullFileName()}, i.e. relative to the current working
         * directory. The HTML references each picture by the suggested name passed to
         * the pictures manager. NOTE(review): the pictures therefore presumably only
         * resolve when the HTML file is also written to the working directory - confirm.
         *
         * @param fileName path of the Word document to read
         * @param outputFile path of the HTML file to write
         * @throws IOException if reading the document or writing a picture fails
         * @throws ParserConfigurationException if no DOM document builder can be created
         * @throws TransformerException if serializing the HTML document fails
         */
        public static void wordToHtml(String fileName, String outputFile) throws
                IOException, ParserConfigurationException, TransformerException {
            HWPFDocument wordDoc;
            InputStream in = new FileInputStream(fileName);
            try {
                wordDoc = new HWPFDocument(in);
            } finally {
                // The stream is not used after the document is constructed; release the handle.
                in.close();
            }

            WordToHtmlConverter wthc = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

            wthc.setPicturesManager(new PicturesManager() {

                @Override
                public String savePicture(byte[] content, PictureType pictureType,
                        String suggestedName, float widthInches, float heightInches) {
                    // Nothing is saved here; the pictures are written out below.
                    return suggestedName;
                }

            });

            wthc.processDocument(wordDoc);

            List<Picture> pics = wordDoc.getPicturesTable().getAllPictures();
            if (null != pics) {
                for (Picture pic : pics) {
                    FileOutputStream picOut = new FileOutputStream(pic.suggestFullFileName());
                    try {
                        pic.writeImageContent(picOut);
                    } finally {
                        // Close each picture file even when writing it fails.
                        picOut.close();
                    }
                }
            }

            writeFile(serializeToHtml(wthc.getDocument()), outputFile);
        }

        /**
         * Writes {@code content} to the file at {@code path}, encoded as UTF-8.
         *
         * <p>I/O failures are logged at {@code SEVERE} level and not propagated, so
         * the caller is not told when the file could not be written.
         *
         * @param content text to write
         * @param path path of the file to create or overwrite
         */
        public static void writeFile(String content, String path) {
            FileOutputStream fos = null;
            BufferedWriter bw = null;

            try {
                fos = new FileOutputStream(new File(path));
                bw = new BufferedWriter(new OutputStreamWriter(fos, ENCODING));
                bw.write(content);
            } catch (IOException ex) {
                // Also covers FileNotFoundException and UnsupportedEncodingException.
                LOGGER.log(Level.SEVERE, null, ex);
            } finally {
                // Close each resource in its own try so that a failure closing the
                // writer cannot skip closing the underlying stream.
                try {
                    if (null != bw) {
                        bw.close();
                    }
                } catch (IOException ex) {
                    LOGGER.log(Level.SEVERE, null, ex);
                }
                try {
                    if (null != fos) {
                        fos.close();
                    }
                } catch (IOException ex) {
                    LOGGER.log(Level.SEVERE, null, ex);
                }
            }
        }

        /**
         * Serializes a DOM document as indented HTML and returns the markup as a string.
         *
         * <p>The bytes are decoded with the same encoding the serializer was told to
         * emit, so the result does not depend on the platform default charset.
         *
         * @param htmlDocument the DOM document produced by a POI HTML converter
         * @return the serialized HTML markup
         * @throws TransformerException if the serializer cannot be created or fails
         * @throws UnsupportedEncodingException if the encoding is not supported
         */
        private static String serializeToHtml(Document htmlDocument)
                throws TransformerException, UnsupportedEncodingException {
            ByteArrayOutputStream out = new ByteArrayOutputStream();

            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, ENCODING);
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

            return out.toString(ENCODING);
        }
    }

    2.Test.java

    import com.wzh.poi.PoiUtil;
    import java.io.IOException;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.transform.TransformerException;
    
    
    
    /*
     * To change this license header, choose License Headers in Project Properties.
     * To change this template file, choose Tools | Templates
     * and open the template in the editor.
     */
    
    /**
     * Small driver that converts the workbook {@code t2.xls} into {@code test.html}
     * through {@link PoiUtil#excelToHtml(String, String)}.
     *
     * @author y
     */
    public class Test {

        /**
         * Runs the conversion once; a failure is logged at SEVERE level, not rethrown.
         *
         * @param args the command line arguments (unused)
         */
        public static void main(String[] args) {
            Exception failure = null;

            try {
                PoiUtil.excelToHtml("t2.xls", "test.html");
            } catch (IOException ioProblem) {
                failure = ioProblem;
            } catch (ParserConfigurationException parserProblem) {
                failure = parserProblem;
            } catch (TransformerException transformProblem) {
                failure = transformProblem;
            }

            // All three failure kinds are reported the same way, so log once here.
            if (failure != null) {
                Logger.getLogger(Test.class.getName()).log(Level.SEVERE, null, failure);
            }
        }

    }
  • 相关阅读:
    Asp.Net微型服务器,只有一个文件,并且才300K大小|建议从事Asp.Net开发的博友们人手一份 狼人:
    “Asp.Net微型服务器”根据博友们的要求改版了,也出.NET4.0版本了,要更新的博友们赶快下吧 狼人:
    不经历风雨,怎么见彩虹,没有人能随随便便成功 狼人:
    C#汉字转拼音代码分享|建议收藏 狼人:
    用C#开发类似QQ输入法的不规则窗体的程序详解+代码打包分享 狼人:
    Android开发必备武器,处理XML的利器——SAX快速上手 狼人:
    “.NET研究”在iPhone应用中如何避免内存泄露 狼人:
    向量样本【模式识别】感知器 Perceptron
    编程在线庞果网 在线编程 24点游戏
    实现接口一种可靠的 DLL 接口实现方案
  • 原文地址:https://www.cnblogs.com/yshyee/p/4342717.html
Copyright © 2020-2023  润新知