• POI实现DOC/DOCX转HTML


    1.使用HWPF处理DOC

    public class DocToHtml {  
        
        private static final String encoding = "UTF-8";
    
        public static String convert2Html(String wordPath)
                throws FileNotFoundException, TransformerException, IOException,
                ParserConfigurationException {
            if( wordPath == null || "".equals(wordPath) ) return "";
            File file = new File(wordPath);
            if( file.exists() && file.isFile() )
                return convert2Html(new FileInputStream(file));
            else
                return "";
        }
        
        public static String convert2Html(String wordPath, String context)
        throws FileNotFoundException, TransformerException, IOException,
        ParserConfigurationException {
            if( wordPath == null || "".equals(wordPath) ) return "";
            File file = new File(wordPath);
            if( file.exists() && file.isFile() )
                return convert2Html(new FileInputStream(file), context);
            else
                return "";
        }
      
        public static String convert2Html(InputStream is)
                throws TransformerException, IOException,
                ParserConfigurationException {
            return convert2Html(is, "");
        }
    
        public static String convert2Html(InputStream is, HttpServletRequest req) throws TransformerException, IOException,    ParserConfigurationException {
            return convert2Html(is, req.getContextPath());
        }
        
        public static String convert2Html(InputStream is, final String context) throws IOException, ParserConfigurationException, TransformerException {
            HWPFDocument wordDocument = new HWPFDocument(is);
            WordToHtmlConverter converter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder()
                            .newDocument());
            
            SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmssSSS");
            final String prefix = sdf.format(new Date());
            final Map<Object, String> suffixMap = new HashMap<Object, String>();
            
            converter.setPicturesManager(new PicturesManager() {
                    public String savePicture(byte[] content, PictureType pictureType,
                            String suggestedName, float widthInches, float heightInches) {
                        String prefixContext = context.replace("\", "").replace("/", "");
                        prefixContext = StringUtils.isNotBlank(prefixContext) ? "/" + prefixContext + "/" : prefixContext;
                        suffixMap.put(new String(content).replace(" ", "").length(), suggestedName);
                        
                        return  prefixContext
                                + UeConstants.VIEW_IMAGE_PATH + "/" + UeConstants.UEDITOR_PATH
                                + "/" + UeConstants.UEDITOR_IMAGE_PATH + "/"
                                + prefix + "_"
                                + suggestedName;
                    }
            });
            converter.processDocument(wordDocument);
            
            List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
            if (pics != null) {
                for(Picture pic : pics) {
                    try {
                        pic.writeImageContent(new FileOutputStream(
                                UeConstants.IMAGE_PATH
                                     + "/" + prefix + "_" + suffixMap.get(new String(pic.getContent()).replace(" ", "").length())));
                    } catch (FileNotFoundException e) {
                        e.printStackTrace();
                    }
                }
            }
            
            StringWriter writer = new StringWriter();
            
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, encoding);
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(
                    new DOMSource(converter.getDocument()),
                    new StreamResult(writer) );
            writer.close();
            return writer.toString();
        }
    }  

    2.使用XWPFDocument处理DOCX

    /**
     * Converts a DOCX classpath resource to XHTML, once to standard output and once
     * to a file under {@code target/}, printing the elapsed time of each run.
     */
    public class XHTMLConverterTestCase
        extends AbstractXWPFPOIConverterTest
    {

        /**
         * Runs both conversions (console and file) for the given resource.
         *
         * @param fileInName classpath resource name of the DOCX document
         * @throws IOException if the resource is missing or cannot be read or written
         */
        protected void doGenerate( String fileInName )
            throws IOException
        {
            doGenerateSysOut( fileInName );
            doGenerateHTMLFile( fileInName );
        }

        /**
         * Converts the document to XHTML indented by 4 and writes it to {@code System.out}.
         *
         * @param fileInName classpath resource name of the DOCX document
         * @throws IOException if the resource is missing or cannot be read
         */
        protected void doGenerateSysOut( String fileInName )
            throws IOException
        {

            long startTime = System.currentTimeMillis();

            XWPFDocument document = loadDocument( fileInName );

            XHTMLOptions options = XHTMLOptions.create().indent( 4 );
            // System.out is shared with the rest of the JVM: flush it, never close it.
            OutputStream out = System.out;
            XHTMLConverter.getInstance().convert( document, out, options );
            out.flush();

            System.err.println( "Elapsed time=" + ( System.currentTimeMillis() - startTime ) + "(ms)" );
        }

        /**
         * Converts the document to {@code target/<fileInName>.html}, extracting its images
         * to {@code target/images/<fileInName>}.
         *
         * @param fileInName classpath resource name of the DOCX document
         * @throws IOException if the resource is missing or the output cannot be written
         */
        protected void doGenerateHTMLFile( String fileInName )
            throws IOException
        {

            String root = "target";
            String fileOutName = root + "/" + fileInName + ".html";

            long startTime = System.currentTimeMillis();

            XWPFDocument document = loadDocument( fileInName );

            XHTMLOptions options = XHTMLOptions.create();// .indent( 4 );
            // Extract image
            File imageFolder = new File( root + "/images/" + fileInName );
            options.setExtractor( new FileImageExtractor( imageFolder ) );
            // URI resolver
            options.URIResolver( new FileURIResolver( imageFolder ) );

            File outFile = new File( fileOutName );
            File outFolder = outFile.getParentFile();
            if ( outFolder != null && !outFolder.exists() )
            {
                // If creation fails, opening the output stream below reports the error.
                outFolder.mkdirs();
            }

            OutputStream out = new FileOutputStream( outFile );
            try
            {
                XHTMLConverter.getInstance().convert( document, out, options );
            }
            finally
            {
                out.close();
            }

            System.out.println( "Generate " + fileOutName + " with " + ( System.currentTimeMillis() - startTime ) + " ms." );
        }

        /**
         * Loads a DOCX document from the classpath and releases the resource stream.
         *
         * @param fileInName classpath resource name, resolved against
         *                   {@code AbstractXWPFPOIConverterTest}
         * @return the parsed document
         * @throws IOException if the resource does not exist or cannot be read
         */
        private static XWPFDocument loadDocument( String fileInName )
            throws IOException
        {
            java.io.InputStream in = AbstractXWPFPOIConverterTest.class.getResourceAsStream( fileInName );
            if ( in == null )
            {
                throw new IOException( "Resource not found on classpath: " + fileInName );
            }
            try
            {
                return new XWPFDocument( in );
            }
            finally
            {
                in.close();
            }
        }
    }

    项目下载地址:http://download.csdn.net/detail/luka2008/7902285

    本文转自:http://blog.csdn.net/luka2008/article/details/21168287

  • 相关阅读:
    Weather with you主题说明
    搜索枚举
    洛谷P2085——最小函数值
    洛谷P1402——乒乓球
    CSP2019,RP+=150。
    搜索之连通块(深搜广搜版)
    appium
    appium环境搭建
    Python抓取淘宝IP地址数据
    记录日志
  • 原文地址:https://www.cnblogs.com/dreammyle/p/4486861.html
Copyright © 2020-2023  润新知