• word2html文件


     1 package com.wesib2b.lw.app.util;
     2 
     3 import java.io.ByteArrayOutputStream;
     4 import java.io.File;
     5 import java.io.FileInputStream;
     6 import java.io.FileNotFoundException;
     7 import java.io.FileOutputStream;
     8 import java.io.InputStream;
     9 import java.util.List;
    10 import javax.xml.parsers.DocumentBuilderFactory;
    11 import javax.xml.transform.OutputKeys;
    12 import javax.xml.transform.Transformer;
    13 import javax.xml.transform.TransformerFactory;
    14 import javax.xml.transform.dom.DOMSource;
    15 import javax.xml.transform.stream.StreamResult;
    16 import org.apache.commons.io.FileUtils;
    17 import org.apache.poi.hwpf.HWPFDocument;
    18 import org.apache.poi.hwpf.converter.PicturesManager;
    19 import org.apache.poi.hwpf.converter.WordToHtmlConverter;
    20 import org.apache.poi.hwpf.usermodel.Picture;
    21 import org.apache.poi.hwpf.usermodel.PictureType;
    22 import org.w3c.dom.Document;
    23 
    24 public class test {
    25     public static void main(String[] args) throws Throwable {
    26         final String path = "D:\";
    27         final String file = "aaa.doc";
    28         InputStream input = new FileInputStream(path + file);
    29         HWPFDocument wordDocument = new HWPFDocument(input);
    30         WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
    31                 DocumentBuilderFactory.newInstance().newDocumentBuilder()
    32                         .newDocument());
    33         wordToHtmlConverter.setPicturesManager(new PicturesManager() {
    34             public String savePicture(byte[] content, PictureType pictureType,
    35                     String suggestedName, float widthInches, float heightInches) {
    36                 return suggestedName;
    37             }
    38         });
    39         wordToHtmlConverter.processDocument(wordDocument);
    40         List pics = wordDocument.getPicturesTable().getAllPictures();
    41         if (pics != null) {
    42             for (int i = 0; i < pics.size(); i++) {
    43                 Picture pic = (Picture) pics.get(i);
    44                 try {
    45                     pic.writeImageContent(new FileOutputStream(path
    46                             + pic.suggestFullFileName()));
    47                 } catch (FileNotFoundException e) {
    48                     e.printStackTrace();
    49                 }
    50             }
    51         }
    52         Document htmlDocument = wordToHtmlConverter.getDocument();
    53         ByteArrayOutputStream outStream = new ByteArrayOutputStream();
    54         DOMSource domSource = new DOMSource(htmlDocument);
    55         StreamResult streamResult = new StreamResult(outStream);
    56         TransformerFactory tf = TransformerFactory.newInstance();
    57         Transformer serializer = tf.newTransformer();
    58         serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
    59         serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    60         serializer.setOutputProperty(OutputKeys.METHOD, "html");
    61         serializer.transform(domSource, streamResult);
    62         outStream.close();
    63         String content = new String(outStream.toByteArray());
    64         FileUtils.write(new File(path, "1.html"), content, "utf-8");
    65     }
    66 }
  • 相关阅读:
    在eclipse中如何在大量项目中查找指定文件
    字体图标使用笔记
    微信扫码显示特效
    Flex 布局教程:实例篇
    Flex 布局教程:语法篇
    12月份前端资源分享
    什么样的技术能活下来?该如何筛选繁复的框架和工具
    web前端知识在乱花渐欲迷人眼的当下,如何分清主次和学习优先级呢?
    2015前端生态发展回顾(转)
    HDU1507 Uncle Tom's Inherited Land*
  • 原文地址:https://www.cnblogs.com/jason123/p/7039937.html
Copyright © 2020-2023  润新知