package cn.wgd.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.AbstractWordUtils;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.util.XMLHelper;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import fr.opensagres.poi.xwpf.converter.core.IXWPFConverter;
import fr.opensagres.poi.xwpf.converter.core.ImageManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
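/**
* @author Kevin 2018-3-14
*
* Converts Word, PDF and similar files to HTML so that attachments can be previewed.
*
* Image handling reference: https://www.cnblogs.com/feiruo/p/5924514.html
*
* Besides the POI (poi 3.17) jars, this example requires:
* fr.opensagres.poi.xwpf.converter.core-2.0.1.jar
* fr.opensagres.poi.xwpf.converter.xhtml-2.0.1.jar
* fr.opensagres.xdocreport.core-2.0.1.jar
* ooxml-schemas-1.3.jar, etc.
*
* Note: this is a simple implementation; if a Word document needs richer style
* handling, that has to be implemented separately.
*
*/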
public class ConvertWord2HtmlUtil {

    /** File-name suffix that selects the OOXML (.docx) conversion path. */
    private static final String DOCX_SUFFIX = ".docx";

    /**
     * Sample entry point: converts a hard-coded .docx file to HTML.
     *
     * @param args ignored
     * @throws IOException if reading the source or writing the output fails
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException if serializing the HTML DOM fails
     * @throws SAXException declared by {@link #word2007ToHtml(String, String, String)}
     */
    public static void main(String[] args)
            throws IOException, ParserConfigurationException, TransformerException, SAXException {
        // Backslashes must be escaped: a bare backslash followed by 't' inside a Java
        // string literal is a TAB character, which silently corrupts a Windows path.
        String path = "D:\\testfile2html\\test.docx";
        String descPath = "D:\\testfile2html\\test.html";
        String imagePath = "D:\\testfile2html";
        word2007ToHtml(path, descPath, imagePath);
    }

    /**
     * Converts a binary .doc file to HTML. Modeled on
     * {@code org.apache.poi.hwpf.converter.WordToHtmlConverter.main()}.
     *
     * @param path     path of the source .doc file; must not be {@code null}
     * @param descPath path of the HTML file to write
     * @throws NullPointerException if {@code path} is {@code null}
     * @throws IOException if reading the source or writing the output fails
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws TransformerException if serializing the HTML DOM fails
     */
    public static void word95T2007ToHtml(String path, String descPath)
            throws IOException, ParserConfigurationException, TransformerException {
        if (path == null) {
            throw new NullPointerException("路径不能为空!");
        }
        System.out.println("Converting " + path);
        System.out.println("Saving output to " + descPath);

        // Build the HTML DOM first so that no output file is created when parsing fails.
        Document doc = ConvertWord2HtmlUtil.process(new File(path));

        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");

        // Write through an explicitly managed stream so the file handle is closed
        // deterministically, even when the transform throws.
        try (FileOutputStream out = new FileOutputStream(descPath)) {
            serializer.transform(new DOMSource(doc), new StreamResult(out));
        }
    }

    /**
     * Loads a Word document and converts it to an HTML DOM. Adapted from
     * {@code org.apache.poi.hwpf.converter.WordToHtmlConverter}.
     *
     * @param docFile the Word file to load
     * @return the DOM document produced by the converter
     * @throws IOException if the file cannot be read
     * @throws ParserConfigurationException if no DOM builder can be created
     */
    static Document process(File docFile) throws IOException, ParserConfigurationException {
        final HWPFDocumentCore wordDocument = AbstractWordUtils.loadDoc(docFile);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                XMLHelper.getDocumentBuilderFactory().newDocumentBuilder().newDocument());
        wordToHtmlConverter.processDocument(wordDocument);
        return wordToHtmlConverter.getDocument();
    }

    /**
     * Converts a Word file to HTML. Files whose name ends in ".docx" (any letter case)
     * go through the XWPF/XHTML converter; every other file is handed to
     * {@link #word95T2007ToHtml(String, String)}. If the source file does not exist,
     * a message is printed and the method returns without converting anything.
     *
     * @param path      path of the source file (doc or docx); must not be {@code null}
     * @param descPath  path of the HTML file to write
     * @param imagePath base directory passed to the {@code ImageManager} (docx only);
     *                  presumably extracted images end up under an "image"
     *                  sub-directory of it — TODO confirm against the converter docs
     * @throws NullPointerException if {@code path} is {@code null}
     * @throws IOException if reading the source or writing the output fails
     * @throws ParserConfigurationException if no DOM builder can be created (doc path)
     * @throws TransformerException if serializing the HTML DOM fails (doc path)
     * @throws SAXException kept in the signature for existing callers
     */
    public static void word2007ToHtml(String path, String descPath, String imagePath)
            throws IOException, ParserConfigurationException, TransformerException, SAXException {
        if (path == null) {
            throw new NullPointerException("路径不能为空!");
        }
        File sourceFile = new File(path);
        if (!sourceFile.exists()) {
            System.out.println("用户文件不存在!");
            return;
        }
        if (isDocx(path)) {
            docxToHtml(sourceFile, descPath, imagePath);
        } else {
            word95T2007ToHtml(path, descPath);
        }
    }

    /** Returns whether {@code path} ends with ".docx", ignoring letter case. */
    private static boolean isDocx(String path) {
        // regionMatches returns false for a negative offset, so short names are safe.
        return path.regionMatches(true, path.length() - DOCX_SUFFIX.length(),
                DOCX_SUFFIX, 0, DOCX_SUFFIX.length());
    }

    /** Converts one .docx file to XHTML, closing every stream and the document afterwards. */
    private static void docxToHtml(File sourceFile, String descPath, String imagePath)
            throws IOException {
        try (FileInputStream in = new FileInputStream(sourceFile);
             XWPFDocument document = new XWPFDocument(in)) {
            IXWPFConverter<XHTMLOptions> converter = XHTMLConverter.getInstance();
            XHTMLOptions options = XHTMLOptions.create();
            options.setImageManager(new ImageManager(new File(imagePath), "image"));
            // Open the output only once everything else is ready, as the original did.
            try (FileOutputStream out = new FileOutputStream(descPath)) {
                converter.convert(document, out, options);
            }
        }
    }
}