本来是想将RTF文件直接转换成html文件,但是由于乱码的问题没办法解决,所以选择了一个折中的办法,先将RTF转换成DOC的文档,再利用HWPFDocument类将DOC文件转换为html文件。
1、下载OpenOffice安装在需要进行文书转换的电脑上,下载地址:http://rj.baidu.com/soft/detail/15989.html?ald
2、在对应程序中添加jar包,对应下载jar包百度云盘地址: 获取码:
或者说直接在网上搜索jodconverter-core-3.0-beta-4,下载下来,里面有对应的包
3、写java类,运行项目进行调试转换
package com.thunisoft.test;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.artofsolving.jodconverter.OfficeDocumentConverter;
import org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration;
import org.artofsolving.jodconverter.office.OfficeManager;
public class TestRTF2Word {
private static String OFFICE_HOME = "C:\Program Files (x86)\OpenOffice 4"; // "/opt/libreoffice5.0";
// private static int port[] = { 8100 };
private static int port = 8100;
private static OfficeManager officeManager;
// 打开服务器
public static void startService() {
DefaultOfficeManagerConfiguration configuration = new DefaultOfficeManagerConfiguration();
try {
System.out.println("准备启动openoffice服务....");
configuration.setOfficeHome(OFFICE_HOME);// 设置OpenOffice.org安装目录
configuration.setPortNumbers(port);
configuration.setTaskExecutionTimeout(1000 * 60 * 5L);// 设置任务执行超时为5分钟
configuration.setTaskQueueTimeout(1000 * 60 * 60 * 24L);// 设置任务队列超时为24小时
officeManager = configuration.buildOfficeManager();
officeManager.start(); // 启动服务
System.out.println("office转换服务启动成功!");
} catch (Exception ce) {
System.out.println("office转换服务启动失败!详细信息:" + ce);
System.exit(0);
}
}
// 关闭服务器
public static void stopService() {
System.out.println("关闭office转换服务....");
if (officeManager != null) {
officeManager.stop();
}
System.out.println("关闭office转换成功!");
}
public static void main(String[] args) {
startService();
OfficeDocumentConverter converter = new OfficeDocumentConverter(
officeManager);
try {
File outFile = new File("E:\rtf\1\qwe123.doc");
if (!outFile.getParentFile().exists()) {
outFile.getParentFile().mkdirs();
}
System.out.println(outFile.exists());
File inFile = new File("E:\rtf\1\3606c3de7612-ffa5-44cd-bbc1-f5312de0aa3d.rtf");
System.out.println(inFile.exists());
converter.convert(inFile, outFile);
} catch (Exception e) {
e.printStackTrace();
} finally {
stopService();
}
}
}
4、转换完成后可以将其存放到某个目录下,通过以下方式转换为html文件
/**
 * Converts a binary Word (.doc) file to HTML and returns the serialized bytes.
 *
 * <p>Every picture embedded in the document is also written to the directory
 * returned by {@code getPicPath()} (URL-decoded as UTF-8), using the file name
 * suggested by the picture itself. A picture whose target file cannot be
 * opened is reported on stderr and skipped.
 *
 * @param fileName path of the .doc file to read
 * @return a buffer holding the HTML output, encoded as GB2312
 * @throws java.io.IOException if the document cannot be read or a picture cannot be written
 * @throws javax.xml.parsers.ParserConfigurationException if no DOM builder can be created
 * @throws javax.xml.transform.TransformerException if the HTML cannot be serialized
 */
private static ByteArrayOutputStream docHandler(String fileName)
        throws java.io.IOException, javax.xml.parsers.ParserConfigurationException,
        javax.xml.transform.TransformerException {
    // Load the document, then release the file handle immediately.
    HWPFDocument wordDocument;
    InputStream is = new FileInputStream(fileName);
    try {
        wordDocument = new HWPFDocument(is);
    } finally {
        is.close();
    }

    WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
            DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
    // Optional (left disabled, as in the original): a PicturesManager could be
    // installed here, presumably to control the image path used in the HTML.
    // wordToHtmlConverter.setPicturesManager(new PicturesManager() {
    //     public String savePicture(byte[] content, PictureType pictureType,
    //             String suggestedName, float widthInches, float heightInches) {
    //         return "pic/" + suggestedName;
    //     }
    // });
    wordToHtmlConverter.processDocument(wordDocument);

    // Save the embedded pictures next to the generated HTML.
    List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
    if (pics != null) {
        for (Picture pic : pics) {
            FileOutputStream picOut = null;
            try {
                picOut = new FileOutputStream(URLDecoder.decode(getPicPath(), "UTF-8")
                        + pic.suggestFullFileName());
                pic.writeImageContent(picOut);
            } catch (FileNotFoundException e) {
                // Best effort: a picture that cannot be stored does not abort the conversion.
                e.printStackTrace();
            } finally {
                // Close the stream even when writing fails, so no handle leaks.
                if (picOut != null) {
                    picOut.close();
                }
            }
        }
    }

    // Serialize the DOM produced by the converter as indented HTML.
    Document htmlDocument = wordToHtmlConverter.getDocument();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DOMSource domSource = new DOMSource(htmlDocument);
    StreamResult streamResult = new StreamResult(out);
    TransformerFactory tf = TransformerFactory.newInstance();
    Transformer serializer = tf.newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, "GB2312");
    // serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.METHOD, "html");
    serializer.transform(domSource, streamResult);
    // No close() needed: closing a ByteArrayOutputStream has no effect.
    return out;
}