实现逻辑有两种:
一、利用jodconverter(基于OpenOffice服务)将文件(.doc、.docx、.xls、.ppt)转化为html格式。
二、利用jodconverter(基于OpenOffice服务)将文件(.doc、.docx、.xls、.ppt)转化为pdf格式。
转换成html格式大家都能理解,这样就可以直接在浏览器上查看了,也就实现了在线预览的功能;转换成pdf格式这点,需要用户安装了Adobe Reader XI,这样你会发现把pdf直接拖到浏览器页面可以直接打开预览,这样也就实现了在线预览的功能。
将文件转化为html格式或者pdf格式
话不多说,直接上代码。
package com.pdfPreview.util; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.ConnectException; import java.text.SimpleDateFormat; import java.util.Date; import com.artofsolving.jodconverter.DocumentConverter; import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection; import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection; import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter; /** * 利用jodconverter(基于OpenOffice服务)将文件(*.doc、*.docx、*.xls、*.ppt)转化为html格式或者pdf格式, * 使用前请检查OpenOffice服务是否已经开启, OpenOffice进程名称:soffice.exe | soffice.bin * * @author yjclsx */ public class Doc2HtmlUtil { private static Doc2HtmlUtil doc2HtmlUtil; /** * 获取Doc2HtmlUtil实例 */ public static synchronized Doc2HtmlUtil getDoc2HtmlUtilInstance() { if (doc2HtmlUtil == null) { doc2HtmlUtil = new Doc2HtmlUtil(); } return doc2HtmlUtil; } /** * 转换文件成html * * @param fromFileInputStream: * @throws IOException */ public String file2Html(InputStream fromFileInputStream, String toFilePath,String type) throws IOException { Date date = new Date(); SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss"); String timesuffix = sdf.format(date); String docFileName = null; String htmFileName = null; if("doc".equals(type)){ docFileName = "doc_" + timesuffix + ".doc"; htmFileName = "doc_" + timesuffix + ".html"; }else if("docx".equals(type)){ docFileName = "docx_" + timesuffix + ".docx"; htmFileName = "docx_" + timesuffix + ".html"; }else if("xls".equals(type)){ docFileName = "xls_" + timesuffix + ".xls"; htmFileName = "xls_" + timesuffix + ".html"; }else if("ppt".equals(type)){ docFileName = "ppt_" + timesuffix + ".ppt"; htmFileName = "ppt_" + timesuffix + ".html"; }else{ return null; } File htmlOutputFile = new File(toFilePath + File.separatorChar + htmFileName); File docInputFile = new File(toFilePath + File.separatorChar + docFileName); if (htmlOutputFile.exists()) htmlOutputFile.delete(); htmlOutputFile.createNewFile(); if (docInputFile.exists()) docInputFile.delete(); docInputFile.createNewFile(); /** * 由fromFileInputStream构建输入文件 */ try { OutputStream os = new FileOutputStream(docInputFile); int bytesRead = 0; byte[] buffer = new byte[1024 * 8]; while ((bytesRead = fromFileInputStream.read(buffer)) != -1) { os.write(buffer, 0, bytesRead); } os.close(); fromFileInputStream.close(); } catch (IOException e) { } OpenOfficeConnection connection = new SocketOpenOfficeConnection(8100); try { connection.connect(); } catch (ConnectException e) { System.err.println("文件转换出错,请检查OpenOffice服务是否启动。"); } // convert DocumentConverter converter = new OpenOfficeDocumentConverter(connection); converter.convert(docInputFile, htmlOutputFile); connection.disconnect(); // 转换完之后删除word文件 docInputFile.delete(); return htmFileName; } /** * 转换文件成pdf * * @param fromFileInputStream: * @throws IOException */ public String file2pdf(InputStream fromFileInputStream, String toFilePath,String type) throws IOException { Date date = new Date(); SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss"); String timesuffix = sdf.format(date); String docFileName = null; String htmFileName = null; if("doc".equals(type)){ docFileName = "doc_" + timesuffix + ".doc"; htmFileName = "doc_" + timesuffix + ".pdf"; }else if("docx".equals(type)){ docFileName = "docx_" + timesuffix + ".docx"; htmFileName = "docx_" + timesuffix + ".pdf"; }else if("xls".equals(type)){ docFileName = "xls_" + timesuffix + ".xls"; htmFileName = "xls_" + timesuffix + ".pdf"; }else if("ppt".equals(type)){ docFileName = "ppt_" + timesuffix + ".ppt"; htmFileName = "ppt_" + timesuffix + ".pdf"; }else{ return null; } File htmlOutputFile = new File(toFilePath + File.separatorChar + htmFileName); File docInputFile = new File(toFilePath + File.separatorChar + docFileName); if (htmlOutputFile.exists()) htmlOutputFile.delete(); htmlOutputFile.createNewFile(); if (docInputFile.exists()) docInputFile.delete(); docInputFile.createNewFile(); /** * 由fromFileInputStream构建输入文件 */ try { OutputStream os = new FileOutputStream(docInputFile); int bytesRead = 0; byte[] buffer = new byte[1024 * 8]; while ((bytesRead = fromFileInputStream.read(buffer)) != -1) { os.write(buffer, 0, bytesRead); } os.close(); fromFileInputStream.close(); } catch (IOException e) { } OpenOfficeConnection connection = new SocketOpenOfficeConnection(8100); try { connection.connect(); } catch (ConnectException e) { System.err.println("文件转换出错,请检查OpenOffice服务是否启动。"); } // convert DocumentConverter converter = new OpenOfficeDocumentConverter(connection); converter.convert(docInputFile, htmlOutputFile); connection.disconnect(); // 转换完之后删除word文件 docInputFile.delete(); return htmFileName; } public static void main(String[] args) throws IOException { Doc2HtmlUtil coc2HtmlUtil = getDoc2HtmlUtilInstance(); File file = null; FileInputStream fileInputStream = null; file = new File("D:/poi-test/exportExcel.xls"); fileInputStream = new FileInputStream(file); // coc2HtmlUtil.file2Html(fileInputStream, "D:/poi-test/openOffice/xls","xls"); coc2HtmlUtil.file2pdf(fileInputStream, "D:/poi-test/openOffice/xls","xls"); file = new File("D:/poi-test/test.doc"); fileInputStream = new FileInputStream(file); // coc2HtmlUtil.file2Html(fileInputStream, "D:/poi-test/openOffice/doc","doc"); coc2HtmlUtil.file2pdf(fileInputStream, "D:/poi-test/openOffice/doc","doc"); file = new File("D:/poi-test/周报模版.ppt"); fileInputStream = new FileInputStream(file); // coc2HtmlUtil.file2Html(fileInputStream, "D:/poi-test/openOffice/ppt","ppt"); coc2HtmlUtil.file2pdf(fileInputStream, "D:/poi-test/openOffice/ppt","ppt"); file = new File("D:/poi-test/test.docx"); fileInputStream = new FileInputStream(file); // coc2HtmlUtil.file2Html(fileInputStream, "D:/poi-test/openOffice/docx","docx"); coc2HtmlUtil.file2pdf(fileInputStream, "D:/poi-test/openOffice/docx","docx"); } }
转换成html和转换成pdf的过程几乎一样,只是在创建输出的File时前者命名为XXX.html,后者命名为XXX.pdf,在执行converter.convert(docInputFile, htmlOutputFile);时,jodconverter会自己根据文件类型名转换成对应的文件。
注意,main方法里别file2Html和file2pdf都调用,会报错的,要么转html,要么转pdf,只能选一个。还有就是在执行之前,需要启动openOffice的服务:在openOffice目录下的命令窗口中执行soffice -headless -accept=”socket,host=127.0.0.1,port=8100;urp;” -nofirststartwizard即可启动。
以上需要引入jodconverter的jar包。