Atitit ppt转换文本txt
目录
1.1. Atitit word ppt excel convert txt bp 等文档转换纯文本问题最佳实践.docx 1
Atitit word ppt excel等文档转换txt问题最佳实践
目录
1.1. // Word 直接抽取全部内容 1
1.2. //分章节Section、段落Paragraph、字符串CharacterRun抽取 1
1.3. //直接抽取幻灯片的全部内容 2
1.4. //一张幻灯片一张幻灯片地读取 3
1.5. //直接读取Excel的全部内容 4
1.6. //读取时细化到Sheet、行甚至单元格 4
D:\0workspace\AtiPlatf_cms\src\com\attilax\office\Office2Pdf.java
F:\workspace 空格\officePdf2html\src\com\attilax\office\Office2Pdf.java
将 Office 文档转换为 PDF。运行该函数需要先安装 OpenOffice,并启动其服务(代码连接 127.0.0.1:8100)。OpenOffice 下载地址为:
* http://www.openoffice.org/
/**
 * Converts the document at {@code sourceFile} into {@code destFile} by handing both
 * files to an OpenOffice document converter reached over a socket at 127.0.0.1:8100.
 *
 * NOTE(review): this excerpt ends before the matching catch/finally and the return
 * statement, so the meaning of the int result is not visible here; the disabled
 * existence check below suggests -1 was meant for "source file not found" - confirm.
 *
 * @param sourceFile path of the document to convert
 * @param destFile   path of the file to produce; missing parent directories are created
 */
public static int office2PDF(String sourceFile, String destFile) {
try {
File inputFile = new File(sourceFile);
// Disabled existence check for the source file:
// if (!inputFile.exists()) {
// return -1;// source file not found, return -1
// }
// Create the destination's parent directory if it does not exist yet.
// NOTE(review): getParentFile() returns null when destFile has no parent component,
// which would make the next line throw - confirm callers always pass a full path.
File outputFile = new File(destFile);
if (!outputFile.getParentFile().exists()) {
outputFile.getParentFile().mkdirs();
}
String OpenOffice_HOME = "D:\\Program Files\\OpenOffice.org 3";// OpenOffice installation directory; the author notes the real project does not hard-code it (to keep the interface extensible), but hard-coding it works
// Append a trailing '\' when the configured directory does not already end with one.
if (OpenOffice_HOME.charAt(OpenOffice_HOME.length() - 1) != '\\') {
OpenOffice_HOME += "\\";
}
// Start the OpenOffice service (disabled; OpenOffice_HOME is only used by this
// commented-out launch command, so a service must already be listening on port 8100):
// String command = OpenOffice_HOME
// + "program\\soffice.exe -headless -accept=\"socket,host=127.0.0.1,port=8100;urp;\"";
// Process pro = Runtime.getRuntime().exec(command);
// connect to an OpenOffice.org instance running on port 8100
OpenOfficeConnection connection = new SocketOpenOfficeConnection(
"127.0.0.1", 8100);
connection.connect();
// convert
DocumentConverter converter = new OpenOfficeDocumentConverter(
connection);
converter.convert(inputFile, outputFile);
// close the connection
// NOTE(review): disconnect() is skipped if connect() or convert() throws.
connection.disconnect();
// Kill the OpenOffice service process (disabled together with the launch above):
// pro.destroy();
/OfficeExcelPrj/src/apkg/ppt2txt.java
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
public class ppt2txt {
/**
 * Reads a .pptx file path from the system clipboard, prints the path, then prints
 * the text extracted from that presentation.
 *
 * @param args unused
 * @throws IOException                if the clipboard data or the presentation cannot be read
 * @throws UnsupportedFlavorException if the clipboard content is not available as a string
 */
@SuppressWarnings("all")
public static void main(String[] args) throws IOException, UnsupportedFlavorException {
    // System.out.println( clipboard.getContents(DataFlavor.stringFlavor));
    // Sample path; it is always replaced by the clipboard text just below.
    String f = "d:\\夯实硬实力、迎接腾飞——打造思维发展课堂-简化.pptx";

    // Take whatever is currently on the system clipboard and read it as plain text.
    Transferable clipboardContents = Toolkit.getDefaultToolkit().getSystemClipboard().getContents(null);
    Object clipboardText = clipboardContents.getTransferData(DataFlavor.stringFlavor);
    f = (String) clipboardText;
    System.out.println(f);

    // Extract and print the presentation text for the path taken from the clipboard.
    File presentation = new File(f);
    String extractedText = readDoc1_2007fmt(presentation);
    System.out.println(extractedText);
}
/**
 * Extracts the text of a PowerPoint 2007+ (.pptx, XML-based) presentation.
 *
 * @param file the .pptx file to read
 * @return the text returned by the extractor for the whole slide show
 * @throws FileNotFoundException if {@code file} cannot be opened for reading
 * @throws IOException           if the presentation cannot be read or parsed
 */
private static String readDoc1_2007fmt(File file) throws FileNotFoundException, IOException {
    // The original opened the stream inline and never closed it, leaking a file handle
    // on every call; try-with-resources closes it on both normal and exceptional exit.
    try (FileInputStream in = new FileInputStream(file)) {
        // Build a slide show from the XML-based file and wrap it in a text extractor.
        XSLFPowerPointExtractor ppt = new XSLFPowerPointExtractor(new XMLSlideShow(in));
        return ppt.getText();
    }
}
/**
 * Extracts the text of a presentation supplied as a stream, using the HSLF
 * {@code PowerPointExtractor}.
 *
 * @param is stream positioned at the start of the presentation data; not closed here
 * @return the text returned by the extractor
 * @throws IOException if the stream cannot be read or parsed
 */
public static String readDoc1(InputStream is) throws IOException {
    return new PowerPointExtractor(is).getText();
}