• poi提取docx中的文字和图片


    package com.fry.poiDemo.dao;
    
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.PrintStream;
    import java.util.List;
    
    import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
    import org.apache.poi.xwpf.usermodel.XWPFDocument;
    import org.apache.poi.xwpf.usermodel.XWPFPictureData;
    
    public class Word {
        // maven太好用了
        // 读取srcFile源word文件docx文字
        // 读取srcFile源word文件docx中的image图片并且存放在文件夹imageFile中
        public String readDocxImage(String srcFile, String imageFile) {
            String path = srcFile;
            File file = new File(path);
            try {
                // 用XWPFWordExtractor来获取文字
                FileInputStream fis = new FileInputStream(file);
                XWPFDocument document = new XWPFDocument(fis);
                XWPFWordExtractor xwpfWordExtractor = new XWPFWordExtractor(document);
                String text = xwpfWordExtractor.getText();
    //            System.out.println(text);
                //将获取到的文字存放到对应文件名中的txt文件中
                String temp[]=srcFile.split("\/");
                String temp1=temp[temp.length-1];
                String temp3[]=temp1.split("\.");
                String txtFileName="myRes//txt//"+temp3[0]+".txt";
                PrintStream ps = new PrintStream(txtFileName);
                ps.println(text);
                
    
                // 用XWPFDocument的getAllPictures来获取所有的图片
                List<XWPFPictureData> picList = document.getAllPictures();
                for (XWPFPictureData pic : picList) {
    //                System.out.println(pic.getPictureType() + file.separator + pic.suggestFileExtension() + file.separator
    //                        + pic.getFileName());
                    byte[] bytev = pic.getData();
    //                System.out.println(bytev.length);
                    // 大于1000bites的图片我们才弄下来,消除word中莫名的小图片的影响
                    if (bytev.length > 300) {
                        FileOutputStream fos = new FileOutputStream(imageFile + pic.getFileName());
                        fos.write(bytev);
                    }
                }
                fis.close();
                return text;
            } catch (IOException e) {
                e.printStackTrace();
            }
            return null;
        }
    }
    
    <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>3.9</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi</artifactId>
        <version>3.9</version>
    </dependency>
    

      

      

  • 相关阅读:
    Crontab '2>&1 &' 含义
    form提交方式Methor
    oracle基本术语
    在工作中常用的sql语句
    常用的删除大数据方法(游标+分段)
    oracle9i、10g、11g区别
    SSH面试总结(Hibernage面试)
    实习生招聘笔试
    TopCoder上一道600分滴题
    Oracle数据库面试题汇总
  • 原文地址:https://www.cnblogs.com/qinyios/p/11121552.html
Copyright © 2020-2023  润新知