• POI读取Word与PowerPoint


    import java.io.BufferedWriter;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.File;
    import java.io.OutputStreamWriter;
    import java.util.HashSet;
    
    import org.apache.poi.hslf.HSLFSlideShow;
    import org.apache.poi.hslf.model.Slide;
    import org.apache.poi.hslf.model.TextRun;
    import org.apache.poi.hslf.usermodel.RichTextRun;
    import org.apache.poi.hslf.usermodel.SlideShow;
    import org.apache.poi.hwpf.HWPFDocument;
    import org.apache.poi.hwpf.extractor.WordExtractor;
    import org.apache.poi.hwpf.usermodel.CharacterRun;
    import org.apache.poi.hwpf.usermodel.Paragraph;
    import org.apache.poi.hwpf.usermodel.Range;
    import org.apache.poi.hwpf.usermodel.Section;
    import org.apache.poi.xslf.usermodel.XMLSlideShow;
    import org.apache.poi.xslf.usermodel.XSLFShape;
    import org.apache.poi.xslf.usermodel.XSLFSlide;
    import org.apache.poi.xslf.usermodel.XSLFTable;
    import org.apache.poi.xslf.usermodel.XSLFTableCell;
    import org.apache.poi.xslf.usermodel.XSLFTableRow;
    import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
    import org.apache.poi.xslf.usermodel.XSLFTextRun;
    import org.apache.poi.xslf.usermodel.XSLFTextShape;
    public class read {
    
        private static InputStream is =null;
    
    
        public static void readFiles(String path){
            File f = new File(path);
            File[] files = f.listFiles();
            for(File everyfile : files){
                StringBuilder sb = new StringBuilder();
                if(everyfile.isDirectory())    continue;
                String Filename = everyfile.getName();
                if(Filename.startsWith("~")) continue;
                if(!Filename.endsWith("doc")) continue;
                System.out.println(Filename);
                sb.append("###@@@").append(Filename.substring(0,Filename.lastIndexOf("."))).append("
    ");
                sb.append("----------------------").append("
    ");
                try {
                    //输入文件流
                    is = new FileInputStream(everyfile);
                    if(Filename.toLowerCase().endsWith("ppt")||Filename.toLowerCase().endsWith("pptm")){
                        try {
                            XMLSlideShow pptx = new XMLSlideShow(is);
                            is.close();
                            for(int x= 0 ; x< pptx.getSlides().length ; x++){
                                XSLFSlide slide = pptx.getSlides()[x];
                                if (slide.getShapes().length == 0) continue;
                                String title = getTitle(slide);
                                if(title != null) sb.append(title).append("	").append("title##@@").append("
    ");
                                for(XSLFShape shape : slide){
                                    if(shape instanceof XSLFTextShape){
                                        XSLFTextShape content = (XSLFTextShape)shape;
                                        for( XSLFTextParagraph ttp: content.getTextParagraphs()){    
                                            
                                            if(ttp.getText().equals(title)) continue;
                                            //用一个set统计到底有多少字体大小,如果只有一种字体大小,则直接添加paragraph
                                            HashSet<Float> sizeset = new HashSet<Float>();
                                            for(XSLFTextRun tr : ttp.getTextRuns()){
                                                if (tr.getText().trim().equals("")) continue;
                                                if(tr.getText().trim().equals(title)) continue;
                                                float size = (float) tr.getFontSize();
                                                sizeset.add(size);
                                            }
                                            if(sizeset.size()!=1){
                                                for(XSLFTextRun tr : ttp.getTextRuns()){
                                                    if (tr.getText().trim().equals("")) continue;
                                                    if(tr.getText().trim().equals(title)) continue;
                                                    String text = tr.getText();
                                                    float size = (float) tr.getFontSize();
                                                    sb.append(text.trim()).append("	").append(size).append("##@@").append("
    ");
                                                }
                                            }else{
                                                sb.append(ttp.getText().trim().replaceAll("[\n\r]", " ")).append("	").append((float)sizeset.toArray()[0]).append("##@@").append("
    ");
                                                    
                                            }
                                        }
                                    }else if(shape instanceof XSLFTable){
                                        XSLFTable txShape = (XSLFTable)shape;
                                        for(XSLFTableRow row : txShape.getRows()){
                                            for(XSLFTableCell cell: row.getCells()){
                                                XSLFTextShape content = (XSLFTextShape)cell;
                                                for( XSLFTextParagraph ttp: content.getTextParagraphs()){
                                                    if(ttp.getText().equals(title)) continue;
                                                    //用一个set统计到底有多少字体大小,如果只有一种字体大小,则直接添加paragraph
    
                                                    
                                                    HashSet<Float> sizeset = new HashSet<Float>();
                                                    for(XSLFTextRun tr : ttp.getTextRuns()){
                                                        if (tr.getText().trim().equals("")) continue;
                                                        if(tr.getText().trim().equals(title)) continue;
                                                        float size = (float) tr.getFontSize();
                                                        sizeset.add(size);
                                                    }
                                                    if(sizeset.size()!=1){
                                                        for(XSLFTextRun tr : ttp.getTextRuns()){
                                                            if (tr.getText().trim().equals("")) continue;
                                                            if(tr.getText().trim().equals(title)) continue;
                                                            String text = tr.getText();
                                                            float size = (float) tr.getFontSize();
                                                            sb.append(text.trim()).append("	").append(size).append("##@@").append("
    ");
                                                        }
                                                    }else{
                                                        sb.append(ttp.getText().trim().replaceAll("[\n\r]", " ")).append("	").append((float)sizeset.toArray()[0]).append("##@@").append("
    ");
                                                            
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                                if(x!=pptx.getSlides().length-1) sb.append("----------------------").append("
    ");
                            }
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                        
                    }else if(Filename.endsWith("ppt")){
                        
                        try {
                            SlideShow ss = new SlideShow(new HSLFSlideShow(is));
                            
                            is.close();
                            for(int x = 0 ; x < ss.getSlides().length ; x ++){
                                Slide slide = ss.getSlides()[x];
                                if (slide.getShapes().length ==0) continue;
                                String title = getTitle(slide);
                                if(title != null) sb.append(title).append("	").append("title##@@").append("
    ");
                            
                                for(TextRun tr : slide.getTextRuns()){
                                    
                                    HashSet<Float> sizeset = new HashSet<Float>();
                                    
                                    for(RichTextRun rtr : tr.getRichTextRuns()){
                                        if (rtr.getText().trim().equals("")|| rtr.getText() ==null) continue;
                                        if(rtr.getText().trim().equals(title)) continue;
                                        sizeset.add((float)rtr.getFontSize());
                                    }
                                    if(sizeset.size()!=1){
                                        for(RichTextRun rtr : tr.getRichTextRuns()){
                                            if (rtr.getText().trim().equals("") || rtr.getText() ==null) continue;
                                            if(rtr.getText().trim().equals(title)) continue;
                                            String text = rtr.getText();
                                            float size = (float) rtr.getFontSize();
                                            sb.append(text.trim()).append("	").append(size).append("##@@").append("
    ");
                                        }
                                    }else {
                                        for(RichTextRun rtr : tr.getRichTextRuns()){
                                            if (rtr.getText().trim().equals("")|| rtr.getText() ==null) continue;
                                            if(rtr.getText().trim().equals(title)) continue;
                                            sb.append(rtr.getText().trim()).append(" ");
                                        }
                                        sb.append("	").append((float)sizeset.toArray()[0]).append("##@@").append("
    ");
                                    }
                                }
                                if(x!=ss.getSlides().length-1) sb.append("----------------------").append("
    ");
                            }
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }else if(Filename.endsWith("doc")){
                        try {
                            HWPFDocument hwpf = new HWPFDocument(is);
                            Range range = hwpf.getRange();
                            for (int x = 0; x < range.numSections(); x++) {
                                   Section s = range.getSection(x);
                                   for (int y = 0; y < s.numParagraphs(); y++) {
                                          Paragraph p = s.getParagraph(y);
                                          for (int z = 0; z < p.numCharacterRuns(); z++) {
                                                 CharacterRun run = p.getCharacterRun(z);
                                                 //字符串文本                                       
                                                 String text = run.text().trim();
                                                 if(text ==null ||text == " "|| text=="")  continue;
                                                 sb.append(text.trim()).append("	").append(run.getFontSize()).append("##@@").append("
    ");
                                          }
                                   }
                                   if (x != range.numSections()-1)  sb.append("----------------------").append("
    ");    
                            }
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                        
                    }
            } catch (FileNotFoundException e) {
                    e.printStackTrace();
            }
                write(sb.toString());
            }
        }
        
        
        /**
         * Returns the trimmed title of an OOXML slide.
         *
         * @param slide slide whose title is requested
         * @return the title without surrounding whitespace, or {@code null} when
         *         the slide has no title or the title is blank
         */
        public static String getTitle(XSLFSlide slide){
            String raw = slide.getTitle();
            if (raw == null) {
                return null;
            }
            String trimmed = raw.trim();
            return trimmed.isEmpty() ? null : trimmed;
        }
        
        /**
         * Returns the trimmed title of a binary-format (HSLF) slide.
         *
         * @param slide slide whose title is requested
         * @return the title without surrounding whitespace, or {@code null} when
         *         the slide has no title or the title is blank
         */
        public static String getTitle(Slide slide){
            String raw = slide.getTitle();
            if (raw == null) {
                return null;
            }
            String trimmed = raw.trim();
            return trimmed.isEmpty() ? null : trimmed;
        }
        /**
         * Appends {@code content} to the file {@code ressss.csv} (resolved against
         * the current working directory), encoded as UTF-8.
         *
         * <p>The file is opened in append mode on every call and closed again
         * before the method returns. An I/O failure is reported on standard
         * error and otherwise ignored, so callers are not interrupted by it.
         *
         * @param content text to append
         */
        public static void write(String content){
            File f = new File("ressss.csv");
            // The streams are locals managed by try-with-resources: they are closed in
            // reverse order on every path, and no stream state is shared between calls.
            try (FileOutputStream out = new FileOutputStream(f, true);
                    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8"))) {
                writer.write(content);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
            
        
        
        
        public static void main(String[] args) throws Exception {
            readFiles("C:\Users\ooon\Desktop\DKM_data\DKM_data");
        }
    }
  • 相关阅读:
    搭建MHA问题汇总
    NOIP2009 靶形数独
    get_mysql_conn_info.py
    NOIP 2005 篝火晚会
    MySQL启动关闭添加到 /etc/init.d/mysqld
    noip2002 矩形覆盖
    get_slave_status.py
    [JSOI2008]魔兽地图
    MySQL数据导出导入任务脚本
    8.30 牛客OI赛制测试赛1 F题 子序列
  • 原文地址:https://www.cnblogs.com/ooon/p/4828007.html
Copyright © 2020-2023  润新知