• 【Java解析PDF并获取想要的字符串】


    public class pdfAnalysis {
        /**
         * @throws IOException
         * @param从网络上下载PDF,截取PDF字符串,
         */
    
        public static void main(String[] args) throws IOException {
            // 下载的连接 下载下来的名字 下载下来的路径
            // pdfAnalysis.downLoadByUrl("", "KK.pdf", "F:/");
            // 读取文件
            pdfAnalysis pdf = new pdfAnalysis();
    
            // 读取文件
            String pdfName = "F:\CC.pdf";
            // 解析PDF里的值 存入变量pdf_Body
            String pdf_Body = pdf.readFileOfPDF(pdfName);
            //System.out.println(pdf_Body);
    
            
    
           /* String str = pdf_Body.substring(pdf_Body.indexOf("Arrival"), pdf_Body.indexOf("Payment Details"));
            String str1 = str.substring(str.indexOf("H ("));
            String [] pp ={"Monday","Tuesday","Wednesday","Thursday","Friday","Saturday" ,"Sunday" };
    
            for(String sto:pp){
                if(str1.contains(sto)){
                    String result = str1.substring(str1.indexOf(sto));
                    //System.out.println(result);
                    //System.out.println(result.length());
                    String result2 = result.substring(0,result.indexOf(","));
                    String result3 = result2.trim();
                    System.out.println("我要的时间:"+result3+"我是"+pdfName+"文件");
                }
    
            }*/
    
    
    
            /*if(str1.contains("Monday")||str1.contains("Tuesday")||
                    str1.contains("Wednesday")||str1.contains("Thursday")||
                    str1.contains("Friday")||str1.contains("Saturday")||str1.contains("Sunday")){
    
    
    
            }*/
            // System.out.println(str1);
    
    
            // 取出人名值
            String name_Temp = pdf_Body.substring(pdf_Body.indexOf("Arrive"), pdf_Body.indexOf("passenger details"));
            // System.out.println(str);
            String name_Temp1 = null;
            String result_name = null;
            List<String> list_Name = new ArrayList<>();
            for (int i = 1; i < name_Temp.length(); i++) {
    
                if (name_Temp.contains(i + ".")) {
                    name_Temp1 = name_Temp.substring(name_Temp.indexOf(i + "."));
    
                    result_name = name_Temp1.substring(name_Temp1.indexOf(i + ".") + 3,
                            name_Temp1.indexOf("Seat Number Services"));
                    list_Name.add(result_name);
                }
                // System.out.println(add);
                // System.out.println(str2);
                if (name_Temp1.equals("null")) {
                    continue;
                }
            }
            for (String i : list_Name) {
                System.out.println("所有的人名:" + i);
            }*/
    
    
            if (pdfAnalysis.infile != null) {
                pdfAnalysis.infile.close();
                System.out.println("我要准备关闭PDF文档了");
            }
    
        }
    
        /**
         * Counts how many times a pattern is found in a text.
         *
         * <p>{@code temp} is compiled as a regular expression, so regex metacharacters in it
         * keep their special meaning. Matches are found by successive {@code find()} calls.
         *
         * @param srcText the text to search
         * @param temp the regular expression to look for
         * @return the number of matches found in {@code srcText}
         */
        public static int appearNumber(String srcText, String temp) {
            Matcher matcher = Pattern.compile(temp).matcher(srcText);
            int occurrences = 0;
            // Every successful find() advances past one more match.
            for (; matcher.find(); occurrences++) {
                // counting happens in the loop header
            }
            return occurrences;
        }
        /**
         * Input stream of the PDF most recently opened by {@link #readFileOfPDF(String)}.
         * It is kept in a static field because {@code main} checks and closes it as well.
         */
        public static FileInputStream infile = null;

        /**
         * Extracts the text content of a PDF file.
         *
         * <p>Any failure while opening or parsing the file is reported on standard output and
         * results in a {@code null} return value rather than an exception. Both the parsed
         * document and the input stream are closed before the method returns.
         *
         * @param pdfName path of the PDF file to read
         * @return the extracted text, or {@code null} if the file could not be read or parsed
         * @throws IOException declared for callers; failures inside the method are caught and
         *         reported instead of being propagated
         */
        public String readFileOfPDF(String pdfName) throws IOException {
            String context = null;
            // Used only to print the absolute path in the messages below.
            File file = new File(pdfName);
            PDDocument pdfdocument = null;

            try {
                infile = new FileInputStream(pdfName);
                // Parse the stream into a PDF document object.
                PDFParser parser = new PDFParser(infile);
                parser.parse();
                pdfdocument = parser.getPDDocument();
                // Strip the plain text out of the parsed document.
                PDFTextStripper stripper = new PDFTextStripper();
                context = stripper.getText(pdfdocument);
                System.out.println("PDF文件" + file.getAbsolutePath() + "的文本内容如下:");
                // System.out.println(context);

            } catch (Exception e) {
                System.out.println("读取PDF文件" + file.getAbsolutePath() + "失败!" + e.getMessage());
            } finally {
                // The parsed document holds its own resources and must be closed too;
                // previously it was never closed.
                if (pdfdocument != null) {
                    try {
                        pdfdocument.close();
                    } catch (IOException ignored) {
                        // Best effort: the text has already been extracted (or the failure
                        // has already been reported above).
                    }
                }
                if (infile != null) {
                    try {
                        infile.close();
                    } catch (IOException ignored) {
                        // Best effort: nothing useful can be done if closing fails.
                    }
                }
            }
            return context;
        }
    
  • 相关阅读:
    剑指OFFER之包含min函数的栈
    剑指OFFER之二叉树的镜像
    关于【最长递增子序列(LIS)】
    题目1113:二叉树
    剑指OFFER之字符串的排列
    题目1120:全排列
    题目1460:Oil Deposit
    题目1459:Prime ring problem
    剑指OFFER之二叉树中和为某一值的路径
    python 线程、进程
  • 原文地址:https://www.cnblogs.com/iitxt/p/8984131.html
Copyright © 2020-2023  润新知