Java通过URL在线预览Word、Excel、PPT、PDF、TXT文档中的内容【只获取其中的文字】
在页面上显示各种文档中的文字内容。以下是servlet中针对每种文档类型的处理逻辑：
word:
1 BufferedInputStream bis = null; 2 URL url = null; 3 HttpURLConnection httpUrl = null; // 建立链接 4 url = new URL(urlReal); 5 httpUrl = (HttpURLConnection) url.openConnection();// 连接指定的资源 6 httpUrl.connect();// 获取网络输入流 7 bis = new BufferedInputStream(httpUrl.getInputStream()); 8 String bodyText = null; 9 WordExtractor ex = new WordExtractor(bis); 10 bodyText = ex.getText(); 11 response.getWriter().write(bodyText);
excel:
1 BufferedInputStream bis = null; 2 URL url = null; 3 HttpURLConnection httpUrl = null; // 建立链接 4 url = new URL(urlReal); 5 httpUrl = (HttpURLConnection) url.openConnection();// 连接指定的资源 6 httpUrl.connect();// 获取网络输入流 7 bis = new BufferedInputStream(httpUrl.getInputStream()); 8 content = new StringBuffer(); 9 HSSFWorkbook workbook = new HSSFWorkbook(bis); 10 for (int numSheets = 0; numSheets < workbook.getNumberOfSheets(); numSheets++) { 11 HSSFSheet aSheet = workbook.getSheetAt(numSheets);// 获得一个sheet 12 content.append("/n"); 13 if (null == aSheet) { 14 continue; 15 } 16 for (int rowNum = 0; rowNum <= aSheet.getLastRowNum(); rowNum++) { 17 content.append("/n"); 18 HSSFRow aRow = aSheet.getRow(rowNum); 19 if (null == aRow) { 20 continue; 21 } 22 for (short cellNum = 0; cellNum <= aRow.getLastCellNum(); cellNum++) { 23 HSSFCell aCell = aRow.getCell(cellNum); 24 if (null == aCell) { 25 continue; 26 } 27 if (aCell.getCellType() == HSSFCell.CELL_TYPE_STRING) { 28 content.append(aCell.getRichStringCellValue() 29 .getString()); 30 } else if (aCell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) { 31 boolean b = HSSFDateUtil.isCellDateFormatted(aCell); 32 if (b) { 33 Date date = aCell.getDateCellValue(); 34 SimpleDateFormat df = new SimpleDateFormat( 35 "yyyy-MM-dd"); 36 content.append(df.format(date)); 37 } 38 } 39 } 40 } 41 } 42 response.getWriter().write(content.toString());
ppt:
1 BufferedInputStream bis = null; 2 URL url = null; 3 HttpURLConnection httpUrl = null; // 建立链接 4 url = new URL(urlReal); 5 httpUrl = (HttpURLConnection) url.openConnection();// 连接指定的资源 6 httpUrl.connect();// 获取网络输入流 7 bis = new BufferedInputStream(httpUrl.getInputStream()); 8 StringBuffer content = new StringBuffer(""); 9 SlideShow ss = new SlideShow(new HSLFSlideShow(bis)); 10 Slide[] slides = ss.getSlides(); 11 for (int i = 0; i < slides.length; i++) { 12 TextRun[] t = slides[i].getTextRuns(); 13 for (int j = 0; j < t.length; j++) { 14 content.append(t[j].getText()); 15 } 16 content.append(slides[i].getTitle()); 17 } 18 response.getWriter().write(content.toString());
pdf:
1 BufferedInputStream bis = null; 2 URL url = null; 3 HttpURLConnection httpUrl = null; // 建立链接 4 url = new URL(urlReal); 5 httpUrl = (HttpURLConnection) url.openConnection();// 连接指定的资源 6 httpUrl.connect();// 获取网络输入流 7 bis = new BufferedInputStream(httpUrl.getInputStream()); 8 PDDocument pdfdocument = null; 9 PDFParser parser = new PDFParser(bis); 10 parser.parse(); 11 pdfdocument = parser.getPDDocument(); 12 ByteArrayOutputStream out = new ByteArrayOutputStream(); 13 OutputStreamWriter writer = new OutputStreamWriter(out); 14 PDFTextStripper stripper = new PDFTextStripper(); 15 stripper.writeText(pdfdocument.getDocument(), writer); 16 writer.close(); 17 byte[] contents = out.toByteArray(); 18 String ts = new String(contents); 19 response.getWriter().write(ts);
txt:
1 BufferedReader bis = null; 2 URL url = null; 3 HttpURLConnection httpUrl = null; // 建立链接 4 url = new URL(urlReal); 5 httpUrl = (HttpURLConnection) url.openConnection();// 连接指定的资源 6 httpUrl.connect();// 获取网络输入流 7 bis = new BufferedReader( new InputStreamReader(httpUrl.getInputStream())); 8 StringBuffer buf=new StringBuffer(); 9 String temp; 10 while ((temp = bis.readLine()) != null) { 11 buf.append(temp); 12 response.getWriter().write(temp); 13 if(buf.length()>=1000){ 14 break; 15 } 16 } 17 bis.close();