import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; public class ReadExcelUtil { /** * * @param inputStream 文件 * @param fileName 文件名(full name) * @param sheetNum 表單序號(不寫表示全部讀取,不建議這樣做) * @param flag 建議選擇false(true表示按單元格格式進行讀取數據,false表示所有按String格式讀取) * @param r 開始讀取的行號 * @param c 開始讀取的列號 * @return Map<String, List<List<Object>>> * String: sheet 的序号 * List<Object>: 每一行中每一个格子的值组成的数组,为有序 ArrayList * List<List<Object>>: 以每一行所有格子的值组成的List为元素,组成新的List,为有序 ArrayList */ public Map<String, List<List<Object>>> readExcel(InputStream inputStream, String fileName, int sheetNum, boolean flag, int r, int c) { Workbook wb = getExcelKind(inputStream, fileName); Map<String, List<List<Object>>> map = readSheet(wb, sheetNum, flag, r, c); return map; }
public Map<String, List<List<Object>>> readExcel(InputStream inputStream, String fileName, boolean flag, int r, int c) { Workbook wb = getExcelKind(inputStream, fileName); Map<String, List<List<Object>>> map = readSheet(wb, flag, r, c); return map; } /** * 选择sheet(工作表格区间)读取表格,可遍历 * * @param wb * @param sheetNum * @param flag * @param r * @param c * @return */ public Map<String, List<List<Object>>> readSheet(Workbook wb, int sheetNum, boolean flag, int r, int c) { Map<String, List<List<Object>>> map = new HashMap<String, List<List<Object>>>(); int sn = 0; for (Sheet sheet : wb) { if (sn >= sheetNum) { break; } List<List<Object>> list = readRow(sheet, flag, r, c); map.put(wb.getSheetName(sn), list); sn++; } return map; } public Map<String, List<List<Object>>> readSheet(Workbook wb, boolean flag, int r, int c) { Map<String, List<List<Object>>> map = new HashMap<String, List<List<Object>>>(); int sn = 0; for (Sheet sheet : wb) { List<List<Object>> list = readRow(sheet, flag, r, c);
// 以表单的名字为key存储 map.put(wb.getSheetName(sn), list); sn++; } return map; } /** * 按 行、列 遍历式 读取表格 * * @param sheet * @param flag * @param r * @param c * @return List<List<Object>> */ public List<List<Object>> readRow(Sheet sheet, boolean flag, int r, int c) { List<List<Object>> list = new ArrayList<List<Object>>(); for (Row row : sheet) { List<Object> olist = new ArrayList<Object>(); for (Cell cell : row) { int rowNum = row.getRowNum(); int columnIndex = cell.getColumnIndex(); if (rowNum >= r && columnIndex >= c) { Object obj = readCell(cell, flag); olist.add(obj); } } list.add(olist); } return list; } /** * 按 格子 读取表格 * 这个方法是根据官方的文档改编的,大致跟官方一样,可是总感觉哪里不舒服 * @param cell * @param flag * @return */ public Object readCell(Cell cell, boolean flag) { Object obj = null; if (flag) { switch (cell.getCellType()) { case Cell.CELL_TYPE_NUMERIC: if (DateUtil.isCellDateFormatted(cell)) { obj = cell.getDateCellValue(); } else obj = cell.getNumericCellValue(); break; case Cell.CELL_TYPE_STRING: obj = cell.getRichStringCellValue().getString(); break; case Cell.CELL_TYPE_FORMULA: obj = cell.getCellFormula(); break; case Cell.CELL_TYPE_BLANK:
// TODO 感觉这里处理的不是很好,应该有一个更好的处理行为才对 break; case Cell.CELL_TYPE_BOOLEAN: obj = cell.getBooleanCellValue(); break; case Cell.CELL_TYPE_ERROR: obj = "cell ERROR"; break; default: break; } } else obj = cell.toString(); return obj; } /** * 根据文件后缀名,创建不同的 workbook 的类型 * * @param file * @param suffix * @return workbook wb */ public Workbook getExcelKind(InputStream file, String fileName) { String suffix = fileName.substring(fileName.lastIndexOf(".") + 1); // Workbook wb = WorkbookFactory.create(file); 这是为了学习新的方法,才将这句注释掉的。否则,只需要这一句,下面的基本全都可以省略 Workbook wb = null; if (suffix != null && suffix.equals("xlsx")) { // 07+ try { OPCPackage pkg = OPCPackage.open(file); wb = new XSSFWorkbook(pkg); pkg.close(); } catch (InvalidFormatException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } else if (suffix != null && suffix.equals("xls")) { // 03- try { NPOIFSFileSystem fs = new NPOIFSFileSystem(file); wb = new HSSFWorkbook(fs.getRoot(), true); fs.close(); } catch (IOException e) { e.printStackTrace(); } } else if (suffix == null) { return null; } else { System.out.println("没找到对应的后缀名,检查后再试试呗。"); return null; } return wb; } }
只是一个小小的例子,本想着能把这个方法做成普适的,可惜总有几个牛角尖钻不透,智商着急啊。
=====================================第二版=====================================
1 import com.google.common.collect.Lists; 2 import com.vastio.exception.PlatformException; 3 import org.apache.poi.openxml4j.exceptions.InvalidFormatException; 4 import org.apache.poi.ss.usermodel.Cell; 5 import org.apache.poi.ss.usermodel.Sheet; 6 import org.apache.poi.ss.usermodel.Workbook; 7 import org.apache.poi.ss.usermodel.WorkbookFactory; 8 import org.slf4j.Logger; 9 import org.slf4j.LoggerFactory; 10 11 import java.io.FileInputStream; 12 import java.io.IOException; 13 import java.util.List; 14 15 public class ReadExcelFile { 16 17 private static final Logger LOGGER = LoggerFactory.getLogger(ReadExcelFile.class.getName()); 18 19 public static void main(String[] args) { 20 String path = "C:\test.xls"; 21 System.out.println(readExcel(path)); 22 } 23 24 /** 25 * 读取Excel文件并将数据放到list中 26 * 27 * @param path 文件的绝对路径 28 * @return list 29 */ 30 public static List<List<List<String>>> readExcel(String path) { 31 List<List<List<String>>> sheetList = Lists.newArrayList(); 32 try { 33 Workbook workbook = WorkbookFactory.create(new FileInputStream(path)); 34 int sheetNum = workbook.getNumberOfSheets(); // sheet数目 35 List<List<String>> rowList = Lists.newArrayList(); 36 for (int i = 0; i < sheetNum; i++) { 37 Sheet sheet = workbook.getSheetAt(i); 38 int rowNum = sheet.getPhysicalNumberOfRows(); // 总行数 39 for (int r = 0; r < rowNum; r++) { 40 int cellNum = sheet.getRow(r).getPhysicalNumberOfCells(); // 每一行包含的总格子数 41 List<String> cellList = Lists.newArrayList(); 42 for (short c = 0; c < cellNum; c++) { 43 Cell cell = sheet.getRow(r).getCell(c); 44 String value; 45 if (cell == null) continue; 46 switch (cell.getCellTypeEnum()) { 47 case FORMULA: // 公式 48 value = String.valueOf(cell.getCellFormula()); 49 cellList.add(value); 50 break; 51 case NUMERIC: // 数值 52 value = String.valueOf(cell.getNumericCellValue()); 53 cellList.add(value); 54 break; 55 case STRING: // 字符串 56 value = String.valueOf(cell.getStringCellValue()); 57 cellList.add(value); 58 break; 59 case BLANK: // 空白 60 value = ""; 61 cellList.add(value); 62 break; 63 case BOOLEAN: 64 value = String.valueOf(cell.getBooleanCellValue()); 65 cellList.add(value); 66 break; 67 case ERROR: 68 value = String.valueOf(cell.getErrorCellValue()); 69 cellList.add(value); 70 break; 71 case _NONE: 72 value = "error at : " + cell.getAddress(); 73 throw new PlatformException(value); // 自定义的异常类 74 default: 75 break; 76 } 77 } 78 rowList.add(cellList); 79 } 80 sheetList.add(rowList); 81 } 82 } catch (IOException e) { 83 LOGGER.debug(e.getMessage()); 84 } catch (InvalidFormatException e) { 85 LOGGER.debug(e.getMessage()); 86 } catch (PlatformException e) { 87 LOGGER.debug(e.getMessage()); 88 } 89 return sheetList; 90 } 91 }
这个是简易版,自定义的成分居多。
需要导入的包有
'org.apache.poi:poi:3.15',
'org.apache.poi:poi-ooxml:3.15',
'com.github.virtuald:curvesapi:1.04',
'commons-codec:commons-codec:1.10',
'org.apache.poi:poi-ooxml-schemas:3.15',
'org.apache.commons:commons-collections4:4.1'
构建工具为gradle。