• NPOI操作EXCEL(三)——反射机制进行excel表格数据的解析


    我们先来回忆回忆上篇文章讲到的通过xml配置文件实现excel批量模板解析的整体思路:

    1.对每个excel模板制定xml配置规则集,实现xml配置文件的解析服务

    2.为每个excel模板制定DTO,继承于一个BaseDTO

    3.实现两个工厂方法,一是获取某excel模板xml配置文件路径;一是获取某excel模板DTO对象

    4.EXCEL工具类对外暴露两个接口,一是数据验证接口,返回整个excel所有错误单元格信息;一是数据解析接口,读取单元格数据到DTO。通过构造方法传入配置文件

    5.整体流程:用户上传excel文件,首先存储到临时文件夹,其次获取xml配置文件路径,获取DTO对象,然后初始化excel工具类,再验证excel单元格数据,解析数据返回DTO,最后调用入库服务实现DTO数据入库

     

    接下来我们就来看看excel工具类的具体实现代码

     1     public class ExcelImportService : ExcelAnalyzeService, IExcelImportService
     2     {
     3         private string _filePath;
     4         private string _xmlPath;
     5         private Dictionary<int, int> _rowCount = new Dictionary<int, int>();
     6         private List<Regular> _list;// 规则集
     7 
     8         /// <summary>
     9         /// 构造方法
    10         /// </summary>
    11         /// <param name="filePath">excel文件路径</param>
    12         /// <param name="xmlPath">配置文件路径</param>
    13         public ExcelImportService(string filePath, string xmlPath)
    14         {
    15             _filePath = filePath;
    16             _xmlPath = xmlPath;
    17             _list = this.GetXMLInfo(_xmlPath);
    18         }
    19 
    // excel所有单元格数据验证 20 public UploadExcelFileResult ValidateExcel() 21 { 22 var result = new UploadExcelFileResult(); 23 result.Success = true; 24 25 _rowCount = new Dictionary<int, int>(); 26 27 Stream fileStream = new FileStream(_filePath, FileMode.Open); 28 int edition = this.GetExcelEdition(_filePath); 29 if (edition != 0) 30 { 31 IWorkbook workbook = this.CreateWorkBook(edition, fileStream); 32 int sheetCount = _list.Find(e => e.HeaderRegular != null).HeaderRegular["sheetCount"]; 33 34 for (int i = 0; i < sheetCount; i++) 35 { 36 ISheet sheet = workbook.GetSheetAt(i); 37 Dictionary<int, string> dict = this.GetExcelHeaders(sheet, ref result, _list); 38 if (result.Success) 39 { 40 _rowCount.Add(i, sheet.LastRowNum); 41 result = this.CheckExcelDatasEnableNull(sheet, _list, dict, _rowCount[i]); 42 } 43 else 44 { 45 break; 46 } 47 } 48 } 49 else 50 { 51 result.Success = false; 52 result.Message = "文件类型错误!"; 53 } 54 55 fileStream.Close(); 56 return result; 57 } 58
    // 解析excel数据到DTO 59 public List<TableDTO> Import<TableDTO>() 60 { 61 var uploadExcelFileResult = new UploadExcelFileResult(); 62 var resultList = new List<TableDTO>(); 63 64 Stream fileStream = new FileStream(_filePath, FileMode.Open); 65 int edition = this.GetExcelEdition(_filePath); 66 IWorkbook workbook = this.CreateWorkBook(edition, fileStream); 67 int sheetCount = _list.Find(e => e.HeaderRegular != null).HeaderRegular["sheetCount"]; 68 69 for (int i = 0; i < sheetCount; i++) 70 { 71 ISheet sheet = workbook.GetSheetAt(i); 72 string sheetName = sheet.SheetName; 73 Dictionary<int, string> dict = this.GetExcelHeaders(sheet, ref uploadExcelFileResult, _list); 74 var sheetLists = this.GetExcelDatas<TableDTO>(sheet, sheetName, _list, dict, _rowCount[i]); 75 resultList.AddRange(sheetLists); 76 } 77 78 fileStream.Close(); 79 return resultList; 80 } 81 }

    1.我们看到17行用到了GetXMLInfo()方法,就是第二篇文章中说到的XML文件解析方法,返回该excel的规则集

    2.第28行GetExcelEdition()方法,是基础解析接口IExcelAnalyzeService的方法,验证返回excel版本

     1         public int GetExcelEdition(string fileName)
     2         {
     3             var edition = 0;
     4             string[] items = fileName.Split(new char[] { '.' });
     5             int count = items.Length;
     6             switch (items[count - 1])
     7             {
     8                 case "xls":
     9                     edition = 3;
    10                     break;
    11                 case "xlsx":
    12                     edition = 7;
    13                     break;
    14                 default:
    15                     break;
    16             }
    17 
    18             return edition;
    19         }

    3.第31行CreateWorkBook()方法,是基础解析接口IExcelAnalyzeService的方法,返回excel工作簿对象

     1         public IWorkbook CreateWorkBook(int edition, Stream  excelFileStream)
     2         {
     3             switch (edition)
     4             {
     5                 case 7:
     6                     return new XSSFWorkbook(excelFileStream);
     7                 case 3:
     8                     return new HSSFWorkbook(excelFileStream);
     9                 default:
    10                     return null;
    11             }
    12         }

    4.第32行是读取配置文件中excel中sheet个数(例如员工模板:我们支持一个excel文件多个sheet表单,可以是每个表单代表一个地区等等)

    5.第37行中GetExcelHeaders()方法,是基础解析接口IExcelAnalyzeService的方法,验证返回excel表头数据

     1         public Dictionary<int, string> GetExcelHeaders(ISheet sheet, ref UploadExcelFileResult uploadExcelFileResult,
     2             List<Regular> list)
     3         {
     4             int firstHeaderRowIndex = list.Find(e => e.HeaderRegular != null).HeaderRegular["firstHeaderRow"];
     5             int lastHeaderRowIndex = list.Find(e => e.HeaderRegular != null).HeaderRegular["lastHeaderRow"];
     6 
     7             var dict = new Dictionary<int, string>();
     8 
     9             try
    10             {
    11                 // 循环获得表头
    12                 for (int i = firstHeaderRowIndex - 1; i < lastHeaderRowIndex; i++)
    13                 {
    14                     IRow headerRow = sheet.GetRow(i);
    15                     int cellCount = headerRow.LastCellNum;
    16 
    17                     for (int j = headerRow.FirstCellNum; j < cellCount; j++)
    18                     {
    19                         if (!string.IsNullOrEmpty(headerRow.GetCell(j).StringCellValue.Trim()))
    20                         {
    21                             // 根据 键-值 是否已存在做不同处理
    //TODO 代码待重构!!!
    22 try 23 { 24 string oldValue = dict[j]; 25 dict.Remove(j); 26 dict.Add(j, oldValue + headerRow.GetCell(j).StringCellValue.Trim()); 27 } 28 catch (Exception) 29 { 30 dict.Add(j, headerRow.GetCell(j).StringCellValue.Trim()); 31 } 32 } 33 } 34 } 35 // 遍历表头字典,消除空格 36 for (int i = 0; i < dict.Count; i++) 37 { 38 var value = dict[i]; 39 this.ReplaceSpace(ref value); 40 dict[i] = value; 41 } 42 // 检查表头模板是否被修改 43 for (int count = 0; count < dict.Count; count++) 44 { 45 Regular header = list.Find(h => h.HeaderText == dict[count]); 46 47 if (header == null) 48 { 49 uploadExcelFileResult.Success = false; 50 uploadExcelFileResult.Message = "读取EXCEL表头模板时发生错误,可能造成原因是:EXCEL模板被修改!请下载最新EXCEL模板!"; 51 } 52 } 53 } 54 catch (Exception e) 55 { 56 uploadExcelFileResult.Success = false; 57 uploadExcelFileResult.Message = "读取EXCEL表头模板时发生错误,可能造成原因是:EXCEL模板被修改!请下载最新EXCEL模板!"; 58 } 59 60 return dict; 61 }

    其中39行ReplaceSpace()是消除字符串中空格方法(所有半角、全角)。一直想通过正则表达式来做,但是没学到家,还没写好能够做到的正则表达式,所以写的有点复杂,若果谁有这样的正则表达式,请指点一二,感激不尽!!!

     1         // 去除空值
     2         public void ReplaceSpace(ref string cellValue)
     3         {
     4             cellValue = TruncateString(cellValue, new char[] { ' ' }, new char[] { ' ' });
     5         }
     6 
     7         // 对字符串做空格剔除处理
     8         private string TruncateString(string originalWord, char[] spiltWord1, char[] spiltWord2)
     9         {
    10             var result = "";
    11             var valueReplaceDbcCase = originalWord.Split(spiltWord1);
    12 
    13             if (valueReplaceDbcCase.Count() > 1)
    14             {
    15                 for (int i = 0; i < valueReplaceDbcCase.Count(); i++)
    16                 {
    17                     if (valueReplaceDbcCase[i] != "" && valueReplaceDbcCase[i] != " " &&
    18                         valueReplaceDbcCase[i] != " ")
    19                     {
    20                         result += TruncateString(valueReplaceDbcCase[i], spiltWord2, new char[0]);
    21                     }
    22                 }
    23             }
    24             else
    25             {
    26                 if (spiltWord2.Any())
    27                 {
    28                     result = TruncateString(originalWord, spiltWord2, new char[0]);
    29                 }
    30                 else
    31                 {
    32                     result = originalWord;
    33                 }
    34             }
    35 
    36             return result;
    37         }

    6.第41行CheckExcelDatasEnableNull()方法,是基础解析接口IExcelAnalyzeService的方法,返回excel数据验证结果

     1         public UploadExcelFileResult CheckExcelDatasEnableNull(ISheet sheet, List<Regular> list, Dictionary<int, string> dict, int rowCount)
     2         {
     3             var result = new UploadExcelFileResult();
     4             result.Success = true;
     5 
     6             // 记录单个sheet所有错误信息
     7             var sheetErrors = new List<ExcelFileErrorPosition>();
     8             // 表头结束行
     9             int lastHeaderRowIndex = list.Find(e => e.HeaderRegular != null).HeaderRegular["lastHeaderRow"];
    10 
    11             // 循环行数据
    12             for (int i = lastHeaderRowIndex; i <= rowCount; i++)
    13             {
    14                 // 标注该行是否出错
    15                 bool isrowfalse = false;
    16                 // 记录该行数据临时对象
    17                 var rowDatas = new List<string>();
    18                 // 记录该行错误列
    19                 var rowErrorCell = new List<int>();
    20                 // 记录该行错误列具体错误信息
    21                 var rowErrorMessages = new List<string>();
    22                 // 记录该行空值数
    23                 int nullcount = 0;
    24 
    25 
    26                 IRow dataRow = sheet.GetRow(i);
    27                 int cellCount = dict.Count;
    28 
    29                 // 循环列数据
    30                 for (int j = dataRow.FirstCellNum; j < cellCount; j++)
    31                 {
    32                     string value = "";
    33                     Regular header = list.Find(h => h.HeaderText == dict[j]);
    34                     //value = dataRow.GetCell(j).ToString();
    35                     switch (dataRow.GetCell(j).CellType)
    36                     {
    37                         case CellType.Formula:
    38                             value = dataRow.GetCell(j).StringCellValue.ToString();
    39                             break;
    40                         default:
    41                             value = dataRow.GetCell(j).ToString();
    42                             break;
    43                     }
    44 
    45                     // 记录可能出错数据
    46                     rowDatas.Add(value);
    47 
    48                     // 检查空值
    49                     if (!this.CheckNull(value, ref nullcount))
    50                     {
    51                         // 检查类型
    52                         if (!this.CheckDataType(header.DataType, value))
    53                         {
    54                             isrowfalse = true;
    55                             result.Success = false;
    56                             // 记录该行错误信息
    57                             rowErrorCell.Add(j + 1);
    58                             rowErrorMessages.Add("读取EXCEL数据时发生数据格式错误,请检查该行该列数据格式!");
    59                         }
    60                         else
    61                         {
    62                             if (header.DataType == "System.string" || header.DataType == "System.String")
    63                             {
    64                                 this.ReplaceSpace(ref value);
    65                             }
    66                         }
    67                     }
    68                 }
    69                 // 报错处理(空行不报错)
    70                 if (isrowfalse && nullcount < cellCount)
    71                 {
    72                     sheetErrors.Add(new ExcelFileErrorPosition
    73                     {
    74                         RowContent = rowDatas,
    75                         RowIndex = i + 1,
    76                         CellIndex = rowErrorCell,
    77                         ErrorMessage = rowErrorMessages
    78                     });
    79                 }
    80             }
    81             result.ExcelFileErrorPositions = sheetErrors;
    82             return result;
    83         }

    CheckNull()检查空值,是空值则nullcount++;

    7.第74行GetExcelDatas()方法,是基础解析接口IExcelAnalyzeService的方法,返回excel数据解析结果

      1         public List<TableDTO> GetExcelDatas<TableDTO>(ISheet sheet, string sheetName, List<Regular> list,
      2             Dictionary<int, string> dict, int rowCount)
      3         {
      4             // 返回数据对象集合
      5             var resultList = new List<TableDTO>();
      6             // 表头结束行
      7             int lastHeaderRowIndex = list.Find(e => e.HeaderRegular != null).HeaderRegular["lastHeaderRow"];
      8 
      9             // 循环行数据
     10             for (int i = lastHeaderRowIndex; i <= rowCount; i++)
     11             {
     12                 // 产生一个新的泛型对象
     13                 var model = Activator.CreateInstance<TableDTO>();
     14                 // 记录该行空值数
     15                 int nullcount = 0;
     16 
     17                 IRow dataRow = sheet.GetRow(i);
     18                 int cellCount = dict.Count;
     19 
     20                 if (dataRow != null)
     21                 {
     22                     // 循环列数据
     23                     for (int j = dataRow.FirstCellNum; j < cellCount; j++)
     24                     {
     25                         string value = "";
     26                         Regular header = list.Find(h => h.HeaderText == dict[j]);
     27                         PropertyInfo prop = model.GetType().GetProperty(header.PropertyName);
     28                         //value = dataRow.GetCell(j).ToString();
     29                         switch (dataRow.GetCell(j).CellType)
     30                         {
     31                             case CellType.Formula:
     32                                 value = dataRow.GetCell(j).StringCellValue.ToString();
     33                                 break;
     34                             default:
     35                                 value = dataRow.GetCell(j).ToString();
     36                                 break;
     37                         }
     38 
     39                         // 去除空值
     40                         this.ReplaceSpace(ref value);
     41 
     42                         if (value == "")
     43                         {
     44                             nullcount++;
     45                         }
     46 
     47                         // 赋值
     48                         switch (header.DataType)
     49                         {
     50                             case "System.double":
     51                                 double valueDecimal;
     52                                 if (double.TryParse(value, out valueDecimal))
     53                                 {
     54                                     prop.SetValue(model, valueDecimal, null);
     55                                 }
     56                                 break;
     57                             case "System.Int16":
     58                                 short valueInt16;
     59                                 if (Int16.TryParse(value, out valueInt16))
     60                                 {
     61                                     prop.SetValue(model, valueInt16, null);
     62                                 }
     63                                 break;
     64                             case "System.Int32":
     65                                 int valueInt32;
     66                                 if (Int32.TryParse(value, out valueInt32))
     67                                 {
     68                                     prop.SetValue(model, valueInt32, null);
     69                                 }
     70                                 break;
     71                             case "System.Boolean":
     72                                 bool valueBoolean;
     73                                 if (Boolean.TryParse(value, out valueBoolean))
     74                                 {
     75                                     prop.SetValue(model, valueBoolean, null);
     76                                 }
     77                                 break;
     78                             case "System.DateTime":
     79                                 DateTime valueDateTime;
     80                                 if (DateTime.TryParse(value, out valueDateTime))
     81                                 {
     82                                     prop.SetValue(model, valueDateTime, null);
     83                                 }
     84                                 break;
     85                             default:
     86                                 prop.SetValue(model, value, null);
     87                                 break;
     88                         }
     89                     }
     90                 }
     91 
     92                 // 添加非空行数据到DTO
     93                 if (nullcount < cellCount)
     94                 {
     95                     resultList.Add(model);
     96                 }
     97             }
     98 
     99             return resultList;
    100         }

     

    OK,整体流程中所有代码都贴出来了,写得比较匆忙,如有不当的地方,请大家不吝赐教~~~

     

    附:

    博主的需求中还有非单行的复杂表头,涉及到合并单元格表头,甚至左表头等等复杂excel模板,如:

     

    ......

     

    这部分excel的解析简直就是非人类的需求,如有需要,会在后续博文继续贴出相关代码,请多多支持....

     

    原创文章,代码都是从自己项目里贴出来的。转载请注明出处哦,亲~~~

  • 相关阅读:
    Python/WSGI 应用快速入门--转
    汇编题目:数字转字符,并在窗口上显示出来
    汇编题目:在窗口上显示Welcome to masm!
    VBA中的函数Timer用法
    用VBA计算两个日期之间的工作日(去掉周末两天)
    VBA记录当前系统时间并精确到毫秒
    上海房产税免征--积分或居住证
    学习汇编语言
    “Hello World”—— 第一个汇编程序
    汇编程序设计上机步骤
  • 原文地址:https://www.cnblogs.com/csqb-511612371/p/4891492.html
Copyright © 2020-2023  润新知