最近在项目开发过程中,有一个需求是将Excel的文字图片信息导入数据库。
数据库中已存在的数据格式是RTF富文本的内容转成byte[]写入Oracle数据库的Blob字段的,
因此在不影响软件和保证已存在数据格式的前提下,需要将Excel的文字图片信息做转换处理。
流程如下:
需要导入的Excel文件内容
软件界面显示效果
读取EXCEL文件文字、图片信息,并将EXCEL文字、图片转换成RTF格式
这里我用的NPOI 2.3.0.0
1 //RTF格式开始的固定字符串 2 private const string RtfStartTxt = @"{ tf1deff0{fonttbl{f0 Calibri;}{f1fcharset134 Tahoma;}}{colortbl ed0green0lue0 ; ed0green0lue255 ;}{*defchp fs22}{*listoverridetable}{stylesheet {qlfs22 Normal;}{*cs1f1fs18 Default Paragraph Font;}{*cs2sbasedon1f1fs18 Line Number;}{*cs3ulfs22cf1 Hyperlink;}{* s4 srowdfs22ql rautofit1 scellpaddfl3 scellpaddl108 scellpaddfr3 scellpaddr108 svertaltcltxlrtb Normal Table;}{* s5 srowdsbasedon4fs22ql rbrdrtrdrsrdrw10 rbrdrlrdrsrdrw10 rbrdrbrdrsrdrw10 rbrdrrrdrsrdrw10 rbrdrhrdrsrdrw10 rbrdrvrdrsrdrw10 rautofit1 scellpaddfl3 scellpaddl108 scellpaddfr3 scellpaddr108 svertaltcltxlrtb Table Simple 1;}} ouicompatsplytwninehtmautspsectdpardplainql"; 3 //RTF代表文本的首字符串 4 private const string RtfTxtStartTxt = @"{f1fs18cf0 "; 5 //RTF格式中代表一个回车 6 private const string RtfEnter = @"f2fs18cf0parpardplainql"; 7 //RTF格式结束的固定字符串 8 private const string RtfEndTxt = @"f1fs18cf0par}"; 9 //将字符串转换成RTF格式的内容 10 private static string GetRichText(string str) 11 { 12 return str.Select(b => ((int) b).ToString(CultureInfo.InvariantCulture)).Select(tmp1 => @"u" + tmp1 + "?").Aggregate("", (current, tmp2) => current + tmp2); 13 } 14 15 //将转化成字符串的图片信息按每200个字符加一个回车形成RTF图片信息 16 private static string GetSplitString(string str) 17 { 18 string temp = string.Empty; 19 for (int i = 0; i < str.Length; i += 200) 20 temp += (i + 200 < str.Length) ? (str.Substring(i, 200) + " ") : str.Substring(i); 21 return temp; 22 } 23 /// <summary> 24 /// 根据Excel列类型获取列的值 25 /// </summary> 26 /// <param name="cell">Excel列</param> 27 /// <returns></returns> 28 private static string GetCellValue(ICell cell) 29 { 30 if (cell == null) 31 return string.Empty; 32 switch (cell.CellType) 33 { 34 case CellType.Blank: 35 return string.Empty; 36 case CellType.Boolean: 37 return cell.BooleanCellValue.ToString(); 38 case CellType.Error: 39 return cell.ErrorCellValue.ToString(CultureInfo.InvariantCulture); 40 case CellType.Numeric: 41 short format = cell.CellStyle.DataFormat; 42 //对时间格式(2015.12.5、2015/12/5、2015-12-5等)的处理 43 if (format == 14 || format == 31 || format == 57 || format == 58) 44 return cell.DateCellValue.ToString(CultureInfo.InvariantCulture); 45 return cell.NumericCellValue.ToString(CultureInfo.InvariantCulture); 46 default: 47 return cell.ToString(); 48 case CellType.String: 49 return cell.StringCellValue; 50 case CellType.Formula: 51 try 52 { 53 var e = new HSSFFormulaEvaluator(cell.Sheet.Workbook); 54 e.EvaluateInCell(cell); 55 return cell.ToString(); 56 } 57 catch 58 { 59 return cell.NumericCellValue.ToString(CultureInfo.InvariantCulture); 60 } 61 } 62 } 63 //替换字符串前面内容 64 public static string TrimStarString(string s, string searchStr, string replaceStr) 65 { 66 var result = s; 67 try 68 { 69 if (string.IsNullOrEmpty(result)) 70 return result; 71 if (s.Length < searchStr.Length) 72 return result; 73 if (s.IndexOf(searchStr, 0, searchStr.Length, StringComparison.Ordinal) > -1) 74 result = s.Substring(searchStr.Length); 75 return result; 76 } 77 catch (Exception) 78 { 79 return result; 80 } 81 } 82 //替换字符串后面内容 83 public static string TrimEndString(string s, string searchStr, string replaceStr) 84 { 85 var result = s; 86 try 87 { 88 if (string.IsNullOrEmpty(result)) 89 return result; 90 if (s.Length < searchStr.Length) 91 return result; 92 if (s.IndexOf(searchStr, s.Length - searchStr.Length, searchStr.Length, StringComparison.Ordinal) > -1) 93 result = s.Substring(0, s.Length - searchStr.Length); 94 return result; 95 } 96 catch (Exception) 97 { 98 return result; 99 } 100 } 101 /// <summary> 102 /// 将excel导入到datatable 103 /// </summary> 104 /// <param name="filePath">excel路径</param> 105 /// <param name="sheetName"></param> 106 /// <param name="isColumnName">第一行是否是列名</param> 107 /// <param name="dataTable"></param> 108 /// <returns>返回datatable</returns> 109 public static bool ExcelToDataTable(string filePath, string sheetName, bool isColumnName, out DataTable dataTable) 110 { 111 dataTable = null; 112 FileStream fs = null; 113 IWorkbook workbook = null; 114 int startRow = 0; 115 var pictures = new List<PicturesInfo>(); 116 try 117 { 118 using (fs = File.OpenRead(filePath)) 119 { 120 // 2007版本 121 if (filePath.ToUpper().EndsWith(".XLSX")) 122 workbook = new XSSFWorkbook(fs); 123 // 2003版本 124 else if (filePath.ToUpper().EndsWith(".XLS")) 125 workbook = new HSSFWorkbook(fs); 126 if (workbook != null) 127 { 128 ISheet sheet = workbook.GetSheet(sheetName); 129 dataTable = new DataTable(); 130 if (sheet != null) 131 { 132 pictures = NpoiExtend.GetAllPictureInfos(sheet);//获取图片信息 133 int rowCount = sheet.LastRowNum; //总行数 134 if (rowCount > 0) 135 { 136 IRow firstRow = sheet.GetRow(0); //第一行 137 int cellCount = firstRow.LastCellNum; //列数 138 //构建datatable的列 139 ICell cell; 140 DataColumn column; 141 if (isColumnName) 142 { 143 startRow = 1; //如果第一行是列名,则从第二行开始读取 144 for (int i = firstRow.FirstCellNum; i < cellCount; ++i) 145 { 146 cell = firstRow.GetCell(i); 147 if (cell != null) 148 { 149 if (cell.StringCellValue != null) 150 { 151 column = new DataColumn(cell.StringCellValue); 152 dataTable.Columns.Add(column); 153 } 154 } 155 } 156 } 157 else 158 { 159 for (int i = firstRow.FirstCellNum; i < cellCount; ++i) 160 { 161 column = new DataColumn("column" + (i + 1)); 162 dataTable.Columns.Add(column); 163 } 164 } 165 //填充行 166 for (int i = startRow; i <= rowCount; ++i) 167 { 168 IRow row = sheet.GetRow(i); 169 if (row == null) continue; 170 DataRow dataRow = dataTable.NewRow(); 171 for (int j = row.FirstCellNum; j < cellCount; ++j) 172 { 173 cell = row.GetCell(j); 174 if (cell == null) 175 dataRow[j] = ""; 176 else 177 dataRow[j] = GetCellValue(cell); 178 } 179 dataTable.Rows.Add(dataRow); 180 } 181 } 182 } 183 } 184 } 185 if (dataTable != null && dataTable.Columns.Count >= 0 && pictures.Count > 0) 186 { 187 var columnIndex = 0; 188 for (int i = 0; i < dataTable.Columns.Count; i++) 189 { 190 if (dataTable.Columns[i].ColumnName == "说明")//找到说明列 191 { 192 columnIndex = i; 193 break; 194 } 195 } 196 for (int i = 0; i < dataTable.Rows.Count; i++) 197 { 198 DataRow row = dataTable.Rows[i]; 199 var str = row[columnIndex].ToString(); 200 var pics = pictures.Where(x => x.MinCol == columnIndex && x.MinRow == i + 1).ToList(); 201 if (pics.Count > 0) 202 { 203 var allPicStr = ""; 204 foreach (PicturesInfo pic in pics) 205 { 206 byte[] picdata = pic.PictureData;//Excel 中的图片信息,byte[]格式 207 var picstr = ""; 208 foreach (byte b in picdata) 209 { 210 string tempStr = Convert.ToString(b, 16); 211 if (tempStr.Length == 1) 212 tempStr = "0" + tempStr;//不足一位需要前面补0 213 picstr += tempStr; 214 }//将byte[]格式图片=>转换成16进制 215 picstr = GetSplitString(picstr);//转成RTF格式需要的内容 216 //RTF图片格式{*shppict{pictpngblippicw8494pich3175picwgoal4815pichgoal1800picscalex100picscaley100 [图片16进制信息]}} 217 picstr = @"{*shppict{pictpngblip" + 218 @"picw" + pic.SizeWidth * 26 + 219 @"pich" + pic.SizeHeight * 26 + 220 @"picwgoal" + pic.SizeWidth * 26 * 72 / 127 + 221 @"pichgoal" + pic.SizeHeight * 26 * 72 / 127 + 222 @"picscalex100picscaley100 " + 223 picstr + @"}}"; 224 allPicStr += picstr + RtfEnter; 225 } 226 var allTextStr = RtfTxtStartTxt + GetRichText(str) + "}" + RtfEnter;//Excel单元格文字内容转RTF格式 227 var textPic = RtfStartTxt + allTextStr + allPicStr + RtfEndTxt;//将Excel文字和图片拼接到一起 228 row[columnIndex] = textPic; 229 } 230 } 231 var k = dataTable.Rows.Count - 1; 232 for (int i = k; i > 0; i--)//将多个单元格合并到一起,处理如图Excel格式内容 233 { 234 DataRow row = dataTable.Rows[i]; 235 if (string.IsNullOrEmpty(row[0].ToString()) && !string.IsNullOrEmpty(row[columnIndex].ToString())) 236 { 237 var str1 = dataTable.Rows[i - 1][columnIndex].ToString(); 238 str1 = TrimEndString(str1, RtfEndTxt, ""); 239 var str2 = dataTable.Rows[i][columnIndex].ToString(); 240 str2 = TrimEndString(str2, RtfEndTxt, ""); 241 str2 = TrimStarString(str2, RtfStartTxt, ""); 242 dataTable.Rows[i - 1][columnIndex] = str1 + RtfEnter + str2 + RtfEndTxt; 243 dataTable.Rows.RemoveAt(i); 244 } 245 } 246 } 247 return true; 248 } 249 catch (Exception ex) 250 { 251 if (fs != null) 252 fs.Close(); 253 return false; 254 } 255 }
读取Excel图片我参考的是这篇博文:https://www.cnblogs.com/hanzhaoxin/p/4442369.html
PicturesInfo.cs
在原博文的基础上,我加了SizeWidth 和 SizeHeight两个属性,用于后续生成RTF格式图片Size内容。
public class PicturesInfo { public int MinRow { get; set; } public int MaxRow { get; set; } public int MinCol { get; set; } public int MaxCol { get; set; } public Byte[] PictureData { get; private set; } public int SizeWidth { get; set; } public int SizeHeight { get; set; } public PicturesInfo(int minRow, int maxRow, int minCol, int maxCol, Byte[] pictureData) { this.MinRow = minRow; this.MaxRow = maxRow; this.MinCol = minCol; this.MaxCol = maxCol; this.PictureData = pictureData; using (var ms = new MemoryStream(pictureData)) { var image = Image.FromStream(ms); this.SizeWidth = image.Size.Width; this.SizeHeight = image.Size.Height; } } }
NpoiExtend.cs
public static class NpoiExtend { public static List<PicturesInfo> GetAllPictureInfos(this ISheet sheet) { return sheet.GetAllPictureInfos(null,null,null,null); } public static List<PicturesInfo> GetAllPictureInfos(this ISheet sheet,int? minRow,int? maxRow,int? minCol,int? maxCol,bool onlyInternal = true) { if (sheet is HSSFSheet) { return GetAllPictureInfos((HSSFSheet)sheet,minRow,maxRow,minCol,maxCol,onlyInternal); } else if (sheet is XSSFSheet) { return GetAllPictureInfos((XSSFSheet)sheet, minRow, maxRow, minCol, maxCol, onlyInternal); } else { throw new Exception("未处理类型,没有为该类型添加:GetAllPicturesInfos()扩展方法!"); } } private static List<PicturesInfo> GetAllPictureInfos(HSSFSheet sheet,int? minRow, int? maxRow, int? minCol, int? maxCol, bool onlyInternal) { List<PicturesInfo> picturesInfoList = new List<PicturesInfo>(); var shapeContainer = sheet.DrawingPatriarch as HSSFShapeContainer; if (null != shapeContainer) { var shapeList = shapeContainer.Children; foreach (var shape in shapeList) { if (shape is HSSFPicture && shape.Anchor is HSSFClientAnchor) { var picture = (HSSFPicture)shape; var anchor = (HSSFClientAnchor)shape.Anchor; if (IsInternalOrIntersect(minRow, maxRow, minCol, maxCol, anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, onlyInternal)) { picturesInfoList.Add(new PicturesInfo(anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, picture.PictureData.Data)); } } } } return picturesInfoList; } private static List<PicturesInfo> GetAllPictureInfos(XSSFSheet sheet, int? minRow, int? maxRow, int? minCol, int? maxCol, bool onlyInternal) { List<PicturesInfo> picturesInfoList = new List<PicturesInfo>(); var documentPartList = sheet.GetRelations(); foreach (var documentPart in documentPartList) { if (documentPart is XSSFDrawing) { var drawing = (XSSFDrawing)documentPart; var shapeList = drawing.GetShapes(); foreach (var shape in shapeList) { if (shape is XSSFPicture) { var picture = (XSSFPicture)shape; var anchor = picture.GetPreferredSize(); if (IsInternalOrIntersect(minRow, maxRow, minCol, maxCol, anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, onlyInternal)) { picturesInfoList.Add(new PicturesInfo(anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, picture.PictureData.Data)); } } } } } return picturesInfoList; } private static bool IsInternalOrIntersect(int? rangeMinRow, int? rangeMaxRow, int? rangeMinCol, int? rangeMaxCol, int pictureMinRow, int pictureMaxRow, int pictureMinCol, int pictureMaxCol, bool onlyInternal) { int _rangeMinRow = rangeMinRow ?? pictureMinRow; int _rangeMaxRow = rangeMaxRow ?? pictureMaxRow; int _rangeMinCol = rangeMinCol ?? pictureMinCol; int _rangeMaxCol = rangeMaxCol ?? pictureMaxCol; if (onlyInternal) { return (_rangeMinRow <= pictureMinRow && _rangeMaxRow >= pictureMaxRow && _rangeMinCol <= pictureMinCol && _rangeMaxCol >= pictureMaxCol); } else { return ((Math.Abs(_rangeMaxRow - _rangeMinRow) + Math.Abs(pictureMaxRow - pictureMinRow) >= Math.Abs(_rangeMaxRow + _rangeMinRow - pictureMaxRow - pictureMinRow)) && (Math.Abs(_rangeMaxCol - _rangeMinCol) + Math.Abs(pictureMaxCol - pictureMinCol) >= Math.Abs(_rangeMaxCol + _rangeMinCol - pictureMaxCol - pictureMinCol))); } } }
将RTF格式转成byte[],转换完成后就可以直接写入数据库了。
foreach (DataRow row in dataTable.Rows)
{
byte[] filedBlob = Encoding.Default.GetBytes(row["说明"].ToString());//转成byte[]
}