/// <summary>
/// Reads every Word document (.doc / .docx) found in the source folder and adds one row per
/// document to a table with three string columns: "One" (the first token of the document
/// text), "TwoText" and "TwoHtml" (the two strings returned by GetContent).
/// </summary>
/// <remarks>
/// Required usings: System.Data (DataTable), System.IO (DirectoryInfo, FileInfo),
/// System.Collections.Generic (List) and Microsoft.Office.Interop.Word (Application, Document).
/// </remarks>
/// <returns>
/// The populated table, or null when any exception is raised; the exception message is
/// written to the console in that case.
/// </returns>
public static DataTable Run()
{
    // Folder that is scanned for documents.
    const string sourceDirectory = "E:/20190917";
    // Text that precedes the part of each document handed to GetContent.
    const string contentMarker = "表格显示:";

    try
    {
        // Result table holding the data extracted from the documents.
        DataTable dt = new DataTable();
        dt.Columns.Add(new DataColumn("One", typeof(string)));
        dt.Columns.Add(new DataColumn("TwoText", typeof(string)));
        dt.Columns.Add(new DataColumn("TwoHtml", typeof(string)));

        // Documents that produced a row. Starts at 0 so the reported totals match the
        // number of rows (the previous start value of 1 over-counted by one).
        int number = 0;
        // Documents that were skipped because no body text was found.
        int troublecount = 0;

        // A single Word instance is shared by all files and is created only when the
        // first Word document is met.
        Application app = null;
        try
        {
            DirectoryInfo dir = new DirectoryInfo(sourceDirectory);
            foreach (FileInfo item in dir.GetFiles())
            {
                // Only Word documents are processed; other files are skipped silently
                // instead of being reported as stored.
                bool isWordDocument =
                    string.Equals(item.Extension, ".doc", StringComparison.OrdinalIgnoreCase) ||
                    string.Equals(item.Extension, ".docx", StringComparison.OrdinalIgnoreCase);
                if (!isWordDocument)
                {
                    continue;
                }

                if (app == null)
                {
                    app = new Application();
                    app.Visible = false;
                }

                // The interop call takes its arguments by reference as boxed objects.
                object fileName = item.FullName;
                object confirmConvert = false;
                Document doc = app.Documents.Open(ref fileName, ref confirmConvert);
                try
                {
                    string content = doc.Content.Text;

                    // NOTE(review): the text is split on a space character. Word separates
                    // paragraphs with '\r', so this delimiter may be a paste artefact —
                    // confirm the intended separator before changing it.
                    string[] parts = content.Split(' ');
                    if (parts.Length < 2)
                    {
                        troublecount++;
                        Console.WriteLine("文件{0}中没有正文!!!!!!!!。{1}", fileName, troublecount);
                        // The finally block below still closes the document.
                        continue;
                    }

                    // Extract the content that follows the marker.
                    DataRow dr = dt.NewRow();
                    dr["One"] = parts[0];
                    int contentIndex = content.IndexOf(contentMarker, StringComparison.Ordinal);
                    List<string> lst = GetContent(doc, content, contentIndex);
                    dr["TwoText"] = lst[0];
                    dr["TwoHtml"] = lst[1];
                    dt.Rows.Add(dr);

                    number++;
                }
                finally
                {
                    // Always close the document, and never ask to save: nothing is edited
                    // here and Word is running hidden.
                    object saveChanges = false;
                    doc.Close(ref saveChanges);
                }

                WriteOuputInformation(string.Format("{0}:文档已经存入数据库。{1}", fileName, number));
            }
        }
        finally
        {
            // Shut Word down even when a document failed, so no hidden process is left behind.
            if (app != null)
            {
                app.Quit();
            }
        }

        Console.WriteLine("所有文件已读取完毕,共读取了{0}条数据,没有数据的Word文档总条数为{1}", number, troublecount);
        return dt;
    }
    catch (Exception exp)
    {
        WriteErrorInformation(string.Format("Exception: {0}", exp.Message));
        return null;
    }
}
#region Get the plain-text and rich-text (HTML) content
/// <summary>
/// Builds two renderings of the document text that follows the content marker: a plain-text
/// version and an HTML version in which every Word table is replaced by an HTML table and
/// every paragraph is wrapped in a paragraph element.
/// </summary>
/// <param name="doc">Open Word document whose tables are converted.</param>
/// <param name="Content">Full document text (the caller passes doc.Content.Text).</param>
/// <param name="contentIndex">
/// Index of the content marker inside <paramref name="Content"/>; a negative value means the
/// marker was not found, in which case the whole text is used.
/// </param>
/// <returns>A list with two items: [0] the plain text, [1] the HTML.</returns>
static List<string> GetContent(Document doc, string Content, int contentIndex)
{
    // Offset used by the original code: the caller's marker "表格显示:" is five characters
    // long, so 6 also skips the single character that follows the marker.
    const int markerSkipLength = 6;
    // Word ends every paragraph with a carriage return and every table cell with a
    // carriage return followed by the BEL character.
    const string paragraphMark = "\r";
    const string cellMark = "\a";

    // Clamp the start position so a missing marker, or a marker at the very end of the
    // text, yields a valid (possibly empty) substring instead of an exception.
    string source = Content ?? string.Empty;
    int start = contentIndex < 0 ? 0 : Math.Min(contentIndex + markerSkipLength, source.Length);

    string contentHtml = source.Substring(start);
    System.Text.StringBuilder plainText = new System.Text.StringBuilder(contentHtml);

    // Word collections are 1-based. When the document has no tables the loop is skipped
    // and only the paragraph wrapping below is applied.
    int tableCount = doc.Tables.Count;
    for (int i = 1; i <= tableCount; i++)
    {
        var table = doc.Tables[i];

        // Raw text of the table as it appears inside the document text.
        string wordTable = table.Range.Text;
        int rowCount = table.Rows.Count;
        int columnCount = table.Columns.Count;

        System.Text.StringBuilder htmlTable = new System.Text.StringBuilder();
        htmlTable.Append("<table cellspacing='0' bordercolor='black' border='1' cellpadding='5' text-align='center'>");

        for (int row = 1; row <= rowCount; row++)
        {
            htmlTable.Append("<tr>");
            for (int column = 1; column <= columnCount; column++)
            {
                // NOTE(review): the results of these two helpers were never used by the
                // original code either; the calls are kept only because the helpers are
                // defined elsewhere and may have side effects — confirm and remove.
                getTableRowIndex(row, column, doc, i);
                getTablecolumnIndex(row, column, doc, i);

                // NOTE(review): Cell(row, column) presumably fails for tables with
                // merged cells — confirm against real documents.
                string cellText = table.Cell(row, column).Range.Text
                    .Replace(paragraphMark, "")
                    .Replace(cellMark, "");

                htmlTable.Append("<td>").Append(cellText).Append("</td>");

                // The original appends one space to the plain text per table cell.
                plainText.Append(" ");
            }
            htmlTable.Append("</tr>");
        }
        htmlTable.Append("</table>");

        // Swap the raw table text for its HTML form. This must happen before the
        // paragraph marks are rewritten below, otherwise the raw text no longer matches.
        contentHtml = contentHtml.Replace(wordTable, htmlTable.ToString());
    }

    // Turn every paragraph mark into a paragraph boundary. Replacing spaces here would
    // also break the attributes of the table tag inserted above.
    contentHtml = "<p>" + contentHtml.Replace(paragraphMark, "</p><p>") + "</p>";

    List<string> lst = new List<string>();
    lst.Add(plainText.ToString());
    lst.Add(contentHtml);
    return lst;
}
#endregion
#region Console feedback after an operation
/// <summary>
/// Writes "Error: " followed by the message to the console in red, then sets the
/// foreground colour to gray.
/// </summary>
/// <param name="errorInformation">Text appended after the "Error: " prefix.</param>
static void WriteErrorInformation(string errorInformation)
{
    WriteColoredLine(ConsoleColor.Red, "Error: " + errorInformation);
}

/// <summary>
/// Writes "-->>" followed by the message to the console in dark green, then sets the
/// foreground colour to gray.
/// </summary>
/// <param name="outputInformation">Text appended after the "-->>" prefix.</param>
static void WriteOuputInformation(string outputInformation)
{
    WriteColoredLine(ConsoleColor.DarkGreen, "-->>" + outputInformation);
}

// Prints one line in the given colour and leaves the console foreground set to gray.
static void WriteColoredLine(ConsoleColor color, string line)
{
    Console.ForegroundColor = color;
    Console.WriteLine(line);
    Console.ForegroundColor = ConsoleColor.Gray;
}
#endregion