Excel 电子表格文件格式,这种Excel和其他的Excel是不同的。他的本质上是一个Xml文件(用英文版的UtraEdit可以看到),所以他可以保存任何符号的字符,包括&(它在Xml文件中是一种特殊字符。所以用aspose等工具是不能读取这种字符的)。
反过来说,我们从中可以得到一种快速生成带有多个Worksheet的Workbook的Excel,从xml文件处理的个角度入手。
<?xml version="1.0"?> <?mso-application progid="Excel.Sheet"?> <Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" xmlns:html="http://www.w3.org/TR/REC-html40"> <DocumentProperties xmlns="urn:schemas-microsoft-com:office:office"><Author>Gary Lim</Author></DocumentProperties><ExcelWorkbook xmlns="urn:schemas-microsoft-com:office:excel"> <ProtectStructure>False</ProtectStructure> <ProtectWindows>False</ProtectWindows> </ExcelWorkbook> <Styles> <Style ss:ID="Default" ss:Name="Normal"> <Alignment ss:Vertical="Bottom"/> <Borders/> <Font/> <Interior/> <NumberFormat/> <Protection/> </Style> <Style ss:ID="s27" ss:Name="Hyperlink"> <Font ss:Color="#0000FF" ss:Underline="Single"/> </Style> <Style ss:ID="s24"> <Font x:Family="Swiss" ss:Bold="1"/> </Style> <Style ss:ID="s25"> <Font x:Family="Swiss" ss:Italic="1"/> </Style> <Style ss:ID="s26"> <Alignment ss:Horizontal="Center" ss:Vertical="Bottom"/> </Style> <Style ss:ID="my24"> <Font x:Family="Swiss" ss:Size="12"/> </Style> <Style ss:ID="my28"> <Alignment ss:Horizontal="Left" ss:Vertical="Center"/> <Font x:Family="Swiss" ss:Size="12" ss:Bold="1"/> </Style> <Style ss:ID="my32"> <Alignment ss:Vertical="Center"/> <Font x:Family="Swiss" ss:Size="12"/> </Style> </Styles> <WorksheetOptions xmlns="urn:schemas-microsoft-com:office:excel"> <Selected/> </WorksheetOptions> <Worksheet ss:Name="Info"> <Table> <Column ss:AutoFitWidth="0" ss:Width="123"/> <Column ss:AutoFitWidth="0" ss:Width="196.5"/> <Row ss:AutoFitHeight="0" ss:Height="39.75"> <Cell ss:MergeAcross="1" ss:StyleID="my28"><Data ss:Type="String">Report Information</Data></Cell> </Row> <Row ss:Height="15"> <Cell ss:MergeAcross="1" ss:StyleID="my32"/> </Row> <Row ss:Height="15"> <Cell ss:StyleID="my24"><Data ss:Type="String">MoAddress</Data></Cell> <Cell ss:StyleID="my24"><Data ss:Type="String">SN=Eri3G,RNC=BORNC01</Data></Cell> </Row> <Row ss:Height="15"> <Cell ss:StyleID="my24"><Data ss:Type="String">Data Date</Data></Cell> <Cell ss:StyleID="my24"><Data ss:Type="String">6/23/2013 3:25:00 AM</Data></Cell> </Row> <Row ss:Height="15"> <Cell ss:StyleID="my24"><Data ss:Type="String">Report Created Date</Data></Cell> <Cell ss:StyleID="my24"><Data ss:Type="String">6/23/2013 4:04:01 AM</Data></Cell> </Row> <Row ss:Height="15"> <Cell ss:StyleID="my24"><Data ss:Type="String">VendorName</Data></Cell> <Cell ss:StyleID="my24"><Data ss:Type="String">Ericsson</Data></Cell> </Row> <Row ss:Height="15"> <Cell ss:StyleID="my24"><Data ss:Type="String">Data Version</Data></Cell> <Cell ss:StyleID="my24"><Data ss:Type="String">W10.1</Data></Cell> </Row> </Table> </Worksheet> <Worksheet ss:Name="NodeB_Info"> <Table> <Column ss:AutoFitWidth="0" ss:Width="300"/> <Row ss:AutoFitHeight="1" ss:Height="16"> <Cell ss:StyleID="s24"><Data ss:Type="String">MoAddress</Data></Cell> <Cell ss:StyleID="s24"><Data ss:Type="String">NodeB Label</Data></Cell> <Cell ss:StyleID="s24"><Data ss:Type="String">NodeB Id</Data></Cell> <Cell ss:StyleID="s24"><Data ss:Type="String">Last Updated Date</Data></Cell> </Row> <Row ss:AutoFitHeight="1" > <Cell><Data ss:Type="String">SN=Eri3G,RNC=BORNC01,NodeB=BA3036W</Data></Cell> <Cell><Data ss:Type="String">BA3036W</Data></Cell> <Cell><Data ss:Type="String">BA3036W</Data></Cell> <Cell><Data ss:Type="String">6/22/2013 5:01:05 AM</Data></Cell> </Row> <Row ss:AutoFitHeight="1" > <Cell><Data ss:Type="String">SN=Eri3G,RNC=BORNC01,NodeB=BA3039W</Data></Cell> <Cell><Data ss:Type="String">BA3039W</Data></Cell> <Cell><Data ss:Type="String">BA3039W</Data></Cell> <Cell><Data ss:Type="String">6/22/2013 5:01:05 AM</Data></Cell> </Row> <Row ss:AutoFitHeight="1" > <Cell><Data ss:Type="String">SN=Eri3G,RNC=BORNC01,NodeB=BA3040W</Data></Cell> <Cell><Data ss:Type="String">BA3040W</Data></Cell> <Cell><Data ss:Type="String">BA3040W</Data></Cell> <Cell><Data ss:Type="String">6/22/2013 5:01:05 AM</Data></Cell> </Row> </Table> </Worksheet> </Workbook>
这是一个txt文件,也可以是一个xml文件,当我们把他的后缀名修改成.xls的时候,我们居然可以看到是一个带有两个worksheet的workbook的excel 文件
读取方式:
public class TableReader { private List<System.Data.DataTable> _vlstDtDestination = new List<System.Data.DataTable>(); private TableParser _vTableParser = null; public List<System.Data.DataTable> DataTableList { get { return _vlstDtDestination; } } public void Read(System.IO.StringReader strReader) { System.Xml.XmlReaderSettings xmlReaderSettings = CustomXmlReaderSetting.Create(); using (System.Xml.XmlReader xmlReader = System.Xml.XmlReader.Create(strReader, xmlReaderSettings)) { string currentNode = string.Empty; string currentTableName = string.Empty; KeyValuePair<string, string> currentPair; while(xmlReader.Read()) { if (xmlReader.NodeType == System.Xml.XmlNodeType.Element && xmlReader.Name == "Worksheet") { currentTableName = xmlReader.GetAttribute("ss:Name"); #region Test if (currentTableName == "Rnc") { Console.WriteLine(); } #endregion if (currentTableName == "Info") { continue; } currentNode = xmlReader.ReadOuterXml(); currentPair=new KeyValuePair<string, string>(currentTableName,currentNode); Parse(currentPair); currentPair = new KeyValuePair<string, string>(null,null); } } } } #region Private's private void Parse(KeyValuePair<string, string> source) { if (_vTableParser==null) { _vTableParser = new TableParser(_vlstDtDestination); } _vTableParser.Parse(source); } #endregion }
public class TableParser { private List<System.Data.DataTable> _vlstDtDestination = null; public TableParser(List<System.Data.DataTable> dtList) { _vlstDtDestination = dtList; } public void Parse(KeyValuePair<string,string> source) { if(source.Key.Trim()=="") { return; } System.Data.DataTable currentDt = null; foreach(System.Data.DataTable dt in _vlstDtDestination) { if(dt.TableName==source.Key) { currentDt = dt; } } if(currentDt==null) { currentDt = new System.Data.DataTable(source.Key); _vlstDtDestination.Add(currentDt); } if(source.Value.Trim()=="") { return; } System.IO.StringReader reader = new System.IO.StringReader(source.Value); System.Xml.XmlReaderSettings xmlReaderSettings = CustomXmlReaderSetting.Create(); using(System.Xml.XmlReader xmlReader=System.Xml.XmlReader.Create(reader,xmlReaderSettings)) { System.Data.DataRow currentDr = null; bool isValued = false; bool isConstructed = false; int index = 0; while (xmlReader.Read()) { switch (xmlReader.NodeType) { case System.Xml.XmlNodeType.Element: if (xmlReader.Name == "Row") { if (isValued) { currentDt.Rows.Add(currentDr); } currentDr = currentDt.NewRow(); index = 0; } if (xmlReader.Name == "Data") { if (!isConstructed) { currentDt.Columns.Add(xmlReader.ReadString(), typeof(System.String)); } else { currentDr[index++] = xmlReader.ReadString(); if (!isValued) { isValued = true; } } } break; case System.Xml.XmlNodeType.EndElement: if (xmlReader.Name == "Row" && !isConstructed) { isConstructed = true; } break; } } if (isValued) { currentDt.Rows.Add(currentDr); } } } }
public class CustomXmlReaderSetting { public static System.Xml.XmlReaderSettings Create() { System.Xml.XmlReaderSettings xmlReaderSettings = new System.Xml.XmlReaderSettings(); xmlReaderSettings.ProhibitDtd = false; xmlReaderSettings.CheckCharacters = false; return xmlReaderSettings; } }
public class StreamFilter { public static System.IO.StringReader Filter(string path) { System.IO.FileStream fileStream = new System.IO.FileStream( path, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.ReadWrite ); StringBuilder builder = new StringBuilder(); fileStream.Seek(0,System.IO.SeekOrigin.Begin); System.IO.StreamReader streamReader = new System.IO.StreamReader(fileStream); for (string strLine = streamReader.ReadLine(); !streamReader.EndOfStream; strLine = streamReader.ReadLine()) { strLine = strLine.Replace("&", "&"); builder.AppendFormat("{0} ",strLine); } System.IO.StringReader strReader = new System.IO.StringReader(builder.ToString()); return strReader; } }
public class NameHandler { public static string ConvertName(string name) { string result = name.Trim(); result = Regex.Replace(result, "[^a-zA-Z\d]", "_"); result = Regex.Replace(result, "_{2,}", "_"); if (result.StartsWith("_")) result = result.Substring(1, result.Length - 1); if (result.EndsWith("_")) result = result.Substring(0, result.Length - 1); return result; } }
The xml iteself is broken up into several major section:
Workbook:Root node of the xml,parent to all the other sections.
DocumentProperties:Most of the importation one sees when accessing File->Properties is defined here.
Styles:Formatting information defined here is available to different row,columns and specific cells in the worksheets.
Worksheet(s):As many worksheets as you want are defined here.
Worksheets consist of two major sections:
Table:This is where all visible data in the spreadsheet is stored.
WorksheetOptions:Global options for the worksheet itself.
Finally,the Table section defines two components:
Column
Row / Cell
With this information in hand one can plan out how to create the xml document that will then load into Microsoft Excel and look,feel and operate like a document created by sophisticate,loving user.
参考:http://en.wikipedia.org/wiki/Microsoft_Office_XML_formats