• C# 读取 csv(大文件)


    上次读取了excel发现还是很慢(结果集为DataTable),后来研究了一下csv这个文件效率很高呀,特别是针对大文件的时候,话不多说上代码!

    本机配置:win10 i5900F 16G 500G固态

    1、csv文件帮助类

      /// <summary>
      /// Writes a <see cref="DataTable"/> to a comma-separated text file and reads such files back.
      /// Fields containing a comma, a double quote or a line break are quoted on write
      /// (RFC 4180 style, embedded quotes doubled) and the reader understands the same quoting.
      /// Both directions use <see cref="System.Text.Encoding.Default"/>.
      /// </summary>
      public static class CsvHelper
      {
          /// <summary>Characters whose presence forces a field to be written in quotes.</summary>
          private static readonly char[] QuoteTriggers = { ',', '"', '\r', '\n' };

          /// <summary>
          /// Creates (or overwrites) a CSV file containing the column names followed by every row of the table.
          /// </summary>
          /// <param name="dt">Table to export; must not be null.</param>
          /// <param name="fullFileName">Full path of the file to create.</param>
          /// <returns>Always <c>true</c>; failures are reported by exception.</returns>
          /// <exception cref="ArgumentNullException"><paramref name="dt"/> is null.</exception>
          public static Boolean SaveCSV(DataTable dt, string fullFileName)
          {
              if (dt == null)
              {
                  throw new ArgumentNullException("dt");
              }

              // using blocks guarantee the file handle is released even when a write fails.
              using (FileStream fs = new FileStream(fullFileName, System.IO.FileMode.Create, System.IO.FileAccess.Write))
              using (StreamWriter sw = new StreamWriter(fs, System.Text.Encoding.Default))
              {
                  int columnCount = dt.Columns.Count;
                  // One reusable buffer instead of repeated string concatenation per row.
                  System.Text.StringBuilder line = new System.Text.StringBuilder();

                  // Header line: column names.
                  for (int i = 0; i < columnCount; i++)
                  {
                      if (i > 0)
                      {
                          line.Append(',');
                      }
                      line.Append(EscapeField(dt.Columns[i].ColumnName));
                  }
                  sw.WriteLine(line.ToString());

                  // Data lines.
                  foreach (DataRow row in dt.Rows)
                  {
                      line.Length = 0;
                      for (int j = 0; j < columnCount; j++)
                      {
                          if (j > 0)
                          {
                              line.Append(',');
                          }
                          line.Append(EscapeField(row[j].ToString()));
                      }
                      sw.WriteLine(line.ToString());
                  }
              }

              return true;
          }

          /// <summary>
          /// Reads a whole CSV file whose first line is a title row.
          /// </summary>
          /// <param name="fullFileName">Full path of the file to read.</param>
          /// <returns>A table with one string column per header field.</returns>
          public static DataTable ReadCSV(string fullFileName)
          {
              return ReadCSV(fullFileName, 0, 0, 0, 0, true);
          }

          /// <summary>
          /// Reads a CSV file, or a rectangular part of it, into a <see cref="DataTable"/>.
          /// </summary>
          /// <remarks>
          /// Empty fields are kept, so values stay in their own columns. A record with fewer fields
          /// than the header leaves the missing cells as DBNull; extra fields are ignored.
          /// Blank lines are skipped. Lines that contain no double quote are trimmed as a whole
          /// before splitting. I/O errors and invalid headers (for example duplicate column names)
          /// are thrown to the caller instead of silently returning a truncated table.
          /// </remarks>
          /// <param name="fullFileName">Full path of the file to read.</param>
          /// <param name="firstRow">1-based line to start at; the lines before it are skipped. 0 or 1 starts at the first line.</param>
          /// <param name="firstColumn">0-based index of the first column to return; negative values are treated as 0.</param>
          /// <param name="getRows">Maximum number of data rows to return; 0 means all.</param>
          /// <param name="getColumns">Number of columns to return; 0 means every column from <paramref name="firstColumn"/> on.</param>
          /// <param name="haveTitleRow">True if the first line read holds the column names; otherwise columns are named COL&lt;index&gt;.</param>
          /// <returns>The populated table; every column is of type string.</returns>
          public static DataTable ReadCSV(string fullFileName, Int16 firstRow = 0, Int16 firstColumn = 0, Int16 getRows = 0, Int16 getColumns = 0, bool haveTitleRow = true)
          {
              DataTable dt = new DataTable();
              int startColumn = Math.Max((int)firstColumn, 0);

              using (FileStream fs = new FileStream(fullFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read))
              using (StreamReader sr = new StreamReader(fs, System.Text.Encoding.Default))
              {
                  // Skip the lines in front of the requested start line.
                  for (int i = 1; i < firstRow; i++)
                  {
                      sr.ReadLine();
                  }

                  System.Collections.Generic.List<string> fields = new System.Collections.Generic.List<string>();
                  bool columnsCreated = false;
                  int endColumn = 0;   // exclusive upper bound of the source columns that are copied
                  int rowsAdded = 0;

                  while (ReadRecord(sr, fields))
                  {
                      if (!columnsCreated)
                      {
                          columnsCreated = true;

                          // Never run past the fields that actually exist in the first record.
                          endColumn = getColumns == 0
                              ? fields.Count
                              : Math.Min(startColumn + getColumns, fields.Count);

                          for (int i = startColumn; i < endColumn; i++)
                          {
                              dt.Columns.Add(new DataColumn(haveTitleRow ? fields[i] : "COL" + i.ToString()));
                          }

                          if (haveTitleRow)
                          {
                              continue;
                          }
                      }

                      DataRow dr = dt.NewRow();
                      // A short record fills only the cells it has values for.
                      int available = Math.Min(endColumn, fields.Count);
                      for (int j = startColumn; j < available; j++)
                      {
                          dr[j - startColumn] = fields[j];
                      }
                      dt.Rows.Add(dr);

                      rowsAdded++;
                      if (getRows > 0 && rowsAdded >= getRows)
                      {
                          break;
                      }
                  }
              }

              return dt;
          }

          /// <summary>
          /// Returns the text to write for one field: unchanged when it is safe, otherwise wrapped
          /// in double quotes with embedded quotes doubled.
          /// </summary>
          private static string EscapeField(string value)
          {
              if (value.IndexOfAny(QuoteTriggers) < 0)
              {
                  return value;
              }
              return "\"" + value.Replace("\"", "\"\"") + "\"";
          }

          /// <summary>
          /// Reads the next non-blank record into <paramref name="fields"/>.
          /// A record spans several physical lines while a quoted field is still open; the line
          /// break inside such a field is returned as a single '\n'.
          /// </summary>
          /// <returns>False when the end of the file has been reached.</returns>
          private static bool ReadRecord(StreamReader sr, System.Collections.Generic.List<string> fields)
          {
              string line;
              do
              {
                  line = sr.ReadLine();
                  if (line == null)
                  {
                      return false;
                  }
              }
              while (line.Trim().Length == 0);

              fields.Clear();

              // Fast path for the common case: no quoting anywhere on the line.
              if (line.IndexOf('"') < 0)
              {
                  fields.AddRange(line.Trim().Split(','));
                  return true;
              }

              System.Text.StringBuilder field = new System.Text.StringBuilder();
              bool inQuotes = false;
              bool atFieldStart = true;   // a quote only opens a quoted field at the very start of a field

              while (true)
              {
                  for (int i = 0; i < line.Length; i++)
                  {
                      char c = line[i];
                      if (inQuotes)
                      {
                          if (c != '"')
                          {
                              field.Append(c);
                          }
                          else if (i + 1 < line.Length && line[i + 1] == '"')
                          {
                              // Doubled quote inside a quoted field stands for one literal quote.
                              field.Append('"');
                              i++;
                          }
                          else
                          {
                              inQuotes = false;
                          }
                      }
                      else if (c == ',')
                      {
                          fields.Add(field.ToString());
                          field.Length = 0;
                          atFieldStart = true;
                          continue;
                      }
                      else if (c == '"' && atFieldStart)
                      {
                          inQuotes = true;
                      }
                      else
                      {
                          field.Append(c);
                      }
                      atFieldStart = false;
                  }

                  if (!inQuotes)
                  {
                      break;
                  }

                  // The quoted field continues on the next physical line.
                  string next = sr.ReadLine();
                  if (next == null)
                  {
                      break;   // unterminated quote at end of file: keep what was read
                  }
                  field.Append('\n');
                  line = next;
              }

              fields.Add(field.ToString());
              return true;
          }
      }
    View Code

    2、使用

    2.1、创建csv文件,数据量为100W,21列

     /// <summary>
     /// Benchmark: builds a 1,000,000-row table (an auto-increment ID column plus 20 string
     /// columns p1..p20), saves it with CsvHelper.SaveCSV and logs the file size and elapsed time.
     /// </summary>
     public void CSV_Create()
     {
         // NOTE(review): this path looks like it lost its backslashes (C:\Users\...\Desktop\...);
         // as written it is a drive-relative file name. Confirm, and keep it in sync with CSV_Read.
         string filePath = @"C:UsersAdministratorDesktop大数据.csv";

         #region Fill the DataTable
         DataTable tblDatas = new DataTable("Datas");

         DataColumn dc = tblDatas.Columns.Add("ID", typeof(int));
         dc.AutoIncrement = true;    // value is generated automatically
         dc.AutoIncrementSeed = 1;   // start at 1
         dc.AutoIncrementStep = 1;   // step by 1
         dc.AllowDBNull = false;

         for (int i = 1; i < 21; i++)
         {
             tblDatas.Columns.Add("p" + i, typeof(string));
         }

         // Rows are created only inside the loop: a discarded NewRow() would use up the
         // first auto-increment value and make the IDs start at 2 instead of the seed.
         for (int i = 0; i < 1000000; i++)
         {
             DataRow newRow = tblDatas.NewRow();
             string value = "大话西游大话西游大话西游大话西游" + i;

             // Columns 1..20 are p1..p20 (column 0 is ID); all get the same text.
             for (int c = 1; c < 21; c++)
             {
                 newRow[c] = value;
             }
             tblDatas.Rows.Add(newRow);
         }

         #endregion

         Stopwatch sw = new Stopwatch();
         sw.Start();

         CsvHelper.SaveCSV(tblDatas, filePath);

         // Stop before inspecting the file so only the save itself is timed.
         sw.Stop();

         System.IO.FileInfo fileInfo = new System.IO.FileInfo(filePath);
         log.Info("生成.csv文件," + filePath + ",文件大小" + System.Math.Ceiling((fileInfo.Length / 1024.0) / 1024) + " M" + ",耗时:" + sw.Elapsed);
     }
    View Code

    耗时大概20秒左右,文件大小750M左右。

    2.2、读csv文件

    /// <summary>
    /// Benchmark: loads the CSV file with CsvHelper.ReadCSV and logs the row count
    /// (in units of ten thousand) together with the elapsed time.
    /// </summary>
    public void CSV_Read()
    {
        Stopwatch timer = Stopwatch.StartNew();

        // NOTE(review): the path looks like it lost its backslashes; confirm against CSV_Create.
        string path = @"C:UsersAdministratorDesktop大数据.csv";
        DataTable loaded = CsvHelper.ReadCSV(path);

        // Integer division: the count is reported in whole ten-thousands.
        int tenThousands = loaded.Rows.Count / 10000;
        log.Info(path + ",文件读取完成,数据条数" + tenThousands + "万,耗时:" + timer.Elapsed);
    }
    View Code

    生成Datatable类型的结果集,耗时10秒左右,测试结果log日志内容如下:

    感谢:https://www.cnblogs.com/fiozhao/p/3225112.html

  • 相关阅读:
    数据库基本概念
    Python语言特性之5:自省
    Python语言特性之4:类变量和实例变量
    Python语言特性之3:@staticmethod和@classmethod
    Python语言特性之2:元类
    Python语言特性之1:函数参数传递
    基础数学与算法学习
    推荐系统资料
    MySQL相关
    Python科学计算包模块的安装(ubuntu)
  • 原文地址:https://www.cnblogs.com/PrintY/p/14044598.html
Copyright © 2020-2023  润新知