一、前言
生命不息,折腾不止。近期公司有数据迁移的计划,从Sqlserver迁移到mysql,虽说网上有很多数据迁移方案,但闲着也是闲着,就自己整一个,权当做是练练手了
二、解决思路
整个迁移过程类似于ETL,将数据从来源端经过抽取(extract)、转换(transform)、加载(load)至目的端。读取并转换sqlserver库数据,将数据解析为csv文件,载入文件到mysql。流程如下:
- 抽取、转换
此过程主要是处理源数据库与目标数据库表字段的映射关系,为了保证程序的通用性,通过配置文件映射字段关系,解析配置文件并生成数据库脚本 - 加载
数据迁移的时候最好不要用INSERT语句插入批量插入,这样数据量稍稍大一点就很慢。sqlserver可通过SqlBulkCopy将DataTable对象快速插入到数据库,然后mysql并没有这东西,查阅资料后发现mysql可通过MySqlBulkLoader将csv文件快速导入到数据库。经测试迁移10K条数据MySqlBulkLoader可在1S内处理完,速度还是相当不错的
三、实现
- 配置文件
db_caption.xml(数据库),主要用来存储表描述文件名,若待迁移的表不存在外键关系即迁移时不用考虑先后顺序,此配置文件可以不要。其中maxClients参数指的是异步迁移时,最大并发数。
<?xml version="1.0" encoding="utf-8" ?> <root> <maxClients value="3"></maxClients> <tables> <table filename="t_drawtemplate.xml" caption="抽奖模板"></table> <table filename="t_drawprize.xml" caption="抽奖奖品"></table> <table filename="t_drawrecord.xml" caption="抽奖记录"></table> <table filename="t_drawwinner.xml" caption="中奖记录"></table> </tables> </root>
t_table.xml(表),主要用来描述待迁移表信息及字段描述
<?xml version="1.0" encoding="utf-8" ?> <root> <![CDATA[抽奖记录]]> <!--是否分页,默认不分页就好啦,false_不分页--> <isPaging value="true"></isPaging> <pageSize value="10000"></pageSize> <!--mssql数据库表主键--> <primaryKey value="DrawRecordId"></primaryKey> <!--mssql数据库表名--> <msTable value="DrawRecord"></msTable> <!--mysql数据库表名--> <myTable value="t_drawrecord"></myTable> <!--筛选条件,无特殊情况为空即可--> <filter value="1=1"></filter> <!--字段映射--> <fields> <field msName ="DrawRecordId" myName="id"></field> <field msName ="FK_MemberId" myName="user_id"></field> <field msName ="Remark" myName="remark"></field> <field msName ="DataStatus" myName="data_status"></field> <field msName ="DrawTime" myName="drawTime"></field> <!--需要调整字段示例--> <field msName ="CASE WHEN DrawWinnerId >0 THEN DrawWinnerId END" myName="drawwinner_id"></field> </fields> <!--迁移完成后,数据修复脚本,主要用来修正日期类型为0000-00-00 00:00:00问题--> <fixSql></fixSql> </root>
- 创建xml文件映射对象并重写ToString方法,将对象解析为sql
db_caption.xml映射对象
1 /// <summary> 2 /// 数据库描述类(db_caption) 3 /// </summary> 4 internal class DBCaptionModel 5 { 6 public DBCaptionModel() 7 { 8 this.Tables = new List<TableModel>(); 9 } 10 11 /// <summary> 12 /// 最大连接数 13 /// </summary> 14 public int MaxClients { get; set; } 15 16 /// <summary> 17 /// 表集合 18 /// </summary> 19 public IList<TableModel> Tables { get; private set; } 20 } 21 22 internal class TableModel 23 { 24 /// <summary> 25 /// 表xml文件名 26 /// </summary> 27 public string FileName { get; set; } 28 29 /// <summary> 30 /// 描述 31 /// </summary> 32 public string Caption { get; set; } 33 34 /// <summary> 35 /// 是否已同步 36 /// </summary> 37 public bool IsSync { get; set; } 38 }
1 /// <summary> 2 /// 表描述类 3 /// </summary> 4 internal class TableCaptionModel 5 { 6 public TableCaptionModel() 7 { 8 this.Fields = new List<FieldModel>(); 9 } 10 11 /// <summary> 12 /// 是否分页 13 /// </summary> 14 public bool IsPaging { get; set; } 15 16 /// <summary> 17 /// 分页大小 18 /// </summary> 19 public int PageSize { get; set; } 20 21 /// <summary> 22 /// 源数据表表名 23 /// </summary> 24 public string SourceTableName { get; set; } 25 26 /// <summary> 27 /// 目标数据表表名 28 /// </summary> 29 public string TargetTableName { get; set; } 30 31 /// <summary> 32 /// 源数据表主键 33 /// </summary> 34 public string PrimaryKey { get; set; } 35 36 /// <summary> 37 /// 过滤条件 38 /// </summary> 39 public string Filter { get; set; } 40 41 /// <summary> 42 /// 字段集合 43 /// </summary> 44 public List<FieldModel> Fields { get; set; } 45 46 /// <summary> 47 /// 数据迁移完成后,数据修复脚本 48 /// </summary> 49 public string FixSql { get; set; } 50 51 /// <summary> 52 /// ToString 53 /// </summary> 54 /// <returns>sql</returns> 55 public override string ToString() 56 { 57 string sql = GetBaseSql(); 58 string filter = GetFilterSql(); 59 if (!string.IsNullOrWhiteSpace(filter)) 60 { 61 sql += " WHERE " + filter; 62 } 63 64 sql += " ORDER BY " + this.PrimaryKey; 65 return sql; 66 } 67 68 /// <summary> 69 /// 获取基础查询Sql 70 /// </summary> 71 /// <![CDATA[SELECT SourceField AS TargetField,...... FROM table]]> 72 /// <returns></returns> 73 private string GetBaseSql() 74 { 75 StringBuilder sb = new StringBuilder("SELECT"); 76 77 foreach (var item in this.Fields) 78 { 79 sb.AppendFormat(" {0},", item.ToString()); 80 } 81 82 sb = sb.Remove(sb.Length - 1, 1); 83 84 sb.Append(" FROM "); 85 sb.Append(this.SourceTableName); 86 return sb.ToString(); 87 } 88 89 /// <summary> 90 /// 获取sql查询条件 91 /// </summary> 92 /// <![CDATA[filter || PrimaryKey NOT IN (SELECT PrimaryKey FORM table WHERE filter)]]> 93 /// <returns></returns> 94 private string GetFilterSql() 95 { 96 if (!this.IsPaging) 97 { 98 return this.Filter; 99 } 100 101 StringBuilder sb = new StringBuilder(); 102 sb.AppendFormat("SELECT ROW_NUMBER() OVER(ORDER BY {0}) RowNo,{0} FROM {1}", this.PrimaryKey, this.SourceTableName); 103 104 if (!string.IsNullOrWhiteSpace(this.Filter)) 105 { 106 sb.Append(" WHERE " + this.Filter); 107 } 108 109 sb.Insert(0, string.Format("SELECT {0} FROM (", this.PrimaryKey)); 110 sb.AppendFormat(") T WHERE RowNo BETWEEN @StartIndex AND @EndIndex"); 111 112 return string.Format("{0} IN ({1})", this.PrimaryKey, sb.ToString()); 113 } 114 } 115 116 /// <summary> 117 /// 字段类 118 /// </summary> 119 internal class FieldModel 120 { 121 /// <summary> 122 /// 源字段名 123 /// </summary> 124 public string SourceFieldName { get; set; } 125 126 /// <summary> 127 /// 目标字段名 128 /// </summary> 129 public string TargetFieldName { get; set; } 130 131 /// <summary> 132 /// ToString 133 /// </summary> 134 /// <returns>'SourceFieldName' AS 'TargetFieldName'" </returns> 135 public override string ToString() 136 { 137 if (this.SourceFieldName.IndexOfAny(new char[] { ' ', '(' }) < 0) 138 { 139 //非表达式 140 return string.Format("[{0}] AS '{1}'", SourceFieldName, TargetFieldName); 141 } 142 else 143 { 144 return string.Format("{0} AS '{1}'", SourceFieldName, TargetFieldName); 145 } 146 } 147 }
- 解析XML文件
XML解析可通过XmlSerializer直接反序列化为对象,此处只是为了温习XML解析方式,故采用此方法
1 /// <summary> 2 /// 载入数据库描述xml 3 /// </summary> 4 /// <returns></returns> 5 private static DBCaptionModel LoadDBCaption() 6 { 7 DBCaptionModel model = new DBCaptionModel(); 8 9 XmlDocument doc = new XmlDocument(); 10 doc.Load(CONN_XML_PATH + "db_caption.xml"); 11 12 XmlNode root = doc.SelectSingleNode("root"); 13 //获取最大连接数 14 model.MaxClients = root.SelectSingleNode("maxClients").GetAttribute<int>("value"); 15 16 //获取表描述 17 XmlNodeList tables = root.SelectSingleNode("tables").SelectNodes("table"); 18 foreach (XmlNode node in tables) 19 { 20 model.Tables.Add(new TableModel 21 { 22 FileName = node.GetAttribute("filename"), 23 Caption = node.GetAttribute("caption") 24 }); 25 } 26 27 return model; 28 } 29 30 /// <summary> 31 /// 载入表描述xml 32 /// </summary> 33 /// <param name="fileName">表描叙xml文件名</param> 34 /// <returns></returns> 35 private static TableCaptionModel LoadTableCaption(string fileName) 36 { 37 XmlDocument doc = new XmlDocument(); 38 doc.Load(CONN_XML_PATH + fileName); 39 40 TableCaptionModel model = new TableCaptionModel(); 41 42 XmlNode root = doc.SelectSingleNode("root"); 43 model.IsPaging = root.SelectSingleNode("isPaging").GetAttribute<bool>("value"); 44 if (model.IsPaging) 45 { 46 model.PageSize = root.SelectSingleNode("pageSize").GetAttribute<int>("value"); 47 } 48 model.SourceTableName = root.SelectSingleNode("msTable").GetAttribute("value"); 49 model.TargetTableName = root.SelectSingleNode("myTable").GetAttribute("value"); 50 model.PrimaryKey = root.SelectSingleNode("primaryKey").GetAttribute("value"); 51 model.FixSql = root.SelectSingleNode("fixSql").GetAttribute("value"); 52 53 XmlNodeList fields = root.SelectSingleNode("fields").SelectNodes("field"); 54 55 foreach (XmlNode field in fields) 56 { 57 model.Fields.Add(new FieldModel 58 { 59 SourceFieldName = field.GetAttribute("msName"), 60 TargetFieldName = field.GetAttribute("myName") 61 }); 62 } 63 64 return model; 65 }
Node.GetAttribute扩展方法,简化读取Node属性代码
1 public static class XmlNodeExtension 2 { 3 /// <summary> 4 /// 获取节点属性 5 /// </summary> 6 /// <param name="node">当前节点</param> 7 /// <param name="attrName">属性名称</param> 8 /// <returns></returns> 9 public static string GetAttribute(this XmlNode node, string attrName) 10 { 11 if (node == null) 12 { 13 return null; 14 } 15 return ((XmlElement)node).GetAttribute(attrName); 16 } 17 18 /// <summary> 19 /// 获取节点属性 20 /// </summary> 21 /// <param name="node">当前节点</param> 22 /// <param name="attrName">属性名称</param> 23 /// <returns></returns> 24 public static T GetAttribute<T>(this XmlNode node, string attrName) where T : struct 25 { 26 if (node == null) 27 { 28 return default(T); 29 } 30 string value = GetAttribute(node, attrName); 31 return (T)Convert.ChangeType(value, typeof(T)); 32 } 33 }
- 实现数据迁移帮助方法
FileHelper,将DataTable解析为CSV文件
1 public class FileHelper 2 { 3 /// <summary> 4 /// 将DataTable写入CSV 5 /// </summary> 6 /// <param name="dataTable"></param> 7 /// <param name="fileFullPath"></param> 8 public static void WriteDataTableToCSVFile(DataTable dataTable, string fileFullPath) 9 { 10 WriteDataTableToCSVFile(dataTable, fileFullPath, Encoding.UTF8); 11 } 12 13 /// <summary> 14 /// 将DataTable写入CSV 15 /// </summary> 16 /// <param name="dataTable"></param> 17 /// <param name="fileFullPath"></param> 18 /// <param name="codeType"></param> 19 public static void WriteDataTableToCSVFile(DataTable dataTable, string fileFullPath, Encoding codeType) 20 { 21 using (Stream stream = new FileStream(fileFullPath, FileMode.Create, FileAccess.Write)) 22 using (StreamWriter swriter = new StreamWriter(stream, codeType)) 23 { 24 try 25 { 26 int num = dataTable.Columns.Count; 27 string[] arr = new string[num]; 28 29 //写标题 30 for (int i = 0; i < num; i++) 31 { 32 arr[i] = dataTable.Columns[i].ColumnName; 33 } 34 WriteArrayToCSVFile(swriter, arr); 35 36 //写数据 37 foreach (DataRow item in dataTable.Rows) 38 { 39 for (int i = 0; i < num; i++) 40 { 41 arr[i] = Convert.IsDBNull(item[i]) ? "" : item[i].ToString(); 42 } 43 WriteArrayToCSVFile(swriter, arr); 44 } 45 } 46 catch (Exception ex) 47 { 48 throw new IOException(ex.Message); 49 } 50 } 51 } 52 53 /// <summary> 54 /// 将数据写入CSV文件 55 /// </summary> 56 /// <param name="swriter"></param> 57 /// <param name="arr"></param> 58 private static void WriteArrayToCSVFile(StreamWriter swriter, string[] arr) 59 { 60 for (int i = 0; i < arr.Length; i++) 61 { 62 if (!string.IsNullOrWhiteSpace(arr[i])) 63 { 64 swriter.Write(arr[i]); 65 } 66 67 if (i < arr.Length - 1) 68 { 69 swriter.Write("|||"); 70 } 71 } 72 swriter.Write(swriter.NewLine); 73 } 74 }
MysqlHelper,导入VCS文件到Mysql数据库
1 public class MySqlDBHelper 2 { 3 private static readonly string tmpBasePath = AppDomain.CurrentDomain.BaseDirectory; 4 private static readonly string tmpCSVFilePattern = "Temp\{0}.csv"; //0表示文件名称 5 6 /// <summary> 7 /// DB连接字符串 8 /// </summary> 9 public static string DBConnectionString 10 { 11 get 12 { 13 return ConfigHelper.GetConfigString("SQLConnStr_Mysql"); 14 } 15 } 16 17 public static int ExecNonQuery(string sqlText, CommandType cmdType, string[] paramNames, object[] paramValues) 18 { 19 int result = 0; 20 using (MySqlConnection mySqlCon = new MySqlConnection(DBConnectionString)) 21 { 22 MySqlCommand mySqlCmd = new MySqlCommand(sqlText, mySqlCon); 23 mySqlCmd.CommandType = cmdType; 24 try 25 { 26 fillParameters(mySqlCmd, paramNames, paramValues); 27 mySqlCon.Open(); 28 result = mySqlCmd.ExecuteNonQuery(); 29 } 30 catch (MySqlException mse) 31 { 32 throw mse; 33 } 34 } 35 return 0; 36 } 37 38 public static int ExecuteNonQuery(string sqlText) 39 { 40 return ExecNonQuery(sqlText, CommandType.Text, null, null); 41 } 42 43 public static bool BulkInsert(DataTable dataTable) 44 { 45 bool result = false; 46 if (dataTable != null && dataTable.Rows.Count > 0) 47 { 48 using (MySqlConnection mySqlCon = new MySqlConnection(DBConnectionString)) 49 { 50 mySqlCon.Open(); 51 MySqlTransaction sqlTran = mySqlCon.BeginTransaction(IsolationLevel.ReadCommitted); 52 MySqlBulkLoader sqlBulkCopy = new MySqlBulkLoader(mySqlCon); 53 sqlBulkCopy.Timeout = 60; 54 55 result = BulkInsert(sqlBulkCopy, dataTable, sqlTran); 56 } 57 } 58 return result; 59 } 60 61 public static bool BulkInsert<T, T1>(T sqlBulkCopy, DataTable dataTable, T1 sqlTrasaction) 62 { 63 bool result = false; 64 string tmpCsvPath = tmpBasePath + string.Format(tmpCSVFilePattern, dataTable.TableName + DateTime.Now.Ticks.ToString()); 65 string tmpFolder = tmpCsvPath.Remove(tmpCsvPath.LastIndexOf("\")); 66 67 if (!Directory.Exists(tmpFolder)) 68 Directory.CreateDirectory(tmpFolder); 69 70 FileHelper.WriteDataTableToCSVFile(dataTable, tmpCsvPath); //Write to csv File 71 72 MySqlBulkLoader sqlBC = (MySqlBulkLoader)Convert.ChangeType(sqlBulkCopy, typeof(MySqlBulkLoader)); 73 MySqlTransaction sqlTran = (MySqlTransaction)Convert.ChangeType(sqlTrasaction, typeof(MySqlTransaction)); 74 try 75 { 76 sqlBC.TableName = dataTable.TableName; 77 sqlBC.FieldTerminator = "|||"; 78 sqlBC.LineTerminator = " "; 79 sqlBC.FileName = tmpCsvPath; 80 sqlBC.NumberOfLinesToSkip = 1; 81 82 //Mapping Destination Field of Database Table 83 for (int i = 0; i < dataTable.Columns.Count; i++) 84 { 85 sqlBC.Columns.Add(dataTable.Columns[i].ColumnName); 86 } 87 //Write DataTable 88 sqlBC.Load(); 89 90 sqlTran.Commit(); 91 result = true; 92 } 93 catch (MySqlException mse) 94 { 95 result = false; 96 sqlTran.Rollback(); 97 throw mse; 98 } 99 finally 100 { 101 //T、T1给默认值为Null, 由系统调用GC 102 sqlBC = null; 103 sqlBulkCopy = default(T); 104 sqlTrasaction = default(T1); 105 File.Delete(tmpCsvPath); 106 } 107 return result; 108 } 109 110 private static void fillParameters(MySqlCommand mySqlCmd, string[] paramNames, object[] paramValues) 111 { 112 if (paramNames == null || paramNames.Length == 0) 113 return; 114 if (paramValues == null || paramValues.Length == 0) 115 return; 116 117 if (paramNames.Length != paramValues.Length) 118 throw new ArgumentException("The Name Count of parameters does not match its Value Count! "); 119 120 string name; 121 object value; 122 for (int i = 0; i < paramNames.Length; i++) 123 { 124 name = paramNames[i]; 125 value = paramValues[i]; 126 if (value != null) 127 mySqlCmd.Parameters.AddWithValue(name, value); 128 else 129 mySqlCmd.Parameters.AddWithValue(name, DBNull.Value); 130 } 131 } 132 }
- 数据迁移
1 private static void Main(string[] args) 2 { 3 DBCaptionModel tablesCaption = LoadDBCaption(); 4 5 Stopwatch watch = new Stopwatch(); 6 watch.Start(); 7 8 try 9 { 10 foreach (var item in tablesCaption.Tables) 11 { 12 int total = DataMigration(item); 13 } 14 //异步 15 //DataMigrationAsync(tablesCaption, 0, 0); 16 //Console.ReadKey(); 17 } 18 catch (Exception ex) 19 { 20 Console.WriteLine("迁移失败"); 21 Console.WriteLine(ex.StackTrace); 22 } 23 24 Console.WriteLine("总耗时:" + watch.ElapsedMilliseconds); 25 } 26 27 /// <summary> 28 /// 同步迁移 29 /// </summary> 30 /// <param name="model">表描述</param> 31 /// <returns>迁移记录数</returns> 32 private static int DataMigration(TableModel model) 33 { 34 Console.WriteLine(string.Format("【{0}】迁移开始", model.Caption)); 35 Stopwatch watch = new Stopwatch(); 36 watch.Start(); 37 38 TableCaptionModel tableCaption = LoadTableCaption(model.FileName); 39 40 string sql = tableCaption.ToString(); 41 Console.WriteLine(sql); 42 43 SqlParameter[] parms = 44 { 45 new SqlParameter("@StartIndex", SqlDbType.Int, 4), 46 new SqlParameter("@EndIndex", SqlDbType.Int, 4) 47 }; 48 49 int total = 0; 50 51 if (tableCaption.IsPaging) 52 { 53 //分页 54 int pageNo = 0; 55 while (true) 56 { 57 Console.WriteLine(string.Format("【{0}】当前分页:{1}", model.Caption, pageNo)); 58 59 parms[0].Value = pageNo * tableCaption.PageSize + 1; 60 parms[1].Value = (pageNo + 1) * tableCaption.PageSize; 61 int num = DataMigration(sql, parms, tableCaption.TargetTableName); 62 total += num; 63 if (num < tableCaption.PageSize) 64 { 65 break; 66 } 67 pageNo++; 68 } 69 } 70 else 71 { 72 //不分页 73 total = DataMigration(sql, parms, tableCaption.TargetTableName); 74 } 75 76 //修复数据 77 if (FixData(tableCaption) >= 0) 78 { 79 Console.WriteLine(string.Format("【{0}】数据修复完成", model.Caption)); 80 } 81 82 Console.WriteLine(string.Format("【{0}】迁移结束,耗时:{1},记录数:{2} ", model.Caption, watch.ElapsedMilliseconds, total)); 83 return total; 84 } 85 86 /// <summary> 87 /// 数据迁移 88 /// </summary> 89 /// <param name="sql"></param> 90 /// <param name="parms"></param> 91 /// <param name="tableName"></param> 92 /// <returns></returns> 93 private static int DataMigration(string sql, SqlParameter[] parms, string tableName) 94 { 95 DataTable dt = MsSqlDBHelper.ExecSql(sql, parms).Tables[0]; 96 dt.TableName = tableName; 97 MySqlDBHelper.BulkInsert(dt); 98 return dt.Rows.Count; 99 } 100 101 /// <summary> 102 /// 修复数据 103 /// </summary> 104 /// <param name="model"></param> 105 /// <returns></returns> 106 private static int FixData(TableCaptionModel model) 107 { 108 if (!string.IsNullOrWhiteSpace(model.FixSql)) 109 { 110 return MySqlDBHelper.ExecuteNonQuery(model.FixSql.Replace("@MyTable", model.TargetTableName)); 111 } 112 return -1; 113 }
- 迁移结果示例
- 数据迁移失败清空数据库脚本
1 -- 清空数据库 2 DELIMITER// 3 CREATE PROCEDURE sp_clear(IN dbname VARCHAR(128)) 4 BEGIN 5 -- 接收动态脚本 6 DECLARE v_sql VARCHAR(256); 7 8 -- 定义游标遍历时,作为判断是否遍历完全部记录的标记 9 DECLARE no_more_items INT DEFAULT 0; 10 11 -- 定义游标 12 DECLARE c_result CURSOR FOR SELECT CONCAT('TRUNCATE TABLE ',dbname,'.',TABLE_NAME,';') FROM information_schema.TABLES WHERE TABLE_SCHEMA = dbname AND TABLE_TYPE ='BASE TABLE'; 13 14 -- 声明当游标遍历完全部记录后将标志变量置成某个值 15 DECLARE CONTINUE HANDLER FOR NOT FOUND SET no_more_items = 1; 16 17 18 -- 禁用外键,外键会导致TRUNCATE TABLE语句执行失败,另:在SET FOREIGN_KEY_CHECKS之后声明变量会报错,暂不知原因 19 SET FOREIGN_KEY_CHECKS = 0; 20 21 -- 打开游标 22 OPEN c_result; 23 -- 循环开始 24 REPEAT 25 FETCH c_result INTO v_sql; 26 SET @v_sql=v_sql; 27 SELECT @v_sql; 28 29 -- 执行动态脚本 30 -- 预处理需要执行的动态SQL,其中stmt是一个变量 31 PREPARE stmt FROM @v_sql; 32 -- 执SQL语句 33 EXECUTE stmt; 34 -- 释放掉预处理段 35 DEALLOCATE PREPARE stmt; 36 37 -- 循环结束 38 UNTIL no_more_items END REPEAT; 39 -- 关闭游标 40 CLOSE c_result; 41 42 -- 恢复外键 43 SET FOREIGN_KEY_CHECKS = 1; 44 END// 45 DELIMITER ;
四、最后
特别提醒:生成CSV文件时一定要生成列名,否则会导致第一条记录主键数据异常,写demo时被这个问题坑了好久
参考链接:Mysql快速导入数据
五、补充
之前的数据迁移中,对于Sqlserver中的null迁移到mysql会变成当前字段类型的默认值,例如:int默认为值0、DateTime类型为0000-00-00 00:00:00。
原修复方案:数据迁移完成后,执行数据修复脚本fixSql,将默认值更新为null
优化方法:将DBNULL类型解析为CSV文件时,解析成N,CSV载入数据库时N会转换为NULL