注:本文思路已经应用于“飞梭TXT文本数据处理百宝箱”中,有兴趣的朋友可以下载试用。
我们知道,批量生成海量无重复的随机数据是件很麻烦的事情。如果每生成一个数据都要和之前已生成的数据逐一比对,那么效率会随着数据量的增大而越来越低,最后令人不堪忍受。今天介绍一种另类的思路,可以高效地生成无重复的随机数据。
分析:所谓随机数据,就是在某个设定的区间内随机提取一批数据出来。那么我们可以变通地思考一下:可不可以将设定的数据区间A按照所需的数据量N划分成N个互不重叠的小区间B?如果这样,我们只需要从每个小区间B中各取一个随机值,不需要验证是否重复,就可以很容易地得到N个唯一的数据(例如要在0~99内取10个数,可依次从0~9、10~19、…、90~99中各取一个)。这些数据同样是随机的,只不过稍微带有一定的均布特性。
这里有个问题,按照以上的思路取出的数据虽然是随机的,但是还是按照从小到大的顺序排列,这是个遗憾。解决办法:获取到随机数据集合后,使用洗牌算法对数据进行打乱处理。
实现代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace DataGenerator.DataObject
{
/// <summary>
/// Produces a batch of unique, fixed-length random codes without ever comparing a new
/// code against the ones already generated.
/// </summary>
/// <remarks>
/// Codes are numbers written in base <c>CharList.Length</c> using the characters of
/// <c>CharList</c> as digits and left-padded to <c>Length</c> characters. Uniqueness
/// comes from generating the numbers in strictly increasing order: each number is the
/// previous one plus one plus a random share of the remaining "slack". The ordered
/// result is then shuffled. <c>CharList</c>, <c>Length</c> and <c>FromStr</c> are not
/// declared in this class (presumably inherited from <c>DynamicDataBase</c>).
/// Typical use: <see cref="Reset"/>, then <see cref="PrepareCodes"/> with the same
/// count, then <see cref="GetDataByIndex"/>.
/// </remarks>
public class RandomData : DynamicDataBase
{
    // Single random source shared by code generation and shuffling; created lazily.
    private Random m_ro;

    // Shuffled codes produced by the last PrepareCodes call; null until then.
    private List<string> m_codes;

    // Numeric value of the most recently generated code (-1 before the first one).
    private long m_lastNum;

    // Slack still available: how far the remaining codes may jump ahead in total
    // while leaving room for every code that is still to be generated.
    private long m_span;

    // Character used to left-pad codes; it is the "zero" digit CharList[0].
    private char m_padStr;

    /// <summary>Returns the prepared code stored at <paramref name="index"/>.</summary>
    /// <param name="index">Zero-based position in the shuffled code list.</param>
    /// <returns>The code at that position.</returns>
    public override string GetDataByIndex(int index)
    {
        return m_codes[index];
    }

    /// <summary>
    /// Re-initialises the generator state for a batch of <paramref name="count"/> codes
    /// and discards any previously prepared codes.
    /// </summary>
    /// <param name="count">Number of codes that will be generated.</param>
    /// <exception cref="Exception">
    /// Thrown when <paramref name="count"/> is negative, when it exceeds the number of
    /// distinct codes of the configured length, or when the settings are otherwise
    /// unusable. The underlying failure is attached as the inner exception.
    /// </exception>
    public override void Reset(int count)
    {
        try
        {
            m_lastNum = -1L;

            // Smallest value the last code can take (codes 0, 1, ..., count - 1).
            int idx = count - 1;

            // Largest value representable with Length digits in base CharList.Length.
            // long.MaxValue rounds up to 2^63 as a double, so ">=" is required to keep
            // the cast below inside the range of long.
            double capacity = Math.Pow(CharList.Length, Length) - 1;
            long maxV = capacity >= long.MaxValue ? long.MaxValue : (long)capacity;

            // The capacity check is done in the real base. Counting decimal digits of
            // idx instead would wrongly reject valid counts for character sets with
            // more than ten symbols.
            if (count < 0 || maxV < idx)
                throw new ArgumentOutOfRangeException("count");

            // Everything above the minimum footprint is slack to be spent randomly.
            m_span = maxV - idx;
            m_padStr = CharList[0];
            m_codes = null;
        }
        catch (Exception ex)
        {
            throw new Exception("随机码起始值或生成数量越界!尝试调整生成数量、数据长度、起始字符串等设置后再试。", ex);
        }
    }

    /// <summary>
    /// Generates <paramref name="count"/> unique codes and stores them in random order.
    /// Relies on the state set up by <see cref="Reset"/> for the same count.
    /// </summary>
    /// <param name="count">Number of codes to generate; zero yields an empty list.</param>
    public void PrepareCodes(int count)
    {
        string[] ordered = new string[count];
        for (int i = 0; i < count; i++)
        {
            ordered[i] = GenerateCode(i, count);
        }

        // Codes come out in ascending order; shuffle to hide that ordering.
        m_codes = new List<string>(Shuffle<string>(ordered));
    }

    /// <summary>
    /// Lazily yields the items of <paramref name="source"/> in random order, picking
    /// each next item at random from those not yet emitted.
    /// </summary>
    /// <param name="source">Items to shuffle; it is copied, not modified.</param>
    /// <returns>The items in random order; empty when the source is empty.</returns>
    private IEnumerable<T> Shuffle<T>(IEnumerable<T> source)
    {
        if (m_ro == null)
            m_ro = new Random();

        T[] elements = source.ToArray();
        for (int i = elements.Length - 1; i > 0; i--)
        {
            int swapIndex = m_ro.Next(i + 1);
            yield return elements[swapIndex];

            // Move the last unpicked item into the freed slot.
            elements[swapIndex] = elements[i];
        }

        // Guard the final item so an empty source does not index out of range.
        if (elements.Length > 0)
            yield return elements[0];
    }

    /// <summary>
    /// Generates the next code in ascending numeric order and renders it as text.
    /// </summary>
    /// <param name="index">Zero-based ordinal of the code within the batch.</param>
    /// <param name="count">Total number of codes in the batch.</param>
    /// <returns>The code, left-padded to <c>Length</c> characters.</returns>
    private string GenerateCode(int index, int count)
    {
        // Reuse one generator for the whole batch. Re-seeding from the clock for every
        // code gives near-identical seeds within one tick and correlated draws.
        if (m_ro == null)
            m_ro = new Random();

        // Fair share of the remaining slack for this code.
        long share = m_span / (count - index);

        // Random.Next takes an exclusive int upper bound; clamp without computing
        // share + 1 when that could overflow.
        int bound = share >= int.MaxValue ? int.MaxValue : (int)(share + 1);
        long used = m_ro.Next(bound);

        m_span -= used;
        m_lastNum += 1 + used;

        // Convert the number to base CharList.Length, least significant digit first.
        long position = m_lastNum;
        int radix = CharList.Length;
        StringBuilder sb = new StringBuilder();
        while (position >= radix)
        {
            int yushu = (int)(position % radix);
            sb.Append(CharList[yushu]);
            position = (position - yushu) / radix;
        }
        sb.Append(CharList[(int)position]);

        // Digits were collected in reverse order.
        char[] digits = sb.ToString().ToCharArray();
        Array.Reverse(digits);
        return new string(digits).PadLeft(Length, m_padStr);
    }

    /// <summary>Drops the references to the random source and the prepared codes.</summary>
    public void Dispose()
    {
        m_ro = null;
        m_codes = null;
    }

    /// <summary>Describes the generator by character set, code length and start string.</summary>
    public override string ToString()
    {
        return string.Format("<字符集:{0},长度:{1},起始:{2}>", CharList, Length, FromStr);
    }
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace DataGenerator.DataObject
{
/// <summary>
/// Produces a batch of unique, fixed-length random codes without ever comparing a new
/// code against the ones already generated.
/// </summary>
/// <remarks>
/// Codes are numbers written in base <c>CharList.Length</c> using the characters of
/// <c>CharList</c> as digits and left-padded to <c>Length</c> characters. Uniqueness
/// comes from generating the numbers in strictly increasing order: each number is the
/// previous one plus one plus a random share of the remaining "slack". The ordered
/// result is then shuffled. <c>CharList</c>, <c>Length</c> and <c>FromStr</c> are not
/// declared in this class (presumably inherited from <c>DynamicDataBase</c>).
/// Typical use: <see cref="Reset"/>, then <see cref="PrepareCodes"/> with the same
/// count, then <see cref="GetDataByIndex"/>.
/// </remarks>
public class RandomData : DynamicDataBase
{
    // Single random source shared by code generation and shuffling; created lazily.
    private Random m_ro;

    // Shuffled codes produced by the last PrepareCodes call; null until then.
    private List<string> m_codes;

    // Numeric value of the most recently generated code (-1 before the first one).
    private long m_lastNum;

    // Slack still available: how far the remaining codes may jump ahead in total
    // while leaving room for every code that is still to be generated.
    private long m_span;

    // Character used to left-pad codes; it is the "zero" digit CharList[0].
    private char m_padStr;

    /// <summary>Returns the prepared code stored at <paramref name="index"/>.</summary>
    /// <param name="index">Zero-based position in the shuffled code list.</param>
    /// <returns>The code at that position.</returns>
    public override string GetDataByIndex(int index)
    {
        return m_codes[index];
    }

    /// <summary>
    /// Re-initialises the generator state for a batch of <paramref name="count"/> codes
    /// and discards any previously prepared codes.
    /// </summary>
    /// <param name="count">Number of codes that will be generated.</param>
    /// <exception cref="Exception">
    /// Thrown when <paramref name="count"/> is negative, when it exceeds the number of
    /// distinct codes of the configured length, or when the settings are otherwise
    /// unusable. The underlying failure is attached as the inner exception.
    /// </exception>
    public override void Reset(int count)
    {
        try
        {
            m_lastNum = -1L;

            // Smallest value the last code can take (codes 0, 1, ..., count - 1).
            int idx = count - 1;

            // Largest value representable with Length digits in base CharList.Length.
            // long.MaxValue rounds up to 2^63 as a double, so ">=" is required to keep
            // the cast below inside the range of long.
            double capacity = Math.Pow(CharList.Length, Length) - 1;
            long maxV = capacity >= long.MaxValue ? long.MaxValue : (long)capacity;

            // The capacity check is done in the real base. Counting decimal digits of
            // idx instead would wrongly reject valid counts for character sets with
            // more than ten symbols.
            if (count < 0 || maxV < idx)
                throw new ArgumentOutOfRangeException("count");

            // Everything above the minimum footprint is slack to be spent randomly.
            m_span = maxV - idx;
            m_padStr = CharList[0];
            m_codes = null;
        }
        catch (Exception ex)
        {
            throw new Exception("随机码起始值或生成数量越界!尝试调整生成数量、数据长度、起始字符串等设置后再试。", ex);
        }
    }

    /// <summary>
    /// Generates <paramref name="count"/> unique codes and stores them in random order.
    /// Relies on the state set up by <see cref="Reset"/> for the same count.
    /// </summary>
    /// <param name="count">Number of codes to generate; zero yields an empty list.</param>
    public void PrepareCodes(int count)
    {
        string[] ordered = new string[count];
        for (int i = 0; i < count; i++)
        {
            ordered[i] = GenerateCode(i, count);
        }

        // Codes come out in ascending order; shuffle to hide that ordering.
        m_codes = new List<string>(Shuffle<string>(ordered));
    }

    /// <summary>
    /// Lazily yields the items of <paramref name="source"/> in random order, picking
    /// each next item at random from those not yet emitted.
    /// </summary>
    /// <param name="source">Items to shuffle; it is copied, not modified.</param>
    /// <returns>The items in random order; empty when the source is empty.</returns>
    private IEnumerable<T> Shuffle<T>(IEnumerable<T> source)
    {
        if (m_ro == null)
            m_ro = new Random();

        T[] elements = source.ToArray();
        for (int i = elements.Length - 1; i > 0; i--)
        {
            int swapIndex = m_ro.Next(i + 1);
            yield return elements[swapIndex];

            // Move the last unpicked item into the freed slot.
            elements[swapIndex] = elements[i];
        }

        // Guard the final item so an empty source does not index out of range.
        if (elements.Length > 0)
            yield return elements[0];
    }

    /// <summary>
    /// Generates the next code in ascending numeric order and renders it as text.
    /// </summary>
    /// <param name="index">Zero-based ordinal of the code within the batch.</param>
    /// <param name="count">Total number of codes in the batch.</param>
    /// <returns>The code, left-padded to <c>Length</c> characters.</returns>
    private string GenerateCode(int index, int count)
    {
        // Reuse one generator for the whole batch. Re-seeding from the clock for every
        // code gives near-identical seeds within one tick and correlated draws.
        if (m_ro == null)
            m_ro = new Random();

        // Fair share of the remaining slack for this code.
        long share = m_span / (count - index);

        // Random.Next takes an exclusive int upper bound; clamp without computing
        // share + 1 when that could overflow.
        int bound = share >= int.MaxValue ? int.MaxValue : (int)(share + 1);
        long used = m_ro.Next(bound);

        m_span -= used;
        m_lastNum += 1 + used;

        // Convert the number to base CharList.Length, least significant digit first.
        long position = m_lastNum;
        int radix = CharList.Length;
        StringBuilder sb = new StringBuilder();
        while (position >= radix)
        {
            int yushu = (int)(position % radix);
            sb.Append(CharList[yushu]);
            position = (position - yushu) / radix;
        }
        sb.Append(CharList[(int)position]);

        // Digits were collected in reverse order.
        char[] digits = sb.ToString().ToCharArray();
        Array.Reverse(digits);
        return new string(digits).PadLeft(Length, m_padStr);
    }

    /// <summary>Drops the references to the random source and the prepared codes.</summary>
    public void Dispose()
    {
        m_ro = null;
        m_codes = null;
    }

    /// <summary>Describes the generator by character set, code length and start string.</summary>
    public override string ToString()
    {
        return string.Format("<字符集:{0},长度:{1},起始:{2}>", CharList, Length, FromStr);
    }
}
}