一、谁能把这个程序的性能提升一倍?---并行排序算法
http://www.cnblogs.com/onlytiancai/archive/2009/04/21/1440829.html
如下,一组4元矢量的排序,如何把排序时间缩减一半?可以用并行算法。
using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace Vector4Test
{
/// <summary>
/// A four-component vector (W, X, Y, Z) with an extra slot for a precomputed length.
/// </summary>
public class Vector
{
    /// <summary>The four components of the vector.</summary>
    public double W, X, Y, Z;

    /// <summary>
    /// Scratch value used as the sort key: the B and C routines store the vector's
    /// Euclidean length here and <c>VectorComparer2</c> compares on it.
    /// </summary>
    public double T;
}
/// <summary>
/// Orders vectors by Euclidean length, recomputing both lengths on every comparison.
/// This is the comparer used by the "A" benchmark routine.
/// </summary>
internal class VectorComparer : IComparer<Vector>
{
    /// <summary>Compares two vectors by sqrt(X^2 + Y^2 + Z^2 + W^2).</summary>
    /// <param name="c1">First vector; must not be null.</param>
    /// <param name="c2">Second vector; must not be null.</param>
    /// <returns>
    /// A negative value when <paramref name="c1"/> is shorter, zero when the lengths are
    /// equal, a positive value when <paramref name="c1"/> is longer.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public int Compare(Vector c1, Vector c2)
    {
        // The single-string ArgumentNullException constructor interprets its argument as
        // the parameter name, so the (paramName, message) overload is used instead.
        if (c1 == null)
            throw new ArgumentNullException("c1", "Both objects must not be null");
        if (c2 == null)
            throw new ArgumentNullException("c2", "Both objects must not be null");

        // Double.CompareTo places NaN consistently (below every other value). The former
        // '>' / '<' chain reported NaN as equal to everything, which is not a consistent
        // ordering for Array.Sort.
        return Length(c1).CompareTo(Length(c2));
    }

    // Deliberately uses Math.Pow and Math.Sqrt, as the original comparison did.
    private static double Length(Vector v)
    {
        return Math.Sqrt(Math.Pow(v.X, 2)
                         + Math.Pow(v.Y, 2)
                         + Math.Pow(v.Z, 2)
                         + Math.Pow(v.W, 2));
    }
}
/// <summary>
/// Orders vectors by their precomputed <c>T</c> field; the caller is responsible for
/// filling <c>T</c> before sorting.
/// </summary>
internal class VectorComparer2 : IComparer<Vector> {
    /// <summary>Compares two vectors by their <c>T</c> values.</summary>
    /// <param name="c1">First vector; must not be null.</param>
    /// <param name="c2">Second vector; must not be null.</param>
    /// <returns>
    /// A negative value when <c>c1.T</c> is smaller, zero when equal, a positive value
    /// when <c>c1.T</c> is larger.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public int Compare(Vector c1, Vector c2) {
        // ArgumentNullException(string) takes a parameter name, not a message; use the
        // two-argument overload so both are reported correctly.
        if (c1 == null)
            throw new ArgumentNullException("c1", "Both objects must not be null");
        if (c2 == null)
            throw new ArgumentNullException("c2", "Both objects must not be null");

        // CompareTo gives NaN a fixed position instead of "equal to everything",
        // keeping the ordering consistent for Array.Sort.
        return c1.T.CompareTo(c2.T);
    }
}
/// <summary>
/// Benchmark driver: times three ways (A, B, C) of sorting random vectors by length.
/// </summary>
internal class Program
{
    /// <summary>
    /// Runs A, B and C in turn, each on a freshly generated data set, prints the elapsed
    /// time of each, then waits for a key press.
    /// </summary>
    private static void Main(string[] args)
    {
        Report("A sort time: ", A);
        Report("B sort time: ", B);
        Report("C sort time: ", C);
        Console.ReadKey();
    }

    // Generates new data, times only the sort call itself, and prints the result.
    private static void Report(string caption, Action<Vector[]> sort)
    {
        Vector[] data = GetVectors();
        Stopwatch timer = Stopwatch.StartNew();
        sort(data);
        timer.Stop();
        Console.WriteLine(caption + timer.Elapsed);
    }

    /// <summary>
    /// Builds 2^15 vectors whose X, Y, Z and W are drawn from a single Random instance.
    /// T is left at its default value.
    /// </summary>
    private static Vector[] GetVectors()
    {
        const int count = 1 << 15;
        Random rng = new Random();
        Vector[] result = new Vector[count];
        for (int index = 0; index < result.Length; index++)
        {
            // Initializer members are evaluated in the order written: X, Y, Z, W.
            result[index] = new Vector
            {
                X = rng.NextDouble(),
                Y = rng.NextDouble(),
                Z = rng.NextDouble(),
                W = rng.NextDouble()
            };
        }
        return result;
    }

    /// <summary>A: sorts with a comparer that recomputes both lengths on every comparison.</summary>
    private static void A(Vector[] vectors)
    {
        IComparer<Vector> byComputedLength = new VectorComparer();
        Array.Sort(vectors, byComputedLength);
    }

    /// <summary>B: stores each length in T once (via Math.Pow), then sorts on T.</summary>
    private static void B(Vector[] vectors)
    {
        foreach (Vector v in vectors)
        {
            v.T = Math.Sqrt(Math.Pow(v.X, 2)
                            + Math.Pow(v.Y, 2)
                            + Math.Pow(v.Z, 2)
                            + Math.Pow(v.W, 2));
        }
        Array.Sort(vectors, new VectorComparer2());
    }

    /// <summary>C: same as B, but squares by plain multiplication instead of Math.Pow.</summary>
    private static void C(Vector[] vectors)
    {
        foreach (Vector v in vectors)
        {
            v.T = Math.Sqrt(v.X * v.X
                            + v.Y * v.Y
                            + v.Z * v.Z
                            + v.W * v.W);
        }
        Array.Sort(vectors, new VectorComparer2());
    }
}
}
我晕,刚开始我用的算法A,后来又写了个算法B,我还没用并行算法呢,一看B方法比A方法时间缩短了差不多两个数量级,如下
A sort time: 00:00:00.5346475
B sort time: 00:00:00.0169736
太奇怪了也,难道我的B算法二级缓存命中率比较高?谁能再把我的B方法消耗时间再降低一半,可以用任何语言,Vector类等也可以用自己的数据类型,比如结构啦,四维数组啥的,随意,只要是四元的矢量,每个分量是随机生成的,然后每个矢量的长度是根号下每个分量的平方和,满足这个条件就行。
modify by wawa at 2009-04-22 06:42
应大家回帖要求,
1、把随机数种子初始的语句放到了循环外面
2、每次执行排序重新获取新的乱序Vector
3、把B方法直接对计算出来的double[]排序换成了对vector[]的排序,因为之前的代码实际上没有对vector[]排序
4、把Vector类增加了值T,用来保存该Vector的长度。
我这里结果如下
A sort time: 00:00:00.6661531
B sort time: 00:00:00.0423115
C sort time: 00:00:00.0302426
——————————————————————————————————————————————————————
————————————————————————————————————————————————————-————
二、并行排序算法
Author:Eaglet
今天早晨看到 蛙蛙池塘 的这篇博客 谁能把这个程序的性能提升一倍?---并行排序算法 。促使我写了一个并行排序算法,这个排序算法充分利用多核CPU进行并行计算,从而提高排序的效率。
先简单说一下蛙蛙池塘 给的A,B,C 三种算法(见上面引用的那篇博客),A算法将耗时的平方和开平方计算放到比较函数中,导致Array.Sort 时,每次两两比较都要执行平方和开平方计算,其计算次数平均为 O(n log₂ n) 。 而B 将平方和开平方计算提取出来,在排序之前对每个元素只计算一次,计算次数降低到 O(n) (排序本身的比较次数仍然是 O(n log₂ n)),这也就是为什么B比A效率要高很多的缘故。C 和 B 相比,将平方函数替换成了 x*x ,由于少了函数调用和Pow函数本身的开销,效率又提高了不少。我在C的基础上编写了D算法,D算法采用并行计算技术,在我的双核笔记本电脑上数据量比较大的情况下,其排序效率较C要提高30%左右。
下面重点介绍这个并行排序算法。算法思路其实很简单,就是将要排序的数组按照处理器数量等分成若干段,然后用和处理器数量等同的线程并行对各个小段进行排序,排序结束后,再在单一线程中对这若干个已经排序的小段进行归并排序,最后输出完整的排序结果。考虑到和.Net 2.0 兼容,我没有用微软提供的并行库,而是用多线程来实现。
下面是测试结果:
n |
A |
B |
C |
D |
32768 |
0.7345 |
0.04122 |
0.0216 |
0.0254 |
65536 |
1.5464 |
0.08863 |
0.05139 |
0.05149 |
131072 |
3.2706 |
0.1858 |
0.118 |
0.108 |
262144 |
6.8423 |
0.4056 |
0.29586 |
0.21849 |
524288 |
15.0342 |
0.9689 |
0.7318 |
0.4906 |
1048576 |
31.6312 |
1.9978 |
1.4646 |
1.074 |
2097152 |
66.9134 |
4.1763 |
3.0828 |
2.3095 |
从测试结果上看,当要排序的数组长度较短时,并行排序的效率甚至还没有不进行并行排序高,这主要是多线程的开销造成的。当数组长度增大到25万以上时,并行排序的优势开始体现出来,随着数组长度的增长,排序时间最后基本稳定在单线程排序时间的 74% 左右,其中并行排序的消耗大概在50%左右,归并排序的消耗在 14%左右。由此也可以推断,如果在4CPU的机器上,其排序时间最多可以减少到单线程的 14 + 25 = 39%。8 CPU 为 14 + 12.5 = 26.5%
目前这个算法在归并算法上可能还有提高的余地,如果哪位高手能够进一步提高这个算法,不妨贴出来一起交流交流。
下面分别给出并行排序和归并排序的代码:
并行排序类 ParallelSort
ParallelSort 类是一个通用的泛型类,调用起来非常简单,下面给一个简单的int型数组的排序示例:
/// <summary>
/// Comparer for <c>int</c> values, ascending. The original listing showed only a
/// collapsed "IComparer Members" region here; this is the conventional implementation.
/// </summary>
class IntComparer : IComparer<int>
{
    /// <summary>Compares two integers.</summary>
    /// <param name="x">Left operand.</param>
    /// <param name="y">Right operand.</param>
    /// <returns>Negative when x is smaller, zero when equal, positive when x is larger.</returns>
    public int Compare(int x, int y)
    {
        // CompareTo rather than "x - y": subtraction overflows for operands of opposite
        // sign near the int limits and would report the wrong order.
        return x.CompareTo(y);
    }
}
/// <summary>
/// Sorts <paramref name="array"/> in place by handing it to ParallelSort together with
/// an IntComparer.
/// </summary>
/// <param name="array">The integers to sort.</param>
public void SortInt(int[] array)
{
    new Sort.ParallelSort<int>().Sort(array, new IntComparer());
}
只要实现一个T类型两两比较的接口,然后调用ParallelSort 的 Sort 方法就可以了,是不是很简单?
下面是 ParallelSort类的代码
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
namespace Sort
{
/// <summary>
/// ParallelSort
/// </summary>
/// <typeparam name="T"></typeparam>
/// <summary>
/// Sorts an array by cutting it into one segment per logical processor, sorting the
/// segments concurrently (thread pool plus the calling thread), and then merging the
/// sorted segments back into the original array with <c>MergeSort&lt;T&gt;</c>.
/// </summary>
/// <typeparam name="T">Element type of the array being sorted.</typeparam>
public class ParallelSort<T>
{
    /// <summary>Progress of one segment's sort.</summary>
    enum Status
    {
        Idle = 0,
        Running = 1,
        Finish = 2,
    }

    /// <summary>
    /// One segment of the input plus the state of its sort. The instance doubles as the
    /// monitor on which the worker signals completion.
    /// </summary>
    class ParallelEntity
    {
        public Status Status;
        public T[] Array;
        public IComparer<T> Comparer;
        // Exception raised while sorting this segment, or null when the sort succeeded.
        public Exception Error;

        public ParallelEntity(Status status, T[] array, IComparer<T> comparer)
        {
            Status = status;
            Array = array;
            Comparer = comparer;
        }
    }

    /// <summary>
    /// Thread-pool callback: sorts one segment, records any failure on the entity, and
    /// wakes whoever is waiting for it.
    /// </summary>
    /// <param name="stateInfo">The <c>ParallelEntity</c> to sort.</param>
    private void ThreadProc(Object stateInfo)
    {
        // Direct cast: a wrong state object should fail loudly here rather than as a
        // NullReferenceException further down.
        ParallelEntity pe = (ParallelEntity)stateInfo;

        lock (pe)
        {
            pe.Status = ParallelSort<T>.Status.Running;
        }

        Exception error = null;
        try
        {
            Array.Sort(pe.Array, pe.Comparer);
        }
        catch (Exception ex)
        {
            // Nothing may escape a pool thread: it would go unhandled there, and the
            // caller would wait forever for a Finish status that never arrives.
            error = ex;
        }

        lock (pe)
        {
            pe.Error = error;
            pe.Status = ParallelSort<T>.Status.Finish;
            Monitor.PulseAll(pe);
        }
    }

    // Blocks until the worker for this segment has published Status.Finish.
    private static void WaitForFinish(ParallelEntity pe)
    {
        lock (pe)
        {
            while (pe.Status != ParallelSort<T>.Status.Finish)
            {
                Monitor.Wait(pe);
            }
        }
    }

    /// <summary>
    /// Sorts <paramref name="array"/> in place. Falls back to a plain
    /// <c>Array.Sort</c> when only one processor is reported or the array has fewer
    /// elements than processors.
    /// </summary>
    /// <param name="array">Array to sort; must not be null.</param>
    /// <param name="comparer">
    /// Ordering to apply; when null, <c>Comparer&lt;T&gt;.Default</c> is used, matching
    /// what <c>Array.Sort</c> does for a null comparer.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// Sorting a segment on a worker thread failed; the worker's exception is the inner
    /// exception. A failure while sorting the calling thread's own segment propagates
    /// unchanged.
    /// </exception>
    public void Sort(T[] array, IComparer<T> comparer)
    {
        if (array == null)
        {
            throw new ArgumentNullException("array");
        }
        if (comparer == null)
        {
            comparer = Comparer<T>.Default;
        }

        int processorCount = Environment.ProcessorCount;

        // Too few processors or elements to be worth splitting.
        if (processorCount == 1 || array.Length < processorCount)
        {
            Array.Sort(array, comparer);
            return;
        }

        // Copy the input into processorCount segments; the last one takes the remainder.
        ParallelEntity[] partArray = new ParallelEntity[processorCount];
        int partLen = array.Length / processorCount;
        int last = processorCount - 1;
        for (int i = 0; i < processorCount; i++)
        {
            int offset = i * partLen;
            int length = (i == last) ? array.Length - offset : partLen;
            partArray[i] = new ParallelEntity(Status.Idle, new T[length], comparer);
            Array.Copy(array, offset, partArray[i].Array, 0, length);
        }

        // All but the last segment go to the pool; the last is sorted on this thread.
        for (int i = 0; i < last; i++)
        {
            ThreadPool.QueueUserWorkItem(new WaitCallback(ThreadProc), partArray[i]);
        }

        try
        {
            Array.Sort(partArray[last].Array, comparer);
        }
        finally
        {
            // Wait even when the local sort failed, so no worker is still running
            // against this call's state once Sort returns or throws.
            for (int i = 0; i < last; i++)
            {
                WaitForFinish(partArray[i]);
            }
        }

        // Surface the first worker failure on the calling thread. The segments are
        // copies, so the caller's array has not been modified at this point.
        for (int i = 0; i < last; i++)
        {
            if (partArray[i].Error != null)
            {
                throw new InvalidOperationException(
                    "Sorting a segment on a worker thread failed.", partArray[i].Error);
            }
        }

        // Merge the sorted segments back into the caller's array.
        List<T[]> source = new List<T[]>(processorCount);
        foreach (ParallelEntity pe in partArray)
        {
            source.Add(pe.Array);
        }
        MergeSort<T> mergeSort = new MergeSort<T>();
        mergeSort.Sort(array, source, comparer);
    }
}
}
多路归并排序类 MergeSort
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Sort
{
/// <summary>
/// MergeSort
/// </summary>
/// <typeparam name="T"></typeparam>
/// <summary>
/// K-way merge: combines several already-sorted arrays into one sorted destination.
/// </summary>
/// <typeparam name="T">Element type.</typeparam>
public class MergeSort<T>
{
    /// <summary>
    /// Fills <paramref name="destArray"/> with the smallest remaining element of the
    /// source arrays, one position at a time. On ties the array that appears earliest in
    /// <paramref name="source"/> wins. If the destination is shorter than the combined
    /// sources, only the first <c>destArray.Length</c> merged elements are written.
    /// </summary>
    /// <param name="destArray">Receives the merged output; must not be null.</param>
    /// <param name="source">Arrays to merge, each already sorted by <paramref name="comparer"/>.</param>
    /// <param name="comparer">Ordering to merge by; null selects <c>Comparer&lt;T&gt;.Default</c>.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="destArray"/> or <paramref name="source"/> is null.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="source"/> contains a null array, or <paramref name="destArray"/>
    /// is longer than all source arrays combined.
    /// </exception>
    public void Sort(T[] destArray, List<T[]> source, IComparer<T> comparer)
    {
        if (destArray == null)
        {
            throw new ArgumentNullException("destArray");
        }
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        if (comparer == null)
        {
            comparer = Comparer<T>.Default;
        }

        long available = 0;
        for (int i = 0; i < source.Count; i++)
        {
            if (source[i] == null)
            {
                throw new ArgumentException("Source arrays must not be null.", "source");
            }
            available += source[i].Length;
        }

        // Previously this case looped forever: once every source was exhausted the outer
        // loop kept running without advancing the destination index.
        if (destArray.Length > available)
        {
            throw new ArgumentException(
                "Destination is longer than the combined length of the source arrays.",
                "destArray");
        }

        // Read cursor into each source array; new int[] is already zero-filled.
        int[] mergePoint = new int[source.Count];

        for (int index = 0; index < destArray.Length; index++)
        {
            // Linear scan for the source whose current head is smallest. The length
            // check above guarantees at least one source still has elements.
            int min = -1;
            for (int i = 0; i < source.Count; i++)
            {
                if (mergePoint[i] >= source[i].Length)
                {
                    continue;
                }
                if (min < 0
                    || comparer.Compare(source[i][mergePoint[i]], source[min][mergePoint[min]]) < 0)
                {
                    min = i;
                }
            }

            destArray[index] = source[min][mergePoint[min]];
            mergePoint[min]++;
        }
    }
}
}
主函数及测试代码 在蛙蛙池塘代码基础上修改
using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace Vector4Test
{
/// <summary>
/// Four-component vector used as the sort payload, with room for a cached sort key.
/// </summary>
public class Vector
{
    /// <summary>Components of the vector.</summary>
    public double W, X, Y, Z;

    /// <summary>
    /// Sort key: the B, C and D routines write the vector's Euclidean length here
    /// before sorting, and <c>VectorComparer2</c> compares on it.
    /// </summary>
    public double T;
}
/// <summary>
/// Compares vectors by Euclidean length, computing both lengths afresh on each call.
/// Used by the "A" benchmark routine.
/// </summary>
internal class VectorComparer : IComparer<Vector>
{
    /// <summary>Compares two vectors by sqrt(X^2 + Y^2 + Z^2 + W^2).</summary>
    /// <param name="c1">First vector; must not be null.</param>
    /// <param name="c2">Second vector; must not be null.</param>
    /// <returns>
    /// Negative when <paramref name="c1"/> is shorter, zero when both lengths are equal,
    /// positive when <paramref name="c1"/> is longer.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public int Compare(Vector c1, Vector c2)
    {
        // The one-argument ArgumentNullException constructor expects a parameter name;
        // pass name and message separately so neither is misreported.
        if (c1 == null)
        {
            throw new ArgumentNullException("c1", "Both objects must not be null");
        }
        if (c2 == null)
        {
            throw new ArgumentNullException("c2", "Both objects must not be null");
        }

        double x = Math.Sqrt(Math.Pow(c1.X, 2)
                             + Math.Pow(c1.Y, 2)
                             + Math.Pow(c1.Z, 2)
                             + Math.Pow(c1.W, 2));
        double y = Math.Sqrt(Math.Pow(c2.X, 2)
                             + Math.Pow(c2.Y, 2)
                             + Math.Pow(c2.Z, 2)
                             + Math.Pow(c2.W, 2));

        // CompareTo orders NaN consistently; the previous '>' / '<' chain treated NaN as
        // equal to every value, which is not a valid ordering for a sort.
        return x.CompareTo(y);
    }
}
/// <summary>
/// Compares vectors by the value already stored in their <c>T</c> field; it does not
/// compute anything itself.
/// </summary>
internal class VectorComparer2 : IComparer<Vector>
{
    /// <summary>Compares two vectors by <c>T</c>.</summary>
    /// <param name="c1">First vector; must not be null.</param>
    /// <param name="c2">Second vector; must not be null.</param>
    /// <returns>
    /// Negative when <c>c1.T</c> is smaller, zero when equal, positive when <c>c1.T</c>
    /// is larger.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public int Compare(Vector c1, Vector c2)
    {
        // ArgumentNullException(string) would treat the text as a parameter name, so
        // the (paramName, message) overload is used.
        if (c1 == null)
        {
            throw new ArgumentNullException("c1", "Both objects must not be null");
        }
        if (c2 == null)
        {
            throw new ArgumentNullException("c2", "Both objects must not be null");
        }

        // CompareTo keeps the ordering consistent even if T is NaN.
        return c1.T.CompareTo(c2.T);
    }
}
/// <summary>
/// Benchmark driver comparing four ways (A, B, C, D) of sorting random vectors by length.
/// </summary>
internal class Program
{
    /// <summary>
    /// Debugging hook called after every sort. The body is intentionally empty; add a
    /// loop that writes each vector's T to inspect the sorted order.
    /// </summary>
    private static void Print(Vector[] vectors)
    {
    }

    /// <summary>
    /// Prints the data-set size, then times A, B, C and D, each on its own freshly
    /// generated data set, and finally waits for a key press.
    /// </summary>
    private static void Main(string[] args)
    {
        Vector[] firstBatch = GetVectors();
        Console.WriteLine("n = {0}", firstBatch.Length);

        Benchmark("A sort time: ", A, firstBatch);
        Benchmark("B sort time: ", B, GetVectors());
        Benchmark("C sort time: ", C, GetVectors());
        Benchmark("D sort time: ", D, GetVectors());

        Console.ReadKey();
    }

    // Times a single sort call on data prepared by the caller, reports it, then hands
    // the sorted data to Print.
    private static void Benchmark(string caption, Action<Vector[]> sorter, Vector[] data)
    {
        Stopwatch clock = Stopwatch.StartNew();
        sorter(data);
        clock.Stop();
        Console.WriteLine(caption + clock.Elapsed);
        Print(data);
    }

    /// <summary>
    /// Builds 2^21 vectors whose X, Y, Z and W come from one Random instance; T is left
    /// at its default value.
    /// </summary>
    private static Vector[] GetVectors()
    {
        const int count = 1 << 21;
        Random rng = new Random();
        Vector[] result = new Vector[count];
        for (int slot = 0; slot < count; slot++)
        {
            Vector v = new Vector();
            v.X = rng.NextDouble();
            v.Y = rng.NextDouble();
            v.Z = rng.NextDouble();
            v.W = rng.NextDouble();
            result[slot] = v;
        }
        return result;
    }

    /// <summary>A: the comparer recomputes both lengths on every comparison.</summary>
    private static void A(Vector[] vectors)
    {
        IComparer<Vector> byComputedLength = new VectorComparer();
        Array.Sort(vectors, byComputedLength);
    }

    /// <summary>B: stores each length in T once using Math.Pow, then sorts on T.</summary>
    private static void B(Vector[] vectors)
    {
        foreach (Vector v in vectors)
        {
            v.T = Math.Sqrt(Math.Pow(v.X, 2)
                            + Math.Pow(v.Y, 2)
                            + Math.Pow(v.Z, 2)
                            + Math.Pow(v.W, 2));
        }
        Array.Sort(vectors, new VectorComparer2());
    }

    /// <summary>C: like B, but squares by multiplication instead of Math.Pow.</summary>
    private static void C(Vector[] vectors)
    {
        StoreLengths(vectors);
        Array.Sort(vectors, new VectorComparer2());
    }

    /// <summary>D: same precomputation as C, but sorts with Sort.ParallelSort.</summary>
    private static void D(Vector[] vectors)
    {
        StoreLengths(vectors);
        Sort.ParallelSort<Vector> sorter = new Sort.ParallelSort<Vector>();
        sorter.Sort(vectors, new VectorComparer2());
    }

    // Writes sqrt(X*X + Y*Y + Z*Z + W*W) into T for every vector; shared by C and D.
    private static void StoreLengths(Vector[] vectors)
    {
        foreach (Vector v in vectors)
        {
            v.T = Math.Sqrt(v.X * v.X
                            + v.Y * v.Y
                            + v.Z * v.Z
                            + v.W * v.W);
        }
    }
}
}