在工程实际中,经常需要将python代码转化成c++代码,为了获得一样的结果,需要保证算法的一致性。最近在目标检测的算法中,发现python默认排序算法为改进版的快速排序,描述如下:
* Quick sort is usually the fastest, but the worst case scenario is O(N^2) so
* the code switches to the O(NlogN) worst case heapsort if not enough progress
* is made on the large side of the two quicksort partitions. This improves the
* worst case while still retaining the speed of quicksort for the common case.
* This is variant known as introsort.
*
*
* def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2):
* # sort remainder with heapsort if we are not making enough progress
* # we arbitrarily choose 2 * log(n) as the cutoff point
* if recursion_limit < 0:
* heapsort(lower, higher)
* return
*
* if lower < higher:
* pivot_pos = partition(lower, higher)
* # recurse into smaller first and leave larger on stack
* # this limits the required stack space
* if (pivot_pos - lower > higher - pivot_pos):
* quicksort(pivot_pos + 1, higher, recursion_limit - 1)
* quicksort(lower, pivot_pos, recursion_limit - 1)
* else:
* quicksort(lower, pivot_pos, recursion_limit - 1)
* quicksort(pivot_pos + 1, higher, recursion_limit - 1)
*
*
* the below code implements this converted to an iteration and as an
* additional minor optimization skips the recursion depth checking on the
* smaller partition as it is always less than half of the remaining data and
* will thus terminate fast enough
从上面的内容可以看到,改进的算法结合了快速排序和堆排序,如果我们不按照这个算法去实现c++版的排序算法,很难保证结果一致性。
以下是c++代码,复现了python语言numpy库中的argsort的快速排序算法
#ifndef __myself__sort__h__h__ #define __myself__sort__h__h__ #define PYA_QS_STACK 100 #define SMALL_QUICKSORT 15 #define SMALL_MERGESORT 20 template<typename T> void TYPE_SWAP(T* a, T* b) { T t = *a; *a = *b; *b = t; } template<typename T> bool TYPE_LT(T a, T b) { return a<b; } int npy_get_msb(int n) { int k; for(k=0;n>1;n>>=1) ++k; return k; } template<typename T> int heap_sort(T *start, int n) { T tmp, *a; int i,j,l; /* The array needs to be offset by one for heapsort indexing */ a = start - 1; //先建立大顶堆 for (l = n>>1; l > 0; --l) { tmp = a[l]; for (i = l, j = l<<1; j <= n;) { //因为假设根结点的序号是1,所以i结点左孩子和右孩子分别为2j和2i+1 if (j < n && TYPE_LT(a[j], a[j+1])) //左右孩子的比较 { j += 1;//j为较大的记录的下标 } if (TYPE_LT(tmp, a[j])) { //将孩子结点上位,则以孩子结点的位置进行下一轮的筛选 a[i] = a[j]; i = j; j += j; } else { break; } } a[i] = tmp;//插入最开始不和谐的元素 } //进行排序 for (; n > 1;) { //最后一个元素和第一元素进行交换 tmp = a[n]; a[n] = a[1]; n -= 1; //然后将剩下的无序元素继续调整为大顶堆 for (i = 1, j = 2; j <= n;) { if (j < n && TYPE_LT(a[j], a[j+1])) { j++; } if (TYPE_LT(tmp, a[j])) { a[i] = a[j]; i = j; j += j; } else { break; } } a[i] = tmp; } return 0; } template<typename T> int quick_sort(T *start, int num) { T vp; T *pl = start; T *pr = pl + num - 1; T *stack[PYA_QS_STACK]; T **sptr = stack; T *pm, *pi, *pj, *pk; int depth[PYA_QS_STACK]; int * psdepth = depth; int cdepth = npy_get_msb(num) * 2; for (;;) { if (cdepth < 0) { heap_sort(pl, pr - pl + 1); goto stack_pop; } while ((pr - pl) > SMALL_QUICKSORT) { /* quicksort partition */ pm = pl + ((pr - pl) >> 1); if (TYPE_LT(*pm, *pl)) TYPE_SWAP(pm, pl); if (TYPE_LT(*pr, *pm)) TYPE_SWAP(pr, pm); if (TYPE_LT(*pm, *pl)) TYPE_SWAP(pm, pl); vp = *pm; pi = pl; pj = pr - 1; TYPE_SWAP(pm, pj); for (;;) { do ++pi; while (TYPE_LT(*pi, vp)); do --pj; while (TYPE_LT(vp, *pj)); if (pi >= pj) { break; } TYPE_SWAP(pi,pj); } pk = pr - 1; TYPE_SWAP(pi, pk); /* push largest partition on stack */ if (pi - pl < pr - pi) { *sptr++ = pi + 1; *sptr++ = pr; pr = pi - 1; } else { *sptr++ = pl; *sptr++ = pi - 1; pl = pi + 1; } *psdepth++ = --cdepth; } /* insertion sort */ for (pi = pl + 1; pi <= pr; ++pi) { vp = *pi; pj = pi; pk = pi - 1; while (pj > pl && TYPE_LT(vp, *pk)) { *pj-- = *pk--; } *pj = vp; } stack_pop: if (sptr == stack) { break; } pr = *(--sptr); pl = *(--sptr); cdepth = *(--psdepth); } return 0; } #endif
使用方法:
#include <iostream> #include <fstream> #include <cstdlib> #include <vector> #include <map> #include "mysort.h" using namespace std; typedef struct tagELEM { double value; int index; bool operator < (const tagELEM& e) { return value<e.value; } }ELEM, *PELEM; int main(int argc, char **argv) { fstream fdata("data.txt", std::ios::in); if(!fdata.is_open()) return -1; fstream fresult("result.txt", std::ios::in); if(!fresult.is_open()) return -1; char data_buffer[255] ={0}; char result_buffer[255] ={0}; multimap<double, int> mapScore; vector<int> vecIdx; int idx = 0; ELEM elems[15280]; while(!fresult.eof()) { fdata.getline(data_buffer, 255); fresult.getline(result_buffer, 255); if(data_buffer[0]==0) continue; elems[idx].index = idx; elems[idx].value = atof(data_buffer); idx++; vecIdx.push_back(atoi(result_buffer)); } quick_sort(elems, 15280);// 0, 15279);//, 15280); for(int i=0; i<15280; i++) { if(vecIdx[i] != elems[i].index) { std::cout<<i<<" "<<elems[i].index<<" "<<vecIdx[i]<<" ---------------------"<<elems[i].value<<std::endl; } else { std::cout<<i<<" "<<elems[i].index<<" "<<vecIdx[i]<<" "<<elems[i].value<<std::endl; } } fdata.close(); fresult.close(); return 0; }
data.txt为保存score的文件,result.tx保存了python排序结果
参考:
堆排序:http://www.cnblogs.com/mengdd/archive/2012/11/30/2796845.html
快速排序:http://blog.csdn.net/morewindows/article/details/6684558
python排序代码:https://github.com/numpy/numpy/tree/master/numpy/core/src/npysort