参考:《opencl实战》
双调排序
一个序列:进行升序排列
6 1 4 5 7 2 3 8
a、左右两部分分别按升序、降序排列
1 4 5 6 8 7 3 2
b、左右对应位置比较,小的左移
1 4 3 2 8 7 5 6
c、左右都整成升序
1 2 3 4 5 6 7 8
注:四个元素如何排序
opencl 中可用代码如下:
/* Shuffle masks: each one selects the lane that a given lane is compared with. */
uint4 mask1 = (uint4)(1, 0, 3, 2);
uint4 mask2 = (uint4)(2, 3, 0, 1);
uint4 mask3 = (uint4)(3, 2, 1, 0);

/* Offsets added to the comparison result to build the final shuffle indices. */
int4 add1 = (int4)(1, 1, 3, 3);
int4 add2 = (int4)(2, 3, 2, 3);
int4 add3 = (int4)(1, 2, 2, 3);
int4 add4 = (int4)(4, 5, 6, 7);

/* input, input1, input2: float4 values; dir: 0 = ascending, -1 = descending */
#define UP 0
#define DOWN -1

/*
 * Sort elements in a vector.
 *
 * Expands to plain statements, so the expansion site must already have
 * `comp` (int4), mask1..mask3 and add1..add3 in scope.
 * `<` binds tighter than `^`, so each comparison result is XORed with dir.
 */
#define SORT_VECTOR(input, dir)                                   \
   comp = input < shuffle(input, mask1) ^ dir;                    \
   input = shuffle(input, as_uint4(comp + add1));                 \
   comp = input < shuffle(input, mask2) ^ dir;                    \
   input = shuffle(input, as_uint4(comp * 2 + add2));             \
   comp = input < shuffle(input, mask3) ^ dir;                    \
   input = shuffle(input, as_uint4(comp + add3));

/*
 * Sort elements between two vectors.
 *
 * Expands to plain statements, so the expansion site must already have
 * `temp` (same type as input1), `comp` (int4) and add4 in scope.
 * `temp` keeps the old input1 because input1 is overwritten before
 * input2 is recomputed.
 */
#define SWAP_VECTORS(input1, input2, dir)                         \
   temp = input1;                                                 \
   comp = (input1 < input2 ^ dir) * 4 + add4;                     \
   input1 = shuffle2(input1, input2, as_uint4(comp));             \
   input2 = shuffle2(input2, temp, as_uint4(comp));
demo: 对8个数值排序
1 /**************************** kernel *******************************/ 2 #define UP 0 3 #define DOWN -1 4 5 /* Sort elements in a vector */ 6 #define SORT_VECTOR(input, dir) 7 comp = input < shuffle(input, mask1) ^ dir; 8 input = shuffle(input, as_uint4(comp + add1)); 9 comp = input < shuffle(input, mask2) ^ dir; 10 input = shuffle(input, as_uint4(comp * 2 + add2)); 11 comp = input < shuffle(input, mask3) ^ dir; 12 input = shuffle(input, as_uint4(comp + add3)); 13 14 /* Sort elements between two vectors */ 15 #define SWAP_VECTORS(input1, input2, dir) 16 temp = input1; 17 comp = (input1 < input2 ^ dir) * 4 + add4; 18 input1 = shuffle2(input1, input2, as_uint4(comp)); 19 input2 = shuffle2(input2, temp, as_uint4(comp)); 20 21 __kernel void bsort8(__global float4 *data, int dir) { 22 23 float4 input1, input2, temp; 24 int4 comp; 25 26 uint4 mask1 = (uint4)(1, 0, 3, 2); 27 uint4 mask2 = (uint4)(2, 3, 0, 1); 28 uint4 mask3 = (uint4)(3, 2, 1, 0); 29 30 int4 add1 = (int4)(1, 1, 3, 3); 31 int4 add2 = (int4)(2, 3, 2, 3); 32 int4 add3 = (int4)(1, 2, 2, 3); 33 int4 add4 = (int4)(4, 5, 6, 7); 34 35 input1 = data[0]; 36 input2 = data[1]; 37 38 SORT_VECTOR(input1, UP) 39 SORT_VECTOR(input2, DOWN) 40 41 SWAP_VECTORS(input1, input2, dir) 42 43 SORT_VECTOR(input1, dir) 44 SORT_VECTOR(input2, dir) 45 46 data[0] = input1; 47 data[1] = input2; 48 } 49 50 51 /************************************** 主机程序 ***************************************/ 52 #define _CRT_SECURE_NO_WARNINGS 53 #define PROGRAM_FILE "bsort8.cl" 54 #define KERNEL_FUNC "bsort8" 55 56 #define ASCENDING 0 57 #define DESCENDING -1 58 59 #include <stdio.h> 60 #include <stdlib.h> 61 #include <string.h> 62 #include <time.h> 63 64 #ifdef MAC 65 #include <OpenCL/cl.h> 66 #else 67 #include <CL/cl.h> 68 #endif 69 70 /* Find a GPU or CPU associated with the first available platform */ 71 cl_device_id create_device() { 72 73 cl_platform_id platform; 74 cl_device_id dev; 75 int err; 76 77 /* Identify a platform */ 78 err = 
clGetPlatformIDs(1, &platform, NULL); 79 if(err < 0) { 80 perror("Couldn't identify a platform"); 81 exit(1); 82 } 83 84 /* Access a device */ 85 err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &dev, NULL); 86 if(err == CL_DEVICE_NOT_FOUND) { 87 err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev, NULL); 88 } 89 if(err < 0) { 90 perror("Couldn't access any devices"); 91 exit(1); 92 } 93 94 return dev; 95 } 96 97 /* Create program from a file and compile it */ 98 cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) { 99 100 cl_program program; 101 FILE *program_handle; 102 char *program_buffer, *program_log; 103 size_t program_size, log_size; 104 int err; 105 106 /* Read program file and place content into buffer */ 107 program_handle = fopen(filename, "r"); 108 if(program_handle == NULL) { 109 perror("Couldn't find the program file"); 110 exit(1); 111 } 112 fseek(program_handle, 0, SEEK_END); 113 program_size = ftell(program_handle); 114 rewind(program_handle); 115 program_buffer = (char*)malloc(program_size + 1); 116 program_buffer[program_size] = '