• 使用二维NDRange workgroup


    作为初学者,经过多次上网搜索,你一定会看到迈克老狼的向量加法示例。不知道你是否和我一样,刚开始并不能很准确地理解他在 add.cl 中所写代码的意思,源码如下:

    #pragma OPENCL EXTENSION cl_amd_printf : enable

    /*
     * vecadd - element-wise addition c = a + b, indexed over a 2-D NDRange.
     *
     * Each work-item handles exactly one element. Its 2-D global ID (x, y) is
     * flattened to the linear index x + y * width, where width is the global
     * size in dimension 0.
     *
     * a, b: input buffers (read only).
     * c:    output buffer; element [x + y * width] is written by work-item (x, y).
     *
     * NOTE(review): the buffers are assumed to hold at least
     * get_global_size(0) * get_global_size(1) floats - confirm on the host side.
     */
    __kernel void vecadd(__global const float* a, __global const float* b, __global float* c)
    {
        int x = get_global_id(0);
        int y = get_global_id(1);
        int width = get_global_size(0);

        /* Linear index of this work-item's element. */
        int idx = x + y * width;

        /* Debug output from a single work-item only, to keep the log readable.
         * The work-item functions return size_t, so cast to match "%d". */
        if (x == 1 && y == 1) {
            printf("%d, %d,%d,%d,%d,%d\n",
                   (int)get_local_size(0), (int)get_local_size(1),
                   (int)get_local_id(0), (int)get_local_id(1),
                   (int)get_group_id(0), (int)get_group_id(1));
        }

        c[idx] = a[idx] + b[idx];
    }
    
    我们在k

    这里面我把二维的这些值全部罗列出来如下:

    /* Global ID of this work-item in dimension 0 (x) and dimension 1 (y). */
    int idx = get_global_id(0);
    int idy = get_global_id(1);


    /* Total number of work-items in each dimension of the NDRange. */
    uint wiWidth = get_global_size(0);
    uint wiHeight = get_global_size(1);

    /* ID of the work-group this work-item belongs to, per dimension. */
    uint gix_t = get_group_id(0);
    uint giy_t = get_group_id(1); 

    /* Number of work-groups in each dimension. */
    uint num_of_blocks_x = get_num_groups(0);
    uint num_of_blocks_y = get_num_groups(1);

    /* ID of this work-item inside its own work-group, per dimension. */
    uint lix = get_local_id(0);
    uint liy = get_local_id(1);

    你会奇怪为什么他的add.cl优化会这么写:

    c[x + y * width] = a[x + y * width] + b[x + y * width];

    下面进行分析:

    add.cpp关键的代码部分如下(只罗列出部分代码):

    /* Global NDRange extent used by the demo: width x height work-items. */
    #define width 8//256
    #define height 16//256
    /* Total number of work-items. Parenthesized so the product is evaluated as
     * one unit when the macro appears inside a larger expression
     * (e.g. total / NWITEMS). */
    #define NWITEMS  (width*height)//262144
    
    /* Global work size: width work-items in dimension 0, height in dimension 1. */
    size_t globalThreads[] = {width, height};
    	size_t localx, localy;
    
    	/* Work-group (local) size: 2 work-items in x, 4 in y. */
    	localx = 2;
    	localy = 4;
    
    	size_t localThreads[] = {localx, localy};
    
    	/* Launch with work_dim = 2 and no global offset (NULL).
    	 * With 8 x 16 global and 2 x 4 local this yields 4 x 4 work-groups.
    	 * The status code returned by the call is not checked here. */
    	clEnqueueNDRangeKernel( queue,
    	kernel,
    	2,
    	NULL,
    	globalThreads,
    	localThreads, 0, NULL, &ev);
    

    我自己的add.cl代码如下(里面添加了打印,以便我们可以看出结果):

       int idx = get_global_id(0);
        int idy = get_global_id(1);
         
        uint wiWidth  = get_global_size(0);
        uint wiHeight  = get_global_size(1);
        printf("Magnum Global idx = %d, idy = %d, sizeX =%d,sizeY =%d
    ",idx,idy,wiWidth,wiHeight);
             
        uint gix_t = get_group_id(0); 
        uint giy_t = get_group_id(1);    
        uint num_of_blocks_x = get_num_groups(0); 
        uint num_of_blocks_y = get_num_groups(1); 
    
        printf("Magnum Group idx = %d, idy = %d, blockX=%d,blockY=%d
    ",gix_t,gix_t,num_of_blocks_x,num_of_blocks_y);
    
        uint lix = get_local_id(0); 
        uint liy = get_local_id(1);
    
        uint LocalX = get_local_size(0);
        uint LocalY = get_local_size(1);
    
        printf("Magnum Local idx = %d, idy = %d, localX=%d,localY=%d
    
    ",lix,liy,LocalX,LocalY);

    输出的结果如下(注意:这份输出是在 Group 那一行把 gix_t 打印了两次的情况下得到的,所以 Group 的 idy 一列实际上显示的是 x 方向的组号,并不是真正的 get_group_id(1)):

    Magnum Global idx = 0, idy = 0, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 0, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 1, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 1, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 2, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 2, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 3, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 3, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 0, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 0, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 1, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 1, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 2, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 2, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 3, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 3, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 0, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 0, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 1, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 1, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 2, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 2, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 3, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 3, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 0, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 0, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 1, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 1, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 2, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 2, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 3, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 3, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 4, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 4, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 5, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 5, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 6, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 6, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 7, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 7, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 4, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 4, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 5, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 5, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 6, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 6, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 7, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 7, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 4, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 4, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 5, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 5, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 6, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 6, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 7, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 7, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 4, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 4, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 5, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 5, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 6, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 6, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 7, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 7, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 8, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 8, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 9, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 9, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 10, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 10, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 11, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 11, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 8, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 8, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 9, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 9, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 10, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 10, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 11, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 11, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 8, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 8, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 9, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 9, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 10, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 10, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 11, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 11, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 8, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 8, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 9, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 9, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 10, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 10, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 11, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 11, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 12, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 12, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 13, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 13, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 14, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 14, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 0, idy = 15, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 1, idy = 15, sizeX =8,sizeY =16
    Magnum Group idx = 0, idy = 0, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 12, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 12, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 13, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 13, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 14, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 14, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 2, idy = 15, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 3, idy = 15, sizeX =8,sizeY =16
    Magnum Group idx = 1, idy = 1, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 12, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 12, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 13, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 13, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 14, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 14, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 4, idy = 15, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 5, idy = 15, sizeX =8,sizeY =16
    Magnum Group idx = 2, idy = 2, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 12, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 12, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 0, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 13, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 13, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 1, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 14, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 14, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 2, localX=2,localY=4
    
    Magnum Global idx = 6, idy = 15, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 0, idy = 3, localX=2,localY=4
    
    Magnum Global idx = 7, idy = 15, sizeX =8,sizeY =16
    Magnum Group idx = 3, idy = 3, blockX=4,blockY=4
    Magnum Local idx = 1, idy = 3, localX=2,localY=4
    View Code

    从这个结果首先能说明几个问题:

    首先我们在:

    1. clEnqueueNDRangeKernel( queue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, &ev); 设置维度是2,否则add.cl中所有类似get_local_size(1)将只会返回一。
    2. 设置 GlobalThreads 和 LocalThreads 时,两个维度不一定要相等,比如 GlobalThreads 不一定是 {8, 8},也可以是 {8, 16};LocalThreads 也是一样。
    3. 理解opencl中的内存分布可以理解为二维数组,而且内存寻址的方向就像是:左上角(0,0)为原点,一行一行的扫描下去。
    4. WorkGroup的大小,目前我认为X,Y可以分开来理解(后面若发现错误我会及时改正) WorkGroup number(x) = GlobalThread(x)/LocalThread(x).
    5. 最小的单位是LocalX * LocalY, 然后用总的GlobalThreads按照这个单位进行分割成一个一个的组。每个二维的小单位还是一行一行的扫描过去的。目前我们的这个是8*16,你会发现总的Thread也是8*16,对应的每一个线程执行一次加法操作,有人会问可不可以不用这么多呢,用4*16个Threads,每个线程里面做两次加法动作,其实是可以的,这个原则是根据你的具体的Device来说的,当前我们说的是Device是GPU,比如你的GPU当前支持最大的线程数是1024*1024,这种情况,你不用多的,而是用少的线程来做,会降低你的效率,你需要尽可能的让Device满负荷工作这样才能达到提高运算速度。

    继续分析为什么 NDRange改成二维的之后,add.cl变成:

    c[x + y * width] = a[x + y * width] + b[x + y * width];

    因为在CPU端数组 c 一共有 8*16 个元素,通过打印可以看到,在 c[x + y*width] 中,x 最大是7,width 是8,y 最大是15,所以下标 x + y*width 的取值范围是 0~127,正好覆盖整个数组,只有这样才能做完所有的向量相加。

  • 相关阅读:
    hdu_6836 Expectation
    hdu_6820 Tree
    luogu P1039 侦探推理
    自己动手实现区块链
    第六章 钱包管理界面和区块链浏览器
    第五章 自己动手写比特币之交易中继
    第四章 自己动手写比特币之钱包
    第三章 自己动手写区块链之交易
    第二章 工作量证明和挖矿
    第一章:最小可行区块链
  • 原文地址:https://www.cnblogs.com/biglucky/p/3757214.html
Copyright © 2020-2023  润新知