• OpenCL入门


    初入OpenCL,做个记录。

    在Windows下开发OpenCL程序,必须先下载并安装OpenCL的SDK。现在AMD,NVIDIA,Intel均提供各自的OpenCL库,基本上大同小异。安装好SDK后新建Win32控制台项目,然后需要配置包含文件路径和库路径,具体见下图(我安装的是Intel的SDK)。

    1.其中那个包含Intel的路径就是包含cl.h文件的目录。

    2.图中那个Intel的lib目录就是库路径,即OpenCL.lib所在的目录。

    3.添加需要链接的静态库OpenCL.lib。

    配置完成后就可以开始写代码调试了,OpenCL的初始化还是很复杂的,和CUDA几行代码搞定完全没可比性,刚开始可能对流程不太熟悉,慢慢熟悉就好,当然也可以自己写个框架来做这些复杂的初始化工作。OpenCL的内核代码是即时编译的,代码中我为了方便没有从cl文件中读入Kernel代码,直接以字符串的形式定义了。

      1 #include "stdafx.h"
      2 
      3 #include <iostream>
      4 #include <fstream>
      5 #include <string.h>
      6 #include <vector>
      7 using namespace std;
      8 
      9 #if defined(__APPLE__) || defined(__MACOSX)
     10 #include <OpenCL/cl.hpp>
     11 #else
     12 #include <CL/cl.h>
     13 #endif
     14 
     15 #define KERNEL(...) #__VA_ARGS__ // stringify the arguments so kernel source can be written inline as code
     16 
     17 #define ARRAY_X_LEN 16 // global work size in dimension 0; also the row width when results are printed
     18 #define ARRAY_Y_LEN 16 // global work size in dimension 1
     19 
     20 const char *kernelSourceCode = KERNEL( // OpenCL C source of the VecAdd kernel, turned into a string by KERNEL
     21     __kernel void VecAdd(__global int *buffer1, __global int *buffer2, __global int *buffer3) // buffer3 = buffer1 + buffer2, one element per work-item
     22 {
     23         size_t idx = get_global_id(0); // work-item index in dimension 0
     24         size_t idy = get_global_id(1); // work-item index in dimension 1
     25         int dimX = get_global_size(0); // global size of dimension 0, used as the row stride
     26         int dimY = get_global_size(1); // global size of dimension 1; not used below
     27         int id = idx + idy*dimX; // flatten the 2D index into a linear offset
     28         buffer3[id] = buffer1[id] + buffer2[id]; // element-wise sum
     29     });
     30 
     31 // Runs the VecAdd kernel on a GPU device: adds two ARRAY_X_LEN x ARRAY_Y_LEN
     32 // integer arrays element by element and prints the result as a grid.
     33 // Returns 0 on success, or EXIT_FAILURE after printing a message on any error.
     34 int main()
     35 {
     36     // All resources are declared up front and start out empty so the single
     37     // cleanup section can release whatever was acquired, and so that no
     38     // goto jumps across an initialization.
     39     int exitCode = EXIT_FAILURE;
     40     cl_int status = 0;
     41     cl_uint numPlatforms = 0;
     42     size_t deviceListSize = 0;
     43     cl_platform_id platform = NULL;
     44     cl_platform_id *platforms = NULL;
     45     cl_device_id *devices = NULL;
     46     cl_context context = NULL;
     47     cl_program program = NULL;
     48     cl_kernel kernel = NULL;
     49     cl_command_queue commandQueue = NULL;
     50     cl_mem dA = NULL;
     51     cl_mem dB = NULL;
     52     cl_mem dC = NULL;
     53     int *hA = NULL;
     54     int *hB = NULL;
     55     int *hC = NULL;
     56     char vendor[100];
     57     cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
     58     const int arrayLength = ARRAY_X_LEN*ARRAY_Y_LEN;
     59     const size_t arraySize = arrayLength*sizeof(int);
     60     const size_t sourceSize[] = { strlen(kernelSourceCode) };
     61     const size_t globalThreads[] = { ARRAY_X_LEN, ARRAY_Y_LEN };
     62     const size_t localThreads[] = { 4, 4 };
     63 
     64     status = clGetPlatformIDs(0, NULL, &numPlatforms);
     65     if (status != CL_SUCCESS)
     66     {
     67         printf("获取平台数目失败");
     68         goto cleanup;
     69     }
     70     if (numPlatforms > 0)
     71     {
     72         platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id));
     73         if (platforms == NULL)
     74         {
     75             printf("初始化平台失败");
     76             goto cleanup;
     77         }
     78         status = clGetPlatformIDs(numPlatforms, platforms, NULL);
     79         if (status != CL_SUCCESS)
     80         {
     81             printf("初始化平台失败");
     82             goto cleanup;
     83         }
     84         // Prefer the NVIDIA platform; otherwise keep the last platform listed.
     85         for (cl_uint i = 0; i < numPlatforms; ++i)
     86         {
     87             platform = platforms[i];
     88             vendor[0] = '\0';
     89             // vendor is an array, so sizeof gives the whole buffer capacity.
     90             status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(vendor), vendor, NULL);
     91             if (status == CL_SUCCESS && !strcmp(vendor, "NVIDIA Corporation"))
     92             {
     93                 break;
     94             }
     95         }
     96         // Allocated with malloc, so it is released with free.
     97         free(platforms);
     98         platforms = NULL;
     99     }
    100 
    101     // When no platform was found, NULL properties are passed instead of cps.
    102     cps[1] = (cl_context_properties)platform;
    103     context = clCreateContextFromType((NULL == platform) ? NULL : cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
    104     if (status != CL_SUCCESS)
    105     {
    106         printf("创建上下文失败");
    107         goto cleanup;
    108     }
    109 
    110     // First query the byte size of the device list, then fetch the list itself.
    111     status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize);
    112     if (status != CL_SUCCESS || deviceListSize < sizeof(cl_device_id))
    113     {
    114         printf("获取设备数目失败");
    115         goto cleanup;
    116     }
    117     devices = (cl_device_id *)malloc(deviceListSize);
    118     if (devices == NULL)
    119     {
    120         printf("为设备分配空间失败");
    121         goto cleanup;
    122     }
    123     status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize, devices, NULL);
    124     if (status != CL_SUCCESS)
    125     {
    126         printf("初始化设备失败");
    127         goto cleanup;
    128     }
    129 
    130     // The kernel is compiled at run time from the source string, for devices[0] only.
    131     program = clCreateProgramWithSource(context, 1, &kernelSourceCode, sourceSize, &status);
    132     if (status != CL_SUCCESS)
    133     {
    134         printf("创建程序失败");
    135         goto cleanup;
    136     }
    137     status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
    138     if (status != CL_SUCCESS)
    139     {
    140         printf("编译程序失败");
    141         // Print the compiler diagnostics so a kernel source error can be located.
    142         size_t logSize = 0;
    143         if (clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize) == CL_SUCCESS && logSize > 0)
    144         {
    145             char *buildLog = (char*)malloc(logSize);
    146             if (buildLog != NULL && clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, logSize, buildLog, NULL) == CL_SUCCESS)
    147             {
    148                 printf("\n%s\n", buildLog);
    149             }
    150             free(buildLog);
    151         }
    152         goto cleanup;
    153     }
    154     kernel = clCreateKernel(program, "VecAdd", &status);
    155     if (status != CL_SUCCESS)
    156     {
    157         printf("创建内核失败");
    158         goto cleanup;
    159     }
    160     commandQueue = clCreateCommandQueue(context, devices[0], 0, &status);
    161     if (status != CL_SUCCESS)
    162     {
    163         printf("创建命令队列失败");
    164         goto cleanup;
    165     }
    166 
    167     // Both inputs hold 0..arrayLength-1; the output starts zeroed.
    168     hA = new int[arrayLength];
    169     hB = new int[arrayLength];
    170     hC = new int[arrayLength];
    171     memset(hC, 0, arraySize);
    172     for (int i = 0; i < arrayLength; i++)
    173     {
    174         hA[i] = i;
    175         hB[i] = i;
    176     }
    177 
    178     dA = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, arraySize, NULL, &status);
    179     if (status != CL_SUCCESS)
    180     {
    181         printf("创建内存对象失败");
    182         goto cleanup;
    183     }
    184     dB = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, arraySize, NULL, &status);
    185     if (status != CL_SUCCESS)
    186     {
    187         printf("创建内存对象失败");
    188         goto cleanup;
    189     }
    190     dC = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, arraySize, NULL, &status);
    191     if (status != CL_SUCCESS)
    192     {
    193         printf("创建内存对象失败");
    194         goto cleanup;
    195     }
    196     status = clEnqueueWriteBuffer(commandQueue, dA, CL_TRUE, 0, arraySize, hA, 0, NULL, NULL);
    197     if (status != CL_SUCCESS)
    198     {
    199         printf("输入值写入内存对象失败");
    200         goto cleanup;
    201     }
    202     status = clEnqueueWriteBuffer(commandQueue, dB, CL_TRUE, 0, arraySize, hB, 0, NULL, NULL);
    203     if (status != CL_SUCCESS)
    204     {
    205         printf("输入值写入内存对象失败");
    206         goto cleanup;
    207     }
    208     status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&dA);
    209     if (status != CL_SUCCESS)
    210     {
    211         printf("设置内核参数失败");
    212         goto cleanup;
    213     }
    214     status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&dB);
    215     if (status != CL_SUCCESS)
    216     {
    217         printf("设置内核参数失败");
    218         goto cleanup;
    219     }
    220     status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&dC);
    221     if (status != CL_SUCCESS)
    222     {
    223         printf("设置内核参数失败");
    224         goto cleanup;
    225     }
    226 
    227     // Launch ARRAY_X_LEN x ARRAY_Y_LEN work-items in 4x4 work-groups.
    228     status = clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL);
    229     if (status != CL_SUCCESS)
    230     {
    231         printf("将内核放入命令队列失败");
    232         goto cleanup;
    233     }
    234     status = clFinish(commandQueue);
    235     if (status != CL_SUCCESS)
    236     {
    237         printf("队列还没有完成");
    238         goto cleanup;
    239     }
    240     status = clEnqueueReadBuffer(commandQueue, dC, CL_TRUE, 0, arraySize, hC, 0, NULL, NULL);
    241     if (status != CL_SUCCESS)
    242     {
    243         printf("读内存对象失败");
    244         goto cleanup;
    245     }
    246 
    247     printf("结果:\n");
    248     for (int i = 0; i < arrayLength; i++)
    249     {
    250         printf("%d ", hC[i]);
    251         if ((i + 1) % ARRAY_X_LEN == 0)
    252             printf("\n");
    253     }
    254     exitCode = 0;
    255 
    256 cleanup:
    257     // Reached on success and on every failure; handles still NULL were never created.
    258     if (kernel != NULL) clReleaseKernel(kernel);
    259     if (program != NULL) clReleaseProgram(program);
    260     if (dA != NULL) clReleaseMemObject(dA);
    261     if (dB != NULL) clReleaseMemObject(dB);
    262     if (dC != NULL) clReleaseMemObject(dC);
    263     if (commandQueue != NULL) clReleaseCommandQueue(commandQueue);
    264     if (context != NULL) clReleaseContext(context);
    265     free(devices);
    266     free(platforms);
    267     delete [] hA;
    268     delete [] hB;
    269     delete [] hC;
    270     return exitCode;
    271 }

    运行结果:

  • 相关阅读:
    第9课
    FreeRTOS 定时器组
    FMC—扩展外部 SDRAM
    FreeRTOS 事件标志组
    第8课
    FreeRTOS 系统时钟节拍和时间管理
    第七课 线性表的顺序存储结构
    手把手教你调试Linux C++ 代码(一步到位包含静态库和动态库调试)
    Windows GUI代码与Windows消息问题调试利器
    谈谈数据挖掘和机器学习
  • 原文地址:https://www.cnblogs.com/tevic/p/3699167.html
Copyright © 2020-2023  润新知