• OpenCL 操作流程


    摘自《OpenCL 编程指南》Page 29

    HelloWorld.cpp 中的main()函数会实现或调用一组函数,完成以下操作:

    (1) 在第一个可用平台上创建OpenCL上下文

    (2)在第一个可用设备上创建命令队列  

    (3)加载一个内核文件(HelloWorld.cl)并将它构建到程序对象中

    (4)为HelloWorld.cl 中的内核函数hello_kernel()创建一个内核对象

    (5)为内核函数的参数(a,b, result)创建内存对象

    (6)将待执行的内核排队

    (7)将内核结果读回结果缓冲区

    (1)、(2) 对应:initContext(),

      clGetPlatformIDs()、clGetContextInfo()、clCreateContextFromType()、clCreateCommandQueue()

    (3)对应: initPrograms()

    clCreateProgramWithSource()、clBuildProgram()

    (4)对应:clCreateKernel()(-> kernels.push_back(QCLKernel()))

    (5)在程序中创建参数对象,调用 clCreateBuffer等创建内存对象,供内核执行

    (6)clSetKernelArg()、clEnqueueNDRangeKernel()(在数据集上分布内核)

    (7)clEnqueueReadBuffer() 从内核中读回结果

    #include <cl/cl.h>
    #include <cstdlib>
    #include <fstream>
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>
    using namespace std;
    
    // Number of float elements in each input array (a, b) and in the result array.
    const int ARRAY_SIZE = 10;
    
    // Creates an OpenCL context on the first platform reported by the runtime.
    // A GPU context is attempted first; if that fails, a CPU context is tried.
    // Returns NULL (after printing a diagnostic) when no platform is found or
    // when neither kind of context can be created.
    cl_context createContext()
    {
        // Step 1: request a single platform ID together with the platform count.
        cl_platform_id platform;
        cl_uint platformCount;
        cl_int status = clGetPlatformIDs(1, &platform, &platformCount);
        if (status != CL_SUCCESS || platformCount == 0)
        {
            std::cerr << "Failed to find any OpenCL platforms." << std::endl;
            return NULL;
        }

        // Step 2: zero-terminated property list that names the chosen platform.
        cl_context_properties props[] =
        {
            CL_CONTEXT_PLATFORM,
            (cl_context_properties) platform,
            0
        };

        // Preferred path: a context covering the platform's GPU devices.
        cl_context ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
        if (status == CL_SUCCESS)
        {
            return ctx;
        }

        // Fallback path: a context covering the platform's CPU devices.
        std::cout << "Could not create GPU context, trying CPU..." << std::endl;
        ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_CPU, NULL, NULL, &status);
        if (status != CL_SUCCESS)
        {
            std::cerr << "Failed to create an OpenCL GPU or CPU context. ";
            return NULL;
        }
        return ctx;
    }
    
    // Creates a command queue on the first device that belongs to `context`.
    //
    // context: the OpenCL context whose device list is queried.
    // device:  out-parameter; on success receives the ID of the device the
    //          queue was created on (skipped when the pointer is NULL).
    // Returns the new command queue, or NULL after printing a diagnostic when
    // the device list cannot be obtained, is empty, or queue creation fails.
    cl_command_queue createCommandQueue(cl_context context, cl_device_id *device)
    {
        // 1. Get the size in bytes of the context's device list.
        size_t deviceBufferSize = 0;
        cl_int errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
        if (errNum != CL_SUCCESS)
        {
            std::cerr << "Failed call to clGetContextInfo()";
            return NULL;
        }
        // Work with an element count so a byte size smaller than one device ID
        // is also treated as "no devices" instead of indexing an empty array.
        const size_t deviceCount = deviceBufferSize / sizeof(cl_device_id);
        if (deviceCount == 0)
        {
            std::cerr << "No devices available.";
            return NULL;
        }
        // 2. Fetch the device IDs. The vector owns the storage, so it is freed
        //    on every return path below (the raw new[] used to leak on errors).
        std::vector<cl_device_id> devices(deviceCount);
        errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceCount * sizeof(cl_device_id), devices.data(), NULL);
        if (errNum != CL_SUCCESS)
        {
            std::cerr << "Failed to get device IDs";
            return NULL;
        }
        // 3. In this example, we just choose the first available device. In a real program, you would
        // likely use all available devices or choose the highest performance device
        cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);
        if (commandQueue == NULL)
        {
            std::cerr << "Failed to create commandQueue for device 0";
            return NULL;
        }
        if (device != NULL)
        {
            *device = devices[0];
        }
        return commandQueue;
    }
    
    // Loads OpenCL C source from `fileName`, creates a program object from it in
    // `context`, and builds it.
    //
    // context:  the OpenCL context the program is created in.
    // device:   the device whose build log is printed when the build fails.
    // fileName: path of the kernel source file to read.
    // Returns the built program, or NULL after printing a diagnostic when the
    // file cannot be opened, the program cannot be created, or the build fails
    // (in the build-failure case the program object is released before returning).
    cl_program createProgram(cl_context context, cl_device_id device, const char* fileName)
    {
        std::ifstream kernelFile(fileName, std::ios::in);
        if (!kernelFile.is_open())
        {
            std::cerr << "Failed to open file for reading: " << fileName << std::endl;
            return NULL;
        }
        // Read the entire file into one string.
        std::ostringstream oss;
        oss << kernelFile.rdbuf();

        const std::string srcStdStr = oss.str();
        const char *srcStr = srcStdStr.c_str();
        // A NULL lengths array means the single source string is NUL-terminated.
        cl_program program = clCreateProgramWithSource(context, 1, &srcStr, NULL, NULL);
        if (program == NULL)
        {
            std::cerr << "Failed to create CL program for from source.";
            return NULL;
        }
        const cl_int errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
        if (errNum != CL_SUCCESS)
        {
            // Determine the reason for the error
            std::cerr << "Error in kernel: " << std::endl;
            // Ask for the log size first so the log is never truncated, and print
            // it only when both queries succeed (the old fixed buffer was printed
            // uninitialised if the query failed).
            size_t logSize = 0;
            if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize) == CL_SUCCESS
                && logSize > 0)
            {
                std::string buildLog(logSize, '\0');
                if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, &buildLog[0], NULL) == CL_SUCCESS)
                {
                    // c_str() stops at the first NUL, matching the old char-array output.
                    std::cerr << buildLog.c_str();
                }
            }
            clReleaseProgram(program);
            return NULL;
        }
        return program;
    }
    
    // Creates the three buffers used by the kernel and stores them in memObjects:
    //   memObjects[0] - read-only buffer initialised from host array `a`
    //   memObjects[1] - read-only buffer initialised from host array `b`
    //   memObjects[2] - read-write buffer with no host data
    // Each buffer holds ARRAY_SIZE floats. Returns true when all three handles
    // are non-NULL; otherwise prints a diagnostic and returns false. Handles
    // that were created are left in memObjects either way.
    bool createMemObject(cl_context context, cl_mem memObjects[3], float *a, float *b)
    {
        const size_t bufferBytes = sizeof(float) * ARRAY_SIZE;
        const cl_mem_flags inputFlags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR;

        memObjects[0] = clCreateBuffer(context, inputFlags, bufferBytes, a, NULL);
        memObjects[1] = clCreateBuffer(context, inputFlags, bufferBytes, b, NULL);
        memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, bufferBytes, NULL, NULL);

        // All three creations are attempted before any handle is inspected.
        for (int slot = 0; slot < 3; ++slot)
        {
            if (memObjects[slot] == NULL)
            {
                std::cerr << "Error creating memory objects." << std::endl;
                return false;
            }
        }
        return true;
    }
    
    int main(int argc, char** argv)
    {
        cl_context context = 0;
        cl_command_queue commandQueue = 0;
        cl_program program = 0;
        cl_device_id device = 0;
        cl_kernel kernel = 0;
        cl_mem memObjects[3] = {0, 0, 0};
        cl_int errNum;
    
        // 1.  Context
        context = createContext();
        if (context == NULL)
        {
            cerr << "Failed to create OpenCL context." << endl;
                system("pause");
            return 1;
        }
        // 2. Create a command-queue on the first device available on the created context
        commandQueue = createCommandQueue(context, &device);
        if(commandQueue == NULL)
        {
            // Cleanup();
                system("pause");
            return 1;
        }
        // 3. create OpenCL program from HelloWorld.cl kernel source
        program = createProgram(context, device , "HelloWorld.cl");
        if (program == NULL)
        {
            //Cleanup
                system("pause");
            return 1;
        }
        // 4. Create OpenCL kernel
        kernel = clCreateKernel(program, "hello_kernel", NULL);
        if(kernel == NULL)
        {
            cerr << "Failed to create kernel " << endl;
            // Cleanup();
            system("pause");
            return 1;
        }
        // 5. Create memory objects that will be used as arguments to kernel.
        float result[ARRAY_SIZE];
        float a[ARRAY_SIZE];
        float b[ARRAY_SIZE];
        for (int i = 0; i < ARRAY_SIZE; i++)
        {
            a[i] = i;
            b[i] = i * 2;
        }
        if (!createMemObject(context, memObjects, a, b))
        {
            // Cleanup()
             system("pause");
            return 1;
        }
        // 6. set the kernel arguments (result, a, b)
        errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]);
        errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]);
        errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]);
        if (errNum != CL_SUCCESS)
        {
            cerr << "Error setting kernel arguments." << endl;
            // Cleanup()
            system("pause");
            return 1;
        }
        size_t globalWorkSize[1] = {ARRAY_SIZE};
        size_t localWorkSize[1] = {1};
        // 7. Queue the kernel up for execution across the array
        errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
        if (errNum != CL_SUCCESS)
        {
            cerr << "Erro queuing kernel for execution." << endl;
            // Cleanup()
            system("pause");
            return 1;
        }
        // 8. Read the output buffer back to the host
        errNum = clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0, ARRAY_SIZE * sizeof(float), result, 0, NULL, NULL);
        if (errNum != CL_SUCCESS)
        {
            cerr << "Error reading result buffer." << endl;
            //Cleanup();
            system("pause");
            return 1;
        }
        // 9. Output the result buffer
        for (int i = 0; i < ARRAY_SIZE; i++)
        {
            cout << result[i] << " ";
        }
        cout << endl;
        cout << "Exectued successfully. " << endl;
        // Cleanup()
        system("pause");
        return 0;
    }

     注意:clCreateBuffer() 执行时只是创建了内存对象(initial()),此时需要紧接着执行 clEnqueueNDRangeKernel(),才能完成内存中的数据 a、b、result 到显存的拷贝,即这两个调用需要位于同一个函数作用域内。否则,需要在 clCreateBuffer() 之后立即调用 clEnqueueWriteBuffer()(即需要先 read() 一下),手动将内容写入显存,这样就无需立即调用 clEnqueueNDRangeKernel()。

  • 相关阅读:
    TypeScript02 方法特性【参数种类、参数个数】、generate方法、析构表达式、箭头表达式、循环
    TypeScript01 编译环境的搭建、字符串特性、类型特性
    Angular04 组件动态地从外部接收值、在组件中使用组件
    Angular03 将数据添加到组件中
    Angular02 通过angular-cli来搭建web前端项目
    Angular01 利用grunt搭建自动web前端开发环境、利用angular-cli搭建web前端项目
    IDEA01 创建java项目、创建web项目
    Struts2框架07 Struts2 + Spring + Mybatis 整合
    素数应用
    二重指针实现排序
  • 原文地址:https://www.cnblogs.com/wenshanzh/p/2990788.html
Copyright © 2020-2023  润新知