• 如何使用OpenCL编写程序


    以下代码使用 OpenCL 在 GPU 上计算两个数相加(1.0 + 2.0):内核虽然启动了 1536 个工作项,但只有索引为 0 的工作项执行加法。

    #include <CL/CL.h>
    #include <iostream>

    #pragma comment( lib, "opencl.lib" )

    int main(int argc, char** argv)
    {
        cl_context hContext;
        cl_context_properties prop = CL_CONTEXT_PLATFORM;
        cl_platform_id platform;
        clGetPlatformIDs( 1, &platform, NULL );
        cl_device_id device;
        clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL );
        cl_int err;
        hContext = clCreateContext( NULL, 1, &device, NULL, NULL , &err );
        size_t nCtxDescSize;
        clGetContextInfo( hContext, CL_CONTEXT_DEVICES, 0, 0, &nCtxDescSize );
        cl_device_id* pDevices = (cl_device_id*)new char[nCtxDescSize];
        clGetContextInfo( hContext, CL_CONTEXT_DEVICES, nCtxDescSize, pDevices, NULL );

        const char* sources =
        {
            "__kernel void VectorAdd(__global const float* a, __global const float* b, __global float* c)\n"
            "{\n"
            "    // get index into global data array\n"
            "    int iGID = get_global_id(0);\n"
            "\n"
            "    // bound check (equivalent to the limit on a 'for' loop for standard/serial C code\n"
            "    if (iGID >= 1)\n"
            "    {   \n"
            "        return; \n"
            "    }\n"
            "    c[iGID] = a[iGID] + b[iGID];\n"
            "}\n"
        };

        const char** sSources = (const char**)&sources;
        cl_command_queue hCmdQueue;
        hCmdQueue = clCreateCommandQueue( hContext, pDevices[0], 0, 0 );
        cl_program hProgram;
        hProgram = clCreateProgramWithSource( hContext, 1, sSources, 0, 0 );
        clBuildProgram( hProgram, NULL, NULL, NULL, NULL, NULL );

        cl_kernel hKernel;
        hKernel = clCreateKernel( hProgram, "VectorAdd", 0 );

        int cnBlockSize = 512;
        int cnBlocks = 3;
        size_t cnDimension = cnBlocks * cnBlockSize;

        float* pA = new float[cnDimension];
        float* pB = new float[cnDimension];
        float* pC = new float[cnDimension];
        memset( pA, 0, 4 * cnDimension );
        memset( pB, 0, 4 * cnDimension );
        *pA = 1.0f;
        *pB = 2.0f;
        cl_mem MemA, MemB, MemC;
        MemA = clCreateBuffer( hContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
            cnDimension * sizeof(float), pA, 0 );
        MemB = clCreateBuffer( hContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
            cnDimension * sizeof(float), pB, 0 );
        MemC = clCreateBuffer( hContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
            cnDimension * sizeof(float), pC, 0 );

        clSetKernelArg( hKernel, 0, sizeof(cl_mem), (void*)&MemA );
        clSetKernelArg( hKernel, 1, sizeof(cl_mem), (void*)&MemB );
        clSetKernelArg( hKernel, 2, sizeof(cl_mem), (void*)&MemC );
        clEnqueueNDRangeKernel( hCmdQueue, hKernel, 1, 0, &cnDimension, 0, 0, 0, 0 );
        clEnqueueReadBuffer( hCmdQueue, MemC, CL_TRUE, 0, cnBlockSize * sizeof(float),
            pC, NULL, NULL, NULL );

        delete[] pA;
        delete[] pB;
        delete[] pC;
        delete[] pDevices;

        clReleaseMemObject( MemA );
        clReleaseMemObject( MemB );
        clReleaseMemObject( MemC );

      return 0;
    }

  • 相关阅读:
    大话设计模式(二)代理模式 工厂方法模式 原型模式 模板方法模式 迪米特法模式 外观模式
    大话设计模式(一)简单工厂模式 策略模式 单一职责原则 开放-封闭原则 依赖倒置原则 装饰模式
    eclipse为hibernate.cfg.xml添加自动提示【转】
    Caused by: java.lang.NoSuchMethodError: javax.persistence.spi.PersistenceUnitInfo.getValidationMode
    Chrome 浏览器提示adobe flash player不是最新版本
    spring mvc <mvc:default-servlet-handler /> 。
    S2SH CRUD 整合
    Ms sql行转列。汇总
    Hibernate中的数据库方言(Dialect)
    SSH整合(1)异常
  • 原文地址:https://www.cnblogs.com/LinuxHunter/p/2535546.html
Copyright © 2020-2023  润新知