• CUDA与OpenGL互操作实例


    本文要解决的问题是如何实现CUDA和OpenGL的互操作,使得GPU能够将通用计算的运算结果交给OpenGL进行绘制。

    本文的应用程序主要包括两个方面:

    1.      使用CUDA核函数生成图像数据

    2.      将数据传递给OpenGL驱动程序并进行渲染

    实现这个功能需要按如下四个步骤:

    Step1: 声明两个全局变量,它们是指向同一个缓冲区的两个不同句柄(OpenGL的缓冲区对象bufferObj和CUDA的图形资源resource),该缓冲区保存要在OpenGL和CUDA之间共享的数据;

    Step2: 选择运行应用程序的CUDA设备(cudaChooseDevice),告诉cuda运行时使用哪个设备来执行CUDA和OpenGL (cudaGLSetGLDevice);

    Step3:在OpenGL中创建像素缓冲区对象;

    Step4: 通知CUDA运行时将像素缓冲区对象bufferObj注册为图形资源,实现缓冲区共享。

    然后就可以按照一般的CUDA程序调用核函数进行计算。运行结果如下:

    /********************************************************************  
    *  SharedBuffer.cu  
    *  interact between CUDA and OpenGL  
    *********************************************************************/  
      
    #include <stdio.h>  
    #include <stdlib.h>  
    #include "GLglut.h"  
    #include "GLglext.h"  
    #include <cuda_runtime.h>  
    #include <cutil_inline.h>  
    #include <cuda.h>  
    #include <cuda_gl_interop.h>  
      
    // Resolves an OpenGL extension entry point by name (WGL, i.e. Windows only).
    #define GET_PROC_ADDRESS(str) wglGetProcAddress(str)  
    // Width and height, in pixels, of both the window and the shared image.
    #define DIM 512  
      
    // Buffer-object entry points. They start out NULL and are filled in by
    // main() through GET_PROC_ADDRESS after the GLUT window has been created.
    PFNGLBINDBUFFERARBPROC    glBindBuffer     = NULL;  
    PFNGLDELETEBUFFERSARBPROC glDeleteBuffers  = NULL;  
    PFNGLGENBUFFERSARBPROC    glGenBuffers     = NULL;  
    PFNGLBUFFERDATAARBPROC    glBufferData     = NULL;  
      
    // Step 1: two handles that refer to the same buffer.
    //   bufferObj - the OpenGL name of the pixel buffer object.
    //   resource  - the CUDA graphics-resource handle obtained by registering
    //               bufferObj with cudaGraphicsGLRegisterBuffer in main().
    GLuint bufferObj;  
    cudaGraphicsResource *resource;  
      
      
    /*
     * cudaGLKernel - fill a DIM x DIM RGBA image with a green pattern.
     *
     * ptr: device pointer to DIM*DIM uchar4 pixels stored row by row, DIM
     *      pixels per row (the layout glDrawPixels(DIM, DIM, ...) reads).
     *
     * Launch layout: a 2D grid of 2D blocks, one thread per pixel. The grid
     * must cover at least DIM x DIM threads; threads that fall outside the
     * image return without writing.
     */
    __global__ void cudaGLKernel(uchar4 *ptr)
    {
        const int x = threadIdx.x + blockIdx.x * blockDim.x;
        const int y = threadIdx.y + blockIdx.y * blockDim.y;

        // The buffer holds exactly DIM*DIM pixels, so never write past it even
        // if the launch configuration over-covers the image.
        if (x >= DIM || y >= DIM)
            return;

        // Row pitch is the image width, independent of the launch configuration.
        const int offset = x + y * DIM;

        // Map the pixel position to [-0.5, 0.5) on each axis.
        const float fx = x / (float)DIM - 0.5f;
        const float fy = y / (float)DIM - 0.5f;

        // Single-precision math throughout; the result lies in [1, 255].
        const float wave = sinf(fabsf(fx * 100.0f) - fabsf(fy * 100.0f));
        const unsigned char green = (unsigned char)(128.0f + 127.0f * wave);

        // Opaque pixel with only the green channel set.
        ptr[offset] = make_uchar4(0, green, 0, 255);
    }
      
     /*
      * GLUT display callback: draw the DIM x DIM RGBA image and present it.
      *
      * main() leaves bufferObj bound to GL_PIXEL_UNPACK_BUFFER_ARB, so the last
      * argument of glDrawPixels is 0, i.e. the start of that buffer, rather
      * than a pointer to client memory.
      */
     void drawFunc(void)
    {
        const GLsizei imageWidth  = DIM;
        const GLsizei imageHeight = DIM;

        glDrawPixels(imageWidth, imageHeight, GL_RGBA, GL_UNSIGNED_BYTE, 0);

        // The window is double-buffered (GLUT_DOUBLE), so swap to show the frame.
        glutSwapBuffers();
    }
      
    /*
     * GLUT keyboard callback: on Esc, release the shared buffer and quit.
     *
     * key:  ASCII code of the pressed key.
     * x, y: mouse position supplied by GLUT (unused).
     */
    static void keyFunc(unsigned char key, int x, int y)
    {
        const unsigned char escapeKey = 27;  // ASCII ESC

        // Every other key is ignored.
        if (key != escapeKey)
            return;

        // Tear down in reverse order of setup: drop CUDA's registration first,
        // then unbind and delete the OpenGL buffer object.
        cutilSafeCall(cudaGraphicsUnregisterResource(resource));
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
        glDeleteBuffers(1, &bufferObj);
        exit(0);
    }
      
    /*
     * Entry point: set up CUDA/OpenGL interop, fill the shared pixel buffer
     * once with cudaGLKernel, then hand control to the GLUT main loop.
     *
     * Returns EXIT_FAILURE if the OpenGL buffer-object entry points cannot be
     * resolved; CUDA API failures are reported through cutilSafeCall.
     */
    int main(int argc, char* argv[])
    {
        // Step 2: choose a CUDA device (compute capability >= 1.0) and tell the
        // CUDA runtime that this device will be used for CUDA and OpenGL.
        // Aggregate initialization zeroes every field, so no memset is needed.
        cudaDeviceProp prop = {};
        int dev;

        prop.major = 1;
        prop.minor = 0;
        cutilSafeCall(cudaChooseDevice(&dev, &prop));
        cutilSafeCall(cudaGLSetGLDevice(dev));

        glutInit(&argc, argv);
        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA);
        glutInitWindowSize(DIM, DIM);
        glutCreateWindow("CUDA interact with OpenGL");

        // Step 3: resolve the buffer-object entry points (a GL context exists
        // now that the window has been created) and create the pixel buffer.
        glBindBuffer    = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
        glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
        glGenBuffers    = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
        glBufferData    = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");

        // Calling through a NULL function pointer would crash; fail with a
        // message instead when the driver does not expose these functions.
        if (glBindBuffer == NULL || glDeleteBuffers == NULL ||
            glGenBuffers == NULL || glBufferData == NULL) {
            fprintf(stderr, "Required OpenGL buffer-object functions are not available\n");
            return EXIT_FAILURE;
        }

        glGenBuffers(1, &bufferObj);
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj);
        // One uchar4 (RGBA, 4 bytes) per pixel; NULL data leaves the contents
        // undefined until the kernel below fills them.
        glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * sizeof(uchar4), NULL, GL_DYNAMIC_DRAW_ARB);

        // Step 4: register the buffer object with CUDA so both APIs share it.
        cutilSafeCall(cudaGraphicsGLRegisterBuffer(&resource, bufferObj, cudaGraphicsMapFlagsNone));

        // Map the resource to obtain a device pointer the kernel can write to.
        uchar4* devPtr;
        size_t size;
        cutilSafeCall(cudaGraphicsMapResources(1, &resource, NULL));
        cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void**)&devPtr, &size, resource));

        // One thread per pixel in 16x16 blocks; DIM is a multiple of 16, so the
        // grid covers the image exactly.
        dim3 grids(DIM/16, DIM/16);
        dim3 threads(16, 16);
        cudaGLKernel<<<grids, threads>>>(devPtr);
        // A kernel launch returns no status; pick up launch errors explicitly.
        cutilSafeCall(cudaGetLastError());

        // Unmap before OpenGL reads the buffer in drawFunc.
        cutilSafeCall(cudaGraphicsUnmapResources(1, &resource, NULL));

        glutKeyboardFunc(keyFunc);
        glutDisplayFunc(drawFunc);
        glutMainLoop();
        return 0;
    }

    编译程序时需要注意头文件的包含顺序:应像上面的代码那样先包含glut.h,再包含glext.h,否则可能会报错。

    参考资源:

    1、Jason Sanders, Edward Kandrot, CUDA By Example: An Introduction to General-Purpose GPU Programming (2011)。该书电子版下载、源码下载

    2、[菜鸟每天来段CUDA_C]CUDA与OpenGL互操作

    3、CUDA与OpenGL交互开发

    4、cuda与opengl互操作之PBO

  • 相关阅读:
    STM32+ESP8266+AIR202基本控制篇-301-服务器单向SSL认证-MQTT服务器配置SSL单向认证(.Windows系统)
    STM32+ESP8266+AIR202基本控制篇-213-功能测试-微信小程序扫码绑定Air302(NB-IOT),并通过MQTT和Air302(NB-IOT)实现远程通信控制
    17-STM32+ESP8266+AIR202基本控制篇-完成功能2-微信小程序使用APUConfig配网绑定ESP8266,并通过MQTT和ESP8266实现远程通信控制
    Python 元类
    硬核!15张图解Redis为什么这么快
    Protobuf 中 any 的妙用
    Grpc性能压测方法:用ghz进行压测
    压测工具Locuse的使用
    Locust 多机器分布式测试
    kubespray部署kubernetes高可用集群
  • 原文地址:https://www.cnblogs.com/rainbow70626/p/7190093.html
Copyright © 2020-2023  润新知