• 0_Simple__simpleCubemapTexture


    立方体纹理贴图

    ▶ 源代码。用纹理方法把元素按原顺序从 CUDA3D 数组中取出来,求个相反数放入全局内存,输出。

      1 #include <stdio.h>
      2 #include "cuda_runtime.h"
      3 #include "device_launch_parameters.h"
      4 #include <helper_functions.h>
      5 #include <helper_cuda.h>
      6 
      7 #define MIN_EPSILON_ERROR 5e-3f
      8 
      9 texture<float, cudaTextureTypeCubemap> tex;
     10 
     11 __global__ void transformKernel(float *g_odata, int width)
     12 {
     13     unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
     14     unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
     15 
     16     float u = ((x + 0.5f) / (float)width) * 2.f - 1.f;// [0, width-1] 间隔 1 的坐标变换为 [-1+1/width,1-1/width] 间隔 1/width 的坐标
     17     float v = ((y + 0.5f) / (float)width) * 2.f - 1.f;
     18 
     19     float cx, cy, cz;
     20 
     21     for (unsigned int face = 0; face < 6; face++)
     22     {
     23         if (face == 0)// x 正层
     24         {
     25             cx = 1;
     26             cy = -v;
     27             cz = -u;
     28         }
     29         else if (face == 1)// x 负层
     30         {
     31             cx = -1;
     32             cy = -v;
     33             cz = u;
     34         }
     35         else if (face == 2)// y 正层
     36         {
     37             cx = u;
     38             cy = 1;
     39             cz = v;
     40         }
     41         else if (face == 3)// y 负层
     42         {
     43             cx = u;
     44             cy = -1;
     45             cz = -v;
     46         }
     47         else if (face == 4)// z 正层
     48         {
     49             cx = u;
     50             cy = -v;
     51             cz = 1;
     52         }
     53         else if (face == 5)// z 负层
     54         {
     55             cx = -u;
     56             cy = -v;
     57             cz = -1;
     58         }
     59         g_odata[face*width*width + y*width + x] = - texCubemap(tex, cx, cy, cz);// 纹理数据读取到全局内存中输出
     60     }
     61 }
     62 
     63 int main(int argc, char** argv)
     64 {
     65     unsigned int width = 64, num_faces = 6, num_layers = 1;
     66     unsigned int cubemap_size = width * width * num_faces;
     67     unsigned int size = cubemap_size * num_layers * sizeof(float);
     68     float *h_data = (float *)malloc(size);
     69     float *h_data_ref = (float *)malloc(size);  // 理论输出
     70     float *d_data = NULL;
     71     cudaMalloc((void **)&d_data, size);
     72 
     73     for (int i = 0; i < (int)(cubemap_size * num_layers); i++)
     74         h_data[i] = (float)i;
     75     for (unsigned int layer = 0; layer < num_layers; layer++)
     76     {
     77         for (int i = 0; i < (int)(cubemap_size); i++)
     78             h_data_ref[layer*cubemap_size + i] = -h_data[layer*cubemap_size + i] + layer;
     79     }
     80 
     81     printf("
    	Input data.n	");
     82     for (int i = 0; i < width * num_faces * num_layers; i++)
     83     {
     84         printf("%2.1f ", h_data[i]);
     85         if ((i + 1) % width == 0)
     86             printf("
    	");
     87         if ((i + 1) % (width *width) == 0)
     88             printf("
    	");
     89     }
     90     printf("
    	Ideal output data
    	");
     91     for (int i = 0; i < width * num_faces * num_layers; i++)
     92     {
     93         printf("%2.1f ", h_data_ref[i]);
     94         if ((i + 1) % width == 0)
     95             printf("
    	");
     96         if ((i + 1) % (width *width) == 0)
     97             printf("
    	");
     98     }
     99 
    100     // 设置 CUDA 3D 数组参数和数据拷贝
    101     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    102     cudaArray *cu_3darray;
    103     cudaMalloc3DArray(&cu_3darray, &channelDesc, make_cudaExtent(width, width, num_faces), cudaArrayCubemap);
    104     cudaMemcpy3DParms myparms = { 0 };
    105     myparms.srcPos = make_cudaPos(0, 0, 0);
    106     myparms.dstPos = make_cudaPos(0, 0, 0);
    107     myparms.srcPtr = make_cudaPitchedPtr(h_data, width * sizeof(float), width, width);
    108     myparms.dstArray = cu_3darray;
    109     myparms.extent = make_cudaExtent(width, width, num_faces);
    110     myparms.kind = cudaMemcpyHostToDevice;
    111     cudaMemcpy3D(&myparms);
    112 
    113     // 设置纹理参数并绑定
    114     tex.addressMode[0] = cudaAddressModeWrap;
    115     tex.addressMode[1] = cudaAddressModeWrap;
    116     tex.filterMode = cudaFilterModeLinear;
    117     tex.normalized = true;
    118     cudaBindTextureToArray(tex, cu_3darray, channelDesc);
    119 
    120     dim3 dimBlock(8, 8, 1);
    121     dim3 dimGrid(width / dimBlock.x, width / dimBlock.y, 1);
    122     printf("
    	Cubemap data of %d * %d * %d: Grid size is %d x %d, each block has 8 x 8 threads.
    ", width, width, num_layers, dimGrid.x, dimGrid.y);
    123     transformKernel << < dimGrid, dimBlock >> >(d_data, width);// 预跑
    124     cudaDeviceSynchronize();
    125 
    126     StopWatchInterface *timer = NULL;// 新的计时工具
    127     sdkCreateTimer(&timer);
    128     sdkStartTimer(&timer);
    129 
    130     transformKernel << < dimGrid, dimBlock, 0 >> >(d_data, width);
    131     cudaDeviceSynchronize();
    132 
    133     sdkStopTimer(&timer);
    134     printf("
    Time: %.3f msec, %.2f Mtexlookups/sec
    ", sdkGetTimerValue(&timer), (cubemap_size / (sdkGetTimerValue(&timer) / 1000.0f) / 1e6));
    135     sdkDeleteTimer(&timer);
    136 
    137     // 返回计算结果并检验
    138     memset(h_data, 0, size);
    139     cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost);
    140     if (checkCmdLineFlag(argc, (const char **)argv, "regression"))
    141         sdkWriteFile<float>("./data/regression.dat", h_data, width * width, 0.0f, false);
    142     else
    143         printf("Comparing kernel output to expected data return %d
    ", compareData(h_data, h_data_ref, cubemap_size, MIN_EPSILON_ERROR, 0.0f));
    144 
    145     printf("
    	Actual output data
    	");
    146     for (int i = 0; i < width * num_faces * num_layers; i++)
    147     {
    148         printf("%2.1f ", h_data[i]);
    149         if ((i + 1) % width == 0)
    150             printf("
    	");
    151         if ((i + 1) % (width * width) == 0)
    152             printf("
    	");
    153     }
    154 
    155     free(h_data);
    156     free(h_data_ref);
    157     cudaFree(d_data);
    158     cudaFreeArray(cu_3darray);
    159 
    160     getchar();
    161     return 0;
    162 }

    ▶ 输出结果

        Input data.n    0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0 21.0 22.0 23.0 24.0 25.0 26.0 27.0 28.0 29.0 30.0 31.0 32.0 33.0 34.0 35.0 36.0 37.0 38.0 39.0 40.0 41.0 42.0 43.0 44.0 45.0 46.0 47.0 48.0 49.0 50.0 51.0 52.0 53.0 54.0 55.0 56.0 57.0 58.0 59.0 60.0 61.0 62.0 63.0
        64.0 65.0 66.0 67.0 68.0 69.0 70.0 71.0 72.0 73.0 74.0 75.0 76.0 77.0 78.0 79.0 80.0 81.0 82.0 83.0 84.0 85.0 86.0 87.0 88.0 89.0 90.0 91.0 92.0 93.0 94.0 95.0 96.0 97.0 98.0 99.0 100.0 101.0 102.0 103.0 104.0 105.0 106.0 107.0 108.0 109.0 110.0 111.0 112.0 113.0 114.0 115.0 116.0 117.0 118.0 119.0 120.0 121.0 122.0 123.0 124.0 125.0 126.0 127.0
        128.0 129.0 130.0 131.0 132.0 133.0 134.0 135.0 136.0 137.0 138.0 139.0 140.0 141.0 142.0 143.0 144.0 145.0 146.0 147.0 148.0 149.0 150.0 151.0 152.0 153.0 154.0 155.0 156.0 157.0 158.0 159.0 160.0 161.0 162.0 163.0 164.0 165.0 166.0 167.0 168.0 169.0 170.0 171.0 172.0 173.0 174.0 175.0 176.0 177.0 178.0 179.0 180.0 181.0 182.0 183.0 184.0 185.0 186.0 187.0 188.0 189.0 190.0 191.0
        192.0 193.0 194.0 195.0 196.0 197.0 198.0 199.0 200.0 201.0 202.0 203.0 204.0 205.0 206.0 207.0 208.0 209.0 210.0 211.0 212.0 213.0 214.0 215.0 216.0 217.0 218.0 219.0 220.0 221.0 222.0 223.0 224.0 225.0 226.0 227.0 228.0 229.0 230.0 231.0 232.0 233.0 234.0 235.0 236.0 237.0 238.0 239.0 240.0 241.0 242.0 243.0 244.0 245.0 246.0 247.0 248.0 249.0 250.0 251.0 252.0 253.0 254.0 255.0
        256.0 257.0 258.0 259.0 260.0 261.0 262.0 263.0 264.0 265.0 266.0 267.0 268.0 269.0 270.0 271.0 272.0 273.0 274.0 275.0 276.0 277.0 278.0 279.0 280.0 281.0 282.0 283.0 284.0 285.0 286.0 287.0 288.0 289.0 290.0 291.0 292.0 293.0 294.0 295.0 296.0 297.0 298.0 299.0 300.0 301.0 302.0 303.0 304.0 305.0 306.0 307.0 308.0 309.0 310.0 311.0 312.0 313.0 314.0 315.0 316.0 317.0 318.0 319.0
        320.0 321.0 322.0 323.0 324.0 325.0 326.0 327.0 328.0 329.0 330.0 331.0 332.0 333.0 334.0 335.0 336.0 337.0 338.0 339.0 340.0 341.0 342.0 343.0 344.0 345.0 346.0 347.0 348.0 349.0 350.0 351.0 352.0 353.0 354.0 355.0 356.0 357.0 358.0 359.0 360.0 361.0 362.0 363.0 364.0 365.0 366.0 367.0 368.0 369.0 370.0 371.0 372.0 373.0 374.0 375.0 376.0 377.0 378.0 379.0 380.0 381.0 382.0 383.0
    
        Ideal output data
        0.0 -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -11.0 -12.0 -13.0 -14.0 -15.0 -16.0 -17.0 -18.0 -19.0 -20.0 -21.0 -22.0 -23.0 -24.0 -25.0 -26.0 -27.0 -28.0 -29.0 -30.0 -31.0 -32.0 -33.0 -34.0 -35.0 -36.0 -37.0 -38.0 -39.0 -40.0 -41.0 -42.0 -43.0 -44.0 -45.0 -46.0 -47.0 -48.0 -49.0 -50.0 -51.0 -52.0 -53.0 -54.0 -55.0 -56.0 -57.0 -58.0 -59.0 -60.0 -61.0 -62.0 -63.0
        -64.0 -65.0 -66.0 -67.0 -68.0 -69.0 -70.0 -71.0 -72.0 -73.0 -74.0 -75.0 -76.0 -77.0 -78.0 -79.0 -80.0 -81.0 -82.0 -83.0 -84.0 -85.0 -86.0 -87.0 -88.0 -89.0 -90.0 -91.0 -92.0 -93.0 -94.0 -95.0 -96.0 -97.0 -98.0 -99.0 -100.0 -101.0 -102.0 -103.0 -104.0 -105.0 -106.0 -107.0 -108.0 -109.0 -110.0 -111.0 -112.0 -113.0 -114.0 -115.0 -116.0 -117.0 -118.0 -119.0 -120.0 -121.0 -122.0 -123.0 -124.0 -125.0 -126.0 -127.0
        -128.0 -129.0 -130.0 -131.0 -132.0 -133.0 -134.0 -135.0 -136.0 -137.0 -138.0 -139.0 -140.0 -141.0 -142.0 -143.0 -144.0 -145.0 -146.0 -147.0 -148.0 -149.0 -150.0 -151.0 -152.0 -153.0 -154.0 -155.0 -156.0 -157.0 -158.0 -159.0 -160.0 -161.0 -162.0 -163.0 -164.0 -165.0 -166.0 -167.0 -168.0 -169.0 -170.0 -171.0 -172.0 -173.0 -174.0 -175.0 -176.0 -177.0 -178.0 -179.0 -180.0 -181.0 -182.0 -183.0 -184.0 -185.0 -186.0 -187.0 -188.0 -189.0 -190.0 -191.0
        -192.0 -193.0 -194.0 -195.0 -196.0 -197.0 -198.0 -199.0 -200.0 -201.0 -202.0 -203.0 -204.0 -205.0 -206.0 -207.0 -208.0 -209.0 -210.0 -211.0 -212.0 -213.0 -214.0 -215.0 -216.0 -217.0 -218.0 -219.0 -220.0 -221.0 -222.0 -223.0 -224.0 -225.0 -226.0 -227.0 -228.0 -229.0 -230.0 -231.0 -232.0 -233.0 -234.0 -235.0 -236.0 -237.0 -238.0 -239.0 -240.0 -241.0 -242.0 -243.0 -244.0 -245.0 -246.0 -247.0 -248.0 -249.0 -250.0 -251.0 -252.0 -253.0 -254.0 -255.0
        -256.0 -257.0 -258.0 -259.0 -260.0 -261.0 -262.0 -263.0 -264.0 -265.0 -266.0 -267.0 -268.0 -269.0 -270.0 -271.0 -272.0 -273.0 -274.0 -275.0 -276.0 -277.0 -278.0 -279.0 -280.0 -281.0 -282.0 -283.0 -284.0 -285.0 -286.0 -287.0 -288.0 -289.0 -290.0 -291.0 -292.0 -293.0 -294.0 -295.0 -296.0 -297.0 -298.0 -299.0 -300.0 -301.0 -302.0 -303.0 -304.0 -305.0 -306.0 -307.0 -308.0 -309.0 -310.0 -311.0 -312.0 -313.0 -314.0 -315.0 -316.0 -317.0 -318.0 -319.0
        -320.0 -321.0 -322.0 -323.0 -324.0 -325.0 -326.0 -327.0 -328.0 -329.0 -330.0 -331.0 -332.0 -333.0 -334.0 -335.0 -336.0 -337.0 -338.0 -339.0 -340.0 -341.0 -342.0 -343.0 -344.0 -345.0 -346.0 -347.0 -348.0 -349.0 -350.0 -351.0 -352.0 -353.0 -354.0 -355.0 -356.0 -357.0 -358.0 -359.0 -360.0 -361.0 -362.0 -363.0 -364.0 -365.0 -366.0 -367.0 -368.0 -369.0 -370.0 -371.0 -372.0 -373.0 -374.0 -375.0 -376.0 -377.0 -378.0 -379.0 -380.0 -381.0 -382.0 -383.0
    
        Cubemap data of 64 * 64 * 1: Grid size is 8 x 8, each block has 8 x 8 threads.
    
    Time: 0.098 msec, 249.50 Mtexlookups/sec
    Comparing kernel output to expected data return 1
    
        Actual output data
        -0.0 -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -11.0 -12.0 -13.0 -14.0 -15.0 -16.0 -17.0 -18.0 -19.0 -20.0 -21.0 -22.0 -23.0 -24.0 -25.0 -26.0 -27.0 -28.0 -29.0 -30.0 -31.0 -32.0 -33.0 -34.0 -35.0 -36.0 -37.0 -38.0 -39.0 -40.0 -41.0 -42.0 -43.0 -44.0 -45.0 -46.0 -47.0 -48.0 -49.0 -50.0 -51.0 -52.0 -53.0 -54.0 -55.0 -56.0 -57.0 -58.0 -59.0 -60.0 -61.0 -62.0 -63.0
        -64.0 -65.0 -66.0 -67.0 -68.0 -69.0 -70.0 -71.0 -72.0 -73.0 -74.0 -75.0 -76.0 -77.0 -78.0 -79.0 -80.0 -81.0 -82.0 -83.0 -84.0 -85.0 -86.0 -87.0 -88.0 -89.0 -90.0 -91.0 -92.0 -93.0 -94.0 -95.0 -96.0 -97.0 -98.0 -99.0 -100.0 -101.0 -102.0 -103.0 -104.0 -105.0 -106.0 -107.0 -108.0 -109.0 -110.0 -111.0 -112.0 -113.0 -114.0 -115.0 -116.0 -117.0 -118.0 -119.0 -120.0 -121.0 -122.0 -123.0 -124.0 -125.0 -126.0 -127.0
        -128.0 -129.0 -130.0 -131.0 -132.0 -133.0 -134.0 -135.0 -136.0 -137.0 -138.0 -139.0 -140.0 -141.0 -142.0 -143.0 -144.0 -145.0 -146.0 -147.0 -148.0 -149.0 -150.0 -151.0 -152.0 -153.0 -154.0 -155.0 -156.0 -157.0 -158.0 -159.0 -160.0 -161.0 -162.0 -163.0 -164.0 -165.0 -166.0 -167.0 -168.0 -169.0 -170.0 -171.0 -172.0 -173.0 -174.0 -175.0 -176.0 -177.0 -178.0 -179.0 -180.0 -181.0 -182.0 -183.0 -184.0 -185.0 -186.0 -187.0 -188.0 -189.0 -190.0 -191.0
        -192.0 -193.0 -194.0 -195.0 -196.0 -197.0 -198.0 -199.0 -200.0 -201.0 -202.0 -203.0 -204.0 -205.0 -206.0 -207.0 -208.0 -209.0 -210.0 -211.0 -212.0 -213.0 -214.0 -215.0 -216.0 -217.0 -218.0 -219.0 -220.0 -221.0 -222.0 -223.0 -224.0 -225.0 -226.0 -227.0 -228.0 -229.0 -230.0 -231.0 -232.0 -233.0 -234.0 -235.0 -236.0 -237.0 -238.0 -239.0 -240.0 -241.0 -242.0 -243.0 -244.0 -245.0 -246.0 -247.0 -248.0 -249.0 -250.0 -251.0 -252.0 -253.0 -254.0 -255.0
        -256.0 -257.0 -258.0 -259.0 -260.0 -261.0 -262.0 -263.0 -264.0 -265.0 -266.0 -267.0 -268.0 -269.0 -270.0 -271.0 -272.0 -273.0 -274.0 -275.0 -276.0 -277.0 -278.0 -279.0 -280.0 -281.0 -282.0 -283.0 -284.0 -285.0 -286.0 -287.0 -288.0 -289.0 -290.0 -291.0 -292.0 -293.0 -294.0 -295.0 -296.0 -297.0 -298.0 -299.0 -300.0 -301.0 -302.0 -303.0 -304.0 -305.0 -306.0 -307.0 -308.0 -309.0 -310.0 -311.0 -312.0 -313.0 -314.0 -315.0 -316.0 -317.0 -318.0 -319.0
        -320.0 -321.0 -322.0 -323.0 -324.0 -325.0 -326.0 -327.0 -328.0 -329.0 -330.0 -331.0 -332.0 -333.0 -334.0 -335.0 -336.0 -337.0 -338.0 -339.0 -340.0 -341.0 -342.0 -343.0 -344.0 -345.0 -346.0 -347.0 -348.0 -349.0 -350.0 -351.0 -352.0 -353.0 -354.0 -355.0 -356.0 -357.0 -358.0 -359.0 -360.0 -361.0 -362.0 -363.0 -364.0 -365.0 -366.0 -367.0 -368.0 -369.0 -370.0 -371.0 -372.0 -373.0 -374.0 -375.0 -376.0 -377.0 -378.0 -379.0 -380.0 -381.0 -382.0 -383.0

    ▶ 涨姿势

    ● helper_time.h 中新定义的计时函数

     1 // 关键步骤
     2 StopWatchInterface *timer = NULL;
     3 sdkCreateTimer(&timer);
     4 sdkStartTimer(&timer);
     5 
     6 sdkStopTimer(&timer);
     7 sdkGetTimerValue(&timer);
     8 sdkDeleteTimer(&timer);
     9 
    10 // helper_time.h
    11 class StopWatchInterface
    12 {
    13     public:
    14         StopWatchInterface() {};
    15         virtual ~StopWatchInterface() {};
    16 
    17     public:
    18         virtual void start() = 0;
    19         virtual void stop() = 0;
    20         virtual void reset() = 0;
    21         virtual float getTime() = 0;// 获取计时(计时器不停)
    22         virtual float getAverageTime() = 0;
    23 };
    24 
    25 inline bool sdkCreateTimer(StopWatchInterface **timer_interface)
    26 {
    27 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
    28     *timer_interface = (StopWatchInterface *)new StopWatchWin();
    29 #else
    30     *timer_interface = (StopWatchInterface *)new StopWatchLinux();
    31 #endif
    32     return (*timer_interface != NULL) ? true : false;
    33 }
    34 
    35 inline bool sdkDeleteTimer(StopWatchInterface **timer_interface)
    36 {
    37     if (*timer_interface)
    38     {
    39         delete *timer_interface;
    40         *timer_interface = NULL;
    41     }
    42     return true;
    43 }
    44 
    45 inline bool sdkStartTimer(StopWatchInterface **timer_interface)
    46 {
    47     if (*timer_interface)
    48         (*timer_interface)->start();
    49     return true;
    50 }
    51 
    52 inline bool sdkStopTimer(StopWatchInterface **timer_interface)
    53 {
    54     if (*timer_interface)
    55         (*timer_interface)->stop();
    56     return true;
    57 }
    58 
    59 inline float sdkGetTimerValue(StopWatchInterface **timer_interface)
    60 {
    61     if (*timer_interface)    
    62         return (*timer_interface)->getTime();
    63     else
    64         return 0.0f;
    65 }

    ● 立方体纹理贴图。六个面分别为 x = 1 正面、x = -1 轴负面、y = 1 正面、y = -1 负面、z = 1 正面、x = -1 负面,对应前、后、右、左、上、下。按照线性下标 [0, width * width * 6 - 1] 顺序访问时,各元素存储位置如下图所示(width == 2 为例)。

        

  • 相关阅读:
    SqlServer事务日志满的解决方案
    关于.net反射和metadata加载致Jeffray Zhao等几位和firelong
    Context Root选/的原则
    [继续讨论]关于Windows PE和.net assembly的加载
    有趣的重写GetType()方法
    对Wintercn关于函数式编程的文章评论
    The experience to config Cisco 2811 for VOIP
    关于c#静态方法和实例方法的辨析和应用
    防止刷新时,密码输入框中的信息丢失
    计算百分比 JS
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/7881729.html
Copyright © 2020-2023  润新知