• cuda实践3


    shared memory 使用

    /**
     * Per-pixel triangle lookup followed by an occlusion ("shelter") test
     * against a list of panorama optical centers.
     *
     * Launch layout: one thread per pixel. The indexing below uses
     * threadIdx.x / threadIdx.y with a stride of BLOCK_SIZE, so the kernel
     * expects a 2D block of BLOCK_SIZE x BLOCK_SIZE threads and a 2D grid that
     * covers the image. The grid may overshoot the image; threads that fall
     * outside still help stage tiles but write no output.
     * Static shared memory: 6 * BLOCK_SIZE * BLOCK_SIZE floats.
     *
     * Stage 1: every triangle's 2D vertices are streamed through shared memory
     *          in tiles of BLOCK_SIZE * BLOCK_SIZE triangles. Each pixel keeps
     *          the first triangle (lowest index) for which
     *          pointInTriangle_cuda() returns true.
     * Stage 2: for a pixel with a triangle, the pixel is mapped to a 3D point
     *          with caculateMappingTriangle2dTo3d(), and for each panorama the
     *          segment from that point to the optical center is tested against
     *          nearby triangles with rayTracingShelterCaculate_cuda2().
     *
     * @param uv_triangulation_0      2D vertices, component-major: read as
     *                                [c * triangule_num_d + tri], c = 0..5 in the
     *                                order A.x, A.y, B.x, B.y, C.x, C.y.
     * @param uv_triangulation        2D vertices, triangle-major: read as [6 * tri + c].
     * @param w_triangulation         3D triangle data, read as [9 * tri + c].
     * @param w_triangulation_center  Per-triangle 3D point, read as [3 * tri + c]
     *                                (presumably the centroid - confirm with the producer).
     * @param position_panorama_vect  Panorama optical centers, read as [3 * m + c].
     * @param triangule_num_d         Number of triangles.
     * @param panorama_num_d          Number of panoramas.
     * @param imgHeight_d             Image height in pixels.
     * @param imgWidth_d              Image width in pixels.
     * @param dis_threshold           Panoramas farther than this from the 3D point are skipped.
     * @param inside_ptr_d            Output, 2 ints per pixel: [2 * p] is 0 when a
     *                                triangle was found and -1 otherwise; [2 * p + 1]
     *                                is the triangle index or -1.
     * @param result                  Output, PanoramaNUM ints per pixel: the 1-based
     *                                indices (m + 1) of the first panoramas that see
     *                                the point unoccluded. Slots that are not written
     *                                are left untouched, so the caller presumably
     *                                pre-initializes this buffer - confirm.
     */
    template <int BLOCK_SIZE> __global__ void caculateShelter_cuda(
        float *uv_triangulation_0,
        float *uv_triangulation,
        float *w_triangulation,
        float *w_triangulation_center,
        float *position_panorama_vect, int triangule_num_d, int panorama_num_d, int imgHeight_d, int imgWidth_d, float dis_threshold, int*inside_ptr_d,  int* result)
    {
        // Keep the threshold in float: storing it in an int silently dropped the
        // fractional part (e.g. 2.5 became 2).
        const float distance_threshold = dis_threshold;
        const int times = triangule_num_d;

        const int x = threadIdx.x;
        const int y = threadIdx.y;
        const int Row = blockIdx.y * BLOCK_SIZE + y;
        const int Col = blockIdx.x * BLOCK_SIZE + x;

        // The grid may be larger than the image. Such threads must not return
        // early (they take part in the tile loads and barriers below), but they
        // must never write output: a Col >= imgWidth_d would otherwise alias the
        // flat index of a pixel in another row.
        const bool in_image = (Row < imgHeight_d) && (Col < imgWidth_d);

        // Normalized coordinates of this pixel; the vertical axis is flipped.
        float searchPoint[2];
        searchPoint[0] = (float)Col / (float)imgWidth_d;
        searchPoint[1] = (float)(imgHeight_d - Row) / (float)imgHeight_d;

        int inside = -1;
        int index_temp = -1;

        const int step = BLOCK_SIZE * BLOCK_SIZE;          // triangles per tile
        const int tile_count = (times + step - 1) / step;  // ceil-div: no empty trailing tile
        const int local_id = BLOCK_SIZE * y + x;           // this thread's slot inside a tile
        const int row_base = BLOCK_SIZE * y * 6;           // each tile row holds 6 * BLOCK_SIZE floats

        __shared__ float tile_uv[BLOCK_SIZE * BLOCK_SIZE * 6];

        for (int t = 0; t < tile_count; t++)
        {
            const int tile_start = t * step;

            // Each thread stages one triangle (6 floats). Slots past the end of
            // the triangle list are zero-filled so shared memory is never read
            // uninitialized, and global memory is never read out of bounds.
            const int load_idx = tile_start + local_id;
            const bool has_triangle = load_idx < times;
            #pragma unroll
            for (int c = 0; c < 6; ++c)
            {
                tile_uv[row_base + c * BLOCK_SIZE + x] =
                    has_triangle ? uv_triangulation_0[c * times + load_idx] : 0.0f;
            }
            __syncthreads();   // tile fully written before anyone reads it

            // Only pixels that are inside the image and still have no triangle
            // need to scan; everyone else just waits at the barrier below.
            if (in_image && inside == -1)
            {
                const int remaining = times - tile_start;
                const int limit = (remaining < step) ? remaining : step;
                for (int k = 0; k < limit; ++k)
                {
                    const int base = BLOCK_SIZE * (k / BLOCK_SIZE) * 6 + (k % BLOCK_SIZE);

                    float A[2], B[2], C[2];
                    A[0] = tile_uv[base];
                    A[1] = tile_uv[base + 1 * BLOCK_SIZE];
                    B[0] = tile_uv[base + 2 * BLOCK_SIZE];
                    B[1] = tile_uv[base + 3 * BLOCK_SIZE];
                    C[0] = tile_uv[base + 4 * BLOCK_SIZE];
                    C[1] = tile_uv[base + 5 * BLOCK_SIZE];

                    if (pointInTriangle_cuda(A, B, C, searchPoint))   // inside / on the triangle
                    {
                        index_temp = tile_start + k;
                        inside = 0;
                        break;
                    }
                }
            }
            __syncthreads();   // nobody overwrites the tile while others still read it
        }

        // No barriers past this point, so out-of-image threads can leave.
        if (!in_image)
        {
            return;
        }

        const int threadId = Row * imgWidth_d + Col;   // flat pixel index
        inside_ptr_d[2 * threadId] = inside;
        inside_ptr_d[2 * threadId + 1] = index_temp;

        if (inside != 0)
        {
            return;   // pixel is not covered by any triangle
        }

        const int tr_index = index_temp;

        float uv_triangulation_temp[6];
        #pragma unroll
        for (int c = 0; c < 6; ++c)
        {
            uv_triangulation_temp[c] = uv_triangulation[6 * tr_index + c];
        }

        float w_triangulation_temp[9];
        #pragma unroll
        for (int c = 0; c < 9; ++c)
        {
            w_triangulation_temp[c] = w_triangulation[9 * tr_index + c];
        }

        // Map the pixel's 2D position inside its triangle to a 3D point.
        float pt3d[3];
        caculateMappingTriangle2dTo3d(uv_triangulation_temp, searchPoint, w_triangulation_temp, pt3d);

        int not_in_shelter_num = 0;
        // Shelter check: try panoramas in order until PanoramaNUM visible ones are recorded.
        for (int m = 0; m < panorama_num_d; m++)
        {
            if (not_in_shelter_num >= PanoramaNUM)
            {
                break;
            }

            float optical_center[3];
            optical_center[0] = position_panorama_vect[m * 3];
            optical_center[1] = position_panorama_vect[m * 3 + 1];
            optical_center[2] = position_panorama_vect[m * 3 + 2];

            // Skip panoramas that are too far from the surface point.
            float redius_ = distance3d(optical_center, pt3d);
            if (redius_ > distance_threshold)
            {
                continue;
            }

            // Midpoint of the segment between the optical center and the surface point.
            float mid_pt[3];
            mid_pt[0] = (optical_center[0] + pt3d[0]) * 0.5f;
            mid_pt[1] = (optical_center[1] + pt3d[1]) * 0.5f;
            mid_pt[2] = (optical_center[2] + pt3d[2]) * 0.5f;

            // Only triangles whose stored center lies within 0.45 * segment length
            // of the midpoint are tested as potential occluders.
            const float cull_radius = 0.45f * redius_;

            bool occluded = false;
            for (int i = 0; i < times; i++)
            {
                if (i == tr_index)
                {
                    continue;   // the pixel's own triangle is never tested as an occluder
                }

                float pt_temp[3];
                pt_temp[0] = w_triangulation_center[3 * i];
                pt_temp[1] = w_triangulation_center[3 * i + 1];
                pt_temp[2] = w_triangulation_center[3 * i + 2];
                if (distance3d(pt_temp, mid_pt) > cull_radius)
                {
                    continue;
                }

                // Load the 9 triangle floats only for candidates that survive the cull.
                float triangle[9];
                #pragma unroll
                for (int c = 0; c < 9; ++c)
                {
                    triangle[c] = w_triangulation[9 * i + c];
                }

                if (rayTracingShelterCaculate_cuda2(pt3d, optical_center, triangle) == 0)  // in shelter
                {
                    occluded = true;
                    break;
                }
            }

            if (!occluded)
            {
                result[threadId * PanoramaNUM + not_in_shelter_num] = m + 1;    // not in shelter (1-based panorama index)
                not_in_shelter_num++;
            }
        }
    }
  • 相关阅读:
    linux ipsec
    inotify+rsync
    多实例tomcat
    Http和Nginx反代至Tomcat(LNMT、LAMT)
    cisco ipsec
    ansible基础
    Qt 汉字乱码
    Model/View
    面对焦虑
    QT中QWidget、QDialog及QMainWindow的区别
  • 原文地址:https://www.cnblogs.com/lovebay/p/13432295.html
Copyright © 2020-2023  润新知