• CUDA Thread Indexing


    1D grid of 1D blocks
    
    // Flat global thread index for a launch that uses a 1D grid of 1D blocks.
    //
    // Reads blockIdx.x, blockDim.x and threadIdx.x. The arithmetic is done on
    // the built-in (unsigned) values and the result is narrowed to int on
    // return, so the value wraps if the launch has more threads than int holds.
    __device__ int getGlobalIdx_1D_1D()
    {
        // Index of the first thread belonging to this block.
        const unsigned int firstThreadOfBlock = blockIdx.x * blockDim.x;
        return firstThreadOfBlock + threadIdx.x;
    }
    
    
    
    1D grid of 2D blocks
    
    // Flat global thread index for a launch that uses a 1D grid of 2D blocks.
    //
    // Threads inside a block are linearised row by row (x fastest, then y);
    // blocks are laid out one after another along blockIdx.x. The result is
    // narrowed to int on return.
    __device__ int getGlobalIdx_1D_2D()
    {
        const unsigned int threadsPerBlock = blockDim.x * blockDim.y;
        // Position of this thread inside its own block.
        const unsigned int localIdx = threadIdx.y * blockDim.x + threadIdx.x;
        return blockIdx.x * threadsPerBlock + localIdx;
    }
    
    
    
    1D grid of 3D blocks
    
    // Flat global thread index for a launch that uses a 1D grid of 3D blocks.
    //
    // Threads inside a block are linearised with x fastest, then y, then z;
    // blocks are laid out one after another along blockIdx.x. The result is
    // narrowed to int on return, so it wraps if the launch has more threads
    // than int can represent.
    __device__ int getGlobalIdx_1D_3D()
    {
        const unsigned int threadsPerSlice = blockDim.x * blockDim.y;
        const unsigned int threadsPerBlock = threadsPerSlice * blockDim.z;
        // Position of this thread inside its own block.
        const unsigned int localIdx = threadIdx.z * threadsPerSlice
                                    + threadIdx.y * blockDim.x
                                    + threadIdx.x;
        return blockIdx.x * threadsPerBlock + localIdx;
    }
    
    
    
    2D grid of 1D blocks
    
     // Flat global thread index for a launch that uses a 2D grid of 1D blocks.
     //
     // Blocks are linearised row by row across the grid (blockIdx.x fastest,
     // then blockIdx.y); each block contributes blockDim.x consecutive indices.
     // The result is narrowed to int on return, so it wraps if the launch has
     // more threads than int can represent.
     __device__ int getGlobalIdx_2D_1D()
     {
         // Linear index of this block within the grid.
         const unsigned int blockId = blockIdx.y * gridDim.x + blockIdx.x;
         return blockId * blockDim.x + threadIdx.x;
     }
    
    
    
    2D grid of 2D blocks  
    
    // Flat global thread index for a launch that uses a 2D grid of 2D blocks.
    //
    // Blocks are linearised with blockIdx.x fastest, then blockIdx.y; threads
    // inside a block with threadIdx.x fastest, then threadIdx.y. The result is
    // narrowed to int on return.
    __device__ int getGlobalIdx_2D_2D()
    {
        // Linear index of this block within the grid.
        const unsigned int linearBlock = blockIdx.y * gridDim.x + blockIdx.x;
        const unsigned int threadsPerBlock = blockDim.x * blockDim.y;
        // Position of this thread inside its own block.
        const unsigned int localIdx = threadIdx.y * blockDim.x + threadIdx.x;
        return linearBlock * threadsPerBlock + localIdx;
    }
    
    2D grid of 3D blocks
    
    // Flat global thread index for a launch that uses a 2D grid of 3D blocks.
    //
    // Blocks are linearised with blockIdx.x fastest, then blockIdx.y; threads
    // inside a block with x fastest, then y, then z. The result is narrowed to
    // int on return.
    __device__ int getGlobalIdx_2D_3D()
    {
        // Linear index of this block within the grid.
        const unsigned int linearBlock = blockIdx.y * gridDim.x + blockIdx.x;
        const unsigned int threadsPerSlice = blockDim.x * blockDim.y;
        const unsigned int threadsPerBlock = threadsPerSlice * blockDim.z;
        // Position of this thread inside its own block.
        const unsigned int localIdx = threadIdx.z * threadsPerSlice
                                    + threadIdx.y * blockDim.x
                                    + threadIdx.x;
        return linearBlock * threadsPerBlock + localIdx;
    }
    
    
    
    3D grid of 1D blocks
    
    // Flat global thread index for a launch that uses a 3D grid of 1D blocks.
    //
    // Blocks are linearised with blockIdx.x fastest, then blockIdx.y, then
    // blockIdx.z; each block contributes blockDim.x consecutive indices. The
    // result is narrowed to int on return.
    __device__ int getGlobalIdx_3D_1D()
    {
        const unsigned int blocksPerLayer = gridDim.x * gridDim.y;
        // Linear index of this block within the grid.
        const unsigned int linearBlock = blockIdx.z * blocksPerLayer
                                       + blockIdx.y * gridDim.x
                                       + blockIdx.x;
        return linearBlock * blockDim.x + threadIdx.x;
    }
    
    
    
    3D grid of 2D blocks
    
    // Flat global thread index for a launch that uses a 3D grid of 2D blocks.
    //
    // Blocks are linearised with blockIdx.x fastest, then blockIdx.y, then
    // blockIdx.z; threads inside a block with threadIdx.x fastest, then
    // threadIdx.y. The result is narrowed to int on return.
    __device__ int getGlobalIdx_3D_2D()
    {
        const unsigned int blocksPerLayer = gridDim.x * gridDim.y;
        // Linear index of this block within the grid.
        const unsigned int linearBlock = blockIdx.z * blocksPerLayer
                                       + blockIdx.y * gridDim.x
                                       + blockIdx.x;
        const unsigned int threadsPerBlock = blockDim.x * blockDim.y;
        // Position of this thread inside its own block.
        const unsigned int localIdx = threadIdx.y * blockDim.x + threadIdx.x;
        return linearBlock * threadsPerBlock + localIdx;
    }
    
    
    
    3D grid of 3D blocks
    
    // Flat global thread index for a launch that uses a 3D grid of 3D blocks.
    //
    // Blocks are linearised with blockIdx.x fastest, then blockIdx.y, then
    // blockIdx.z; threads inside a block with x fastest, then y, then z. The
    // result is narrowed to int on return.
    __device__ int getGlobalIdx_3D_3D()
    {
        const unsigned int blocksPerLayer = gridDim.x * gridDim.y;
        // Linear index of this block within the grid.
        const unsigned int linearBlock = blockIdx.z * blocksPerLayer
                                       + blockIdx.y * gridDim.x
                                       + blockIdx.x;
        const unsigned int threadsPerSlice = blockDim.x * blockDim.y;
        const unsigned int threadsPerBlock = threadsPerSlice * blockDim.z;
        // Position of this thread inside its own block.
        const unsigned int localIdx = threadIdx.z * threadsPerSlice
                                    + threadIdx.y * blockDim.x
                                    + threadIdx.x;
        return linearBlock * threadsPerBlock + localIdx;
    }
    

      

  • 相关阅读:
    40款不容错过的个人摄影设计作品集网站
    Google的全新在线地图API演示网站 More than a map
    绝对不容错过的超棒动物瞬间抓拍摄影作品
    超全超实用的Javascript类库和jQuery插件大全之一:Web印刷排版
    Java中方法重写和方法重载的6个区别?
    面试突击15:说一下HashMap底层实现?及元素添加流程?
    查询 MySQL 字段注释的 5 种方法!
    剑指Offer补充
    Cracking the Coding Interview – ch11 | System Design and Memory Limits
    Cracking the Coding Interview – ch16,17,18
  • 原文地址:https://www.cnblogs.com/tibetanmastiff/p/4639194.html
Copyright © 2020-2023  润新知