• 0_Simple__simpleAtomicIntrinsics + 0_Simple__simpleAtomicIntrinsics_nvrtc


    原子操作。并且在静态代码和运行时编译两种条件下使用。

    ▶ 源代码:静态使用

     1 #ifndef _SIMPLEATOMICS_KERNEL_H_
     2 #define _SIMPLEATOMICS_KERNEL_H_
     3 //#include "device_launch_parameters.h"
     4 
     5 __global__ void testKernel(int *g_odata)
     6 {
     7     const unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;
     8 
     9     // 算术运算原子指令
    10     atomicAdd(&g_odata[0], 10);                 // 0号位加 10
    11 
    12     atomicSub(&g_odata[1], 10);                 // 1号位减 10
    13 
    14     atomicExch(&g_odata[2], tid);               // 2号位与 tid 号值交换(获得最后一个访问的 tid 号)
    15 
    16     atomicMax(&g_odata[3], tid);                // 3号位获得最大的 tid 号
    17 
    18     atomicMin(&g_odata[4], tid);                // 4号位获得最小的 tid 号
    19 
    20     atomicInc((unsigned int *)&g_odata[5], 16); // 5号位做模 17 的加法(g_odata[5] == 15 时加 1 得 16,再加 1 得 0)
    21 
    22     atomicDec((unsigned int *)&g_odata[6], 136);// 6号位做模 137 的减法(g_odata[5] == 0 时减 1 得 136,再减 1 得 135)
    23 
    24     atomicCAS(&g_odata[7], tid - 1, tid);       // 7号位迭代 (g_odata[7] == tid - 1) ? tid : (g_odata[7]); 
    25                                                 // 即以 g_odata[7] 初值为起点,增量为 1 的子序列的最大长度(一旦有增量为 1 的元素插到前面去该值就再也不变)
    26     // 位运算原子指令
    27     atomicAnd(&g_odata[8], 2*tid+7);            // 8号位为 1,注意 (2k+7)%2 == 1 但 (2k+7)%(2^m) == 0 或 1,即仅最后一位能保证为 1
    28 
    29     atomicOr(&g_odata[9], 1 << tid);            // 9号位为 -1,所有的位均为 1
    30 
    31     atomicXor(&g_odata[10], tid);               // 10号位为 255,注意异或运算具有交换律和结合律,硬算
    32 }
    33 
    34 #endif // #ifndef _SIMPLEATOMICS_KERNEL_H_
     1 /*simpleAtomicIntrinsics_cpu.cpp*/
     2 #include <stdio.h>
     3 
     4 extern "C" int computeGold(int *gpuData, const int len);
     5 
     6 int computeGold(int *gpuData, const int len)
     7 {
     8     if (gpuData[0] != 10 * len)
     9     {
    10         printf("atomicAdd failed
    ");
    11         return false;
    12     }
    13     if (gpuData[1] != -10 * len)
    14     {
    15         printf("atomicSub failed
    ");
    16         return false;
    17     }
    18     if (gpuData[2] < 0 || gpuData[2] >= len)// gpuData[2] ∈ [0, len)
    19     {
    20         printf("atomicExch failed
    ");
    21         return false;
    22     }
    23     if (gpuData[3] != len - 1)
    24     {
    25         printf("atomicMax failed
    ");
    26         return false;
    27     }
    28     if (gpuData[4]!=0)
    29     {
    30         printf("atomicMin failed
    ");
    31         return false;
    32     }
    33     if (gpuData[5] != len % 17)
    34     {
    35         printf("atomicInc failed
    ");
    36         return false;
    37     }
    38     if (gpuData[6] != 137 - len % 137)
    39     {
    40         printf("atomicDec failed
    ");
    41         return false;
    42     }
    43     if (gpuData[7] < 0 || gpuData[7] >= len)// gpuData[7] ∈ [0, len)
    44     {
    45         printf("atomicCAS failed
    ");
    46         return false;
    47     }
    48     if (gpuData[8] != 1)
    49     {
    50         printf("atomicAnd failed
    ");
    51         return false;
    52     }
    53     if (gpuData[9] != -1) 
    54     {
    55         printf("atomicOr failed
    ");
    56         return false;
    57     }
    58     if (gpuData[10] != 0xff)
    59     {
    60         printf("atomicXor failed
    ");
    61         return false;
    62     }
    63     return true;
    64 }
     1 #include <stdio.h>
     2 #include <windows.h>
     3 #include <cuda_runtime.h>
     4 #include <helper_functions.h>
     5 #include <helper_cuda.h>
     6 #include "simpleAtomicIntrinsics_kernel.cuh"
     7 
     8 #define WINDOWS_LEAN_AND_MEAN
     9 #define NOMINMAX
    10 
    11 extern "C" bool computeGold(int *gpuData, const int len);
    12 
    13 bool runTest()
    14 {   
    15     bool testResult = false;
    16     unsigned int numThreads = 256;
    17     unsigned int numBlocks = 64;
    18     unsigned int numData = 11;
    19     unsigned int memSize = sizeof(int) * numData;
    20 
    21     int *h_data = (int *) malloc(memSize);
    22     for (unsigned int i = 0; i < numData; h_data[i] = 0, i++);  // 初始化为全零
    23     h_data[8] = h_data[10] = 0xff;                              // 搞点非零值
    24 
    25     int *d_data;
    26     cudaMalloc((void **) &d_data, memSize);
    27     cudaMemcpy(d_data, h_data, memSize, cudaMemcpyHostToDevice);
    28 
    29     // 输出运算前的结果
    30     printf("
    	Before:");                                             
    31     for (int i = 0; i < numData; i++)
    32         printf("%8d,", h_data[i]);
    33     printf("
    ");
    34 
    35     // 计算和计时
    36     StopWatchInterface *timer;
    37     sdkCreateTimer(&timer);
    38     sdkStartTimer(&timer);
    39 
    40     testKernel << <numBlocks, numThreads >> > (d_data);
    41     getLastCudaError("Kernel execution failed");
    42 
    43     sdkStopTimer(&timer);
    44     printf("
    Processing time: %f ms
    ", sdkGetTimerValue(&timer));
    45     sdkDeleteTimer(&timer);
    46 
    47     cudaMemcpy(h_data, d_data, memSize, cudaMemcpyDeviceToHost);
    48 
    49     // 输出运算后的结果
    50     printf("
    	After :");                                             
    51     for (int i = 0; i < numData; i++)
    52         printf("%8d,", h_data[i]);
    53     printf("
    ");
    54 
    55     testResult = computeGold(h_data, numThreads * numBlocks);
    56 
    57     free(h_data);
    58     cudaFree(d_data);
    59 
    60     return testResult;
    61 }
    62 
    63 int main()
    64 {
    65     bool testResult;                    
    66 
    67     printf("
    	Started!
    ");
    68 
    69     testResult = runTest();
    70 
    71     printf("
    	Completed! main function returned %s
    ", testResult ? "OK!" : "ERROR!");
    72     getchar();
    73 
    74     return 0;
    75 }

    ▶ 源代码:即时编译

    1 /*simpleAtomicIntrinsics_kernel.cuh 发生变化的地方*/
    2 extern "C" __global__ void testKernel(int *g_odata)
    1 /*simpleAtomicIntrinsics_cpu.cpp 完全一样*/
     1 /*simpleAtomicIntrinsics.cpp*/
     2 #include <stdio.h>
     3 #include <windows.h>
     4 #include <cuda_runtime.h>
     5 #include <nvrtc_helper.h>
     6 #include <helper_functions.h>// includes cuda.h and cuda_runtime_api.h
     7 
     8 #define WINDOWS_LEAN_AND_MEAN
     9 #define NOMINMAX
    10 
    11 extern "C" bool computeGold(int *gpuData, const int len);
    12 
    13 bool runTest()
    14 {
    15     bool testResult = false;
    16     unsigned int numThreads = 256;
    17     unsigned int numBlocks = 64;
    18     unsigned int numData = 11;
    19     unsigned int memSize = sizeof(int) * numData;
    20 
    21     //即时编译过程
    22     char *kernel_file = sdkFindFilePath("simpleAtomicIntrinsics_kernel.cuh", NULL);
    23     char *ptx;
    24     size_t ptxSize;
    25     compileFileToPTX(kernel_file, 0, NULL, &ptx, &ptxSize);
    26     CUmodule module = loadPTX(ptx, 1, NULL);
    27     CUfunction kernel_addr;
    28     cuModuleGetFunction(&kernel_addr, module, "testKernel");
    29 
    30     int *h_data = (int *) malloc(memSize);
    31     for (unsigned int i = 0; i < numData; h_data[i] = 0, i++);
    32     h_data[8] = h_data[10] = 0xff;
    33 
    34     CUdeviceptr d_data;
    35     cuMemAlloc(&d_data, memSize);
    36     cuMemcpyHtoD(d_data, h_data, memSize);
    37 
    38     dim3 cudaBlockSize(numThreads,1,1);
    39     dim3 cudaGridSize(numBlocks, 1, 1);
    40     void *arr[] = { (void *)&d_data };
    41     cuLaunchKernel(kernel_addr, cudaGridSize.x, cudaGridSize.y, cudaGridSize.z,
    42         cudaBlockSize.x, cudaBlockSize.y, cudaBlockSize.z, 0, 0, &arr[0], 0);
    43 
    44     cuCtxSynchronize();
    45 
    46     cuMemcpyDtoH(h_data, d_data, memSize);
    47 
    48     testResult = computeGold(h_data, numThreads * numBlocks);
    49 
    50     free(h_data);
    51     cuMemFree(d_data);
    52 
    53     return testResult;
    54 }
    55 
    56 int main()
    57 {
    58     bool testResult;
    59 
    60     printf("
    	Started!
    ");
    61 
    62     testResult = runTest();
    63 
    64     printf("
    	Completed! main function returned %s
    ", testResult ? "OK!" : "ERROR!");
    65     getchar();
    66 
    67     return 0;
    68 }

    ▶ 输出结果:

        Started!
    
        Before:       0,       0,       0,       0,       0,       0,       0,       0,     255,       0,     255,
    
    Processing time: 0.035352 ms
    
        After :  163840, -163840,   16287,   16383,       0,      13,      56,       7,       1,      -1,     255,
    
        Completed! main function returned OK!

    ▶ 涨姿势

    ● 一个有趣的数列:命 x0 = 0,xn = xn-1 XOR n,则有 x4n == 4n,x4n+1 = 1, x4n+2 == 4n+3, x4n+3 == 0。当改变初值的时候该表达式发生变化,结果如下图。三种颜色分别代表初始选作右边三个值的时候的结果。

    ● 解毒 device_atomic_functions.h 与原子操作。只保留了有效部分,去掉了注释和留白。

      1 #if !defined(__DEVICE_ATOMIC_FUNCTIONS_HPP__)
      2     #define __DEVICE_ATOMIC_FUNCTIONS_HPP__
      3 
      4     #if defined(__CUDACC_RTC__) // 主机编译
      5         #define __DEVICE_ATOMIC_FUNCTIONS_DECL__ __host__ __device__
      6     #else                       // 设备编译
      7         #define __DEVICE_ATOMIC_FUNCTIONS_DECL__ static __inline__ __device__
      8     #endif
      9 
     10     #if defined(__cplusplus) && defined(__CUDACC__)
     11 
     12 #include "builtin_types.h"
     13 #include "host_defines.h"
     14 
     15 // 整数原子加法。返回 *address 旧值,*address += val;
     16 __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicAdd(int *address, int val)
     17 {
     18     return __iAtomicAdd(address, val);
     19 }
     20 
     21 // 无符号整数原子加法
     22 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicAdd(unsigned int *address, unsigned int val)
     23 {
     24     return __uAtomicAdd(address, val);
     25 }
     26 
     27 // 整数原子减法,注意转换为加法来运算。返回 *address 旧值,*address -= val;。
     28 __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicSub(int *address, int val)
     29 {
     30     return __iAtomicAdd(address, (unsigned int)-(int)val);
     31 }
     32 
     33 // 无符号整数原子减法
     34 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicSub(unsigned int *address, unsigned int val)
     35 {
     36     return __uAtomicAdd(address, (unsigned int)-(int)val);
     37 }
     38 
     39 // 整数原子替换。返回 *address 旧值,*address = val;
     40 __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicExch(int *address, int val)
     41 {
     42     return __iAtomicExch(address, val);
     43 }
     44 
     45 // 无符号整数原子替换
     46 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicExch(unsigned int *address, unsigned int val)
     47 {
     48     return __uAtomicExch(address, val);
     49 }
     50 
     51 // 浮点原子替换
     52 __DEVICE_ATOMIC_FUNCTIONS_DECL__ float atomicExch(float *address, float val)
     53 {
     54     return __fAtomicExch(address, val);
     55 }
     56 
     57 // 整数原子取小。返回 *address 旧值,*address = MIN(*adress, val);
     58 __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicMin(int *address, int val)
     59 {
     60     return __iAtomicMin(address, val);
     61 }
     62 
     63 // 无符号整数原子取小
     64 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicMin(unsigned int *address, unsigned int val)
     65 {
     66     return __uAtomicMin(address, val);
     67 }
     68 
     69 // 整数原子取大。返回 *address 旧值,*address = MAX(*adress, val);
     70 __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicMax(int *address, int val)
     71 {
     72     return __iAtomicMax(address, val);
     73 }
     74 
     75 // 无符号整数原子取大
     76 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicMax(unsigned int *address, unsigned int val)
     77 {
     78     return __uAtomicMax(address, val);
     79 }
     80 
     81 // 无符号整数原子模加法。返回 *address 旧值,*address = (*adress + 1) % (val + 1);
     82 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicInc(unsigned int *address, unsigned int val)
     83 {
     84     return __uAtomicInc(address, val);
     85 }
     86 
     87 // 无符号整数原子模减法。返回 *address 旧值,*address = (*adress + val) % (val + 1);
     88 // 不用 (*adress - 1) 是为了把结果控制在 [0, val] 中,防止变成负数,这与 C 中的 % 运算不同                                      
     89 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicDec(unsigned int *address, unsigned int val)
     90 {                                                                                               
     91     return __uAtomicDec(address, val);
     92 }
     93 
     94 // 整数原子按位且。返回 *address 旧值,*adress &= val;
     95 __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicAnd(int *address, int val)
     96 {
     97     return __iAtomicAnd(address, val);
     98 }
     99 
    100 // 无符号整数原子按位且
    101 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicAnd(unsigned int *address, unsigned int val)
    102 {
    103     return __uAtomicAnd(address, val);
    104 }
    105 
    106 // 整数原子按位或。返回 *address 旧值,*adress |= val;
    107 __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicOr(int *address, int val)
    108 {
    109     return __iAtomicOr(address, val);
    110 }
    111 
    112 // 无符号整数原子按位或
    113 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicOr(unsigned int *address, unsigned int val)
    114 {
    115     return __uAtomicOr(address, val);
    116 }
    117 
    118 // 整数原子按位异或。返回 *address 旧值,*adress ^= val;
    119 __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicXor(int *address, int val)
    120 {
    121     return __iAtomicXor(address, val);
    122 }
    123 
    124 // 无符号整数原子按位异或
    125 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicXor(unsigned int *address, unsigned int val)
    126 {
    127     return __uAtomicXor(address, val);
    128 }
    129 
    130 // 整数原子比较赋值。返回 *address 旧值,*address = (*address == compare) ? val : *address;
    131 __DEVICE_ATOMIC_FUNCTIONS_DECL__ int atomicCAS(int *address, int compare, int val)
    132 {
    133     return __iAtomicCAS(address, compare, val);
    134 }
    135 
    136 // 无符号整数原子比较赋值
    137 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned int atomicCAS(unsigned int *address, unsigned int compare, unsigned int val)
    138 {
    139     return __uAtomicCAS(address, compare, val);
    140 }
    141 
    142 // 无符号长整数原子加法
    143 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned long long int atomicAdd(unsigned long long int *address, unsigned long long int val)
    144 {
    145     return __ullAtomicAdd(address, val);
    146 }
    147 
    148 // 无符号长整数原子替换
    149 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned long long int atomicExch(unsigned long long int *address, unsigned long long int val)
    150 {
    151     return __ullAtomicExch(address, val);
    152 }
    153 
    154 // 无符号长整数原子比较赋值
    155 __DEVICE_ATOMIC_FUNCTIONS_DECL__ unsigned long long int atomicCAS(unsigned long long int *address, unsigned long long int compare, unsigned long long int val)
    156 {
    157     return __ullAtomicCAS(address, compare, val);
    158 }
    159 
    160 // 原子存在量词
    161 __DEVICE_ATOMIC_FUNCTIONS_DECL__ bool any(bool cond)
    162 {
    163     return (bool)__any((int)cond);
    164 }
    165 
    166 // 原子全称量词
    167 __DEVICE_ATOMIC_FUNCTIONS_DECL__ bool all(bool cond)
    168 {
    169     return (bool)__all((int)cond);
    170 }
    171 
    172     #endif /* __cplusplus && __CUDACC__ */
    173 
    174 #undef __DEVICE_ATOMIC_FUNCTIONS_DECL__
    175 
    176 #endif /* !__DEVICE_ATOMIC_FUNCTIONS_HPP__ */

    ● 原子操作函数声明在 device_functions.h 中。

      当设备计算能力 > 320 或 > 600 时开放各原子操作对应的 block 和 system 函数,并开放对应 long long、float、double 型数据的同一个函数,例如:

     1 #if !defined(__CUDACC_RTC__) || __CUDA_ARCH__ >= 600
     2 __DEVICE_FUNCTIONS_STATIC_DECL__ float __fAtomicAdd_block(float *p, float val);
     3 
     4 __DEVICE_FUNCTIONS_STATIC_DECL__ float __fAtomicAdd_system(float *p, float val);
     5 
     6 __DEVICE_FUNCTIONS_STATIC_DECL__ double __dAtomicAdd(double *p, double val);
     7 
     8 __DEVICE_FUNCTIONS_STATIC_DECL__ double __dAtomicAdd_block(double *p, double val);
     9 
    10 __DEVICE_FUNCTIONS_STATIC_DECL__ double __dAtomicAdd_system(double *p, double val);
    11 #endif /* !__CUDACC_RTC__ || __CUDA_ARCH__ >= 600 */

      计算能力 600 以上能用的所有原子操作:

      1 #define __DEVICE_FUNCTIONS_STATIC_DECL__ __host__ __device__ __cudart_builtin__
      2 
      3 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicAdd(int *p, int val);
      4 
      5 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicAdd_block(int *p, int val);
      6 
      7 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicAdd_system(int *p, int val);
      8 
      9 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicAdd(unsigned int *p, unsigned int val);
     10 
     11 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicAdd_block(unsigned int *p, unsigned int val);
     12 
     13 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicAdd_system(unsigned int *p, unsigned int val);
     14 
     15 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicAdd(unsigned long long *p, unsigned long long val);
     16 
     17 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicAdd_block(unsigned long long *p, unsigned long long val);
     18 
     19 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicAdd_system(unsigned long long *p, unsigned long long val);
     20 
     21 __DEVICE_FUNCTIONS_STATIC_DECL__ float __fAtomicAdd(float *p, float val);
     22 
     23 __DEVICE_FUNCTIONS_STATIC_DECL__ float __fAtomicAdd_block(float *p, float val);
     24 
     25 __DEVICE_FUNCTIONS_STATIC_DECL__ float __fAtomicAdd_system(float *p, float val);
     26 
     27 __DEVICE_FUNCTIONS_STATIC_DECL__ double __dAtomicAdd(double *p, double val);
     28 
     29 __DEVICE_FUNCTIONS_STATIC_DECL__ double __dAtomicAdd_block(double *p, double val);
     30 
     31 __DEVICE_FUNCTIONS_STATIC_DECL__ double __dAtomicAdd_system(double *p, double val);
     32 
     33 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicExch(int *p, int val);
     34 
     35 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicExch_block(int *p, int val);
     36 
     37 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicExch_system(int *p, int val);
     38 
     39 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicExch(unsigned int *p, unsigned int val);
     40 
     41 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicExch_block(unsigned int *p, unsigned int val);
     42 
     43 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicExch_system(unsigned int *p, unsigned int val);
     44 
     45 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicExch(unsigned long long *p, unsigned long long val);
     46 
     47 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicExch_block(unsigned long long *p, unsigned long long val);
     48 
     49 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicExch_system(unsigned long long *p, unsigned long long val);
     50 
     51 __DEVICE_FUNCTIONS_STATIC_DECL__ float __fAtomicExch(float *p, float val);
     52 
     53 __DEVICE_FUNCTIONS_STATIC_DECL__ float __fAtomicExch_block(float *p, float val);
     54 
     55 __DEVICE_FUNCTIONS_STATIC_DECL__ float __fAtomicExch_system(float *p, float val);
     56 
     57 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicMin(int *p, int val);
     58 
     59 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicMin_block(int *p, int val);
     60 
     61 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicMin_system(int *p, int val);
     62 
     63 __DEVICE_FUNCTIONS_STATIC_DECL__ long long __illAtomicMin(long long *p, long long val);
     64 
     65 __DEVICE_FUNCTIONS_STATIC_DECL__ long long __illAtomicMin_block(long long *p, long long val);
     66 
     67 __DEVICE_FUNCTIONS_STATIC_DECL__ long long __illAtomicMin_system(long long *p, long long val);
     68 
     69 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicMin(unsigned int *p, unsigned int val);
     70 
     71 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicMin_block(unsigned int *p, unsigned int val);
     72 
     73 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicMin_system(unsigned int *p, unsigned int val);
     74 
     75 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicMin(unsigned long long *p, unsigned long long val);
     76 
     77 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicMin_block(unsigned long long *p, unsigned long long val);
     78 
     79 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicMin_system(unsigned long long *p, unsigned long long val);
     80 
     81 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicMax(int *p, int val);
     82 
     83 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicMax_block(int *p, int val);
     84 
     85 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicMax_system(int *p, int val);
     86 
     87 __DEVICE_FUNCTIONS_STATIC_DECL__ long long __illAtomicMax(long long *p, long long val);
     88 
     89 __DEVICE_FUNCTIONS_STATIC_DECL__ long long __illAtomicMax_block(long long *p, long long val);
     90 
     91 __DEVICE_FUNCTIONS_STATIC_DECL__ long long __illAtomicMax_system(long long *p, long long val);
     92 
     93 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicMax(unsigned int *p, unsigned int val);
     94 
     95 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicMax_block(unsigned int *p, unsigned int val);
     96 
     97 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicMax_system(unsigned int *p, unsigned int val);
     98 
     99 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicMax(unsigned long long *p, unsigned long long val);
    100 
    101 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicMax_block(unsigned long long *p, unsigned long long val);
    102 
    103 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicMax_system(unsigned long long *p, unsigned long long val);
    104 
    105 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicInc(unsigned int *p, unsigned int val);
    106 
    107 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicInc_block(unsigned int *p, unsigned int val);
    108 
    109 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicInc_system(unsigned int *p, unsigned int val);
    110 
    111 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicDec(unsigned int *p, unsigned int val);
    112 
    113 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicDec_block(unsigned int *p, unsigned int val);
    114 
    115 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicDec_system(unsigned int *p, unsigned int val);
    116 
    117 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicCAS(int *p, int compare, int val);
    118 
    119 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicCAS_block(int *p, int compare, int val);
    120 
    121 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicCAS_system(int *p, int compare, int val);
    122 
    123 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicCAS(unsigned int *p, unsigned int compare, unsigned int val);
    124 
    125 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicCAS_block(unsigned int *p, unsigned int compare, unsigned int val);
    126 
    127 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicCAS_system(unsigned int *p, unsigned int compare     unsigned int val);
    128 
    129 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long int __ullAtomicCAS(unsigned long long int *p     unsigned long long int compare     unsigned long long int val);
    130 
    131 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long int __ullAtomicCAS_block(unsigned long long int *p     unsigned long long int compare     unsigned long long int val);
    132 
    133 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long int __ullAtomicCAS_system(unsigned long long int *p     unsigned long long int compare     unsigned long long int val);
    134 
    135 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicAnd(int *p, int val);
    136 
    137 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicAnd_block(int *p, int val);
    138 
    139 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicAnd_system(int *p, int val);
    140 
    141 __DEVICE_FUNCTIONS_STATIC_DECL__ long long int __llAtomicAnd(long long int *p, long long int val);
    142 
    143 __DEVICE_FUNCTIONS_STATIC_DECL__ long long __llAtomicAnd_block(long long *p, long long val);
    144 
    145 __DEVICE_FUNCTIONS_STATIC_DECL__ long long __llAtomicAnd_system(long long *p, long long val);
    146 
    147 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicAnd(unsigned int *p, unsigned int val);
    148 
    149 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicAnd_block(unsigned int *p, unsigned int val);
    150 
    151 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicAnd_system(unsigned int *p, unsigned int val);
    152 
    153 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long int __ullAtomicAnd(unsigned long long int *p     unsigned long long int val);
    154 
    155 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicAnd_block(unsigned long long *p, unsigned long long val);
    156 
    157 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicAnd_system(unsigned long long *p, unsigned long long val);
    158 
    159 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicOr(int *p, int val);
    160 
    161 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicOr_block(int *p, int val);
    162 
    163 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicOr_system(int *p, int val);
    164 
    165 __DEVICE_FUNCTIONS_STATIC_DECL__ long long int __llAtomicOr(long long int *p, long long int val);
    166 
    167 __DEVICE_FUNCTIONS_STATIC_DECL__ long long __llAtomicOr_block(long long *p, long long val);
    168 
    169 __DEVICE_FUNCTIONS_STATIC_DECL__ long long __llAtomicOr_system(long long *p, long long val);
    170 
    171 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicOr(unsigned int *p, unsigned int val);
    172 
    173 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicOr_block(unsigned int *p, unsigned int val);
    174 
    175 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicOr_system(unsigned int *p, unsigned int val);
    176 
    177 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long int __ullAtomicOr(unsigned long long int *p     unsigned long long int val);
    178 
    179 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicOr_block(unsigned long long *p, unsigned long long val);
    180 
    181 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicOr_system(unsigned long long *p, unsigned long long val);
    182 
    183 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicXor(int *p, int val);
    184 
    185 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicXor_block(int *p, int val);
    186 
    187 __DEVICE_FUNCTIONS_STATIC_DECL__ int __iAtomicXor_system(int *p, int val);
    188 
    189 __DEVICE_FUNCTIONS_STATIC_DECL__ long long int __llAtomicXor(long long int *p, long long int val);
    190 
    191 __DEVICE_FUNCTIONS_STATIC_DECL__ long long __llAtomicXor_block(long long *p, long long val);
    192 
    193 __DEVICE_FUNCTIONS_STATIC_DECL__ long long __llAtomicXor_system(long long *p, long long val);
    194 
    195 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicXor(unsigned int *p, unsigned int val);
    196 
    197 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicXor_block(unsigned int *p, unsigned int val);
    198 
    199 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned int __uAtomicXor_system(unsigned int *p, unsigned int val);
    200 
    201 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long int __ullAtomicXor(unsigned long long int *p     unsigned long long int val);
    202 
    203 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicXor_block(unsigned long long *p, unsigned long long val);
    204 
    205 __DEVICE_FUNCTIONS_STATIC_DECL__ unsigned long long __ullAtomicXor_system(unsigned long long *p, unsigned long long val);
  • 相关阅读:
    25 BasicUsageEnvironment0基本使用环境基类——Live555源码阅读(三)UsageEnvironment
    26 BasicUsageEnvironment基本使用环境——Live555源码阅读(三)UsageEnvironment
    24 UsageEnvironment使用环境抽象基类——Live555源码阅读(三)UsageEnvironment
    23 使用环境 UsageEnvironment——Live555源码阅读
    关于linux中文乱码的问题。
    关于ubuntukylin安装后界面中英文混杂的问题
    网络数据包发送工具PacketSender中文源码
    21 BasicTaskScheduler基本任务调度器(一)——Live555源码阅读(一)任务调度相关类
    20 BasicTaskScheduler0 基本任务调度类基类(二)——Live555源码阅读(一)任务调度相关类
    19 BasicTaskScheduler0 基本任务调度类基类(一)——Live555源码阅读(一)任务调度相关类
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/7777814.html
Copyright © 2020-2023  润新知