• CUDA实例练习(六):矢量求和


     1 #include <stdio.h>
     2 #include <cuda_runtime.h>
     3 #include <device_launch_parameters.h>
     4 #include <book.h>
     5 #include <gputimer.h>
     6 #define N (33 * 1024)
     7 
     /*
      * Element-wise vector sum: writes c[i] = a[i] + b[i] for every i in [0, N).
      *
      * Each thread begins at its global index (threadIdx.x + blockIdx.x *
      * blockDim.x) and then advances by the total number of launched threads
      * (blockDim.x * gridDim.x), so the launch does not need one thread per
      * element. The three pointers are indexed up to N - 1.
      */
     __global__ void add(int *a, int *b, int *c){
         // Total threads in the launch; the distance each thread jumps per pass.
         const unsigned int stride = blockDim.x * gridDim.x;

         for (int idx = threadIdx.x + blockIdx.x * blockDim.x; idx < N; idx += stride){
             c[idx] = a[idx] + b[idx];
         }
     }
    15 
    /*
     * Host driver for the vector-sum example.
     *
     * Fills two N-element host arrays (a[i] = i, b[i] = i * i), copies them to
     * the device, runs the add kernel, copies the result back and compares every
     * element against a[i] + b[i] computed on the host. Prints one "Error" line
     * per mismatching element, or "We did it!" when all elements match.
     *
     * Every CUDA runtime call is wrapped in HANDLE_ERROR (provided by book.h,
     * not shown in this listing).
     */
    int main(void){
        int a[N], b[N], c[N];
        int *dev_a, *dev_b, *dev_c;

        // Device buffers for the two inputs and the output.
        HANDLE_ERROR(cudaMalloc((void **)&dev_a, N * sizeof(int)));
        HANDLE_ERROR(cudaMalloc((void **)&dev_b, N * sizeof(int)));
        HANDLE_ERROR(cudaMalloc((void **)&dev_c, N * sizeof(int)));

        // Host-side input data.
        for (int i = 0; i < N; i++){
            a[i] = i;
            b[i] = i * i;
        }

        HANDLE_ERROR(cudaMemcpy(dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice));
        HANDLE_ERROR(cudaMemcpy(dev_b, b, N * sizeof(int), cudaMemcpyHostToDevice));

        // 128 blocks x 128 threads = 16384 threads, fewer than N (33 * 1024),
        // so each thread in the kernel handles more than one element.
        add<<<128, 128>>>(dev_a, dev_b, dev_c);
        // A kernel launch returns no status itself; query it explicitly so a bad
        // launch is reported here instead of going unnoticed.
        HANDLE_ERROR(cudaGetLastError());

        HANDLE_ERROR(cudaMemcpy(c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost));

        // Verify the device result against the same sum computed on the host.
        bool success = true;
        for (int i = 0; i < N; i++){
            if (a[i] + b[i] != c[i]){
                printf("Error: %d + %d != %d\n", a[i], b[i], c[i]);
                success = false;
            }
        }
        if (success)
            printf("We did it!\n");

        HANDLE_ERROR(cudaFree(dev_a));
        HANDLE_ERROR(cudaFree(dev_b));
        HANDLE_ERROR(cudaFree(dev_c));
        return 0;
    }
  • 相关阅读:
    友好城市, 美团笔试题
    字符串计数, 美团笔试题
    公交车, 美团笔试题
    交错序列, 美团笔试题
    题目列表, 美团笔试题, 字符串数组比较
    图的遍历, 美团笔试题
    最长全1串, 美团笔试题
    外卖满减, 美团笔试题
    种花, 美团笔试题
    考试策略, 美团笔试题
  • 原文地址:https://www.cnblogs.com/zhangshuwen/p/7305984.html
Copyright © 2020-2023  润新知