#include <stdio.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <book.h>
#include <gputimer.h>

// Number of elements in each of the three vectors.
#define N (33 * 1024)

/**
 * Element-wise vector addition on the device: c[i] = a[i] + b[i] for i in [0, N).
 *
 * Launch layout: 1D grid of 1D blocks. Each thread starts at its global index
 * and advances by the total number of launched threads (grid-stride loop), so
 * the result is complete for any grid/block size, including launches with
 * fewer threads than N.
 *
 * @param a  device pointer to the first input vector (at least N ints)
 * @param b  device pointer to the second input vector (at least N ints)
 * @param c  device pointer to the output vector (at least N ints)
 */
__global__ void add(int *a, int *b, int *c){
    // Total number of threads in the launch; loop-invariant, so compute it once.
    const int stride = blockDim.x * gridDim.x;

    for (int tid = threadIdx.x + blockIdx.x * blockDim.x; tid < N; tid += stride){
        c[tid] = a[tid] + b[tid];
    }
}

/**
 * Fills two host vectors, adds them on the GPU, and verifies the result
 * against the same sum computed on the host.
 *
 * HANDLE_ERROR comes from book.h (not shown here); it is presumably a macro
 * that reports a failing cudaError_t and aborts -- confirm against that header.
 *
 * @return 0 when every element matches, 1 when at least one mismatch is found.
 */
int main(void){
    const size_t bytes = N * sizeof(int);

    int a[N], b[N], c[N];
    int *dev_a, *dev_b, *dev_c;

    // Device buffers for the two inputs and the output.
    HANDLE_ERROR(cudaMalloc((void **)&dev_a, bytes));
    HANDLE_ERROR(cudaMalloc((void **)&dev_b, bytes));
    HANDLE_ERROR(cudaMalloc((void **)&dev_c, bytes));

    // Host inputs: a[i] = i, b[i] = i^2.
    for (int i = 0; i < N; i++){
        a[i] = i;
        b[i] = i*i;
    }

    HANDLE_ERROR(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice));
    HANDLE_ERROR(cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice));

    // 128 blocks x 128 threads is fewer threads than N; the kernel's loop
    // covers the remaining elements.
    const int blocks = 128;
    const int threadsPerBlock = 128;
    add<<<blocks, threadsPerBlock>>>(dev_a, dev_b, dev_c);

    // A kernel launch returns no status itself; launch-time failures are only
    // visible through cudaGetLastError().
    HANDLE_ERROR(cudaGetLastError());

    // Blocking copy: waits for the kernel to finish and reports any error
    // raised while it was executing.
    HANDLE_ERROR(cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost));

    // Compare every device result with the host-side sum.
    bool success = true;
    for (int i = 0; i < N; i++){
        if (a[i] + b[i] != c[i]){
            printf("Error: %d + %d != %d ", a[i], b[i], c[i]);
            success = false;
        }
    }
    if (success)
        printf("We did it! ");

    HANDLE_ERROR(cudaFree(dev_a));
    HANDLE_ERROR(cudaFree(dev_b));
    HANDLE_ERROR(cudaFree(dev_c));

    // Report verification failure through the exit status as well.
    return success ? 0 : 1;
}