实现向量相加……实现方式与矩阵相加一样。
#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include <opencv/cv.hpp>
#include <opencv/highgui.h>

#include <cuda_runtime.h>
#include <device_launch_parameters.h>

using namespace std;
using namespace cv;

#define N 10

// Evaluates a CUDA runtime call and terminates the program with the file,
// line and CUDA error string if the call did not return cudaSuccess.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cudaStatus_ = (call);                                  \
        if (cudaStatus_ != cudaSuccess) {                                  \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(cudaStatus_));                      \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

/*
 * Element-wise addition of two int2 arrays: d_c[i] = d_a[i] + d_b[i].
 *
 * d_a, d_b : device pointers to the input arrays (read only).
 * d_c      : device pointer to the output array.
 * pp       : number of int2 elements in each array (NOT the number of
 *            individual ints; every int2 carries two of them).
 *
 * Launch layout: 1D grid of 1D blocks, one thread per int2 element. Threads
 * whose global index is >= pp do nothing, so the grid may be rounded up.
 */
__global__ void Add_kernel(const int2* d_a, const int2* d_b, int2* d_c, int pp)
{
    // Global index, so the kernel stays correct when launched with more
    // than one block.
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx < pp)
    {
        const int2 lhs = d_a[idx];
        const int2 rhs = d_b[idx];
        d_c[idx] = make_int2(lhs.x + rhs.x, lhs.y + rhs.y);
    }
}

/*
 * Builds two 1 x N matrices of 32-bit ints (filled with 0 and 10), adds them
 * on the GPU, stores the sum back into the first matrix and prints it.
 * Returns 0 on success, 1 if the buffer size cannot be processed as int2.
 */
int main()
{
    Mat a(1, N, CV_32S, Scalar_<int>(0));
    Mat b(1, N, CV_32S, Scalar_<int>(10));

    cout << a << endl;
    cout << endl;
    cout << b << endl;
    cout << endl;

    // Size in bytes of one matrix (row stride times row count).
    const size_t memSize = a.step * a.rows;

    // The kernel consumes the data two ints at a time; a buffer that is not
    // a whole number of int2 elements would leave a trailing int unprocessed.
    if (memSize % sizeof(int2) != 0)
    {
        fprintf(stderr, "Buffer of %lu bytes is not a whole number of int2 elements\n",
                (unsigned long)memSize);
        return 1;
    }

    // Number of int2 elements per buffer. Passing N here (the number of
    // ints) would make the kernel touch memory past the end of the buffers.
    const int pairCount = (int)(memSize / sizeof(int2));

    int2* d_a = NULL;
    int2* d_b = NULL;
    int2* d_c = NULL;

    CUDA_CHECK(cudaMalloc((void**)&d_a, memSize));
    CUDA_CHECK(cudaMalloc((void**)&d_b, memSize));
    CUDA_CHECK(cudaMalloc((void**)&d_c, memSize));

    CUDA_CHECK(cudaMemcpy(d_a, a.data, memSize, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_b, b.data, memSize, cudaMemcpyHostToDevice));

    if (pairCount > 0)
    {
        const int threadsPerBlock = 256;
        // Ceiling division so every element is covered by a thread.
        const int blockCount = (pairCount + threadsPerBlock - 1) / threadsPerBlock;

        Add_kernel<<<blockCount, threadsPerBlock>>>(d_a, d_b, d_c, pairCount);
        // A kernel launch returns nothing; configuration errors are only
        // visible through cudaGetLastError().
        CUDA_CHECK(cudaGetLastError());
    }

    // The sum overwrites the contents of `a` on the host.
    CUDA_CHECK(cudaMemcpy(a.data, d_c, memSize, cudaMemcpyDeviceToHost));

    cout << endl;
    cout << a << endl;

    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    CUDA_CHECK(cudaFree(d_c));

    system("pause");
    return 0;
}