• cuda_opencv 矩阵相加


    实现矩阵相加:将 OpenCV 的 Mat(CV_32S)数据拷贝到 GPU,用 CUDA 核函数逐元素相加,再把结果拷回主机并打印。

     1 #include <stdlib.h>
     2 #include <stdio.h>
     3 #include <opencv/cv.h>
     4 #include <opencv/highgui.h>
     5 #include <opencv2/opencv.hpp>
     6 
     7 #include "cuda_runtime.h"
     8 #include "device_launch_parameters.h"
     9 using namespace std;
    10 using namespace cv;
    11 
    /**
     * Element-wise sum of two int2 arrays: d_C[i] = d_A[i] + d_B[i], component by component.
     *
     * Launch layout: a 2D grid of 2D blocks; each thread handles at most one
     * int2 element located at (x, y) = (global thread x, global thread y).
     *
     * @param d_A    first input array (device pointer)
     * @param d_B    second input array (device pointer)
     * @param d_C    output array (device pointer)
     * @param width  number of int2 elements per row
     * @param height number of rows
     *
     * The arrays are indexed as densely packed rows (index = y * width + x),
     * i.e. no row padding/pitch is taken into account.
     */
    __global__ void Add_kernel(const int2* d_A, const int2* d_B,int2*  d_C,int width, int height)
    {
        const int col = blockIdx.x * blockDim.x + threadIdx.x;
        const int row = blockIdx.y * blockDim.y + threadIdx.y;

        // The grid is rounded up to whole blocks, so threads past the edge just leave.
        if (col >= width || row >= height)
        {
            return;
        }

        const int idx = row * width + col;

        // Fetch each operand as a whole int2, then store the summed pair.
        const int2 lhs = d_A[idx];
        const int2 rhs = d_B[idx];
        d_C[idx] = make_int2(lhs.x + rhs.x, lhs.y + rhs.y);
    }
    // Prints a diagnostic and terminates the program if a CUDA runtime call failed.
    static void checkCuda(cudaError_t status, const char* what)
    {
        if (status != cudaSuccess)
        {
            fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
            exit(EXIT_FAILURE);
        }
    }

    /**
     * Demo: builds a 3x4 CV_32S matrix with img(i,j) = i + j, adds it to itself
     * on the GPU with Add_kernel, copies the result back into the same matrix
     * and prints it.
     *
     * Returns 0 on success, 1 if the matrix layout cannot be processed as int2;
     * any CUDA failure terminates the program via checkCuda().
     */
    int main()
    {
        Mat img(3, 4, CV_32S, Scalar_<int>(0));

        cout<<img<<endl;
        cout<<endl;

        // Fill the matrix so that every element equals row index + column index.
        for(int i = 0 ; i < img.rows; i++)
        {
            for(int j = 0 ; j < img.cols; j++)
            {
                img.at<int>(i,j)=i+j;
            }
        }
        cout<<endl;

        cout<<img<<endl;

        // Add_kernel views the buffers as densely packed int2 elements, so the
        // host data must have no row padding and each row must hold a whole
        // number of int2 pairs.
        if(!img.isContinuous() || img.cols % 2 != 0)
        {
            fprintf(stderr, "Matrix must be continuous with an even number of columns\n");
            return 1;
        }

        // Extent as the kernel sees it: columns are counted in int2 units
        // (two ints each), rows are unchanged. Passing the int counts here
        // would make the kernel touch twice the allocated memory.
        const int width  = img.cols / 2;
        const int height = img.rows;

        size_t memSize = img.step * img.rows;
        int2* d_A = NULL;
        int2* d_B = NULL;
        int2* d_C = NULL;
        checkCuda(cudaMalloc((void**)&d_A, memSize), "cudaMalloc(d_A)");
        checkCuda(cudaMalloc((void**)&d_B, memSize), "cudaMalloc(d_B)");
        checkCuda(cudaMalloc((void**)&d_C, memSize), "cudaMalloc(d_C)");

        // Both operands are the same matrix, so the expected result is 2 * img.
        checkCuda(cudaMemcpy(d_A,img.data,memSize, cudaMemcpyHostToDevice), "cudaMemcpy(d_A)");
        checkCuda(cudaMemcpy(d_B,img.data,memSize, cudaMemcpyHostToDevice), "cudaMemcpy(d_B)");

        // x spans the columns (width), y spans the rows (height); round up so
        // partial blocks at the edges are still launched.
        dim3 threads(16, 16);
        dim3 grids((width + threads.x - 1)/threads.x,(height + threads.y - 1)/threads.y);
        Add_kernel<<<grids,threads>>>(d_A, d_B, d_C, width, height);
        // A kernel launch returns no status; launch errors are reported here.
        checkCuda(cudaGetLastError(), "Add_kernel launch");

        // Blocking copy: also reports errors raised while the kernel was running.
        checkCuda(cudaMemcpy(img.data, d_C,memSize,cudaMemcpyDeviceToHost), "cudaMemcpy(d_C)");
        cout<<"GPU"<<endl;
        cout<<img<<endl;
        checkCuda(cudaFree(d_A), "cudaFree(d_A)");
        checkCuda(cudaFree(d_B), "cudaFree(d_B)");
        checkCuda(cudaFree(d_C), "cudaFree(d_C)");

        // Keeps the console window open when run on Windows ("pause" is a cmd.exe command).
        system("pause");
        return 0;
    }
  • 相关阅读:
    Linux从入门到入门到入门(一)
    计算机网络学习笔记
    计算机网络学习笔记
    c语言数据结构学习心得——排序
    c语言数据结构学习心得——查找
    adb 命令小结
    软件测试之面试题分享
    关于app启动时间测试的小思考
    软件缺陷
    黑盒测试,白盒测试
  • 原文地址:https://www.cnblogs.com/LzKlyhPorter/p/4611281.html
Copyright © 2020-2023  润新知