Visual Studio工程配置情况:
VC++目录配置:
1 C:\ProgramData\NVIDIA Corporation\CUDA Samples\v10.1\common\lib\x64 2 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\lib\x64 3 C:\ProgramData\NVIDIA Corporation\CUDA Samples\v10.1\common\inc 4 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\include
链接器配置:
1 $(CudaToolkitLibDir) 2 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\lib\x64
输入:
1 cublas.lib 2 cuda.lib 3 cudadevrt.lib 4 cudart.lib 5 cudart_static.lib 6 cufft.lib 7 cufftw.lib 8 curand.lib 9 cusolver.lib 10 cusparse.lib 11 nppc.lib 12 nppial.lib 13 nppicc.lib 14 nppicom.lib 15 nppidei.lib 16 nppif.lib 17 nppig.lib 18 nppim.lib 19 nppist.lib 20 nppisu.lib 21 nppitc.lib 22 npps.lib 23 nvblas.lib 24 nvgraph.lib 25 nvml.lib 26 nvrtc.lib 27 OpenCL.lib
测试代码:
1 #include <iostream> 2 #include <cuda.h> 3 #include "cuda_runtime.h" 4 #include <cuda_runtime_api.h> 5 #include "device_launch_parameters.h" 6 #include <device_functions.h> 7 8 int main(void) 9 { 10 int deviceCount; 11 cudaGetDeviceCount(&deviceCount); 12 13 int dev; 14 for (dev = 0; dev < deviceCount; dev++) 15 { 16 int driver_version(0), runtime_version(0); 17 cudaDeviceProp deviceProp; 18 cudaGetDeviceProperties(&deviceProp, dev); 19 if (dev == 0) 20 if (deviceProp.minor = 9999 && deviceProp.major == 9999) 21 printf(" "); 22 printf(" Device%d:"%s" ", dev, deviceProp.name); 23 cudaDriverGetVersion(&driver_version); 24 printf("CUDA驱动版本: %d.%d ", driver_version / 1000, (driver_version % 1000) / 10); 25 cudaRuntimeGetVersion(&runtime_version); 26 printf("CUDA运行时版本: %d.%d ", runtime_version / 1000, (runtime_version % 1000) / 10); 27 printf("设备计算能力: %d.%d ", deviceProp.major, deviceProp.minor); 28 printf("Total amount of Global Memory: %u bytes ", deviceProp.totalGlobalMem); 29 printf("Number of SMs: %d ", deviceProp.multiProcessorCount); 30 printf("Total amount of Constant Memory: %u bytes ", deviceProp.totalConstMem); 31 printf("Total amount of Shared Memory per block: %u bytes ", deviceProp.sharedMemPerBlock); 32 printf("Total number of registers available per block: %d ", deviceProp.regsPerBlock); 33 printf("Warp size: %d ", deviceProp.warpSize); 34 printf("Maximum number of threads per SM: %d ", deviceProp.maxThreadsPerMultiProcessor); 35 printf("Maximum number of threads per block: %d ", deviceProp.maxThreadsPerBlock); 36 printf("Maximum size of each dimension of a block: %d x %d x %d ", deviceProp.maxThreadsDim[0],deviceProp.maxThreadsDim[1],deviceProp.maxThreadsDim[2]); 37 printf("Maximum size of each dimension of a grid: %d x %d x %d ", deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], deviceProp.maxGridSize[2]); 38 printf("Maximum memory pitch: %u bytes ", deviceProp.memPitch); 39 printf("Texture alignmemt: %u bytes ", deviceProp.texturePitchAlignment); 
40 printf("Clock rate: %.2f GHz ", deviceProp.clockRate * 1e-6f); 41 printf("Memory Clock rate: %.0f MHz ", deviceProp.memoryClockRate * 1e-3f); 42 printf("Memory Bus Width: %d-bit ", deviceProp.memoryBusWidth); 43 } 44 }
未完待续!
运行结果: