malloc()分配的内存与cudaHostAlloc()分配的内存之间存在着一个重要差异。C库函数malloc()将分配标准的、可分页的(Pageable)主机内存,而cudaHostAlloc()将分配页锁定的主机内存。页锁定内存也称为固定内存(Pinned Memory)或者不可分页内存,它有一个重要的属性:操作系统将不会对这块内存分页并交换到磁盘上,从而确保了该内存始终驻留在物理内存中。因此,操作系统能够安全地使某个应用程序访问该内存的物理地址,因为这块内存将不会被破坏或者重新定位。
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include "book.h"

// Number of int elements in each test buffer.
#define SIZE (10*1024*1024)

// Number of back-to-back copies performed per measurement. Used both by the
// timing loop and by the bandwidth calculation in main() so the two always agree.
#define COPY_ITERATIONS 100

/*
 * Times COPY_ITERATIONS cudaMemcpy calls of `bytes` bytes between `host` and `dev`.
 *
 *   host  - host buffer (allocated by the caller, with malloc or cudaHostAlloc)
 *   dev   - device buffer (allocated by the caller with cudaMalloc)
 *   bytes - size of each copy in bytes
 *   up    - true: copy host -> device; false: copy device -> host
 *
 * Returns the time between the two recorded events, in milliseconds, as
 * reported by cudaEventElapsedTime. Any CUDA failure is routed through
 * HANDLE_ERROR (defined in book.h).
 */
static float time_copies(int *host, int *dev, size_t bytes, bool up) {
    cudaEvent_t start, stop;
    float elapsedTime;

    // Pick the direction once instead of branching on every iteration.
    const cudaMemcpyKind kind = up ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost;
    void *dst = up ? (void *)dev : (void *)host;
    const void *src = up ? (const void *)host : (const void *)dev;

    HANDLE_ERROR(cudaEventCreate(&start));
    HANDLE_ERROR(cudaEventCreate(&stop));

    HANDLE_ERROR(cudaEventRecord(start, 0));
    for (int i = 0; i < COPY_ITERATIONS; i++) {
        HANDLE_ERROR(cudaMemcpy(dst, src, bytes, kind));
    }
    HANDLE_ERROR(cudaEventRecord(stop, 0));
    // Wait for the stop event to complete before reading the elapsed time.
    HANDLE_ERROR(cudaEventSynchronize(stop));
    HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime, start, stop));

    HANDLE_ERROR(cudaEventDestroy(start));
    HANDLE_ERROR(cudaEventDestroy(stop));

    return elapsedTime;
}

/*
 * Measures copy time using a host buffer obtained from malloc().
 *
 *   size - number of int elements to copy per iteration
 *   up   - true: host -> device; false: device -> host
 *
 * Returns the elapsed time in milliseconds for COPY_ITERATIONS copies.
 */
float cuda_malloc_test(int size, bool up) {
    int *a, *dev_a;
    const size_t bytes = (size_t)size * sizeof(*a);

    a = (int *)malloc(bytes);
    HANDLE_NULL(a);
    HANDLE_ERROR(cudaMalloc((void **)&dev_a, bytes));

    const float elapsedTime = time_copies(a, dev_a, bytes, up);

    free(a);
    HANDLE_ERROR(cudaFree(dev_a));

    return elapsedTime;
}

/*
 * Measures copy time using a host buffer obtained from cudaHostAlloc()
 * with cudaHostAllocDefault (page-locked host memory).
 *
 *   size - number of int elements to copy per iteration
 *   up   - true: host -> device; false: device -> host
 *
 * Returns the elapsed time in milliseconds for COPY_ITERATIONS copies.
 */
float cuda_host_alloc_test(int size, bool up) {
    int *a, *dev_a;
    const size_t bytes = (size_t)size * sizeof(*a);

    HANDLE_ERROR(cudaHostAlloc((void **)&a, bytes, cudaHostAllocDefault));
    HANDLE_ERROR(cudaMalloc((void **)&dev_a, bytes));

    const float elapsedTime = time_copies(a, dev_a, bytes, up);

    // Memory from cudaHostAlloc must be released with cudaFreeHost, not free().
    HANDLE_ERROR(cudaFreeHost(a));
    HANDLE_ERROR(cudaFree(dev_a));

    return elapsedTime;
}

/*
 * Prints one measurement: the elapsed time, then the resulting throughput.
 *
 *   label     - text identifying the allocation strategy being reported
 *   direction - "up" or "down"
 *   ms        - elapsed time in milliseconds
 *   megabytes - total data moved during the measurement, in MB
 */
static void report(const char *label, const char *direction, float ms, float megabytes) {
    printf("Time using %s: %3.1f ms\n", label, ms);
    // ms / 1000 converts to seconds, so the quotient is MB per second.
    printf("\tMB/s during copy %s: %3.1f\n", direction, megabytes / (ms / 1000));
}

/*
 * Runs the four measurements (malloc / cudaHostAlloc host buffers, each in
 * both copy directions) and prints time and throughput for each.
 */
int main(void) {
    // Total data moved per measurement, in MB.
    const float MB = (float)COPY_ITERATIONS * SIZE * sizeof(int) / 1024 / 1024;

    // The "cudaMalloc" label is kept from the original output; these two runs
    // use the malloc()-allocated host buffer.
    report("cudaMalloc", "up", cuda_malloc_test(SIZE, true), MB);
    report("cudaMalloc", "down", cuda_malloc_test(SIZE, false), MB);

    report("cudaHostAlloc", "up", cuda_host_alloc_test(SIZE, true), MB);
    report("cudaHostAlloc", "down", cuda_host_alloc_test(SIZE, false), MB);

    return 0;
}