https://developer.nvidia.com/cuda-gpus#compute 查询gpu及其Compute Capability
Q: ERROR: INVALID_CONFIG: The engine plan file is generated on an incompatible device, expecting compute 7.5 got compute 6.1, please rebuild.
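A: TensorRT 序列化生成的 engine(plan 文件)与构建时所用 GPU 的 Compute Capability 绑定,序列化和反序列化必须在 Compute Capability 相同的设备上配套进行;报错中的 7.5 与 6.1 说明生成 engine 的设备与当前加载它的设备不一致,需要在实际运行的设备上重新生成 engine。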
cuda-gdb 调试示例:
#include <iostream>
#include <stdio.h>
#include "book.h"
// Kernel: stores a + b into the int that c points to (main passes a cudaMalloc'ed pointer).
__global__ void add(int a, int b, int *c) {
    *c = a + b;  // keep on line 6: the cuda-gdb session in these notes breaks at fircuda.cu:6
}

// Kernel that only prints a fixed string with device-side printf.
// It is never launched by main() in this listing.
__global__ void kernel() {
    printf("tttt ");
}

// Host entry point: allocates one int with cudaMalloc, prints the device
// count, launches add<<<1,1>>>(2, 7, dev_c), copies the result back to the
// host and prints it.
// Every runtime call goes through HANDLE_ERROR, which comes from book.h
// (not shown in these notes; presumably it reports the error and aborts).
int main(void) {
    int c;
    int *dev_c;
    HANDLE_ERROR( cudaMalloc((void**)&dev_c, sizeof(int) ) );

    int count;
    HANDLE_ERROR( cudaGetDeviceCount(&count) );
    printf("count: %d ", count);

    add<<<1,1>>>(2,7,dev_c);
    // A kernel launch returns no status itself; ask the runtime explicitly
    // so a failed launch is reported here instead of being silently ignored.
    HANDLE_ERROR( cudaGetLastError() );

    HANDLE_ERROR( cudaMemcpy( &c, dev_c, sizeof(int),
                              cudaMemcpyDeviceToHost ) );
    printf("2 + 7 = %d ", c);

    // Check the free as well, consistent with the other runtime calls.
    HANDLE_ERROR( cudaFree(dev_c) );
    return 0;
}
-g 表示将 CPU 代码(host)编译成可调试版本,-G 表示将 GPU 代码(kernel)编译成可调试版本。例如:nvcc -g -G fircuda.cu -o fircuda
Breakpoint 2, add<<<(1,1,1),(1,1,1)>>> (a=2, b=7, c=0x7fffc7800000) at fircuda.cu:6
6	    *c = a + b;
(cuda-gdb) p a
Python Exception <type 'exceptions.AttributeError'> 'gdb.Type' object has no attribute 'name':
Python Exception <type 'exceptions.AttributeError'> 'gdb.Type' object has no attribute 'name':
$1 = 2
...
(cuda-gdb) p *c
Python Exception <type 'exceptions.AttributeError'> 'gdb.Type' object has no attribute 'name':
Python Exception <type 'exceptions.AttributeError'> 'gdb.Type' object has no attribute 'name':
$6 = 9
apollo@in_dev_docker:/apollo/ttt/caffe/build$ cat detect_cuda_archs.cu
#include <cstdio>
int main()
{
  int count = 0;
  if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;
  if (count == 0) return -1;
  for (int device = 0; device < count; ++device)
  {
    cudaDeviceProp prop;
    if (cudaSuccess == cudaGetDeviceProperties(&prop, device))
      std::printf("%d.%d ", prop.major, prop.minor);
  }
  return 0;
}
apollo@in_dev_docker:/apollo/ttt/caffe/build$ ./a.out
5.0
HELP: https://docs.nvidia.com/cuda/cuda-gdb/#what-is-cuda-gdb