项目打包下载
1 /*
2 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
3 *
4 * NVIDIA Corporation and its licensors retain all intellectual property and
5 * proprietary rights in and to this software and related documentation.
6 * Any use, reproduction, disclosure, or distribution of this software
7 * and related documentation without an express license agreement from
8 * NVIDIA Corporation is strictly prohibited.
9 *
10 * Please refer to the applicable NVIDIA end user license agreement (EULA)
11 * associated with this source code for terms and conditions that govern
12 * your use of this NVIDIA software.
13 *
14 */
15
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include <GL/glut.h>

#include "cuda.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include "../common/book.h"
#include "../common/cpu_bitmap.h"
// Width and height, in pixels, of the rendered bitmap.
#define DIM 1024

// Scales a pseudo-random value from rand() into the range [0, x].
// The argument is parenthesized so that expression arguments such as
// rnd(a - b) are scaled as a whole instead of only their last term.
#define rnd( x ) ((x) * rand() / RAND_MAX)
// Large magnitude used (negated) as the "no hit" depth and as the initial
// depth when searching for the closest sphere.
#define INF 2e10f
27
// A sphere in the scene: a colour, a radius and a centre position.
struct Sphere {
    // Colour channels (declared in r, b, g order).
    float r;
    float b;
    float g;
    float radius;
    // Centre of the sphere.
    float x;
    float y;
    float z;

    // Tests the point (ox, oy) against the sphere's outline in the x/y plane.
    //
    // On a hit, stores dz / sqrtf(radius * radius) in *n, where dz is the
    // height of the sphere surface above its centre at that point, and
    // returns dz + z. On a miss, returns -INF and leaves *n untouched.
    __device__ float hit(float ox, float oy, float *n) {
        const float dx = ox - x;
        const float dy = oy - y;

        // Outside the projected circle: nothing to shade.
        if (!(dx*dx + dy*dy < radius*radius)) {
            return -INF;
        }

        const float dz = sqrtf(radius*radius - dx*dx - dy*dy);
        *n = dz / sqrtf(radius * radius);
        return dz + z;
    }
};
// Number of spheres in the scene.
#define SPHERES 20

// Scene description held in constant memory; the kernel reads it as s[i].
__constant__ Sphere s[SPHERES];
46
// Shades one RGBA pixel per thread from the spheres stored in s.
//
// Each thread derives its pixel (x, y) from its 2D block/thread indices and
// writes four bytes starting at ptr[offset * 4], where the row stride is
// blockDim.x * gridDim.x. No bounds check is performed, so the buffer must
// hold four bytes for every thread of the launched grid.
__global__ void kernel(unsigned char *ptr) {
    // map from threadIdx/BlockIdx to pixel position
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;
    const int offset = x + y * blockDim.x * gridDim.x;

    // Shift coordinates so that (0, 0) is the centre of the image.
    const float ox = (x - DIM / 2);
    const float oy = (y - DIM / 2);

    // Find the sphere with the greatest hit depth at this pixel, remembering
    // the shading factor it reported.
    int nearest = -1;
    float shade = 0;
    float maxz = -INF;
    for (int i = 0; i < SPHERES; ++i) {
        float n;
        const float t = s[i].hit(ox, oy, &n);
        if (t > maxz) {
            nearest = i;
            shade = n;
            maxz = t;
        }
    }

    // Pixels that hit no sphere stay black.
    float r = 0, g = 0, b = 0;
    if (nearest >= 0) {
        r = s[nearest].r * shade;
        g = s[nearest].g * shade;
        b = s[nearest].b * shade;
    }

    unsigned char *pixel = ptr + offset * 4;
    pixel[0] = (int)(r * 255);
    pixel[1] = (int)(g * 255);
    pixel[2] = (int)(b * 255);
    pixel[3] = 255;
}
74
// Data handed to the bitmap object: a pointer to the pixel buffer that
// lives on the device.
struct DataBlock {
    unsigned char* dev_bitmap;
};
79
// Renders SPHERES randomly generated spheres on the GPU into a DIM x DIM
// bitmap, prints the time measured with CUDA events, and hands the bitmap
// to its display routine.
//
// Returns EXIT_FAILURE if the temporary host buffer cannot be allocated.
int main(void) {
    // The bitmap receives a pointer to this block. Nothing in this file
    // stores a value in it, so start from a null pointer rather than an
    // indeterminate one.
    DataBlock data;
    data.dev_bitmap = NULL;

    // capture the start time
    cudaEvent_t start, stop;
    HANDLE_ERROR(cudaEventCreate(&start));
    HANDLE_ERROR(cudaEventCreate(&stop));
    HANDLE_ERROR(cudaEventRecord(start, 0));

    CPUBitmap bitmap(DIM, DIM, &data);
    unsigned char *dev_bitmap;

    // allocate memory on the GPU for the output bitmap
    HANDLE_ERROR(cudaMalloc((void**)&dev_bitmap,
                            bitmap.image_size()));

    // allocate temp memory, initialize it, copy to constant
    // memory on the GPU, then free our temp memory
    Sphere *temp_s = (Sphere*)malloc(sizeof(Sphere) * SPHERES);
    if (temp_s == NULL) {
        fprintf(stderr, "Failed to allocate host memory for %d spheres\n",
                SPHERES);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < SPHERES; i++) {
        temp_s[i].r = rnd(1.0f);
        temp_s[i].g = rnd(1.0f);
        temp_s[i].b = rnd(1.0f);
        temp_s[i].x = rnd(1000.0f) - 500;
        temp_s[i].y = rnd(1000.0f) - 500;
        temp_s[i].z = rnd(1000.0f) - 500;
        temp_s[i].radius = rnd(100.0f) + 20;
    }

    // Store the SPHERES sphere objects in constant memory; this is done
    // with cudaMemcpyToSymbol.
    HANDLE_ERROR(cudaMemcpyToSymbol(s, temp_s,
                                    sizeof(Sphere) * SPHERES));
    free(temp_s);

    // generate a bitmap from our sphere data
    dim3 grids(DIM / 16, DIM / 16);
    dim3 threads(16, 16);
    kernel<<<grids, threads>>>(dev_bitmap);
    // A kernel launch returns no status of its own; query the launch error
    // explicitly so a bad configuration is not silently ignored.
    HANDLE_ERROR(cudaGetLastError());

    // copy our bitmap back from the GPU for display
    HANDLE_ERROR(cudaMemcpy(bitmap.get_ptr(), dev_bitmap,
                            bitmap.image_size(),
                            cudaMemcpyDeviceToHost));

    // get stop time, and display the timing results
    HANDLE_ERROR(cudaEventRecord(stop, 0));
    HANDLE_ERROR(cudaEventSynchronize(stop));
    float elapsedTime;
    HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime,
                                      start, stop));
    printf("Time to generate: %3.1f ms\n", elapsedTime);

    HANDLE_ERROR(cudaEventDestroy(start));
    HANDLE_ERROR(cudaEventDestroy(stop));

    HANDLE_ERROR(cudaFree(dev_bitmap));

    // display
    bitmap.display_and_exit();
}
结果如下所示: