gpu-hello.cpp (11308:7d8836fd043d) gpu-hello.cpp (11321:02e930db812d)
1/*
2 * Copyright (c) 2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Marc Orr, Brad Beckmann
34 */
35
36#include <CL/cl.h>
37#include <malloc.h>
38
39#include <cstdio>
40#include <cstring>
41#include <fstream>
42#include <string>
43
44#define SUCCESS 0
45#define FAILURE 1
46
47// OpenCL datastructures
48cl_context context;
49cl_device_id *devices;
50cl_command_queue commandQueue;
51cl_program program;
52cl_kernel readKernel;
53
54// Application datastructures
55const int CACHE_LINE_SIZE = 64;
56size_t grid_size = 512;
57size_t work_group_size = 256;
58
59// arguments
60const int code_size = 5;
61const char *code = "hello";
62int *keys;
63char *msg;
64int chars_decoded = 0;
65
66/*
67 Setup data structures for application/algorithm
68*/
69int
70setupDataStructs()
71{
72 msg = (char *)memalign(CACHE_LINE_SIZE, (grid_size + 1) * sizeof(char));
1/*
2 * Copyright (c) 2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Marc Orr, Brad Beckmann
34 */
35
36#include <CL/cl.h>
37#include <malloc.h>
38
39#include <cstdio>
40#include <cstring>
41#include <fstream>
42#include <string>
43
44#define SUCCESS 0
45#define FAILURE 1
46
47// OpenCL datastructures
48cl_context context;
49cl_device_id *devices;
50cl_command_queue commandQueue;
51cl_program program;
52cl_kernel readKernel;
53
54// Application datastructures
55const int CACHE_LINE_SIZE = 64;
56size_t grid_size = 512;
57size_t work_group_size = 256;
58
59// arguments
60const int code_size = 5;
61const char *code = "hello";
62int *keys;
63char *msg;
64int chars_decoded = 0;
65
66/*
67 Setup data structures for application/algorithm
68*/
69int
70setupDataStructs()
71{
72 msg = (char *)memalign(CACHE_LINE_SIZE, (grid_size + 1) * sizeof(char));
73 if(msg == NULL) {
73 if (msg == NULL) {
74 printf("%s:%d: error: %s\n", __FILE__, __LINE__,
75 "could not allocate host buffers\n");
76 exit(-1);
77 }
78 msg[grid_size] = '\0';
79
80 keys = (int *)memalign(CACHE_LINE_SIZE, code_size * sizeof(int));
81 keys[0] = 23;
82 keys[1] = 0;
83 keys[2] = 0;
84 keys[3] = 0;
85 keys[4] = 0;
86
87 return SUCCESS;
88}
89
90/* Setup OpenCL data structures */
91int
92setupOpenCL()
93{
94 cl_int status = 0;
95 size_t deviceListSize;
96
97 // 1. Get platform
98 cl_uint numPlatforms;
99 cl_platform_id platform = NULL;
100 status = clGetPlatformIDs(0, NULL, &numPlatforms);
101 if (status != CL_SUCCESS) {
102 printf("Error: Getting Platforms. (clGetPlatformsIDs)\n");
103 return FAILURE;
104 }
105
106 if (numPlatforms > 0) {
107 cl_platform_id *platforms = new cl_platform_id[numPlatforms];
108 status = clGetPlatformIDs(numPlatforms, platforms, NULL);
109 if (status != CL_SUCCESS) {
110 printf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n");
111 return FAILURE;
112 }
113 for (int i = 0; i < numPlatforms; ++i) {
114 char pbuff[100];
115 status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
116 sizeof(pbuff), pbuff, NULL);
117 if (status != CL_SUCCESS) {
118 printf("Error: Getting Platform Info.(clGetPlatformInfo)\n");
119 return FAILURE;
120 }
121 platform = platforms[i];
122 if (!strcmp(pbuff, "Advanced Micro Devices, Inc.")) {
123 break;
124 }
125 }
126 delete platforms;
127 }
128
74 printf("%s:%d: error: %s\n", __FILE__, __LINE__,
75 "could not allocate host buffers\n");
76 exit(-1);
77 }
78 msg[grid_size] = '\0';
79
80 keys = (int *)memalign(CACHE_LINE_SIZE, code_size * sizeof(int));
81 keys[0] = 23;
82 keys[1] = 0;
83 keys[2] = 0;
84 keys[3] = 0;
85 keys[4] = 0;
86
87 return SUCCESS;
88}
89
90/* Setup OpenCL data structures */
91int
92setupOpenCL()
93{
94 cl_int status = 0;
95 size_t deviceListSize;
96
97 // 1. Get platform
98 cl_uint numPlatforms;
99 cl_platform_id platform = NULL;
100 status = clGetPlatformIDs(0, NULL, &numPlatforms);
101 if (status != CL_SUCCESS) {
102 printf("Error: Getting Platforms. (clGetPlatformsIDs)\n");
103 return FAILURE;
104 }
105
106 if (numPlatforms > 0) {
107 cl_platform_id *platforms = new cl_platform_id[numPlatforms];
108 status = clGetPlatformIDs(numPlatforms, platforms, NULL);
109 if (status != CL_SUCCESS) {
110 printf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n");
111 return FAILURE;
112 }
113 for (int i = 0; i < numPlatforms; ++i) {
114 char pbuff[100];
115 status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
116 sizeof(pbuff), pbuff, NULL);
117 if (status != CL_SUCCESS) {
118 printf("Error: Getting Platform Info.(clGetPlatformInfo)\n");
119 return FAILURE;
120 }
121 platform = platforms[i];
122 if (!strcmp(pbuff, "Advanced Micro Devices, Inc.")) {
123 break;
124 }
125 }
126 delete platforms;
127 }
128
129 if(NULL == platform) {
129 if (NULL == platform) {
130 printf("NULL platform found so Exiting Application.\n");
131 return FAILURE;
132 }
133
134 // 2. create context from platform
135 cl_context_properties cps[3] =
136 {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0};
137 context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL,
138 &status);
139 if (status != CL_SUCCESS) {
140 printf("Error: Creating Context. (clCreateContextFromType)\n");
141 return FAILURE;
142 }
143
144 // 3. Get device info
145 // 3a. Get # of devices
146 status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
147 &deviceListSize);
148 if (status != CL_SUCCESS) {
149 printf("Error: Getting Context Info (1st clGetContextInfo)\n");
150 return FAILURE;
151 }
152
153 // 3b. Get the device list data
154 devices = (cl_device_id *)malloc(deviceListSize);
155 if (devices == 0) {
156 printf("Error: No devices found.\n");
157 return FAILURE;
158 }
159 status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize,
160 devices, NULL);
161 if (status != CL_SUCCESS) {
162 printf("Error: Getting Context Info (2nd clGetContextInfo)\n");
163 return FAILURE;
164 }
165
166 // 4. Create command queue for device
167 commandQueue = clCreateCommandQueue(context, devices[0], 0, &status);
168 if (status != CL_SUCCESS) {
169 printf("Creating Command Queue. (clCreateCommandQueue)\n");
170 return FAILURE;
171 }
172
173 const char *source = "dummy text";
174
175 size_t sourceSize[] = {strlen(source)};
176
177 // 5b. Register the kernel with the runtime
178 program = clCreateProgramWithSource(context, 1, &source, sourceSize,
179 &status);
180 if (status != CL_SUCCESS) {
181 printf("Error: Loading kernel (clCreateProgramWithSource)\n");
182 return FAILURE;
183 }
184
185 status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
186 if (status != CL_SUCCESS) {
187 printf("Error: Building kernel (clBuildProgram)\n");
188 return FAILURE;
189 }
190
191 readKernel = clCreateKernel(program, "read_kernel", &status);
192 if (status != CL_SUCCESS) {
193 printf("Error: Creating readKernel from program. (clCreateKernel)\n");
194 return FAILURE;
195 }
196
197 return SUCCESS;
198}
199
200
201/* Run kernels */
202int
203runCLKernel(cl_kernel kernel)
204{
205 cl_int status;
206 cl_event event;
207 size_t globalThreads[1] = {grid_size};
208 size_t localThreads[1] = {work_group_size};
209
210 // 1. Set arguments
211 // 1a. code size
212 size_t code_size = strlen(code);
213 status = clSetKernelArg(kernel, 0, sizeof(size_t), &code_size);
214 if (status != CL_SUCCESS) {
215 printf("Error: Setting kernel argument. (code_size)\n");
216 return FAILURE;
217 }
218
219 // 1b. code
220 status = clSetKernelArg(kernel, 1, sizeof(char *), (void *)&code);
221 if (status != CL_SUCCESS) {
222 printf("Error: Setting kernel argument. (code_in)\n");
223 return FAILURE;
224 }
225
226 // 1c. keys
227 printf("keys = %p, &keys = %p, keys[0] = %d\n", keys, &keys, keys[0]);
228 status = clSetKernelArg(kernel, 2, sizeof(int *), (void *)&keys);
229 if (status != CL_SUCCESS) {
230 printf("Error: Setting kernel argument. (key_arr)\n");
231 return FAILURE;
232 }
233
234 // 1d. msg
235 status = clSetKernelArg(kernel, 3, sizeof(char *), (void *)&msg);
236 if (status != CL_SUCCESS) {
237 printf("Error: Setting kernel argument. (memOut)\n");
238 return FAILURE;
239 }
240
241 // 1e. chars_decoded
242 int *chars_decoded_ptr = &chars_decoded;
243 status = clSetKernelArg(kernel, 4, sizeof(int *),
244 (void *)&chars_decoded_ptr);
245 if (status != CL_SUCCESS) {
246 printf("Error: Setting kernel argument. (memOut)\n");
247 return FAILURE;
248 }
249
250 // 2. Launch kernel
251 status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL,
252 globalThreads, localThreads, 0, NULL,
253 &event);
254 if (status != CL_SUCCESS) {
255 printf("Error: Enqueue failed. (clEnqueueNDRangeKernel)\n");
256 return FAILURE;
257 }
258
259 // 3. Wait for the kernel
260 status = clWaitForEvents(1, &event);
261 if (status != CL_SUCCESS) {
262 printf("Error: Waiting for kernel run to finish. (clWaitForEvents)\n");
263 return FAILURE;
264 }
265
266 // 4. Cleanup
267 status = clReleaseEvent(event);
268 if (status != CL_SUCCESS) {
269 printf("Error: Release event object. (clReleaseEvent)\n");
270 return FAILURE;
271 }
272
273 return SUCCESS;
274}
275
276
277/* Release OpenCL resources (Context, Memory etc.) */
278int
279cleanupCL()
280{
281 cl_int status;
282 status = clReleaseKernel(readKernel);
283 if (status != CL_SUCCESS) {
284 printf("Error: In clReleaseKernel \n");
285 return FAILURE;
286 }
287 status = clReleaseProgram(program);
288 if (status != CL_SUCCESS) {
289 printf("Error: In clReleaseProgram\n");
290 return FAILURE;
291 }
292 status = clReleaseCommandQueue(commandQueue);
293 if (status != CL_SUCCESS) {
294 printf("Error: In clReleaseCommandQueue\n");
295 return FAILURE;
296 }
297 status = clReleaseContext(context);
298 if (status != CL_SUCCESS) {
299 printf("Error: In clReleaseContext\n");
300 return FAILURE;
301 }
302
303 return SUCCESS;
304}
305
306int
307main(int argc, char * argv[])
308{
309 // Initialize Host application
310 if (setupDataStructs() != SUCCESS) {
311 return FAILURE;
312 }
313
314 // Initialize OpenCL resources
315 if (setupOpenCL() != SUCCESS) {
316 return FAILURE;
317 }
318
319 // Run the CL program
320 if (runCLKernel(readKernel) != SUCCESS) {
321 return FAILURE;
322 }
323 printf("the gpu says:\n");
324 printf("%s\n", msg);
325
326 // Releases OpenCL resources
327 if (cleanupCL()!= SUCCESS) {
328 return FAILURE;
329 }
330
331 return SUCCESS;
332}
130 printf("NULL platform found so Exiting Application.\n");
131 return FAILURE;
132 }
133
134 // 2. create context from platform
135 cl_context_properties cps[3] =
136 {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0};
137 context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL,
138 &status);
139 if (status != CL_SUCCESS) {
140 printf("Error: Creating Context. (clCreateContextFromType)\n");
141 return FAILURE;
142 }
143
144 // 3. Get device info
145 // 3a. Get # of devices
146 status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
147 &deviceListSize);
148 if (status != CL_SUCCESS) {
149 printf("Error: Getting Context Info (1st clGetContextInfo)\n");
150 return FAILURE;
151 }
152
153 // 3b. Get the device list data
154 devices = (cl_device_id *)malloc(deviceListSize);
155 if (devices == 0) {
156 printf("Error: No devices found.\n");
157 return FAILURE;
158 }
159 status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize,
160 devices, NULL);
161 if (status != CL_SUCCESS) {
162 printf("Error: Getting Context Info (2nd clGetContextInfo)\n");
163 return FAILURE;
164 }
165
166 // 4. Create command queue for device
167 commandQueue = clCreateCommandQueue(context, devices[0], 0, &status);
168 if (status != CL_SUCCESS) {
169 printf("Creating Command Queue. (clCreateCommandQueue)\n");
170 return FAILURE;
171 }
172
173 const char *source = "dummy text";
174
175 size_t sourceSize[] = {strlen(source)};
176
177 // 5b. Register the kernel with the runtime
178 program = clCreateProgramWithSource(context, 1, &source, sourceSize,
179 &status);
180 if (status != CL_SUCCESS) {
181 printf("Error: Loading kernel (clCreateProgramWithSource)\n");
182 return FAILURE;
183 }
184
185 status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
186 if (status != CL_SUCCESS) {
187 printf("Error: Building kernel (clBuildProgram)\n");
188 return FAILURE;
189 }
190
191 readKernel = clCreateKernel(program, "read_kernel", &status);
192 if (status != CL_SUCCESS) {
193 printf("Error: Creating readKernel from program. (clCreateKernel)\n");
194 return FAILURE;
195 }
196
197 return SUCCESS;
198}
199
200
201/* Run kernels */
202int
203runCLKernel(cl_kernel kernel)
204{
205 cl_int status;
206 cl_event event;
207 size_t globalThreads[1] = {grid_size};
208 size_t localThreads[1] = {work_group_size};
209
210 // 1. Set arguments
211 // 1a. code size
212 size_t code_size = strlen(code);
213 status = clSetKernelArg(kernel, 0, sizeof(size_t), &code_size);
214 if (status != CL_SUCCESS) {
215 printf("Error: Setting kernel argument. (code_size)\n");
216 return FAILURE;
217 }
218
219 // 1b. code
220 status = clSetKernelArg(kernel, 1, sizeof(char *), (void *)&code);
221 if (status != CL_SUCCESS) {
222 printf("Error: Setting kernel argument. (code_in)\n");
223 return FAILURE;
224 }
225
226 // 1c. keys
227 printf("keys = %p, &keys = %p, keys[0] = %d\n", keys, &keys, keys[0]);
228 status = clSetKernelArg(kernel, 2, sizeof(int *), (void *)&keys);
229 if (status != CL_SUCCESS) {
230 printf("Error: Setting kernel argument. (key_arr)\n");
231 return FAILURE;
232 }
233
234 // 1d. msg
235 status = clSetKernelArg(kernel, 3, sizeof(char *), (void *)&msg);
236 if (status != CL_SUCCESS) {
237 printf("Error: Setting kernel argument. (memOut)\n");
238 return FAILURE;
239 }
240
241 // 1e. chars_decoded
242 int *chars_decoded_ptr = &chars_decoded;
243 status = clSetKernelArg(kernel, 4, sizeof(int *),
244 (void *)&chars_decoded_ptr);
245 if (status != CL_SUCCESS) {
246 printf("Error: Setting kernel argument. (memOut)\n");
247 return FAILURE;
248 }
249
250 // 2. Launch kernel
251 status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL,
252 globalThreads, localThreads, 0, NULL,
253 &event);
254 if (status != CL_SUCCESS) {
255 printf("Error: Enqueue failed. (clEnqueueNDRangeKernel)\n");
256 return FAILURE;
257 }
258
259 // 3. Wait for the kernel
260 status = clWaitForEvents(1, &event);
261 if (status != CL_SUCCESS) {
262 printf("Error: Waiting for kernel run to finish. (clWaitForEvents)\n");
263 return FAILURE;
264 }
265
266 // 4. Cleanup
267 status = clReleaseEvent(event);
268 if (status != CL_SUCCESS) {
269 printf("Error: Release event object. (clReleaseEvent)\n");
270 return FAILURE;
271 }
272
273 return SUCCESS;
274}
275
276
277/* Release OpenCL resources (Context, Memory etc.) */
278int
279cleanupCL()
280{
281 cl_int status;
282 status = clReleaseKernel(readKernel);
283 if (status != CL_SUCCESS) {
284 printf("Error: In clReleaseKernel \n");
285 return FAILURE;
286 }
287 status = clReleaseProgram(program);
288 if (status != CL_SUCCESS) {
289 printf("Error: In clReleaseProgram\n");
290 return FAILURE;
291 }
292 status = clReleaseCommandQueue(commandQueue);
293 if (status != CL_SUCCESS) {
294 printf("Error: In clReleaseCommandQueue\n");
295 return FAILURE;
296 }
297 status = clReleaseContext(context);
298 if (status != CL_SUCCESS) {
299 printf("Error: In clReleaseContext\n");
300 return FAILURE;
301 }
302
303 return SUCCESS;
304}
305
306int
307main(int argc, char * argv[])
308{
309 // Initialize Host application
310 if (setupDataStructs() != SUCCESS) {
311 return FAILURE;
312 }
313
314 // Initialize OpenCL resources
315 if (setupOpenCL() != SUCCESS) {
316 return FAILURE;
317 }
318
319 // Run the CL program
320 if (runCLKernel(readKernel) != SUCCESS) {
321 return FAILURE;
322 }
323 printf("the gpu says:\n");
324 printf("%s\n", msg);
325
326 // Releases OpenCL resources
327 if (cleanupCL()!= SUCCESS) {
328 return FAILURE;
329 }
330
331 return SUCCESS;
332}