gpu-hello.cpp revision 11308
1/*
2 * Copyright (c) 2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Marc Orr, Brad Beckmann
34 */
35
#include <CL/cl.h>
#include <malloc.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <string>
43
44#define SUCCESS 0
45#define FAILURE 1
46
47// OpenCL datastructures
48cl_context       context;
49cl_device_id     *devices;
50cl_command_queue commandQueue;
51cl_program       program;
52cl_kernel        readKernel;
53
54// Application datastructures
55const int CACHE_LINE_SIZE = 64;
56size_t grid_size = 512;
57size_t work_group_size = 256;
58
59// arguments
60const int code_size = 5;
61const char *code = "hello";
62int *keys;
63char *msg;
64int chars_decoded = 0;
65
66/*
67    Setup data structures for application/algorithm
68*/
69int
70setupDataStructs()
71{
72    msg = (char *)memalign(CACHE_LINE_SIZE, (grid_size + 1) * sizeof(char));
73    if(msg == NULL) {
74        printf("%s:%d: error: %s\n", __FILE__, __LINE__,
75               "could not allocate host buffers\n");
76       exit(-1);
77    }
78    msg[grid_size] = '\0';
79
80    keys = (int *)memalign(CACHE_LINE_SIZE, code_size * sizeof(int));
81    keys[0] = 23;
82    keys[1] = 0;
83    keys[2] = 0;
84    keys[3] = 0;
85    keys[4] = 0;
86
87    return SUCCESS;
88}
89
90/* Setup OpenCL data structures */
91int
92setupOpenCL()
93{
94    cl_int status = 0;
95    size_t deviceListSize;
96
97    // 1. Get platform
98    cl_uint numPlatforms;
99    cl_platform_id platform = NULL;
100    status = clGetPlatformIDs(0, NULL, &numPlatforms);
101    if (status != CL_SUCCESS) {
102        printf("Error: Getting Platforms. (clGetPlatformsIDs)\n");
103        return FAILURE;
104    }
105
106    if (numPlatforms > 0) {
107        cl_platform_id *platforms = new cl_platform_id[numPlatforms];
108        status = clGetPlatformIDs(numPlatforms, platforms, NULL);
109        if (status != CL_SUCCESS) {
110            printf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n");
111            return FAILURE;
112        }
113        for (int i = 0; i < numPlatforms; ++i) {
114            char pbuff[100];
115            status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
116                                       sizeof(pbuff), pbuff, NULL);
117            if (status != CL_SUCCESS) {
118                printf("Error: Getting Platform Info.(clGetPlatformInfo)\n");
119                return FAILURE;
120            }
121            platform = platforms[i];
122            if (!strcmp(pbuff, "Advanced Micro Devices, Inc.")) {
123                break;
124            }
125        }
126        delete platforms;
127    }
128
129    if(NULL == platform) {
130        printf("NULL platform found so Exiting Application.\n");
131        return FAILURE;
132    }
133
134    // 2. create context from platform
135    cl_context_properties cps[3] =
136        {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0};
137    context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL,
138                                      &status);
139    if (status != CL_SUCCESS) {
140        printf("Error: Creating Context. (clCreateContextFromType)\n");
141        return FAILURE;
142    }
143
144    // 3. Get device info
145    // 3a. Get # of devices
146    status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
147                              &deviceListSize);
148    if (status != CL_SUCCESS) {
149        printf("Error: Getting Context Info (1st clGetContextInfo)\n");
150        return FAILURE;
151    }
152
153    // 3b. Get the device list data
154    devices = (cl_device_id *)malloc(deviceListSize);
155    if (devices == 0) {
156        printf("Error: No devices found.\n");
157        return FAILURE;
158    }
159    status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize,
160                              devices, NULL);
161    if (status != CL_SUCCESS) {
162        printf("Error: Getting Context Info (2nd clGetContextInfo)\n");
163        return FAILURE;
164    }
165
166    // 4. Create command queue for device
167    commandQueue = clCreateCommandQueue(context, devices[0], 0, &status);
168    if (status != CL_SUCCESS) {
169        printf("Creating Command Queue. (clCreateCommandQueue)\n");
170        return FAILURE;
171    }
172
173    const char *source = "dummy text";
174
175    size_t sourceSize[] = {strlen(source)};
176
177    // 5b. Register the kernel with the runtime
178    program = clCreateProgramWithSource(context, 1, &source, sourceSize,
179                                        &status);
180    if (status != CL_SUCCESS) {
181      printf("Error: Loading kernel (clCreateProgramWithSource)\n");
182      return FAILURE;
183    }
184
185    status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
186    if (status != CL_SUCCESS) {
187        printf("Error: Building kernel (clBuildProgram)\n");
188        return FAILURE;
189    }
190
191    readKernel = clCreateKernel(program, "read_kernel", &status);
192    if (status != CL_SUCCESS) {
193        printf("Error: Creating readKernel from program. (clCreateKernel)\n");
194        return FAILURE;
195    }
196
197    return SUCCESS;
198}
199
200
201/* Run kernels */
202int
203runCLKernel(cl_kernel kernel)
204{
205    cl_int   status;
206    cl_event event;
207    size_t globalThreads[1] = {grid_size};
208    size_t localThreads[1] = {work_group_size};
209
210    // 1. Set arguments
211    // 1a. code size
212    size_t code_size = strlen(code);
213    status = clSetKernelArg(kernel, 0, sizeof(size_t), &code_size);
214    if (status != CL_SUCCESS) {
215        printf("Error: Setting kernel argument. (code_size)\n");
216        return FAILURE;
217    }
218
219    // 1b. code
220    status = clSetKernelArg(kernel, 1, sizeof(char *), (void *)&code);
221    if (status != CL_SUCCESS) {
222        printf("Error: Setting kernel argument. (code_in)\n");
223        return FAILURE;
224    }
225
226    // 1c. keys
227    printf("keys = %p, &keys = %p, keys[0] = %d\n", keys, &keys, keys[0]);
228    status = clSetKernelArg(kernel, 2, sizeof(int *), (void *)&keys);
229    if (status != CL_SUCCESS) {
230        printf("Error: Setting kernel argument. (key_arr)\n");
231        return FAILURE;
232    }
233
234    // 1d. msg
235    status = clSetKernelArg(kernel, 3, sizeof(char *), (void *)&msg);
236    if (status != CL_SUCCESS) {
237        printf("Error: Setting kernel argument. (memOut)\n");
238        return FAILURE;
239    }
240
241    // 1e. chars_decoded
242    int *chars_decoded_ptr = &chars_decoded;
243    status = clSetKernelArg(kernel, 4, sizeof(int *),
244                            (void *)&chars_decoded_ptr);
245    if (status != CL_SUCCESS) {
246        printf("Error: Setting kernel argument. (memOut)\n");
247        return FAILURE;
248    }
249
250    // 2. Launch kernel
251    status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL,
252                                    globalThreads, localThreads, 0, NULL,
253                                    &event);
254    if (status != CL_SUCCESS) {
255        printf("Error: Enqueue failed. (clEnqueueNDRangeKernel)\n");
256        return FAILURE;
257    }
258
259    // 3. Wait for the kernel
260    status = clWaitForEvents(1, &event);
261    if (status != CL_SUCCESS) {
262        printf("Error: Waiting for kernel run to finish. (clWaitForEvents)\n");
263        return FAILURE;
264    }
265
266    // 4. Cleanup
267    status = clReleaseEvent(event);
268    if (status != CL_SUCCESS) {
269        printf("Error: Release event object. (clReleaseEvent)\n");
270        return FAILURE;
271    }
272
273    return SUCCESS;
274}
275
276
277/* Release OpenCL resources (Context, Memory etc.) */
278int
279cleanupCL()
280{
281    cl_int status;
282    status = clReleaseKernel(readKernel);
283    if (status != CL_SUCCESS) {
284        printf("Error: In clReleaseKernel \n");
285        return FAILURE;
286    }
287    status = clReleaseProgram(program);
288    if (status != CL_SUCCESS) {
289        printf("Error: In clReleaseProgram\n");
290        return FAILURE;
291    }
292    status = clReleaseCommandQueue(commandQueue);
293    if (status != CL_SUCCESS) {
294        printf("Error: In clReleaseCommandQueue\n");
295        return FAILURE;
296    }
297    status = clReleaseContext(context);
298    if (status != CL_SUCCESS) {
299        printf("Error: In clReleaseContext\n");
300        return FAILURE;
301    }
302
303    return SUCCESS;
304}
305
306int
307main(int argc, char * argv[])
308{
309    // Initialize Host application
310    if (setupDataStructs() != SUCCESS) {
311        return FAILURE;
312    }
313
314    // Initialize OpenCL resources
315    if (setupOpenCL() != SUCCESS) {
316        return FAILURE;
317    }
318
319    // Run the CL program
320    if (runCLKernel(readKernel) != SUCCESS) {
321        return FAILURE;
322    }
323    printf("the gpu says:\n");
324    printf("%s\n", msg);
325
326    // Releases OpenCL resources
327    if (cleanupCL()!= SUCCESS) {
328        return FAILURE;
329    }
330
331    return SUCCESS;
332}
333