gpu-hello.cpp revision 11321
1/* 2 * Copyright (c) 2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Author: Marc Orr, Brad Beckmann 34 */ 35 36#include <CL/cl.h> 37#include <malloc.h> 38 39#include <cstdio> 40#include <cstring> 41#include <fstream> 42#include <string> 43 44#define SUCCESS 0 45#define FAILURE 1 46 47// OpenCL datastructures 48cl_context context; 49cl_device_id *devices; 50cl_command_queue commandQueue; 51cl_program program; 52cl_kernel readKernel; 53 54// Application datastructures 55const int CACHE_LINE_SIZE = 64; 56size_t grid_size = 512; 57size_t work_group_size = 256; 58 59// arguments 60const int code_size = 5; 61const char *code = "hello"; 62int *keys; 63char *msg; 64int chars_decoded = 0; 65 66/* 67 Setup data structures for application/algorithm 68*/ 69int 70setupDataStructs() 71{ 72 msg = (char *)memalign(CACHE_LINE_SIZE, (grid_size + 1) * sizeof(char)); 73 if (msg == NULL) { 74 printf("%s:%d: error: %s\n", __FILE__, __LINE__, 75 "could not allocate host buffers\n"); 76 exit(-1); 77 } 78 msg[grid_size] = '\0'; 79 80 keys = (int *)memalign(CACHE_LINE_SIZE, code_size * sizeof(int)); 81 keys[0] = 23; 82 keys[1] = 0; 83 keys[2] = 0; 84 keys[3] = 0; 85 keys[4] = 0; 86 87 return SUCCESS; 88} 89 90/* Setup OpenCL data structures */ 91int 92setupOpenCL() 93{ 94 cl_int status = 0; 95 size_t deviceListSize; 96 97 // 1. Get platform 98 cl_uint numPlatforms; 99 cl_platform_id platform = NULL; 100 status = clGetPlatformIDs(0, NULL, &numPlatforms); 101 if (status != CL_SUCCESS) { 102 printf("Error: Getting Platforms. (clGetPlatformsIDs)\n"); 103 return FAILURE; 104 } 105 106 if (numPlatforms > 0) { 107 cl_platform_id *platforms = new cl_platform_id[numPlatforms]; 108 status = clGetPlatformIDs(numPlatforms, platforms, NULL); 109 if (status != CL_SUCCESS) { 110 printf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n"); 111 return FAILURE; 112 } 113 for (int i = 0; i < numPlatforms; ++i) { 114 char pbuff[100]; 115 status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 116 sizeof(pbuff), pbuff, NULL); 117 if (status != CL_SUCCESS) { 118 printf("Error: Getting Platform Info.(clGetPlatformInfo)\n"); 119 return FAILURE; 120 } 121 platform = platforms[i]; 122 if (!strcmp(pbuff, "Advanced Micro Devices, Inc.")) { 123 break; 124 } 125 } 126 delete platforms; 127 } 128 129 if (NULL == platform) { 130 printf("NULL platform found so Exiting Application.\n"); 131 return FAILURE; 132 } 133 134 // 2. create context from platform 135 cl_context_properties cps[3] = 136 {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0}; 137 context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, 138 &status); 139 if (status != CL_SUCCESS) { 140 printf("Error: Creating Context. (clCreateContextFromType)\n"); 141 return FAILURE; 142 } 143 144 // 3. Get device info 145 // 3a. Get # of devices 146 status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, 147 &deviceListSize); 148 if (status != CL_SUCCESS) { 149 printf("Error: Getting Context Info (1st clGetContextInfo)\n"); 150 return FAILURE; 151 } 152 153 // 3b. Get the device list data 154 devices = (cl_device_id *)malloc(deviceListSize); 155 if (devices == 0) { 156 printf("Error: No devices found.\n"); 157 return FAILURE; 158 } 159 status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize, 160 devices, NULL); 161 if (status != CL_SUCCESS) { 162 printf("Error: Getting Context Info (2nd clGetContextInfo)\n"); 163 return FAILURE; 164 } 165 166 // 4. Create command queue for device 167 commandQueue = clCreateCommandQueue(context, devices[0], 0, &status); 168 if (status != CL_SUCCESS) { 169 printf("Creating Command Queue. (clCreateCommandQueue)\n"); 170 return FAILURE; 171 } 172 173 const char *source = "dummy text"; 174 175 size_t sourceSize[] = {strlen(source)}; 176 177 // 5b. Register the kernel with the runtime 178 program = clCreateProgramWithSource(context, 1, &source, sourceSize, 179 &status); 180 if (status != CL_SUCCESS) { 181 printf("Error: Loading kernel (clCreateProgramWithSource)\n"); 182 return FAILURE; 183 } 184 185 status = clBuildProgram(program, 1, devices, NULL, NULL, NULL); 186 if (status != CL_SUCCESS) { 187 printf("Error: Building kernel (clBuildProgram)\n"); 188 return FAILURE; 189 } 190 191 readKernel = clCreateKernel(program, "read_kernel", &status); 192 if (status != CL_SUCCESS) { 193 printf("Error: Creating readKernel from program. (clCreateKernel)\n"); 194 return FAILURE; 195 } 196 197 return SUCCESS; 198} 199 200 201/* Run kernels */ 202int 203runCLKernel(cl_kernel kernel) 204{ 205 cl_int status; 206 cl_event event; 207 size_t globalThreads[1] = {grid_size}; 208 size_t localThreads[1] = {work_group_size}; 209 210 // 1. Set arguments 211 // 1a. code size 212 size_t code_size = strlen(code); 213 status = clSetKernelArg(kernel, 0, sizeof(size_t), &code_size); 214 if (status != CL_SUCCESS) { 215 printf("Error: Setting kernel argument. (code_size)\n"); 216 return FAILURE; 217 } 218 219 // 1b. code 220 status = clSetKernelArg(kernel, 1, sizeof(char *), (void *)&code); 221 if (status != CL_SUCCESS) { 222 printf("Error: Setting kernel argument. (code_in)\n"); 223 return FAILURE; 224 } 225 226 // 1c. keys 227 printf("keys = %p, &keys = %p, keys[0] = %d\n", keys, &keys, keys[0]); 228 status = clSetKernelArg(kernel, 2, sizeof(int *), (void *)&keys); 229 if (status != CL_SUCCESS) { 230 printf("Error: Setting kernel argument. (key_arr)\n"); 231 return FAILURE; 232 } 233 234 // 1d. msg 235 status = clSetKernelArg(kernel, 3, sizeof(char *), (void *)&msg); 236 if (status != CL_SUCCESS) { 237 printf("Error: Setting kernel argument. (memOut)\n"); 238 return FAILURE; 239 } 240 241 // 1e. chars_decoded 242 int *chars_decoded_ptr = &chars_decoded; 243 status = clSetKernelArg(kernel, 4, sizeof(int *), 244 (void *)&chars_decoded_ptr); 245 if (status != CL_SUCCESS) { 246 printf("Error: Setting kernel argument. (memOut)\n"); 247 return FAILURE; 248 } 249 250 // 2. Launch kernel 251 status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, 252 globalThreads, localThreads, 0, NULL, 253 &event); 254 if (status != CL_SUCCESS) { 255 printf("Error: Enqueue failed. (clEnqueueNDRangeKernel)\n"); 256 return FAILURE; 257 } 258 259 // 3. Wait for the kernel 260 status = clWaitForEvents(1, &event); 261 if (status != CL_SUCCESS) { 262 printf("Error: Waiting for kernel run to finish. (clWaitForEvents)\n"); 263 return FAILURE; 264 } 265 266 // 4. Cleanup 267 status = clReleaseEvent(event); 268 if (status != CL_SUCCESS) { 269 printf("Error: Release event object. (clReleaseEvent)\n"); 270 return FAILURE; 271 } 272 273 return SUCCESS; 274} 275 276 277/* Release OpenCL resources (Context, Memory etc.) */ 278int 279cleanupCL() 280{ 281 cl_int status; 282 status = clReleaseKernel(readKernel); 283 if (status != CL_SUCCESS) { 284 printf("Error: In clReleaseKernel \n"); 285 return FAILURE; 286 } 287 status = clReleaseProgram(program); 288 if (status != CL_SUCCESS) { 289 printf("Error: In clReleaseProgram\n"); 290 return FAILURE; 291 } 292 status = clReleaseCommandQueue(commandQueue); 293 if (status != CL_SUCCESS) { 294 printf("Error: In clReleaseCommandQueue\n"); 295 return FAILURE; 296 } 297 status = clReleaseContext(context); 298 if (status != CL_SUCCESS) { 299 printf("Error: In clReleaseContext\n"); 300 return FAILURE; 301 } 302 303 return SUCCESS; 304} 305 306int 307main(int argc, char * argv[]) 308{ 309 // Initialize Host application 310 if (setupDataStructs() != SUCCESS) { 311 return FAILURE; 312 } 313 314 // Initialize OpenCL resources 315 if (setupOpenCL() != SUCCESS) { 316 return FAILURE; 317 } 318 319 // Run the CL program 320 if (runCLKernel(readKernel) != SUCCESS) { 321 return FAILURE; 322 } 323 printf("the gpu says:\n"); 324 printf("%s\n", msg); 325 326 // Releases OpenCL resources 327 if (cleanupCL()!= SUCCESS) { 328 return FAILURE; 329 } 330 331 return SUCCESS; 332} 333