1/* 2 * Copyright (c) 2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Author: Marc Orr, Brad Beckmann 34 */ 35 36#include <CL/cl.h> 37#include <malloc.h> 38 39#include <cstdio> 40#include <cstring> 41#include <fstream> 42#include <string> 43 44#ifdef KVM_SWITCH 45#include "m5op.h" 46 47void *m5_mem = (void*)0xffffc90000000000; 48#endif 49 50#define SUCCESS 0 51#define FAILURE 1 52 53// OpenCL datastructures 54cl_context context; 55cl_device_id *devices; 56cl_command_queue commandQueue; 57cl_program program; 58cl_kernel readKernel; 59 60// Application datastructures 61const int CACHE_LINE_SIZE = 64; 62size_t grid_size = 512; 63size_t work_group_size = 256; 64 65// arguments 66const int code_size = 5; 67const char *code = "hello"; 68int *keys; 69char *msg; 70int chars_decoded = 0; 71 72/* 73 Setup data structures for application/algorithm 74*/ 75int 76setupDataStructs() 77{ 78 msg = (char *)memalign(CACHE_LINE_SIZE, (grid_size + 1) * sizeof(char)); 79 if (msg == NULL) { 80 printf("%s:%d: error: %s\n", __FILE__, __LINE__, 81 "could not allocate host buffers\n"); 82 exit(-1); 83 } 84 msg[grid_size] = '\0'; 85 86 keys = (int *)memalign(CACHE_LINE_SIZE, code_size * sizeof(int)); 87 keys[0] = 23; 88 keys[1] = 0; 89 keys[2] = 0; 90 keys[3] = 0; 91 keys[4] = 0; 92 93 return SUCCESS; 94} 95 96/* Setup OpenCL data structures */ 97int 98setupOpenCL() 99{ 100 cl_int status = 0; 101 size_t deviceListSize; 102 103 // 1. Get platform 104 cl_uint numPlatforms; 105 cl_platform_id platform = NULL; 106 status = clGetPlatformIDs(0, NULL, &numPlatforms); 107 if (status != CL_SUCCESS) { 108 printf("Error: Getting Platforms. (clGetPlatformsIDs)\n"); 109 return FAILURE; 110 } 111 112 if (numPlatforms > 0) { 113 cl_platform_id *platforms = new cl_platform_id[numPlatforms]; 114 status = clGetPlatformIDs(numPlatforms, platforms, NULL); 115 if (status != CL_SUCCESS) { 116 printf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n"); 117 return FAILURE; 118 } 119 for (int i = 0; i < numPlatforms; ++i) { 120 char pbuff[100]; 121 status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 122 sizeof(pbuff), pbuff, NULL); 123 if (status != CL_SUCCESS) { 124 printf("Error: Getting Platform Info.(clGetPlatformInfo)\n"); 125 return FAILURE; 126 } 127 platform = platforms[i]; 128 if (!strcmp(pbuff, "Advanced Micro Devices, Inc.")) { 129 break; 130 } 131 } 132 delete platforms; 133 } 134 135 if (NULL == platform) { 136 printf("NULL platform found so Exiting Application.\n"); 137 return FAILURE; 138 } 139 140 // 2. create context from platform 141 cl_context_properties cps[3] = 142 {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0}; 143 context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, 144 &status); 145 if (status != CL_SUCCESS) { 146 printf("Error: Creating Context. (clCreateContextFromType)\n"); 147 return FAILURE; 148 } 149 150 // 3. Get device info 151 // 3a. Get # of devices 152 status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, 153 &deviceListSize); 154 if (status != CL_SUCCESS) { 155 printf("Error: Getting Context Info (1st clGetContextInfo)\n"); 156 return FAILURE; 157 } 158 159 // 3b. Get the device list data 160 devices = (cl_device_id *)malloc(deviceListSize); 161 if (devices == 0) { 162 printf("Error: No devices found.\n"); 163 return FAILURE; 164 } 165 status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize, 166 devices, NULL); 167 if (status != CL_SUCCESS) { 168 printf("Error: Getting Context Info (2nd clGetContextInfo)\n"); 169 return FAILURE; 170 } 171 172 // 4. Create command queue for device 173 commandQueue = clCreateCommandQueue(context, devices[0], 0, &status); 174 if (status != CL_SUCCESS) { 175 printf("Creating Command Queue. (clCreateCommandQueue)\n"); 176 return FAILURE; 177 } 178 179 const char *source = "dummy text"; 180 181 size_t sourceSize[] = {strlen(source)}; 182 183 // 5b. Register the kernel with the runtime 184 program = clCreateProgramWithSource(context, 1, &source, sourceSize, 185 &status); 186 if (status != CL_SUCCESS) { 187 printf("Error: Loading kernel (clCreateProgramWithSource)\n"); 188 return FAILURE; 189 } 190 191 status = clBuildProgram(program, 1, devices, NULL, NULL, NULL); 192 if (status != CL_SUCCESS) { 193 printf("Error: Building kernel (clBuildProgram)\n"); 194 return FAILURE; 195 } 196 197 readKernel = clCreateKernel(program, "read_kernel", &status); 198 if (status != CL_SUCCESS) { 199 printf("Error: Creating readKernel from program. (clCreateKernel)\n"); 200 return FAILURE; 201 } 202 203 return SUCCESS; 204} 205 206 207/* Run kernels */ 208int 209runCLKernel(cl_kernel kernel) 210{ 211 cl_int status; 212 cl_event event; 213 size_t globalThreads[1] = {grid_size}; 214 size_t localThreads[1] = {work_group_size}; 215 216 // 1. Set arguments 217 // 1a. code size 218 size_t code_size = strlen(code); 219 status = clSetKernelArg(kernel, 0, sizeof(size_t), &code_size); 220 if (status != CL_SUCCESS) { 221 printf("Error: Setting kernel argument. (code_size)\n"); 222 return FAILURE; 223 } 224 225 // 1b. code 226 status = clSetKernelArg(kernel, 1, sizeof(char *), (void *)&code); 227 if (status != CL_SUCCESS) { 228 printf("Error: Setting kernel argument. (code_in)\n"); 229 return FAILURE; 230 } 231 232 // 1c. keys 233 printf("keys = %p, &keys = %p, keys[0] = %d\n", keys, &keys, keys[0]); 234 status = clSetKernelArg(kernel, 2, sizeof(int *), (void *)&keys); 235 if (status != CL_SUCCESS) { 236 printf("Error: Setting kernel argument. (key_arr)\n"); 237 return FAILURE; 238 } 239 240 // 1d. msg 241 status = clSetKernelArg(kernel, 3, sizeof(char *), (void *)&msg); 242 if (status != CL_SUCCESS) { 243 printf("Error: Setting kernel argument. (memOut)\n"); 244 return FAILURE; 245 } 246 247 // 1e. chars_decoded 248 int *chars_decoded_ptr = &chars_decoded; 249 status = clSetKernelArg(kernel, 4, sizeof(int *), 250 (void *)&chars_decoded_ptr); 251 if (status != CL_SUCCESS) { 252 printf("Error: Setting kernel argument. (memOut)\n"); 253 return FAILURE; 254 } 255 256#ifdef KVM_SWITCH 257 m5_switchcpu(); 258#endif 259 260 // 2. Launch kernel 261 status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, 262 globalThreads, localThreads, 0, NULL, 263 &event); 264 if (status != CL_SUCCESS) { 265 printf("Error: Enqueue failed. (clEnqueueNDRangeKernel)\n"); 266 return FAILURE; 267 } 268 269 // 3. Wait for the kernel 270 status = clWaitForEvents(1, &event); 271 if (status != CL_SUCCESS) { 272 printf("Error: Waiting for kernel run to finish. (clWaitForEvents)\n"); 273 return FAILURE; 274 } 275 276 // 4. Cleanup 277 status = clReleaseEvent(event); 278 if (status != CL_SUCCESS) { 279 printf("Error: Release event object. (clReleaseEvent)\n"); 280 return FAILURE; 281 } 282 283 return SUCCESS; 284} 285 286 287/* Release OpenCL resources (Context, Memory etc.) */ 288int 289cleanupCL() 290{ 291 cl_int status; 292 status = clReleaseKernel(readKernel); 293 if (status != CL_SUCCESS) { 294 printf("Error: In clReleaseKernel \n"); 295 return FAILURE; 296 } 297 status = clReleaseProgram(program); 298 if (status != CL_SUCCESS) { 299 printf("Error: In clReleaseProgram\n"); 300 return FAILURE; 301 } 302 status = clReleaseCommandQueue(commandQueue); 303 if (status != CL_SUCCESS) { 304 printf("Error: In clReleaseCommandQueue\n"); 305 return FAILURE; 306 } 307 status = clReleaseContext(context); 308 if (status != CL_SUCCESS) { 309 printf("Error: In clReleaseContext\n"); 310 return FAILURE; 311 } 312 313 return SUCCESS; 314} 315 316int 317main(int argc, char * argv[]) 318{ 319 // Initialize Host application 320 if (setupDataStructs() != SUCCESS) { 321 return FAILURE; 322 } 323 324 // Initialize OpenCL resources 325 if (setupOpenCL() != SUCCESS) { 326 return FAILURE; 327 } 328 329 // Run the CL program 330 if (runCLKernel(readKernel) != SUCCESS) { 331 return FAILURE; 332 } 333 printf("the gpu says:\n"); 334 printf("%s\n", msg); 335 336 // Releases OpenCL resources 337 if (cleanupCL()!= SUCCESS) { 338 return FAILURE; 339 } 340 341 return SUCCESS; 342} 343