gpu-hello.cpp revision 11615
12SN/A/* 21762SN/A * Copyright (c) 2015 Advanced Micro Devices, Inc. 32SN/A * All rights reserved. 42SN/A * 52SN/A * For use for simulation and test purposes only 62SN/A * 72SN/A * Redistribution and use in source and binary forms, with or without 82SN/A * modification, are permitted provided that the following conditions are met: 92SN/A * 102SN/A * 1. Redistributions of source code must retain the above copyright notice, 112SN/A * this list of conditions and the following disclaimer. 122SN/A * 132SN/A * 2. Redistributions in binary form must reproduce the above copyright notice, 142SN/A * this list of conditions and the following disclaimer in the documentation 152SN/A * and/or other materials provided with the distribution. 162SN/A * 172SN/A * 3. Neither the name of the copyright holder nor the names of its contributors 182SN/A * may be used to endorse or promote products derived from this software 192SN/A * without specific prior written permission. 202SN/A * 212SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 222SN/A * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 232SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 242SN/A * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 252SN/A * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 262SN/A * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 272665Ssaidi@eecs.umich.edu * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 282665Ssaidi@eecs.umich.edu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 292665Ssaidi@eecs.umich.edu * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 302665Ssaidi@eecs.umich.edu * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 312SN/A * POSSIBILITY OF SUCH DAMAGE. 322SN/A * 332SN/A * Author: Marc Orr, Brad Beckmann 342SN/A */ 352SN/A 362SN/A#include <CL/cl.h> 3775SN/A#include <malloc.h> 382SN/A 392439SN/A#include <cstdio> 402439SN/A#include <cstring> 41603SN/A#include <fstream> 422986Sgblack@eecs.umich.edu#include <string> 43603SN/A 442520SN/A#ifdef KVM_SWITCH 452378SN/A#include "m5op.h" 462378SN/A 47722SN/Avoid *m5_mem = (void*)0xffffc90000000000; 482521SN/A#endif 492378SN/A 50312SN/A#define SUCCESS 0 511634SN/A#define FAILURE 1 522680Sktlim@umich.edu 531634SN/A// OpenCL datastructures 542521SN/Acl_context context; 552378SN/Acl_device_id *devices; 562378SN/Acl_command_queue commandQueue; 57803SN/Acl_program program; 582378SN/Acl_kernel readKernel; 592SN/A 602378SN/A// Application datastructures 612SN/Aconst int CACHE_LINE_SIZE = 64; 622SN/Asize_t grid_size = 512; 632SN/Asize_t work_group_size = 256; 64603SN/A 652901Ssaidi@eecs.umich.edu// arguments 662902Ssaidi@eecs.umich.educonst int code_size = 5; 672902Ssaidi@eecs.umich.educonst char *code = "hello"; 683202Shsul@eecs.umich.eduint *keys; 692901Ssaidi@eecs.umich.educhar *msg; 702901Ssaidi@eecs.umich.eduint chars_decoded = 0; 712901Ssaidi@eecs.umich.edu 722901Ssaidi@eecs.umich.edu/* 732901Ssaidi@eecs.umich.edu Setup data structures for application/algorithm 743202Shsul@eecs.umich.edu*/ 752901Ssaidi@eecs.umich.eduint 762521SN/AsetupDataStructs() 772SN/A{ 782SN/A msg = (char *)memalign(CACHE_LINE_SIZE, (grid_size + 1) * sizeof(char)); 792680Sktlim@umich.edu if (msg == NULL) { 801806SN/A printf("%s:%d: error: %s\n", __FILE__, __LINE__, 811806SN/A "could not allocate host buffers\n"); 821806SN/A exit(-1); 831806SN/A } 842680Sktlim@umich.edu msg[grid_size] = '\0'; 851806SN/A 861806SN/A keys = (int *)memalign(CACHE_LINE_SIZE, code_size * sizeof(int)); 871806SN/A keys[0] = 23; 881806SN/A keys[1] = 0; 89180SN/A keys[2] = 0; 902378SN/A keys[3] = 0; 912378SN/A keys[4] = 0; 922378SN/A 932378SN/A return SUCCESS; 942520SN/A} 952520SN/A 962520SN/A/* Setup OpenCL data structures */ 972521SN/Aint 982520SN/AsetupOpenCL() 991885SN/A{ 1001070SN/A cl_int status = 0; 101954SN/A size_t deviceListSize; 1021070SN/A 1031070SN/A // 1. Get platform 1041070SN/A cl_uint numPlatforms; 1051070SN/A cl_platform_id platform = NULL; 1061070SN/A status = clGetPlatformIDs(0, NULL, &numPlatforms); 1071070SN/A if (status != CL_SUCCESS) { 1081070SN/A printf("Error: Getting Platforms. (clGetPlatformsIDs)\n"); 1091070SN/A return FAILURE; 1101070SN/A } 1111070SN/A 1121070SN/A if (numPlatforms > 0) { 1131070SN/A cl_platform_id *platforms = new cl_platform_id[numPlatforms]; 1142378SN/A status = clGetPlatformIDs(numPlatforms, platforms, NULL); 1152378SN/A if (status != CL_SUCCESS) { 1162378SN/A printf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n"); 1172378SN/A return FAILURE; 1182378SN/A } 1192378SN/A for (int i = 0; i < numPlatforms; ++i) { 1202378SN/A char pbuff[100]; 1211885SN/A status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 1221885SN/A sizeof(pbuff), pbuff, NULL); 1233202Shsul@eecs.umich.edu if (status != CL_SUCCESS) { 1242901Ssaidi@eecs.umich.edu printf("Error: Getting Platform Info.(clGetPlatformInfo)\n"); 1252424SN/A return FAILURE; 1261885SN/A } 1271885SN/A platform = platforms[i]; 1281885SN/A if (!strcmp(pbuff, "Advanced Micro Devices, Inc.")) { 1291885SN/A break; 1301885SN/A } 1312158SN/A } 1321885SN/A delete platforms; 1331885SN/A } 1341885SN/A 1351885SN/A if (NULL == platform) { 1361885SN/A printf("NULL platform found so Exiting Application.\n"); 1371885SN/A return FAILURE; 1382989Ssaidi@eecs.umich.edu } 1391885SN/A 1401913SN/A // 2. create context from platform 1411885SN/A cl_context_properties cps[3] = 1421885SN/A {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0}; 1431885SN/A context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, 1441885SN/A &status); 1451885SN/A if (status != CL_SUCCESS) { 1461885SN/A printf("Error: Creating Context. (clCreateContextFromType)\n"); 1471885SN/A return FAILURE; 1481885SN/A } 1491885SN/A 1501885SN/A // 3. Get device info 1511885SN/A // 3a. Get # of devices 1522989Ssaidi@eecs.umich.edu status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, 1531885SN/A &deviceListSize); 1541885SN/A if (status != CL_SUCCESS) { 1551885SN/A printf("Error: Getting Context Info (1st clGetContextInfo)\n"); 1561885SN/A return FAILURE; 1572378SN/A } 15877SN/A 1592378SN/A // 3b. Get the device list data 1601070SN/A devices = (cl_device_id *)malloc(deviceListSize); 1611070SN/A if (devices == 0) { 1622158SN/A printf("Error: No devices found.\n"); 1632378SN/A return FAILURE; 1641070SN/A } 1651070SN/A status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize, 1661070SN/A devices, NULL); 1671070SN/A if (status != CL_SUCCESS) { 1681070SN/A printf("Error: Getting Context Info (2nd clGetContextInfo)\n"); 1692521SN/A return FAILURE; 1703202Shsul@eecs.umich.edu } 1712378SN/A 1722378SN/A // 4. Create command queue for device 1731634SN/A commandQueue = clCreateCommandQueue(context, devices[0], 0, &status); 1742567SN/A if (status != CL_SUCCESS) { 1751070SN/A printf("Creating Command Queue. (clCreateCommandQueue)\n"); 1761070SN/A return FAILURE; 1771070SN/A } 1782158SN/A 1792358SN/A const char *source = "dummy text"; 1802378SN/A 1812158SN/A size_t sourceSize[] = {strlen(source)}; 1821070SN/A 1832158SN/A // 5b. Register the kernel with the runtime 1842158SN/A program = clCreateProgramWithSource(context, 1, &source, sourceSize, 1851070SN/A &status); 1862158SN/A if (status != CL_SUCCESS) { 1871070SN/A printf("Error: Loading kernel (clCreateProgramWithSource)\n"); 1882SN/A return FAILURE; 1892SN/A } 1901129SN/A 1911129SN/A status = clBuildProgram(program, 1, devices, NULL, NULL, NULL); 1922158SN/A if (status != CL_SUCCESS) { 1932158SN/A printf("Error: Building kernel (clBuildProgram)\n"); 1941070SN/A return FAILURE; 1952378SN/A } 1962378SN/A 1971070SN/A readKernel = clCreateKernel(program, "read_kernel", &status); 1981070SN/A if (status != CL_SUCCESS) { 1991070SN/A printf("Error: Creating readKernel from program. (clCreateKernel)\n"); 2001070SN/A return FAILURE; 2011070SN/A } 2021070SN/A 2031070SN/A return SUCCESS; 2041070SN/A} 2051070SN/A 2061070SN/A 2071070SN/A/* Run kernels */ 2081070SN/Aint 2091070SN/ArunCLKernel(cl_kernel kernel) 2101070SN/A{ 2111070SN/A cl_int status; 2121070SN/A cl_event event; 2131070SN/A size_t globalThreads[1] = {grid_size}; 2141070SN/A size_t localThreads[1] = {work_group_size}; 2152378SN/A 2162378SN/A // 1. Set arguments 2172378SN/A // 1a. code size 2182378SN/A size_t code_size = strlen(code); 2192378SN/A status = clSetKernelArg(kernel, 0, sizeof(size_t), &code_size); 2202378SN/A if (status != CL_SUCCESS) { 2212680Sktlim@umich.edu printf("Error: Setting kernel argument. (code_size)\n"); 2222680Sktlim@umich.edu return FAILURE; 2231070SN/A } 2241070SN/A 2251070SN/A // 1b. code 2262SN/A status = clSetKernelArg(kernel, 1, sizeof(char *), (void *)&code); 22777SN/A if (status != CL_SUCCESS) { 2282SN/A printf("Error: Setting kernel argument. (code_in)\n"); 2292SN/A return FAILURE; 2302SN/A } 2312SN/A 2322SN/A // 1c. keys 2332SN/A printf("keys = %p, &keys = %p, keys[0] = %d\n", keys, &keys, keys[0]); 2342SN/A status = clSetKernelArg(kernel, 2, sizeof(int *), (void *)&keys); 2352SN/A if (status != CL_SUCCESS) { 2362SN/A printf("Error: Setting kernel argument. (key_arr)\n"); 2372SN/A return FAILURE; 2382158SN/A } 2392158SN/A 2402SN/A // 1d. msg 2412SN/A status = clSetKernelArg(kernel, 3, sizeof(char *), (void *)&msg); 2422SN/A if (status != CL_SUCCESS) { 243 printf("Error: Setting kernel argument. (memOut)\n"); 244 return FAILURE; 245 } 246 247 // 1e. chars_decoded 248 int *chars_decoded_ptr = &chars_decoded; 249 status = clSetKernelArg(kernel, 4, sizeof(int *), 250 (void *)&chars_decoded_ptr); 251 if (status != CL_SUCCESS) { 252 printf("Error: Setting kernel argument. (memOut)\n"); 253 return FAILURE; 254 } 255 256#ifdef KVM_SWITCH 257 m5_switchcpu(); 258#endif 259 260 // 2. Launch kernel 261 status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, 262 globalThreads, localThreads, 0, NULL, 263 &event); 264 if (status != CL_SUCCESS) { 265 printf("Error: Enqueue failed. (clEnqueueNDRangeKernel)\n"); 266 return FAILURE; 267 } 268 269 // 3. Wait for the kernel 270 status = clWaitForEvents(1, &event); 271 if (status != CL_SUCCESS) { 272 printf("Error: Waiting for kernel run to finish. (clWaitForEvents)\n"); 273 return FAILURE; 274 } 275 276 // 4. Cleanup 277 status = clReleaseEvent(event); 278 if (status != CL_SUCCESS) { 279 printf("Error: Release event object. (clReleaseEvent)\n"); 280 return FAILURE; 281 } 282 283 return SUCCESS; 284} 285 286 287/* Release OpenCL resources (Context, Memory etc.) */ 288int 289cleanupCL() 290{ 291 cl_int status; 292 status = clReleaseKernel(readKernel); 293 if (status != CL_SUCCESS) { 294 printf("Error: In clReleaseKernel \n"); 295 return FAILURE; 296 } 297 status = clReleaseProgram(program); 298 if (status != CL_SUCCESS) { 299 printf("Error: In clReleaseProgram\n"); 300 return FAILURE; 301 } 302 status = clReleaseCommandQueue(commandQueue); 303 if (status != CL_SUCCESS) { 304 printf("Error: In clReleaseCommandQueue\n"); 305 return FAILURE; 306 } 307 status = clReleaseContext(context); 308 if (status != CL_SUCCESS) { 309 printf("Error: In clReleaseContext\n"); 310 return FAILURE; 311 } 312 313 return SUCCESS; 314} 315 316int 317main(int argc, char * argv[]) 318{ 319 // Initialize Host application 320 if (setupDataStructs() != SUCCESS) { 321 return FAILURE; 322 } 323 324 // Initialize OpenCL resources 325 if (setupOpenCL() != SUCCESS) { 326 return FAILURE; 327 } 328 329 // Run the CL program 330 if (runCLKernel(readKernel) != SUCCESS) { 331 return FAILURE; 332 } 333 printf("the gpu says:\n"); 334 printf("%s\n", msg); 335 336 // Releases OpenCL resources 337 if (cleanupCL()!= SUCCESS) { 338 return FAILURE; 339 } 340 341 return SUCCESS; 342} 343