gpu-hello.cpp revision 11615
12SN/A/*
21762SN/A * Copyright (c) 2015 Advanced Micro Devices, Inc.
32SN/A * All rights reserved.
42SN/A *
52SN/A * For use for simulation and test purposes only
62SN/A *
72SN/A * Redistribution and use in source and binary forms, with or without
82SN/A * modification, are permitted provided that the following conditions are met:
92SN/A *
102SN/A * 1. Redistributions of source code must retain the above copyright notice,
112SN/A * this list of conditions and the following disclaimer.
122SN/A *
132SN/A * 2. Redistributions in binary form must reproduce the above copyright notice,
142SN/A * this list of conditions and the following disclaimer in the documentation
152SN/A * and/or other materials provided with the distribution.
162SN/A *
172SN/A * 3. Neither the name of the copyright holder nor the names of its contributors
182SN/A * may be used to endorse or promote products derived from this software
192SN/A * without specific prior written permission.
202SN/A *
212SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
222SN/A * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
232SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
242SN/A * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
252SN/A * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
262SN/A * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
272665Ssaidi@eecs.umich.edu * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
282665Ssaidi@eecs.umich.edu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
292665Ssaidi@eecs.umich.edu * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
302665Ssaidi@eecs.umich.edu * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
312SN/A * POSSIBILITY OF SUCH DAMAGE.
322SN/A *
332SN/A * Author: Marc Orr, Brad Beckmann
342SN/A */
352SN/A
362SN/A#include <CL/cl.h>
3775SN/A#include <malloc.h>
382SN/A
392439SN/A#include <cstdio>
402439SN/A#include <cstring>
41603SN/A#include <fstream>
422986Sgblack@eecs.umich.edu#include <string>
43603SN/A
442520SN/A#ifdef KVM_SWITCH
452378SN/A#include "m5op.h"
462378SN/A
47722SN/Avoid *m5_mem = (void*)0xffffc90000000000;
482521SN/A#endif
492378SN/A
50312SN/A#define SUCCESS 0
511634SN/A#define FAILURE 1
522680Sktlim@umich.edu
531634SN/A// OpenCL datastructures
542521SN/Acl_context       context;
552378SN/Acl_device_id     *devices;
562378SN/Acl_command_queue commandQueue;
57803SN/Acl_program       program;
582378SN/Acl_kernel        readKernel;
592SN/A
602378SN/A// Application datastructures
612SN/Aconst int CACHE_LINE_SIZE = 64;
622SN/Asize_t grid_size = 512;
632SN/Asize_t work_group_size = 256;
64603SN/A
652901Ssaidi@eecs.umich.edu// arguments
662902Ssaidi@eecs.umich.educonst int code_size = 5;
672902Ssaidi@eecs.umich.educonst char *code = "hello";
683202Shsul@eecs.umich.eduint *keys;
692901Ssaidi@eecs.umich.educhar *msg;
702901Ssaidi@eecs.umich.eduint chars_decoded = 0;
712901Ssaidi@eecs.umich.edu
722901Ssaidi@eecs.umich.edu/*
732901Ssaidi@eecs.umich.edu    Setup data structures for application/algorithm
743202Shsul@eecs.umich.edu*/
752901Ssaidi@eecs.umich.eduint
762521SN/AsetupDataStructs()
772SN/A{
782SN/A    msg = (char *)memalign(CACHE_LINE_SIZE, (grid_size + 1) * sizeof(char));
792680Sktlim@umich.edu    if (msg == NULL) {
801806SN/A        printf("%s:%d: error: %s\n", __FILE__, __LINE__,
811806SN/A               "could not allocate host buffers\n");
821806SN/A       exit(-1);
831806SN/A    }
842680Sktlim@umich.edu    msg[grid_size] = '\0';
851806SN/A
861806SN/A    keys = (int *)memalign(CACHE_LINE_SIZE, code_size * sizeof(int));
871806SN/A    keys[0] = 23;
881806SN/A    keys[1] = 0;
89180SN/A    keys[2] = 0;
902378SN/A    keys[3] = 0;
912378SN/A    keys[4] = 0;
922378SN/A
932378SN/A    return SUCCESS;
942520SN/A}
952520SN/A
962520SN/A/* Setup OpenCL data structures */
972521SN/Aint
982520SN/AsetupOpenCL()
991885SN/A{
1001070SN/A    cl_int status = 0;
101954SN/A    size_t deviceListSize;
1021070SN/A
1031070SN/A    // 1. Get platform
1041070SN/A    cl_uint numPlatforms;
1051070SN/A    cl_platform_id platform = NULL;
1061070SN/A    status = clGetPlatformIDs(0, NULL, &numPlatforms);
1071070SN/A    if (status != CL_SUCCESS) {
1081070SN/A        printf("Error: Getting Platforms. (clGetPlatformsIDs)\n");
1091070SN/A        return FAILURE;
1101070SN/A    }
1111070SN/A
1121070SN/A    if (numPlatforms > 0) {
1131070SN/A        cl_platform_id *platforms = new cl_platform_id[numPlatforms];
1142378SN/A        status = clGetPlatformIDs(numPlatforms, platforms, NULL);
1152378SN/A        if (status != CL_SUCCESS) {
1162378SN/A            printf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n");
1172378SN/A            return FAILURE;
1182378SN/A        }
1192378SN/A        for (int i = 0; i < numPlatforms; ++i) {
1202378SN/A            char pbuff[100];
1211885SN/A            status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
1221885SN/A                                       sizeof(pbuff), pbuff, NULL);
1233202Shsul@eecs.umich.edu            if (status != CL_SUCCESS) {
1242901Ssaidi@eecs.umich.edu                printf("Error: Getting Platform Info.(clGetPlatformInfo)\n");
1252424SN/A                return FAILURE;
1261885SN/A            }
1271885SN/A            platform = platforms[i];
1281885SN/A            if (!strcmp(pbuff, "Advanced Micro Devices, Inc.")) {
1291885SN/A                break;
1301885SN/A            }
1312158SN/A        }
1321885SN/A        delete platforms;
1331885SN/A    }
1341885SN/A
1351885SN/A    if (NULL == platform) {
1361885SN/A        printf("NULL platform found so Exiting Application.\n");
1371885SN/A        return FAILURE;
1382989Ssaidi@eecs.umich.edu    }
1391885SN/A
1401913SN/A    // 2. create context from platform
1411885SN/A    cl_context_properties cps[3] =
1421885SN/A        {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0};
1431885SN/A    context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL,
1441885SN/A                                      &status);
1451885SN/A    if (status != CL_SUCCESS) {
1461885SN/A        printf("Error: Creating Context. (clCreateContextFromType)\n");
1471885SN/A        return FAILURE;
1481885SN/A    }
1491885SN/A
1501885SN/A    // 3. Get device info
1511885SN/A    // 3a. Get # of devices
1522989Ssaidi@eecs.umich.edu    status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
1531885SN/A                              &deviceListSize);
1541885SN/A    if (status != CL_SUCCESS) {
1551885SN/A        printf("Error: Getting Context Info (1st clGetContextInfo)\n");
1561885SN/A        return FAILURE;
1572378SN/A    }
15877SN/A
1592378SN/A    // 3b. Get the device list data
1601070SN/A    devices = (cl_device_id *)malloc(deviceListSize);
1611070SN/A    if (devices == 0) {
1622158SN/A        printf("Error: No devices found.\n");
1632378SN/A        return FAILURE;
1641070SN/A    }
1651070SN/A    status = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceListSize,
1661070SN/A                              devices, NULL);
1671070SN/A    if (status != CL_SUCCESS) {
1681070SN/A        printf("Error: Getting Context Info (2nd clGetContextInfo)\n");
1692521SN/A        return FAILURE;
1703202Shsul@eecs.umich.edu    }
1712378SN/A
1722378SN/A    // 4. Create command queue for device
1731634SN/A    commandQueue = clCreateCommandQueue(context, devices[0], 0, &status);
1742567SN/A    if (status != CL_SUCCESS) {
1751070SN/A        printf("Creating Command Queue. (clCreateCommandQueue)\n");
1761070SN/A        return FAILURE;
1771070SN/A    }
1782158SN/A
1792358SN/A    const char *source = "dummy text";
1802378SN/A
1812158SN/A    size_t sourceSize[] = {strlen(source)};
1821070SN/A
1832158SN/A    // 5b. Register the kernel with the runtime
1842158SN/A    program = clCreateProgramWithSource(context, 1, &source, sourceSize,
1851070SN/A                                        &status);
1862158SN/A    if (status != CL_SUCCESS) {
1871070SN/A      printf("Error: Loading kernel (clCreateProgramWithSource)\n");
1882SN/A      return FAILURE;
1892SN/A    }
1901129SN/A
1911129SN/A    status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
1922158SN/A    if (status != CL_SUCCESS) {
1932158SN/A        printf("Error: Building kernel (clBuildProgram)\n");
1941070SN/A        return FAILURE;
1952378SN/A    }
1962378SN/A
1971070SN/A    readKernel = clCreateKernel(program, "read_kernel", &status);
1981070SN/A    if (status != CL_SUCCESS) {
1991070SN/A        printf("Error: Creating readKernel from program. (clCreateKernel)\n");
2001070SN/A        return FAILURE;
2011070SN/A    }
2021070SN/A
2031070SN/A    return SUCCESS;
2041070SN/A}
2051070SN/A
2061070SN/A
2071070SN/A/* Run kernels */
2081070SN/Aint
2091070SN/ArunCLKernel(cl_kernel kernel)
2101070SN/A{
2111070SN/A    cl_int   status;
2121070SN/A    cl_event event;
2131070SN/A    size_t globalThreads[1] = {grid_size};
2141070SN/A    size_t localThreads[1] = {work_group_size};
2152378SN/A
2162378SN/A    // 1. Set arguments
2172378SN/A    // 1a. code size
2182378SN/A    size_t code_size = strlen(code);
2192378SN/A    status = clSetKernelArg(kernel, 0, sizeof(size_t), &code_size);
2202378SN/A    if (status != CL_SUCCESS) {
2212680Sktlim@umich.edu        printf("Error: Setting kernel argument. (code_size)\n");
2222680Sktlim@umich.edu        return FAILURE;
2231070SN/A    }
2241070SN/A
2251070SN/A    // 1b. code
2262SN/A    status = clSetKernelArg(kernel, 1, sizeof(char *), (void *)&code);
22777SN/A    if (status != CL_SUCCESS) {
2282SN/A        printf("Error: Setting kernel argument. (code_in)\n");
2292SN/A        return FAILURE;
2302SN/A    }
2312SN/A
2322SN/A    // 1c. keys
2332SN/A    printf("keys = %p, &keys = %p, keys[0] = %d\n", keys, &keys, keys[0]);
2342SN/A    status = clSetKernelArg(kernel, 2, sizeof(int *), (void *)&keys);
2352SN/A    if (status != CL_SUCCESS) {
2362SN/A        printf("Error: Setting kernel argument. (key_arr)\n");
2372SN/A        return FAILURE;
2382158SN/A    }
2392158SN/A
2402SN/A    // 1d. msg
2412SN/A    status = clSetKernelArg(kernel, 3, sizeof(char *), (void *)&msg);
2422SN/A    if (status != CL_SUCCESS) {
243        printf("Error: Setting kernel argument. (memOut)\n");
244        return FAILURE;
245    }
246
247    // 1e. chars_decoded
248    int *chars_decoded_ptr = &chars_decoded;
249    status = clSetKernelArg(kernel, 4, sizeof(int *),
250                            (void *)&chars_decoded_ptr);
251    if (status != CL_SUCCESS) {
252        printf("Error: Setting kernel argument. (memOut)\n");
253        return FAILURE;
254    }
255
256#ifdef KVM_SWITCH
257    m5_switchcpu();
258#endif
259
260    // 2. Launch kernel
261    status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL,
262                                    globalThreads, localThreads, 0, NULL,
263                                    &event);
264    if (status != CL_SUCCESS) {
265        printf("Error: Enqueue failed. (clEnqueueNDRangeKernel)\n");
266        return FAILURE;
267    }
268
269    // 3. Wait for the kernel
270    status = clWaitForEvents(1, &event);
271    if (status != CL_SUCCESS) {
272        printf("Error: Waiting for kernel run to finish. (clWaitForEvents)\n");
273        return FAILURE;
274    }
275
276    // 4. Cleanup
277    status = clReleaseEvent(event);
278    if (status != CL_SUCCESS) {
279        printf("Error: Release event object. (clReleaseEvent)\n");
280        return FAILURE;
281    }
282
283    return SUCCESS;
284}
285
286
287/* Release OpenCL resources (Context, Memory etc.) */
288int
289cleanupCL()
290{
291    cl_int status;
292    status = clReleaseKernel(readKernel);
293    if (status != CL_SUCCESS) {
294        printf("Error: In clReleaseKernel \n");
295        return FAILURE;
296    }
297    status = clReleaseProgram(program);
298    if (status != CL_SUCCESS) {
299        printf("Error: In clReleaseProgram\n");
300        return FAILURE;
301    }
302    status = clReleaseCommandQueue(commandQueue);
303    if (status != CL_SUCCESS) {
304        printf("Error: In clReleaseCommandQueue\n");
305        return FAILURE;
306    }
307    status = clReleaseContext(context);
308    if (status != CL_SUCCESS) {
309        printf("Error: In clReleaseContext\n");
310        return FAILURE;
311    }
312
313    return SUCCESS;
314}
315
316int
317main(int argc, char * argv[])
318{
319    // Initialize Host application
320    if (setupDataStructs() != SUCCESS) {
321        return FAILURE;
322    }
323
324    // Initialize OpenCL resources
325    if (setupOpenCL() != SUCCESS) {
326        return FAILURE;
327    }
328
329    // Run the CL program
330    if (runCLKernel(readKernel) != SUCCESS) {
331        return FAILURE;
332    }
333    printf("the gpu says:\n");
334    printf("%s\n", msg);
335
336    // Releases OpenCL resources
337    if (cleanupCL()!= SUCCESS) {
338        return FAILURE;
339    }
340
341    return SUCCESS;
342}
343