// qstruct.hh revision 11534:7106f550afad
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Brad Beckmann, Marc Orr
 */

#ifndef __Q_STRUCT_HH__
#define __Q_STRUCT_HH__

#include <bitset>
#include <cstdint>
#include <vector>

// Maximum number of arguments
static const int KER_NUM_ARGS = 32;
// Kernel argument buffer size (in bytes; it sizes the uint8_t args array)
static const int KER_ARGS_LENGTH = 512;

class LdsChunk;
struct NDRange;

// Be very careful of alignment in this structure. The structure
// must compile to the same layout in both 32-bit and 64-bit mode.
//
// To guarantee that, every 64-bit member has to land on an offset that is
// a multiple of 8 without any help from the compiler: ABIs disagree on the
// alignment of uint64_t inside a struct (4 bytes on i386 System V, 8 bytes
// on x86-64), so compiler-inserted padding is not the same everywhere.
// The padN members spell that padding out explicitly. They reproduce the
// layout an 8-byte-aligning compiler generates on its own and carry no
// information.
//
// When adding or resizing members (or changing KER_NUM_ARGS /
// KER_ARGS_LENGTH), re-check that each uint64_t still starts on a multiple
// of 8 and that the total size is still a multiple of 8.
struct HsaQueueEntry
{
    // Base pointer for array of instruction pointers
    uint64_t code_ptr;
    // Grid Size (3 dimensions)
    uint32_t gdSize[3];
    // Workgroup Size (3 dimensions)
    uint32_t wgSize[3];
    // Register counts.
    // NOTE(review): the s/d/c prefixes presumably name register classes;
    // confirm against the code that fills these in.
    uint16_t sRegCount;
    uint16_t dRegCount;
    uint16_t cRegCount;
    // Explicit padding: keeps privMemStart at offset 40
    uint16_t pad0;
    uint64_t privMemStart;
    uint32_t privMemPerItem;
    uint32_t privMemTotal;
    uint64_t spillMemStart;
    uint32_t spillMemPerItem;
    uint32_t spillMemTotal;
    uint64_t roMemStart;
    uint32_t roMemTotal;
    // Size (in bytes) of LDS
    uint32_t ldsSize;
    // Virtual Memory Id (unused right now)
    uint32_t vmId;
    // Explicit padding: keeps depends at offset 96. Without it the member
    // sits at offset 92 when uint64_t is only 4-byte aligned.
    uint32_t pad1;

    // Pointer to dependency chain (unused now)
    uint64_t depends;

    // pointer to bool
    uint64_t addrToNotify;
    // pointer to uint32_t
    uint64_t numDispLeft;

    // variables to pass arguments when running in standalone mode,
    // will be removed when run.py and sh.cpp have been updated to
    // use args and offset arrays
    uint64_t arg1;
    uint64_t arg2;
    uint64_t arg3;
    uint64_t arg4;

    // variables to pass arguments when running in cpu+gpu mode
    uint8_t args[KER_ARGS_LENGTH];
    uint16_t offsets[KER_NUM_ARGS];
    uint16_t num_args;
    // Explicit tail padding: rounds the struct size up to a multiple of 8
    // so that sizeof() does not depend on the struct's alignment.
    uint16_t pad2[3];
};

98// State used to start (or restart) a WF
99struct WFContext
100{
101    // 32 bit values
102    // barrier state
103    std::vector<int> bar_cnt;
104
105    // id (which WF in the WG)
106    int cnt;
107
108    // more barrier state
109    int max_bar_cnt;
110    int old_barrier_cnt;
111    int barrier_cnt;
112
113    // More Program Counter Stuff
114    uint32_t pc;
115
116    // Program counter of the immediate post-dominator instruction
117    uint32_t rpc;
118
119    // WG wide state (I don't see how to avoid redundancy here)
120    int cu_id;
121    uint32_t wg_id;
122    uint32_t barrier_id;
123
124    // 64 bit values (these values depend on the wavefront size)
125    // masks
126    uint64_t init_mask;
127    uint64_t exec_mask;
128
129    // private memory;
130    Addr privBase;
131    Addr spillBase;
132
133    LdsChunk *ldsChunk;
134
135    /*
136     * Kernel wide state
137     * This is a hack. This state should be moved through simulated memory
138     * during a yield. Though not much is being used here, so it's probably
139     * probably not a big deal.
140     *
141     * Just to add to this comment... The ndr is derived from simulated
142     * memory when the cl-runtime allocates an HsaQueueEntry and populates it
143     * for a kernel launch. So in theory the runtime should be able to keep
144     * that state around. Then a WF can reference it upon restart to derive
145     * kernel wide state. The runtime can deallocate the state when the
146     * kernel completes.
147     */
148    NDRange *ndr;
149};
150
// State that needs to be passed between the simulation and simulated app, a
// pointer to this struct can be passed through the depends field in the
// HsaQueueEntry struct
struct HostState
{
    // cl_event* has original HsaQueueEntry for init
    // (held as a 64-bit integer rather than as a pointer type)
    uint64_t event;
};

// Total number of HSA queues
static const int HSAQ_NQUEUES = 8;

// These values will eventually live in memory mapped registers
// and be settable by the kernel mode driver.

// Number of entries in each HSA queue
static const int HSAQ_SIZE = 64;
// Address of first HSA queue index
static const int HSAQ_INDX_BASE = 0x10000;
// Address of first HSA queue
static const int HSAQ_BASE = 0x11000;
// Suggested start of HSA code
static const int HSA_CODE_BASE = 0x18000;

// These are shortcuts for deriving the address of a specific
// HSA queue or queue index
//
//   HSAQ(n)     address of queue n
//   HSAQE(n, i) address of entry i of queue n
//   HSAQ_RI(n), HSAQ_WI(n), HSAQ_CI(n)
//               addresses of the three int-sized index words of queue n,
//               stored back to back in that order (3 words per queue)
//
// Queue entries are HsaQueueEntry objects, so HSAQ() and HSAQE() can only be
// expanded where that type is complete. Every argument is parenthesized so
// that expressions such as HSAQ_RI(q + 1) expand as intended.
#define HSAQ(n) \
    (HSAQ_BASE + HSAQ_SIZE * sizeof(struct HsaQueueEntry) * (n))
#define HSAQE(n, i) \
    (HSAQ_BASE + (HSAQ_SIZE * (n) + (i)) * sizeof(struct HsaQueueEntry))
#define HSAQ_RI(n) (HSAQ_INDX_BASE + sizeof(int) * ((n) * 3 + 0))
#define HSAQ_WI(n) (HSAQ_INDX_BASE + sizeof(int) * ((n) * 3 + 1))
#define HSAQ_CI(n) (HSAQ_INDX_BASE + sizeof(int) * ((n) * 3 + 2))

/*
 * Example code for writing to a queue
 *
 * void
 * ToQueue(int n, HsaQueueEntry *val)
 * {
 *     int wi = *(int*)HSAQ_WI(n);
 *     int ri = *(int*)HSAQ_RI(n);
 *     int ci = *(int*)HSAQ_CI(n);
 *
 *     if (ci - ri < HSAQ_SIZE) {
 *         (*(int*)HSAQ_CI(n))++;
 *         *(HsaQueueEntry*)(HSAQE(n, (wi % HSAQ_SIZE))) = *val;
 *         (*(int*)HSAQ_WI(n))++;
 *     }
 * }
 */

#endif // __Q_STRUCT_HH__