qstruct.hh (11534:7106f550afad) | qstruct.hh (11638:b511733958d0) |
---|---|
1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 81 unchanged lines hidden (view full) --- 90 uint64_t arg4; 91 92 // variables to pass arguments when running in cpu+gpu mode 93 uint8_t args[KER_ARGS_LENGTH]; 94 uint16_t offsets[KER_NUM_ARGS]; 95 uint16_t num_args; 96}; 97 | 1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: --- 81 unchanged lines hidden (view full) --- 90 uint64_t arg4; 91 92 // variables to pass arguments when running in cpu+gpu mode 93 uint8_t args[KER_ARGS_LENGTH]; 94 uint16_t offsets[KER_NUM_ARGS]; 95 uint16_t num_args; 96}; 97 |
98// State used to start (or restart) a WF 99struct WFContext 100{ 101 // 32 bit values 102 // barrier state 103 std::vector<int> bar_cnt; 104 105 // id (which WF in the WG) 106 int cnt; 107 108 // more barrier state 109 int max_bar_cnt; 110 int old_barrier_cnt; 111 int barrier_cnt; 112 113 // More Program Counter Stuff 114 uint32_t pc; 115 116 // Program counter of the immediate post-dominator instruction 117 uint32_t rpc; 118 119 // WG wide state (I don't see how to avoid redundancy here) 120 int cu_id; 121 uint32_t wg_id; 122 uint32_t barrier_id; 123 124 // 64 bit values (these values depend on the wavefront size) 125 // masks 126 uint64_t init_mask; 127 uint64_t exec_mask; 128 129 // private memory; 130 Addr privBase; 131 Addr spillBase; 132 133 LdsChunk *ldsChunk; 134 135 /* 136 * Kernel wide state 137 * This is a hack. This state should be moved through simulated memory 138 * during a yield. Though not much is being used here, so it's probably 139 * not a big deal. 140 * 141 * Just to add to this comment... The ndr is derived from simulated 142 * memory when the cl-runtime allocates an HsaQueueEntry and populates it 143 * for a kernel launch. So in theory the runtime should be able to keep 144 * that state around. Then a WF can reference it upon restart to derive 145 * kernel wide state. The runtime can deallocate the state when the 146 * kernel completes. 147 */ 148 NDRange *ndr; 149}; 150 | |
151// State that needs to be passed between the simulation and simulated app, a 152// pointer to this struct can be passed through the depends field in the 153// HsaQueueEntry struct 154struct HostState 155{ 156 // cl_event* has original HsaQueueEntry for init 157 uint64_t event; 158}; --- 43 unchanged lines hidden --- | 98// State that needs to be passed between the simulation and simulated app, a 99// pointer to this struct can be passed through the depends field in the 100// HsaQueueEntry struct 101struct HostState 102{ 103 // cl_event* has original HsaQueueEntry for init 104 uint64_t event; 105}; --- 43 unchanged lines hidden --- |