1/* 2 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its 18 * contributors may be used to endorse or promote products derived from this 19 * software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Authors: John Kalamatianos, 34 * Mark Wyse 35 */ 36 37#include "gpu-compute/vector_register_file.hh" 38 39#include <string> 40 41#include "base/logging.hh" 42#include "gpu-compute/compute_unit.hh" 43#include "gpu-compute/gpu_dyn_inst.hh" 44#include "gpu-compute/shader.hh" 45#include "gpu-compute/simple_pool_manager.hh" 46#include "gpu-compute/wavefront.hh" 47#include "params/VectorRegisterFile.hh" 48 49VectorRegisterFile::VectorRegisterFile(const VectorRegisterFileParams *p) 50 : SimObject(p), 51 manager(new SimplePoolManager(p->min_alloc, p->num_regs_per_simd)), 52 simdId(p->simd_id), numRegsPerSimd(p->num_regs_per_simd), 53 vgprState(new VecRegisterState()) 54{ 55 fatal_if(numRegsPerSimd % 2, "VRF size is illegal\n"); 56 fatal_if(simdId < 0, "Illegal SIMD id for VRF"); 57 58 fatal_if(numRegsPerSimd % p->min_alloc, "Min VGPR region allocation is not " 59 "multiple of VRF size\n"); 60 61 busy.clear(); 62 busy.resize(numRegsPerSimd, 0); 63 nxtBusy.clear(); 64 nxtBusy.resize(numRegsPerSimd, 0); 65 66 vgprState->init(numRegsPerSimd, p->wfSize); 67} 68 69void 70VectorRegisterFile::setParent(ComputeUnit *_computeUnit) 71{ 72 computeUnit = _computeUnit; 73 vgprState->setParent(computeUnit); 74} 75 76uint8_t 77VectorRegisterFile::regNxtBusy(int idx, uint32_t operandSize) const 78{ 79 uint8_t status = nxtBusy.at(idx); 80 81 if (operandSize > 4) { 82 status = status | (nxtBusy.at((idx + 1) % numRegs())); 83 } 84 85 return status; 86} 87 88uint8_t 89VectorRegisterFile::regBusy(int idx, uint32_t operandSize) const 90{ 91 uint8_t status = busy.at(idx); 92 93 if (operandSize > 4) { 94 status = status | (busy.at((idx + 1) % numRegs())); 95 } 96 97 return status; 98} 99 100void 101VectorRegisterFile::preMarkReg(int regIdx, uint32_t operandSize, uint8_t value) 102{ 103 nxtBusy.at(regIdx) = value; 104 105 if (operandSize > 4) { 106 nxtBusy.at((regIdx + 1) % numRegs()) = value; 107 } 108} 109 110void 111VectorRegisterFile::markReg(int regIdx, uint32_t operandSize, uint8_t value) 112{ 113 busy.at(regIdx) = value; 114 115 if (operandSize > 4) { 116 busy.at((regIdx + 1) % numRegs()) = value; 117 } 118} 119 120bool 121VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const 122{ 123 for (int i = 0; i < ii->getNumOperands(); ++i) { 124 if (ii->isVectorRegister(i)) { 125 uint32_t vgprIdx = ii->getRegisterIndex(i, ii); 126 uint32_t pVgpr = w->remap(vgprIdx, ii->getOperandSize(i), 1); 127 128 if (regBusy(pVgpr, ii->getOperandSize(i)) == 1) { 129 if (ii->isDstOperand(i)) { 130 w->numTimesBlockedDueWAXDependencies++; 131 } else if (ii->isSrcOperand(i)) { 132 w->numTimesBlockedDueRAWDependencies++; 133 } 134 135 return false; 136 } 137 138 if (regNxtBusy(pVgpr, ii->getOperandSize(i)) == 1) { 139 if (ii->isDstOperand(i)) { 140 w->numTimesBlockedDueWAXDependencies++; 141 } else if (ii->isSrcOperand(i)) { 142 w->numTimesBlockedDueRAWDependencies++; 143 } 144 145 return false; 146 } 147 } 148 } 149 150 return true; 151} 152 153void 154VectorRegisterFile::exec(GPUDynInstPtr ii, Wavefront *w) 155{ 156 bool loadInstr = ii->isLoad(); 157 bool atomicInstr = ii->isAtomic() || ii->isMemFence(); 158 159 bool loadNoArgInstr = loadInstr && !ii->isArgLoad(); 160 161 // iterate over all register destination operands 162 for (int i = 0; i < ii->getNumOperands(); ++i) { 163 if (ii->isVectorRegister(i) && ii->isDstOperand(i)) { 164 uint32_t physReg = w->remap(ii->getRegisterIndex(i, ii), 165 ii->getOperandSize(i), 1); 166 167 // mark the destination vector register as busy 168 markReg(physReg, ii->getOperandSize(i), 1); 169 // clear the in-flight status of the destination vector register 170 preMarkReg(physReg, ii->getOperandSize(i), 0); 171 172 // FIXME: if we ever model correct timing behavior 173 // for load argument instructions then we should not 174 // set the destination register as busy now but when 175 // the data returns. Loads and Atomics should free 176 // their destination registers when the data returns, 177 // not now 178 if (!atomicInstr && !loadNoArgInstr) { 179 uint32_t pipeLen = ii->getOperandSize(i) <= 4 ? 180 computeUnit->spBypassLength() : 181 computeUnit->dpBypassLength(); 182 183 // schedule an event for marking the register as ready 184 computeUnit->registerEvent(w->simdId, physReg, 185 ii->getOperandSize(i), 186 computeUnit->shader->tick_cnt + 187 computeUnit->shader->ticks(pipeLen), 188 0); 189 } 190 } 191 } 192} 193 194int 195VectorRegisterFile::exec(uint64_t dynamic_id, Wavefront *w, 196 std::vector<uint32_t> ®Vec, uint32_t operandSize, 197 uint64_t timestamp) 198{ 199 int delay = 0; 200 201 panic_if(regVec.size() <= 0, "Illegal VGPR vector size=%d\n", 202 regVec.size()); 203 204 for (int i = 0; i < regVec.size(); ++i) { 205 // mark the destination VGPR as free when the timestamp expires 206 computeUnit->registerEvent(w->simdId, regVec[i], operandSize, 207 computeUnit->shader->tick_cnt + timestamp + 208 computeUnit->shader->ticks(delay), 0); 209 } 210 211 return delay; 212} 213 214void 215VectorRegisterFile::updateResources(Wavefront *w, GPUDynInstPtr ii) 216{ 217 // iterate over all register destination operands 218 for (int i = 0; i < ii->getNumOperands(); ++i) { 219 if (ii->isVectorRegister(i) && ii->isDstOperand(i)) { 220 uint32_t physReg = w->remap(ii->getRegisterIndex(i, ii), 221 ii->getOperandSize(i), 1); 222 // set the in-flight status of the destination vector register 223 preMarkReg(physReg, ii->getOperandSize(i), 1); 224 } 225 } 226} 227 228bool 229VectorRegisterFile::vrfOperandAccessReady(uint64_t dynamic_id, Wavefront *w, 230 GPUDynInstPtr ii, 231 VrfAccessType accessType) 232{ 233 bool ready = true; 234 235 return ready; 236} 237 238bool 239VectorRegisterFile::vrfOperandAccessReady(Wavefront *w, GPUDynInstPtr ii, 240 VrfAccessType accessType) 241{ 242 bool ready = true; 243 244 return ready; 245} 246 247VectorRegisterFile* 248VectorRegisterFileParams::create() 249{ 250 return new VectorRegisterFile(this); 251} 252