1/* 2 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its 18 * contributors may be used to endorse or promote products derived from this 19 * software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Authors: Anthony Gutierrez 34 */ 35 36#include "gpu-compute/gpu_dyn_inst.hh" 37 38#include "debug/GPUMem.hh" 39#include "gpu-compute/gpu_static_inst.hh" 40#include "gpu-compute/shader.hh" 41#include "gpu-compute/wavefront.hh" 42 43GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, 44 GPUStaticInst *static_inst, uint64_t instSeqNum) 45 : GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0), 46 n_reg(0), useContinuation(false), 47 statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum) 48{ 49 tlbHitLevel.assign(computeUnit()->wfSize(), -1); 50 d_data = new uint8_t[computeUnit()->wfSize() * 16]; 51 a_data = new uint8_t[computeUnit()->wfSize() * 8]; 52 x_data = new uint8_t[computeUnit()->wfSize() * 8]; 53 for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) { 54 a_data[i] = 0; 55 x_data[i] = 0; 56 } 57 for (int i = 0; i < (computeUnit()->wfSize() * 16); ++i) { 58 d_data[i] = 0; 59 } 60} 61 62GPUDynInst::~GPUDynInst() 63{ 64 delete[] d_data; 65 delete[] a_data; 66 delete[] x_data; 67} 68 69void 70GPUDynInst::execute(GPUDynInstPtr gpuDynInst) 71{ 72 _staticInst->execute(gpuDynInst); 73} 74 75int 76GPUDynInst::numSrcRegOperands() 77{ 78 return _staticInst->numSrcRegOperands(); 79} 80 81int 82GPUDynInst::numDstRegOperands() 83{ 84 return _staticInst->numDstRegOperands(); 85} 86 87int 88GPUDynInst::getNumOperands() 89{ 90 return _staticInst->getNumOperands(); 91} 92 93bool 94GPUDynInst::isVectorRegister(int operandIdx) 95{ 96 return _staticInst->isVectorRegister(operandIdx); 97} 98 99bool 100GPUDynInst::isScalarRegister(int operandIdx) 101{ 102 return _staticInst->isScalarRegister(operandIdx); 103} 104 105bool 106GPUDynInst::isCondRegister(int operandIdx) 107{ 108 return _staticInst->isCondRegister(operandIdx); 109} 110 111int 112GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst) 113{ 114 return _staticInst->getRegisterIndex(operandIdx, gpuDynInst); 115} 116 117int 118GPUDynInst::getOperandSize(int operandIdx) 119{ 120 return _staticInst->getOperandSize(operandIdx); 121} 122 123bool 124GPUDynInst::isDstOperand(int operandIdx) 125{ 126 return _staticInst->isDstOperand(operandIdx); 127} 128 129bool 130GPUDynInst::isSrcOperand(int operandIdx) 131{ 132 return _staticInst->isSrcOperand(operandIdx); 133} 134 135const std::string& 136GPUDynInst::disassemble() const 137{ 138 return _staticInst->disassemble(); 139} 140 141uint64_t 142GPUDynInst::seqNum() const 143{ 144 return _seqNum; 145} 146 147Enums::StorageClassType 148GPUDynInst::executedAs() 149{ 150 return _staticInst->executed_as; 151} 152 153// Process a memory instruction and (if necessary) submit timing request 154void 155GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst) 156{ 157 DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n", 158 cu->cu_id, simdId, wfSlotId, exec_mask); 159 160 _staticInst->initiateAcc(gpuDynInst); 161 time = 0; 162} 163 164void 165GPUDynInst::completeAcc(GPUDynInstPtr gpuDynInst) 166{ 167 _staticInst->completeAcc(gpuDynInst); 168} 169 170/** 171 * accessor methods for the attributes of 172 * the underlying GPU static instruction 173 */ 174bool 175GPUDynInst::isALU() const 176{ 177 return _staticInst->isALU(); 178} 179 180bool 181GPUDynInst::isBranch() const 182{ 183 return _staticInst->isBranch(); 184} 185 186bool 187GPUDynInst::isNop() const 188{ 189 return _staticInst->isNop(); 190} 191 192bool 193GPUDynInst::isReturn() const 194{ 195 return _staticInst->isReturn(); 196} 197 198bool 199GPUDynInst::isUnconditionalJump() const 200{ 201 return _staticInst->isUnconditionalJump(); 202} 203 204bool 205GPUDynInst::isSpecialOp() const 206{ 207 return _staticInst->isSpecialOp(); 208} 209 210bool 211GPUDynInst::isWaitcnt() const 212{ 213 return _staticInst->isWaitcnt(); 214} 215 216bool 217GPUDynInst::isBarrier() const 218{ 219 return _staticInst->isBarrier(); 220} 221 222bool 223GPUDynInst::isMemFence() const 224{ 225 return _staticInst->isMemFence(); 226} 227 228bool 229GPUDynInst::isMemRef() const 230{ 231 return _staticInst->isMemRef(); 232} 233 234bool 235GPUDynInst::isFlat() const 236{ 237 return _staticInst->isFlat(); 238} 239 240bool 241GPUDynInst::isLoad() const 242{ 243 return _staticInst->isLoad(); 244} 245 246bool 247GPUDynInst::isStore() const 248{ 249 return _staticInst->isStore(); 250} 251 252bool 253GPUDynInst::isAtomic() const 254{ 255 return _staticInst->isAtomic(); 256} 257 258bool 259GPUDynInst::isAtomicNoRet() const 260{ 261 return _staticInst->isAtomicNoRet(); 262} 263 264bool 265GPUDynInst::isAtomicRet() const 266{ 267 return _staticInst->isAtomicRet(); 268} 269 270bool 271GPUDynInst::isScalar() const 272{ 273 return _staticInst->isScalar(); 274} 275 276bool 277GPUDynInst::readsSCC() const 278{ 279 return _staticInst->readsSCC(); 280} 281 282bool 283GPUDynInst::writesSCC() const 284{ 285 return _staticInst->writesSCC(); 286} 287 288bool 289GPUDynInst::readsVCC() const 290{ 291 return _staticInst->readsVCC(); 292} 293 294bool 295GPUDynInst::writesVCC() const 296{ 297 return _staticInst->writesVCC(); 298} 299 300bool 301GPUDynInst::isAtomicAnd() const 302{ 303 return _staticInst->isAtomicAnd(); 304} 305 306bool 307GPUDynInst::isAtomicOr() const 308{ 309 return _staticInst->isAtomicOr(); 310} 311 312bool 313GPUDynInst::isAtomicXor() const 314{ 315 return _staticInst->isAtomicXor(); 316} 317 318bool 319GPUDynInst::isAtomicCAS() const 320{ 321 return _staticInst->isAtomicCAS(); 322} 323 324bool GPUDynInst::isAtomicExch() const 325{ 326 return _staticInst->isAtomicExch(); 327} 328 329bool 330GPUDynInst::isAtomicAdd() const 331{ 332 return _staticInst->isAtomicAdd(); 333} 334 335bool 336GPUDynInst::isAtomicSub() const 337{ 338 return _staticInst->isAtomicSub(); 339} 340 341bool 342GPUDynInst::isAtomicInc() const 343{ 344 return _staticInst->isAtomicInc(); 345} 346 347bool 348GPUDynInst::isAtomicDec() const 349{ 350 return _staticInst->isAtomicDec(); 351} 352 353bool 354GPUDynInst::isAtomicMax() const 355{ 356 return _staticInst->isAtomicMax(); 357} 358 359bool 360GPUDynInst::isAtomicMin() const 361{ 362 return _staticInst->isAtomicMin(); 363} 364 365bool 366GPUDynInst::isArgLoad() const 367{ 368 return _staticInst->isArgLoad(); 369} 370 371bool 372GPUDynInst::isGlobalMem() const 373{ 374 return _staticInst->isGlobalMem(); 375} 376 377bool 378GPUDynInst::isLocalMem() const 379{ 380 return _staticInst->isLocalMem(); 381} 382 383bool 384GPUDynInst::isArgSeg() const 385{ 386 return _staticInst->isArgSeg(); 387} 388 389bool 390GPUDynInst::isGlobalSeg() const 391{ 392 return _staticInst->isGlobalSeg(); 393} 394 395bool 396GPUDynInst::isGroupSeg() const 397{ 398 return _staticInst->isGroupSeg(); 399} 400 401bool 402GPUDynInst::isKernArgSeg() const 403{ 404 return _staticInst->isKernArgSeg(); 405} 406 407bool 408GPUDynInst::isPrivateSeg() const 409{ 410 return _staticInst->isPrivateSeg(); 411} 412 413bool 414GPUDynInst::isReadOnlySeg() const 415{ 416 return _staticInst->isReadOnlySeg(); 417} 418 419bool 420GPUDynInst::isSpillSeg() const 421{ 422 return _staticInst->isSpillSeg(); 423} 424 425bool 426GPUDynInst::isWorkitemScope() const 427{ 428 return _staticInst->isWorkitemScope(); 429} 430 431bool 432GPUDynInst::isWavefrontScope() const 433{ 434 return _staticInst->isWavefrontScope(); 435} 436 437bool 438GPUDynInst::isWorkgroupScope() const 439{ 440 return _staticInst->isWorkgroupScope(); 441} 442 443bool 444GPUDynInst::isDeviceScope() const 445{ 446 return _staticInst->isDeviceScope(); 447} 448 449bool 450GPUDynInst::isSystemScope() const 451{ 452 return _staticInst->isSystemScope(); 453} 454 455bool 456GPUDynInst::isNoScope() const 457{ 458 return _staticInst->isNoScope(); 459} 460 461bool 462GPUDynInst::isRelaxedOrder() const 463{ 464 return _staticInst->isRelaxedOrder(); 465} 466 467bool 468GPUDynInst::isAcquire() const 469{ 470 return _staticInst->isAcquire(); 471} 472 473bool 474GPUDynInst::isRelease() const 475{ 476 return _staticInst->isRelease(); 477} 478 479bool 480GPUDynInst::isAcquireRelease() const 481{ 482 return _staticInst->isAcquireRelease(); 483} 484 485bool 486GPUDynInst::isNoOrder() const 487{ 488 return _staticInst->isNoOrder(); 489} 490 491bool 492GPUDynInst::isGloballyCoherent() const 493{ 494 return _staticInst->isGloballyCoherent(); 495} 496 497bool 498GPUDynInst::isSystemCoherent() const 499{ 500 return _staticInst->isSystemCoherent(); 501} 502 503void 504GPUDynInst::updateStats() 505{ 506 if (_staticInst->isLocalMem()) { 507 // access to LDS (shared) memory 508 cu->dynamicLMemInstrCnt++; 509 } else { 510 // access to global memory 511 512 // update PageDivergence histogram 513 int number_pages_touched = cu->pagesTouched.size(); 514 assert(number_pages_touched); 515 cu->pageDivergenceDist.sample(number_pages_touched); 516 517 std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret; 518 519 for (auto it : cu->pagesTouched) { 520 // see if this page has been touched before. if not, this also 521 // inserts the page into the table. 522 ret = cu->pageAccesses 523 .insert(ComputeUnit::pageDataStruct::value_type(it.first, 524 std::make_pair(1, it.second))); 525 526 // if yes, then update the stats 527 if (!ret.second) { 528 ret.first->second.first++; 529 ret.first->second.second += it.second; 530 } 531 } 532 533 cu->pagesTouched.clear(); 534 535 // total number of memory instructions (dynamic) 536 // Atomics are counted as a single memory instruction. 537 // this is # memory instructions per wavefronts, not per workitem 538 cu->dynamicGMemInstrCnt++; 539 } 540} 541