gpu_dyn_inst.cc revision 11692:e772fdcd3809
1/* 2 * Copyright (c) 2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Author: Anthony Gutierrez 34 */ 35 36#include "gpu-compute/gpu_dyn_inst.hh" 37 38#include "debug/GPUMem.hh" 39#include "gpu-compute/gpu_static_inst.hh" 40#include "gpu-compute/shader.hh" 41#include "gpu-compute/wavefront.hh" 42 43GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, 44 GPUStaticInst *static_inst, uint64_t instSeqNum) 45 : GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0), 46 n_reg(0), useContinuation(false), 47 statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum) 48{ 49 tlbHitLevel.assign(computeUnit()->wfSize(), -1); 50 d_data = new uint8_t[computeUnit()->wfSize() * 16]; 51 a_data = new uint8_t[computeUnit()->wfSize() * 8]; 52 x_data = new uint8_t[computeUnit()->wfSize() * 8]; 53 for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) { 54 a_data[i] = 0; 55 x_data[i] = 0; 56 } 57 for (int i = 0; i < (computeUnit()->wfSize() * 16); ++i) { 58 d_data[i] = 0; 59 } 60} 61 62GPUDynInst::~GPUDynInst() 63{ 64 delete[] d_data; 65 delete[] a_data; 66 delete[] x_data; 67} 68 69void 70GPUDynInst::execute(GPUDynInstPtr gpuDynInst) 71{ 72 _staticInst->execute(gpuDynInst); 73} 74 75int 76GPUDynInst::numSrcRegOperands() 77{ 78 return _staticInst->numSrcRegOperands(); 79} 80 81int 82GPUDynInst::numDstRegOperands() 83{ 84 return _staticInst->numDstRegOperands(); 85} 86 87int 88GPUDynInst::getNumOperands() 89{ 90 return _staticInst->getNumOperands(); 91} 92 93bool 94GPUDynInst::isVectorRegister(int operandIdx) 95{ 96 return _staticInst->isVectorRegister(operandIdx); 97} 98 99bool 100GPUDynInst::isScalarRegister(int operandIdx) 101{ 102 return _staticInst->isScalarRegister(operandIdx); 103} 104 105int 106GPUDynInst::getRegisterIndex(int operandIdx) 107{ 108 return _staticInst->getRegisterIndex(operandIdx); 109} 110 111int 112GPUDynInst::getOperandSize(int operandIdx) 113{ 114 return _staticInst->getOperandSize(operandIdx); 115} 116 117bool 118GPUDynInst::isDstOperand(int operandIdx) 119{ 120 return _staticInst->isDstOperand(operandIdx); 121} 122 123bool 124GPUDynInst::isSrcOperand(int operandIdx) 125{ 126 return _staticInst->isSrcOperand(operandIdx); 127} 128 129const std::string& 130GPUDynInst::disassemble() const 131{ 132 return _staticInst->disassemble(); 133} 134 135uint64_t 136GPUDynInst::seqNum() const 137{ 138 return _seqNum; 139} 140 141Enums::StorageClassType 142GPUDynInst::executedAs() 143{ 144 return _staticInst->executed_as; 145} 146 147// Process a memory instruction and (if necessary) submit timing request 148void 149GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst) 150{ 151 DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n", 152 cu->cu_id, simdId, wfSlotId, exec_mask); 153 154 _staticInst->initiateAcc(gpuDynInst); 155 time = 0; 156} 157 158/** 159 * accessor methods for the attributes of 160 * the underlying GPU static instruction 161 */ 162bool 163GPUDynInst::isALU() const 164{ 165 return _staticInst->isALU(); 166} 167 168bool 169GPUDynInst::isBranch() const 170{ 171 return _staticInst->isBranch(); 172} 173 174bool 175GPUDynInst::isNop() const 176{ 177 return _staticInst->isNop(); 178} 179 180bool 181GPUDynInst::isReturn() const 182{ 183 return _staticInst->isReturn(); 184} 185 186bool 187GPUDynInst::isUnconditionalJump() const 188{ 189 return _staticInst->isUnconditionalJump(); 190} 191 192bool 193GPUDynInst::isSpecialOp() const 194{ 195 return _staticInst->isSpecialOp(); 196} 197 198bool 199GPUDynInst::isWaitcnt() const 200{ 201 return _staticInst->isWaitcnt(); 202} 203 204bool 205GPUDynInst::isBarrier() const 206{ 207 return _staticInst->isBarrier(); 208} 209 210bool 211GPUDynInst::isMemFence() const 212{ 213 return _staticInst->isMemFence(); 214} 215 216bool 217GPUDynInst::isMemRef() const 218{ 219 return _staticInst->isMemRef(); 220} 221 222bool 223GPUDynInst::isFlat() const 224{ 225 return _staticInst->isFlat(); 226} 227 228bool 229GPUDynInst::isLoad() const 230{ 231 return _staticInst->isLoad(); 232} 233 234bool 235GPUDynInst::isStore() const 236{ 237 return _staticInst->isStore(); 238} 239 240bool 241GPUDynInst::isAtomic() const 242{ 243 return _staticInst->isAtomic(); 244} 245 246bool 247GPUDynInst::isAtomicNoRet() const 248{ 249 return _staticInst->isAtomicNoRet(); 250} 251 252bool 253GPUDynInst::isAtomicRet() const 254{ 255 return _staticInst->isAtomicRet(); 256} 257 258bool 259GPUDynInst::isScalar() const 260{ 261 return _staticInst->isScalar(); 262} 263 264bool 265GPUDynInst::readsSCC() const 266{ 267 return _staticInst->readsSCC(); 268} 269 270bool 271GPUDynInst::writesSCC() const 272{ 273 return _staticInst->writesSCC(); 274} 275 276bool 277GPUDynInst::readsVCC() const 278{ 279 return _staticInst->readsVCC(); 280} 281 282bool 283GPUDynInst::writesVCC() const 284{ 285 return _staticInst->writesVCC(); 286} 287 288bool 289GPUDynInst::isAtomicAnd() const 290{ 291 return _staticInst->isAtomicAnd(); 292} 293 294bool 295GPUDynInst::isAtomicOr() const 296{ 297 return _staticInst->isAtomicOr(); 298} 299 300bool 301GPUDynInst::isAtomicXor() const 302{ 303 return _staticInst->isAtomicXor(); 304} 305 306bool 307GPUDynInst::isAtomicCAS() const 308{ 309 return _staticInst->isAtomicCAS(); 310} 311 312bool GPUDynInst::isAtomicExch() const 313{ 314 return _staticInst->isAtomicExch(); 315} 316 317bool 318GPUDynInst::isAtomicAdd() const 319{ 320 return _staticInst->isAtomicAdd(); 321} 322 323bool 324GPUDynInst::isAtomicSub() const 325{ 326 return _staticInst->isAtomicSub(); 327} 328 329bool 330GPUDynInst::isAtomicInc() const 331{ 332 return _staticInst->isAtomicInc(); 333} 334 335bool 336GPUDynInst::isAtomicDec() const 337{ 338 return _staticInst->isAtomicDec(); 339} 340 341bool 342GPUDynInst::isAtomicMax() const 343{ 344 return _staticInst->isAtomicMax(); 345} 346 347bool 348GPUDynInst::isAtomicMin() const 349{ 350 return _staticInst->isAtomicMin(); 351} 352 353bool 354GPUDynInst::isArgLoad() const 355{ 356 return _staticInst->isArgLoad(); 357} 358 359bool 360GPUDynInst::isGlobalMem() const 361{ 362 return _staticInst->isGlobalMem(); 363} 364 365bool 366GPUDynInst::isLocalMem() const 367{ 368 return _staticInst->isLocalMem(); 369} 370 371bool 372GPUDynInst::isArgSeg() const 373{ 374 return _staticInst->isArgSeg(); 375} 376 377bool 378GPUDynInst::isGlobalSeg() const 379{ 380 return _staticInst->isGlobalSeg(); 381} 382 383bool 384GPUDynInst::isGroupSeg() const 385{ 386 return _staticInst->isGroupSeg(); 387} 388 389bool 390GPUDynInst::isKernArgSeg() const 391{ 392 return _staticInst->isKernArgSeg(); 393} 394 395bool 396GPUDynInst::isPrivateSeg() const 397{ 398 return _staticInst->isPrivateSeg(); 399} 400 401bool 402GPUDynInst::isReadOnlySeg() const 403{ 404 return _staticInst->isReadOnlySeg(); 405} 406 407bool 408GPUDynInst::isSpillSeg() const 409{ 410 return _staticInst->isSpillSeg(); 411} 412 413bool 414GPUDynInst::isWorkitemScope() const 415{ 416 return _staticInst->isWorkitemScope(); 417} 418 419bool 420GPUDynInst::isWavefrontScope() const 421{ 422 return _staticInst->isWavefrontScope(); 423} 424 425bool 426GPUDynInst::isWorkgroupScope() const 427{ 428 return _staticInst->isWorkgroupScope(); 429} 430 431bool 432GPUDynInst::isDeviceScope() const 433{ 434 return _staticInst->isDeviceScope(); 435} 436 437bool 438GPUDynInst::isSystemScope() const 439{ 440 return _staticInst->isSystemScope(); 441} 442 443bool 444GPUDynInst::isNoScope() const 445{ 446 return _staticInst->isNoScope(); 447} 448 449bool 450GPUDynInst::isRelaxedOrder() const 451{ 452 return _staticInst->isRelaxedOrder(); 453} 454 455bool 456GPUDynInst::isAcquire() const 457{ 458 return _staticInst->isAcquire(); 459} 460 461bool 462GPUDynInst::isRelease() const 463{ 464 return _staticInst->isRelease(); 465} 466 467bool 468GPUDynInst::isAcquireRelease() const 469{ 470 return _staticInst->isAcquireRelease(); 471} 472 473bool 474GPUDynInst::isNoOrder() const 475{ 476 return _staticInst->isNoOrder(); 477} 478 479bool 480GPUDynInst::isGloballyCoherent() const 481{ 482 return _staticInst->isGloballyCoherent(); 483} 484 485bool 486GPUDynInst::isSystemCoherent() const 487{ 488 return _staticInst->isSystemCoherent(); 489} 490 491void 492GPUDynInst::updateStats() 493{ 494 if (_staticInst->isLocalMem()) { 495 // access to LDS (shared) memory 496 cu->dynamicLMemInstrCnt++; 497 } else { 498 // access to global memory 499 500 // update PageDivergence histogram 501 int number_pages_touched = cu->pagesTouched.size(); 502 assert(number_pages_touched); 503 cu->pageDivergenceDist.sample(number_pages_touched); 504 505 std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret; 506 507 for (auto it : cu->pagesTouched) { 508 // see if this page has been touched before. if not, this also 509 // inserts the page into the table. 510 ret = cu->pageAccesses 511 .insert(ComputeUnit::pageDataStruct::value_type(it.first, 512 std::make_pair(1, it.second))); 513 514 // if yes, then update the stats 515 if (!ret.second) { 516 ret.first->second.first++; 517 ret.first->second.second += it.second; 518 } 519 } 520 521 cu->pagesTouched.clear(); 522 523 // total number of memory instructions (dynamic) 524 // Atomics are counted as a single memory instruction. 525 // this is # memory instructions per wavefronts, not per workitem 526 cu->dynamicGMemInstrCnt++; 527 } 528} 529