mem.hh revision 11536
/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#ifndef __ARCH_HSAIL_INSTS_MEM_HH__
#define __ARCH_HSAIL_INSTS_MEM_HH__

#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"

namespace HsailISA
{
    class MemInst
    {
      public:
        MemInst() : size(0), addr_operand(nullptr) { }

        MemInst(Enums::MemType m_type)
        {
            if (m_type == Enums::M_U64 ||
                m_type == Enums::M_S64 ||
                m_type == Enums::M_F64) {
                size = 8;
            } else if (m_type == Enums::M_U32 ||
                       m_type == Enums::M_S32 ||
                       m_type == Enums::M_F32) {
                size = 4;
            } else if (m_type == Enums::M_U16 ||
                       m_type == Enums::M_S16 ||
                       m_type == Enums::M_F16) {
                size = 2;
            } else {
                size = 1;
            }

            addr_operand = nullptr;
        }

        void
        init_addr(AddrOperandBase *_addr_operand)
        {
            addr_operand = _addr_operand;
        }

      private:
        int size;
        AddrOperandBase *addr_operand;

      public:
        int getMemOperandSize() { return size; }
        AddrOperandBase *getAddressOperand() { return addr_operand; }
    };
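    // Base class for lda (load address) instructions: operand 0 is the
    // destination register that receives the computed address, operand 1
    // is the address expression itself.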
    template<typename DestOperandType, typename AddrOperandType>
    class LdaInstBase : public HsailGPUStaticInst
    {
      public:
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                    const char *_opcode)
            : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);
            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        int numSrcRegOperands() override
        { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() override
        { return dest.isVectorRegister(); }
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister())
                return 2;
            return 1;
        }
    };

    template<typename DestDataType, typename AddrOperandType>
    class LdaInst :
        public LdaInstBase<typename DestDataType::OperandType,
                           AddrOperandType>,
        public MemInst
    {
      public:
        void generateDisassembly();

        LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                const char *_opcode)
            : LdaInstBase<typename DestDataType::OperandType,
                          AddrOperandType>(ib, obj, _opcode)
        {
            init_addr(&this->addr);
        }

        void execute(GPUDynInstPtr gpuDynInst);
    };

    template<typename DataType>
    GPUStaticInst*
    decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
        BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);

        if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
        } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (regDataType.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
              default:
                fatal("Bad ldas register operand type %d\n",
                      regDataType.regKind);
            }
        } else {
            fatal("Bad ldas register operand kind %d\n", regDataType.kind);
        }
    }
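    // Base class for ld. The constructor dispatches on the opcode because
    // the same class also implements atomic loads: a plain BRIG_OPCODE_LD
    // carries its width and equivalence class in a BrigInstMem, while an
    // atomic load carries memory order/scope in a BrigInstAtomic. Each
    // supported segment is mapped to the matching *_READ operation type.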
    template<typename MemOperandType, typename DestOperandType,
             typename AddrOperandType>
    class LdInstBase : public HsailGPUStaticInst
    {
      public:
        Brig::BrigWidth8_t width;
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigMemoryScope memoryScope;
        unsigned int equivClass;
        bool isArgLoad()
        {
            return segment == Brig::BRIG_SEGMENT_KERNARG ||
                   segment == Brig::BRIG_SEGMENT_ARG;
        }
        void
        initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
        {
            using namespace Brig;

            const BrigInstMem *ldst = (const BrigInstMem*)ib;

            segment = (BrigSegment)ldst->segment;
            memoryOrder = BRIG_MEMORY_ORDER_NONE;
            memoryScope = BRIG_MEMORY_SCOPE_NONE;
            equivClass = ldst->equivClass;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_READ;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_READ;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_READ;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_READ;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_READ;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_READ;
                break;

              case BRIG_SEGMENT_KERNARG:
                o_type = Enums::OT_KERN_READ;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("Ld: segment %d not supported\n", segment);
            }

            width = ldst->width;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        void
        initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            equivClass = 0;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_READ;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_READ;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_READ;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_READ;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_READ;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_READ;
                break;

              case BRIG_SEGMENT_KERNARG:
                o_type = Enums::OT_KERN_READ;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("Ld: segment %d not supported\n", segment);
            }

            width = BRIG_WIDTH_1;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
            : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            if (ib->opcode == BRIG_OPCODE_LD) {
                initLd(ib, obj, _opcode);
            } else {
                initAtomicLd(ib, obj, _opcode);
            }
        }
        int numSrcRegOperands() override
        { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() override { return dest.isVectorRegister(); }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
    };

    template<typename MemDataType, typename DestDataType,
             typename AddrOperandType>
    class LdInst :
        public LdInstBase<typename MemDataType::CType,
                          typename DestDataType::OperandType, AddrOperandType>,
        public MemInst
    {
        typename DestDataType::OperandType::DestOperand dest_vect[4];
        uint16_t num_dest_operands;
        void generateDisassembly() override;

      public:
        LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
            : LdInstBase<typename MemDataType::CType,
                         typename DestDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

            if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)brigOp;

                // the first word of the element list is its size in bytes;
                // each entry is a 4-byte operand offset, so the quotient is
                // the number of destination registers
                num_dest_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_dest_operands <= 4);
            } else {
                num_dest_operands = 1;
            }

            if (num_dest_operands > 1) {
                assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_dest_operands; ++i) {
                    dest_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }
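        // initiateAcc issues one memory request per active lane, and one
        // per destination register for vector loads. d_data is laid out
        // operand-major: element k * wfSize() + i holds destination
        // operand k of lane i.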
        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_dest_operands > 1) {
                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_dest_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_dest_operands; ++k) {

                c0 *d = &((c0*)gpuDynInst->d_data)
                    [k * gpuDynInst->computeUnit()->wfSize()];

                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (isLocalMem()) {
                            // load from shared memory
                            *d = gpuDynInst->wavefront()->ldsChunk->
                                read<c0>(vaddr);
                        } else {
                            Request *req = new Request(0, vaddr, sizeof(c0), 0,
                                gpuDynInst->computeUnit()->masterId(),
                                0, gpuDynInst->wfDynId);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
                            pkt->dataStatic(d);

                            if (gpuDynInst->computeUnit()->shader->
                                separate_acquire_release &&
                                gpuDynInst->memoryOrder ==
                                Enums::MEMORY_ORDER_SC_ACQUIRE) {
                                // if this load has acquire semantics,
                                // set the response continuation function
                                // to perform an Acquire request
                                gpuDynInst->execContinuation =
                                    &GPUStaticInst::execLdAcq;

                                gpuDynInst->useContinuation = true;
                            } else {
                                // the request will be finished when
                                // the load completes
                                gpuDynInst->useContinuation = false;
                            }
                            // translation is performed in sendRequest()
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);
                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      private:
        void
        execLdAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after the load has completed, and if the load has acquire
            // semantics, issue an acquire request.
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                        gpuDynInst->computeUnit()->masterId(),
                        0, gpuDynInst->wfDynId);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
                }
            }
        }

      public:
        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            if (num_dest_operands > 1) {
                return dest_vect[operandIndex].isVectorRegister();
            } else if (num_dest_operands == 1) {
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isVectorRegister();
            }
            return false;
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isCondRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isCondRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isScalarRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isScalarRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return false;
            return true;
        }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.opSize());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].opSize());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.opSize());
            return 0;
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.regIndex());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].regIndex());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.regIndex());
            return -1;
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return(num_dest_operands+1);
            else
                return(num_dest_operands);
        }
        void execute(GPUDynInstPtr gpuDynInst) override;
    };

    template<typename MemDT, typename DestDT>
    GPUStaticInst*
    decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
                   tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdInst<MemDT, DestDT,
                                  SRegAddrOperand>(ib, obj, "ld");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdInst<MemDT, DestDT,
                                  DRegAddrOperand>(ib, obj, "ld");
              default:
                fatal("Bad ld register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad ld register operand kind %d\n", tmp.kind);
        }
    }
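    // decodeLd selects the destination register width from the BRIG
    // register kind: sub-word (8- and 16-bit) types are widened into a
    // 32-bit destination register, and floating-point destinations reuse
    // the U32/U64 paths, since only the destination bit width matters here.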
    template<typename MemDT>
    GPUStaticInst*
    decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
        BrigRegOperandInfo dest = findRegDataType(op_offs, obj);

        assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
               dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
        switch (dest.regKind) {
          case Brig::BRIG_REGISTER_KIND_SINGLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
              case Brig::BRIG_TYPE_B16:
              case Brig::BRIG_TYPE_B32:
                return decodeLd2<MemDT, B32>(ib, obj);
              case Brig::BRIG_TYPE_U8:
              case Brig::BRIG_TYPE_U16:
              case Brig::BRIG_TYPE_U32:
                return decodeLd2<MemDT, U32>(ib, obj);
              case Brig::BRIG_TYPE_S8:
              case Brig::BRIG_TYPE_S16:
              case Brig::BRIG_TYPE_S32:
                return decodeLd2<MemDT, S32>(ib, obj);
              case Brig::BRIG_TYPE_F16:
              case Brig::BRIG_TYPE_F32:
                return decodeLd2<MemDT, U32>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            }
          case Brig::BRIG_REGISTER_KIND_DOUBLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B64:
                return decodeLd2<MemDT, B64>(ib, obj);
              case Brig::BRIG_TYPE_U64:
                return decodeLd2<MemDT, U64>(ib, obj);
              case Brig::BRIG_TYPE_S64:
                return decodeLd2<MemDT, S64>(ib, obj);
              case Brig::BRIG_TYPE_F64:
                return decodeLd2<MemDT, U64>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            }
          default:
            fatal("Bad ld register operand type %d, %d\n", dest.regKind,
                  ib->type);
        }
    }

    template<typename MemDataType, typename SrcOperandType,
             typename AddrOperandType>
    class StInstBase : public HsailGPUStaticInst
    {
      public:
        typename SrcOperandType::SrcOperand src;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigMemoryOrder memoryOrder;
        unsigned int equivClass;

        void
        initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
        {
            using namespace Brig;

            const BrigInstMem *ldst = (const BrigInstMem*)ib;

            segment = (BrigSegment)ldst->segment;
            memoryOrder = BRIG_MEMORY_ORDER_NONE;
            memoryScope = BRIG_MEMORY_SCOPE_NONE;
            equivClass = ldst->equivClass;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_WRITE;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_WRITE;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_WRITE;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_WRITE;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_WRITE;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_WRITE;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("St: segment %d not supported\n", segment);
            }

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const BrigOperand *baseOp = obj->getOperand(op_offs);

            if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
                (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
                src.init(op_offs, obj);
            }

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }
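        // Note the operand order for atomic stores is the reverse of st:
        // operand 0 is the address and operand 1 the source value.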
        void
        initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            equivClass = 0;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_WRITE;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_WRITE;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_WRITE;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_WRITE;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_WRITE;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_WRITE;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("St: segment %d not supported\n", segment);
            }

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            addr.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            src.init(op_offs, obj);
        }

        StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
            : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            if (ib->opcode == BRIG_OPCODE_ST) {
                initSt(ib, obj, _opcode);
            } else {
                initAtomicSt(ib, obj, _opcode);
            }
        }

        int numDstRegOperands() override { return 0; }
        int numSrcRegOperands() override
        {
            return src.isVectorRegister() + this->addr.isVectorRegister();
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isVectorRegister() :
                   this->addr.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isCondRegister() :
                   this->addr.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isScalarRegister() :
                   this->addr.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) override { return false; }
        int getOperandSize(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.opSize() : this->addr.opSize();
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.regIndex() : this->addr.regIndex();
        }
    };
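    // StInst adds support for vector sources (st_v2/st_v4): up to four
    // source registers are initialized from the BRIG operand list and
    // written out lane by lane. srcIdx names the BRIG operand that holds
    // the stored value (operand 0 for st, operand 1 for atomic forms).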
    template<typename MemDataType, typename SrcDataType,
             typename AddrOperandType>
    class StInst :
        public StInstBase<MemDataType, typename SrcDataType::OperandType,
                          AddrOperandType>,
        public MemInst
    {
      public:
        typename SrcDataType::OperandType::SrcOperand src_vect[4];
        uint16_t num_src_operands;
        void generateDisassembly() override;

        StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode, int srcIdx)
            : StInstBase<MemDataType, typename SrcDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(SrcDataType::memType)
        {
            init_addr(&this->addr);

            BrigRegOperandInfo rinfo;
            unsigned op_offs = obj->getOperandPtr(ib->operands, srcIdx);
            const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
                const Brig::BrigOperandConstantBytes *op =
                    (Brig::BrigOperandConstantBytes*)baseOp;

                rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
                                           Brig::BRIG_TYPE_NONE);
            } else {
                rinfo = findRegDataType(op_offs, obj);
            }

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)baseOp;

                num_src_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_src_operands <= 4);
            } else {
                num_src_operands = 1;
            }

            if (num_src_operands > 1) {
                assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_src_operands; ++i) {
                    src_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before performing a store, check if this store has
            // release semantics, and if so issue a release first
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_RELEASE) {

                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->execContinuation = &GPUStaticInst::execSt;
                    gpuDynInst->useContinuation = true;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                        gpuDynInst->computeUnit()->masterId(),
                        0, gpuDynInst->wfDynId);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);

                    return;
                }
            }

            // if there is no release semantic, perform stores immediately
            execSt(gpuDynInst);
        }

        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }
      private:
        // execSt may be called through a continuation
        // if the store had release semantics. see comment for
        // execSt in gpu_static_inst.hh
        void
        execSt(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_src_operands > 1) {
                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_src_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_src_operands; ++k) {
                c0 *d = &((c0*)gpuDynInst->d_data)
                    [k * gpuDynInst->computeUnit()->wfSize()];

                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (isLocalMem()) {
                            // store to shared memory
                            gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
                                                                         *d);
                        } else {
                            Request *req =
                                new Request(0, vaddr, sizeof(c0), 0,
                                            gpuDynInst->computeUnit()->masterId(),
                                            0, gpuDynInst->wfDynId);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
                            pkt->dataStatic<c0>(d);

                            // translation is performed in sendRequest()
                            // the request will be finished when the store completes
                            gpuDynInst->useContinuation = false;
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);

                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      public:
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isVectorRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isVectorRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isVectorRegister();
            return false;
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isCondRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isCondRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isScalarRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isScalarRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) override { return false; }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.opSize();
            if (num_src_operands > 1)
                return src_vect[operandIndex].opSize();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.opSize();
            return 0;
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.regIndex();
            if (num_src_operands > 1)
                return src_vect[operandIndex].regIndex();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.regIndex();
            return -1;
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return num_src_operands + 1;
            else
                return num_src_operands;
        }
        void execute(GPUDynInstPtr gpuDynInst) override;
    };
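    // decodeSt also serves atomic stores, whose address and source operand
    // positions are swapped relative to st; srcIdx/destIdx pick the right
    // slots before the address register kind is inspected.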
    template<typename DataType, typename SrcDataType>
    GPUStaticInst*
    decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        int srcIdx = 0;
        int destIdx = 1;
        if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
            ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
            srcIdx = 1;
            destIdx = 0;
        }
        unsigned op_offs = obj->getOperandPtr(ib->operands, destIdx);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new StInst<DataType, SrcDataType,
                              NoRegAddrOperand>(ib, obj, "st", srcIdx);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new StInst<DataType, SrcDataType,
                                  SRegAddrOperand>(ib, obj, "st", srcIdx);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new StInst<DataType, SrcDataType,
                                  DRegAddrOperand>(ib, obj, "st", srcIdx);
              default:
                fatal("Bad st register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad st register operand kind %d\n", tmp.kind);
        }
    }

    Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
                                           Brig::BrigAtomicOperation brigOp);
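    // Base class shared by atomic and atomicnoret. NumSrcOperands is 2 for
    // CAS (compare value and swap value) and 1 for all other operations;
    // HasDst distinguishes atomic, which returns the old value, from
    // atomicnoret.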
    template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
             bool HasDst>
    class AtomicInstBase : public HsailGPUStaticInst
    {
      public:
        typename OperandType::DestOperand dest;
        typename OperandType::SrcOperand src[NumSrcOperands];
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigAtomicOperation atomicOperation;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigOpcode opcode;
        Enums::MemOpType opType;

        AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                       const char *_opcode)
            : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            atomicOperation = (BrigAtomicOperation)at->atomicOperation;
            opcode = (BrigOpcode)ib->opcode;
            opType = brigAtomicToMemOpType(opcode, atomicOperation);

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_ATOMIC;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_ATOMIC;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_ATOMIC;
                break;

              default:
                panic("Atomic: segment %d not supported\n", segment);
            }

            if (HasDst) {
                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                dest.init(op_offs, obj);

                op_offs = obj->getOperandPtr(ib->operands, 1);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 2);
                    src[i].init(op_offs, obj);
                }
            } else {

                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 1);
                    src[i].init(op_offs, obj);
                }
            }
        }

        int numSrcRegOperands()
        {
            int operands = 0;
            for (int i = 0; i < NumSrcOperands; i++) {
                if (src[i].isVectorRegister()) {
                    operands++;
                }
            }
            if (addr.isVectorRegister())
                operands++;
            return operands;
        }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands()
        {
            if (addr.isVectorRegister())
                return(NumSrcOperands + 2);
            return(NumSrcOperands + 1);
        }
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isVectorRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isCondRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isCondRegister());
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isScalarRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isScalarRegister());
            else
                return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return true;
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return false;
        }
        bool isDstOperand(int operandIndex)
        {
            if (operandIndex <= NumSrcOperands)
                return false;
            else
                return true;
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].opSize());
            else if (operandIndex == NumSrcOperands)
                return(addr.opSize());
            else
                return(dest.opSize());
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].regIndex());
            else if (operandIndex == NumSrcOperands)
                return(addr.regIndex());
            else
                return(dest.regIndex());
            return -1;
        }
    };
    template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
             bool HasDst>
    class AtomicInst :
        public AtomicInstBase<typename MemDataType::OperandType,
                              AddrOperandType, NumSrcOperands, HasDst>,
        public MemInst
    {
      public:
        void generateDisassembly() override;

        AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
            : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
                             NumSrcOperands, HasDst>
                (ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before doing the RMW, check if this atomic has
            // release semantics, and if so issue a release first
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && (gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {

                    gpuDynInst->statusBitVector = VectorMask(1);

                    gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
                    gpuDynInst->useContinuation = true;

                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                        gpuDynInst->computeUnit()->masterId(),
                        0, gpuDynInst->wfDynId);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);

                    return;
                }
            }

            // if there is no release semantic, execute the RMW immediately
            execAtomic(gpuDynInst);
        }

        void execute(GPUDynInstPtr gpuDynInst) override;

        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }

      private:
        // execAtomic may be called through a continuation
        // if the RMW had release semantics. see comment for
        // execContinuation in gpu_dyn_inst.hh
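        // For group-segment (LDS) accesses the read-modify-write is
        // emulated in place on the wavefront's LDS chunk; global accesses
        // instead send a SwapReq packet whose atomic-op functor applies
        // the operation in the memory system.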
        void
        execAtomic(GPUDynInstPtr gpuDynInst) override
        {
            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            typedef typename MemDataType::CType c0;

            c0 *d = &((c0*) gpuDynInst->d_data)[0];
            c0 *e = &((c0*) gpuDynInst->a_data)[0];
            c0 *f = &((c0*) gpuDynInst->x_data)[0];

            for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                if (gpuDynInst->exec_mask[i]) {
                    Addr vaddr = gpuDynInst->addr[i];

                    if (isLocalMem()) {
                        Wavefront *wavefront = gpuDynInst->wavefront();
                        *d = wavefront->ldsChunk->read<c0>(vaddr);

                        switch (this->opType) {
                          case Enums::MO_AADD:
                          case Enums::MO_ANRADD:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) + (*e));
                            break;
                          case Enums::MO_ASUB:
                          case Enums::MO_ANRSUB:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) - (*e));
                            break;
                          case Enums::MO_AMAX:
                          case Enums::MO_ANRMAX:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::max(wavefront->ldsChunk->read<c0>(vaddr),
                                         (*e)));
                            break;
                          case Enums::MO_AMIN:
                          case Enums::MO_ANRMIN:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::min(wavefront->ldsChunk->read<c0>(vaddr),
                                         (*e)));
                            break;
                          case Enums::MO_AAND:
                          case Enums::MO_ANRAND:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) & (*e));
                            break;
                          case Enums::MO_AOR:
                          case Enums::MO_ANROR:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) | (*e));
                            break;
                          case Enums::MO_AXOR:
                          case Enums::MO_ANRXOR:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
                            break;
                          case Enums::MO_AINC:
                          case Enums::MO_ANRINC:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) + 1);
                            break;
                          case Enums::MO_ADEC:
                          case Enums::MO_ANRDEC:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) - 1);
                            break;
                          case Enums::MO_AEXCH:
                          case Enums::MO_ANREXCH:
                            wavefront->ldsChunk->write<c0>(vaddr, (*e));
                            break;
                          case Enums::MO_ACAS:
                          case Enums::MO_ANRCAS:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
                                (*f) : wavefront->ldsChunk->read<c0>(vaddr));
                            break;
                          default:
                            fatal("Unrecognized or invalid HSAIL atomic op "
                                  "type.\n");
                            break;
                        }
                    } else {
                        Request *req =
                            new Request(0, vaddr, sizeof(c0), 0,
                                        gpuDynInst->computeUnit()->masterId(),
                                        0, gpuDynInst->wfDynId,
                                        gpuDynInst->makeAtomicOpFunctor<c0>(e,
                                            f, this->opType));

                        gpuDynInst->setRequestFlags(req);
                        PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
                        pkt->dataStatic(d);

                        if (gpuDynInst->computeUnit()->shader->
                            separate_acquire_release &&
                            (gpuDynInst->memoryOrder ==
                             Enums::MEMORY_ORDER_SC_ACQUIRE)) {
                            // if this atomic has acquire semantics,
                            // schedule the continuation to perform an
                            // acquire after the RMW completes
                            gpuDynInst->execContinuation =
                                &GPUStaticInst::execAtomicAcq;

                            gpuDynInst->useContinuation = true;
                        } else {
                            // the request will be finished when the RMW completes
                            gpuDynInst->useContinuation = false;
                        }
                        // translation is performed in sendRequest()
                        gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
                                                               pkt);
                    }
                }

                ++d;
                ++e;
                ++f;
            }

            gpuDynInst->updateStats();
        }

        // execAtomicAcq will always be called through a continuation.
        // see comment for execContinuation in gpu_dyn_inst.hh
        void
        execAtomicAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after performing the RMW, check to see if this instruction
            // has acquire semantics, and if so, issue an acquire
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
                    gpuDynInst->statusBitVector = VectorMask(1);

                    // the request will be finished when
                    // the acquire completes
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                        gpuDynInst->computeUnit()->masterId(),
                        0, gpuDynInst->wfDynId);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
                }
            }
        }
    };

    template<typename DataType, typename AddrOperandType, int NumSrcOperands>
    GPUStaticInst*
    constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
            return decodeLd<DataType>(ib, obj);
        } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
                return decodeSt<S8, S8>(ib, obj);
              case Brig::BRIG_TYPE_B16:
                return decodeSt<S16, S16>(ib, obj);
              case Brig::BRIG_TYPE_B32:
                return decodeSt<S32, S32>(ib, obj);
              case Brig::BRIG_TYPE_B64:
                return decodeSt<S64, S64>(ib, obj);
              default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
            }
        } else {
            if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
                return new AtomicInst<DataType, AddrOperandType,
                                      NumSrcOperands, false>(ib, obj,
                                                             "atomicnoret");
            else
                return new AtomicInst<DataType, AddrOperandType,
                                      NumSrcOperands, true>(ib, obj, "atomic");
        }
    }

    // decodeAtomicHelper locates the address operand: atomicnoret has no
    // destination register, so the address is operand 0 rather than operand 1.
    template<typename DataType, int NumSrcOperands>
    GPUStaticInst*
    decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
            Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;

        unsigned op_offs = obj->getOperandPtr(ib->operands, addrIndex);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return constructAtomic<DataType, NoRegAddrOperand,
                                   NumSrcOperands>(ib, obj);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return constructAtomic<DataType, SRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return constructAtomic<DataType, DRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              default:
                fatal("Bad atomic register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad atomic register operand kind %d\n", tmp.kind);
        }
    }


    template<typename DataType>
    GPUStaticInst*
    decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }

    template<typename DataType>
    GPUStaticInst*
    decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }
} // namespace HsailISA

#endif // __ARCH_HSAIL_INSTS_MEM_HH__