mem.hh revision 11347

/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#ifndef __ARCH_HSAIL_INSTS_MEM_HH__
#define __ARCH_HSAIL_INSTS_MEM_HH__

#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"

namespace HsailISA
{
    class MemInst
    {
      public:
        MemInst() : size(0), addr_operand(nullptr) { }

        MemInst(Enums::MemType m_type)
        {
            if (m_type == Enums::M_U64 ||
                m_type == Enums::M_S64 ||
                m_type == Enums::M_F64) {
                size = 8;
            } else if (m_type == Enums::M_U32 ||
                       m_type == Enums::M_S32 ||
                       m_type == Enums::M_F32) {
                size = 4;
            } else if (m_type == Enums::M_U16 ||
                       m_type == Enums::M_S16 ||
                       m_type == Enums::M_F16) {
                size = 2;
            } else {
                size = 1;
            }

            addr_operand = nullptr;
        }

        void
        init_addr(AddrOperandBase *_addr_operand)
        {
            addr_operand = _addr_operand;
        }

      private:
        int size;
        AddrOperandBase *addr_operand;

      public:
        int getMemOperandSize() { return size; }
        AddrOperandBase *getAddressOperand() { return addr_operand; }
    };
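
    // The second MemInst constructor above derives the per-lane access
    // size in bytes from the HSAIL memory type: 64-bit types map to 8,
    // 32-bit to 4, 16-bit to 2, and everything else (the 8-bit types)
    // to 1. An illustrative use (not taken from this file):
    //
    //     MemInst mi(Enums::M_F32);
    //     assert(mi.getMemOperandSize() == 4);  // one lane moves 4 bytes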

    template<typename DestOperandType, typename AddrOperandType>
    class LdaInstBase : public HsailGPUStaticInst
    {
      public:
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                    const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);
            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        int numSrcRegOperands() override
        { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() override
        { return dest.isVectorRegister(); }
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) override {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister())
                return 2;
            return 1;
        }
    };

    template<typename DestDataType, typename AddrOperandType>
    class LdaInst :
        public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
        public MemInst
    {
      public:
        void generateDisassembly();

        LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                const char *_opcode)
            : LdaInstBase<typename DestDataType::OperandType,
                          AddrOperandType>(ib, obj, _opcode)
        {
            init_addr(&this->addr);
        }

        void execute(GPUDynInstPtr gpuDynInst);
    };

    template<typename DataType>
    GPUStaticInst*
    decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
        BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);

        if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
        } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (regDataType.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
              default:
                fatal("Bad ldas register operand type %d\n",
                      regDataType.regKind);
            }
        } else {
            fatal("Bad ldas register operand kind %d\n", regDataType.kind);
        }
    }
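
    // decodeLda shows the dispatch pattern used by every decode helper in
    // this file: inspect the BRIG kind of the address operand, then pick
    // the template specialization for that addressing mode. Roughly (the
    // HSAIL snippets are illustrative, not taken from this file):
    //
    //     ldas $d0, [&arr];     ->  LdaInst<..., NoRegAddrOperand>
    //     ldas $d0, [$s1+4];    ->  LdaInst<..., SRegAddrOperand>
    //     ldas $d0, [$d1+4];    ->  LdaInst<..., DRegAddrOperand>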

    template<typename MemOperandType, typename DestOperandType,
             typename AddrOperandType>
    class LdInstBase : public HsailGPUStaticInst
    {
      public:
        Brig::BrigWidth8_t width;
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigMemoryScope memoryScope;
        unsigned int equivClass;
        bool isArgLoad()
        {
            return segment == Brig::BRIG_SEGMENT_KERNARG ||
                   segment == Brig::BRIG_SEGMENT_ARG;
        }
        void
        initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
        {
            using namespace Brig;

            const BrigInstMem *ldst = (const BrigInstMem*)ib;

            segment = (BrigSegment)ldst->segment;
            memoryOrder = BRIG_MEMORY_ORDER_NONE;
            memoryScope = BRIG_MEMORY_SCOPE_NONE;
            equivClass = ldst->equivClass;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_READ;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_READ;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_READ;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_READ;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_READ;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_READ;
                break;

              case BRIG_SEGMENT_KERNARG:
                o_type = Enums::OT_KERN_READ;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("Ld: segment %d not supported\n", segment);
            }

            width = ldst->width;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        void
        initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            equivClass = 0;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_READ;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_READ;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_READ;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_READ;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_READ;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_READ;
                break;

              case BRIG_SEGMENT_KERNARG:
                o_type = Enums::OT_KERN_READ;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("Ld: segment %d not supported\n", segment);
            }

            width = BRIG_WIDTH_1;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands,1);
            addr.init(op_offs, obj);
        }

        LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            if (ib->opcode == BRIG_OPCODE_LD) {
                initLd(ib, obj, _opcode);
            } else {
                initAtomicLd(ib, obj, _opcode);
            }
        }

        int numSrcRegOperands() override
        { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() override { return dest.isVectorRegister(); }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
    };
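
    // LdInst below layers vector-destination support on LdInstBase: for a
    // packed load (e.g. ld_v4) the destination is a BRIG operand list, and
    // num_dest_operands is recovered by dividing the list's byte count by
    // four (each list entry appears to be a 4-byte reference), capped at
    // 4 elements by the assert.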

    template<typename MemDataType, typename DestDataType,
             typename AddrOperandType>
    class LdInst :
        public LdInstBase<typename MemDataType::CType,
                          typename DestDataType::OperandType, AddrOperandType>,
        public MemInst
    {
        typename DestDataType::OperandType::DestOperand dest_vect[4];
        uint16_t num_dest_operands;
        void generateDisassembly() override;

      public:
        LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
            : LdInstBase<typename MemDataType::CType,
                         typename DestDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);

            unsigned op_offs = obj->getOperandPtr(ib->operands,0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

            if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)brigOp;

                num_dest_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_dest_operands <= 4);
            } else {
                num_dest_operands = 1;
            }

            if (num_dest_operands > 1) {
                assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_dest_operands; ++i) {
                    dest_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_dest_operands > 1) {
                for (int i = 0; i < VSZ; ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_dest_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_dest_operands; ++k) {

                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];

                for (int i = 0; i < VSZ; ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (isLocalMem()) {
                            // load from shared memory
                            *d = gpuDynInst->wavefront()->ldsChunk->
                                read<c0>(vaddr);
                        } else {
                            Request *req = new Request(0, vaddr, sizeof(c0), 0,
                                gpuDynInst->computeUnit()->masterId(),
                                0, gpuDynInst->wfDynId, i);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
                            pkt->dataStatic(d);

                            if (gpuDynInst->computeUnit()->shader->
                                separate_acquire_release &&
                                gpuDynInst->memoryOrder ==
                                Enums::MEMORY_ORDER_SC_ACQUIRE) {
                                // if this load has acquire semantics,
                                // set the response continuation function
                                // to perform an Acquire request
                                gpuDynInst->execContinuation =
                                    &GPUStaticInst::execLdAcq;

                                gpuDynInst->useContinuation = true;
                            } else {
                                // the request will be finished when
                                // the load completes
                                gpuDynInst->useContinuation = false;
                            }
                            // translation is performed in sendRequest()
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);
                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }
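
        // Layout assumed by initiateAcc above: d_data is element-major,
        // so lane i's k-th element lives at d_data[k * VSZ + i]. For a
        // global load, one Request/Packet pair is issued per active lane
        // per element; statusBitVector (seeded from exec_mask) tracks
        // which lanes still have outstanding responses.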

      private:
        void
        execLdAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after the load has completed, and if the load has acquire
            // semantics, issue an acquire request.
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                        gpuDynInst->computeUnit()->masterId(),
                        0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
                }
            }
        }

      public:
        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }

        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            if (num_dest_operands > 1) {
                return dest_vect[operandIndex].isVectorRegister();
            }
            else if (num_dest_operands == 1) {
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isVectorRegister();
            }
            return false;
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isCondRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isCondRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isScalarRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isScalarRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return false;
            return true;
        }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.opSize());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].opSize());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.opSize());
            return 0;
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.regIndex());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].regIndex());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.regIndex());
            return -1;
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return(num_dest_operands+1);
            else
                return(num_dest_operands);
        }
        void execute(GPUDynInstPtr gpuDynInst) override;
    };
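
    // Loads are decoded in two steps: decodeLd (further down) selects the
    // destination data type from the destination register kind and
    // ib->type, then decodeLd2 selects the address operand type, yielding
    // an LdInst specialized on (memory type, destination type, address
    // type).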

    template<typename MemDT, typename DestDT>
    GPUStaticInst*
    decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands,1);
        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
                   tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdInst<MemDT, DestDT,
                                  SRegAddrOperand>(ib, obj, "ld");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdInst<MemDT, DestDT,
                                  DRegAddrOperand>(ib, obj, "ld");
              default:
                fatal("Bad ld register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad ld register operand kind %d\n", tmp.kind);
        }
    }

    template<typename MemDT>
    GPUStaticInst*
    decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands,0);
        BrigRegOperandInfo dest = findRegDataType(op_offs, obj);

        assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
               dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
        switch(dest.regKind) {
          case Brig::BRIG_REGISTER_KIND_SINGLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
              case Brig::BRIG_TYPE_B16:
              case Brig::BRIG_TYPE_B32:
                return decodeLd2<MemDT, B32>(ib, obj);
              case Brig::BRIG_TYPE_U8:
              case Brig::BRIG_TYPE_U16:
              case Brig::BRIG_TYPE_U32:
                return decodeLd2<MemDT, U32>(ib, obj);
              case Brig::BRIG_TYPE_S8:
              case Brig::BRIG_TYPE_S16:
              case Brig::BRIG_TYPE_S32:
                return decodeLd2<MemDT, S32>(ib, obj);
              case Brig::BRIG_TYPE_F16:
              case Brig::BRIG_TYPE_F32:
                return decodeLd2<MemDT, U32>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            };
          case Brig::BRIG_REGISTER_KIND_DOUBLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B64:
                return decodeLd2<MemDT, B64>(ib, obj);
              case Brig::BRIG_TYPE_U64:
                return decodeLd2<MemDT, U64>(ib, obj);
              case Brig::BRIG_TYPE_S64:
                return decodeLd2<MemDT, S64>(ib, obj);
              case Brig::BRIG_TYPE_F64:
                return decodeLd2<MemDT, U64>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            };
          default:
            fatal("Bad ld register operand type %d, %d\n", dest.regKind,
                  ib->type);
        }
    }
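
    // Note that f16/f32 destinations above reuse U32 (and f64 reuses
    // U64), presumably because the register file is typeless and the
    // destination only needs the right width. The store classes below
    // mirror the load classes, with a source operand in place of the
    // destination.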

    template<typename MemDataType, typename SrcOperandType,
             typename AddrOperandType>
    class StInstBase : public HsailGPUStaticInst
    {
      public:
        typename SrcOperandType::SrcOperand src;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigMemoryOrder memoryOrder;
        unsigned int equivClass;

        void
        initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
        {
            using namespace Brig;

            const BrigInstMem *ldst = (const BrigInstMem*)ib;

            segment = (BrigSegment)ldst->segment;
            memoryOrder = BRIG_MEMORY_ORDER_NONE;
            memoryScope = BRIG_MEMORY_SCOPE_NONE;
            equivClass = ldst->equivClass;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_WRITE;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_WRITE;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_WRITE;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_WRITE;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_WRITE;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_WRITE;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("St: segment %d not supported\n", segment);
            }

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const BrigOperand *baseOp = obj->getOperand(op_offs);

            if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
                (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
                src.init(op_offs, obj);
            }

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        void
        initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            equivClass = 0;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_WRITE;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_WRITE;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_WRITE;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_WRITE;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_WRITE;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_WRITE;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("St: segment %d not supported\n", segment);
            }

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            addr.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            src.init(op_offs, obj);
        }
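
        // Note the operand-order difference between the two initializers
        // above: a plain st encodes (src, addr) as operands 0 and 1,
        // while the atomic forms encode (addr, src), so initSt and
        // initAtomicSt read the operand list in opposite orders.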

        StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            if (ib->opcode == BRIG_OPCODE_ST) {
                initSt(ib, obj, _opcode);
            } else {
                initAtomicSt(ib, obj, _opcode);
            }
        }

        int numDstRegOperands() override { return 0; }
        int numSrcRegOperands() override
        {
            return src.isVectorRegister() + this->addr.isVectorRegister();
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isVectorRegister() :
                   this->addr.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isCondRegister() :
                   this->addr.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isScalarRegister() :
                   this->addr.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) override { return false; }
        int getOperandSize(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.opSize() : this->addr.opSize();
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.regIndex() : this->addr.regIndex();
        }
    };
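
    // StInst extends StInstBase with packed sources: a vector store
    // (e.g. st_v4) carries a BRIG operand list that is unpacked into
    // src_vect[0..3], while an immediate store arrives as
    // BRIG_KIND_OPERAND_CONSTANT_BYTES. The srcIdx constructor argument
    // names the BRIG operand slot holding the source (1 for the atomic
    // forms, 0 otherwise; see decodeSt below).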

    template<typename MemDataType, typename SrcDataType,
             typename AddrOperandType>
    class StInst :
        public StInstBase<MemDataType, typename SrcDataType::OperandType,
                          AddrOperandType>,
        public MemInst
    {
      public:
        typename SrcDataType::OperandType::SrcOperand src_vect[4];
        uint16_t num_src_operands;
        void generateDisassembly() override;

        StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode, int srcIdx)
            : StInstBase<MemDataType, typename SrcDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(SrcDataType::memType)
        {
            init_addr(&this->addr);

            BrigRegOperandInfo rinfo;
            unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx);
            const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
                const Brig::BrigOperandConstantBytes *op =
                    (Brig::BrigOperandConstantBytes*)baseOp;

                rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
                                           Brig::BRIG_TYPE_NONE);
            } else {
                rinfo = findRegDataType(op_offs, obj);
            }

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)baseOp;

                num_src_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_src_operands <= 4);
            } else {
                num_src_operands = 1;
            }

            if (num_src_operands > 1) {
                assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_src_operands; ++i) {
                    src_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before performing a store, check if this store has
            // release semantics, and if so issue a release first
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_RELEASE) {

                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->execContinuation = &GPUStaticInst::execSt;
                    gpuDynInst->useContinuation = true;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                        gpuDynInst->computeUnit()->masterId(),
                        0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);

                    return;
                }
            }

            // if there is no release semantic, perform stores immediately
            execSt(gpuDynInst);
        }

        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }
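
        // The release handshake in initiateAcc above works by
        // continuation: statusBitVector = VectorMask(1) appears to mark a
        // single outstanding fence, injectGlobalMemFence issues it, and
        // on completion the framework invokes execContinuation (execSt
        // here), which then performs the actual per-lane stores.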

      private:
        // execSt may be called through a continuation
        // if the store had release semantics. see comment for
        // execSt in gpu_static_inst.hh
        void
        execSt(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_src_operands > 1) {
                for (int i = 0; i < VSZ; ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_src_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_src_operands; ++k) {
                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];

                for (int i = 0; i < VSZ; ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (isLocalMem()) {
                            //store to shared memory
                            gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
                                                                         *d);
                        } else {
                            Request *req =
                                new Request(0, vaddr, sizeof(c0), 0,
                                    gpuDynInst->computeUnit()->masterId(),
                                    0, gpuDynInst->wfDynId, i);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
                            pkt->dataStatic<c0>(d);

                            // translation is performed in sendRequest()
                            // the request will be finished when the store completes
                            gpuDynInst->useContinuation = false;
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);

                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      public:
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isVectorRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isVectorRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isVectorRegister();
            return false;
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isCondRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isCondRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isScalarRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isScalarRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) override { return false; }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.opSize();
            if (num_src_operands > 1)
                return src_vect[operandIndex].opSize();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.opSize();
            return 0;
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.regIndex();
            if (num_src_operands > 1)
                return src_vect[operandIndex].regIndex();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.regIndex();
            return -1;
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return num_src_operands + 1;
            else
                return num_src_operands;
        }
        void execute(GPUDynInstPtr gpuDynInst) override;
    };
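
    // decodeSt copes with the operand-order difference noted above: for
    // the atomic opcodes the address is operand 0 and the source operand
    // 1, otherwise the reverse; hence the srcIdx/destIdx swap below
    // before the address operand's register kind is inspected.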

    template<typename DataType, typename SrcDataType>
    GPUStaticInst*
    decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        int srcIdx = 0;
        int destIdx = 1;
        if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
            ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
            srcIdx = 1;
            destIdx = 0;
        }
        unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new StInst<DataType, SrcDataType,
                              NoRegAddrOperand>(ib, obj, "st", srcIdx);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new StInst<DataType, SrcDataType,
                                  SRegAddrOperand>(ib, obj, "st", srcIdx);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new StInst<DataType, SrcDataType,
                                  DRegAddrOperand>(ib, obj, "st", srcIdx);
              default:
                fatal("Bad st register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad st register operand kind %d\n", tmp.kind);
        }
    }

    Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
                                           Brig::BrigAtomicOperation brigOp);

    template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
             bool HasDst>
    class AtomicInstBase : public HsailGPUStaticInst
    {
      public:
        typename OperandType::DestOperand dest;
        typename OperandType::SrcOperand src[NumSrcOperands];
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigAtomicOperation atomicOperation;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigOpcode opcode;
        Enums::MemOpType opType;

        AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                       const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            atomicOperation = (BrigAtomicOperation)at->atomicOperation;
            opcode = (BrigOpcode)ib->opcode;
            opType = brigAtomicToMemOpType(opcode, atomicOperation);

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_ATOMIC;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_ATOMIC;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_ATOMIC;
                break;

              default:
                panic("Atomic: segment %d not supported\n", segment);
            }

            if (HasDst) {
                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                dest.init(op_offs, obj);

                op_offs = obj->getOperandPtr(ib->operands, 1);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 2);
                    src[i].init(op_offs, obj);
                }
            } else {

                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 1);
                    src[i].init(op_offs, obj);
                }
            }
        }
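
        // Operand-index convention for the queries below: indices
        // 0 .. NumSrcOperands-1 are the sources, index NumSrcOperands is
        // the address, and (when HasDst is true) the last index is the
        // destination.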

        int numSrcRegOperands()
        {
            int operands = 0;
            for (int i = 0; i < NumSrcOperands; i++) {
                if (src[i].isVectorRegister()) {
                    operands++;
                }
            }
            if (addr.isVectorRegister())
                operands++;
            return operands;
        }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands()
        {
            if (addr.isVectorRegister())
                return(NumSrcOperands + 2);
            return(NumSrcOperands + 1);
        }
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isVectorRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isCondRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isCondRegister());
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isScalarRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isScalarRegister());
            else
                return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return true;
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return false;
        }
        bool isDstOperand(int operandIndex)
        {
            if (operandIndex <= NumSrcOperands)
                return false;
            else
                return true;
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].opSize());
            else if (operandIndex == NumSrcOperands)
                return(addr.opSize());
            else
                return(dest.opSize());
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].regIndex());
            else if (operandIndex == NumSrcOperands)
                return(addr.regIndex());
            else
                return(dest.regIndex());
            return -1;
        }
    };

    template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
             bool HasDst>
    class AtomicInst :
        public AtomicInstBase<typename MemDataType::OperandType,
                              AddrOperandType, NumSrcOperands, HasDst>,
        public MemInst
    {
      public:
        void generateDisassembly() override;

        AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
            : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
                             NumSrcOperands, HasDst>
                (ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before doing the RMW, check if this atomic has
            // release semantics, and if so issue a release first
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && (gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {

                    gpuDynInst->statusBitVector = VectorMask(1);

                    gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
                    gpuDynInst->useContinuation = true;

                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                        gpuDynInst->computeUnit()->masterId(),
                        0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);

                    return;
                }
            }

            // if there is no release semantic, execute the RMW immediately
            execAtomic(gpuDynInst);

        }

        void execute(GPUDynInstPtr gpuDynInst) override;

        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }
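
        // For group-segment (LDS) accesses, execAtomic below emulates the
        // read-modify-write directly on the wavefront's ldsChunk: the old
        // value is captured into d_data, then the new value is computed
        // from opType and the operands held in a_data (e) and x_data (f)
        // and written back. Global atomics instead issue a SwapReq packet
        // carrying a functor built by makeAtomicOpFunctor.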

      private:
        // execAtomic may be called through a continuation
        // if the RMW had release semantics. see comment for
        // execContinuation in gpu_dyn_inst.hh
        void
        execAtomic(GPUDynInstPtr gpuDynInst) override
        {
            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            typedef typename MemDataType::CType c0;

            c0 *d = &((c0*) gpuDynInst->d_data)[0];
            c0 *e = &((c0*) gpuDynInst->a_data)[0];
            c0 *f = &((c0*) gpuDynInst->x_data)[0];

            for (int i = 0; i < VSZ; ++i) {
                if (gpuDynInst->exec_mask[i]) {
                    Addr vaddr = gpuDynInst->addr[i];

                    if (isLocalMem()) {
                        Wavefront *wavefront = gpuDynInst->wavefront();
                        *d = wavefront->ldsChunk->read<c0>(vaddr);

                        switch (this->opType) {
                          case Enums::MO_AADD:
                          case Enums::MO_ANRADD:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) + (*e));
                            break;
                          case Enums::MO_ASUB:
                          case Enums::MO_ANRSUB:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) - (*e));
                            break;
                          case Enums::MO_AMAX:
                          case Enums::MO_ANRMAX:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::max(wavefront->ldsChunk->read<c0>(vaddr),
                                         (*e)));
                            break;
                          case Enums::MO_AMIN:
                          case Enums::MO_ANRMIN:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::min(wavefront->ldsChunk->read<c0>(vaddr),
                                         (*e)));
                            break;
                          case Enums::MO_AAND:
                          case Enums::MO_ANRAND:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) & (*e));
                            break;
                          case Enums::MO_AOR:
                          case Enums::MO_ANROR:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) | (*e));
                            break;
                          case Enums::MO_AXOR:
                          case Enums::MO_ANRXOR:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
                            break;
                          case Enums::MO_AINC:
                          case Enums::MO_ANRINC:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) + 1);
                            break;
                          case Enums::MO_ADEC:
                          case Enums::MO_ANRDEC:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) - 1);
                            break;
                          case Enums::MO_AEXCH:
                          case Enums::MO_ANREXCH:
                            wavefront->ldsChunk->write<c0>(vaddr, (*e));
                            break;
                          case Enums::MO_ACAS:
                          case Enums::MO_ANRCAS:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
                                (*f) : wavefront->ldsChunk->read<c0>(vaddr));
                            break;
                          default:
                            fatal("Unrecognized or invalid HSAIL atomic op "
                                  "type.\n");
                            break;
                        }
                    } else {
                        Request *req =
                            new Request(0, vaddr, sizeof(c0), 0,
                                        gpuDynInst->computeUnit()->masterId(),
                                        0, gpuDynInst->wfDynId, i,
                                        gpuDynInst->makeAtomicOpFunctor<c0>(e,
                                            f, this->opType));

                        gpuDynInst->setRequestFlags(req);
                        PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
                        pkt->dataStatic(d);

                        if (gpuDynInst->computeUnit()->shader->
                            separate_acquire_release &&
                            (gpuDynInst->memoryOrder ==
                             Enums::MEMORY_ORDER_SC_ACQUIRE)) {
                            // if this atomic has acquire semantics,
                            // schedule the continuation to perform an
                            // acquire after the RMW completes
                            gpuDynInst->execContinuation =
                                &GPUStaticInst::execAtomicAcq;

                            gpuDynInst->useContinuation = true;
                        } else {
                            // the request will be finished when the RMW completes
                            gpuDynInst->useContinuation = false;
                        }
                        // translation is performed in sendRequest()
                        gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
                                                               pkt);
                    }
                }

                ++d;
                ++e;
                ++f;
            }

            gpuDynInst->updateStats();
        }

        // execAtomicAcq will always be called through a continuation.
        // see comment for execContinuation in gpu_dyn_inst.hh
        void
        execAtomicAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after performing the RMW, check to see if this instruction
            // has acquire semantics, and if so, issue an acquire
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
                    gpuDynInst->statusBitVector = VectorMask(1);

                    // the request will be finished when
                    // the acquire completes
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                        gpuDynInst->computeUnit()->masterId(),
                        0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
                }
            }
        }
    };
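
    // constructAtomic reroutes the degenerate atomic operations:
    // atomic_ld decodes as an ordinary load and atomic_st as an ordinary
    // store (both still pick up memory order/scope via initAtomicLd /
    // initAtomicSt), while everything else becomes an AtomicInst, with
    // HasDst = false for the ATOMICNORET opcode.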

    template<typename DataType, typename AddrOperandType, int NumSrcOperands>
    GPUStaticInst*
    constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
            return decodeLd<DataType>(ib, obj);
        } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
                return decodeSt<S8,S8>(ib, obj);
              case Brig::BRIG_TYPE_B16:
                return decodeSt<S8,S16>(ib, obj);
              case Brig::BRIG_TYPE_B32:
                return decodeSt<S8,S32>(ib, obj);
              case Brig::BRIG_TYPE_B64:
                return decodeSt<S8,S64>(ib, obj);
              default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
            }
        } else {
            if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
                return new AtomicInst<DataType, AddrOperandType,
                                      NumSrcOperands, false>(ib, obj, "atomicnoret");
            else
                return new AtomicInst<DataType, AddrOperandType,
                                      NumSrcOperands, true>(ib, obj, "atomic");
        }
    }

    template<typename DataType, int NumSrcOperands>
    GPUStaticInst*
    decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
            Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;

        unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return constructAtomic<DataType, NoRegAddrOperand,
                                   NumSrcOperands>(ib, obj);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return constructAtomic<DataType, SRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return constructAtomic<DataType, DRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              default:
                fatal("Bad atomic register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad atomic register operand kind %d\n", tmp.kind);
        }
    }


    template<typename DataType>
    GPUStaticInst*
    decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }

    template<typename DataType>
    GPUStaticInst*
    decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }
} // namespace HsailISA

#endif // __ARCH_HSAIL_INSTS_MEM_HH__