/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#ifndef __ARCH_HSAIL_INSTS_MEM_HH__
#define __ARCH_HSAIL_INSTS_MEM_HH__

#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"

namespace HsailISA
{
    class MemInst
    {
      public:
        MemInst() : size(0), addr_operand(nullptr) { }

        MemInst(Enums::MemType m_type)
        {
            if (m_type == Enums::M_U64 ||
                m_type == Enums::M_S64 ||
                m_type == Enums::M_F64) {
                size = 8;
            } else if (m_type == Enums::M_U32 ||
                       m_type == Enums::M_S32 ||
                       m_type == Enums::M_F32) {
                size = 4;
            } else if (m_type == Enums::M_U16 ||
                       m_type == Enums::M_S16 ||
                       m_type == Enums::M_F16) {
                size = 2;
            } else {
                size = 1;
            }

            addr_operand = nullptr;
        }

        void
        init_addr(AddrOperandBase *_addr_operand)
        {
            addr_operand = _addr_operand;
        }

      private:
        int size;
        AddrOperandBase *addr_operand;

      public:
        int getMemOperandSize() { return size; }
        AddrOperandBase *getAddressOperand() { return addr_operand; }
    };
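
    // Base class for the HSAIL lda (load address) instruction: dest
    // receives the address computed from the address operand, and the
    // operand-query methods below report register usage to the timing
    // model.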
    template<typename DestOperandType, typename AddrOperandType>
    class LdaInstBase : public HsailGPUStaticInst
    {
      public:
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                    const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);
            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        int numSrcRegOperands() { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister())
                return 2;
            return 1;
        }
    };

    template<typename DestDataType, typename AddrOperandType>
    class LdaInst :
        public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
        public MemInst
    {
      public:
        void generateDisassembly();

        LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                const char *_opcode)
            : LdaInstBase<typename DestDataType::OperandType,
                          AddrOperandType>(ib, obj, _opcode)
        {
            init_addr(&this->addr);
        }

        void execute(GPUDynInstPtr gpuDynInst);
    };

    template<typename DataType>
    GPUStaticInst*
    decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
        BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);

        if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
        } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (regDataType.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
              default:
                fatal("Bad ldas register operand type %d\n",
                      regDataType.regKind);
            }
        } else {
            fatal("Bad ldas register operand kind %d\n", regDataType.kind);
        }
    }
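
    // Base class for HSAIL loads. initLd() decodes a plain ld from a
    // BrigInstMem, initAtomicLd() decodes an atomic_ld from a
    // BrigInstAtomic; both map the BRIG segment to the matching *_READ
    // operation type.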
    template<typename MemOperandType, typename DestOperandType,
             typename AddrOperandType>
    class LdInstBase : public HsailGPUStaticInst
    {
      public:
        Brig::BrigWidth8_t width;
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigMemoryScope memoryScope;
        unsigned int equivClass;
        bool isArgLoad()
        {
            return segment == Brig::BRIG_SEGMENT_KERNARG ||
                   segment == Brig::BRIG_SEGMENT_ARG;
        }
        void
        initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
        {
            using namespace Brig;

            const BrigInstMem *ldst = (const BrigInstMem*)ib;

            segment = (BrigSegment)ldst->segment;
            memoryOrder = BRIG_MEMORY_ORDER_NONE;
            memoryScope = BRIG_MEMORY_SCOPE_NONE;
            equivClass = ldst->equivClass;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_READ;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_READ;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_READ;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_READ;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_READ;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_READ;
                break;

              case BRIG_SEGMENT_KERNARG:
                o_type = Enums::OT_KERN_READ;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("Ld: segment %d not supported\n", segment);
            }

            width = ldst->width;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        void
        initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            equivClass = 0;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_READ;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_READ;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_READ;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_READ;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_READ;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_READ;
                break;

              case BRIG_SEGMENT_KERNARG:
                o_type = Enums::OT_KERN_READ;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("Ld: segment %d not supported\n", segment);
            }

            width = BRIG_WIDTH_1;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands,1);
            addr.init(op_offs, obj);
        }

        LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            if (ib->opcode == BRIG_OPCODE_LD) {
                initLd(ib, obj, _opcode);
            } else {
                initAtomicLd(ib, obj, _opcode);
            }
        }

        int numSrcRegOperands() { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
    };
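
    // Concrete load instruction. When the destination is an operand
    // list (the v2/v4 forms), it is unpacked into dest_vect, up to four
    // destination registers per load.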
    template<typename MemDataType, typename DestDataType,
             typename AddrOperandType>
    class LdInst :
        public LdInstBase<typename MemDataType::CType,
                          typename DestDataType::OperandType, AddrOperandType>,
        public MemInst
    {
        typename DestDataType::OperandType::DestOperand dest_vect[4];
        uint16_t num_dest_operands;
        void generateDisassembly();

      public:
        LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
            : LdInstBase<typename MemDataType::CType,
                         typename DestDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);

            unsigned op_offs = obj->getOperandPtr(ib->operands,0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

            if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)brigOp;

                num_dest_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_dest_operands <= 4);
            } else {
                num_dest_operands = 1;
            }

            if (num_dest_operands > 1) {
                assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_dest_operands; ++i) {
                    dest_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }
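
        // Issue the load: group-segment accesses read the wavefront's
        // LDS chunk directly, while all other segments send one read
        // packet per active lane through the compute unit. Loads with
        // acquire semantics schedule execLdAcq as a continuation.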
        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_dest_operands > 1) {
                for (int i = 0; i < VSZ; ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_dest_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_dest_operands; ++k) {

                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];

                for (int i = 0; i < VSZ; ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (isLocalMem()) {
                            // load from shared memory
                            *d = gpuDynInst->wavefront()->ldsChunk->
                                read<c0>(vaddr);
                        } else {
                            Request *req = new Request(0, vaddr, sizeof(c0), 0,
                                gpuDynInst->computeUnit()->masterId(),
                                0, gpuDynInst->wfDynId, i);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
                            pkt->dataStatic(d);

                            if (gpuDynInst->computeUnit()->shader->
                                separate_acquire_release &&
                                gpuDynInst->memoryOrder ==
                                Enums::MEMORY_ORDER_SC_ACQUIRE) {
                                // if this load has acquire semantics,
                                // set the response continuation function
                                // to perform an Acquire request
                                gpuDynInst->execContinuation =
                                    &GPUStaticInst::execLdAcq;

                                gpuDynInst->useContinuation = true;
                            } else {
                                // the request will be finished when
                                // the load completes
                                gpuDynInst->useContinuation = false;
                            }
                            // translation is performed in sendRequest()
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);
                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      private:
        void
        execLdAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after the load has completed, and if the load has acquire
            // semantics, issue an acquire request.
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                        gpuDynInst->computeUnit()->masterId(),
                        0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
                }
            }
        }

      public:
        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }

        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            if (num_dest_operands > 1) {
                return dest_vect[operandIndex].isVectorRegister();
            }
            else if (num_dest_operands == 1) {
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isVectorRegister();
            }
            return false;
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isCondRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isCondRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isScalarRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isScalarRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return false;
            return true;
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.opSize());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].opSize());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.opSize());
            return 0;
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.regIndex());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].regIndex());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.regIndex());
            return -1;
        }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return(num_dest_operands+1);
            else
                return(num_dest_operands);
        }
        void execute(GPUDynInstPtr gpuDynInst);
    };
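
    // Two-level decode for ld: decodeLd() (below) selects the
    // destination data type from the BRIG type, then decodeLd2()
    // selects the address-operand flavor.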
    template<typename MemDT, typename DestDT>
    GPUStaticInst*
    decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands,1);
        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
                   tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdInst<MemDT, DestDT,
                                  SRegAddrOperand>(ib, obj, "ld");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdInst<MemDT, DestDT,
                                  DRegAddrOperand>(ib, obj, "ld");
              default:
                fatal("Bad ld register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad ld register operand kind %d\n", tmp.kind);
        }
    }

    template<typename MemDT>
    GPUStaticInst*
    decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands,0);
        BrigRegOperandInfo dest = findRegDataType(op_offs, obj);

        assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
               dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
        switch(dest.regKind) {
          case Brig::BRIG_REGISTER_KIND_SINGLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
              case Brig::BRIG_TYPE_B16:
              case Brig::BRIG_TYPE_B32:
                return decodeLd2<MemDT, B32>(ib, obj);
              case Brig::BRIG_TYPE_U8:
              case Brig::BRIG_TYPE_U16:
              case Brig::BRIG_TYPE_U32:
                return decodeLd2<MemDT, U32>(ib, obj);
              case Brig::BRIG_TYPE_S8:
              case Brig::BRIG_TYPE_S16:
              case Brig::BRIG_TYPE_S32:
                return decodeLd2<MemDT, S32>(ib, obj);
              case Brig::BRIG_TYPE_F16:
              case Brig::BRIG_TYPE_F32:
                return decodeLd2<MemDT, U32>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            };
          case Brig::BRIG_REGISTER_KIND_DOUBLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B64:
                return decodeLd2<MemDT, B64>(ib, obj);
              case Brig::BRIG_TYPE_U64:
                return decodeLd2<MemDT, U64>(ib, obj);
              case Brig::BRIG_TYPE_S64:
                return decodeLd2<MemDT, S64>(ib, obj);
              case Brig::BRIG_TYPE_F64:
                return decodeLd2<MemDT, U64>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            };
          default:
            fatal("Bad ld register operand type %d, %d\n", dest.regKind,
                  ib->type);
        }
    }
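
    // Base class for HSAIL stores. Mirrors LdInstBase: initSt() handles
    // a plain st, initAtomicSt() handles an atomic_st, and the BRIG
    // segment selects the matching *_WRITE operation type.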
    template<typename MemDataType, typename SrcOperandType,
             typename AddrOperandType>
    class StInstBase : public HsailGPUStaticInst
    {
      public:
        typename SrcOperandType::SrcOperand src;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigMemoryOrder memoryOrder;
        unsigned int equivClass;

        void
        initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
        {
            using namespace Brig;

            const BrigInstMem *ldst = (const BrigInstMem*)ib;

            segment = (BrigSegment)ldst->segment;
            memoryOrder = BRIG_MEMORY_ORDER_NONE;
            memoryScope = BRIG_MEMORY_SCOPE_NONE;
            equivClass = ldst->equivClass;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_WRITE;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_WRITE;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_WRITE;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_WRITE;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_WRITE;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_WRITE;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("St: segment %d not supported\n", segment);
            }

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const BrigOperand *baseOp = obj->getOperand(op_offs);

            if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
                (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
                src.init(op_offs, obj);
            }

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        void
        initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            equivClass = 0;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_WRITE;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_WRITE;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_WRITE;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_WRITE;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_WRITE;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_WRITE;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("St: segment %d not supported\n", segment);
            }

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            addr.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            src.init(op_offs, obj);
        }

        StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            if (ib->opcode == BRIG_OPCODE_ST) {
                initSt(ib, obj, _opcode);
            } else {
                initAtomicSt(ib, obj, _opcode);
            }
        }

        int numDstRegOperands() { return 0; }
        int numSrcRegOperands()
        {
            return src.isVectorRegister() + this->addr.isVectorRegister();
        }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isVectorRegister() :
                   this->addr.isVectorRegister();
        }
        bool isCondRegister(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isCondRegister() :
                   this->addr.isCondRegister();
        }
        bool isScalarRegister(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isScalarRegister() :
                   this->addr.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) { return false; }
        int getOperandSize(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.opSize() : this->addr.opSize();
        }
        int getRegisterIndex(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.regIndex() : this->addr.regIndex();
        }
    };
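
    // Concrete store instruction. Source operand lists (the v2/v4
    // forms) are unpacked into src_vect; immediate sources arrive as
    // BrigOperandConstantBytes.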
    template<typename MemDataType, typename SrcDataType,
             typename AddrOperandType>
    class StInst :
        public StInstBase<MemDataType, typename SrcDataType::OperandType,
                          AddrOperandType>,
        public MemInst
    {
      public:
        typename SrcDataType::OperandType::SrcOperand src_vect[4];
        uint16_t num_src_operands;
        void generateDisassembly();

        StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode, int srcIdx)
            : StInstBase<MemDataType, typename SrcDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(SrcDataType::memType)
        {
            init_addr(&this->addr);

            BrigRegOperandInfo rinfo;
            unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx);
            const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
                const Brig::BrigOperandConstantBytes *op =
                    (Brig::BrigOperandConstantBytes*)baseOp;

                rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
                                           Brig::BRIG_TYPE_NONE);
            } else {
                rinfo = findRegDataType(op_offs, obj);
            }

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)baseOp;

                num_src_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_src_operands <= 4);
            } else {
                num_src_operands = 1;
            }

            if (num_src_operands > 1) {
                assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_src_operands; ++i) {
                    src_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before performing a store, check if this store has
            // release semantics, and if so issue a release first
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_RELEASE) {

                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->execContinuation = &GPUStaticInst::execSt;
                    gpuDynInst->useContinuation = true;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                        gpuDynInst->computeUnit()->masterId(),
                        0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);

                    return;
                }
            }

            // if there is no release semantic, perform stores immediately
            execSt(gpuDynInst);
        }

        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }

      private:
        // execSt may be called through a continuation
        // if the store had release semantics. see comment for
        // execSt in gpu_static_inst.hh
        void
        execSt(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_src_operands > 1) {
                for (int i = 0; i < VSZ; ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_src_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_src_operands; ++k) {
                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];

                for (int i = 0; i < VSZ; ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (isLocalMem()) {
                            // store to shared memory
                            gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
                                                                         *d);
                        } else {
                            Request *req =
                                new Request(0, vaddr, sizeof(c0), 0,
                                            gpuDynInst->computeUnit()->masterId(),
                                            0, gpuDynInst->wfDynId, i);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
                            pkt->dataStatic<c0>(d);

                            // translation is performed in sendRequest()
                            // the request will be finished when the store completes
                            gpuDynInst->useContinuation = false;
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);

                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      public:
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isVectorRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isVectorRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isVectorRegister();
            return false;
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isCondRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isCondRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isScalarRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isScalarRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) { return false; }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.opSize();
            if (num_src_operands > 1)
                return src_vect[operandIndex].opSize();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.opSize();
            return 0;
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.regIndex();
            if (num_src_operands > 1)
                return src_vect[operandIndex].regIndex();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.regIndex();
            return -1;
        }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return num_src_operands + 1;
            else
                return num_src_operands;
        }
        void execute(GPUDynInstPtr gpuDynInst);
    };

    template<typename DataType, typename SrcDataType>
    GPUStaticInst*
    decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        int srcIdx = 0;
        int destIdx = 1;
        if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
            ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
            srcIdx = 1;
            destIdx = 0;
        }
        unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new StInst<DataType, SrcDataType,
                              NoRegAddrOperand>(ib, obj, "st", srcIdx);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new StInst<DataType, SrcDataType,
                                  SRegAddrOperand>(ib, obj, "st", srcIdx);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new StInst<DataType, SrcDataType,
                                  DRegAddrOperand>(ib, obj, "st", srcIdx);
              default:
                fatal("Bad st register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad st register operand kind %d\n", tmp.kind);
        }
    }

    Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
                                           Brig::BrigAtomicOperation brigOp);
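
    // Base class for HSAIL atomics. Decodes the segment, memory order,
    // memory scope, and atomic operation from a BrigInstAtomic; the
    // atomicnoret forms carry no destination operand (HasDst == false).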
    template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
             bool HasDst>
    class AtomicInstBase : public HsailGPUStaticInst
    {
      public:
        typename OperandType::DestOperand dest;
        typename OperandType::SrcOperand src[NumSrcOperands];
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigAtomicOperation atomicOperation;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigOpcode opcode;
        Enums::MemOpType opType;

        AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                       const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            atomicOperation = (BrigAtomicOperation)at->atomicOperation;
            opcode = (BrigOpcode)ib->opcode;
            opType = brigAtomicToMemOpType(opcode, atomicOperation);

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_ATOMIC;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_ATOMIC;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_ATOMIC;
                break;

              default:
                panic("Atomic: segment %d not supported\n", segment);
            }

            if (HasDst) {
                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                dest.init(op_offs, obj);

                op_offs = obj->getOperandPtr(ib->operands, 1);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 2);
                    src[i].init(op_offs, obj);
                }
            } else {

                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 1);
                    src[i].init(op_offs, obj);
                }
            }
        }

        int numSrcRegOperands()
        {
            int operands = 0;
            for (int i = 0; i < NumSrcOperands; i++) {
                if (src[i].isVectorRegister()) {
                    operands++;
                }
            }
            if (addr.isVectorRegister())
                operands++;
            return operands;
        }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands()
        {
            if (addr.isVectorRegister())
                return(NumSrcOperands + 2);
            return(NumSrcOperands + 1);
        }
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isVectorRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isCondRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isCondRegister());
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isScalarRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isScalarRegister());
            else
                return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return true;
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return false;
        }
        bool isDstOperand(int operandIndex)
        {
            if (operandIndex <= NumSrcOperands)
                return false;
            else
                return true;
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].opSize());
            else if (operandIndex == NumSrcOperands)
                return(addr.opSize());
            else
                return(dest.opSize());
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].regIndex());
            else if (operandIndex == NumSrcOperands)
                return(addr.regIndex());
            else
                return(dest.regIndex());
            return -1;
        }
    };
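
    // Concrete atomic RMW instruction. initiateAcc() issues a release
    // fence first when the memory order requires one; execAtomic() then
    // performs the RMW and may schedule execAtomicAcq as a continuation
    // to handle acquire semantics.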
    template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
             bool HasDst>
    class AtomicInst :
        public AtomicInstBase<typename MemDataType::OperandType,
                              AddrOperandType, NumSrcOperands, HasDst>,
        public MemInst
    {
      public:
        void generateDisassembly();

        AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
            : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
                             NumSrcOperands, HasDst>
                (ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before doing the RMW, check if this atomic has
            // release semantics, and if so issue a release first
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && (gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {

                    gpuDynInst->statusBitVector = VectorMask(1);

                    gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
                    gpuDynInst->useContinuation = true;

                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                        gpuDynInst->computeUnit()->masterId(),
                        0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);

                    return;
                }
            }

            // if there is no release semantic, execute the RMW immediately
            execAtomic(gpuDynInst);

        }

        void execute(GPUDynInstPtr gpuDynInst);

        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }

      private:
        // execAtomic may be called through a continuation
        // if the RMW had release semantics. see comment for
        // execContinuation in gpu_dyn_inst.hh
        void
        execAtomic(GPUDynInstPtr gpuDynInst) override
        {
            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            typedef typename MemDataType::CType c0;

            c0 *d = &((c0*) gpuDynInst->d_data)[0];
            c0 *e = &((c0*) gpuDynInst->a_data)[0];
            c0 *f = &((c0*) gpuDynInst->x_data)[0];

            for (int i = 0; i < VSZ; ++i) {
                if (gpuDynInst->exec_mask[i]) {
                    Addr vaddr = gpuDynInst->addr[i];

                    if (isLocalMem()) {
                        Wavefront *wavefront = gpuDynInst->wavefront();
                        *d = wavefront->ldsChunk->read<c0>(vaddr);

                        switch (this->opType) {
                          case Enums::MO_AADD:
                          case Enums::MO_ANRADD:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) + (*e));
                            break;
                          case Enums::MO_ASUB:
                          case Enums::MO_ANRSUB:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) - (*e));
                            break;
                          case Enums::MO_AMAX:
                          case Enums::MO_ANRMAX:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::max(wavefront->ldsChunk->read<c0>(vaddr),
                                         (*e)));
                            break;
                          case Enums::MO_AMIN:
                          case Enums::MO_ANRMIN:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::min(wavefront->ldsChunk->read<c0>(vaddr),
                                         (*e)));
                            break;
                          case Enums::MO_AAND:
                          case Enums::MO_ANRAND:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) & (*e));
                            break;
                          case Enums::MO_AOR:
                          case Enums::MO_ANROR:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) | (*e));
                            break;
                          case Enums::MO_AXOR:
                          case Enums::MO_ANRXOR:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
                            break;
                          case Enums::MO_AINC:
                          case Enums::MO_ANRINC:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) + 1);
                            break;
                          case Enums::MO_ADEC:
                          case Enums::MO_ANRDEC:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) - 1);
                            break;
                          case Enums::MO_AEXCH:
                          case Enums::MO_ANREXCH:
                            wavefront->ldsChunk->write<c0>(vaddr, (*e));
                            break;
                          case Enums::MO_ACAS:
                          case Enums::MO_ANRCAS:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
                                (*f) : wavefront->ldsChunk->read<c0>(vaddr));
                            break;
                          default:
                            fatal("Unrecognized or invalid HSAIL atomic op "
                                  "type.\n");
                            break;
                        }
                    } else {
                        Request *req =
                            new Request(0, vaddr, sizeof(c0), 0,
                                        gpuDynInst->computeUnit()->masterId(),
                                        0, gpuDynInst->wfDynId, i,
                                        gpuDynInst->makeAtomicOpFunctor<c0>(e,
                                            f, this->opType));

                        gpuDynInst->setRequestFlags(req);
                        PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
                        pkt->dataStatic(d);

                        if (gpuDynInst->computeUnit()->shader->
                            separate_acquire_release &&
                            (gpuDynInst->memoryOrder ==
                             Enums::MEMORY_ORDER_SC_ACQUIRE)) {
                            // if this atomic has acquire semantics,
                            // schedule the continuation to perform an
                            // acquire after the RMW completes
                            gpuDynInst->execContinuation =
                                &GPUStaticInst::execAtomicAcq;

                            gpuDynInst->useContinuation = true;
                        } else {
                            // the request will be finished when the RMW completes
                            gpuDynInst->useContinuation = false;
                        }
                        // translation is performed in sendRequest()
                        gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
                                                               pkt);
                    }
                }

                ++d;
                ++e;
                ++f;
            }

            gpuDynInst->updateStats();
        }

        // execAtomicAcq will always be called through a continuation.
        // see comment for execContinuation in gpu_dyn_inst.hh
        void
        execAtomicAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after performing the RMW, check to see if this instruction
            // has acquire semantics, and if so, issue an acquire
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
                    gpuDynInst->statusBitVector = VectorMask(1);

                    // the request will be finished when
                    // the acquire completes
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                        gpuDynInst->computeUnit()->masterId(),
                        0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
                }
            }
        }
    };

    template<typename DataType, typename AddrOperandType, int NumSrcOperands>
    GPUStaticInst*
    constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
            return decodeLd<DataType>(ib, obj);
        } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
                return decodeSt<S8,S8>(ib, obj);
              case Brig::BRIG_TYPE_B16:
                return decodeSt<S8,S16>(ib, obj);
              case Brig::BRIG_TYPE_B32:
                return decodeSt<S8,S32>(ib, obj);
              case Brig::BRIG_TYPE_B64:
                return decodeSt<S8,S64>(ib, obj);
              default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
            }
        } else {
            if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
                return new AtomicInst<DataType, AddrOperandType,
                    NumSrcOperands, false>(ib, obj, "atomicnoret");
            else
                return new AtomicInst<DataType, AddrOperandType,
                    NumSrcOperands, true>(ib, obj, "atomic");
        }
    }
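
    // decodeAtomicHelper() selects the address-operand flavor. For
    // atomicnoret the address is operand 0; otherwise operand 0 is the
    // destination and the address is operand 1.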
    template<typename DataType, int NumSrcOperands>
    GPUStaticInst*
    decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
            Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;

        unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return constructAtomic<DataType, NoRegAddrOperand,
                                   NumSrcOperands>(ib, obj);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return constructAtomic<DataType, SRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return constructAtomic<DataType, DRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              default:
                fatal("Bad atomic register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad atomic register operand kind %d\n", tmp.kind);
        }
    }


    template<typename DataType>
    GPUStaticInst*
    decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }

    template<typename DataType>
    GPUStaticInst*
    decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }
} // namespace HsailISA

#endif // __ARCH_HSAIL_INSTS_MEM_HH__