/* mem.hh — revision 11692:e772fdcd3809
 *
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
32 * 33 * Author: Steve Reinhardt 34 */ 35 36#ifndef __ARCH_HSAIL_INSTS_MEM_HH__ 37#define __ARCH_HSAIL_INSTS_MEM_HH__ 38 39#include "arch/hsail/insts/decl.hh" 40#include "arch/hsail/insts/gpu_static_inst.hh" 41#include "arch/hsail/operand.hh" 42 43namespace HsailISA 44{ 45 class MemInst 46 { 47 public: 48 MemInst() : size(0), addr_operand(nullptr) { } 49 50 MemInst(Enums::MemType m_type) 51 { 52 if (m_type == Enums::M_U64 || 53 m_type == Enums::M_S64 || 54 m_type == Enums::M_F64) { 55 size = 8; 56 } else if (m_type == Enums::M_U32 || 57 m_type == Enums::M_S32 || 58 m_type == Enums::M_F32) { 59 size = 4; 60 } else if (m_type == Enums::M_U16 || 61 m_type == Enums::M_S16 || 62 m_type == Enums::M_F16) { 63 size = 2; 64 } else { 65 size = 1; 66 } 67 68 addr_operand = nullptr; 69 } 70 71 void 72 init_addr(AddrOperandBase *_addr_operand) 73 { 74 addr_operand = _addr_operand; 75 } 76 77 private: 78 int size; 79 AddrOperandBase *addr_operand; 80 81 public: 82 int getMemOperandSize() { return size; } 83 AddrOperandBase *getAddressOperand() { return addr_operand; } 84 }; 85 86 template<typename DestOperandType, typename AddrOperandType> 87 class LdaInstBase : public HsailGPUStaticInst 88 { 89 public: 90 typename DestOperandType::DestOperand dest; 91 AddrOperandType addr; 92 93 LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 94 const char *_opcode) 95 : HsailGPUStaticInst(obj, _opcode) 96 { 97 using namespace Brig; 98 99 setFlag(ALU); 100 101 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 102 dest.init(op_offs, obj); 103 op_offs = obj->getOperandPtr(ib->operands, 1); 104 addr.init(op_offs, obj); 105 } 106 107 int numSrcRegOperands() override 108 { return(this->addr.isVectorRegister()); } 109 int numDstRegOperands() override 110 { return dest.isVectorRegister(); } 111 bool isVectorRegister(int operandIndex) override 112 { 113 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 114 return((operandIndex == 0) ? 
dest.isVectorRegister() : 115 this->addr.isVectorRegister()); 116 } 117 bool isCondRegister(int operandIndex) override 118 { 119 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 120 return((operandIndex == 0) ? dest.isCondRegister() : 121 this->addr.isCondRegister()); 122 } 123 bool isScalarRegister(int operandIndex) override 124 { 125 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 126 return((operandIndex == 0) ? dest.isScalarRegister() : 127 this->addr.isScalarRegister()); 128 } 129 bool isSrcOperand(int operandIndex) override 130 { 131 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 132 if (operandIndex > 0) 133 return(this->addr.isVectorRegister()); 134 return false; 135 } 136 bool isDstOperand(int operandIndex) override { 137 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 138 return(operandIndex == 0); 139 } 140 int getOperandSize(int operandIndex) override 141 { 142 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 143 return((operandIndex == 0) ? dest.opSize() : 144 this->addr.opSize()); 145 } 146 int getRegisterIndex(int operandIndex) override 147 { 148 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 149 return((operandIndex == 0) ? 
dest.regIndex() : 150 this->addr.regIndex()); 151 } 152 int getNumOperands() override 153 { 154 if (this->addr.isVectorRegister()) 155 return 2; 156 return 1; 157 } 158 }; 159 160 template<typename DestDataType, typename AddrOperandType> 161 class LdaInst : 162 public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>, 163 public MemInst 164 { 165 public: 166 void generateDisassembly(); 167 168 LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 169 const char *_opcode) 170 : LdaInstBase<typename DestDataType::OperandType, 171 AddrOperandType>(ib, obj, _opcode) 172 { 173 init_addr(&this->addr); 174 } 175 176 void execute(GPUDynInstPtr gpuDynInst); 177 }; 178 179 template<typename DataType> 180 GPUStaticInst* 181 decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj) 182 { 183 unsigned op_offs = obj->getOperandPtr(ib->operands, 1); 184 BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj); 185 186 if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) { 187 return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas"); 188 } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) { 189 // V2/V4 not allowed 190 switch (regDataType.regKind) { 191 case Brig::BRIG_REGISTER_KIND_SINGLE: 192 return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas"); 193 case Brig::BRIG_REGISTER_KIND_DOUBLE: 194 return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas"); 195 default: 196 fatal("Bad ldas register operand type %d\n", regDataType.type); 197 } 198 } else { 199 fatal("Bad ldas register operand kind %d\n", regDataType.kind); 200 } 201 } 202 203 template<typename MemOperandType, typename DestOperandType, 204 typename AddrOperandType> 205 class LdInstBase : public HsailGPUStaticInst 206 { 207 public: 208 Brig::BrigWidth8_t width; 209 typename DestOperandType::DestOperand dest; 210 AddrOperandType addr; 211 212 Brig::BrigSegment segment; 213 Brig::BrigMemoryOrder memoryOrder; 214 Brig::BrigMemoryScope memoryScope; 215 
unsigned int equivClass; 216 217 LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 218 const char *_opcode) 219 : HsailGPUStaticInst(obj, _opcode) 220 { 221 using namespace Brig; 222 223 setFlag(MemoryRef); 224 setFlag(Load); 225 226 if (ib->opcode == BRIG_OPCODE_LD) { 227 const BrigInstMem *ldst = (const BrigInstMem*)ib; 228 229 segment = (BrigSegment)ldst->segment; 230 memoryOrder = BRIG_MEMORY_ORDER_NONE; 231 memoryScope = BRIG_MEMORY_SCOPE_NONE; 232 equivClass = ldst->equivClass; 233 234 width = ldst->width; 235 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 236 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs); 237 if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER) 238 dest.init(op_offs, obj); 239 240 op_offs = obj->getOperandPtr(ib->operands, 1); 241 addr.init(op_offs, obj); 242 } else { 243 const BrigInstAtomic *at = (const BrigInstAtomic*)ib; 244 245 segment = (BrigSegment)at->segment; 246 memoryOrder = (BrigMemoryOrder)at->memoryOrder; 247 memoryScope = (BrigMemoryScope)at->memoryScope; 248 equivClass = 0; 249 250 width = BRIG_WIDTH_1; 251 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 252 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs); 253 254 if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER) 255 dest.init(op_offs, obj); 256 257 op_offs = obj->getOperandPtr(ib->operands,1); 258 addr.init(op_offs, obj); 259 } 260 261 switch (memoryOrder) { 262 case BRIG_MEMORY_ORDER_NONE: 263 setFlag(NoOrder); 264 break; 265 case BRIG_MEMORY_ORDER_RELAXED: 266 setFlag(RelaxedOrder); 267 break; 268 case BRIG_MEMORY_ORDER_SC_ACQUIRE: 269 setFlag(Acquire); 270 break; 271 case BRIG_MEMORY_ORDER_SC_RELEASE: 272 setFlag(Release); 273 break; 274 case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: 275 setFlag(AcquireRelease); 276 break; 277 default: 278 fatal("LdInst has bad memory order type\n"); 279 } 280 281 switch (memoryScope) { 282 case BRIG_MEMORY_SCOPE_NONE: 283 setFlag(NoScope); 284 break; 285 case BRIG_MEMORY_SCOPE_WORKITEM: 286 
setFlag(WorkitemScope); 287 break; 288 case BRIG_MEMORY_SCOPE_WORKGROUP: 289 setFlag(WorkgroupScope); 290 break; 291 case BRIG_MEMORY_SCOPE_AGENT: 292 setFlag(DeviceScope); 293 break; 294 case BRIG_MEMORY_SCOPE_SYSTEM: 295 setFlag(SystemScope); 296 break; 297 default: 298 fatal("LdInst has bad memory scope type\n"); 299 } 300 301 switch (segment) { 302 case BRIG_SEGMENT_GLOBAL: 303 setFlag(GlobalSegment); 304 break; 305 case BRIG_SEGMENT_GROUP: 306 setFlag(GroupSegment); 307 break; 308 case BRIG_SEGMENT_PRIVATE: 309 setFlag(PrivateSegment); 310 break; 311 case BRIG_SEGMENT_READONLY: 312 setFlag(ReadOnlySegment); 313 break; 314 case BRIG_SEGMENT_SPILL: 315 setFlag(SpillSegment); 316 break; 317 case BRIG_SEGMENT_FLAT: 318 setFlag(Flat); 319 break; 320 case BRIG_SEGMENT_KERNARG: 321 setFlag(KernArgSegment); 322 break; 323 case BRIG_SEGMENT_ARG: 324 setFlag(ArgSegment); 325 break; 326 default: 327 panic("Ld: segment %d not supported\n", segment); 328 } 329 } 330 331 int numSrcRegOperands() override 332 { return(this->addr.isVectorRegister()); } 333 int numDstRegOperands() override { return dest.isVectorRegister(); } 334 int getNumOperands() override 335 { 336 if (this->addr.isVectorRegister()) 337 return 2; 338 else 339 return 1; 340 } 341 bool isVectorRegister(int operandIndex) override 342 { 343 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 344 return((operandIndex == 0) ? dest.isVectorRegister() : 345 this->addr.isVectorRegister()); 346 } 347 bool isCondRegister(int operandIndex) override 348 { 349 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 350 return((operandIndex == 0) ? dest.isCondRegister() : 351 this->addr.isCondRegister()); 352 } 353 bool isScalarRegister(int operandIndex) override 354 { 355 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 356 return((operandIndex == 0) ? 
dest.isScalarRegister() : 357 this->addr.isScalarRegister()); 358 } 359 bool isSrcOperand(int operandIndex) override 360 { 361 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 362 if (operandIndex > 0) 363 return(this->addr.isVectorRegister()); 364 return false; 365 } 366 bool isDstOperand(int operandIndex) override 367 { 368 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 369 return(operandIndex == 0); 370 } 371 int getOperandSize(int operandIndex) override 372 { 373 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 374 return((operandIndex == 0) ? dest.opSize() : 375 this->addr.opSize()); 376 } 377 int getRegisterIndex(int operandIndex) override 378 { 379 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 380 return((operandIndex == 0) ? dest.regIndex() : 381 this->addr.regIndex()); 382 } 383 }; 384 385 template<typename MemDataType, typename DestDataType, 386 typename AddrOperandType> 387 class LdInst : 388 public LdInstBase<typename MemDataType::CType, 389 typename DestDataType::OperandType, AddrOperandType>, 390 public MemInst 391 { 392 typename DestDataType::OperandType::DestOperand dest_vect[4]; 393 uint16_t num_dest_operands; 394 void generateDisassembly() override; 395 396 public: 397 LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 398 const char *_opcode) 399 : LdInstBase<typename MemDataType::CType, 400 typename DestDataType::OperandType, 401 AddrOperandType>(ib, obj, _opcode), 402 MemInst(MemDataType::memType) 403 { 404 init_addr(&this->addr); 405 406 unsigned op_offs = obj->getOperandPtr(ib->operands,0); 407 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs); 408 409 if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) { 410 const Brig::BrigOperandOperandList *brigRegVecOp = 411 (const Brig::BrigOperandOperandList*)brigOp; 412 413 num_dest_operands = 414 *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4; 415 416 assert(num_dest_operands <= 4); 417 } else { 
418 num_dest_operands = 1; 419 } 420 421 if (num_dest_operands > 1) { 422 assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST); 423 424 for (int i = 0; i < num_dest_operands; ++i) { 425 dest_vect[i].init_from_vect(op_offs, obj, i); 426 } 427 } 428 } 429 430 void 431 initiateAcc(GPUDynInstPtr gpuDynInst) override 432 { 433 typedef typename MemDataType::CType c0; 434 435 gpuDynInst->statusBitVector = gpuDynInst->exec_mask; 436 437 if (num_dest_operands > 1) { 438 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) 439 if (gpuDynInst->exec_mask[i]) 440 gpuDynInst->statusVector.push_back(num_dest_operands); 441 else 442 gpuDynInst->statusVector.push_back(0); 443 } 444 445 for (int k = 0; k < num_dest_operands; ++k) { 446 447 c0 *d = &((c0*)gpuDynInst->d_data) 448 [k * gpuDynInst->computeUnit()->wfSize()]; 449 450 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { 451 if (gpuDynInst->exec_mask[i]) { 452 Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); 453 454 if (this->isLocalMem()) { 455 // load from shared memory 456 *d = gpuDynInst->wavefront()->ldsChunk-> 457 read<c0>(vaddr); 458 } else { 459 Request *req = new Request(0, vaddr, sizeof(c0), 0, 460 gpuDynInst->computeUnit()->masterId(), 461 0, gpuDynInst->wfDynId); 462 463 gpuDynInst->setRequestFlags(req); 464 PacketPtr pkt = new Packet(req, MemCmd::ReadReq); 465 pkt->dataStatic(d); 466 467 if (gpuDynInst->computeUnit()->shader-> 468 separate_acquire_release && 469 gpuDynInst->isAcquire()) { 470 // if this load has acquire semantics, 471 // set the response continuation function 472 // to perform an Acquire request 473 gpuDynInst->execContinuation = 474 &GPUStaticInst::execLdAcq; 475 476 gpuDynInst->useContinuation = true; 477 } else { 478 // the request will be finished when 479 // the load completes 480 gpuDynInst->useContinuation = false; 481 } 482 // translation is performed in sendRequest() 483 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, 484 i, pkt); 485 } 486 } 487 ++d; 
488 } 489 } 490 491 gpuDynInst->updateStats(); 492 } 493 494 private: 495 void 496 execLdAcq(GPUDynInstPtr gpuDynInst) override 497 { 498 // after the load has complete and if the load has acquire 499 // semantics, issue an acquire request. 500 if (!this->isLocalMem()) { 501 if (gpuDynInst->computeUnit()->shader->separate_acquire_release 502 && gpuDynInst->isAcquire()) { 503 gpuDynInst->statusBitVector = VectorMask(1); 504 gpuDynInst->useContinuation = false; 505 // create request 506 Request *req = new Request(0, 0, 0, 0, 507 gpuDynInst->computeUnit()->masterId(), 508 0, gpuDynInst->wfDynId); 509 req->setFlags(Request::ACQUIRE); 510 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req); 511 } 512 } 513 } 514 515 public: 516 bool isVectorRegister(int operandIndex) override 517 { 518 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 519 if ((num_dest_operands != getNumOperands()) && 520 (operandIndex == (getNumOperands()-1))) 521 return(this->addr.isVectorRegister()); 522 if (num_dest_operands > 1) { 523 return dest_vect[operandIndex].isVectorRegister(); 524 } 525 else if (num_dest_operands == 1) { 526 return LdInstBase<typename MemDataType::CType, 527 typename DestDataType::OperandType, 528 AddrOperandType>::dest.isVectorRegister(); 529 } 530 return false; 531 } 532 bool isCondRegister(int operandIndex) override 533 { 534 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 535 if ((num_dest_operands != getNumOperands()) && 536 (operandIndex == (getNumOperands()-1))) 537 return(this->addr.isCondRegister()); 538 if (num_dest_operands > 1) 539 return dest_vect[operandIndex].isCondRegister(); 540 else if (num_dest_operands == 1) 541 return LdInstBase<typename MemDataType::CType, 542 typename DestDataType::OperandType, 543 AddrOperandType>::dest.isCondRegister(); 544 return false; 545 } 546 bool isScalarRegister(int operandIndex) override 547 { 548 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 549 if 
((num_dest_operands != getNumOperands()) && 550 (operandIndex == (getNumOperands()-1))) 551 return(this->addr.isScalarRegister()); 552 if (num_dest_operands > 1) 553 return dest_vect[operandIndex].isScalarRegister(); 554 else if (num_dest_operands == 1) 555 return LdInstBase<typename MemDataType::CType, 556 typename DestDataType::OperandType, 557 AddrOperandType>::dest.isScalarRegister(); 558 return false; 559 } 560 bool isSrcOperand(int operandIndex) override 561 { 562 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 563 if ((num_dest_operands != getNumOperands()) && 564 (operandIndex == (getNumOperands()-1))) 565 return(this->addr.isVectorRegister()); 566 return false; 567 } 568 bool isDstOperand(int operandIndex) override 569 { 570 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 571 if ((num_dest_operands != getNumOperands()) && 572 (operandIndex == (getNumOperands()-1))) 573 return false; 574 return true; 575 } 576 int getOperandSize(int operandIndex) override 577 { 578 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 579 if ((num_dest_operands != getNumOperands()) && 580 (operandIndex == (getNumOperands()-1))) 581 return(this->addr.opSize()); 582 if (num_dest_operands > 1) 583 return(dest_vect[operandIndex].opSize()); 584 else if (num_dest_operands == 1) 585 return(LdInstBase<typename MemDataType::CType, 586 typename DestDataType::OperandType, 587 AddrOperandType>::dest.opSize()); 588 return 0; 589 } 590 int getRegisterIndex(int operandIndex) override 591 { 592 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 593 if ((num_dest_operands != getNumOperands()) && 594 (operandIndex == (getNumOperands()-1))) 595 return(this->addr.regIndex()); 596 if (num_dest_operands > 1) 597 return(dest_vect[operandIndex].regIndex()); 598 else if (num_dest_operands == 1) 599 return(LdInstBase<typename MemDataType::CType, 600 typename DestDataType::OperandType, 601 AddrOperandType>::dest.regIndex()); 602 return 
-1; 603 } 604 int getNumOperands() override 605 { 606 if (this->addr.isVectorRegister() || this->addr.isScalarRegister()) 607 return(num_dest_operands+1); 608 else 609 return(num_dest_operands); 610 } 611 void execute(GPUDynInstPtr gpuDynInst) override; 612 }; 613 614 template<typename MemDT, typename DestDT> 615 GPUStaticInst* 616 decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj) 617 { 618 unsigned op_offs = obj->getOperandPtr(ib->operands,1); 619 BrigRegOperandInfo tmp = findRegDataType(op_offs, obj); 620 621 if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) { 622 return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld"); 623 } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER || 624 tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) { 625 switch (tmp.regKind) { 626 case Brig::BRIG_REGISTER_KIND_SINGLE: 627 return new LdInst<MemDT, DestDT, 628 SRegAddrOperand>(ib, obj, "ld"); 629 case Brig::BRIG_REGISTER_KIND_DOUBLE: 630 return new LdInst<MemDT, DestDT, 631 DRegAddrOperand>(ib, obj, "ld"); 632 default: 633 fatal("Bad ld register operand type %d\n", tmp.regKind); 634 } 635 } else { 636 fatal("Bad ld register operand kind %d\n", tmp.kind); 637 } 638 } 639 640 template<typename MemDT> 641 GPUStaticInst* 642 decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj) 643 { 644 unsigned op_offs = obj->getOperandPtr(ib->operands,0); 645 BrigRegOperandInfo dest = findRegDataType(op_offs, obj); 646 647 assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER || 648 dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST); 649 switch(dest.regKind) { 650 case Brig::BRIG_REGISTER_KIND_SINGLE: 651 switch (ib->type) { 652 case Brig::BRIG_TYPE_B8: 653 case Brig::BRIG_TYPE_B16: 654 case Brig::BRIG_TYPE_B32: 655 return decodeLd2<MemDT, B32>(ib, obj); 656 case Brig::BRIG_TYPE_U8: 657 case Brig::BRIG_TYPE_U16: 658 case Brig::BRIG_TYPE_U32: 659 return decodeLd2<MemDT, U32>(ib, obj); 660 case Brig::BRIG_TYPE_S8: 661 case Brig::BRIG_TYPE_S16: 662 case 
Brig::BRIG_TYPE_S32: 663 return decodeLd2<MemDT, S32>(ib, obj); 664 case Brig::BRIG_TYPE_F16: 665 case Brig::BRIG_TYPE_F32: 666 return decodeLd2<MemDT, U32>(ib, obj); 667 default: 668 fatal("Bad ld register operand type %d, %d\n", 669 dest.regKind, ib->type); 670 }; 671 case Brig::BRIG_REGISTER_KIND_DOUBLE: 672 switch (ib->type) { 673 case Brig::BRIG_TYPE_B64: 674 return decodeLd2<MemDT, B64>(ib, obj); 675 case Brig::BRIG_TYPE_U64: 676 return decodeLd2<MemDT, U64>(ib, obj); 677 case Brig::BRIG_TYPE_S64: 678 return decodeLd2<MemDT, S64>(ib, obj); 679 case Brig::BRIG_TYPE_F64: 680 return decodeLd2<MemDT, U64>(ib, obj); 681 default: 682 fatal("Bad ld register operand type %d, %d\n", 683 dest.regKind, ib->type); 684 }; 685 default: 686 fatal("Bad ld register operand type %d, %d\n", dest.regKind, 687 ib->type); 688 } 689 } 690 691 template<typename MemDataType, typename SrcOperandType, 692 typename AddrOperandType> 693 class StInstBase : public HsailGPUStaticInst 694 { 695 public: 696 typename SrcOperandType::SrcOperand src; 697 AddrOperandType addr; 698 699 Brig::BrigSegment segment; 700 Brig::BrigMemoryScope memoryScope; 701 Brig::BrigMemoryOrder memoryOrder; 702 unsigned int equivClass; 703 704 StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 705 const char *_opcode) 706 : HsailGPUStaticInst(obj, _opcode) 707 { 708 using namespace Brig; 709 710 setFlag(MemoryRef); 711 setFlag(Store); 712 713 if (ib->opcode == BRIG_OPCODE_ST) { 714 const BrigInstMem *ldst = (const BrigInstMem*)ib; 715 716 segment = (BrigSegment)ldst->segment; 717 memoryOrder = BRIG_MEMORY_ORDER_NONE; 718 memoryScope = BRIG_MEMORY_SCOPE_NONE; 719 equivClass = ldst->equivClass; 720 721 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 722 const BrigOperand *baseOp = obj->getOperand(op_offs); 723 724 if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) || 725 (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) { 726 src.init(op_offs, obj); 727 } 728 729 op_offs = 
obj->getOperandPtr(ib->operands, 1); 730 addr.init(op_offs, obj); 731 } else { 732 const BrigInstAtomic *at = (const BrigInstAtomic*)ib; 733 734 segment = (BrigSegment)at->segment; 735 memoryScope = (BrigMemoryScope)at->memoryScope; 736 memoryOrder = (BrigMemoryOrder)at->memoryOrder; 737 equivClass = 0; 738 739 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 740 addr.init(op_offs, obj); 741 742 op_offs = obj->getOperandPtr(ib->operands, 1); 743 src.init(op_offs, obj); 744 } 745 746 switch (memoryOrder) { 747 case BRIG_MEMORY_ORDER_NONE: 748 setFlag(NoOrder); 749 break; 750 case BRIG_MEMORY_ORDER_RELAXED: 751 setFlag(RelaxedOrder); 752 break; 753 case BRIG_MEMORY_ORDER_SC_ACQUIRE: 754 setFlag(Acquire); 755 break; 756 case BRIG_MEMORY_ORDER_SC_RELEASE: 757 setFlag(Release); 758 break; 759 case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: 760 setFlag(AcquireRelease); 761 break; 762 default: 763 fatal("StInst has bad memory order type\n"); 764 } 765 766 switch (memoryScope) { 767 case BRIG_MEMORY_SCOPE_NONE: 768 setFlag(NoScope); 769 break; 770 case BRIG_MEMORY_SCOPE_WORKITEM: 771 setFlag(WorkitemScope); 772 break; 773 case BRIG_MEMORY_SCOPE_WORKGROUP: 774 setFlag(WorkgroupScope); 775 break; 776 case BRIG_MEMORY_SCOPE_AGENT: 777 setFlag(DeviceScope); 778 break; 779 case BRIG_MEMORY_SCOPE_SYSTEM: 780 setFlag(SystemScope); 781 break; 782 default: 783 fatal("StInst has bad memory scope type\n"); 784 } 785 786 switch (segment) { 787 case BRIG_SEGMENT_GLOBAL: 788 setFlag(GlobalSegment); 789 break; 790 case BRIG_SEGMENT_GROUP: 791 setFlag(GroupSegment); 792 break; 793 case BRIG_SEGMENT_PRIVATE: 794 setFlag(PrivateSegment); 795 break; 796 case BRIG_SEGMENT_READONLY: 797 setFlag(ReadOnlySegment); 798 break; 799 case BRIG_SEGMENT_SPILL: 800 setFlag(SpillSegment); 801 break; 802 case BRIG_SEGMENT_FLAT: 803 setFlag(Flat); 804 break; 805 case BRIG_SEGMENT_ARG: 806 setFlag(ArgSegment); 807 break; 808 default: 809 panic("St: segment %d not supported\n", segment); 810 } 811 } 812 
813 int numDstRegOperands() override { return 0; } 814 int numSrcRegOperands() override 815 { 816 return src.isVectorRegister() + this->addr.isVectorRegister(); 817 } 818 int getNumOperands() override 819 { 820 if (this->addr.isVectorRegister() || this->addr.isScalarRegister()) 821 return 2; 822 else 823 return 1; 824 } 825 bool isVectorRegister(int operandIndex) override 826 { 827 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 828 return !operandIndex ? src.isVectorRegister() : 829 this->addr.isVectorRegister(); 830 } 831 bool isCondRegister(int operandIndex) override 832 { 833 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 834 return !operandIndex ? src.isCondRegister() : 835 this->addr.isCondRegister(); 836 } 837 bool isScalarRegister(int operandIndex) override 838 { 839 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 840 return !operandIndex ? src.isScalarRegister() : 841 this->addr.isScalarRegister(); 842 } 843 bool isSrcOperand(int operandIndex) override 844 { 845 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 846 return true; 847 } 848 bool isDstOperand(int operandIndex) override { return false; } 849 int getOperandSize(int operandIndex) override 850 { 851 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 852 return !operandIndex ? src.opSize() : this->addr.opSize(); 853 } 854 int getRegisterIndex(int operandIndex) override 855 { 856 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 857 return !operandIndex ? 
src.regIndex() : this->addr.regIndex(); 858 } 859 }; 860 861 862 template<typename MemDataType, typename SrcDataType, 863 typename AddrOperandType> 864 class StInst : 865 public StInstBase<MemDataType, typename SrcDataType::OperandType, 866 AddrOperandType>, 867 public MemInst 868 { 869 public: 870 typename SrcDataType::OperandType::SrcOperand src_vect[4]; 871 uint16_t num_src_operands; 872 void generateDisassembly() override; 873 874 StInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 875 const char *_opcode, int srcIdx) 876 : StInstBase<MemDataType, typename SrcDataType::OperandType, 877 AddrOperandType>(ib, obj, _opcode), 878 MemInst(SrcDataType::memType) 879 { 880 init_addr(&this->addr); 881 882 BrigRegOperandInfo rinfo; 883 unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx); 884 const Brig::BrigOperand *baseOp = obj->getOperand(op_offs); 885 886 if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) { 887 const Brig::BrigOperandConstantBytes *op = 888 (Brig::BrigOperandConstantBytes*)baseOp; 889 890 rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind, 891 Brig::BRIG_TYPE_NONE); 892 } else { 893 rinfo = findRegDataType(op_offs, obj); 894 } 895 896 if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) { 897 const Brig::BrigOperandOperandList *brigRegVecOp = 898 (const Brig::BrigOperandOperandList*)baseOp; 899 900 num_src_operands = 901 *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4; 902 903 assert(num_src_operands <= 4); 904 } else { 905 num_src_operands = 1; 906 } 907 908 if (num_src_operands > 1) { 909 assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST); 910 911 for (int i = 0; i < num_src_operands; ++i) { 912 src_vect[i].init_from_vect(op_offs, obj, i); 913 } 914 } 915 } 916 917 void 918 initiateAcc(GPUDynInstPtr gpuDynInst) override 919 { 920 // before performing a store, check if this store has 921 // release semantics, and if so issue a release first 922 if (!this->isLocalMem()) { 923 if 
(gpuDynInst->computeUnit()->shader->separate_acquire_release 924 && gpuDynInst->isRelease()) { 925 926 gpuDynInst->statusBitVector = VectorMask(1); 927 gpuDynInst->execContinuation = &GPUStaticInst::execSt; 928 gpuDynInst->useContinuation = true; 929 // create request 930 Request *req = new Request(0, 0, 0, 0, 931 gpuDynInst->computeUnit()->masterId(), 932 0, gpuDynInst->wfDynId); 933 req->setFlags(Request::RELEASE); 934 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req); 935 936 return; 937 } 938 } 939 940 // if there is no release semantic, perform stores immediately 941 execSt(gpuDynInst); 942 } 943 944 private: 945 // execSt may be called through a continuation 946 // if the store had release semantics. see comment for 947 // execSt in gpu_static_inst.hh 948 void 949 execSt(GPUDynInstPtr gpuDynInst) override 950 { 951 typedef typename MemDataType::CType c0; 952 953 gpuDynInst->statusBitVector = gpuDynInst->exec_mask; 954 955 if (num_src_operands > 1) { 956 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) 957 if (gpuDynInst->exec_mask[i]) 958 gpuDynInst->statusVector.push_back(num_src_operands); 959 else 960 gpuDynInst->statusVector.push_back(0); 961 } 962 963 for (int k = 0; k < num_src_operands; ++k) { 964 c0 *d = &((c0*)gpuDynInst->d_data) 965 [k * gpuDynInst->computeUnit()->wfSize()]; 966 967 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { 968 if (gpuDynInst->exec_mask[i]) { 969 Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); 970 971 if (this->isLocalMem()) { 972 //store to shared memory 973 gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr, 974 *d); 975 } else { 976 Request *req = 977 new Request(0, vaddr, sizeof(c0), 0, 978 gpuDynInst->computeUnit()->masterId(), 979 0, gpuDynInst->wfDynId); 980 981 gpuDynInst->setRequestFlags(req); 982 PacketPtr pkt = new Packet(req, MemCmd::WriteReq); 983 pkt->dataStatic<c0>(d); 984 985 // translation is performed in sendRequest() 986 // the request will be finished 
// when the store completes
                            gpuDynInst->useContinuation = false;
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);

                        }
                    }
                    // d walks the flattened per-lane source data
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      public:
        // Operand numbering for a store: indices [0, num_src_operands) are
        // the source register(s); index num_src_operands is the address
        // operand.  When there is more than one source the sources live in
        // src_vect[]; a single source lives in the StInstBase::src member.
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isVectorRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isVectorRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isVectorRegister();
            return false;
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isCondRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isCondRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isScalarRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isScalarRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isScalarRegister();
            return false;
        }
        // a store reads all of its operands (data sources and address)
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        // a store writes no register
        bool isDstOperand(int operandIndex) override { return false; }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.opSize();
            if (num_src_operands > 1)
                return src_vect[operandIndex].opSize();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.opSize();
            // falls through only when num_src_operands == 0
            return 0;
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.regIndex();
            if (num_src_operands > 1)
                return src_vect[operandIndex].regIndex();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.regIndex();
            // -1: no register operand matched
            return -1;
        }
        int getNumOperands() override
        {
            // the address only counts as an operand when it is backed by a
            // register (vector or scalar); an immediate address adds nothing
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return num_src_operands + 1;
            else
                return num_src_operands;
        }
        void execute(GPUDynInstPtr gpuDynInst) override;
    };

    // Decode a BRIG store and instantiate an StInst specialized on the
    // flavor of its address operand (no register / single / double).
    // For the atomic opcodes the operand order differs: the address comes
    // first and the stored value second, hence the srcIdx/destIdx swap.
    template<typename DataType, typename SrcDataType>
    GPUStaticInst*
    decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        int srcIdx = 0;
        int destIdx = 1;
        if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
            ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
            srcIdx = 1;
            destIdx = 0;
        }
        unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new StInst<DataType, SrcDataType,
                              NoRegAddrOperand>(ib, obj, "st", srcIdx);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new StInst<DataType, SrcDataType,
                                  SRegAddrOperand>(ib, obj, "st", srcIdx);
              case
                   Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new StInst<DataType, SrcDataType,
                                  DRegAddrOperand>(ib, obj, "st", srcIdx);
              default:
                fatal("Bad st register operand type %d\n", tmp.type);
            }
        } else {
            fatal("Bad st register operand kind %d\n", tmp.kind);
        }
    }

    // Common base for HSAIL atomics (both 'atomic', which returns the old
    // value, and 'atomicnoret').  Decodes the BrigInstAtomic fields
    // (segment, memory order/scope, atomic operation) and translates each
    // into the corresponding instruction flag, then initializes the
    // dest/addr/src operands from the BRIG operand list.
    template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
             bool HasDst>
    class AtomicInstBase : public HsailGPUStaticInst
    {
      public:
        typename OperandType::DestOperand dest;
        typename OperandType::SrcOperand src[NumSrcOperands];
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigAtomicOperation atomicOperation;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigOpcode opcode;

        AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                       const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            atomicOperation = (BrigAtomicOperation)at->atomicOperation;
            opcode = (BrigOpcode)ib->opcode;

            assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET ||
                   opcode == Brig::BRIG_OPCODE_ATOMIC);

            setFlag(MemoryRef);

            // ATOMIC returns the pre-op value; ATOMICNORET does not
            if (opcode == Brig::BRIG_OPCODE_ATOMIC) {
                setFlag(AtomicReturn);
            } else {
                setFlag(AtomicNoReturn);
            }

            // map the BRIG memory order onto an instruction flag
            switch (memoryOrder) {
              case BRIG_MEMORY_ORDER_NONE:
                setFlag(NoOrder);
                break;
              case BRIG_MEMORY_ORDER_RELAXED:
                setFlag(RelaxedOrder);
                break;
              case BRIG_MEMORY_ORDER_SC_ACQUIRE:
                setFlag(Acquire);
                break;
              case BRIG_MEMORY_ORDER_SC_RELEASE:
                setFlag(Release);
                break;
              case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
                setFlag(AcquireRelease);
                break;
              default:
                fatal("AtomicInst has bad memory order type\n");
            }

            // map the BRIG memory scope onto an instruction flag
            // (BRIG "agent" corresponds to device scope here)
            switch (memoryScope) {
              case BRIG_MEMORY_SCOPE_NONE:
                setFlag(NoScope);
                break;
              case BRIG_MEMORY_SCOPE_WORKITEM:
                setFlag(WorkitemScope);
                break;
              case BRIG_MEMORY_SCOPE_WORKGROUP:
                setFlag(WorkgroupScope);
                break;
              case BRIG_MEMORY_SCOPE_AGENT:
                setFlag(DeviceScope);
                break;
              case BRIG_MEMORY_SCOPE_SYSTEM:
                setFlag(SystemScope);
                break;
              default:
                fatal("AtomicInst has bad memory scope type\n");
            }

            // map the atomic operation onto an instruction flag
            switch (atomicOperation) {
              case Brig::BRIG_ATOMIC_AND:
                setFlag(AtomicAnd);
                break;
              case Brig::BRIG_ATOMIC_OR:
                setFlag(AtomicOr);
                break;
              case Brig::BRIG_ATOMIC_XOR:
                setFlag(AtomicXor);
                break;
              case Brig::BRIG_ATOMIC_CAS:
                setFlag(AtomicCAS);
                break;
              case Brig::BRIG_ATOMIC_EXCH:
                setFlag(AtomicExch);
                break;
              case Brig::BRIG_ATOMIC_ADD:
                setFlag(AtomicAdd);
                break;
              case Brig::BRIG_ATOMIC_WRAPINC:
                setFlag(AtomicInc);
                break;
              case Brig::BRIG_ATOMIC_WRAPDEC:
                setFlag(AtomicDec);
                break;
              case Brig::BRIG_ATOMIC_MIN:
                setFlag(AtomicMin);
                break;
              case Brig::BRIG_ATOMIC_MAX:
                setFlag(AtomicMax);
                break;
              case Brig::BRIG_ATOMIC_SUB:
                setFlag(AtomicSub);
                break;
              default:
                fatal("Bad BrigAtomicOperation code %d\n", atomicOperation);
            }

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                setFlag(GlobalSegment);
                break;
              case BRIG_SEGMENT_GROUP:
                setFlag(GroupSegment);
                break;
              case BRIG_SEGMENT_FLAT:
                setFlag(Flat);
                break;
              default:
                panic("Atomic: segment %d not supported\n", segment);
            }

            // operand layout differs by form:
            //   atomic:      dest, addr, src0[, src1]
            //   atomicnoret: addr, src0[, src1]
            if (HasDst) {
                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                dest.init(op_offs, obj);

                op_offs = obj->getOperandPtr(ib->operands, 1);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 2);
                    src[i].init(op_offs, obj);
                }
            } else {

                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 1);
                    src[i].init(op_offs, obj);
                }
            }
        }

        // count of source operands backed by vector registers,
        // including the address register if it is one
        int numSrcRegOperands()
        {
            int operands = 0;
            for (int i = 0; i < NumSrcOperands; i++) {
                if (src[i].isVectorRegister()) {
                    operands++;
                }
            }
            if (addr.isVectorRegister())
                operands++;
            return operands;
        }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        // operand numbering: [0, NumSrcOperands) = sources,
        // NumSrcOperands = address, NumSrcOperands+1 = dest (when the
        // address is a vector register)
        int getNumOperands()
        {
            if (addr.isVectorRegister())
                return(NumSrcOperands + 2);
            return(NumSrcOperands + 1);
        }
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isVectorRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isCondRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isCondRegister());
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isScalarRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isScalarRegister());
            else
                return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex <
                                            getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return true;
            else if (operandIndex == NumSrcOperands)
                // NOTE(review): the address is reported as a source only
                // when it is a vector register — verify this asymmetry with
                // isScalarRegister() is intentional
                return(addr.isVectorRegister());
            else
                return false;
        }
        // only the operand past the address slot (the dest) is written
        bool isDstOperand(int operandIndex)
        {
            if (operandIndex <= NumSrcOperands)
                return false;
            else
                return true;
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].opSize());
            else if (operandIndex == NumSrcOperands)
                return(addr.opSize());
            else
                return(dest.opSize());
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].regIndex());
            else if (operandIndex == NumSrcOperands)
                return(addr.regIndex());
            else
                return(dest.regIndex());
            // unreachable: every branch above returns
            return -1;
        }
    };

    // Concrete HSAIL atomic.  Combines the operand/flag decoding of
    // AtomicInstBase with MemInst, which records the memory access size
    // derived from MemDataType and holds the address operand pointer.
    template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
             bool HasDst>
    class AtomicInst :
        public AtomicInstBase<typename MemDataType::OperandType,
                              AddrOperandType, NumSrcOperands, HasDst>,
        public MemInst
    {
      public:
        void generateDisassembly() override;

        AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
            : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
                             NumSrcOperands, HasDst>
                (ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before doing the RMW, check if this atomic has
            // release semantics, and if so issue a release first
            if (!this->isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && (gpuDynInst->isRelease()
                    || gpuDynInst->isAcquireRelease())) {

                    gpuDynInst->statusBitVector = VectorMask(1);

                    // resume with the actual RMW once the release completes
                    gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
                    gpuDynInst->useContinuation = true;

                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);

                    return;
                }
            }

            // if there is no release semantic, execute the RMW immediately
            execAtomic(gpuDynInst);

        }

        void execute(GPUDynInstPtr gpuDynInst) override;

      private:
        // execAtomic may be called through a continuation
        // if the RMW had release semantics. see comment for
        // execContinuation in gpu_dyn_inst.hh
        void
        execAtomic(GPUDynInstPtr gpuDynInst) override
        {
            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            typedef typename MemDataType::CType c0;

            // d: per-lane destination (old value); e: first source operand;
            // f: second source operand (the swap value for CAS)
            c0 *d = &((c0*) gpuDynInst->d_data)[0];
            c0 *e = &((c0*) gpuDynInst->a_data)[0];
            c0 *f = &((c0*) gpuDynInst->x_data)[0];

            for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                if (gpuDynInst->exec_mask[i]) {
                    Addr vaddr = gpuDynInst->addr[i];

                    if (this->isLocalMem()) {
                        // LDS (group segment): emulate the RMW directly on
                        // the wavefront's LDS chunk, returning the old value
                        Wavefront *wavefront = gpuDynInst->wavefront();
                        *d = wavefront->ldsChunk->read<c0>(vaddr);

                        if (this->isAtomicAdd()) {
                            wavefront->ldsChunk->write<c0>(vaddr,
                            wavefront->ldsChunk->read<c0>(vaddr) + (*e));
                        } else if (this->isAtomicSub()) {
                            wavefront->ldsChunk->write<c0>(vaddr,
                            wavefront->ldsChunk->read<c0>(vaddr) - (*e));
                        } else if (this->isAtomicMax()) {
                            wavefront->ldsChunk->write<c0>(vaddr,
                            std::max(wavefront->ldsChunk->read<c0>(vaddr),
                            (*e)));
                        } else if (this->isAtomicMin()) {
                            wavefront->ldsChunk->write<c0>(vaddr,
                            std::min(wavefront->ldsChunk->read<c0>(vaddr),
                            (*e)));
                        } else if (this->isAtomicAnd()) {
                            wavefront->ldsChunk->write<c0>(vaddr,
                            wavefront->ldsChunk->read<c0>(vaddr) & (*e));
                        } else if (this->isAtomicOr()) {
                            wavefront->ldsChunk->write<c0>(vaddr,
                            wavefront->ldsChunk->read<c0>(vaddr) | (*e));
                        } else if (this->isAtomicXor()) {
                            wavefront->ldsChunk->write<c0>(vaddr,
                            wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
                        } else if (this->isAtomicInc()) {
                            wavefront->ldsChunk->write<c0>(vaddr,
                            wavefront->ldsChunk->read<c0>(vaddr) + 1);
                        } else if (this->isAtomicDec()) {
                            wavefront->ldsChunk->write<c0>(vaddr,
                            wavefront->ldsChunk->read<c0>(vaddr) - 1);
                        } else if (this->isAtomicExch()) {
                            wavefront->ldsChunk->write<c0>(vaddr, (*e));
                        } else if (this->isAtomicCAS()) {
                            wavefront->ldsChunk->write<c0>(vaddr,
                            (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
                            (*f) : wavefront->ldsChunk->read<c0>(vaddr));
                        } else {
                            fatal("Unrecognized or invalid HSAIL atomic op "
                                  "type.\n");
                        }
                    } else {
                        // global/flat: issue a SwapReq carrying an atomic-op
                        // functor; the memory system performs the RMW
                        Request *req =
                            new Request(0, vaddr, sizeof(c0), 0,
                                        gpuDynInst->computeUnit()->masterId(),
                                        0, gpuDynInst->wfDynId,
                                        gpuDynInst->makeAtomicOpFunctor<c0>(e,
                                        f));

                        gpuDynInst->setRequestFlags(req);
                        PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
                        pkt->dataStatic(d);

                        if (gpuDynInst->computeUnit()->shader->
                            separate_acquire_release &&
                            (gpuDynInst->isAcquire())) {
                            // if this atomic has acquire semantics,
                            // schedule the continuation to perform an
                            // acquire after the RMW completes
                            gpuDynInst->execContinuation =
                                &GPUStaticInst::execAtomicAcq;

                            gpuDynInst->useContinuation = true;
                        } else {
                            // the request will be finished when the RMW completes
                            gpuDynInst->useContinuation = false;
                        }
                        // translation is performed in sendRequest()
                        gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
                                                               pkt);
                    }
                }

                ++d;
                ++e;
                ++f;
            }

            gpuDynInst->updateStats();
        }

        // execAtomicAcq will always be called through a continuation.
        // see comment for execContinuation in gpu_dyn_inst.hh
        void
        execAtomicAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after performing the RMW, check to see if this instruction
            // has acquire semantics, and if so, issue an acquire
            if (!this->isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                     && gpuDynInst->isAcquire()) {
                    gpuDynInst->statusBitVector = VectorMask(1);

                    // the request will be finished when
                    // the acquire completes
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
                }
            }
        }
    };

    // Instantiate the right GPUStaticInst for a decoded atomic: atomic
    // loads and stores are routed to the plain ld/st decoders; everything
    // else becomes an AtomicInst, with or without a destination.
    template<typename DataType, typename AddrOperandType, int NumSrcOperands>
    GPUStaticInst*
    constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
            return decodeLd<DataType>(ib, obj);
        } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
                return decodeSt<S8,S8>(ib, obj);
              case Brig::BRIG_TYPE_B16:
                return decodeSt<S16,S16>(ib, obj);
              case Brig::BRIG_TYPE_B32:
                return decodeSt<S32,S32>(ib, obj);
              case Brig::BRIG_TYPE_B64:
                return decodeSt<S64,S64>(ib, obj);
              default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
            }
        } else {
            if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
                return new AtomicInst<DataType, AddrOperandType,
                    NumSrcOperands, false>(ib, obj, "atomicnoret");
            else
                return new AtomicInst<DataType, AddrOperandType,
                    NumSrcOperands, true>(ib, obj, "atomic");
        }
    }

    // Inspect the atomic's address operand and dispatch to constructAtomic
    // with the matching address-operand specialization.  The address is
    // operand 0 for atomicnoret (no dest) and operand 1 otherwise.
    template<typename DataType, int NumSrcOperands>
    GPUStaticInst*
    decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
            Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;

        unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return constructAtomic<DataType, NoRegAddrOperand,
                                   NumSrcOperands>(ib, obj);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return constructAtomic<DataType, SRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return constructAtomic<DataType, DRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              default:
                fatal("Bad atomic register operand type %d\n", tmp.type);
            }
        } else {
            fatal("Bad atomic register operand kind %d\n", tmp.kind);
        }
    }


    // Entry point for decoding the 'atomic' opcode.  CAS carries two
    // source operands (compare value and swap value); all other atomic
    // operations carry one.
    template<typename DataType>
    GPUStaticInst*
    decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }

    // Entry point for decoding the 'atomicnoret' opcode; same source-
    // operand counts as decodeAtomic.
    template<typename DataType>
    GPUStaticInst*
    decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }
} // namespace HsailISA

#endif // __ARCH_HSAIL_INSTS_MEM_HH__