1/* 2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Author: Steve Reinhardt 34 */ 35 36#ifndef __ARCH_HSAIL_INSTS_DECL_HH__ 37#define __ARCH_HSAIL_INSTS_DECL_HH__ 38 39#include <cmath> 40 41#include "arch/hsail/insts/gpu_static_inst.hh" 42#include "arch/hsail/operand.hh" 43#include "debug/HSAIL.hh" 44#include "gpu-compute/gpu_dyn_inst.hh" 45#include "gpu-compute/shader.hh" 46 47namespace HsailISA 48{ 49 template<typename _DestOperand, typename _SrcOperand> 50 class HsailOperandType 51 { 52 public: 53 typedef _DestOperand DestOperand; 54 typedef _SrcOperand SrcOperand; 55 }; 56 57 typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType; 58 typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType; 59 typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType; 60 61 // The IsBits parameter serves only to disambiguate tbhe B* types from 62 // the U* types, which otherwise would be identical (and 63 // indistinguishable). 64 template<typename _OperandType, typename _CType, Enums::MemType _memType, 65 vgpr_type _vgprType, int IsBits=0> 66 class HsailDataType 67 { 68 public: 69 typedef _OperandType OperandType; 70 typedef _CType CType; 71 static const Enums::MemType memType = _memType; 72 static const vgpr_type vgprType = _vgprType; 73 static const char *label; 74 }; 75 76 typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1; 77 typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8; 78 79 typedef HsailDataType<SRegOperandType, uint16_t, 80 Enums::M_U16, VT_32, 1> B16; 81 82 typedef HsailDataType<SRegOperandType, uint32_t, 83 Enums::M_U32, VT_32, 1> B32; 84 85 typedef HsailDataType<DRegOperandType, uint64_t, 86 Enums::M_U64, VT_64, 1> B64; 87 88 typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8; 89 typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16; 90 typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32; 91 typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64; 92 93 typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8; 94 typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16; 95 typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32; 96 typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64; 97 98 typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32; 99 typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64; 100 101 template<typename DestOperandType, typename SrcOperandType, 102 int NumSrcOperands> 103 class CommonInstBase : public HsailGPUStaticInst 104 { 105 protected: 106 typename DestOperandType::DestOperand dest; 107 typename SrcOperandType::SrcOperand src[NumSrcOperands]; 108 109 void 110 generateDisassembly() 111 { 112 disassembly = csprintf("%s%s %s", opcode, opcode_suffix(), 113 dest.disassemble()); 114 115 for (int i = 0; i < NumSrcOperands; ++i) { 116 disassembly += ","; 117 disassembly += src[i].disassemble(); 118 } 119 } 120 121 virtual std::string opcode_suffix() = 0; 122 123 public: 124 CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 125 const char *opcode) 126 : HsailGPUStaticInst(obj, opcode) 127 { 128 setFlag(ALU); 129 130 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 131 132 dest.init(op_offs, obj); 133 134 for (int i = 0; i < NumSrcOperands; ++i) { 135 op_offs = obj->getOperandPtr(ib->operands, i + 1); 136 src[i].init(op_offs, obj); 137 } 138 } 139 140 bool isVectorRegister(int operandIndex) { 141 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 142 if (operandIndex < NumSrcOperands) 143 return src[operandIndex].isVectorRegister(); 144 else 145 return dest.isVectorRegister(); 146 } 147 bool isCondRegister(int operandIndex) { 148 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 149 if (operandIndex < NumSrcOperands) 150 return src[operandIndex].isCondRegister(); 151 else 152 return dest.isCondRegister(); 153 } 154 bool isScalarRegister(int operandIndex) { 155 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 156 if (operandIndex < NumSrcOperands) 157 return src[operandIndex].isScalarRegister(); 158 else 159 return dest.isScalarRegister(); 160 } 161 bool isSrcOperand(int operandIndex) { 162 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 163 if (operandIndex < NumSrcOperands) 164 return true; 165 return false; 166 } 167 168 bool isDstOperand(int operandIndex) { 169 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 170 if (operandIndex >= NumSrcOperands) 171 return true; 172 return false; 173 } 174 int getOperandSize(int operandIndex) { 175 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 176 if (operandIndex < NumSrcOperands) 177 return src[operandIndex].opSize(); 178 else 179 return dest.opSize(); 180 } 181 int 182 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) 183 { 184 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 185 186 if (operandIndex < NumSrcOperands) 187 return src[operandIndex].regIndex(); 188 else 189 return dest.regIndex(); 190 } 191 int numSrcRegOperands() { 192 int operands = 0; 193 for (int i = 0; i < NumSrcOperands; i++) { 194 if (src[i].isVectorRegister()) { 195 operands++; 196 } 197 } 198 return operands; 199 } 200 int numDstRegOperands() { return dest.isVectorRegister(); } 201 int getNumOperands() { return NumSrcOperands + 1; } 202 }; 203 204 template<typename DataType, int NumSrcOperands> 205 class ArithInst : public CommonInstBase<typename DataType::OperandType, 206 typename DataType::OperandType, 207 NumSrcOperands> 208 { 209 public: 210 std::string opcode_suffix() { return csprintf("_%s", DataType::label); } 211 212 ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 213 const char *opcode) 214 : CommonInstBase<typename DataType::OperandType, 215 typename DataType::OperandType, 216 NumSrcOperands>(ib, obj, opcode) 217 { 218 } 219 }; 220 221 template<typename DestOperandType, typename Src0OperandType, 222 typename Src1OperandType, typename Src2OperandType> 223 class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst 224 { 225 protected: 226 typename DestOperandType::DestOperand dest; 227 typename Src0OperandType::SrcOperand src0; 228 typename Src1OperandType::SrcOperand src1; 229 typename Src2OperandType::SrcOperand src2; 230 231 void 232 generateDisassembly() 233 { 234 disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(), 235 src0.disassemble(), src1.disassemble(), 236 src2.disassemble()); 237 } 238 239 public: 240 ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib, 241 const BrigObject *obj, 242 const char *opcode) 243 : HsailGPUStaticInst(obj, opcode) 244 { 245 setFlag(ALU); 246 247 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 248 dest.init(op_offs, obj); 249 250 op_offs = obj->getOperandPtr(ib->operands, 1); 251 src0.init(op_offs, obj); 252 253 op_offs = obj->getOperandPtr(ib->operands, 2); 254 src1.init(op_offs, obj); 255 256 op_offs = obj->getOperandPtr(ib->operands, 3); 257 src2.init(op_offs, obj); 258 } 259 260 bool isVectorRegister(int operandIndex) { 261 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 262 if (!operandIndex) 263 return src0.isVectorRegister(); 264 else if (operandIndex == 1) 265 return src1.isVectorRegister(); 266 else if (operandIndex == 2) 267 return src2.isVectorRegister(); 268 else 269 return dest.isVectorRegister(); 270 } 271 bool isCondRegister(int operandIndex) { 272 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 273 if (!operandIndex) 274 return src0.isCondRegister(); 275 else if (operandIndex == 1) 276 return src1.isCondRegister(); 277 else if (operandIndex == 2) 278 return src2.isCondRegister(); 279 else 280 return dest.isCondRegister(); 281 } 282 bool isScalarRegister(int operandIndex) { 283 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 284 if (!operandIndex) 285 return src0.isScalarRegister(); 286 else if (operandIndex == 1) 287 return src1.isScalarRegister(); 288 else if (operandIndex == 2) 289 return src2.isScalarRegister(); 290 else 291 return dest.isScalarRegister(); 292 } 293 bool isSrcOperand(int operandIndex) { 294 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 295 if (operandIndex < 3) 296 return true; 297 else 298 return false; 299 } 300 bool isDstOperand(int operandIndex) { 301 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 302 if (operandIndex >= 3) 303 return true; 304 else 305 return false; 306 } 307 int getOperandSize(int operandIndex) { 308 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 309 if (!operandIndex) 310 return src0.opSize(); 311 else if (operandIndex == 1) 312 return src1.opSize(); 313 else if (operandIndex == 2) 314 return src2.opSize(); 315 else 316 return dest.opSize(); 317 } 318 319 int 320 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) 321 { 322 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 323 if (!operandIndex) 324 return src0.regIndex(); 325 else if (operandIndex == 1) 326 return src1.regIndex(); 327 else if (operandIndex == 2) 328 return src2.regIndex(); 329 else 330 return dest.regIndex(); 331 } 332 333 int numSrcRegOperands() { 334 int operands = 0; 335 if (src0.isVectorRegister()) { 336 operands++; 337 } 338 if (src1.isVectorRegister()) { 339 operands++; 340 } 341 if (src2.isVectorRegister()) { 342 operands++; 343 } 344 return operands; 345 } 346 int numDstRegOperands() { return dest.isVectorRegister(); } 347 int getNumOperands() { return 4; } 348 }; 349 350 template<typename DestDataType, typename Src0DataType, 351 typename Src1DataType, typename Src2DataType> 352 class ThreeNonUniformSourceInst : 353 public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType, 354 typename Src0DataType::OperandType, 355 typename Src1DataType::OperandType, 356 typename Src2DataType::OperandType> 357 { 358 public: 359 typedef typename DestDataType::CType DestCType; 360 typedef typename Src0DataType::CType Src0CType; 361 typedef typename Src1DataType::CType Src1CType; 362 typedef typename Src2DataType::CType Src2CType; 363 364 ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib, 365 const BrigObject *obj, const char *opcode) 366 : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType, 367 typename Src0DataType::OperandType, 368 typename Src1DataType::OperandType, 369 typename Src2DataType::OperandType>(ib, 370 obj, opcode) 371 { 372 } 373 }; 374 375 template<typename DataType> 376 class CmovInst : public ThreeNonUniformSourceInst<DataType, B1, 377 DataType, DataType> 378 { 379 public: 380 CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 381 const char *opcode) 382 : ThreeNonUniformSourceInst<DataType, B1, DataType, 383 DataType>(ib, obj, opcode) 384 { 385 } 386 }; 387 388 template<typename DataType> 389 class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType, 390 DataType, U32, 391 U32> 392 { 393 public: 394 ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 395 const char *opcode) 396 : ThreeNonUniformSourceInst<DataType, DataType, U32, 397 U32>(ib, obj, opcode) 398 { 399 } 400 }; 401 402 template<typename DestOperandType, typename Src0OperandType, 403 typename Src1OperandType> 404 class TwoNonUniformSourceInstBase : public HsailGPUStaticInst 405 { 406 protected: 407 typename DestOperandType::DestOperand dest; 408 typename Src0OperandType::SrcOperand src0; 409 typename Src1OperandType::SrcOperand src1; 410 411 void 412 generateDisassembly() 413 { 414 disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(), 415 src0.disassemble(), src1.disassemble()); 416 } 417 418 419 public: 420 TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib, 421 const BrigObject *obj, const char *opcode) 422 : HsailGPUStaticInst(obj, opcode) 423 { 424 setFlag(ALU); 425 426 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 427 dest.init(op_offs, obj); 428 429 op_offs = obj->getOperandPtr(ib->operands, 1); 430 src0.init(op_offs, obj); 431 432 op_offs = obj->getOperandPtr(ib->operands, 2); 433 src1.init(op_offs, obj); 434 } 435 bool isVectorRegister(int operandIndex) { 436 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 437 if (!operandIndex) 438 return src0.isVectorRegister(); 439 else if (operandIndex == 1) 440 return src1.isVectorRegister(); 441 else 442 return dest.isVectorRegister(); 443 } 444 bool isCondRegister(int operandIndex) { 445 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 446 if (!operandIndex) 447 return src0.isCondRegister(); 448 else if (operandIndex == 1) 449 return src1.isCondRegister(); 450 else 451 return dest.isCondRegister(); 452 } 453 bool isScalarRegister(int operandIndex) { 454 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 455 if (!operandIndex) 456 return src0.isScalarRegister(); 457 else if (operandIndex == 1) 458 return src1.isScalarRegister(); 459 else 460 return dest.isScalarRegister(); 461 } 462 bool isSrcOperand(int operandIndex) { 463 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 464 if (operandIndex < 2) 465 return true; 466 else 467 return false; 468 } 469 bool isDstOperand(int operandIndex) { 470 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 471 if (operandIndex >= 2) 472 return true; 473 else 474 return false; 475 } 476 int getOperandSize(int operandIndex) { 477 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 478 if (!operandIndex) 479 return src0.opSize(); 480 else if (operandIndex == 1) 481 return src1.opSize(); 482 else 483 return dest.opSize(); 484 } 485 486 int 487 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) 488 { 489 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 490 if (!operandIndex) 491 return src0.regIndex(); 492 else if (operandIndex == 1) 493 return src1.regIndex(); 494 else 495 return dest.regIndex(); 496 } 497 498 int numSrcRegOperands() { 499 int operands = 0; 500 if (src0.isVectorRegister()) { 501 operands++; 502 } 503 if (src1.isVectorRegister()) { 504 operands++; 505 } 506 return operands; 507 } 508 int numDstRegOperands() { return dest.isVectorRegister(); } 509 int getNumOperands() { return 3; } 510 }; 511 512 template<typename DestDataType, typename Src0DataType, 513 typename Src1DataType> 514 class TwoNonUniformSourceInst : 515 public TwoNonUniformSourceInstBase<typename DestDataType::OperandType, 516 typename Src0DataType::OperandType, 517 typename Src1DataType::OperandType> 518 { 519 public: 520 typedef typename DestDataType::CType DestCType; 521 typedef typename Src0DataType::CType Src0CType; 522 typedef typename Src1DataType::CType Src1CType; 523 524 TwoNonUniformSourceInst(const Brig::BrigInstBase *ib, 525 const BrigObject *obj, const char *opcode) 526 : TwoNonUniformSourceInstBase<typename DestDataType::OperandType, 527 typename Src0DataType::OperandType, 528 typename Src1DataType::OperandType>(ib, 529 obj, opcode) 530 { 531 } 532 }; 533 534 // helper function for ClassInst 535 template<typename T> 536 bool 537 fpclassify(T src0, uint32_t src1) 538 { 539 int fpclass = std::fpclassify(src0); 540 541 if ((src1 & 0x3) && (fpclass == FP_NAN)) { 542 return true; 543 } 544 545 if (src0 <= -0.0) { 546 if ((src1 & 0x4) && fpclass == FP_INFINITE) 547 return true; 548 if ((src1 & 0x8) && fpclass == FP_NORMAL) 549 return true; 550 if ((src1 & 0x10) && fpclass == FP_SUBNORMAL) 551 return true; 552 if ((src1 & 0x20) && fpclass == FP_ZERO) 553 return true; 554 } else { 555 if ((src1 & 0x40) && fpclass == FP_ZERO) 556 return true; 557 if ((src1 & 0x80) && fpclass == FP_SUBNORMAL) 558 return true; 559 if ((src1 & 0x100) && fpclass == FP_NORMAL) 560 return true; 561 if ((src1 & 0x200) && fpclass == FP_INFINITE) 562 return true; 563 } 564 return false; 565 } 566 567 template<typename DataType> 568 class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32> 569 { 570 public: 571 ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 572 const char *opcode) 573 : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode) 574 { 575 } 576 }; 577 578 template<typename DataType> 579 class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32> 580 { 581 public: 582 ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 583 const char *opcode) 584 : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode) 585 { 586 } 587 }; 588 589 // helper function for CmpInst 590 template<typename T> 591 bool 592 compare(T src0, T src1, Brig::BrigCompareOperation cmpOp) 593 { 594 using namespace Brig; 595 596 switch (cmpOp) { 597 case BRIG_COMPARE_EQ: 598 case BRIG_COMPARE_EQU: 599 case BRIG_COMPARE_SEQ: 600 case BRIG_COMPARE_SEQU: 601 return (src0 == src1); 602 603 case BRIG_COMPARE_NE: 604 case BRIG_COMPARE_NEU: 605 case BRIG_COMPARE_SNE: 606 case BRIG_COMPARE_SNEU: 607 return (src0 != src1); 608 609 case BRIG_COMPARE_LT: 610 case BRIG_COMPARE_LTU: 611 case BRIG_COMPARE_SLT: 612 case BRIG_COMPARE_SLTU: 613 return (src0 < src1); 614 615 case BRIG_COMPARE_LE: 616 case BRIG_COMPARE_LEU: 617 case BRIG_COMPARE_SLE: 618 case BRIG_COMPARE_SLEU: 619 return (src0 <= src1); 620 621 case BRIG_COMPARE_GT: 622 case BRIG_COMPARE_GTU: 623 case BRIG_COMPARE_SGT: 624 case BRIG_COMPARE_SGTU: 625 return (src0 > src1); 626 627 case BRIG_COMPARE_GE: 628 case BRIG_COMPARE_GEU: 629 case BRIG_COMPARE_SGE: 630 case BRIG_COMPARE_SGEU: 631 return (src0 >= src1); 632 633 case BRIG_COMPARE_NUM: 634 case BRIG_COMPARE_SNUM: 635 return (src0 == src0) || (src1 == src1); 636 637 case BRIG_COMPARE_NAN: 638 case BRIG_COMPARE_SNAN: 639 return (src0 != src0) || (src1 != src1); 640 641 default: 642 fatal("Bad cmpOp value %d\n", (int)cmpOp); 643 } 644 } 645 646 template<typename T> 647 int32_t 648 firstbit(T src0) 649 { 650 if (!src0) 651 return -1; 652 653 //handle positive and negative numbers 654 T tmp = ((int64_t)src0 < 0) ? (~src0) : (src0); 655 656 //the starting pos is MSB 657 int pos = 8 * sizeof(T) - 1; 658 int cnt = 0; 659 660 //search the first bit set to 1 661 while (!(tmp & (1 << pos))) { 662 ++cnt; 663 --pos; 664 } 665 return cnt; 666 } 667 668 const char* cmpOpToString(Brig::BrigCompareOperation cmpOp); 669 670 template<typename DestOperandType, typename SrcOperandType> 671 class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType, 672 2> 673 { 674 protected: 675 Brig::BrigCompareOperation cmpOp; 676 677 public: 678 CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 679 const char *_opcode) 680 : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj, 681 _opcode) 682 { 683 assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP); 684 Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib; 685 cmpOp = (Brig::BrigCompareOperation)i->compare; 686 } 687 }; 688 689 template<typename DestDataType, typename SrcDataType> 690 class CmpInst : public CmpInstBase<typename DestDataType::OperandType, 691 typename SrcDataType::OperandType> 692 { 693 public: 694 std::string 695 opcode_suffix() 696 { 697 return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp), 698 DestDataType::label, SrcDataType::label); 699 } 700 701 CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 702 const char *_opcode) 703 : CmpInstBase<typename DestDataType::OperandType, 704 typename SrcDataType::OperandType>(ib, obj, _opcode) 705 { 706 } 707 }; 708 709 template<typename DestDataType, typename SrcDataType> 710 class CvtInst : public CommonInstBase<typename DestDataType::OperandType, 711 typename SrcDataType::OperandType, 1> 712 { 713 public: 714 std::string opcode_suffix() 715 { 716 return csprintf("_%s_%s", DestDataType::label, SrcDataType::label); 717 } 718 719 CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 720 const char *_opcode) 721 : CommonInstBase<typename DestDataType::OperandType, 722 typename SrcDataType::OperandType, 723 1>(ib, obj, _opcode) 724 { 725 } 726 }; 727 728 template<typename DestDataType, typename SrcDataType> 729 class PopcountInst : 730 public CommonInstBase<typename DestDataType::OperandType, 731 typename SrcDataType::OperandType, 1> 732 { 733 public: 734 std::string opcode_suffix() 735 { 736 return csprintf("_%s_%s", DestDataType::label, SrcDataType::label); 737 } 738 739 PopcountInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 740 const char *_opcode) 741 : CommonInstBase<typename DestDataType::OperandType, 742 typename SrcDataType::OperandType, 743 1>(ib, obj, _opcode) 744 { 745 } 746 }; 747 748 class Stub : public HsailGPUStaticInst 749 { 750 public: 751 Stub(const Brig::BrigInstBase *ib, const BrigObject *obj, 752 const char *_opcode) 753 : HsailGPUStaticInst(obj, _opcode) 754 { 755 } 756 757 void generateDisassembly() override 758 { 759 disassembly = csprintf("%s", opcode); 760 } 761 762 bool isVectorRegister(int operandIndex) override { return false; } 763 bool isCondRegister(int operandIndex) override { return false; } 764 bool isScalarRegister(int operandIndex) override { return false; } 765 bool isSrcOperand(int operandIndex) override { return false; } 766 bool isDstOperand(int operandIndex) override { return false; } 767 int getOperandSize(int operandIndex) override { return 0; } 768 769 int 770 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override 771 { 772 return -1; 773 } 774 775 int numSrcRegOperands() override { return 0; } 776 int numDstRegOperands() override { return 0; } 777 int getNumOperands() override { return 0; } 778 }; 779 780 class SpecialInstNoSrcNoDest : public HsailGPUStaticInst 781 { 782 public: 783 SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib, 784 const BrigObject *obj, const char *_opcode) 785 : HsailGPUStaticInst(obj, _opcode) 786 { 787 } 788 789 bool isVectorRegister(int operandIndex) override { return false; } 790 bool isCondRegister(int operandIndex) override { return false; } 791 bool isScalarRegister(int operandIndex) override { return false; } 792 bool isSrcOperand(int operandIndex) override { return false; } 793 bool isDstOperand(int operandIndex) override { return false; } 794 int getOperandSize(int operandIndex) override { return 0; } 795 796 int 797 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override 798 { 799 return -1; 800 } 801 802 int numSrcRegOperands() override { return 0; } 803 int numDstRegOperands() override { return 0; } 804 int getNumOperands() override { return 0; } 805 }; 806 807 template<typename DestOperandType> 808 class SpecialInstNoSrcBase : public HsailGPUStaticInst 809 { 810 protected: 811 typename DestOperandType::DestOperand dest; 812 813 void generateDisassembly() 814 { 815 disassembly = csprintf("%s %s", opcode, dest.disassemble()); 816 } 817 818 public: 819 SpecialInstNoSrcBase(const Brig::BrigInstBase *ib, 820 const BrigObject *obj, const char *_opcode) 821 : HsailGPUStaticInst(obj, _opcode) 822 { 823 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 824 dest.init(op_offs, obj); 825 } 826 827 bool isVectorRegister(int operandIndex) { 828 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 829 return dest.isVectorRegister(); 830 } 831 bool isCondRegister(int operandIndex) { 832 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 833 return dest.isCondRegister(); 834 } 835 bool isScalarRegister(int operandIndex) { 836 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 837 return dest.isScalarRegister(); 838 } 839 bool isSrcOperand(int operandIndex) { return false; } 840 bool isDstOperand(int operandIndex) { return true; } 841 int getOperandSize(int operandIndex) { 842 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 843 return dest.opSize(); 844 } 845 846 int 847 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) 848 { 849 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 850 return dest.regIndex(); 851 } 852 853 int numSrcRegOperands() { return 0; } 854 int numDstRegOperands() { return dest.isVectorRegister(); } 855 int getNumOperands() { return 1; } 856 }; 857 858 template<typename DestDataType> 859 class SpecialInstNoSrc : 860 public SpecialInstNoSrcBase<typename DestDataType::OperandType> 861 { 862 public: 863 typedef typename DestDataType::CType DestCType; 864 865 SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj, 866 const char *_opcode) 867 : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj, 868 _opcode) 869 { 870 } 871 }; 872 873 template<typename DestOperandType> 874 class SpecialInst1SrcBase : public HsailGPUStaticInst 875 { 876 protected: 877 typedef int SrcCType; // used in execute() template 878 879 typename DestOperandType::DestOperand dest; 880 ImmOperand<SrcCType> src0; 881 882 void 883 generateDisassembly() 884 { 885 disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(), 886 src0.disassemble()); 887 } 888 889 public: 890 SpecialInst1SrcBase(const Brig::BrigInstBase *ib, 891 const BrigObject *obj, const char *_opcode) 892 : HsailGPUStaticInst(obj, _opcode) 893 { 894 setFlag(ALU); 895 896 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 897 dest.init(op_offs, obj); 898 899 op_offs = obj->getOperandPtr(ib->operands, 1); 900 src0.init(op_offs, obj); 901 } 902 bool isVectorRegister(int operandIndex) { 903 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 904 return dest.isVectorRegister(); 905 } 906 bool isCondRegister(int operandIndex) { 907 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 908 return dest.isCondRegister(); 909 } 910 bool isScalarRegister(int operandIndex) { 911 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 912 return dest.isScalarRegister(); 913 } 914 bool isSrcOperand(int operandIndex) { return false; } 915 bool isDstOperand(int operandIndex) { return true; } 916 int getOperandSize(int operandIndex) { 917 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 918 return dest.opSize(); 919 } 920 921 int 922 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) 923 { 924 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 925 return dest.regIndex(); 926 } 927 928 int numSrcRegOperands() { return 0; } 929 int numDstRegOperands() { return dest.isVectorRegister(); } 930 int getNumOperands() { return 1; } 931 }; 932 933 template<typename DestDataType> 934 class SpecialInst1Src : 935 public SpecialInst1SrcBase<typename DestDataType::OperandType> 936 { 937 public: 938 typedef typename DestDataType::CType DestCType; 939 940 SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj, 941 const char *_opcode) 942 : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj, 943 _opcode) 944 { 945 } 946 }; 947 948 class Ret : public SpecialInstNoSrcNoDest 949 { 950 public: 951 typedef SpecialInstNoSrcNoDest Base; 952 953 Ret(const Brig::BrigInstBase *ib, const BrigObject *obj) 954 : Base(ib, obj, "ret") 955 { 956 setFlag(GPUStaticInst::Return); 957 } 958 959 void execute(GPUDynInstPtr gpuDynInst); 960 }; 961 962 class Barrier : public SpecialInstNoSrcNoDest 963 { 964 public: 965 typedef SpecialInstNoSrcNoDest Base; 966 uint8_t width; 967 968 Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj) 969 : Base(ib, obj, "barrier") 970 { 971 setFlag(GPUStaticInst::MemBarrier); 972 assert(ib->base.kind == Brig::BRIG_KIND_INST_BR); 973 width = (uint8_t)((Brig::BrigInstBr*)ib)->width; 974 } 975 976 void execute(GPUDynInstPtr gpuDynInst); 977 }; 978 979 class MemFence : public SpecialInstNoSrcNoDest 980 { 981 public: 982 typedef SpecialInstNoSrcNoDest Base; 983 984 Brig::BrigMemoryOrder memFenceMemOrder; 985 Brig::BrigMemoryScope memFenceScopeSegGroup; 986 Brig::BrigMemoryScope memFenceScopeSegGlobal; 987 Brig::BrigMemoryScope memFenceScopeSegImage; 988 989 MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj) 990 : Base(ib, obj, "memfence") 991 { 992 assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE); 993 994 memFenceScopeSegGlobal = (Brig::BrigMemoryScope) 995 ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope; 996 997 memFenceScopeSegGroup = (Brig::BrigMemoryScope) 998 ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope; 999 1000 memFenceScopeSegImage = (Brig::BrigMemoryScope) 1001 ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope; 1002 1003 memFenceMemOrder = (Brig::BrigMemoryOrder) 1004 ((Brig::BrigInstMemFence*)ib)->memoryOrder; 1005 1006 setFlag(MemoryRef); 1007 setFlag(GPUStaticInst::MemFence); 1008 1009 switch (memFenceMemOrder) { 1010 case Brig::BRIG_MEMORY_ORDER_NONE: 1011 setFlag(NoOrder); 1012 break; 1013 case Brig::BRIG_MEMORY_ORDER_RELAXED: 1014 setFlag(RelaxedOrder); 1015 break; 1016 case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE: 1017 setFlag(Acquire); 1018 break; 1019 case Brig::BRIG_MEMORY_ORDER_SC_RELEASE: 1020 setFlag(Release); 1021 break; 1022 case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: 1023 setFlag(AcquireRelease); 1024 break; 1025 default: 1026 fatal("MemInst has bad BrigMemoryOrder\n"); 1027 } 1028 1029 // set inst flags based on scopes 1030 if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE && 1031 memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { 1032 setFlag(GPUStaticInst::GlobalSegment); 1033 1034 /** 1035 * A memory fence that has scope for 1036 * both segments will use the global 1037 * segment, and be executed in the 1038 * global memory pipeline, therefore, 1039 * we set the segment to match the 1040 * global scope only 1041 */ 1042 switch (memFenceScopeSegGlobal) { 1043 case Brig::BRIG_MEMORY_SCOPE_NONE: 1044 setFlag(NoScope); 1045 break; 1046 case Brig::BRIG_MEMORY_SCOPE_WORKITEM: 1047 setFlag(WorkitemScope); 1048 break; 1049 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: 1050 setFlag(WorkgroupScope); 1051 break; 1052 case Brig::BRIG_MEMORY_SCOPE_AGENT: 1053 setFlag(DeviceScope); 1054 break; 1055 case Brig::BRIG_MEMORY_SCOPE_SYSTEM: 1056 setFlag(SystemScope); 1057 break; 1058 default: 1059 fatal("MemFence has bad global scope type\n"); 1060 } 1061 } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) { 1062 setFlag(GPUStaticInst::GlobalSegment); 1063 1064 switch (memFenceScopeSegGlobal) { 1065 case Brig::BRIG_MEMORY_SCOPE_NONE: 1066 setFlag(NoScope); 1067 break; 1068 case Brig::BRIG_MEMORY_SCOPE_WORKITEM: 1069 setFlag(WorkitemScope); 1070 break; 1071 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: 1072 setFlag(WorkgroupScope); 1073 break; 1074 case Brig::BRIG_MEMORY_SCOPE_AGENT: 1075 setFlag(DeviceScope); 1076 break; 1077 case Brig::BRIG_MEMORY_SCOPE_SYSTEM: 1078 setFlag(SystemScope); 1079 break; 1080 default: 1081 fatal("MemFence has bad global scope type\n"); 1082 } 1083 } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { 1084 setFlag(GPUStaticInst::GroupSegment); 1085 1086 switch (memFenceScopeSegGroup) { 1087 case Brig::BRIG_MEMORY_SCOPE_NONE: 1088 setFlag(NoScope); 1089 break; 1090 case Brig::BRIG_MEMORY_SCOPE_WORKITEM: 1091 setFlag(WorkitemScope); 1092 break; 1093 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: 1094 setFlag(WorkgroupScope); 1095 break; 1096 case Brig::BRIG_MEMORY_SCOPE_AGENT: 1097 setFlag(DeviceScope); 1098 break; 1099 case Brig::BRIG_MEMORY_SCOPE_SYSTEM: 1100 setFlag(SystemScope); 1101 break; 1102 default: 1103 fatal("MemFence has bad group scope type\n"); 1104 } 1105 } else { 1106 fatal("MemFence constructor: bad scope specifiers\n"); 1107 } 1108 } 1109 1110 void 1111 initiateAcc(GPUDynInstPtr gpuDynInst) 1112 { 1113 Wavefront *wave = gpuDynInst->wavefront(); 1114 wave->computeUnit->injectGlobalMemFence(gpuDynInst); 1115 } 1116 1117 void 1118 execute(GPUDynInstPtr gpuDynInst) 1119 { 1120 Wavefront *w = gpuDynInst->wavefront(); 1121 // 2 cases: 1122 // * memfence to a sequentially consistent memory (e.g., LDS). 1123 // These can be handled as no-ops. 1124 // * memfence to a relaxed consistency cache (e.g., Hermes, Viper, 1125 // etc.). We send a packet, tagged with the memory order and 1126 // scope, and let the GPU coalescer handle it. 1127 1128 if (isGlobalSeg()) { 1129 gpuDynInst->simdId = w->simdId; 1130 gpuDynInst->wfSlotId = w->wfSlotId; 1131 gpuDynInst->wfDynId = w->wfDynId; 1132 gpuDynInst->kern_id = w->kernId; 1133 gpuDynInst->cu_id = w->computeUnit->cu_id; 1134 1135 gpuDynInst->useContinuation = false; 1136 GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe); 1137 gmp->issueRequest(gpuDynInst); 1138 1139 w->wrGmReqsInPipe--; 1140 w->rdGmReqsInPipe--; 1141 w->memReqsInPipe--; 1142 w->outstandingReqs++; 1143 } else if (isGroupSeg()) { 1144 // no-op 1145 } else { 1146 fatal("MemFence execute: bad op type\n"); 1147 } 1148 } 1149 }; 1150 1151 class Call : public HsailGPUStaticInst 1152 { 1153 public: 1154 // private helper functions 1155 void calcAddr(Wavefront* w, GPUDynInstPtr m); 1156 1157 void 1158 generateDisassembly() 1159 { 1160 if (dest.disassemble() == "") { 1161 disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(), 1162 src1.disassemble()); 1163 } else { 1164 disassembly = csprintf("%s %s (%s) (%s)", opcode, 1165 src0.disassemble(), dest.disassemble(), 1166 src1.disassemble()); 1167 } 1168 } 1169 1170 bool 1171 isPseudoOp() 1172 { 1173 std::string func_name = src0.disassemble(); 1174 if (func_name.find("__gem5_hsail_op") != std::string::npos) { 1175 return true; 1176 } 1177 return false; 1178 } 1179 1180 // member variables 1181 ListOperand dest; 1182 FunctionRefOperand src0; 1183 ListOperand src1; 1184 HsailCode *func_ptr; 1185 1186 // exec function for pseudo instructions mapped on top of call opcode 1187 void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst); 1188 1189 // user-defined pseudo instructions 1190 void MagicPrintLane(Wavefront *w); 1191 void MagicPrintLane64(Wavefront *w); 1192 void MagicPrintWF32(Wavefront *w); 1193 void MagicPrintWF64(Wavefront *w); 1194 void MagicPrintWFFloat(Wavefront *w); 1195 void MagicSimBreak(Wavefront *w); 1196 void MagicPrefixSum(Wavefront *w); 1197 void MagicReduction(Wavefront *w); 1198 void MagicMaskLower(Wavefront *w); 1199 void MagicMaskUpper(Wavefront *w); 1200 void MagicJoinWFBar(Wavefront *w); 1201 void MagicWaitWFBar(Wavefront *w); 1202 void MagicPanic(Wavefront *w); 1203 1204 void MagicAtomicNRAddGlobalU32Reg(Wavefront *w, 1205 GPUDynInstPtr gpuDynInst); 1206 1207 void MagicAtomicNRAddGroupU32Reg(Wavefront *w, 1208 GPUDynInstPtr gpuDynInst); 1209 1210 void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst); 1211 1212 void MagicXactCasLd(Wavefront *w); 1213 void MagicMostSigThread(Wavefront *w); 1214 void MagicMostSigBroadcast(Wavefront *w); 1215 1216 void MagicPrintWF32ID(Wavefront *w); 1217 void MagicPrintWFID64(Wavefront *w); 1218 1219 Call(const Brig::BrigInstBase *ib, const BrigObject *obj) 1220 : HsailGPUStaticInst(obj, "call") 1221 { 1222 setFlag(ALU); 1223 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 1224 dest.init(op_offs, obj); 1225 op_offs = obj->getOperandPtr(ib->operands, 1); 1226 src0.init(op_offs, obj); 1227 1228 func_ptr = nullptr; 1229 std::string func_name = src0.disassemble(); 1230 if (!isPseudoOp()) { 1231 func_ptr = dynamic_cast<HsailCode*>(obj-> 1232 getFunction(func_name)); 1233 1234 if (!func_ptr) 1235 fatal("call::exec cannot find function: %s\n", func_name); 1236 } 1237 1238 op_offs = obj->getOperandPtr(ib->operands, 2); 1239 src1.init(op_offs, obj); 1240 } 1241 1242 bool isVectorRegister(int operandIndex) { return false; } 1243 bool isCondRegister(int operandIndex) { return false; } 1244 bool isScalarRegister(int operandIndex) { return false; } 1245 bool isSrcOperand(int operandIndex) { return false; } 1246 bool isDstOperand(int operandIndex) { return false; } 1247 int getOperandSize(int operandIndex) { return 0; } 1248 1249 int 1250 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) 1251 { 1252 return -1; 1253 } 1254 1255 void 1256 execute(GPUDynInstPtr gpuDynInst) 1257 { 1258 Wavefront *w = gpuDynInst->wavefront(); 1259 1260 std::string func_name = src0.disassemble(); 1261 if (isPseudoOp()) { 1262 execPseudoInst(w, gpuDynInst); 1263 } else { 1264 fatal("Native HSAIL functions are not yet implemented: %s\n", 1265 func_name); 1266 } 1267 } 1268 int numSrcRegOperands() { return 0; } 1269 int numDstRegOperands() { return 0; } 1270 int getNumOperands() { return 2; } 1271 }; 1272 1273 template<typename T> T heynot(T arg) { return ~arg; } 1274 template<> inline bool heynot<bool>(bool arg) { return !arg; } 1275 1276 1277 /* Explicitly declare template static member variables to avoid 1278 * warnings in some clang versions 1279 */ 1280 template<> const char *B1::label; 1281 template<> const char *B8::label; 1282 template<> const char *B16::label; 1283 template<> const char *B32::label; 1284 template<> const char *B64::label; 1285 template<> const char *S8::label; 1286 template<> const char *S16::label; 1287 template<> const char *S32::label; 1288 template<> const char *S64::label; 1289 template<> const char *U8::label; 1290 template<> const char *U16::label; 1291 template<> const char *U32::label; 1292 template<> const char *U64::label; 1293 template<> const char *F32::label; 1294 template<> const char *F64::label; 1295 1296} // namespace HsailISA 1297 1298#endif // __ARCH_HSAIL_INSTS_DECL_HH__ 1299