decl.hh revision 11699
1/* 2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Author: Steve Reinhardt 34 */ 35 36#ifndef __ARCH_HSAIL_INSTS_DECL_HH__ 37#define __ARCH_HSAIL_INSTS_DECL_HH__ 38 39#include <cmath> 40 41#include "arch/hsail/insts/gpu_static_inst.hh" 42#include "arch/hsail/operand.hh" 43#include "debug/HSAIL.hh" 44#include "gpu-compute/gpu_dyn_inst.hh" 45#include "gpu-compute/shader.hh" 46 47namespace HsailISA 48{ 49 template<typename _DestOperand, typename _SrcOperand> 50 class HsailOperandType 51 { 52 public: 53 typedef _DestOperand DestOperand; 54 typedef _SrcOperand SrcOperand; 55 }; 56 57 typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType; 58 typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType; 59 typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType; 60 61 // The IsBits parameter serves only to disambiguate tbhe B* types from 62 // the U* types, which otherwise would be identical (and 63 // indistinguishable). 64 template<typename _OperandType, typename _CType, Enums::MemType _memType, 65 vgpr_type _vgprType, int IsBits=0> 66 class HsailDataType 67 { 68 public: 69 typedef _OperandType OperandType; 70 typedef _CType CType; 71 static const Enums::MemType memType = _memType; 72 static const vgpr_type vgprType = _vgprType; 73 static const char *label; 74 }; 75 76 typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1; 77 typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8; 78 79 typedef HsailDataType<SRegOperandType, uint16_t, 80 Enums::M_U16, VT_32, 1> B16; 81 82 typedef HsailDataType<SRegOperandType, uint32_t, 83 Enums::M_U32, VT_32, 1> B32; 84 85 typedef HsailDataType<DRegOperandType, uint64_t, 86 Enums::M_U64, VT_64, 1> B64; 87 88 typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8; 89 typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16; 90 typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32; 91 typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64; 92 93 typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8; 94 typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16; 95 typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32; 96 typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64; 97 98 typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32; 99 typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64; 100 101 template<typename DestOperandType, typename SrcOperandType, 102 int NumSrcOperands> 103 class CommonInstBase : public HsailGPUStaticInst 104 { 105 protected: 106 typename DestOperandType::DestOperand dest; 107 typename SrcOperandType::SrcOperand src[NumSrcOperands]; 108 109 void 110 generateDisassembly() 111 { 112 disassembly = csprintf("%s%s %s", opcode, opcode_suffix(), 113 dest.disassemble()); 114 115 for (int i = 0; i < NumSrcOperands; ++i) { 116 disassembly += ","; 117 disassembly += src[i].disassemble(); 118 } 119 } 120 121 virtual std::string opcode_suffix() = 0; 122 123 public: 124 CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 125 const char *opcode) 126 : HsailGPUStaticInst(obj, opcode) 127 { 128 setFlag(ALU); 129 130 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 131 132 dest.init(op_offs, obj); 133 134 for (int i = 0; i < NumSrcOperands; ++i) { 135 op_offs = obj->getOperandPtr(ib->operands, i + 1); 136 src[i].init(op_offs, obj); 137 } 138 } 139 140 bool isVectorRegister(int operandIndex) { 141 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 142 if (operandIndex < NumSrcOperands) 143 return src[operandIndex].isVectorRegister(); 144 else 145 return dest.isVectorRegister(); 146 } 147 bool isCondRegister(int operandIndex) { 148 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 149 if (operandIndex < NumSrcOperands) 150 return src[operandIndex].isCondRegister(); 151 else 152 return dest.isCondRegister(); 153 } 154 bool isScalarRegister(int operandIndex) { 155 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 156 if (operandIndex < NumSrcOperands) 157 return src[operandIndex].isScalarRegister(); 158 else 159 return dest.isScalarRegister(); 160 } 161 bool isSrcOperand(int operandIndex) { 162 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 163 if (operandIndex < NumSrcOperands) 164 return true; 165 return false; 166 } 167 168 bool isDstOperand(int operandIndex) { 169 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 170 if (operandIndex >= NumSrcOperands) 171 return true; 172 return false; 173 } 174 int getOperandSize(int operandIndex) { 175 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 176 if (operandIndex < NumSrcOperands) 177 return src[operandIndex].opSize(); 178 else 179 return dest.opSize(); 180 } 181 int 182 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) 183 { 184 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 185 186 if (operandIndex < NumSrcOperands) 187 return src[operandIndex].regIndex(); 188 else 189 return dest.regIndex(); 190 } 191 int numSrcRegOperands() { 192 int operands = 0; 193 for (int i = 0; i < NumSrcOperands; i++) { 194 if (src[i].isVectorRegister()) { 195 operands++; 196 } 197 } 198 return operands; 199 } 200 int numDstRegOperands() { return dest.isVectorRegister(); } 201 int getNumOperands() { return NumSrcOperands + 1; } 202 }; 203 204 template<typename DataType, int NumSrcOperands> 205 class ArithInst : public CommonInstBase<typename DataType::OperandType, 206 typename DataType::OperandType, 207 NumSrcOperands> 208 { 209 public: 210 std::string opcode_suffix() { return csprintf("_%s", DataType::label); } 211 212 ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 213 const char *opcode) 214 : CommonInstBase<typename DataType::OperandType, 215 typename DataType::OperandType, 216 NumSrcOperands>(ib, obj, opcode) 217 { 218 } 219 }; 220 221 template<typename DestOperandType, typename Src0OperandType, 222 typename Src1OperandType, typename Src2OperandType> 223 class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst 224 { 225 protected: 226 typename DestOperandType::DestOperand dest; 227 typename Src0OperandType::SrcOperand src0; 228 typename Src1OperandType::SrcOperand src1; 229 typename Src2OperandType::SrcOperand src2; 230 231 void 232 generateDisassembly() 233 { 234 disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(), 235 src0.disassemble(), src1.disassemble(), 236 src2.disassemble()); 237 } 238 239 public: 240 ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib, 241 const BrigObject *obj, 242 const char *opcode) 243 : HsailGPUStaticInst(obj, opcode) 244 { 245 setFlag(ALU); 246 247 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 248 dest.init(op_offs, obj); 249 250 op_offs = obj->getOperandPtr(ib->operands, 1); 251 src0.init(op_offs, obj); 252 253 op_offs = obj->getOperandPtr(ib->operands, 2); 254 src1.init(op_offs, obj); 255 256 op_offs = obj->getOperandPtr(ib->operands, 3); 257 src2.init(op_offs, obj); 258 } 259 260 bool isVectorRegister(int operandIndex) { 261 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 262 if (!operandIndex) 263 return src0.isVectorRegister(); 264 else if (operandIndex == 1) 265 return src1.isVectorRegister(); 266 else if (operandIndex == 2) 267 return src2.isVectorRegister(); 268 else 269 return dest.isVectorRegister(); 270 } 271 bool isCondRegister(int operandIndex) { 272 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 273 if (!operandIndex) 274 return src0.isCondRegister(); 275 else if (operandIndex == 1) 276 return src1.isCondRegister(); 277 else if (operandIndex == 2) 278 return src2.isCondRegister(); 279 else 280 return dest.isCondRegister(); 281 } 282 bool isScalarRegister(int operandIndex) { 283 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 284 if (!operandIndex) 285 return src0.isScalarRegister(); 286 else if (operandIndex == 1) 287 return src1.isScalarRegister(); 288 else if (operandIndex == 2) 289 return src2.isScalarRegister(); 290 else 291 return dest.isScalarRegister(); 292 } 293 bool isSrcOperand(int operandIndex) { 294 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 295 if (operandIndex < 3) 296 return true; 297 else 298 return false; 299 } 300 bool isDstOperand(int operandIndex) { 301 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 302 if (operandIndex >= 3) 303 return true; 304 else 305 return false; 306 } 307 int getOperandSize(int operandIndex) { 308 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 309 if (!operandIndex) 310 return src0.opSize(); 311 else if (operandIndex == 1) 312 return src1.opSize(); 313 else if (operandIndex == 2) 314 return src2.opSize(); 315 else 316 return dest.opSize(); 317 } 318 319 int 320 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) 321 { 322 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 323 if (!operandIndex) 324 return src0.regIndex(); 325 else if (operandIndex == 1) 326 return src1.regIndex(); 327 else if (operandIndex == 2) 328 return src2.regIndex(); 329 else 330 return dest.regIndex(); 331 } 332 333 int numSrcRegOperands() { 334 int operands = 0; 335 if (src0.isVectorRegister()) { 336 operands++; 337 } 338 if (src1.isVectorRegister()) { 339 operands++; 340 } 341 if (src2.isVectorRegister()) { 342 operands++; 343 } 344 return operands; 345 } 346 int numDstRegOperands() { return dest.isVectorRegister(); } 347 int getNumOperands() { return 4; } 348 }; 349 350 template<typename DestDataType, typename Src0DataType, 351 typename Src1DataType, typename Src2DataType> 352 class ThreeNonUniformSourceInst : 353 public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType, 354 typename Src0DataType::OperandType, 355 typename Src1DataType::OperandType, 356 typename Src2DataType::OperandType> 357 { 358 public: 359 typedef typename DestDataType::CType DestCType; 360 typedef typename Src0DataType::CType Src0CType; 361 typedef typename Src1DataType::CType Src1CType; 362 typedef typename Src2DataType::CType Src2CType; 363 364 ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib, 365 const BrigObject *obj, const char *opcode) 366 : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType, 367 typename Src0DataType::OperandType, 368 typename Src1DataType::OperandType, 369 typename Src2DataType::OperandType>(ib, 370 obj, opcode) 371 { 372 } 373 }; 374 375 template<typename DataType> 376 class CmovInst : public ThreeNonUniformSourceInst<DataType, B1, 377 DataType, DataType> 378 { 379 public: 380 CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 381 const char *opcode) 382 : ThreeNonUniformSourceInst<DataType, B1, DataType, 383 DataType>(ib, obj, opcode) 384 { 385 } 386 }; 387 388 template<typename DataType> 389 class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType, 390 DataType, U32, 391 U32> 392 { 393 public: 394 ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 395 const char *opcode) 396 : ThreeNonUniformSourceInst<DataType, DataType, U32, 397 U32>(ib, obj, opcode) 398 { 399 } 400 }; 401 402 template<typename DestOperandType, typename Src0OperandType, 403 typename Src1OperandType> 404 class TwoNonUniformSourceInstBase : public HsailGPUStaticInst 405 { 406 protected: 407 typename DestOperandType::DestOperand dest; 408 typename Src0OperandType::SrcOperand src0; 409 typename Src1OperandType::SrcOperand src1; 410 411 void 412 generateDisassembly() 413 { 414 disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(), 415 src0.disassemble(), src1.disassemble()); 416 } 417 418 419 public: 420 TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib, 421 const BrigObject *obj, const char *opcode) 422 : HsailGPUStaticInst(obj, opcode) 423 { 424 setFlag(ALU); 425 426 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 427 dest.init(op_offs, obj); 428 429 op_offs = obj->getOperandPtr(ib->operands, 1); 430 src0.init(op_offs, obj); 431 432 op_offs = obj->getOperandPtr(ib->operands, 2); 433 src1.init(op_offs, obj); 434 } 435 bool isVectorRegister(int operandIndex) { 436 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 437 if (!operandIndex) 438 return src0.isVectorRegister(); 439 else if (operandIndex == 1) 440 return src1.isVectorRegister(); 441 else 442 return dest.isVectorRegister(); 443 } 444 bool isCondRegister(int operandIndex) { 445 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 446 if (!operandIndex) 447 return src0.isCondRegister(); 448 else if (operandIndex == 1) 449 return src1.isCondRegister(); 450 else 451 return dest.isCondRegister(); 452 } 453 bool isScalarRegister(int operandIndex) { 454 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 455 if (!operandIndex) 456 return src0.isScalarRegister(); 457 else if (operandIndex == 1) 458 return src1.isScalarRegister(); 459 else 460 return dest.isScalarRegister(); 461 } 462 bool isSrcOperand(int operandIndex) { 463 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 464 if (operandIndex < 2) 465 return true; 466 else 467 return false; 468 } 469 bool isDstOperand(int operandIndex) { 470 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 471 if (operandIndex >= 2) 472 return true; 473 else 474 return false; 475 } 476 int getOperandSize(int operandIndex) { 477 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 478 if (!operandIndex) 479 return src0.opSize(); 480 else if (operandIndex == 1) 481 return src1.opSize(); 482 else 483 return dest.opSize(); 484 } 485 486 int 487 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) 488 { 489 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 490 if (!operandIndex) 491 return src0.regIndex(); 492 else if (operandIndex == 1) 493 return src1.regIndex(); 494 else 495 return dest.regIndex(); 496 } 497 498 int numSrcRegOperands() { 499 int operands = 0; 500 if (src0.isVectorRegister()) { 501 operands++; 502 } 503 if (src1.isVectorRegister()) { 504 operands++; 505 } 506 return operands; 507 } 508 int numDstRegOperands() { return dest.isVectorRegister(); } 509 int getNumOperands() { return 3; } 510 }; 511 512 template<typename DestDataType, typename Src0DataType, 513 typename Src1DataType> 514 class TwoNonUniformSourceInst : 515 public TwoNonUniformSourceInstBase<typename DestDataType::OperandType, 516 typename Src0DataType::OperandType, 517 typename Src1DataType::OperandType> 518 { 519 public: 520 typedef typename DestDataType::CType DestCType; 521 typedef typename Src0DataType::CType Src0CType; 522 typedef typename Src1DataType::CType Src1CType; 523 524 TwoNonUniformSourceInst(const Brig::BrigInstBase *ib, 525 const BrigObject *obj, const char *opcode) 526 : TwoNonUniformSourceInstBase<typename DestDataType::OperandType, 527 typename Src0DataType::OperandType, 528 typename Src1DataType::OperandType>(ib, 529 obj, opcode) 530 { 531 } 532 }; 533 534 // helper function for ClassInst 535 template<typename T> 536 bool 537 fpclassify(T src0, uint32_t src1) 538 { 539 int fpclass = std::fpclassify(src0); 540 541 if ((src1 & 0x3) && (fpclass == FP_NAN)) { 542 return true; 543 } 544 545 if (src0 <= -0.0) { 546 if ((src1 & 0x4) && fpclass == FP_INFINITE) 547 return true; 548 if ((src1 & 0x8) && fpclass == FP_NORMAL) 549 return true; 550 if ((src1 & 0x10) && fpclass == FP_SUBNORMAL) 551 return true; 552 if ((src1 & 0x20) && fpclass == FP_ZERO) 553 return true; 554 } else { 555 if ((src1 & 0x40) && fpclass == FP_ZERO) 556 return true; 557 if ((src1 & 0x80) && fpclass == FP_SUBNORMAL) 558 return true; 559 if ((src1 & 0x100) && fpclass == FP_NORMAL) 560 return true; 561 if ((src1 & 0x200) && fpclass == FP_INFINITE) 562 return true; 563 } 564 return false; 565 } 566 567 template<typename DataType> 568 class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32> 569 { 570 public: 571 ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 572 const char *opcode) 573 : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode) 574 { 575 } 576 }; 577 578 template<typename DataType> 579 class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32> 580 { 581 public: 582 ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 583 const char *opcode) 584 : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode) 585 { 586 } 587 }; 588 589 // helper function for CmpInst 590 template<typename T> 591 bool 592 compare(T src0, T src1, Brig::BrigCompareOperation cmpOp) 593 { 594 using namespace Brig; 595 596 switch (cmpOp) { 597 case BRIG_COMPARE_EQ: 598 case BRIG_COMPARE_EQU: 599 case BRIG_COMPARE_SEQ: 600 case BRIG_COMPARE_SEQU: 601 return (src0 == src1); 602 603 case BRIG_COMPARE_NE: 604 case BRIG_COMPARE_NEU: 605 case BRIG_COMPARE_SNE: 606 case BRIG_COMPARE_SNEU: 607 return (src0 != src1); 608 609 case BRIG_COMPARE_LT: 610 case BRIG_COMPARE_LTU: 611 case BRIG_COMPARE_SLT: 612 case BRIG_COMPARE_SLTU: 613 return (src0 < src1); 614 615 case BRIG_COMPARE_LE: 616 case BRIG_COMPARE_LEU: 617 case BRIG_COMPARE_SLE: 618 case BRIG_COMPARE_SLEU: 619 return (src0 <= src1); 620 621 case BRIG_COMPARE_GT: 622 case BRIG_COMPARE_GTU: 623 case BRIG_COMPARE_SGT: 624 case BRIG_COMPARE_SGTU: 625 return (src0 > src1); 626 627 case BRIG_COMPARE_GE: 628 case BRIG_COMPARE_GEU: 629 case BRIG_COMPARE_SGE: 630 case BRIG_COMPARE_SGEU: 631 return (src0 >= src1); 632 633 case BRIG_COMPARE_NUM: 634 case BRIG_COMPARE_SNUM: 635 return (src0 == src0) || (src1 == src1); 636 637 case BRIG_COMPARE_NAN: 638 case BRIG_COMPARE_SNAN: 639 return (src0 != src0) || (src1 != src1); 640 641 default: 642 fatal("Bad cmpOp value %d\n", (int)cmpOp); 643 } 644 } 645 646 template<typename T> 647 int32_t 648 firstbit(T src0) 649 { 650 if (!src0) 651 return -1; 652 653 //handle positive and negative numbers 654 T tmp = ((int64_t)src0 < 0) ? (~src0) : (src0); 655 656 //the starting pos is MSB 657 int pos = 8 * sizeof(T) - 1; 658 int cnt = 0; 659 660 //search the first bit set to 1 661 while (!(tmp & (1 << pos))) { 662 ++cnt; 663 --pos; 664 } 665 return cnt; 666 } 667 668 const char* cmpOpToString(Brig::BrigCompareOperation cmpOp); 669 670 template<typename DestOperandType, typename SrcOperandType> 671 class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType, 672 2> 673 { 674 protected: 675 Brig::BrigCompareOperation cmpOp; 676 677 public: 678 CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, 679 const char *_opcode) 680 : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj, 681 _opcode) 682 { 683 assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP); 684 Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib; 685 cmpOp = (Brig::BrigCompareOperation)i->compare; 686 } 687 }; 688 689 template<typename DestDataType, typename SrcDataType> 690 class CmpInst : public CmpInstBase<typename DestDataType::OperandType, 691 typename SrcDataType::OperandType> 692 { 693 public: 694 std::string 695 opcode_suffix() 696 { 697 return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp), 698 DestDataType::label, SrcDataType::label); 699 } 700 701 CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 702 const char *_opcode) 703 : CmpInstBase<typename DestDataType::OperandType, 704 typename SrcDataType::OperandType>(ib, obj, _opcode) 705 { 706 } 707 }; 708 709 template<typename DestDataType, typename SrcDataType> 710 class CvtInst : public CommonInstBase<typename DestDataType::OperandType, 711 typename SrcDataType::OperandType, 1> 712 { 713 public: 714 std::string opcode_suffix() 715 { 716 return csprintf("_%s_%s", DestDataType::label, SrcDataType::label); 717 } 718 719 CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj, 720 const char *_opcode) 721 : CommonInstBase<typename DestDataType::OperandType, 722 typename SrcDataType::OperandType, 723 1>(ib, obj, _opcode) 724 { 725 } 726 }; 727 728 class SpecialInstNoSrcNoDest : public HsailGPUStaticInst 729 { 730 public: 731 SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib, 732 const BrigObject *obj, const char *_opcode) 733 : HsailGPUStaticInst(obj, _opcode) 734 { 735 } 736 737 bool isVectorRegister(int operandIndex) { return false; } 738 bool isCondRegister(int operandIndex) { return false; } 739 bool isScalarRegister(int operandIndex) { return false; } 740 bool isSrcOperand(int operandIndex) { return false; } 741 bool isDstOperand(int operandIndex) { return false; } 742 int getOperandSize(int operandIndex) { return 0; } 743 744 int 745 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) 746 { 747 return -1; 748 } 749 750 int numSrcRegOperands() { return 0; } 751 int numDstRegOperands() { return 0; } 752 int getNumOperands() { return 0; } 753 }; 754 755 template<typename DestOperandType> 756 class SpecialInstNoSrcBase : public HsailGPUStaticInst 757 { 758 protected: 759 typename DestOperandType::DestOperand dest; 760 761 void generateDisassembly() 762 { 763 disassembly = csprintf("%s %s", opcode, dest.disassemble()); 764 } 765 766 public: 767 SpecialInstNoSrcBase(const Brig::BrigInstBase *ib, 768 const BrigObject *obj, const char *_opcode) 769 : HsailGPUStaticInst(obj, _opcode) 770 { 771 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 772 dest.init(op_offs, obj); 773 } 774 775 bool isVectorRegister(int operandIndex) { 776 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 777 return dest.isVectorRegister(); 778 } 779 bool isCondRegister(int operandIndex) { 780 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 781 return dest.isCondRegister(); 782 } 783 bool isScalarRegister(int operandIndex) { 784 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 785 return dest.isScalarRegister(); 786 } 787 bool isSrcOperand(int operandIndex) { return false; } 788 bool isDstOperand(int operandIndex) { return true; } 789 int getOperandSize(int operandIndex) { 790 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 791 return dest.opSize(); 792 } 793 794 int 795 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) 796 { 797 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 798 return dest.regIndex(); 799 } 800 801 int numSrcRegOperands() { return 0; } 802 int numDstRegOperands() { return dest.isVectorRegister(); } 803 int getNumOperands() { return 1; } 804 }; 805 806 template<typename DestDataType> 807 class SpecialInstNoSrc : 808 public SpecialInstNoSrcBase<typename DestDataType::OperandType> 809 { 810 public: 811 typedef typename DestDataType::CType DestCType; 812 813 SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj, 814 const char *_opcode) 815 : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj, 816 _opcode) 817 { 818 } 819 }; 820 821 template<typename DestOperandType> 822 class SpecialInst1SrcBase : public HsailGPUStaticInst 823 { 824 protected: 825 typedef int SrcCType; // used in execute() template 826 827 typename DestOperandType::DestOperand dest; 828 ImmOperand<SrcCType> src0; 829 830 void 831 generateDisassembly() 832 { 833 disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(), 834 src0.disassemble()); 835 } 836 837 public: 838 SpecialInst1SrcBase(const Brig::BrigInstBase *ib, 839 const BrigObject *obj, const char *_opcode) 840 : HsailGPUStaticInst(obj, _opcode) 841 { 842 setFlag(ALU); 843 844 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 845 dest.init(op_offs, obj); 846 847 op_offs = obj->getOperandPtr(ib->operands, 1); 848 src0.init(op_offs, obj); 849 } 850 bool isVectorRegister(int operandIndex) { 851 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 852 return dest.isVectorRegister(); 853 } 854 bool isCondRegister(int operandIndex) { 855 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 856 return dest.isCondRegister(); 857 } 858 bool isScalarRegister(int operandIndex) { 859 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 860 return dest.isScalarRegister(); 861 } 862 bool isSrcOperand(int operandIndex) { return false; } 863 bool isDstOperand(int operandIndex) { return true; } 864 int getOperandSize(int operandIndex) { 865 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 866 return dest.opSize(); 867 } 868 869 int 870 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) 871 { 872 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 873 return dest.regIndex(); 874 } 875 876 int numSrcRegOperands() { return 0; } 877 int numDstRegOperands() { return dest.isVectorRegister(); } 878 int getNumOperands() { return 1; } 879 }; 880 881 template<typename DestDataType> 882 class SpecialInst1Src : 883 public SpecialInst1SrcBase<typename DestDataType::OperandType> 884 { 885 public: 886 typedef typename DestDataType::CType DestCType; 887 888 SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj, 889 const char *_opcode) 890 : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj, 891 _opcode) 892 { 893 } 894 }; 895 896 class Ret : public SpecialInstNoSrcNoDest 897 { 898 public: 899 typedef SpecialInstNoSrcNoDest Base; 900 901 Ret(const Brig::BrigInstBase *ib, const BrigObject *obj) 902 : Base(ib, obj, "ret") 903 { 904 setFlag(GPUStaticInst::Return); 905 } 906 907 void execute(GPUDynInstPtr gpuDynInst); 908 }; 909 910 class Barrier : public SpecialInstNoSrcNoDest 911 { 912 public: 913 typedef SpecialInstNoSrcNoDest Base; 914 uint8_t width; 915 916 Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj) 917 : Base(ib, obj, "barrier") 918 { 919 setFlag(GPUStaticInst::MemBarrier); 920 assert(ib->base.kind == Brig::BRIG_KIND_INST_BR); 921 width = (uint8_t)((Brig::BrigInstBr*)ib)->width; 922 } 923 924 void execute(GPUDynInstPtr gpuDynInst); 925 }; 926 927 class MemFence : public SpecialInstNoSrcNoDest 928 { 929 public: 930 typedef SpecialInstNoSrcNoDest Base; 931 932 Brig::BrigMemoryOrder memFenceMemOrder; 933 Brig::BrigMemoryScope memFenceScopeSegGroup; 934 Brig::BrigMemoryScope memFenceScopeSegGlobal; 935 Brig::BrigMemoryScope memFenceScopeSegImage; 936 937 MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj) 938 : Base(ib, obj, "memfence") 939 { 940 assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE); 941 942 memFenceScopeSegGlobal = (Brig::BrigMemoryScope) 943 ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope; 944 945 memFenceScopeSegGroup = (Brig::BrigMemoryScope) 946 ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope; 947 948 memFenceScopeSegImage = (Brig::BrigMemoryScope) 949 ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope; 950 951 memFenceMemOrder = (Brig::BrigMemoryOrder) 952 ((Brig::BrigInstMemFence*)ib)->memoryOrder; 953 954 setFlag(MemoryRef); 955 setFlag(GPUStaticInst::MemFence); 956 957 switch (memFenceMemOrder) { 958 case Brig::BRIG_MEMORY_ORDER_NONE: 959 setFlag(NoOrder); 960 break; 961 case Brig::BRIG_MEMORY_ORDER_RELAXED: 962 setFlag(RelaxedOrder); 963 break; 964 case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE: 965 setFlag(Acquire); 966 break; 967 case Brig::BRIG_MEMORY_ORDER_SC_RELEASE: 968 setFlag(Release); 969 break; 970 case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: 971 setFlag(AcquireRelease); 972 break; 973 default: 974 fatal("MemInst has bad BrigMemoryOrder\n"); 975 } 976 977 // set inst flags based on scopes 978 if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE && 979 memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { 980 setFlag(GPUStaticInst::GlobalSegment); 981 982 /** 983 * A memory fence that has scope for 984 * both segments will use the global 985 * segment, and be executed in the 986 * global memory pipeline, therefore, 987 * we set the segment to match the 988 * global scope only 989 */ 990 switch (memFenceScopeSegGlobal) { 991 case Brig::BRIG_MEMORY_SCOPE_NONE: 992 setFlag(NoScope); 993 break; 994 case Brig::BRIG_MEMORY_SCOPE_WORKITEM: 995 setFlag(WorkitemScope); 996 break; 997 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: 998 setFlag(WorkgroupScope); 999 break; 1000 case Brig::BRIG_MEMORY_SCOPE_AGENT: 1001 setFlag(DeviceScope); 1002 break; 1003 case Brig::BRIG_MEMORY_SCOPE_SYSTEM: 1004 setFlag(SystemScope); 1005 break; 1006 default: 1007 fatal("MemFence has bad global scope type\n"); 1008 } 1009 } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) { 1010 setFlag(GPUStaticInst::GlobalSegment); 1011 1012 switch (memFenceScopeSegGlobal) { 1013 case Brig::BRIG_MEMORY_SCOPE_NONE: 1014 setFlag(NoScope); 1015 break; 1016 case Brig::BRIG_MEMORY_SCOPE_WORKITEM: 1017 setFlag(WorkitemScope); 1018 break; 1019 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: 1020 setFlag(WorkgroupScope); 1021 break; 1022 case Brig::BRIG_MEMORY_SCOPE_AGENT: 1023 setFlag(DeviceScope); 1024 break; 1025 case Brig::BRIG_MEMORY_SCOPE_SYSTEM: 1026 setFlag(SystemScope); 1027 break; 1028 default: 1029 fatal("MemFence has bad global scope type\n"); 1030 } 1031 } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { 1032 setFlag(GPUStaticInst::GroupSegment); 1033 1034 switch (memFenceScopeSegGroup) { 1035 case Brig::BRIG_MEMORY_SCOPE_NONE: 1036 setFlag(NoScope); 1037 break; 1038 case Brig::BRIG_MEMORY_SCOPE_WORKITEM: 1039 setFlag(WorkitemScope); 1040 break; 1041 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: 1042 setFlag(WorkgroupScope); 1043 break; 1044 case Brig::BRIG_MEMORY_SCOPE_AGENT: 1045 setFlag(DeviceScope); 1046 break; 1047 case Brig::BRIG_MEMORY_SCOPE_SYSTEM: 1048 setFlag(SystemScope); 1049 break; 1050 default: 1051 fatal("MemFence has bad group scope type\n"); 1052 } 1053 } else { 1054 fatal("MemFence constructor: bad scope specifiers\n"); 1055 } 1056 } 1057 1058 void 1059 initiateAcc(GPUDynInstPtr gpuDynInst) 1060 { 1061 Wavefront *wave = gpuDynInst->wavefront(); 1062 wave->computeUnit->injectGlobalMemFence(gpuDynInst); 1063 } 1064 1065 void 1066 execute(GPUDynInstPtr gpuDynInst) 1067 { 1068 Wavefront *w = gpuDynInst->wavefront(); 1069 // 2 cases: 1070 // * memfence to a sequentially consistent memory (e.g., LDS). 1071 // These can be handled as no-ops. 1072 // * memfence to a relaxed consistency cache (e.g., Hermes, Viper, 1073 // etc.). We send a packet, tagged with the memory order and 1074 // scope, and let the GPU coalescer handle it. 1075 1076 if (isGlobalSeg()) { 1077 gpuDynInst->simdId = w->simdId; 1078 gpuDynInst->wfSlotId = w->wfSlotId; 1079 gpuDynInst->wfDynId = w->wfDynId; 1080 gpuDynInst->kern_id = w->kernId; 1081 gpuDynInst->cu_id = w->computeUnit->cu_id; 1082 1083 gpuDynInst->useContinuation = false; 1084 GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe); 1085 gmp->getGMReqFIFO().push(gpuDynInst); 1086 1087 w->wrGmReqsInPipe--; 1088 w->rdGmReqsInPipe--; 1089 w->memReqsInPipe--; 1090 w->outstandingReqs++; 1091 } else if (isGroupSeg()) { 1092 // no-op 1093 } else { 1094 fatal("MemFence execute: bad op type\n"); 1095 } 1096 } 1097 }; 1098 1099 class Call : public HsailGPUStaticInst 1100 { 1101 public: 1102 // private helper functions 1103 void calcAddr(Wavefront* w, GPUDynInstPtr m); 1104 1105 void 1106 generateDisassembly() 1107 { 1108 if (dest.disassemble() == "") { 1109 disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(), 1110 src1.disassemble()); 1111 } else { 1112 disassembly = csprintf("%s %s (%s) (%s)", opcode, 1113 src0.disassemble(), dest.disassemble(), 1114 src1.disassemble()); 1115 } 1116 } 1117 1118 bool 1119 isPseudoOp() 1120 { 1121 std::string func_name = src0.disassemble(); 1122 if (func_name.find("__gem5_hsail_op") != std::string::npos) { 1123 return true; 1124 } 1125 return false; 1126 } 1127 1128 // member variables 1129 ListOperand dest; 1130 FunctionRefOperand src0; 1131 ListOperand src1; 1132 HsailCode *func_ptr; 1133 1134 // exec function for pseudo instructions mapped on top of call opcode 1135 void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst); 1136 1137 // user-defined pseudo instructions 1138 void MagicPrintLane(Wavefront *w); 1139 void MagicPrintLane64(Wavefront *w); 1140 void MagicPrintWF32(Wavefront *w); 1141 void MagicPrintWF64(Wavefront *w); 1142 void MagicPrintWFFloat(Wavefront *w); 1143 void MagicSimBreak(Wavefront *w); 1144 void MagicPrefixSum(Wavefront *w); 1145 void MagicReduction(Wavefront *w); 1146 void MagicMaskLower(Wavefront *w); 1147 void MagicMaskUpper(Wavefront *w); 1148 void MagicJoinWFBar(Wavefront *w); 1149 void MagicWaitWFBar(Wavefront *w); 1150 void MagicPanic(Wavefront *w); 1151 1152 void MagicAtomicNRAddGlobalU32Reg(Wavefront *w, 1153 GPUDynInstPtr gpuDynInst); 1154 1155 void MagicAtomicNRAddGroupU32Reg(Wavefront *w, 1156 GPUDynInstPtr gpuDynInst); 1157 1158 void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst); 1159 1160 void MagicXactCasLd(Wavefront *w); 1161 void MagicMostSigThread(Wavefront *w); 1162 void MagicMostSigBroadcast(Wavefront *w); 1163 1164 void MagicPrintWF32ID(Wavefront *w); 1165 void MagicPrintWFID64(Wavefront *w); 1166 1167 Call(const Brig::BrigInstBase *ib, const BrigObject *obj) 1168 : HsailGPUStaticInst(obj, "call") 1169 { 1170 setFlag(ALU); 1171 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 1172 dest.init(op_offs, obj); 1173 op_offs = obj->getOperandPtr(ib->operands, 1); 1174 src0.init(op_offs, obj); 1175 1176 func_ptr = nullptr; 1177 std::string func_name = src0.disassemble(); 1178 if (!isPseudoOp()) { 1179 func_ptr = dynamic_cast<HsailCode*>(obj-> 1180 getFunction(func_name)); 1181 1182 if (!func_ptr) 1183 fatal("call::exec cannot find function: %s\n", func_name); 1184 } 1185 1186 op_offs = obj->getOperandPtr(ib->operands, 2); 1187 src1.init(op_offs, obj); 1188 } 1189 1190 bool isVectorRegister(int operandIndex) { return false; } 1191 bool isCondRegister(int operandIndex) { return false; } 1192 bool isScalarRegister(int operandIndex) { return false; } 1193 bool isSrcOperand(int operandIndex) { return false; } 1194 bool isDstOperand(int operandIndex) { return false; } 1195 int getOperandSize(int operandIndex) { return 0; } 1196 1197 int 1198 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) 1199 { 1200 return -1; 1201 } 1202 1203 void 1204 execute(GPUDynInstPtr gpuDynInst) 1205 { 1206 Wavefront *w = gpuDynInst->wavefront(); 1207 1208 std::string func_name = src0.disassemble(); 1209 if (isPseudoOp()) { 1210 execPseudoInst(w, gpuDynInst); 1211 } else { 1212 fatal("Native HSAIL functions are not yet implemented: %s\n", 1213 func_name); 1214 } 1215 } 1216 int numSrcRegOperands() { return 0; } 1217 int numDstRegOperands() { return 0; } 1218 int getNumOperands() { return 2; } 1219 }; 1220 1221 template<typename T> T heynot(T arg) { return ~arg; } 1222 template<> inline bool heynot<bool>(bool arg) { return !arg; } 1223} // namespace HsailISA 1224 1225#endif // __ARCH_HSAIL_INSTS_DECL_HH__ 1226