/* * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. * All rights reserved. * * For use for simulation and test purposes only * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Author: Steve Reinhardt */ #ifndef __ARCH_HSAIL_INSTS_DECL_HH__ #define __ARCH_HSAIL_INSTS_DECL_HH__ #include #include "arch/hsail/insts/gpu_static_inst.hh" #include "arch/hsail/operand.hh" #include "debug/HSAIL.hh" #include "gpu-compute/gpu_dyn_inst.hh" #include "gpu-compute/shader.hh" namespace HsailISA { template class HsailOperandType { public: typedef _DestOperand DestOperand; typedef _SrcOperand SrcOperand; }; typedef HsailOperandType CRegOperandType; typedef HsailOperandType SRegOperandType; typedef HsailOperandType DRegOperandType; // The IsBits parameter serves only to disambiguate tbhe B* types from // the U* types, which otherwise would be identical (and // indistinguishable). template class HsailDataType { public: typedef _OperandType OperandType; typedef _CType CType; static const Enums::MemType memType = _memType; static const vgpr_type vgprType = _vgprType; static const char *label; }; typedef HsailDataType B1; typedef HsailDataType B8; typedef HsailDataType B16; typedef HsailDataType B32; typedef HsailDataType B64; typedef HsailDataType S8; typedef HsailDataType S16; typedef HsailDataType S32; typedef HsailDataType S64; typedef HsailDataType U8; typedef HsailDataType U16; typedef HsailDataType U32; typedef HsailDataType U64; typedef HsailDataType F32; typedef HsailDataType F64; template class CommonInstBase : public HsailGPUStaticInst { protected: typename DestOperandType::DestOperand dest; typename SrcOperandType::SrcOperand src[NumSrcOperands]; void generateDisassembly() { disassembly = csprintf("%s%s %s", opcode, opcode_suffix(), dest.disassemble()); for (int i = 0; i < NumSrcOperands; ++i) { disassembly += ","; disassembly += src[i].disassemble(); } } virtual std::string opcode_suffix() = 0; public: CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *opcode) : HsailGPUStaticInst(obj, opcode) { setFlag(ALU); unsigned op_offs = obj->getOperandPtr(ib->operands, 0); dest.init(op_offs, obj); for (int i = 0; i < NumSrcOperands; ++i) { op_offs = obj->getOperandPtr(ib->operands, i + 1); src[i].init(op_offs, obj); } } bool isVectorRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (operandIndex < NumSrcOperands) return src[operandIndex].isVectorRegister(); else return dest.isVectorRegister(); } bool isCondRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (operandIndex < NumSrcOperands) return src[operandIndex].isCondRegister(); else return dest.isCondRegister(); } bool isScalarRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (operandIndex < NumSrcOperands) return src[operandIndex].isScalarRegister(); else return dest.isScalarRegister(); } bool isSrcOperand(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (operandIndex < NumSrcOperands) return true; return false; } bool isDstOperand(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (operandIndex >= NumSrcOperands) return true; return false; } int getOperandSize(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (operandIndex < NumSrcOperands) return src[operandIndex].opSize(); else return dest.opSize(); } int getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) { assert(operandIndex >= 0 && operandIndex < getNumOperands()); if (operandIndex < NumSrcOperands) return src[operandIndex].regIndex(); else return dest.regIndex(); } int numSrcRegOperands() { int operands = 0; for (int i = 0; i < NumSrcOperands; i++) { if (src[i].isVectorRegister()) { operands++; } } return operands; } int numDstRegOperands() { return dest.isVectorRegister(); } int getNumOperands() { return NumSrcOperands + 1; } }; template class ArithInst : public CommonInstBase { public: std::string opcode_suffix() { return csprintf("_%s", DataType::label); } ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *opcode) : CommonInstBase(ib, obj, opcode) { } }; template class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst { protected: typename DestOperandType::DestOperand dest; typename Src0OperandType::SrcOperand src0; typename Src1OperandType::SrcOperand src1; typename Src2OperandType::SrcOperand src2; void generateDisassembly() { disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(), src0.disassemble(), src1.disassemble(), src2.disassemble()); } public: ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *opcode) : HsailGPUStaticInst(obj, opcode) { setFlag(ALU); unsigned op_offs = obj->getOperandPtr(ib->operands, 0); dest.init(op_offs, obj); op_offs = obj->getOperandPtr(ib->operands, 1); src0.init(op_offs, obj); op_offs = obj->getOperandPtr(ib->operands, 2); src1.init(op_offs, obj); op_offs = obj->getOperandPtr(ib->operands, 3); src2.init(op_offs, obj); } bool isVectorRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (!operandIndex) return src0.isVectorRegister(); else if (operandIndex == 1) return src1.isVectorRegister(); else if (operandIndex == 2) return src2.isVectorRegister(); else return dest.isVectorRegister(); } bool isCondRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (!operandIndex) return src0.isCondRegister(); else if (operandIndex == 1) return src1.isCondRegister(); else if (operandIndex == 2) return src2.isCondRegister(); else return dest.isCondRegister(); } bool isScalarRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (!operandIndex) return src0.isScalarRegister(); else if (operandIndex == 1) return src1.isScalarRegister(); else if (operandIndex == 2) return src2.isScalarRegister(); else return dest.isScalarRegister(); } bool isSrcOperand(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (operandIndex < 3) return true; else return false; } bool isDstOperand(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (operandIndex >= 3) return true; else return false; } int getOperandSize(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (!operandIndex) return src0.opSize(); else if (operandIndex == 1) return src1.opSize(); else if (operandIndex == 2) return src2.opSize(); else return dest.opSize(); } int getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (!operandIndex) return src0.regIndex(); else if (operandIndex == 1) return src1.regIndex(); else if (operandIndex == 2) return src2.regIndex(); else return dest.regIndex(); } int numSrcRegOperands() { int operands = 0; if (src0.isVectorRegister()) { operands++; } if (src1.isVectorRegister()) { operands++; } if (src2.isVectorRegister()) { operands++; } return operands; } int numDstRegOperands() { return dest.isVectorRegister(); } int getNumOperands() { return 4; } }; template class ThreeNonUniformSourceInst : public ThreeNonUniformSourceInstBase { public: typedef typename DestDataType::CType DestCType; typedef typename Src0DataType::CType Src0CType; typedef typename Src1DataType::CType Src1CType; typedef typename Src2DataType::CType Src2CType; ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *opcode) : ThreeNonUniformSourceInstBase(ib, obj, opcode) { } }; template class CmovInst : public ThreeNonUniformSourceInst { public: CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *opcode) : ThreeNonUniformSourceInst(ib, obj, opcode) { } }; template class ExtractInsertInst : public ThreeNonUniformSourceInst { public: ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *opcode) : ThreeNonUniformSourceInst(ib, obj, opcode) { } }; template class TwoNonUniformSourceInstBase : public HsailGPUStaticInst { protected: typename DestOperandType::DestOperand dest; typename Src0OperandType::SrcOperand src0; typename Src1OperandType::SrcOperand src1; void generateDisassembly() { disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(), src0.disassemble(), src1.disassemble()); } public: TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *opcode) : HsailGPUStaticInst(obj, opcode) { setFlag(ALU); unsigned op_offs = obj->getOperandPtr(ib->operands, 0); dest.init(op_offs, obj); op_offs = obj->getOperandPtr(ib->operands, 1); src0.init(op_offs, obj); op_offs = obj->getOperandPtr(ib->operands, 2); src1.init(op_offs, obj); } bool isVectorRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (!operandIndex) return src0.isVectorRegister(); else if (operandIndex == 1) return src1.isVectorRegister(); else return dest.isVectorRegister(); } bool isCondRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (!operandIndex) return src0.isCondRegister(); else if (operandIndex == 1) return src1.isCondRegister(); else return dest.isCondRegister(); } bool isScalarRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (!operandIndex) return src0.isScalarRegister(); else if (operandIndex == 1) return src1.isScalarRegister(); else return dest.isScalarRegister(); } bool isSrcOperand(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (operandIndex < 2) return true; else return false; } bool isDstOperand(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (operandIndex >= 2) return true; else return false; } int getOperandSize(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (!operandIndex) return src0.opSize(); else if (operandIndex == 1) return src1.opSize(); else return dest.opSize(); } int getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); if (!operandIndex) return src0.regIndex(); else if (operandIndex == 1) return src1.regIndex(); else return dest.regIndex(); } int numSrcRegOperands() { int operands = 0; if (src0.isVectorRegister()) { operands++; } if (src1.isVectorRegister()) { operands++; } return operands; } int numDstRegOperands() { return dest.isVectorRegister(); } int getNumOperands() { return 3; } }; template class TwoNonUniformSourceInst : public TwoNonUniformSourceInstBase { public: typedef typename DestDataType::CType DestCType; typedef typename Src0DataType::CType Src0CType; typedef typename Src1DataType::CType Src1CType; TwoNonUniformSourceInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *opcode) : TwoNonUniformSourceInstBase(ib, obj, opcode) { } }; // helper function for ClassInst template bool fpclassify(T src0, uint32_t src1) { int fpclass = std::fpclassify(src0); if ((src1 & 0x3) && (fpclass == FP_NAN)) { return true; } if (src0 <= -0.0) { if ((src1 & 0x4) && fpclass == FP_INFINITE) return true; if ((src1 & 0x8) && fpclass == FP_NORMAL) return true; if ((src1 & 0x10) && fpclass == FP_SUBNORMAL) return true; if ((src1 & 0x20) && fpclass == FP_ZERO) return true; } else { if ((src1 & 0x40) && fpclass == FP_ZERO) return true; if ((src1 & 0x80) && fpclass == FP_SUBNORMAL) return true; if ((src1 & 0x100) && fpclass == FP_NORMAL) return true; if ((src1 & 0x200) && fpclass == FP_INFINITE) return true; } return false; } template class ClassInst : public TwoNonUniformSourceInst { public: ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *opcode) : TwoNonUniformSourceInst(ib, obj, opcode) { } }; template class ShiftInst : public TwoNonUniformSourceInst { public: ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *opcode) : TwoNonUniformSourceInst(ib, obj, opcode) { } }; // helper function for CmpInst template bool compare(T src0, T src1, Brig::BrigCompareOperation cmpOp) { using namespace Brig; switch (cmpOp) { case BRIG_COMPARE_EQ: case BRIG_COMPARE_EQU: case BRIG_COMPARE_SEQ: case BRIG_COMPARE_SEQU: return (src0 == src1); case BRIG_COMPARE_NE: case BRIG_COMPARE_NEU: case BRIG_COMPARE_SNE: case BRIG_COMPARE_SNEU: return (src0 != src1); case BRIG_COMPARE_LT: case BRIG_COMPARE_LTU: case BRIG_COMPARE_SLT: case BRIG_COMPARE_SLTU: return (src0 < src1); case BRIG_COMPARE_LE: case BRIG_COMPARE_LEU: case BRIG_COMPARE_SLE: case BRIG_COMPARE_SLEU: return (src0 <= src1); case BRIG_COMPARE_GT: case BRIG_COMPARE_GTU: case BRIG_COMPARE_SGT: case BRIG_COMPARE_SGTU: return (src0 > src1); case BRIG_COMPARE_GE: case BRIG_COMPARE_GEU: case BRIG_COMPARE_SGE: case BRIG_COMPARE_SGEU: return (src0 >= src1); case BRIG_COMPARE_NUM: case BRIG_COMPARE_SNUM: return (src0 == src0) || (src1 == src1); case BRIG_COMPARE_NAN: case BRIG_COMPARE_SNAN: return (src0 != src0) || (src1 != src1); default: fatal("Bad cmpOp value %d\n", (int)cmpOp); } } template int32_t firstbit(T src0) { if (!src0) return -1; //handle positive and negative numbers T tmp = ((int64_t)src0 < 0) ? (~src0) : (src0); //the starting pos is MSB int pos = 8 * sizeof(T) - 1; int cnt = 0; //search the first bit set to 1 while (!(tmp & (1 << pos))) { ++cnt; --pos; } return cnt; } const char* cmpOpToString(Brig::BrigCompareOperation cmpOp); template class CmpInstBase : public CommonInstBase { protected: Brig::BrigCompareOperation cmpOp; public: CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode) : CommonInstBase(ib, obj, _opcode) { assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP); Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib; cmpOp = (Brig::BrigCompareOperation)i->compare; } }; template class CmpInst : public CmpInstBase { public: std::string opcode_suffix() { return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp), DestDataType::label, SrcDataType::label); } CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode) : CmpInstBase(ib, obj, _opcode) { } }; template class CvtInst : public CommonInstBase { public: std::string opcode_suffix() { return csprintf("_%s_%s", DestDataType::label, SrcDataType::label); } CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode) : CommonInstBase(ib, obj, _opcode) { } }; template class PopcountInst : public CommonInstBase { public: std::string opcode_suffix() { return csprintf("_%s_%s", DestDataType::label, SrcDataType::label); } PopcountInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode) : CommonInstBase(ib, obj, _opcode) { } }; class Stub : public HsailGPUStaticInst { public: Stub(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode) : HsailGPUStaticInst(obj, _opcode) { } void generateDisassembly() override { disassembly = csprintf("%s", opcode); } bool isVectorRegister(int operandIndex) override { return false; } bool isCondRegister(int operandIndex) override { return false; } bool isScalarRegister(int operandIndex) override { return false; } bool isSrcOperand(int operandIndex) override { return false; } bool isDstOperand(int operandIndex) override { return false; } int getOperandSize(int operandIndex) override { return 0; } int getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override { return -1; } int numSrcRegOperands() override { return 0; } int numDstRegOperands() override { return 0; } int getNumOperands() override { return 0; } }; class SpecialInstNoSrcNoDest : public HsailGPUStaticInst { public: SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode) : HsailGPUStaticInst(obj, _opcode) { } bool isVectorRegister(int operandIndex) override { return false; } bool isCondRegister(int operandIndex) override { return false; } bool isScalarRegister(int operandIndex) override { return false; } bool isSrcOperand(int operandIndex) override { return false; } bool isDstOperand(int operandIndex) override { return false; } int getOperandSize(int operandIndex) override { return 0; } int getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override { return -1; } int numSrcRegOperands() override { return 0; } int numDstRegOperands() override { return 0; } int getNumOperands() override { return 0; } }; template class SpecialInstNoSrcBase : public HsailGPUStaticInst { protected: typename DestOperandType::DestOperand dest; void generateDisassembly() { disassembly = csprintf("%s %s", opcode, dest.disassemble()); } public: SpecialInstNoSrcBase(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode) : HsailGPUStaticInst(obj, _opcode) { unsigned op_offs = obj->getOperandPtr(ib->operands, 0); dest.init(op_offs, obj); } bool isVectorRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.isVectorRegister(); } bool isCondRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.isCondRegister(); } bool isScalarRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.isScalarRegister(); } bool isSrcOperand(int operandIndex) { return false; } bool isDstOperand(int operandIndex) { return true; } int getOperandSize(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.opSize(); } int getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.regIndex(); } int numSrcRegOperands() { return 0; } int numDstRegOperands() { return dest.isVectorRegister(); } int getNumOperands() { return 1; } }; template class SpecialInstNoSrc : public SpecialInstNoSrcBase { public: typedef typename DestDataType::CType DestCType; SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode) : SpecialInstNoSrcBase(ib, obj, _opcode) { } }; template class SpecialInst1SrcBase : public HsailGPUStaticInst { protected: typedef int SrcCType; // used in execute() template typename DestOperandType::DestOperand dest; ImmOperand src0; void generateDisassembly() { disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(), src0.disassemble()); } public: SpecialInst1SrcBase(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode) : HsailGPUStaticInst(obj, _opcode) { setFlag(ALU); unsigned op_offs = obj->getOperandPtr(ib->operands, 0); dest.init(op_offs, obj); op_offs = obj->getOperandPtr(ib->operands, 1); src0.init(op_offs, obj); } bool isVectorRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.isVectorRegister(); } bool isCondRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.isCondRegister(); } bool isScalarRegister(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.isScalarRegister(); } bool isSrcOperand(int operandIndex) { return false; } bool isDstOperand(int operandIndex) { return true; } int getOperandSize(int operandIndex) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.opSize(); } int getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) { assert((operandIndex >= 0) && (operandIndex < getNumOperands())); return dest.regIndex(); } int numSrcRegOperands() { return 0; } int numDstRegOperands() { return dest.isVectorRegister(); } int getNumOperands() { return 1; } }; template class SpecialInst1Src : public SpecialInst1SrcBase { public: typedef typename DestDataType::CType DestCType; SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode) : SpecialInst1SrcBase(ib, obj, _opcode) { } }; class Ret : public SpecialInstNoSrcNoDest { public: typedef SpecialInstNoSrcNoDest Base; Ret(const Brig::BrigInstBase *ib, const BrigObject *obj) : Base(ib, obj, "ret") { setFlag(GPUStaticInst::Return); } void execute(GPUDynInstPtr gpuDynInst); }; class Barrier : public SpecialInstNoSrcNoDest { public: typedef SpecialInstNoSrcNoDest Base; uint8_t width; Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj) : Base(ib, obj, "barrier") { setFlag(GPUStaticInst::MemBarrier); assert(ib->base.kind == Brig::BRIG_KIND_INST_BR); width = (uint8_t)((Brig::BrigInstBr*)ib)->width; } void execute(GPUDynInstPtr gpuDynInst); }; class MemFence : public SpecialInstNoSrcNoDest { public: typedef SpecialInstNoSrcNoDest Base; Brig::BrigMemoryOrder memFenceMemOrder; Brig::BrigMemoryScope memFenceScopeSegGroup; Brig::BrigMemoryScope memFenceScopeSegGlobal; Brig::BrigMemoryScope memFenceScopeSegImage; MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj) : Base(ib, obj, "memfence") { assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE); memFenceScopeSegGlobal = (Brig::BrigMemoryScope) ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope; memFenceScopeSegGroup = (Brig::BrigMemoryScope) ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope; memFenceScopeSegImage = (Brig::BrigMemoryScope) ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope; memFenceMemOrder = (Brig::BrigMemoryOrder) ((Brig::BrigInstMemFence*)ib)->memoryOrder; setFlag(MemoryRef); setFlag(GPUStaticInst::MemFence); switch (memFenceMemOrder) { case Brig::BRIG_MEMORY_ORDER_NONE: setFlag(NoOrder); break; case Brig::BRIG_MEMORY_ORDER_RELAXED: setFlag(RelaxedOrder); break; case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE: setFlag(Acquire); break; case Brig::BRIG_MEMORY_ORDER_SC_RELEASE: setFlag(Release); break; case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: setFlag(AcquireRelease); break; default: fatal("MemInst has bad BrigMemoryOrder\n"); } // set inst flags based on scopes if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE && memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { setFlag(GPUStaticInst::GlobalSegment); /** * A memory fence that has scope for * both segments will use the global * segment, and be executed in the * global memory pipeline, therefore, * we set the segment to match the * global scope only */ switch (memFenceScopeSegGlobal) { case Brig::BRIG_MEMORY_SCOPE_NONE: setFlag(NoScope); break; case Brig::BRIG_MEMORY_SCOPE_WORKITEM: setFlag(WorkitemScope); break; case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: setFlag(WorkgroupScope); break; case Brig::BRIG_MEMORY_SCOPE_AGENT: setFlag(DeviceScope); break; case Brig::BRIG_MEMORY_SCOPE_SYSTEM: setFlag(SystemScope); break; default: fatal("MemFence has bad global scope type\n"); } } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) { setFlag(GPUStaticInst::GlobalSegment); switch (memFenceScopeSegGlobal) { case Brig::BRIG_MEMORY_SCOPE_NONE: setFlag(NoScope); break; case Brig::BRIG_MEMORY_SCOPE_WORKITEM: setFlag(WorkitemScope); break; case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: setFlag(WorkgroupScope); break; case Brig::BRIG_MEMORY_SCOPE_AGENT: setFlag(DeviceScope); break; case Brig::BRIG_MEMORY_SCOPE_SYSTEM: setFlag(SystemScope); break; default: fatal("MemFence has bad global scope type\n"); } } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { setFlag(GPUStaticInst::GroupSegment); switch (memFenceScopeSegGroup) { case Brig::BRIG_MEMORY_SCOPE_NONE: setFlag(NoScope); break; case Brig::BRIG_MEMORY_SCOPE_WORKITEM: setFlag(WorkitemScope); break; case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: setFlag(WorkgroupScope); break; case Brig::BRIG_MEMORY_SCOPE_AGENT: setFlag(DeviceScope); break; case Brig::BRIG_MEMORY_SCOPE_SYSTEM: setFlag(SystemScope); break; default: fatal("MemFence has bad group scope type\n"); } } else { fatal("MemFence constructor: bad scope specifiers\n"); } } void initiateAcc(GPUDynInstPtr gpuDynInst) { Wavefront *wave = gpuDynInst->wavefront(); wave->computeUnit->injectGlobalMemFence(gpuDynInst); } void execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); // 2 cases: // * memfence to a sequentially consistent memory (e.g., LDS). // These can be handled as no-ops. // * memfence to a relaxed consistency cache (e.g., Hermes, Viper, // etc.). We send a packet, tagged with the memory order and // scope, and let the GPU coalescer handle it. if (isGlobalSeg()) { gpuDynInst->simdId = w->simdId; gpuDynInst->wfSlotId = w->wfSlotId; gpuDynInst->wfDynId = w->wfDynId; gpuDynInst->kern_id = w->kernId; gpuDynInst->cu_id = w->computeUnit->cu_id; gpuDynInst->useContinuation = false; GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe); gmp->issueRequest(gpuDynInst); w->wrGmReqsInPipe--; w->rdGmReqsInPipe--; w->memReqsInPipe--; w->outstandingReqs++; } else if (isGroupSeg()) { // no-op } else { fatal("MemFence execute: bad op type\n"); } } }; class Call : public HsailGPUStaticInst { public: // private helper functions void calcAddr(Wavefront* w, GPUDynInstPtr m); void generateDisassembly() { if (dest.disassemble() == "") { disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(), src1.disassemble()); } else { disassembly = csprintf("%s %s (%s) (%s)", opcode, src0.disassemble(), dest.disassemble(), src1.disassemble()); } } bool isPseudoOp() { std::string func_name = src0.disassemble(); if (func_name.find("__gem5_hsail_op") != std::string::npos) { return true; } return false; } // member variables ListOperand dest; FunctionRefOperand src0; ListOperand src1; HsailCode *func_ptr; // exec function for pseudo instructions mapped on top of call opcode void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst); // user-defined pseudo instructions void MagicPrintLane(Wavefront *w); void MagicPrintLane64(Wavefront *w); void MagicPrintWF32(Wavefront *w); void MagicPrintWF64(Wavefront *w); void MagicPrintWFFloat(Wavefront *w); void MagicSimBreak(Wavefront *w); void MagicPrefixSum(Wavefront *w); void MagicReduction(Wavefront *w); void MagicMaskLower(Wavefront *w); void MagicMaskUpper(Wavefront *w); void MagicJoinWFBar(Wavefront *w); void MagicWaitWFBar(Wavefront *w); void MagicPanic(Wavefront *w); void MagicAtomicNRAddGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst); void MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst); void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst); void MagicXactCasLd(Wavefront *w); void MagicMostSigThread(Wavefront *w); void MagicMostSigBroadcast(Wavefront *w); void MagicPrintWF32ID(Wavefront *w); void MagicPrintWFID64(Wavefront *w); Call(const Brig::BrigInstBase *ib, const BrigObject *obj) : HsailGPUStaticInst(obj, "call") { setFlag(ALU); unsigned op_offs = obj->getOperandPtr(ib->operands, 0); dest.init(op_offs, obj); op_offs = obj->getOperandPtr(ib->operands, 1); src0.init(op_offs, obj); func_ptr = nullptr; std::string func_name = src0.disassemble(); if (!isPseudoOp()) { func_ptr = dynamic_cast(obj-> getFunction(func_name)); if (!func_ptr) fatal("call::exec cannot find function: %s\n", func_name); } op_offs = obj->getOperandPtr(ib->operands, 2); src1.init(op_offs, obj); } bool isVectorRegister(int operandIndex) { return false; } bool isCondRegister(int operandIndex) { return false; } bool isScalarRegister(int operandIndex) { return false; } bool isSrcOperand(int operandIndex) { return false; } bool isDstOperand(int operandIndex) { return false; } int getOperandSize(int operandIndex) { return 0; } int getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) { return -1; } void execute(GPUDynInstPtr gpuDynInst) { Wavefront *w = gpuDynInst->wavefront(); std::string func_name = src0.disassemble(); if (isPseudoOp()) { execPseudoInst(w, gpuDynInst); } else { fatal("Native HSAIL functions are not yet implemented: %s\n", func_name); } } int numSrcRegOperands() { return 0; } int numDstRegOperands() { return 0; } int getNumOperands() { return 2; } }; template T heynot(T arg) { return ~arg; } template<> inline bool heynot(bool arg) { return !arg; } /* Explicitly declare template static member variables to avoid * warnings in some clang versions */ template<> const char *B1::label; template<> const char *B8::label; template<> const char *B16::label; template<> const char *B32::label; template<> const char *B64::label; template<> const char *S8::label; template<> const char *S16::label; template<> const char *S32::label; template<> const char *S64::label; template<> const char *U8::label; template<> const char *U16::label; template<> const char *U32::label; template<> const char *U64::label; template<> const char *F32::label; template<> const char *F64::label; } // namespace HsailISA #endif // __ARCH_HSAIL_INSTS_DECL_HH__