decl.hh revision 11699:c7453f485a5f
110234Syasuko.eckert@amd.com/*
210234Syasuko.eckert@amd.com * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
310234Syasuko.eckert@amd.com * All rights reserved.
410234Syasuko.eckert@amd.com *
510234Syasuko.eckert@amd.com * For use for simulation and test purposes only
610234Syasuko.eckert@amd.com *
710234Syasuko.eckert@amd.com * Redistribution and use in source and binary forms, with or without
810234Syasuko.eckert@amd.com * modification, are permitted provided that the following conditions are met:
910234Syasuko.eckert@amd.com *
1010234Syasuko.eckert@amd.com * 1. Redistributions of source code must retain the above copyright notice,
1110234Syasuko.eckert@amd.com * this list of conditions and the following disclaimer.
1210234Syasuko.eckert@amd.com *
1310234Syasuko.eckert@amd.com * 2. Redistributions in binary form must reproduce the above copyright notice,
1410234Syasuko.eckert@amd.com * this list of conditions and the following disclaimer in the documentation
1510234Syasuko.eckert@amd.com * and/or other materials provided with the distribution.
1610234Syasuko.eckert@amd.com *
1710234Syasuko.eckert@amd.com * 3. Neither the name of the copyright holder nor the names of its contributors
1810234Syasuko.eckert@amd.com * may be used to endorse or promote products derived from this software
1910234Syasuko.eckert@amd.com * without specific prior written permission.
2010234Syasuko.eckert@amd.com *
2110234Syasuko.eckert@amd.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
2210234Syasuko.eckert@amd.com * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2310234Syasuko.eckert@amd.com * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2410234Syasuko.eckert@amd.com * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
2510234Syasuko.eckert@amd.com * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2610234Syasuko.eckert@amd.com * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2710234Syasuko.eckert@amd.com * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2810234Syasuko.eckert@amd.com * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2910234Syasuko.eckert@amd.com * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
3010234Syasuko.eckert@amd.com * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
3110234Syasuko.eckert@amd.com * POSSIBILITY OF SUCH DAMAGE.
3210234Syasuko.eckert@amd.com *
3310234Syasuko.eckert@amd.com * Author: Steve Reinhardt
3410234Syasuko.eckert@amd.com */
3510234Syasuko.eckert@amd.com
3610234Syasuko.eckert@amd.com#ifndef __ARCH_HSAIL_INSTS_DECL_HH__
3710234Syasuko.eckert@amd.com#define __ARCH_HSAIL_INSTS_DECL_HH__
3810234Syasuko.eckert@amd.com
3910234Syasuko.eckert@amd.com#include <cmath>
4010234Syasuko.eckert@amd.com
4110234Syasuko.eckert@amd.com#include "arch/hsail/insts/gpu_static_inst.hh"
4210234Syasuko.eckert@amd.com#include "arch/hsail/operand.hh"
4310234Syasuko.eckert@amd.com#include "debug/HSAIL.hh"
4410234Syasuko.eckert@amd.com#include "gpu-compute/gpu_dyn_inst.hh"
4510234Syasuko.eckert@amd.com#include "gpu-compute/shader.hh"
4610234Syasuko.eckert@amd.com
4710234Syasuko.eckert@amd.comnamespace HsailISA
4810234Syasuko.eckert@amd.com{
4910234Syasuko.eckert@amd.com    template<typename _DestOperand, typename _SrcOperand>
5010234Syasuko.eckert@amd.com    class HsailOperandType
5110234Syasuko.eckert@amd.com    {
5210234Syasuko.eckert@amd.com      public:
5310234Syasuko.eckert@amd.com        typedef _DestOperand DestOperand;
5410234Syasuko.eckert@amd.com        typedef _SrcOperand SrcOperand;
5510234Syasuko.eckert@amd.com    };
5610234Syasuko.eckert@amd.com
5710234Syasuko.eckert@amd.com    typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType;
5810234Syasuko.eckert@amd.com    typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType;
5910234Syasuko.eckert@amd.com    typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType;
6010234Syasuko.eckert@amd.com
6110234Syasuko.eckert@amd.com    // The IsBits parameter serves only to disambiguate tbhe B* types from
6210234Syasuko.eckert@amd.com    // the U* types, which otherwise would be identical (and
6310234Syasuko.eckert@amd.com    // indistinguishable).
6410234Syasuko.eckert@amd.com    template<typename _OperandType, typename _CType, Enums::MemType _memType,
6510234Syasuko.eckert@amd.com             vgpr_type _vgprType, int IsBits=0>
6610234Syasuko.eckert@amd.com    class HsailDataType
6710234Syasuko.eckert@amd.com    {
6810234Syasuko.eckert@amd.com      public:
6910234Syasuko.eckert@amd.com        typedef _OperandType OperandType;
7010234Syasuko.eckert@amd.com        typedef _CType CType;
7110234Syasuko.eckert@amd.com        static const Enums::MemType memType = _memType;
7210234Syasuko.eckert@amd.com        static const vgpr_type vgprType = _vgprType;
7310234Syasuko.eckert@amd.com        static const char *label;
7410234Syasuko.eckert@amd.com    };
7510234Syasuko.eckert@amd.com
7610234Syasuko.eckert@amd.com    typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1;
7710234Syasuko.eckert@amd.com    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8;
7810234Syasuko.eckert@amd.com
7910234Syasuko.eckert@amd.com    typedef HsailDataType<SRegOperandType, uint16_t,
8010234Syasuko.eckert@amd.com                          Enums::M_U16, VT_32, 1> B16;
8110234Syasuko.eckert@amd.com
8210234Syasuko.eckert@amd.com    typedef HsailDataType<SRegOperandType, uint32_t,
8310234Syasuko.eckert@amd.com                          Enums::M_U32, VT_32, 1> B32;
8410234Syasuko.eckert@amd.com
8510234Syasuko.eckert@amd.com    typedef HsailDataType<DRegOperandType, uint64_t,
8610234Syasuko.eckert@amd.com                          Enums::M_U64, VT_64, 1> B64;
8710234Syasuko.eckert@amd.com
8810234Syasuko.eckert@amd.com    typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8;
8910234Syasuko.eckert@amd.com    typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16;
9010234Syasuko.eckert@amd.com    typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32;
9110234Syasuko.eckert@amd.com    typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64;
9210234Syasuko.eckert@amd.com
9310234Syasuko.eckert@amd.com    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8;
9410234Syasuko.eckert@amd.com    typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16;
9510234Syasuko.eckert@amd.com    typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32;
9610234Syasuko.eckert@amd.com    typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64;
9710234Syasuko.eckert@amd.com
9810234Syasuko.eckert@amd.com    typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32;
9910234Syasuko.eckert@amd.com    typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64;
10010234Syasuko.eckert@amd.com
10110234Syasuko.eckert@amd.com    template<typename DestOperandType, typename SrcOperandType,
10210234Syasuko.eckert@amd.com             int NumSrcOperands>
10310234Syasuko.eckert@amd.com    class CommonInstBase : public HsailGPUStaticInst
10410234Syasuko.eckert@amd.com    {
10510234Syasuko.eckert@amd.com      protected:
10610234Syasuko.eckert@amd.com        typename DestOperandType::DestOperand dest;
10710234Syasuko.eckert@amd.com        typename SrcOperandType::SrcOperand src[NumSrcOperands];
10810234Syasuko.eckert@amd.com
10910234Syasuko.eckert@amd.com        void
11010234Syasuko.eckert@amd.com        generateDisassembly()
11110234Syasuko.eckert@amd.com        {
11210234Syasuko.eckert@amd.com            disassembly = csprintf("%s%s %s", opcode, opcode_suffix(),
11310234Syasuko.eckert@amd.com                                   dest.disassemble());
11410234Syasuko.eckert@amd.com
11510234Syasuko.eckert@amd.com            for (int i = 0; i < NumSrcOperands; ++i) {
11610234Syasuko.eckert@amd.com                disassembly += ",";
11710234Syasuko.eckert@amd.com                disassembly += src[i].disassemble();
11810234Syasuko.eckert@amd.com            }
11910234Syasuko.eckert@amd.com        }
12010234Syasuko.eckert@amd.com
12110234Syasuko.eckert@amd.com        virtual std::string opcode_suffix() = 0;
12210234Syasuko.eckert@amd.com
12310234Syasuko.eckert@amd.com      public:
12410234Syasuko.eckert@amd.com        CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
12510234Syasuko.eckert@amd.com                       const char *opcode)
12610234Syasuko.eckert@amd.com            : HsailGPUStaticInst(obj, opcode)
12710234Syasuko.eckert@amd.com        {
12810234Syasuko.eckert@amd.com            setFlag(ALU);
12910234Syasuko.eckert@amd.com
13010234Syasuko.eckert@amd.com            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
13110234Syasuko.eckert@amd.com
13210234Syasuko.eckert@amd.com            dest.init(op_offs, obj);
13310234Syasuko.eckert@amd.com
13410234Syasuko.eckert@amd.com            for (int i = 0; i < NumSrcOperands; ++i) {
13510234Syasuko.eckert@amd.com                op_offs = obj->getOperandPtr(ib->operands, i + 1);
13610234Syasuko.eckert@amd.com                src[i].init(op_offs, obj);
13710234Syasuko.eckert@amd.com            }
13810234Syasuko.eckert@amd.com        }
13910234Syasuko.eckert@amd.com
14010234Syasuko.eckert@amd.com        bool isVectorRegister(int operandIndex) {
14110234Syasuko.eckert@amd.com            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
14210234Syasuko.eckert@amd.com            if (operandIndex < NumSrcOperands)
14310234Syasuko.eckert@amd.com                return src[operandIndex].isVectorRegister();
14410234Syasuko.eckert@amd.com            else
14510234Syasuko.eckert@amd.com                return dest.isVectorRegister();
14610234Syasuko.eckert@amd.com        }
14710234Syasuko.eckert@amd.com        bool isCondRegister(int operandIndex) {
14810234Syasuko.eckert@amd.com            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
14910234Syasuko.eckert@amd.com            if (operandIndex < NumSrcOperands)
15010234Syasuko.eckert@amd.com                return src[operandIndex].isCondRegister();
15110234Syasuko.eckert@amd.com            else
15210234Syasuko.eckert@amd.com                return dest.isCondRegister();
15310234Syasuko.eckert@amd.com        }
15410234Syasuko.eckert@amd.com        bool isScalarRegister(int operandIndex) {
15510234Syasuko.eckert@amd.com            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
15610234Syasuko.eckert@amd.com            if (operandIndex < NumSrcOperands)
15710234Syasuko.eckert@amd.com                return src[operandIndex].isScalarRegister();
15810234Syasuko.eckert@amd.com            else
15910234Syasuko.eckert@amd.com                return dest.isScalarRegister();
16010234Syasuko.eckert@amd.com        }
16110234Syasuko.eckert@amd.com        bool isSrcOperand(int operandIndex) {
16210234Syasuko.eckert@amd.com            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
16310234Syasuko.eckert@amd.com            if (operandIndex < NumSrcOperands)
16410234Syasuko.eckert@amd.com                return true;
16510234Syasuko.eckert@amd.com            return false;
16610234Syasuko.eckert@amd.com        }
16710234Syasuko.eckert@amd.com
16810234Syasuko.eckert@amd.com        bool isDstOperand(int operandIndex) {
16910234Syasuko.eckert@amd.com            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
17010234Syasuko.eckert@amd.com            if (operandIndex >= NumSrcOperands)
17110234Syasuko.eckert@amd.com                return true;
17210234Syasuko.eckert@amd.com            return false;
17310234Syasuko.eckert@amd.com        }
17410234Syasuko.eckert@amd.com        int getOperandSize(int operandIndex) {
17510234Syasuko.eckert@amd.com            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
17610234Syasuko.eckert@amd.com            if (operandIndex < NumSrcOperands)
17710234Syasuko.eckert@amd.com                return src[operandIndex].opSize();
17810234Syasuko.eckert@amd.com            else
17910234Syasuko.eckert@amd.com                return dest.opSize();
18010234Syasuko.eckert@amd.com        }
18110234Syasuko.eckert@amd.com        int
18210234Syasuko.eckert@amd.com        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
18310234Syasuko.eckert@amd.com        {
18410234Syasuko.eckert@amd.com            assert(operandIndex >= 0 && operandIndex < getNumOperands());
18510234Syasuko.eckert@amd.com
18610234Syasuko.eckert@amd.com            if (operandIndex < NumSrcOperands)
18710234Syasuko.eckert@amd.com                return src[operandIndex].regIndex();
18810234Syasuko.eckert@amd.com            else
18910234Syasuko.eckert@amd.com                return dest.regIndex();
19010234Syasuko.eckert@amd.com        }
19110234Syasuko.eckert@amd.com        int numSrcRegOperands() {
19210234Syasuko.eckert@amd.com            int operands = 0;
19310234Syasuko.eckert@amd.com            for (int i = 0; i < NumSrcOperands; i++) {
19410234Syasuko.eckert@amd.com                if (src[i].isVectorRegister()) {
19510234Syasuko.eckert@amd.com                    operands++;
19610234Syasuko.eckert@amd.com                }
19710234Syasuko.eckert@amd.com            }
19810234Syasuko.eckert@amd.com            return operands;
19910234Syasuko.eckert@amd.com        }
20010234Syasuko.eckert@amd.com        int numDstRegOperands() { return dest.isVectorRegister(); }
20110234Syasuko.eckert@amd.com        int getNumOperands() { return NumSrcOperands + 1; }
20210234Syasuko.eckert@amd.com    };
20310234Syasuko.eckert@amd.com
20410234Syasuko.eckert@amd.com    template<typename DataType, int NumSrcOperands>
20510234Syasuko.eckert@amd.com    class ArithInst : public CommonInstBase<typename DataType::OperandType,
20610234Syasuko.eckert@amd.com                                            typename DataType::OperandType,
20710234Syasuko.eckert@amd.com                                            NumSrcOperands>
20810234Syasuko.eckert@amd.com    {
20910234Syasuko.eckert@amd.com      public:
21010234Syasuko.eckert@amd.com        std::string opcode_suffix() { return csprintf("_%s", DataType::label); }
21110234Syasuko.eckert@amd.com
21210234Syasuko.eckert@amd.com        ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
21310234Syasuko.eckert@amd.com                  const char *opcode)
21410234Syasuko.eckert@amd.com            : CommonInstBase<typename DataType::OperandType,
21510234Syasuko.eckert@amd.com                             typename DataType::OperandType,
21610234Syasuko.eckert@amd.com                             NumSrcOperands>(ib, obj, opcode)
21710234Syasuko.eckert@amd.com        {
21810234Syasuko.eckert@amd.com        }
21910234Syasuko.eckert@amd.com    };
22010234Syasuko.eckert@amd.com
22110234Syasuko.eckert@amd.com    template<typename DestOperandType, typename Src0OperandType,
22210234Syasuko.eckert@amd.com             typename Src1OperandType, typename Src2OperandType>
22310234Syasuko.eckert@amd.com    class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst
22410234Syasuko.eckert@amd.com    {
22510234Syasuko.eckert@amd.com      protected:
22610234Syasuko.eckert@amd.com        typename DestOperandType::DestOperand dest;
22710234Syasuko.eckert@amd.com        typename Src0OperandType::SrcOperand  src0;
22810234Syasuko.eckert@amd.com        typename Src1OperandType::SrcOperand  src1;
22910234Syasuko.eckert@amd.com        typename Src2OperandType::SrcOperand  src2;
23010234Syasuko.eckert@amd.com
23110234Syasuko.eckert@amd.com        void
23210234Syasuko.eckert@amd.com        generateDisassembly()
23310234Syasuko.eckert@amd.com        {
23410234Syasuko.eckert@amd.com            disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(),
23510234Syasuko.eckert@amd.com                                   src0.disassemble(), src1.disassemble(),
23610234Syasuko.eckert@amd.com                                   src2.disassemble());
23710234Syasuko.eckert@amd.com        }
23810234Syasuko.eckert@amd.com
23910234Syasuko.eckert@amd.com      public:
24010234Syasuko.eckert@amd.com        ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
24110234Syasuko.eckert@amd.com                                      const BrigObject *obj,
24210234Syasuko.eckert@amd.com                                      const char *opcode)
24310234Syasuko.eckert@amd.com            : HsailGPUStaticInst(obj, opcode)
24410234Syasuko.eckert@amd.com        {
24510234Syasuko.eckert@amd.com            setFlag(ALU);
24610234Syasuko.eckert@amd.com
24710234Syasuko.eckert@amd.com            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
24810234Syasuko.eckert@amd.com            dest.init(op_offs, obj);
24910234Syasuko.eckert@amd.com
25010234Syasuko.eckert@amd.com            op_offs = obj->getOperandPtr(ib->operands, 1);
25110234Syasuko.eckert@amd.com            src0.init(op_offs, obj);
25210234Syasuko.eckert@amd.com
25310234Syasuko.eckert@amd.com            op_offs = obj->getOperandPtr(ib->operands, 2);
25410234Syasuko.eckert@amd.com            src1.init(op_offs, obj);
25510234Syasuko.eckert@amd.com
25610234Syasuko.eckert@amd.com            op_offs = obj->getOperandPtr(ib->operands, 3);
25710234Syasuko.eckert@amd.com            src2.init(op_offs, obj);
25810234Syasuko.eckert@amd.com        }
25910234Syasuko.eckert@amd.com
26010234Syasuko.eckert@amd.com        bool isVectorRegister(int operandIndex) {
26110234Syasuko.eckert@amd.com            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
26210234Syasuko.eckert@amd.com            if (!operandIndex)
26310234Syasuko.eckert@amd.com                return src0.isVectorRegister();
26410234Syasuko.eckert@amd.com            else if (operandIndex == 1)
26510234Syasuko.eckert@amd.com                return src1.isVectorRegister();
26610234Syasuko.eckert@amd.com            else if (operandIndex == 2)
26710234Syasuko.eckert@amd.com                return src2.isVectorRegister();
26810234Syasuko.eckert@amd.com            else
26910234Syasuko.eckert@amd.com                return dest.isVectorRegister();
27010234Syasuko.eckert@amd.com        }
27110234Syasuko.eckert@amd.com        bool isCondRegister(int operandIndex) {
27210234Syasuko.eckert@amd.com            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
27310234Syasuko.eckert@amd.com            if (!operandIndex)
27410234Syasuko.eckert@amd.com                return src0.isCondRegister();
27510234Syasuko.eckert@amd.com            else if (operandIndex == 1)
27610234Syasuko.eckert@amd.com                return src1.isCondRegister();
27710234Syasuko.eckert@amd.com            else if (operandIndex == 2)
27810234Syasuko.eckert@amd.com                return src2.isCondRegister();
27910234Syasuko.eckert@amd.com            else
28010234Syasuko.eckert@amd.com                return dest.isCondRegister();
28110234Syasuko.eckert@amd.com        }
28210234Syasuko.eckert@amd.com        bool isScalarRegister(int operandIndex) {
28310234Syasuko.eckert@amd.com            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
28410234Syasuko.eckert@amd.com            if (!operandIndex)
28510234Syasuko.eckert@amd.com                return src0.isScalarRegister();
28610234Syasuko.eckert@amd.com            else if (operandIndex == 1)
28710234Syasuko.eckert@amd.com                return src1.isScalarRegister();
28810234Syasuko.eckert@amd.com            else if (operandIndex == 2)
28910234Syasuko.eckert@amd.com                return src2.isScalarRegister();
29010234Syasuko.eckert@amd.com            else
29110234Syasuko.eckert@amd.com                return dest.isScalarRegister();
29210234Syasuko.eckert@amd.com        }
29310234Syasuko.eckert@amd.com        bool isSrcOperand(int operandIndex) {
29410234Syasuko.eckert@amd.com            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
29510234Syasuko.eckert@amd.com            if (operandIndex < 3)
29610234Syasuko.eckert@amd.com                return true;
29710234Syasuko.eckert@amd.com            else
29810234Syasuko.eckert@amd.com                return false;
29910234Syasuko.eckert@amd.com        }
30010234Syasuko.eckert@amd.com        bool isDstOperand(int operandIndex) {
30110234Syasuko.eckert@amd.com            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
30210234Syasuko.eckert@amd.com            if (operandIndex >= 3)
30310234Syasuko.eckert@amd.com                return true;
30410234Syasuko.eckert@amd.com            else
30510234Syasuko.eckert@amd.com                return false;
30610234Syasuko.eckert@amd.com        }
30710234Syasuko.eckert@amd.com        int getOperandSize(int operandIndex) {
30810234Syasuko.eckert@amd.com            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
30910234Syasuko.eckert@amd.com            if (!operandIndex)
31010234Syasuko.eckert@amd.com                return src0.opSize();
31110234Syasuko.eckert@amd.com            else if (operandIndex == 1)
31210234Syasuko.eckert@amd.com                return src1.opSize();
31310234Syasuko.eckert@amd.com            else if (operandIndex == 2)
31410234Syasuko.eckert@amd.com                return src2.opSize();
31510234Syasuko.eckert@amd.com            else
31610234Syasuko.eckert@amd.com                return dest.opSize();
31710234Syasuko.eckert@amd.com        }
31810234Syasuko.eckert@amd.com
31910234Syasuko.eckert@amd.com        int
32010234Syasuko.eckert@amd.com        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
32110234Syasuko.eckert@amd.com        {
32210234Syasuko.eckert@amd.com            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
32310234Syasuko.eckert@amd.com            if (!operandIndex)
32410234Syasuko.eckert@amd.com                return src0.regIndex();
32510234Syasuko.eckert@amd.com            else if (operandIndex == 1)
32610234Syasuko.eckert@amd.com                return src1.regIndex();
32710234Syasuko.eckert@amd.com            else if (operandIndex == 2)
32810234Syasuko.eckert@amd.com                return src2.regIndex();
32910234Syasuko.eckert@amd.com            else
33010234Syasuko.eckert@amd.com                return dest.regIndex();
33110234Syasuko.eckert@amd.com        }
33210234Syasuko.eckert@amd.com
33310234Syasuko.eckert@amd.com        int numSrcRegOperands() {
33410234Syasuko.eckert@amd.com            int operands = 0;
33510234Syasuko.eckert@amd.com            if (src0.isVectorRegister()) {
33610234Syasuko.eckert@amd.com                operands++;
33710234Syasuko.eckert@amd.com            }
33810234Syasuko.eckert@amd.com            if (src1.isVectorRegister()) {
33910234Syasuko.eckert@amd.com                operands++;
34010234Syasuko.eckert@amd.com            }
34110234Syasuko.eckert@amd.com            if (src2.isVectorRegister()) {
34210234Syasuko.eckert@amd.com                operands++;
34310234Syasuko.eckert@amd.com            }
34410234Syasuko.eckert@amd.com            return operands;
34510234Syasuko.eckert@amd.com        }
34610234Syasuko.eckert@amd.com        int numDstRegOperands() { return dest.isVectorRegister(); }
34710234Syasuko.eckert@amd.com        int getNumOperands() { return 4; }
34810234Syasuko.eckert@amd.com    };
34910234Syasuko.eckert@amd.com
35010234Syasuko.eckert@amd.com    template<typename DestDataType, typename Src0DataType,
351             typename Src1DataType, typename Src2DataType>
352    class ThreeNonUniformSourceInst :
353        public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
354                                             typename Src0DataType::OperandType,
355                                             typename Src1DataType::OperandType,
356                                             typename Src2DataType::OperandType>
357    {
358      public:
359        typedef typename DestDataType::CType DestCType;
360        typedef typename Src0DataType::CType Src0CType;
361        typedef typename Src1DataType::CType Src1CType;
362        typedef typename Src2DataType::CType Src2CType;
363
364        ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib,
365                                  const BrigObject *obj, const char *opcode)
366            : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
367                                         typename Src0DataType::OperandType,
368                                         typename Src1DataType::OperandType,
369                                         typename Src2DataType::OperandType>(ib,
370                                                                    obj, opcode)
371        {
372        }
373    };
374
375    template<typename DataType>
376    class CmovInst : public ThreeNonUniformSourceInst<DataType, B1,
377                                                      DataType, DataType>
378    {
379      public:
380        CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
381                 const char *opcode)
382            : ThreeNonUniformSourceInst<DataType, B1, DataType,
383                                        DataType>(ib, obj, opcode)
384        {
385        }
386    };
387
388    template<typename DataType>
389    class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType,
390                                                               DataType, U32,
391                                                               U32>
392    {
393      public:
394        ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
395                          const char *opcode)
396            : ThreeNonUniformSourceInst<DataType, DataType, U32,
397                                        U32>(ib, obj, opcode)
398        {
399        }
400    };
401
402    template<typename DestOperandType, typename Src0OperandType,
403             typename Src1OperandType>
404    class TwoNonUniformSourceInstBase : public HsailGPUStaticInst
405    {
406      protected:
407        typename DestOperandType::DestOperand dest;
408        typename Src0OperandType::SrcOperand src0;
409        typename Src1OperandType::SrcOperand src1;
410
411        void
412        generateDisassembly()
413        {
414            disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(),
415                                   src0.disassemble(), src1.disassemble());
416        }
417
418
419      public:
420        TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
421                                    const BrigObject *obj, const char *opcode)
422            : HsailGPUStaticInst(obj, opcode)
423        {
424            setFlag(ALU);
425
426            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
427            dest.init(op_offs, obj);
428
429            op_offs = obj->getOperandPtr(ib->operands, 1);
430            src0.init(op_offs, obj);
431
432            op_offs = obj->getOperandPtr(ib->operands, 2);
433            src1.init(op_offs, obj);
434        }
435        bool isVectorRegister(int operandIndex) {
436            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
437            if (!operandIndex)
438                return src0.isVectorRegister();
439            else if (operandIndex == 1)
440                return src1.isVectorRegister();
441            else
442                return dest.isVectorRegister();
443        }
444        bool isCondRegister(int operandIndex) {
445            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
446            if (!operandIndex)
447                return src0.isCondRegister();
448            else if (operandIndex == 1)
449                return src1.isCondRegister();
450            else
451                return dest.isCondRegister();
452        }
453        bool isScalarRegister(int operandIndex) {
454            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
455            if (!operandIndex)
456                return src0.isScalarRegister();
457            else if (operandIndex == 1)
458                return src1.isScalarRegister();
459            else
460                return dest.isScalarRegister();
461        }
462        bool isSrcOperand(int operandIndex) {
463            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
464            if (operandIndex < 2)
465                return true;
466            else
467                return false;
468        }
469        bool isDstOperand(int operandIndex) {
470            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
471            if (operandIndex >= 2)
472                return true;
473            else
474                return false;
475        }
476        int getOperandSize(int operandIndex) {
477            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
478            if (!operandIndex)
479                return src0.opSize();
480            else if (operandIndex == 1)
481                return src1.opSize();
482            else
483                return dest.opSize();
484        }
485
486        int
487        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
488        {
489            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
490            if (!operandIndex)
491                return src0.regIndex();
492            else if (operandIndex == 1)
493                return src1.regIndex();
494            else
495                return dest.regIndex();
496        }
497
498        int numSrcRegOperands() {
499            int operands = 0;
500            if (src0.isVectorRegister()) {
501                operands++;
502            }
503            if (src1.isVectorRegister()) {
504                operands++;
505            }
506            return operands;
507        }
508        int numDstRegOperands() { return dest.isVectorRegister(); }
509        int getNumOperands() { return 3; }
510    };
511
512    template<typename DestDataType, typename Src0DataType,
513             typename Src1DataType>
514    class TwoNonUniformSourceInst :
515        public TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
516                                           typename Src0DataType::OperandType,
517                                           typename Src1DataType::OperandType>
518    {
519      public:
520        typedef typename DestDataType::CType DestCType;
521        typedef typename Src0DataType::CType Src0CType;
522        typedef typename Src1DataType::CType Src1CType;
523
524        TwoNonUniformSourceInst(const Brig::BrigInstBase *ib,
525                                const BrigObject *obj, const char *opcode)
526            : TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
527                                         typename Src0DataType::OperandType,
528                                         typename Src1DataType::OperandType>(ib,
529                                                                    obj, opcode)
530        {
531        }
532    };
533
534    // helper function for ClassInst
535    template<typename T>
536    bool
537    fpclassify(T src0, uint32_t src1)
538    {
539        int fpclass = std::fpclassify(src0);
540
541        if ((src1 & 0x3) && (fpclass == FP_NAN)) {
542            return true;
543        }
544
545        if (src0 <= -0.0) {
546            if ((src1 & 0x4) && fpclass == FP_INFINITE)
547                return true;
548            if ((src1 & 0x8) && fpclass == FP_NORMAL)
549                return true;
550            if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
551                return true;
552            if ((src1 & 0x20) && fpclass == FP_ZERO)
553                return true;
554        } else {
555            if ((src1 & 0x40) && fpclass == FP_ZERO)
556                return true;
557            if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
558                return true;
559            if ((src1 & 0x100) && fpclass == FP_NORMAL)
560                return true;
561            if ((src1 & 0x200) && fpclass == FP_INFINITE)
562                return true;
563        }
564        return false;
565    }
566
567    template<typename DataType>
568    class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32>
569    {
570      public:
571        ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
572                  const char *opcode)
573            : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode)
574        {
575        }
576    };
577
578    template<typename DataType>
579    class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32>
580    {
581      public:
582        ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
583                  const char *opcode)
584            : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode)
585        {
586        }
587    };
588
589    // helper function for CmpInst
590    template<typename T>
591    bool
592    compare(T src0, T src1, Brig::BrigCompareOperation cmpOp)
593    {
594        using namespace Brig;
595
596        switch (cmpOp) {
597          case BRIG_COMPARE_EQ:
598          case BRIG_COMPARE_EQU:
599          case BRIG_COMPARE_SEQ:
600          case BRIG_COMPARE_SEQU:
601            return (src0 == src1);
602
603          case BRIG_COMPARE_NE:
604          case BRIG_COMPARE_NEU:
605          case BRIG_COMPARE_SNE:
606          case BRIG_COMPARE_SNEU:
607            return (src0 != src1);
608
609          case BRIG_COMPARE_LT:
610          case BRIG_COMPARE_LTU:
611          case BRIG_COMPARE_SLT:
612          case BRIG_COMPARE_SLTU:
613            return (src0 < src1);
614
615          case BRIG_COMPARE_LE:
616          case BRIG_COMPARE_LEU:
617          case BRIG_COMPARE_SLE:
618          case BRIG_COMPARE_SLEU:
619            return (src0 <= src1);
620
621          case BRIG_COMPARE_GT:
622          case BRIG_COMPARE_GTU:
623          case BRIG_COMPARE_SGT:
624          case BRIG_COMPARE_SGTU:
625            return (src0 > src1);
626
627          case BRIG_COMPARE_GE:
628          case BRIG_COMPARE_GEU:
629          case BRIG_COMPARE_SGE:
630          case BRIG_COMPARE_SGEU:
631            return (src0 >= src1);
632
633          case BRIG_COMPARE_NUM:
634          case BRIG_COMPARE_SNUM:
635            return (src0 == src0) || (src1 == src1);
636
637          case BRIG_COMPARE_NAN:
638          case BRIG_COMPARE_SNAN:
639            return (src0 != src0) || (src1 != src1);
640
641          default:
642            fatal("Bad cmpOp value %d\n", (int)cmpOp);
643        }
644    }
645
646    template<typename T>
647    int32_t
648    firstbit(T src0)
649    {
650        if (!src0)
651            return -1;
652
653        //handle positive and negative numbers
654        T tmp = ((int64_t)src0 < 0) ? (~src0) : (src0);
655
656        //the starting pos is MSB
657        int pos = 8 * sizeof(T) - 1;
658        int cnt = 0;
659
660        //search the first bit set to 1
661        while (!(tmp & (1 << pos))) {
662            ++cnt;
663            --pos;
664        }
665        return cnt;
666    }
667
668    const char* cmpOpToString(Brig::BrigCompareOperation cmpOp);
669
670    template<typename DestOperandType, typename SrcOperandType>
671    class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType,
672                                              2>
673    {
674      protected:
675        Brig::BrigCompareOperation cmpOp;
676
677      public:
678        CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
679                    const char *_opcode)
680            : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj,
681                                                                 _opcode)
682        {
683            assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP);
684            Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib;
685            cmpOp = (Brig::BrigCompareOperation)i->compare;
686        }
687    };
688
689    template<typename DestDataType, typename SrcDataType>
690    class CmpInst : public CmpInstBase<typename DestDataType::OperandType,
691                                       typename SrcDataType::OperandType>
692    {
693      public:
694        std::string
695        opcode_suffix()
696        {
697            return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp),
698                            DestDataType::label, SrcDataType::label);
699        }
700
701        CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
702                const char *_opcode)
703            : CmpInstBase<typename DestDataType::OperandType,
704                          typename SrcDataType::OperandType>(ib, obj, _opcode)
705        {
706        }
707    };
708
709    template<typename DestDataType, typename SrcDataType>
710    class CvtInst : public CommonInstBase<typename DestDataType::OperandType,
711                                          typename SrcDataType::OperandType, 1>
712    {
713      public:
714        std::string opcode_suffix()
715        {
716            return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
717        }
718
719        CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
720                const char *_opcode)
721            : CommonInstBase<typename DestDataType::OperandType,
722                             typename SrcDataType::OperandType,
723                             1>(ib, obj, _opcode)
724        {
725        }
726    };
727
728    class SpecialInstNoSrcNoDest : public HsailGPUStaticInst
729    {
730      public:
731        SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib,
732                               const BrigObject *obj, const char *_opcode)
733            : HsailGPUStaticInst(obj, _opcode)
734        {
735        }
736
737        bool isVectorRegister(int operandIndex) { return false; }
738        bool isCondRegister(int operandIndex) { return false; }
739        bool isScalarRegister(int operandIndex) { return false; }
740        bool isSrcOperand(int operandIndex) { return false; }
741        bool isDstOperand(int operandIndex) { return false; }
742        int getOperandSize(int operandIndex) { return 0; }
743
744        int
745        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
746        {
747            return -1;
748        }
749
750        int numSrcRegOperands() { return 0; }
751        int numDstRegOperands() { return 0; }
752        int getNumOperands() { return 0; }
753    };
754
755    template<typename DestOperandType>
756    class SpecialInstNoSrcBase : public HsailGPUStaticInst
757    {
758      protected:
759        typename DestOperandType::DestOperand dest;
760
761        void generateDisassembly()
762        {
763            disassembly = csprintf("%s %s", opcode, dest.disassemble());
764        }
765
766      public:
767        SpecialInstNoSrcBase(const Brig::BrigInstBase *ib,
768                             const BrigObject *obj, const char *_opcode)
769            : HsailGPUStaticInst(obj, _opcode)
770        {
771            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
772            dest.init(op_offs, obj);
773        }
774
775        bool isVectorRegister(int operandIndex) {
776            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
777            return dest.isVectorRegister();
778        }
779        bool isCondRegister(int operandIndex) {
780            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
781            return dest.isCondRegister();
782        }
783        bool isScalarRegister(int operandIndex) {
784            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
785            return dest.isScalarRegister();
786        }
787        bool isSrcOperand(int operandIndex) { return false; }
788        bool isDstOperand(int operandIndex) { return true; }
789        int getOperandSize(int operandIndex) {
790            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
791            return dest.opSize();
792        }
793
794        int
795        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
796        {
797            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
798            return dest.regIndex();
799        }
800
801        int numSrcRegOperands() { return 0; }
802        int numDstRegOperands() { return dest.isVectorRegister(); }
803        int getNumOperands() { return 1; }
804    };
805
806    template<typename DestDataType>
807    class SpecialInstNoSrc :
808        public SpecialInstNoSrcBase<typename DestDataType::OperandType>
809    {
810      public:
811        typedef typename DestDataType::CType DestCType;
812
813        SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj,
814                         const char *_opcode)
815            : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj,
816                                                                       _opcode)
817        {
818        }
819    };
820
821    template<typename DestOperandType>
822    class SpecialInst1SrcBase : public HsailGPUStaticInst
823    {
824      protected:
825        typedef int SrcCType;  // used in execute() template
826
827        typename DestOperandType::DestOperand dest;
828        ImmOperand<SrcCType> src0;
829
830        void
831        generateDisassembly()
832        {
833            disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(),
834                                   src0.disassemble());
835        }
836
837      public:
838        SpecialInst1SrcBase(const Brig::BrigInstBase *ib,
839                            const BrigObject *obj, const char *_opcode)
840            : HsailGPUStaticInst(obj, _opcode)
841        {
842            setFlag(ALU);
843
844            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
845            dest.init(op_offs, obj);
846
847            op_offs = obj->getOperandPtr(ib->operands, 1);
848            src0.init(op_offs, obj);
849        }
850        bool isVectorRegister(int operandIndex) {
851            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
852            return dest.isVectorRegister();
853        }
854        bool isCondRegister(int operandIndex) {
855            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
856            return dest.isCondRegister();
857        }
858        bool isScalarRegister(int operandIndex) {
859            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
860            return dest.isScalarRegister();
861        }
862        bool isSrcOperand(int operandIndex) { return false; }
863        bool isDstOperand(int operandIndex) { return true; }
864        int getOperandSize(int operandIndex) {
865            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
866            return dest.opSize();
867        }
868
869        int
870        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
871        {
872            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
873            return dest.regIndex();
874        }
875
876        int numSrcRegOperands() { return 0; }
877        int numDstRegOperands() { return dest.isVectorRegister(); }
878        int getNumOperands() { return 1; }
879    };
880
881    template<typename DestDataType>
882    class SpecialInst1Src :
883        public SpecialInst1SrcBase<typename DestDataType::OperandType>
884    {
885      public:
886        typedef typename DestDataType::CType DestCType;
887
888        SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj,
889                        const char *_opcode)
890            : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj,
891                                                                      _opcode)
892        {
893        }
894    };
895
896    class Ret : public SpecialInstNoSrcNoDest
897    {
898      public:
899        typedef SpecialInstNoSrcNoDest Base;
900
901        Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
902           : Base(ib, obj, "ret")
903        {
904            setFlag(GPUStaticInst::Return);
905        }
906
907        void execute(GPUDynInstPtr gpuDynInst);
908    };
909
910    class Barrier : public SpecialInstNoSrcNoDest
911    {
912      public:
913        typedef SpecialInstNoSrcNoDest Base;
914        uint8_t width;
915
916        Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
917            : Base(ib, obj, "barrier")
918        {
919            setFlag(GPUStaticInst::MemBarrier);
920            assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
921            width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
922        }
923
924        void execute(GPUDynInstPtr gpuDynInst);
925    };
926
927    class MemFence : public SpecialInstNoSrcNoDest
928    {
929      public:
930        typedef SpecialInstNoSrcNoDest Base;
931
932        Brig::BrigMemoryOrder memFenceMemOrder;
933        Brig::BrigMemoryScope memFenceScopeSegGroup;
934        Brig::BrigMemoryScope memFenceScopeSegGlobal;
935        Brig::BrigMemoryScope memFenceScopeSegImage;
936
937        MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj)
938            : Base(ib, obj, "memfence")
939        {
940            assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE);
941
942            memFenceScopeSegGlobal = (Brig::BrigMemoryScope)
943                ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope;
944
945            memFenceScopeSegGroup = (Brig::BrigMemoryScope)
946                ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope;
947
948            memFenceScopeSegImage = (Brig::BrigMemoryScope)
949                ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope;
950
951            memFenceMemOrder = (Brig::BrigMemoryOrder)
952                ((Brig::BrigInstMemFence*)ib)->memoryOrder;
953
954            setFlag(MemoryRef);
955            setFlag(GPUStaticInst::MemFence);
956
957            switch (memFenceMemOrder) {
958              case Brig::BRIG_MEMORY_ORDER_NONE:
959                setFlag(NoOrder);
960                break;
961              case Brig::BRIG_MEMORY_ORDER_RELAXED:
962                setFlag(RelaxedOrder);
963                break;
964              case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
965                setFlag(Acquire);
966                break;
967              case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
968                setFlag(Release);
969                break;
970              case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
971                setFlag(AcquireRelease);
972                break;
973              default:
974                fatal("MemInst has bad BrigMemoryOrder\n");
975            }
976
977            // set inst flags based on scopes
978            if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
979                memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
980                setFlag(GPUStaticInst::GlobalSegment);
981
982                /**
983                 * A memory fence that has scope for
984                 * both segments will use the global
985                 * segment, and be executed in the
986                 * global memory pipeline, therefore,
987                 * we set the segment to match the
988                 * global scope only
989                 */
990                switch (memFenceScopeSegGlobal) {
991                  case Brig::BRIG_MEMORY_SCOPE_NONE:
992                    setFlag(NoScope);
993                    break;
994                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
995                    setFlag(WorkitemScope);
996                    break;
997                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
998                    setFlag(WorkgroupScope);
999                    break;
1000                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
1001                    setFlag(DeviceScope);
1002                    break;
1003                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1004                    setFlag(SystemScope);
1005                    break;
1006                  default:
1007                    fatal("MemFence has bad global scope type\n");
1008                }
1009            } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
1010                setFlag(GPUStaticInst::GlobalSegment);
1011
1012                switch (memFenceScopeSegGlobal) {
1013                  case Brig::BRIG_MEMORY_SCOPE_NONE:
1014                    setFlag(NoScope);
1015                    break;
1016                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1017                    setFlag(WorkitemScope);
1018                    break;
1019                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1020                    setFlag(WorkgroupScope);
1021                    break;
1022                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
1023                    setFlag(DeviceScope);
1024                    break;
1025                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1026                    setFlag(SystemScope);
1027                    break;
1028                  default:
1029                    fatal("MemFence has bad global scope type\n");
1030                }
1031            } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
1032                setFlag(GPUStaticInst::GroupSegment);
1033
1034                switch (memFenceScopeSegGroup) {
1035                  case Brig::BRIG_MEMORY_SCOPE_NONE:
1036                    setFlag(NoScope);
1037                    break;
1038                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1039                    setFlag(WorkitemScope);
1040                    break;
1041                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1042                    setFlag(WorkgroupScope);
1043                    break;
1044                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
1045                    setFlag(DeviceScope);
1046                    break;
1047                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1048                    setFlag(SystemScope);
1049                    break;
1050                  default:
1051                    fatal("MemFence has bad group scope type\n");
1052                }
1053            } else {
1054                fatal("MemFence constructor: bad scope specifiers\n");
1055            }
1056        }
1057
1058        void
1059        initiateAcc(GPUDynInstPtr gpuDynInst)
1060        {
1061            Wavefront *wave = gpuDynInst->wavefront();
1062            wave->computeUnit->injectGlobalMemFence(gpuDynInst);
1063        }
1064
1065        void
1066        execute(GPUDynInstPtr gpuDynInst)
1067        {
1068            Wavefront *w = gpuDynInst->wavefront();
1069            // 2 cases:
1070            //   * memfence to a sequentially consistent memory (e.g., LDS).
1071            //     These can be handled as no-ops.
1072            //   * memfence to a relaxed consistency cache (e.g., Hermes, Viper,
1073            //     etc.). We send a packet, tagged with the memory order and
1074            //     scope, and let the GPU coalescer handle it.
1075
1076            if (isGlobalSeg()) {
1077                gpuDynInst->simdId = w->simdId;
1078                gpuDynInst->wfSlotId = w->wfSlotId;
1079                gpuDynInst->wfDynId = w->wfDynId;
1080                gpuDynInst->kern_id = w->kernId;
1081                gpuDynInst->cu_id = w->computeUnit->cu_id;
1082
1083                gpuDynInst->useContinuation = false;
1084                GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
1085                gmp->getGMReqFIFO().push(gpuDynInst);
1086
1087                w->wrGmReqsInPipe--;
1088                w->rdGmReqsInPipe--;
1089                w->memReqsInPipe--;
1090                w->outstandingReqs++;
1091            } else if (isGroupSeg()) {
1092                // no-op
1093            } else {
1094                fatal("MemFence execute: bad op type\n");
1095            }
1096        }
1097    };
1098
1099    class Call : public HsailGPUStaticInst
1100    {
1101      public:
1102        // private helper functions
1103        void calcAddr(Wavefront* w, GPUDynInstPtr m);
1104
1105        void
1106        generateDisassembly()
1107        {
1108            if (dest.disassemble() == "") {
1109                disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(),
1110                                       src1.disassemble());
1111            } else {
1112                disassembly = csprintf("%s %s (%s) (%s)", opcode,
1113                                       src0.disassemble(), dest.disassemble(),
1114                                       src1.disassemble());
1115            }
1116        }
1117
1118        bool
1119        isPseudoOp()
1120        {
1121            std::string func_name = src0.disassemble();
1122            if (func_name.find("__gem5_hsail_op") != std::string::npos) {
1123                return true;
1124            }
1125            return false;
1126        }
1127
1128        // member variables
1129        ListOperand dest;
1130        FunctionRefOperand src0;
1131        ListOperand src1;
1132        HsailCode *func_ptr;
1133
1134        // exec function for pseudo instructions mapped on top of call opcode
1135        void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst);
1136
1137        // user-defined pseudo instructions
1138        void MagicPrintLane(Wavefront *w);
1139        void MagicPrintLane64(Wavefront *w);
1140        void MagicPrintWF32(Wavefront *w);
1141        void MagicPrintWF64(Wavefront *w);
1142        void MagicPrintWFFloat(Wavefront *w);
1143        void MagicSimBreak(Wavefront *w);
1144        void MagicPrefixSum(Wavefront *w);
1145        void MagicReduction(Wavefront *w);
1146        void MagicMaskLower(Wavefront *w);
1147        void MagicMaskUpper(Wavefront *w);
1148        void MagicJoinWFBar(Wavefront *w);
1149        void MagicWaitWFBar(Wavefront *w);
1150        void MagicPanic(Wavefront *w);
1151
1152        void MagicAtomicNRAddGlobalU32Reg(Wavefront *w,
1153                                          GPUDynInstPtr gpuDynInst);
1154
1155        void MagicAtomicNRAddGroupU32Reg(Wavefront *w,
1156                                         GPUDynInstPtr gpuDynInst);
1157
1158        void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst);
1159
1160        void MagicXactCasLd(Wavefront *w);
1161        void MagicMostSigThread(Wavefront *w);
1162        void MagicMostSigBroadcast(Wavefront *w);
1163
1164        void MagicPrintWF32ID(Wavefront *w);
1165        void MagicPrintWFID64(Wavefront *w);
1166
1167        Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
1168            : HsailGPUStaticInst(obj, "call")
1169        {
1170            setFlag(ALU);
1171            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1172            dest.init(op_offs, obj);
1173            op_offs = obj->getOperandPtr(ib->operands, 1);
1174            src0.init(op_offs, obj);
1175
1176            func_ptr = nullptr;
1177            std::string func_name = src0.disassemble();
1178            if (!isPseudoOp()) {
1179                func_ptr = dynamic_cast<HsailCode*>(obj->
1180                                                    getFunction(func_name));
1181
1182                if (!func_ptr)
1183                    fatal("call::exec cannot find function: %s\n", func_name);
1184            }
1185
1186            op_offs = obj->getOperandPtr(ib->operands, 2);
1187            src1.init(op_offs, obj);
1188        }
1189
1190        bool isVectorRegister(int operandIndex) { return false; }
1191        bool isCondRegister(int operandIndex) { return false; }
1192        bool isScalarRegister(int operandIndex) { return false; }
1193        bool isSrcOperand(int operandIndex) { return false; }
1194        bool isDstOperand(int operandIndex) { return false; }
1195        int getOperandSize(int operandIndex) { return 0; }
1196
1197        int
1198        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
1199        {
1200            return -1;
1201        }
1202
1203        void
1204        execute(GPUDynInstPtr gpuDynInst)
1205        {
1206            Wavefront *w = gpuDynInst->wavefront();
1207
1208            std::string func_name = src0.disassemble();
1209            if (isPseudoOp()) {
1210                execPseudoInst(w, gpuDynInst);
1211            } else {
1212                fatal("Native HSAIL functions are not yet implemented: %s\n",
1213                      func_name);
1214            }
1215        }
1216        int numSrcRegOperands() { return 0; }
1217        int numDstRegOperands() { return 0; }
1218        int getNumOperands() { return 2; }
1219    };
1220
1221    template<typename T> T heynot(T arg) { return ~arg; }
1222    template<> inline bool heynot<bool>(bool arg) { return !arg; }
1223} // namespace HsailISA
1224
1225#endif // __ARCH_HSAIL_INSTS_DECL_HH__
1226