decl.hh revision 11692:e772fdcd3809
1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __ARCH_HSAIL_INSTS_DECL_HH__
37#define __ARCH_HSAIL_INSTS_DECL_HH__
38
39#include <cmath>
40
41#include "arch/hsail/insts/gpu_static_inst.hh"
42#include "arch/hsail/operand.hh"
43#include "debug/HSAIL.hh"
44#include "gpu-compute/gpu_dyn_inst.hh"
45#include "gpu-compute/shader.hh"
46
47namespace HsailISA
48{
49    template<typename _DestOperand, typename _SrcOperand>
50    class HsailOperandType
51    {
52      public:
53        typedef _DestOperand DestOperand;
54        typedef _SrcOperand SrcOperand;
55    };
56
57    typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType;
58    typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType;
59    typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType;
60
    // The IsBits parameter serves only to disambiguate the B* types from
    // the U* types, which otherwise would be identical (and
    // indistinguishable).
64    template<typename _OperandType, typename _CType, Enums::MemType _memType,
65             vgpr_type _vgprType, int IsBits=0>
66    class HsailDataType
67    {
68      public:
69        typedef _OperandType OperandType;
70        typedef _CType CType;
71        static const Enums::MemType memType = _memType;
72        static const vgpr_type vgprType = _vgprType;
73        static const char *label;
74    };
75
76    typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1;
77    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8;
78
79    typedef HsailDataType<SRegOperandType, uint16_t,
80                          Enums::M_U16, VT_32, 1> B16;
81
82    typedef HsailDataType<SRegOperandType, uint32_t,
83                          Enums::M_U32, VT_32, 1> B32;
84
85    typedef HsailDataType<DRegOperandType, uint64_t,
86                          Enums::M_U64, VT_64, 1> B64;
87
88    typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8;
89    typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16;
90    typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32;
91    typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64;
92
93    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8;
94    typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16;
95    typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32;
96    typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64;
97
98    typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32;
99    typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64;
100
101    template<typename DestOperandType, typename SrcOperandType,
102             int NumSrcOperands>
103    class CommonInstBase : public HsailGPUStaticInst
104    {
105      protected:
106        typename DestOperandType::DestOperand dest;
107        typename SrcOperandType::SrcOperand src[NumSrcOperands];
108
109        void
110        generateDisassembly()
111        {
112            disassembly = csprintf("%s%s %s", opcode, opcode_suffix(),
113                                   dest.disassemble());
114
115            for (int i = 0; i < NumSrcOperands; ++i) {
116                disassembly += ",";
117                disassembly += src[i].disassemble();
118            }
119        }
120
121        virtual std::string opcode_suffix() = 0;
122
123      public:
124        CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
125                       const char *opcode)
126            : HsailGPUStaticInst(obj, opcode)
127        {
128            setFlag(ALU);
129
130            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
131
132            dest.init(op_offs, obj);
133
134            for (int i = 0; i < NumSrcOperands; ++i) {
135                op_offs = obj->getOperandPtr(ib->operands, i + 1);
136                src[i].init(op_offs, obj);
137            }
138        }
139
140        bool isVectorRegister(int operandIndex) {
141            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
142            if (operandIndex < NumSrcOperands)
143                return src[operandIndex].isVectorRegister();
144            else
145                return dest.isVectorRegister();
146        }
147        bool isCondRegister(int operandIndex) {
148            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
149            if (operandIndex < NumSrcOperands)
150                return src[operandIndex].isCondRegister();
151            else
152                return dest.isCondRegister();
153        }
154        bool isScalarRegister(int operandIndex) {
155            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
156            if (operandIndex < NumSrcOperands)
157                return src[operandIndex].isScalarRegister();
158            else
159                return dest.isScalarRegister();
160        }
161        bool isSrcOperand(int operandIndex) {
162            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
163            if (operandIndex < NumSrcOperands)
164                return true;
165            return false;
166        }
167
168        bool isDstOperand(int operandIndex) {
169            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
170            if (operandIndex >= NumSrcOperands)
171                return true;
172            return false;
173        }
174        int getOperandSize(int operandIndex) {
175            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
176            if (operandIndex < NumSrcOperands)
177                return src[operandIndex].opSize();
178            else
179                return dest.opSize();
180        }
181        int getRegisterIndex(int operandIndex) {
182            assert(operandIndex >= 0 && operandIndex < getNumOperands());
183
184            if (operandIndex < NumSrcOperands)
185                return src[operandIndex].regIndex();
186            else
187                return dest.regIndex();
188        }
189        int numSrcRegOperands() {
190            int operands = 0;
191            for (int i = 0; i < NumSrcOperands; i++) {
192                if (src[i].isVectorRegister()) {
193                    operands++;
194                }
195            }
196            return operands;
197        }
198        int numDstRegOperands() { return dest.isVectorRegister(); }
199        int getNumOperands() { return NumSrcOperands + 1; }
200    };
201
202    template<typename DataType, int NumSrcOperands>
203    class ArithInst : public CommonInstBase<typename DataType::OperandType,
204                                            typename DataType::OperandType,
205                                            NumSrcOperands>
206    {
207      public:
208        std::string opcode_suffix() { return csprintf("_%s", DataType::label); }
209
210        ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
211                  const char *opcode)
212            : CommonInstBase<typename DataType::OperandType,
213                             typename DataType::OperandType,
214                             NumSrcOperands>(ib, obj, opcode)
215        {
216        }
217    };
218
219    template<typename DestOperandType, typename Src0OperandType,
220             typename Src1OperandType, typename Src2OperandType>
221    class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst
222    {
223      protected:
224        typename DestOperandType::DestOperand dest;
225        typename Src0OperandType::SrcOperand  src0;
226        typename Src1OperandType::SrcOperand  src1;
227        typename Src2OperandType::SrcOperand  src2;
228
229        void
230        generateDisassembly()
231        {
232            disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(),
233                                   src0.disassemble(), src1.disassemble(),
234                                   src2.disassemble());
235        }
236
237      public:
238        ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
239                                      const BrigObject *obj,
240                                      const char *opcode)
241            : HsailGPUStaticInst(obj, opcode)
242        {
243            setFlag(ALU);
244
245            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
246            dest.init(op_offs, obj);
247
248            op_offs = obj->getOperandPtr(ib->operands, 1);
249            src0.init(op_offs, obj);
250
251            op_offs = obj->getOperandPtr(ib->operands, 2);
252            src1.init(op_offs, obj);
253
254            op_offs = obj->getOperandPtr(ib->operands, 3);
255            src2.init(op_offs, obj);
256        }
257
258        bool isVectorRegister(int operandIndex) {
259            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
260            if (!operandIndex)
261                return src0.isVectorRegister();
262            else if (operandIndex == 1)
263                return src1.isVectorRegister();
264            else if (operandIndex == 2)
265                return src2.isVectorRegister();
266            else
267                return dest.isVectorRegister();
268        }
269        bool isCondRegister(int operandIndex) {
270            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
271            if (!operandIndex)
272                return src0.isCondRegister();
273            else if (operandIndex == 1)
274                return src1.isCondRegister();
275            else if (operandIndex == 2)
276                return src2.isCondRegister();
277            else
278                return dest.isCondRegister();
279        }
280        bool isScalarRegister(int operandIndex) {
281            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
282            if (!operandIndex)
283                return src0.isScalarRegister();
284            else if (operandIndex == 1)
285                return src1.isScalarRegister();
286            else if (operandIndex == 2)
287                return src2.isScalarRegister();
288            else
289                return dest.isScalarRegister();
290        }
291        bool isSrcOperand(int operandIndex) {
292            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
293            if (operandIndex < 3)
294                return true;
295            else
296                return false;
297        }
298        bool isDstOperand(int operandIndex) {
299            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
300            if (operandIndex >= 3)
301                return true;
302            else
303                return false;
304        }
305        int getOperandSize(int operandIndex) {
306            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
307            if (!operandIndex)
308                return src0.opSize();
309            else if (operandIndex == 1)
310                return src1.opSize();
311            else if (operandIndex == 2)
312                return src2.opSize();
313            else
314                return dest.opSize();
315        }
316        int getRegisterIndex(int operandIndex) {
317            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
318            if (!operandIndex)
319                return src0.regIndex();
320            else if (operandIndex == 1)
321                return src1.regIndex();
322            else if (operandIndex == 2)
323                return src2.regIndex();
324            else
325                return dest.regIndex();
326        }
327
328        int numSrcRegOperands() {
329            int operands = 0;
330            if (src0.isVectorRegister()) {
331                operands++;
332            }
333            if (src1.isVectorRegister()) {
334                operands++;
335            }
336            if (src2.isVectorRegister()) {
337                operands++;
338            }
339            return operands;
340        }
341        int numDstRegOperands() { return dest.isVectorRegister(); }
342        int getNumOperands() { return 4; }
343    };
344
345    template<typename DestDataType, typename Src0DataType,
346             typename Src1DataType, typename Src2DataType>
347    class ThreeNonUniformSourceInst :
348        public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
349                                             typename Src0DataType::OperandType,
350                                             typename Src1DataType::OperandType,
351                                             typename Src2DataType::OperandType>
352    {
353      public:
354        typedef typename DestDataType::CType DestCType;
355        typedef typename Src0DataType::CType Src0CType;
356        typedef typename Src1DataType::CType Src1CType;
357        typedef typename Src2DataType::CType Src2CType;
358
359        ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib,
360                                  const BrigObject *obj, const char *opcode)
361            : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
362                                         typename Src0DataType::OperandType,
363                                         typename Src1DataType::OperandType,
364                                         typename Src2DataType::OperandType>(ib,
365                                                                    obj, opcode)
366        {
367        }
368    };
369
370    template<typename DataType>
371    class CmovInst : public ThreeNonUniformSourceInst<DataType, B1,
372                                                      DataType, DataType>
373    {
374      public:
375        CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
376                 const char *opcode)
377            : ThreeNonUniformSourceInst<DataType, B1, DataType,
378                                        DataType>(ib, obj, opcode)
379        {
380        }
381    };
382
383    template<typename DataType>
384    class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType,
385                                                               DataType, U32,
386                                                               U32>
387    {
388      public:
389        ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
390                          const char *opcode)
391            : ThreeNonUniformSourceInst<DataType, DataType, U32,
392                                        U32>(ib, obj, opcode)
393        {
394        }
395    };
396
397    template<typename DestOperandType, typename Src0OperandType,
398             typename Src1OperandType>
399    class TwoNonUniformSourceInstBase : public HsailGPUStaticInst
400    {
401      protected:
402        typename DestOperandType::DestOperand dest;
403        typename Src0OperandType::SrcOperand src0;
404        typename Src1OperandType::SrcOperand src1;
405
406        void
407        generateDisassembly()
408        {
409            disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(),
410                                   src0.disassemble(), src1.disassemble());
411        }
412
413
414      public:
415        TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
416                                    const BrigObject *obj, const char *opcode)
417            : HsailGPUStaticInst(obj, opcode)
418        {
419            setFlag(ALU);
420
421            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
422            dest.init(op_offs, obj);
423
424            op_offs = obj->getOperandPtr(ib->operands, 1);
425            src0.init(op_offs, obj);
426
427            op_offs = obj->getOperandPtr(ib->operands, 2);
428            src1.init(op_offs, obj);
429        }
430        bool isVectorRegister(int operandIndex) {
431            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
432            if (!operandIndex)
433                return src0.isVectorRegister();
434            else if (operandIndex == 1)
435                return src1.isVectorRegister();
436            else
437                return dest.isVectorRegister();
438        }
439        bool isCondRegister(int operandIndex) {
440            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
441            if (!operandIndex)
442                return src0.isCondRegister();
443            else if (operandIndex == 1)
444                return src1.isCondRegister();
445            else
446                return dest.isCondRegister();
447        }
448        bool isScalarRegister(int operandIndex) {
449            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
450            if (!operandIndex)
451                return src0.isScalarRegister();
452            else if (operandIndex == 1)
453                return src1.isScalarRegister();
454            else
455                return dest.isScalarRegister();
456        }
457        bool isSrcOperand(int operandIndex) {
458            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
459            if (operandIndex < 2)
460                return true;
461            else
462                return false;
463        }
464        bool isDstOperand(int operandIndex) {
465            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
466            if (operandIndex >= 2)
467                return true;
468            else
469                return false;
470        }
471        int getOperandSize(int operandIndex) {
472            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
473            if (!operandIndex)
474                return src0.opSize();
475            else if (operandIndex == 1)
476                return src1.opSize();
477            else
478                return dest.opSize();
479        }
480        int getRegisterIndex(int operandIndex) {
481            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
482            if (!operandIndex)
483                return src0.regIndex();
484            else if (operandIndex == 1)
485                return src1.regIndex();
486            else
487                return dest.regIndex();
488        }
489
490        int numSrcRegOperands() {
491            int operands = 0;
492            if (src0.isVectorRegister()) {
493                operands++;
494            }
495            if (src1.isVectorRegister()) {
496                operands++;
497            }
498            return operands;
499        }
500        int numDstRegOperands() { return dest.isVectorRegister(); }
501        int getNumOperands() { return 3; }
502    };
503
504    template<typename DestDataType, typename Src0DataType,
505             typename Src1DataType>
506    class TwoNonUniformSourceInst :
507        public TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
508                                           typename Src0DataType::OperandType,
509                                           typename Src1DataType::OperandType>
510    {
511      public:
512        typedef typename DestDataType::CType DestCType;
513        typedef typename Src0DataType::CType Src0CType;
514        typedef typename Src1DataType::CType Src1CType;
515
516        TwoNonUniformSourceInst(const Brig::BrigInstBase *ib,
517                                const BrigObject *obj, const char *opcode)
518            : TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
519                                         typename Src0DataType::OperandType,
520                                         typename Src1DataType::OperandType>(ib,
521                                                                    obj, opcode)
522        {
523        }
524    };
525
526    // helper function for ClassInst
527    template<typename T>
528    bool
529    fpclassify(T src0, uint32_t src1)
530    {
531        int fpclass = std::fpclassify(src0);
532
533        if ((src1 & 0x3) && (fpclass == FP_NAN)) {
534            return true;
535        }
536
537        if (src0 <= -0.0) {
538            if ((src1 & 0x4) && fpclass == FP_INFINITE)
539                return true;
540            if ((src1 & 0x8) && fpclass == FP_NORMAL)
541                return true;
542            if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
543                return true;
544            if ((src1 & 0x20) && fpclass == FP_ZERO)
545                return true;
546        } else {
547            if ((src1 & 0x40) && fpclass == FP_ZERO)
548                return true;
549            if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
550                return true;
551            if ((src1 & 0x100) && fpclass == FP_NORMAL)
552                return true;
553            if ((src1 & 0x200) && fpclass == FP_INFINITE)
554                return true;
555        }
556        return false;
557    }
558
559    template<typename DataType>
560    class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32>
561    {
562      public:
563        ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
564                  const char *opcode)
565            : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode)
566        {
567        }
568    };
569
570    template<typename DataType>
571    class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32>
572    {
573      public:
574        ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
575                  const char *opcode)
576            : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode)
577        {
578        }
579    };
580
581    // helper function for CmpInst
582    template<typename T>
583    bool
584    compare(T src0, T src1, Brig::BrigCompareOperation cmpOp)
585    {
586        using namespace Brig;
587
588        switch (cmpOp) {
589          case BRIG_COMPARE_EQ:
590          case BRIG_COMPARE_EQU:
591          case BRIG_COMPARE_SEQ:
592          case BRIG_COMPARE_SEQU:
593            return (src0 == src1);
594
595          case BRIG_COMPARE_NE:
596          case BRIG_COMPARE_NEU:
597          case BRIG_COMPARE_SNE:
598          case BRIG_COMPARE_SNEU:
599            return (src0 != src1);
600
601          case BRIG_COMPARE_LT:
602          case BRIG_COMPARE_LTU:
603          case BRIG_COMPARE_SLT:
604          case BRIG_COMPARE_SLTU:
605            return (src0 < src1);
606
607          case BRIG_COMPARE_LE:
608          case BRIG_COMPARE_LEU:
609          case BRIG_COMPARE_SLE:
610          case BRIG_COMPARE_SLEU:
611            return (src0 <= src1);
612
613          case BRIG_COMPARE_GT:
614          case BRIG_COMPARE_GTU:
615          case BRIG_COMPARE_SGT:
616          case BRIG_COMPARE_SGTU:
617            return (src0 > src1);
618
619          case BRIG_COMPARE_GE:
620          case BRIG_COMPARE_GEU:
621          case BRIG_COMPARE_SGE:
622          case BRIG_COMPARE_SGEU:
623            return (src0 >= src1);
624
625          case BRIG_COMPARE_NUM:
626          case BRIG_COMPARE_SNUM:
627            return (src0 == src0) || (src1 == src1);
628
629          case BRIG_COMPARE_NAN:
630          case BRIG_COMPARE_SNAN:
631            return (src0 != src0) || (src1 != src1);
632
633          default:
634            fatal("Bad cmpOp value %d\n", (int)cmpOp);
635        }
636    }
637
638    template<typename T>
639    int32_t
640    firstbit(T src0)
641    {
642        if (!src0)
643            return -1;
644
645        //handle positive and negative numbers
646        T tmp = (src0 < 0) ? (~src0) : (src0);
647
648        //the starting pos is MSB
649        int pos = 8 * sizeof(T) - 1;
650        int cnt = 0;
651
652        //search the first bit set to 1
653        while (!(tmp & (1 << pos))) {
654            ++cnt;
655            --pos;
656        }
657        return cnt;
658    }
659
    // Returns a C string for a BRIG compare operation; CmpInst's
    // opcode_suffix() embeds it in the disassembly suffix.  Declared here;
    // the definition is not in this part of the file.
    const char* cmpOpToString(Brig::BrigCompareOperation cmpOp);
661
662    template<typename DestOperandType, typename SrcOperandType>
663    class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType,
664                                              2>
665    {
666      protected:
667        Brig::BrigCompareOperation cmpOp;
668
669      public:
670        CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
671                    const char *_opcode)
672            : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj,
673                                                                 _opcode)
674        {
675            assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP);
676            Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib;
677            cmpOp = (Brig::BrigCompareOperation)i->compare;
678        }
679    };
680
681    template<typename DestDataType, typename SrcDataType>
682    class CmpInst : public CmpInstBase<typename DestDataType::OperandType,
683                                       typename SrcDataType::OperandType>
684    {
685      public:
686        std::string
687        opcode_suffix()
688        {
689            return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp),
690                            DestDataType::label, SrcDataType::label);
691        }
692
693        CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
694                const char *_opcode)
695            : CmpInstBase<typename DestDataType::OperandType,
696                          typename SrcDataType::OperandType>(ib, obj, _opcode)
697        {
698        }
699    };
700
701    template<typename DestDataType, typename SrcDataType>
702    class CvtInst : public CommonInstBase<typename DestDataType::OperandType,
703                                          typename SrcDataType::OperandType, 1>
704    {
705      public:
706        std::string opcode_suffix()
707        {
708            return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
709        }
710
711        CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
712                const char *_opcode)
713            : CommonInstBase<typename DestDataType::OperandType,
714                             typename SrcDataType::OperandType,
715                             1>(ib, obj, _opcode)
716        {
717        }
718    };
719
720    class SpecialInstNoSrcNoDest : public HsailGPUStaticInst
721    {
722      public:
723        SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib,
724                               const BrigObject *obj, const char *_opcode)
725            : HsailGPUStaticInst(obj, _opcode)
726        {
727        }
728
729        bool isVectorRegister(int operandIndex) { return false; }
730        bool isCondRegister(int operandIndex) { return false; }
731        bool isScalarRegister(int operandIndex) { return false; }
732        bool isSrcOperand(int operandIndex) { return false; }
733        bool isDstOperand(int operandIndex) { return false; }
734        int getOperandSize(int operandIndex) { return 0; }
735        int getRegisterIndex(int operandIndex) { return -1; }
736
737        int numSrcRegOperands() { return 0; }
738        int numDstRegOperands() { return 0; }
739        int getNumOperands() { return 0; }
740    };
741
742    template<typename DestOperandType>
743    class SpecialInstNoSrcBase : public HsailGPUStaticInst
744    {
745      protected:
746        typename DestOperandType::DestOperand dest;
747
748        void generateDisassembly()
749        {
750            disassembly = csprintf("%s %s", opcode, dest.disassemble());
751        }
752
753      public:
754        SpecialInstNoSrcBase(const Brig::BrigInstBase *ib,
755                             const BrigObject *obj, const char *_opcode)
756            : HsailGPUStaticInst(obj, _opcode)
757        {
758            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
759            dest.init(op_offs, obj);
760        }
761
762        bool isVectorRegister(int operandIndex) {
763            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
764            return dest.isVectorRegister();
765        }
766        bool isCondRegister(int operandIndex) {
767            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
768            return dest.isCondRegister();
769        }
770        bool isScalarRegister(int operandIndex) {
771            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
772            return dest.isScalarRegister();
773        }
774        bool isSrcOperand(int operandIndex) { return false; }
775        bool isDstOperand(int operandIndex) { return true; }
776        int getOperandSize(int operandIndex) {
777            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
778            return dest.opSize();
779        }
780        int getRegisterIndex(int operandIndex) {
781            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
782            return dest.regIndex();
783        }
784        int numSrcRegOperands() { return 0; }
785        int numDstRegOperands() { return dest.isVectorRegister(); }
786        int getNumOperands() { return 1; }
787    };
788
789    template<typename DestDataType>
790    class SpecialInstNoSrc :
791        public SpecialInstNoSrcBase<typename DestDataType::OperandType>
792    {
793      public:
794        typedef typename DestDataType::CType DestCType;
795
796        SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj,
797                         const char *_opcode)
798            : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj,
799                                                                       _opcode)
800        {
801        }
802    };
803
804    template<typename DestOperandType>
805    class SpecialInst1SrcBase : public HsailGPUStaticInst
806    {
807      protected:
808        typedef int SrcCType;  // used in execute() template
809
810        typename DestOperandType::DestOperand dest;
811        ImmOperand<SrcCType> src0;
812
813        void
814        generateDisassembly()
815        {
816            disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(),
817                                   src0.disassemble());
818        }
819
820      public:
821        SpecialInst1SrcBase(const Brig::BrigInstBase *ib,
822                            const BrigObject *obj, const char *_opcode)
823            : HsailGPUStaticInst(obj, _opcode)
824        {
825            setFlag(ALU);
826
827            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
828            dest.init(op_offs, obj);
829
830            op_offs = obj->getOperandPtr(ib->operands, 1);
831            src0.init(op_offs, obj);
832        }
833        bool isVectorRegister(int operandIndex) {
834            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
835            return dest.isVectorRegister();
836        }
837        bool isCondRegister(int operandIndex) {
838            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
839            return dest.isCondRegister();
840        }
841        bool isScalarRegister(int operandIndex) {
842            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
843            return dest.isScalarRegister();
844        }
845        bool isSrcOperand(int operandIndex) { return false; }
846        bool isDstOperand(int operandIndex) { return true; }
847        int getOperandSize(int operandIndex) {
848            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
849            return dest.opSize();
850        }
851        int getRegisterIndex(int operandIndex) {
852            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
853            return dest.regIndex();
854        }
855        int numSrcRegOperands() { return 0; }
856        int numDstRegOperands() { return dest.isVectorRegister(); }
857        int getNumOperands() { return 1; }
858    };
859
860    template<typename DestDataType>
861    class SpecialInst1Src :
862        public SpecialInst1SrcBase<typename DestDataType::OperandType>
863    {
864      public:
865        typedef typename DestDataType::CType DestCType;
866
867        SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj,
868                        const char *_opcode)
869            : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj,
870                                                                      _opcode)
871        {
872        }
873    };
874
875    class Ret : public SpecialInstNoSrcNoDest
876    {
877      public:
878        typedef SpecialInstNoSrcNoDest Base;
879
880        Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
881           : Base(ib, obj, "ret")
882        {
883            setFlag(GPUStaticInst::Return);
884        }
885
886        void execute(GPUDynInstPtr gpuDynInst);
887    };
888
889    class Barrier : public SpecialInstNoSrcNoDest
890    {
891      public:
892        typedef SpecialInstNoSrcNoDest Base;
893        uint8_t width;
894
895        Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
896            : Base(ib, obj, "barrier")
897        {
898            setFlag(GPUStaticInst::MemBarrier);
899            assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
900            width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
901        }
902
903        void execute(GPUDynInstPtr gpuDynInst);
904    };
905
906    class MemFence : public SpecialInstNoSrcNoDest
907    {
908      public:
909        typedef SpecialInstNoSrcNoDest Base;
910
911        Brig::BrigMemoryOrder memFenceMemOrder;
912        Brig::BrigMemoryScope memFenceScopeSegGroup;
913        Brig::BrigMemoryScope memFenceScopeSegGlobal;
914        Brig::BrigMemoryScope memFenceScopeSegImage;
915
916        MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj)
917            : Base(ib, obj, "memfence")
918        {
919            assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE);
920
921            memFenceScopeSegGlobal = (Brig::BrigMemoryScope)
922                ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope;
923
924            memFenceScopeSegGroup = (Brig::BrigMemoryScope)
925                ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope;
926
927            memFenceScopeSegImage = (Brig::BrigMemoryScope)
928                ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope;
929
930            memFenceMemOrder = (Brig::BrigMemoryOrder)
931                ((Brig::BrigInstMemFence*)ib)->memoryOrder;
932
933            setFlag(MemoryRef);
934            setFlag(GPUStaticInst::MemFence);
935
936            switch (memFenceMemOrder) {
937              case Brig::BRIG_MEMORY_ORDER_NONE:
938                setFlag(NoOrder);
939                break;
940              case Brig::BRIG_MEMORY_ORDER_RELAXED:
941                setFlag(RelaxedOrder);
942                break;
943              case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
944                setFlag(Acquire);
945                break;
946              case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
947                setFlag(Release);
948                break;
949              case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
950                setFlag(AcquireRelease);
951                break;
952              default:
953                fatal("MemInst has bad BrigMemoryOrder\n");
954            }
955
956            // set inst flags based on scopes
957            if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
958                memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
959                setFlag(GPUStaticInst::GlobalSegment);
960
961                /**
962                 * A memory fence that has scope for
963                 * both segments will use the global
964                 * segment, and be executed in the
965                 * global memory pipeline, therefore,
966                 * we set the segment to match the
967                 * global scope only
968                 */
969                switch (memFenceScopeSegGlobal) {
970                  case Brig::BRIG_MEMORY_SCOPE_NONE:
971                    setFlag(NoScope);
972                    break;
973                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
974                    setFlag(WorkitemScope);
975                    break;
976                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
977                    setFlag(WorkgroupScope);
978                    break;
979                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
980                    setFlag(DeviceScope);
981                    break;
982                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
983                    setFlag(SystemScope);
984                    break;
985                  default:
986                    fatal("MemFence has bad global scope type\n");
987                }
988            } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
989                setFlag(GPUStaticInst::GlobalSegment);
990
991                switch (memFenceScopeSegGlobal) {
992                  case Brig::BRIG_MEMORY_SCOPE_NONE:
993                    setFlag(NoScope);
994                    break;
995                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
996                    setFlag(WorkitemScope);
997                    break;
998                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
999                    setFlag(WorkgroupScope);
1000                    break;
1001                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
1002                    setFlag(DeviceScope);
1003                    break;
1004                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1005                    setFlag(SystemScope);
1006                    break;
1007                  default:
1008                    fatal("MemFence has bad global scope type\n");
1009                }
1010            } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
1011                setFlag(GPUStaticInst::GroupSegment);
1012
1013                switch (memFenceScopeSegGroup) {
1014                  case Brig::BRIG_MEMORY_SCOPE_NONE:
1015                    setFlag(NoScope);
1016                    break;
1017                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1018                    setFlag(WorkitemScope);
1019                    break;
1020                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1021                    setFlag(WorkgroupScope);
1022                    break;
1023                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
1024                    setFlag(DeviceScope);
1025                    break;
1026                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1027                    setFlag(SystemScope);
1028                    break;
1029                  default:
1030                    fatal("MemFence has bad group scope type\n");
1031                }
1032            } else {
1033                fatal("MemFence constructor: bad scope specifiers\n");
1034            }
1035        }
1036
1037        void
1038        initiateAcc(GPUDynInstPtr gpuDynInst)
1039        {
1040            Wavefront *wave = gpuDynInst->wavefront();
1041            wave->computeUnit->injectGlobalMemFence(gpuDynInst);
1042        }
1043
1044        void
1045        execute(GPUDynInstPtr gpuDynInst)
1046        {
1047            Wavefront *w = gpuDynInst->wavefront();
1048            // 2 cases:
1049            //   * memfence to a sequentially consistent memory (e.g., LDS).
1050            //     These can be handled as no-ops.
1051            //   * memfence to a relaxed consistency cache (e.g., Hermes, Viper,
1052            //     etc.). We send a packet, tagged with the memory order and
1053            //     scope, and let the GPU coalescer handle it.
1054
1055            if (isGlobalSeg()) {
1056                gpuDynInst->simdId = w->simdId;
1057                gpuDynInst->wfSlotId = w->wfSlotId;
1058                gpuDynInst->wfDynId = w->wfDynId;
1059                gpuDynInst->kern_id = w->kernId;
1060                gpuDynInst->cu_id = w->computeUnit->cu_id;
1061
1062                gpuDynInst->useContinuation = false;
1063                GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
1064                gmp->getGMReqFIFO().push(gpuDynInst);
1065
1066                w->wrGmReqsInPipe--;
1067                w->rdGmReqsInPipe--;
1068                w->memReqsInPipe--;
1069                w->outstandingReqs++;
1070            } else if (isGroupSeg()) {
1071                // no-op
1072            } else {
1073                fatal("MemFence execute: bad op type\n");
1074            }
1075        }
1076    };
1077
1078    class Call : public HsailGPUStaticInst
1079    {
1080      public:
1081        // private helper functions
1082        void calcAddr(Wavefront* w, GPUDynInstPtr m);
1083
1084        void
1085        generateDisassembly()
1086        {
1087            if (dest.disassemble() == "") {
1088                disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(),
1089                                       src1.disassemble());
1090            } else {
1091                disassembly = csprintf("%s %s (%s) (%s)", opcode,
1092                                       src0.disassemble(), dest.disassemble(),
1093                                       src1.disassemble());
1094            }
1095        }
1096
1097        bool
1098        isPseudoOp()
1099        {
1100            std::string func_name = src0.disassemble();
1101            if (func_name.find("__gem5_hsail_op") != std::string::npos) {
1102                return true;
1103            }
1104            return false;
1105        }
1106
1107        // member variables
1108        ListOperand dest;
1109        FunctionRefOperand src0;
1110        ListOperand src1;
1111        HsailCode *func_ptr;
1112
1113        // exec function for pseudo instructions mapped on top of call opcode
1114        void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst);
1115
1116        // user-defined pseudo instructions
1117        void MagicPrintLane(Wavefront *w);
1118        void MagicPrintLane64(Wavefront *w);
1119        void MagicPrintWF32(Wavefront *w);
1120        void MagicPrintWF64(Wavefront *w);
1121        void MagicPrintWFFloat(Wavefront *w);
1122        void MagicSimBreak(Wavefront *w);
1123        void MagicPrefixSum(Wavefront *w);
1124        void MagicReduction(Wavefront *w);
1125        void MagicMaskLower(Wavefront *w);
1126        void MagicMaskUpper(Wavefront *w);
1127        void MagicJoinWFBar(Wavefront *w);
1128        void MagicWaitWFBar(Wavefront *w);
1129        void MagicPanic(Wavefront *w);
1130
1131        void MagicAtomicNRAddGlobalU32Reg(Wavefront *w,
1132                                          GPUDynInstPtr gpuDynInst);
1133
1134        void MagicAtomicNRAddGroupU32Reg(Wavefront *w,
1135                                         GPUDynInstPtr gpuDynInst);
1136
1137        void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst);
1138
1139        void MagicXactCasLd(Wavefront *w);
1140        void MagicMostSigThread(Wavefront *w);
1141        void MagicMostSigBroadcast(Wavefront *w);
1142
1143        void MagicPrintWF32ID(Wavefront *w);
1144        void MagicPrintWFID64(Wavefront *w);
1145
1146        Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
1147            : HsailGPUStaticInst(obj, "call")
1148        {
1149            setFlag(ALU);
1150            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1151            dest.init(op_offs, obj);
1152            op_offs = obj->getOperandPtr(ib->operands, 1);
1153            src0.init(op_offs, obj);
1154
1155            func_ptr = nullptr;
1156            std::string func_name = src0.disassemble();
1157            if (!isPseudoOp()) {
1158                func_ptr = dynamic_cast<HsailCode*>(obj->
1159                                                    getFunction(func_name));
1160
1161                if (!func_ptr)
1162                    fatal("call::exec cannot find function: %s\n", func_name);
1163            }
1164
1165            op_offs = obj->getOperandPtr(ib->operands, 2);
1166            src1.init(op_offs, obj);
1167        }
1168
1169        bool isVectorRegister(int operandIndex) { return false; }
1170        bool isCondRegister(int operandIndex) { return false; }
1171        bool isScalarRegister(int operandIndex) { return false; }
1172        bool isSrcOperand(int operandIndex) { return false; }
1173        bool isDstOperand(int operandIndex) { return false; }
1174        int  getOperandSize(int operandIndex) { return 0; }
1175        int  getRegisterIndex(int operandIndex) { return -1; }
1176
1177        void
1178        execute(GPUDynInstPtr gpuDynInst)
1179        {
1180            Wavefront *w = gpuDynInst->wavefront();
1181
1182            std::string func_name = src0.disassemble();
1183            if (isPseudoOp()) {
1184                execPseudoInst(w, gpuDynInst);
1185            } else {
1186                fatal("Native HSAIL functions are not yet implemented: %s\n",
1187                      func_name);
1188            }
1189        }
1190        int numSrcRegOperands() { return 0; }
1191        int numDstRegOperands() { return 0; }
1192        int getNumOperands() { return 2; }
1193    };
1194
1195    template<typename T> T heynot(T arg) { return ~arg; }
1196    template<> inline bool heynot<bool>(bool arg) { return !arg; }
1197} // namespace HsailISA
1198
1199#endif // __ARCH_HSAIL_INSTS_DECL_HH__
1200