decl.hh revision 11325:67cc559d513a
1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __ARCH_HSAIL_INSTS_DECL_HH__
37#define __ARCH_HSAIL_INSTS_DECL_HH__
38
39#include <cmath>
40
41#include "arch/hsail/generic_types.hh"
42#include "arch/hsail/insts/gpu_static_inst.hh"
43#include "arch/hsail/operand.hh"
44#include "debug/HSAIL.hh"
45#include "enums/OpType.hh"
46#include "gpu-compute/gpu_dyn_inst.hh"
47#include "gpu-compute/shader.hh"
48
49namespace HsailISA
50{
51    template<typename _DestOperand, typename _SrcOperand>
52    class HsailOperandType
53    {
54      public:
55        typedef _DestOperand DestOperand;
56        typedef _SrcOperand SrcOperand;
57    };
58
    // Destination/source operand pairings, one per register operand class.
    // Each pairs the plain register operand (destination) with the matching
    // "OrImm" operand (sources).  The operand classes themselves are
    // declared outside this file; going by their names the source variants
    // also accept immediates -- TODO confirm against operand.hh.
    typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType;
    typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType;
    typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType;
62
    // The IsBits parameter serves only to disambiguate the B* types from
    // the U* types, which otherwise would be identical (and
    // indistinguishable).
66    template<typename _OperandType, typename _CType, Enums::MemType _memType,
67             vgpr_type _vgprType, int IsBits=0>
68    class HsailDataType
69    {
70      public:
71        typedef _OperandType OperandType;
72        typedef _CType CType;
73        static const Enums::MemType memType = _memType;
74        static const vgpr_type vgprType = _vgprType;
75        static const char *label;
76    };
77
    // Bit types.  These pass 1 for IsBits so that each B* type is a
    // distinct instantiation from the U* type with otherwise identical
    // arguments.  B1 is the only type here that uses the CReg operand
    // pairing, and its host type is bool.
    typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1;
    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8;

    typedef HsailDataType<SRegOperandType, uint16_t,
                          Enums::M_U16, VT_32, 1> B16;

    typedef HsailDataType<SRegOperandType, uint32_t,
                          Enums::M_U32, VT_32, 1> B32;

    typedef HsailDataType<DRegOperandType, uint64_t,
                          Enums::M_U64, VT_64, 1> B64;

    // Signed integer types.  As for every group below, the 64-bit type
    // uses the DReg operand pairing with VT_64 and the narrower types use
    // the SReg operand pairing with VT_32.
    typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8;
    typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16;
    typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32;
    typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64;

    // Unsigned integer types (IsBits left at its default of 0).
    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8;
    typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16;
    typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32;
    typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64;

    // Floating-point types.
    typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32;
    typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64;
102
103    template<typename DestOperandType, typename SrcOperandType,
104             int NumSrcOperands>
105    class CommonInstBase : public HsailGPUStaticInst
106    {
107      protected:
108        typename DestOperandType::DestOperand dest;
109        typename SrcOperandType::SrcOperand src[NumSrcOperands];
110
111        void
112        generateDisassembly()
113        {
114            disassembly = csprintf("%s%s %s", opcode, opcode_suffix(),
115                                   dest.disassemble());
116
117            for (int i = 0; i < NumSrcOperands; ++i) {
118                disassembly += ",";
119                disassembly += src[i].disassemble();
120            }
121        }
122
123        virtual std::string opcode_suffix() = 0;
124
125      public:
126        CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
127                       const char *opcode)
128            : HsailGPUStaticInst(obj, opcode)
129        {
130            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
131
132            dest.init(op_offs, obj);
133
134            for (int i = 0; i < NumSrcOperands; ++i) {
135                op_offs = obj->getOperandPtr(ib->operands, i + 1);
136                src[i].init(op_offs, obj);
137            }
138        }
139
140        bool isVectorRegister(int operandIndex) {
141            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
142            if (operandIndex < NumSrcOperands)
143                return src[operandIndex].isVectorRegister();
144            else
145                return dest.isVectorRegister();
146        }
147        bool isCondRegister(int operandIndex) {
148            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
149            if (operandIndex < NumSrcOperands)
150                return src[operandIndex].isCondRegister();
151            else
152                return dest.isCondRegister();
153        }
154        bool isScalarRegister(int operandIndex) {
155            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
156            if (operandIndex < NumSrcOperands)
157                return src[operandIndex].isScalarRegister();
158            else
159                return dest.isScalarRegister();
160        }
161        bool isSrcOperand(int operandIndex) {
162            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
163            if (operandIndex < NumSrcOperands)
164                return true;
165            return false;
166        }
167
168        bool isDstOperand(int operandIndex) {
169            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
170            if (operandIndex >= NumSrcOperands)
171                return true;
172            return false;
173        }
174        int getOperandSize(int operandIndex) {
175            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
176            if (operandIndex < NumSrcOperands)
177                return src[operandIndex].opSize();
178            else
179                return dest.opSize();
180        }
181        int getRegisterIndex(int operandIndex) {
182            assert(operandIndex >= 0 && operandIndex < getNumOperands());
183
184            if (operandIndex < NumSrcOperands)
185                return src[operandIndex].regIndex();
186            else
187                return dest.regIndex();
188        }
189        int numSrcRegOperands() {
190            int operands = 0;
191            for (int i = 0; i < NumSrcOperands; i++) {
192                if (src[i].isVectorRegister()) {
193                    operands++;
194                }
195            }
196            return operands;
197        }
198        int numDstRegOperands() { return dest.isVectorRegister(); }
199        int getNumOperands() { return NumSrcOperands + 1; }
200    };
201
202    template<typename DataType, int NumSrcOperands>
203    class ArithInst : public CommonInstBase<typename DataType::OperandType,
204                                            typename DataType::OperandType,
205                                            NumSrcOperands>
206    {
207      public:
208        std::string opcode_suffix() { return csprintf("_%s", DataType::label); }
209
210        ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
211                  const char *opcode)
212            : CommonInstBase<typename DataType::OperandType,
213                             typename DataType::OperandType,
214                             NumSrcOperands>(ib, obj, opcode)
215        {
216        }
217    };
218
219    template<typename DestOperandType, typename Src0OperandType,
220             typename Src1OperandType, typename Src2OperandType>
221    class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst
222    {
223      protected:
224        typename DestOperandType::DestOperand dest;
225        typename Src0OperandType::SrcOperand  src0;
226        typename Src1OperandType::SrcOperand  src1;
227        typename Src2OperandType::SrcOperand  src2;
228
229        void
230        generateDisassembly()
231        {
232            disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(),
233                                   src0.disassemble(), src1.disassemble(),
234                                   src2.disassemble());
235        }
236
237      public:
238        ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
239                                      const BrigObject *obj,
240                                      const char *opcode)
241            : HsailGPUStaticInst(obj, opcode)
242        {
243            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
244            dest.init(op_offs, obj);
245
246            op_offs = obj->getOperandPtr(ib->operands, 1);
247            src0.init(op_offs, obj);
248
249            op_offs = obj->getOperandPtr(ib->operands, 2);
250            src1.init(op_offs, obj);
251
252            op_offs = obj->getOperandPtr(ib->operands, 3);
253            src2.init(op_offs, obj);
254        }
255
256        bool isVectorRegister(int operandIndex) {
257            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
258            if (!operandIndex)
259                return src0.isVectorRegister();
260            else if (operandIndex == 1)
261                return src1.isVectorRegister();
262            else if (operandIndex == 2)
263                return src2.isVectorRegister();
264            else
265                return dest.isVectorRegister();
266        }
267        bool isCondRegister(int operandIndex) {
268            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
269            if (!operandIndex)
270                return src0.isCondRegister();
271            else if (operandIndex == 1)
272                return src1.isCondRegister();
273            else if (operandIndex == 2)
274                return src2.isCondRegister();
275            else
276                return dest.isCondRegister();
277        }
278        bool isScalarRegister(int operandIndex) {
279            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
280            if (!operandIndex)
281                return src0.isScalarRegister();
282            else if (operandIndex == 1)
283                return src1.isScalarRegister();
284            else if (operandIndex == 2)
285                return src2.isScalarRegister();
286            else
287                return dest.isScalarRegister();
288        }
289        bool isSrcOperand(int operandIndex) {
290            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
291            if (operandIndex < 3)
292                return true;
293            else
294                return false;
295        }
296        bool isDstOperand(int operandIndex) {
297            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
298            if (operandIndex >= 3)
299                return true;
300            else
301                return false;
302        }
303        int getOperandSize(int operandIndex) {
304            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
305            if (!operandIndex)
306                return src0.opSize();
307            else if (operandIndex == 1)
308                return src1.opSize();
309            else if (operandIndex == 2)
310                return src2.opSize();
311            else
312                return dest.opSize();
313        }
314        int getRegisterIndex(int operandIndex) {
315            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
316            if (!operandIndex)
317                return src0.regIndex();
318            else if (operandIndex == 1)
319                return src1.regIndex();
320            else if (operandIndex == 2)
321                return src2.regIndex();
322            else
323                return dest.regIndex();
324        }
325
326        int numSrcRegOperands() {
327            int operands = 0;
328            if (src0.isVectorRegister()) {
329                operands++;
330            }
331            if (src1.isVectorRegister()) {
332                operands++;
333            }
334            if (src2.isVectorRegister()) {
335                operands++;
336            }
337            return operands;
338        }
339        int numDstRegOperands() { return dest.isVectorRegister(); }
340        int getNumOperands() { return 4; }
341    };
342
343    template<typename DestDataType, typename Src0DataType,
344             typename Src1DataType, typename Src2DataType>
345    class ThreeNonUniformSourceInst :
346        public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
347                                             typename Src0DataType::OperandType,
348                                             typename Src1DataType::OperandType,
349                                             typename Src2DataType::OperandType>
350    {
351      public:
352        typedef typename DestDataType::CType DestCType;
353        typedef typename Src0DataType::CType Src0CType;
354        typedef typename Src1DataType::CType Src1CType;
355        typedef typename Src2DataType::CType Src2CType;
356
357        ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib,
358                                  const BrigObject *obj, const char *opcode)
359            : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
360                                         typename Src0DataType::OperandType,
361                                         typename Src1DataType::OperandType,
362                                         typename Src2DataType::OperandType>(ib,
363                                                                    obj, opcode)
364        {
365        }
366    };
367
368    template<typename DataType>
369    class CmovInst : public ThreeNonUniformSourceInst<DataType, B1,
370                                                      DataType, DataType>
371    {
372      public:
373        CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
374                 const char *opcode)
375            : ThreeNonUniformSourceInst<DataType, B1, DataType,
376                                        DataType>(ib, obj, opcode)
377        {
378        }
379    };
380
381    template<typename DataType>
382    class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType,
383                                                               DataType, U32,
384                                                               U32>
385    {
386      public:
387        ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
388                          const char *opcode)
389            : ThreeNonUniformSourceInst<DataType, DataType, U32,
390                                        U32>(ib, obj, opcode)
391        {
392        }
393    };
394
395    template<typename DestOperandType, typename Src0OperandType,
396             typename Src1OperandType>
397    class TwoNonUniformSourceInstBase : public HsailGPUStaticInst
398    {
399      protected:
400        typename DestOperandType::DestOperand dest;
401        typename Src0OperandType::SrcOperand src0;
402        typename Src1OperandType::SrcOperand src1;
403
404        void
405        generateDisassembly()
406        {
407            disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(),
408                                   src0.disassemble(), src1.disassemble());
409        }
410
411
412      public:
413        TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
414                                    const BrigObject *obj, const char *opcode)
415            : HsailGPUStaticInst(obj, opcode)
416        {
417            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
418            dest.init(op_offs, obj);
419
420            op_offs = obj->getOperandPtr(ib->operands, 1);
421            src0.init(op_offs, obj);
422
423            op_offs = obj->getOperandPtr(ib->operands, 2);
424            src1.init(op_offs, obj);
425        }
426        bool isVectorRegister(int operandIndex) {
427            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
428            if (!operandIndex)
429                return src0.isVectorRegister();
430            else if (operandIndex == 1)
431                return src1.isVectorRegister();
432            else
433                return dest.isVectorRegister();
434        }
435        bool isCondRegister(int operandIndex) {
436            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
437            if (!operandIndex)
438                return src0.isCondRegister();
439            else if (operandIndex == 1)
440                return src1.isCondRegister();
441            else
442                return dest.isCondRegister();
443        }
444        bool isScalarRegister(int operandIndex) {
445            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
446            if (!operandIndex)
447                return src0.isScalarRegister();
448            else if (operandIndex == 1)
449                return src1.isScalarRegister();
450            else
451                return dest.isScalarRegister();
452        }
453        bool isSrcOperand(int operandIndex) {
454            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
455            if (operandIndex < 2)
456                return true;
457            else
458                return false;
459        }
460        bool isDstOperand(int operandIndex) {
461            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
462            if (operandIndex >= 2)
463                return true;
464            else
465                return false;
466        }
467        int getOperandSize(int operandIndex) {
468            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
469            if (!operandIndex)
470                return src0.opSize();
471            else if (operandIndex == 1)
472                return src1.opSize();
473            else
474                return dest.opSize();
475        }
476        int getRegisterIndex(int operandIndex) {
477            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
478            if (!operandIndex)
479                return src0.regIndex();
480            else if (operandIndex == 1)
481                return src1.regIndex();
482            else
483                return dest.regIndex();
484        }
485
486        int numSrcRegOperands() {
487            int operands = 0;
488            if (src0.isVectorRegister()) {
489                operands++;
490            }
491            if (src1.isVectorRegister()) {
492                operands++;
493            }
494            return operands;
495        }
496        int numDstRegOperands() { return dest.isVectorRegister(); }
497        int getNumOperands() { return 3; }
498    };
499
500    template<typename DestDataType, typename Src0DataType,
501             typename Src1DataType>
502    class TwoNonUniformSourceInst :
503        public TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
504                                           typename Src0DataType::OperandType,
505                                           typename Src1DataType::OperandType>
506    {
507      public:
508        typedef typename DestDataType::CType DestCType;
509        typedef typename Src0DataType::CType Src0CType;
510        typedef typename Src1DataType::CType Src1CType;
511
512        TwoNonUniformSourceInst(const Brig::BrigInstBase *ib,
513                                const BrigObject *obj, const char *opcode)
514            : TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
515                                         typename Src0DataType::OperandType,
516                                         typename Src1DataType::OperandType>(ib,
517                                                                    obj, opcode)
518        {
519        }
520    };
521
522    // helper function for ClassInst
523    template<typename T>
524    bool
525    fpclassify(T src0, uint32_t src1)
526    {
527        int fpclass = std::fpclassify(src0);
528
529        if ((src1 & 0x3) && (fpclass == FP_NAN)) {
530            return true;
531        }
532
533        if (src0 <= -0.0) {
534            if ((src1 & 0x4) && fpclass == FP_INFINITE)
535                return true;
536            if ((src1 & 0x8) && fpclass == FP_NORMAL)
537                return true;
538            if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
539                return true;
540            if ((src1 & 0x20) && fpclass == FP_ZERO)
541                return true;
542        } else {
543            if ((src1 & 0x40) && fpclass == FP_ZERO)
544                return true;
545            if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
546                return true;
547            if ((src1 & 0x100) && fpclass == FP_NORMAL)
548                return true;
549            if ((src1 & 0x200) && fpclass == FP_INFINITE)
550                return true;
551        }
552        return false;
553    }
554
555    template<typename DataType>
556    class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32>
557    {
558      public:
559        ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
560                  const char *opcode)
561            : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode)
562        {
563        }
564    };
565
566    template<typename DataType>
567    class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32>
568    {
569      public:
570        ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
571                  const char *opcode)
572            : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode)
573        {
574        }
575    };
576
577    // helper function for CmpInst
578    template<typename T>
579    bool
580    compare(T src0, T src1, Brig::BrigCompareOperation cmpOp)
581    {
582        using namespace Brig;
583
584        switch (cmpOp) {
585          case BRIG_COMPARE_EQ:
586          case BRIG_COMPARE_EQU:
587          case BRIG_COMPARE_SEQ:
588          case BRIG_COMPARE_SEQU:
589            return (src0 == src1);
590
591          case BRIG_COMPARE_NE:
592          case BRIG_COMPARE_NEU:
593          case BRIG_COMPARE_SNE:
594          case BRIG_COMPARE_SNEU:
595            return (src0 != src1);
596
597          case BRIG_COMPARE_LT:
598          case BRIG_COMPARE_LTU:
599          case BRIG_COMPARE_SLT:
600          case BRIG_COMPARE_SLTU:
601            return (src0 < src1);
602
603          case BRIG_COMPARE_LE:
604          case BRIG_COMPARE_LEU:
605          case BRIG_COMPARE_SLE:
606          case BRIG_COMPARE_SLEU:
607            return (src0 <= src1);
608
609          case BRIG_COMPARE_GT:
610          case BRIG_COMPARE_GTU:
611          case BRIG_COMPARE_SGT:
612          case BRIG_COMPARE_SGTU:
613            return (src0 > src1);
614
615          case BRIG_COMPARE_GE:
616          case BRIG_COMPARE_GEU:
617          case BRIG_COMPARE_SGE:
618          case BRIG_COMPARE_SGEU:
619            return (src0 >= src1);
620
621          case BRIG_COMPARE_NUM:
622          case BRIG_COMPARE_SNUM:
623            return (src0 == src0) || (src1 == src1);
624
625          case BRIG_COMPARE_NAN:
626          case BRIG_COMPARE_SNAN:
627            return (src0 != src0) || (src1 != src1);
628
629          default:
630            fatal("Bad cmpOp value %d\n", (int)cmpOp);
631        }
632    }
633
634    template<typename T>
635    int32_t
636    firstbit(T src0)
637    {
638        if (!src0)
639            return -1;
640
641        //handle positive and negative numbers
642        T tmp = (src0 < 0) ? (~src0) : (src0);
643
644        //the starting pos is MSB
645        int pos = 8 * sizeof(T) - 1;
646        int cnt = 0;
647
648        //search the first bit set to 1
649        while (!(tmp & (1 << pos))) {
650            ++cnt;
651            --pos;
652        }
653        return cnt;
654    }
655
    // Returns a string for a BRIG compare operation.  Declared here only
    // (defined elsewhere); CmpInst::opcode_suffix() uses the result as the
    // first component of the disassembly suffix.
    const char* cmpOpToString(Brig::BrigCompareOperation cmpOp);
657
658    template<typename DestOperandType, typename SrcOperandType>
659    class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType,
660                                              2>
661    {
662      protected:
663        Brig::BrigCompareOperation cmpOp;
664
665      public:
666        CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
667                    const char *_opcode)
668            : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj,
669                                                                 _opcode)
670        {
671            assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP);
672            Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib;
673            cmpOp = (Brig::BrigCompareOperation)i->compare;
674        }
675    };
676
677    template<typename DestDataType, typename SrcDataType>
678    class CmpInst : public CmpInstBase<typename DestDataType::OperandType,
679                                       typename SrcDataType::OperandType>
680    {
681      public:
682        std::string
683        opcode_suffix()
684        {
685            return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp),
686                            DestDataType::label, SrcDataType::label);
687        }
688
689        CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
690                const char *_opcode)
691            : CmpInstBase<typename DestDataType::OperandType,
692                          typename SrcDataType::OperandType>(ib, obj, _opcode)
693        {
694        }
695    };
696
697    template<typename DestDataType, typename SrcDataType>
698    class CvtInst : public CommonInstBase<typename DestDataType::OperandType,
699                                          typename SrcDataType::OperandType, 1>
700    {
701      public:
702        std::string opcode_suffix()
703        {
704            return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
705        }
706
707        CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
708                const char *_opcode)
709            : CommonInstBase<typename DestDataType::OperandType,
710                             typename SrcDataType::OperandType,
711                             1>(ib, obj, _opcode)
712        {
713        }
714    };
715
716    class SpecialInstNoSrcNoDest : public HsailGPUStaticInst
717    {
718      public:
719        SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib,
720                               const BrigObject *obj, const char *_opcode)
721            : HsailGPUStaticInst(obj, _opcode)
722        {
723        }
724
725        bool isVectorRegister(int operandIndex) { return false; }
726        bool isCondRegister(int operandIndex) { return false; }
727        bool isScalarRegister(int operandIndex) { return false; }
728        bool isSrcOperand(int operandIndex) { return false; }
729        bool isDstOperand(int operandIndex) { return false; }
730        int getOperandSize(int operandIndex) { return 0; }
731        int getRegisterIndex(int operandIndex) { return -1; }
732
733        int numSrcRegOperands() { return 0; }
734        int numDstRegOperands() { return 0; }
735        int getNumOperands() { return 0; }
736    };
737
738    template<typename DestOperandType>
739    class SpecialInstNoSrcBase : public HsailGPUStaticInst
740    {
741      protected:
742        typename DestOperandType::DestOperand dest;
743
744        void generateDisassembly()
745        {
746            disassembly = csprintf("%s %s", opcode, dest.disassemble());
747        }
748
749      public:
750        SpecialInstNoSrcBase(const Brig::BrigInstBase *ib,
751                             const BrigObject *obj, const char *_opcode)
752            : HsailGPUStaticInst(obj, _opcode)
753        {
754            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
755            dest.init(op_offs, obj);
756        }
757
758        bool isVectorRegister(int operandIndex) {
759            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
760            return dest.isVectorRegister();
761        }
762        bool isCondRegister(int operandIndex) {
763            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
764            return dest.isCondRegister();
765        }
766        bool isScalarRegister(int operandIndex) {
767            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
768            return dest.isScalarRegister();
769        }
770        bool isSrcOperand(int operandIndex) { return false; }
771        bool isDstOperand(int operandIndex) { return true; }
772        int getOperandSize(int operandIndex) {
773            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
774            return dest.opSize();
775        }
776        int getRegisterIndex(int operandIndex) {
777            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
778            return dest.regIndex();
779        }
780        int numSrcRegOperands() { return 0; }
781        int numDstRegOperands() { return dest.isVectorRegister(); }
782        int getNumOperands() { return 1; }
783    };
784
785    template<typename DestDataType>
786    class SpecialInstNoSrc :
787        public SpecialInstNoSrcBase<typename DestDataType::OperandType>
788    {
789      public:
790        typedef typename DestDataType::CType DestCType;
791
792        SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj,
793                         const char *_opcode)
794            : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj,
795                                                                       _opcode)
796        {
797        }
798    };
799
800    template<typename DestOperandType>
801    class SpecialInst1SrcBase : public HsailGPUStaticInst
802    {
803      protected:
804        typedef int SrcCType;  // used in execute() template
805
806        typename DestOperandType::DestOperand dest;
807        ImmOperand<SrcCType> src0;
808
809        void
810        generateDisassembly()
811        {
812            disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(),
813                                   src0.disassemble());
814        }
815
816      public:
817        SpecialInst1SrcBase(const Brig::BrigInstBase *ib,
818                            const BrigObject *obj, const char *_opcode)
819            : HsailGPUStaticInst(obj, _opcode)
820        {
821            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
822            dest.init(op_offs, obj);
823
824            op_offs = obj->getOperandPtr(ib->operands, 1);
825            src0.init(op_offs, obj);
826        }
827        bool isVectorRegister(int operandIndex) {
828            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
829            return dest.isVectorRegister();
830        }
831        bool isCondRegister(int operandIndex) {
832            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
833            return dest.isCondRegister();
834        }
835        bool isScalarRegister(int operandIndex) {
836            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
837            return dest.isScalarRegister();
838        }
839        bool isSrcOperand(int operandIndex) { return false; }
840        bool isDstOperand(int operandIndex) { return true; }
841        int getOperandSize(int operandIndex) {
842            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
843            return dest.opSize();
844        }
845        int getRegisterIndex(int operandIndex) {
846            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
847            return dest.regIndex();
848        }
849        int numSrcRegOperands() { return 0; }
850        int numDstRegOperands() { return dest.isVectorRegister(); }
851        int getNumOperands() { return 1; }
852    };
853
854    template<typename DestDataType>
855    class SpecialInst1Src :
856        public SpecialInst1SrcBase<typename DestDataType::OperandType>
857    {
858      public:
859        typedef typename DestDataType::CType DestCType;
860
861        SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj,
862                        const char *_opcode)
863            : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj,
864                                                                      _opcode)
865        {
866        }
867    };
868
869    class Ret : public SpecialInstNoSrcNoDest
870    {
871      public:
872        typedef SpecialInstNoSrcNoDest Base;
873
874        Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
875           : Base(ib, obj, "ret")
876        {
877            o_type = Enums::OT_RET;
878        }
879
880        void execute(GPUDynInstPtr gpuDynInst);
881    };
882
883    class Barrier : public SpecialInstNoSrcNoDest
884    {
885      public:
886        typedef SpecialInstNoSrcNoDest Base;
887        uint8_t width;
888
889        Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
890            : Base(ib, obj, "barrier")
891        {
892            o_type = Enums::OT_BARRIER;
893            assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
894            width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
895        }
896
897        void execute(GPUDynInstPtr gpuDynInst);
898    };
899
900    class MemFence : public SpecialInstNoSrcNoDest
901    {
902      public:
903        typedef SpecialInstNoSrcNoDest Base;
904
905        Brig::BrigMemoryOrder memFenceMemOrder;
906        Brig::BrigMemoryScope memFenceScopeSegGroup;
907        Brig::BrigMemoryScope memFenceScopeSegGlobal;
908        Brig::BrigMemoryScope memFenceScopeSegImage;
909
910        MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj)
911            : Base(ib, obj, "memfence")
912        {
913            assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE);
914
915            memFenceScopeSegGlobal = (Brig::BrigMemoryScope)
916                ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope;
917
918            memFenceScopeSegGroup = (Brig::BrigMemoryScope)
919                ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope;
920
921            memFenceScopeSegImage = (Brig::BrigMemoryScope)
922                ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope;
923
924            memFenceMemOrder = (Brig::BrigMemoryOrder)
925                ((Brig::BrigInstMemFence*)ib)->memoryOrder;
926
927            // set o_type based on scopes
928            if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
929                memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
930                o_type = Enums::OT_BOTH_MEMFENCE;
931            } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
932                o_type = Enums::OT_GLOBAL_MEMFENCE;
933            } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
934                o_type = Enums::OT_SHARED_MEMFENCE;
935            } else {
936                fatal("MemFence constructor: bad scope specifiers\n");
937            }
938        }
939
940        void
941        initiateAcc(GPUDynInstPtr gpuDynInst)
942        {
943            Wavefront *wave = gpuDynInst->wavefront();
944            wave->computeUnit->injectGlobalMemFence(gpuDynInst);
945        }
946
947        void
948        execute(GPUDynInstPtr gpuDynInst)
949        {
950            Wavefront *w = gpuDynInst->wavefront();
951            // 2 cases:
952            //   * memfence to a sequentially consistent memory (e.g., LDS).
953            //     These can be handled as no-ops.
954            //   * memfence to a relaxed consistency cache (e.g., Hermes, Viper,
955            //     etc.). We send a packet, tagged with the memory order and
956            //     scope, and let the GPU coalescer handle it.
957
958            if (o_type == Enums::OT_GLOBAL_MEMFENCE ||
959                o_type == Enums::OT_BOTH_MEMFENCE) {
960                gpuDynInst->simdId = w->simdId;
961                gpuDynInst->wfSlotId = w->wfSlotId;
962                gpuDynInst->wfDynId = w->wfDynId;
963                gpuDynInst->kern_id = w->kern_id;
964                gpuDynInst->cu_id = w->computeUnit->cu_id;
965
966                gpuDynInst->memoryOrder =
967                    getGenericMemoryOrder(memFenceMemOrder);
968                gpuDynInst->scope =
969                    getGenericMemoryScope(memFenceScopeSegGlobal);
970                gpuDynInst->useContinuation = false;
971                GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
972                gmp->getGMReqFIFO().push(gpuDynInst);
973
974                w->wr_gm_reqs_in_pipe--;
975                w->rd_gm_reqs_in_pipe--;
976                w->mem_reqs_in_pipe--;
977                w->outstanding_reqs++;
978            } else if (o_type == Enums::OT_SHARED_MEMFENCE) {
979                // no-op
980            } else {
981                fatal("MemFence execute: bad o_type\n");
982            }
983        }
984    };
985
986    class Call : public HsailGPUStaticInst
987    {
988      public:
989        // private helper functions
990        void calcAddr(Wavefront* w, GPUDynInstPtr m);
991
992        void
993        generateDisassembly()
994        {
995            if (dest.disassemble() == "") {
996                disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(),
997                                       src1.disassemble());
998            } else {
999                disassembly = csprintf("%s %s (%s) (%s)", opcode,
1000                                       src0.disassemble(), dest.disassemble(),
1001                                       src1.disassemble());
1002            }
1003        }
1004
1005        bool
1006        isPseudoOp()
1007        {
1008            std::string func_name = src0.disassemble();
1009            if (func_name.find("__gem5_hsail_op") != std::string::npos) {
1010                return true;
1011            }
1012            return false;
1013        }
1014
1015        // member variables
1016        ListOperand dest;
1017        FunctionRefOperand src0;
1018        ListOperand src1;
1019        HsailCode *func_ptr;
1020
1021        // exec function for pseudo instructions mapped on top of call opcode
1022        void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst);
1023
1024        // user-defined pseudo instructions
1025        void MagicPrintLane(Wavefront *w);
1026        void MagicPrintLane64(Wavefront *w);
1027        void MagicPrintWF32(Wavefront *w);
1028        void MagicPrintWF64(Wavefront *w);
1029        void MagicPrintWFFloat(Wavefront *w);
1030        void MagicSimBreak(Wavefront *w);
1031        void MagicPrefixSum(Wavefront *w);
1032        void MagicReduction(Wavefront *w);
1033        void MagicMaskLower(Wavefront *w);
1034        void MagicMaskUpper(Wavefront *w);
1035        void MagicJoinWFBar(Wavefront *w);
1036        void MagicWaitWFBar(Wavefront *w);
1037        void MagicPanic(Wavefront *w);
1038
1039        void MagicAtomicNRAddGlobalU32Reg(Wavefront *w,
1040                                          GPUDynInstPtr gpuDynInst);
1041
1042        void MagicAtomicNRAddGroupU32Reg(Wavefront *w,
1043                                         GPUDynInstPtr gpuDynInst);
1044
1045        void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst);
1046
1047        void MagicXactCasLd(Wavefront *w);
1048        void MagicMostSigThread(Wavefront *w);
1049        void MagicMostSigBroadcast(Wavefront *w);
1050
1051        void MagicPrintWF32ID(Wavefront *w);
1052        void MagicPrintWFID64(Wavefront *w);
1053
1054        Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
1055            : HsailGPUStaticInst(obj, "call")
1056        {
1057            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1058            dest.init(op_offs, obj);
1059            op_offs = obj->getOperandPtr(ib->operands, 1);
1060            src0.init(op_offs, obj);
1061
1062            func_ptr = nullptr;
1063            std::string func_name = src0.disassemble();
1064            if (!isPseudoOp()) {
1065                func_ptr = dynamic_cast<HsailCode*>(obj->
1066                                                    getFunction(func_name));
1067
1068                if (!func_ptr)
1069                    fatal("call::exec cannot find function: %s\n", func_name);
1070            }
1071
1072            op_offs = obj->getOperandPtr(ib->operands, 2);
1073            src1.init(op_offs, obj);
1074        }
1075
1076        bool isVectorRegister(int operandIndex) { return false; }
1077        bool isCondRegister(int operandIndex) { return false; }
1078        bool isScalarRegister(int operandIndex) { return false; }
1079        bool isSrcOperand(int operandIndex) { return false; }
1080        bool isDstOperand(int operandIndex) { return false; }
1081        int  getOperandSize(int operandIndex) { return 0; }
1082        int  getRegisterIndex(int operandIndex) { return -1; }
1083
1084        void
1085        execute(GPUDynInstPtr gpuDynInst)
1086        {
1087            Wavefront *w = gpuDynInst->wavefront();
1088
1089            std::string func_name = src0.disassemble();
1090            if (isPseudoOp()) {
1091                execPseudoInst(w, gpuDynInst);
1092            } else {
1093                fatal("Native HSAIL functions are not yet implemented: %s\n",
1094                      func_name);
1095            }
1096        }
1097        int numSrcRegOperands() { return 0; }
1098        int numDstRegOperands() { return 0; }
1099        int getNumOperands() { return 2; }
1100    };
1101
// Complement helper. The generic form applies bitwise ~ to its argument
// and converts the result back to T.
template<typename T>
T
heynot(T arg)
{
    return ~arg;
}

// bool uses logical negation instead of the bitwise complement.
template<>
inline bool
heynot<bool>(bool arg)
{
    return !arg;
}
1104} // namespace HsailISA
1105
1106#endif // __ARCH_HSAIL_INSTS_DECL_HH__
1107