1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __ARCH_HSAIL_INSTS_DECL_HH__
37#define __ARCH_HSAIL_INSTS_DECL_HH__
38
39#include <cmath>
40
41#include "arch/hsail/insts/gpu_static_inst.hh"
42#include "arch/hsail/operand.hh"
43#include "debug/HSAIL.hh"
44#include "gpu-compute/gpu_dyn_inst.hh"
45#include "gpu-compute/shader.hh"
46
47namespace HsailISA
48{
49    template<typename _DestOperand, typename _SrcOperand>
50    class HsailOperandType
51    {
52      public:
53        typedef _DestOperand DestOperand;
54        typedef _SrcOperand SrcOperand;
55    };
56
57    typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType;
58    typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType;
59    typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType;
60
    // The IsBits parameter serves only to disambiguate the B* types from
    // the U* types, which otherwise would be identical (and
    // indistinguishable).
64    template<typename _OperandType, typename _CType, Enums::MemType _memType,
65             vgpr_type _vgprType, int IsBits=0>
66    class HsailDataType
67    {
68      public:
69        typedef _OperandType OperandType;
70        typedef _CType CType;
71        static const Enums::MemType memType = _memType;
72        static const vgpr_type vgprType = _vgprType;
73        static const char *label;
74    };
75
76    typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1;
77    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8;
78
79    typedef HsailDataType<SRegOperandType, uint16_t,
80                          Enums::M_U16, VT_32, 1> B16;
81
82    typedef HsailDataType<SRegOperandType, uint32_t,
83                          Enums::M_U32, VT_32, 1> B32;
84
85    typedef HsailDataType<DRegOperandType, uint64_t,
86                          Enums::M_U64, VT_64, 1> B64;
87
88    typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8;
89    typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16;
90    typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32;
91    typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64;
92
93    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8;
94    typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16;
95    typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32;
96    typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64;
97
98    typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32;
99    typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64;
100
101    template<typename DestOperandType, typename SrcOperandType,
102             int NumSrcOperands>
103    class CommonInstBase : public HsailGPUStaticInst
104    {
105      protected:
106        typename DestOperandType::DestOperand dest;
107        typename SrcOperandType::SrcOperand src[NumSrcOperands];
108
109        void
110        generateDisassembly()
111        {
112            disassembly = csprintf("%s%s %s", opcode, opcode_suffix(),
113                                   dest.disassemble());
114
115            for (int i = 0; i < NumSrcOperands; ++i) {
116                disassembly += ",";
117                disassembly += src[i].disassemble();
118            }
119        }
120
121        virtual std::string opcode_suffix() = 0;
122
123      public:
124        CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
125                       const char *opcode)
126            : HsailGPUStaticInst(obj, opcode)
127        {
128            setFlag(ALU);
129
130            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
131
132            dest.init(op_offs, obj);
133
134            for (int i = 0; i < NumSrcOperands; ++i) {
135                op_offs = obj->getOperandPtr(ib->operands, i + 1);
136                src[i].init(op_offs, obj);
137            }
138        }
139
140        bool isVectorRegister(int operandIndex) {
141            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
142            if (operandIndex < NumSrcOperands)
143                return src[operandIndex].isVectorRegister();
144            else
145                return dest.isVectorRegister();
146        }
147        bool isCondRegister(int operandIndex) {
148            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
149            if (operandIndex < NumSrcOperands)
150                return src[operandIndex].isCondRegister();
151            else
152                return dest.isCondRegister();
153        }
154        bool isScalarRegister(int operandIndex) {
155            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
156            if (operandIndex < NumSrcOperands)
157                return src[operandIndex].isScalarRegister();
158            else
159                return dest.isScalarRegister();
160        }
161        bool isSrcOperand(int operandIndex) {
162            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
163            if (operandIndex < NumSrcOperands)
164                return true;
165            return false;
166        }
167
168        bool isDstOperand(int operandIndex) {
169            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
170            if (operandIndex >= NumSrcOperands)
171                return true;
172            return false;
173        }
174        int getOperandSize(int operandIndex) {
175            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
176            if (operandIndex < NumSrcOperands)
177                return src[operandIndex].opSize();
178            else
179                return dest.opSize();
180        }
181        int
182        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
183        {
184            assert(operandIndex >= 0 && operandIndex < getNumOperands());
185
186            if (operandIndex < NumSrcOperands)
187                return src[operandIndex].regIndex();
188            else
189                return dest.regIndex();
190        }
191        int numSrcRegOperands() {
192            int operands = 0;
193            for (int i = 0; i < NumSrcOperands; i++) {
194                if (src[i].isVectorRegister()) {
195                    operands++;
196                }
197            }
198            return operands;
199        }
200        int numDstRegOperands() { return dest.isVectorRegister(); }
201        int getNumOperands() { return NumSrcOperands + 1; }
202    };
203
204    template<typename DataType, int NumSrcOperands>
205    class ArithInst : public CommonInstBase<typename DataType::OperandType,
206                                            typename DataType::OperandType,
207                                            NumSrcOperands>
208    {
209      public:
210        std::string opcode_suffix() { return csprintf("_%s", DataType::label); }
211
212        ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
213                  const char *opcode)
214            : CommonInstBase<typename DataType::OperandType,
215                             typename DataType::OperandType,
216                             NumSrcOperands>(ib, obj, opcode)
217        {
218        }
219    };
220
221    template<typename DestOperandType, typename Src0OperandType,
222             typename Src1OperandType, typename Src2OperandType>
223    class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst
224    {
225      protected:
226        typename DestOperandType::DestOperand dest;
227        typename Src0OperandType::SrcOperand  src0;
228        typename Src1OperandType::SrcOperand  src1;
229        typename Src2OperandType::SrcOperand  src2;
230
231        void
232        generateDisassembly()
233        {
234            disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(),
235                                   src0.disassemble(), src1.disassemble(),
236                                   src2.disassemble());
237        }
238
239      public:
240        ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
241                                      const BrigObject *obj,
242                                      const char *opcode)
243            : HsailGPUStaticInst(obj, opcode)
244        {
245            setFlag(ALU);
246
247            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
248            dest.init(op_offs, obj);
249
250            op_offs = obj->getOperandPtr(ib->operands, 1);
251            src0.init(op_offs, obj);
252
253            op_offs = obj->getOperandPtr(ib->operands, 2);
254            src1.init(op_offs, obj);
255
256            op_offs = obj->getOperandPtr(ib->operands, 3);
257            src2.init(op_offs, obj);
258        }
259
260        bool isVectorRegister(int operandIndex) {
261            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
262            if (!operandIndex)
263                return src0.isVectorRegister();
264            else if (operandIndex == 1)
265                return src1.isVectorRegister();
266            else if (operandIndex == 2)
267                return src2.isVectorRegister();
268            else
269                return dest.isVectorRegister();
270        }
271        bool isCondRegister(int operandIndex) {
272            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
273            if (!operandIndex)
274                return src0.isCondRegister();
275            else if (operandIndex == 1)
276                return src1.isCondRegister();
277            else if (operandIndex == 2)
278                return src2.isCondRegister();
279            else
280                return dest.isCondRegister();
281        }
282        bool isScalarRegister(int operandIndex) {
283            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
284            if (!operandIndex)
285                return src0.isScalarRegister();
286            else if (operandIndex == 1)
287                return src1.isScalarRegister();
288            else if (operandIndex == 2)
289                return src2.isScalarRegister();
290            else
291                return dest.isScalarRegister();
292        }
293        bool isSrcOperand(int operandIndex) {
294            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
295            if (operandIndex < 3)
296                return true;
297            else
298                return false;
299        }
300        bool isDstOperand(int operandIndex) {
301            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
302            if (operandIndex >= 3)
303                return true;
304            else
305                return false;
306        }
307        int getOperandSize(int operandIndex) {
308            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
309            if (!operandIndex)
310                return src0.opSize();
311            else if (operandIndex == 1)
312                return src1.opSize();
313            else if (operandIndex == 2)
314                return src2.opSize();
315            else
316                return dest.opSize();
317        }
318
319        int
320        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
321        {
322            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
323            if (!operandIndex)
324                return src0.regIndex();
325            else if (operandIndex == 1)
326                return src1.regIndex();
327            else if (operandIndex == 2)
328                return src2.regIndex();
329            else
330                return dest.regIndex();
331        }
332
333        int numSrcRegOperands() {
334            int operands = 0;
335            if (src0.isVectorRegister()) {
336                operands++;
337            }
338            if (src1.isVectorRegister()) {
339                operands++;
340            }
341            if (src2.isVectorRegister()) {
342                operands++;
343            }
344            return operands;
345        }
346        int numDstRegOperands() { return dest.isVectorRegister(); }
347        int getNumOperands() { return 4; }
348    };
349
350    template<typename DestDataType, typename Src0DataType,
351             typename Src1DataType, typename Src2DataType>
352    class ThreeNonUniformSourceInst :
353        public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
354                                             typename Src0DataType::OperandType,
355                                             typename Src1DataType::OperandType,
356                                             typename Src2DataType::OperandType>
357    {
358      public:
359        typedef typename DestDataType::CType DestCType;
360        typedef typename Src0DataType::CType Src0CType;
361        typedef typename Src1DataType::CType Src1CType;
362        typedef typename Src2DataType::CType Src2CType;
363
364        ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib,
365                                  const BrigObject *obj, const char *opcode)
366            : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
367                                         typename Src0DataType::OperandType,
368                                         typename Src1DataType::OperandType,
369                                         typename Src2DataType::OperandType>(ib,
370                                                                    obj, opcode)
371        {
372        }
373    };
374
375    template<typename DataType>
376    class CmovInst : public ThreeNonUniformSourceInst<DataType, B1,
377                                                      DataType, DataType>
378    {
379      public:
380        CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
381                 const char *opcode)
382            : ThreeNonUniformSourceInst<DataType, B1, DataType,
383                                        DataType>(ib, obj, opcode)
384        {
385        }
386    };
387
388    template<typename DataType>
389    class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType,
390                                                               DataType, U32,
391                                                               U32>
392    {
393      public:
394        ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
395                          const char *opcode)
396            : ThreeNonUniformSourceInst<DataType, DataType, U32,
397                                        U32>(ib, obj, opcode)
398        {
399        }
400    };
401
402    template<typename DestOperandType, typename Src0OperandType,
403             typename Src1OperandType>
404    class TwoNonUniformSourceInstBase : public HsailGPUStaticInst
405    {
406      protected:
407        typename DestOperandType::DestOperand dest;
408        typename Src0OperandType::SrcOperand src0;
409        typename Src1OperandType::SrcOperand src1;
410
411        void
412        generateDisassembly()
413        {
414            disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(),
415                                   src0.disassemble(), src1.disassemble());
416        }
417
418
419      public:
420        TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
421                                    const BrigObject *obj, const char *opcode)
422            : HsailGPUStaticInst(obj, opcode)
423        {
424            setFlag(ALU);
425
426            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
427            dest.init(op_offs, obj);
428
429            op_offs = obj->getOperandPtr(ib->operands, 1);
430            src0.init(op_offs, obj);
431
432            op_offs = obj->getOperandPtr(ib->operands, 2);
433            src1.init(op_offs, obj);
434        }
435        bool isVectorRegister(int operandIndex) {
436            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
437            if (!operandIndex)
438                return src0.isVectorRegister();
439            else if (operandIndex == 1)
440                return src1.isVectorRegister();
441            else
442                return dest.isVectorRegister();
443        }
444        bool isCondRegister(int operandIndex) {
445            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
446            if (!operandIndex)
447                return src0.isCondRegister();
448            else if (operandIndex == 1)
449                return src1.isCondRegister();
450            else
451                return dest.isCondRegister();
452        }
453        bool isScalarRegister(int operandIndex) {
454            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
455            if (!operandIndex)
456                return src0.isScalarRegister();
457            else if (operandIndex == 1)
458                return src1.isScalarRegister();
459            else
460                return dest.isScalarRegister();
461        }
462        bool isSrcOperand(int operandIndex) {
463            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
464            if (operandIndex < 2)
465                return true;
466            else
467                return false;
468        }
469        bool isDstOperand(int operandIndex) {
470            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
471            if (operandIndex >= 2)
472                return true;
473            else
474                return false;
475        }
476        int getOperandSize(int operandIndex) {
477            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
478            if (!operandIndex)
479                return src0.opSize();
480            else if (operandIndex == 1)
481                return src1.opSize();
482            else
483                return dest.opSize();
484        }
485
486        int
487        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
488        {
489            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
490            if (!operandIndex)
491                return src0.regIndex();
492            else if (operandIndex == 1)
493                return src1.regIndex();
494            else
495                return dest.regIndex();
496        }
497
498        int numSrcRegOperands() {
499            int operands = 0;
500            if (src0.isVectorRegister()) {
501                operands++;
502            }
503            if (src1.isVectorRegister()) {
504                operands++;
505            }
506            return operands;
507        }
508        int numDstRegOperands() { return dest.isVectorRegister(); }
509        int getNumOperands() { return 3; }
510    };
511
512    template<typename DestDataType, typename Src0DataType,
513             typename Src1DataType>
514    class TwoNonUniformSourceInst :
515        public TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
516                                           typename Src0DataType::OperandType,
517                                           typename Src1DataType::OperandType>
518    {
519      public:
520        typedef typename DestDataType::CType DestCType;
521        typedef typename Src0DataType::CType Src0CType;
522        typedef typename Src1DataType::CType Src1CType;
523
524        TwoNonUniformSourceInst(const Brig::BrigInstBase *ib,
525                                const BrigObject *obj, const char *opcode)
526            : TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
527                                         typename Src0DataType::OperandType,
528                                         typename Src1DataType::OperandType>(ib,
529                                                                    obj, opcode)
530        {
531        }
532    };
533
534    // helper function for ClassInst
535    template<typename T>
536    bool
537    fpclassify(T src0, uint32_t src1)
538    {
539        int fpclass = std::fpclassify(src0);
540
541        if ((src1 & 0x3) && (fpclass == FP_NAN)) {
542            return true;
543        }
544
545        if (src0 <= -0.0) {
546            if ((src1 & 0x4) && fpclass == FP_INFINITE)
547                return true;
548            if ((src1 & 0x8) && fpclass == FP_NORMAL)
549                return true;
550            if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
551                return true;
552            if ((src1 & 0x20) && fpclass == FP_ZERO)
553                return true;
554        } else {
555            if ((src1 & 0x40) && fpclass == FP_ZERO)
556                return true;
557            if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
558                return true;
559            if ((src1 & 0x100) && fpclass == FP_NORMAL)
560                return true;
561            if ((src1 & 0x200) && fpclass == FP_INFINITE)
562                return true;
563        }
564        return false;
565    }
566
567    template<typename DataType>
568    class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32>
569    {
570      public:
571        ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
572                  const char *opcode)
573            : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode)
574        {
575        }
576    };
577
578    template<typename DataType>
579    class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32>
580    {
581      public:
582        ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
583                  const char *opcode)
584            : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode)
585        {
586        }
587    };
588
589    // helper function for CmpInst
590    template<typename T>
591    bool
592    compare(T src0, T src1, Brig::BrigCompareOperation cmpOp)
593    {
594        using namespace Brig;
595
596        switch (cmpOp) {
597          case BRIG_COMPARE_EQ:
598          case BRIG_COMPARE_EQU:
599          case BRIG_COMPARE_SEQ:
600          case BRIG_COMPARE_SEQU:
601            return (src0 == src1);
602
603          case BRIG_COMPARE_NE:
604          case BRIG_COMPARE_NEU:
605          case BRIG_COMPARE_SNE:
606          case BRIG_COMPARE_SNEU:
607            return (src0 != src1);
608
609          case BRIG_COMPARE_LT:
610          case BRIG_COMPARE_LTU:
611          case BRIG_COMPARE_SLT:
612          case BRIG_COMPARE_SLTU:
613            return (src0 < src1);
614
615          case BRIG_COMPARE_LE:
616          case BRIG_COMPARE_LEU:
617          case BRIG_COMPARE_SLE:
618          case BRIG_COMPARE_SLEU:
619            return (src0 <= src1);
620
621          case BRIG_COMPARE_GT:
622          case BRIG_COMPARE_GTU:
623          case BRIG_COMPARE_SGT:
624          case BRIG_COMPARE_SGTU:
625            return (src0 > src1);
626
627          case BRIG_COMPARE_GE:
628          case BRIG_COMPARE_GEU:
629          case BRIG_COMPARE_SGE:
630          case BRIG_COMPARE_SGEU:
631            return (src0 >= src1);
632
633          case BRIG_COMPARE_NUM:
634          case BRIG_COMPARE_SNUM:
635            return (src0 == src0) || (src1 == src1);
636
637          case BRIG_COMPARE_NAN:
638          case BRIG_COMPARE_SNAN:
639            return (src0 != src0) || (src1 != src1);
640
641          default:
642            fatal("Bad cmpOp value %d\n", (int)cmpOp);
643        }
644    }
645
646    template<typename T>
647    int32_t
648    firstbit(T src0)
649    {
650        if (!src0)
651            return -1;
652
653        //handle positive and negative numbers
654        T tmp = ((int64_t)src0 < 0) ? (~src0) : (src0);
655
656        //the starting pos is MSB
657        int pos = 8 * sizeof(T) - 1;
658        int cnt = 0;
659
660        //search the first bit set to 1
661        while (!(tmp & (1 << pos))) {
662            ++cnt;
663            --pos;
664        }
665        return cnt;
666    }
667
    // Returns a C string for a BRIG compare operation. Only the declaration
    // is visible here. CmpInst::opcode_suffix() prints the result as the
    // first part of the disassembly suffix.
    const char* cmpOpToString(Brig::BrigCompareOperation cmpOp);
669
670    template<typename DestOperandType, typename SrcOperandType>
671    class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType,
672                                              2>
673    {
674      protected:
675        Brig::BrigCompareOperation cmpOp;
676
677      public:
678        CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
679                    const char *_opcode)
680            : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj,
681                                                                 _opcode)
682        {
683            assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP);
684            Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib;
685            cmpOp = (Brig::BrigCompareOperation)i->compare;
686        }
687    };
688
689    template<typename DestDataType, typename SrcDataType>
690    class CmpInst : public CmpInstBase<typename DestDataType::OperandType,
691                                       typename SrcDataType::OperandType>
692    {
693      public:
694        std::string
695        opcode_suffix()
696        {
697            return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp),
698                            DestDataType::label, SrcDataType::label);
699        }
700
701        CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
702                const char *_opcode)
703            : CmpInstBase<typename DestDataType::OperandType,
704                          typename SrcDataType::OperandType>(ib, obj, _opcode)
705        {
706        }
707    };
708
709    template<typename DestDataType, typename SrcDataType>
710    class CvtInst : public CommonInstBase<typename DestDataType::OperandType,
711                                          typename SrcDataType::OperandType, 1>
712    {
713      public:
714        std::string opcode_suffix()
715        {
716            return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
717        }
718
719        CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
720                const char *_opcode)
721            : CommonInstBase<typename DestDataType::OperandType,
722                             typename SrcDataType::OperandType,
723                             1>(ib, obj, _opcode)
724        {
725        }
726    };
727
728    template<typename DestDataType, typename SrcDataType>
729    class PopcountInst :
730        public CommonInstBase<typename DestDataType::OperandType,
731                              typename SrcDataType::OperandType, 1>
732    {
733      public:
734        std::string opcode_suffix()
735        {
736            return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
737        }
738
739        PopcountInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
740                     const char *_opcode)
741            : CommonInstBase<typename DestDataType::OperandType,
742                             typename SrcDataType::OperandType,
743                             1>(ib, obj, _opcode)
744        {
745        }
746    };
747
748    class Stub : public HsailGPUStaticInst
749    {
750      public:
751        Stub(const Brig::BrigInstBase *ib, const BrigObject *obj,
752             const char *_opcode)
753            : HsailGPUStaticInst(obj, _opcode)
754        {
755        }
756
757        void generateDisassembly() override
758        {
759            disassembly = csprintf("%s", opcode);
760        }
761
762        bool isVectorRegister(int operandIndex) override { return false; }
763        bool isCondRegister(int operandIndex) override { return false; }
764        bool isScalarRegister(int operandIndex) override { return false; }
765        bool isSrcOperand(int operandIndex) override { return false; }
766        bool isDstOperand(int operandIndex) override { return false; }
767        int getOperandSize(int operandIndex) override { return 0; }
768
769        int
770        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
771        {
772            return -1;
773        }
774
775        int numSrcRegOperands() override { return 0; }
776        int numDstRegOperands() override { return 0; }
777        int getNumOperands() override { return 0; }
778    };
779
780    class SpecialInstNoSrcNoDest : public HsailGPUStaticInst
781    {
782      public:
783        SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib,
784                               const BrigObject *obj, const char *_opcode)
785            : HsailGPUStaticInst(obj, _opcode)
786        {
787        }
788
789        bool isVectorRegister(int operandIndex) override { return false; }
790        bool isCondRegister(int operandIndex) override { return false; }
791        bool isScalarRegister(int operandIndex) override { return false; }
792        bool isSrcOperand(int operandIndex) override { return false; }
793        bool isDstOperand(int operandIndex) override { return false; }
794        int getOperandSize(int operandIndex) override { return 0; }
795
796        int
797        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
798        {
799            return -1;
800        }
801
802        int numSrcRegOperands() override { return 0; }
803        int numDstRegOperands() override { return 0; }
804        int getNumOperands() override { return 0; }
805    };
806
807    template<typename DestOperandType>
808    class SpecialInstNoSrcBase : public HsailGPUStaticInst
809    {
810      protected:
811        typename DestOperandType::DestOperand dest;
812
813        void generateDisassembly()
814        {
815            disassembly = csprintf("%s %s", opcode, dest.disassemble());
816        }
817
818      public:
819        SpecialInstNoSrcBase(const Brig::BrigInstBase *ib,
820                             const BrigObject *obj, const char *_opcode)
821            : HsailGPUStaticInst(obj, _opcode)
822        {
823            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
824            dest.init(op_offs, obj);
825        }
826
827        bool isVectorRegister(int operandIndex) {
828            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
829            return dest.isVectorRegister();
830        }
831        bool isCondRegister(int operandIndex) {
832            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
833            return dest.isCondRegister();
834        }
835        bool isScalarRegister(int operandIndex) {
836            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
837            return dest.isScalarRegister();
838        }
839        bool isSrcOperand(int operandIndex) { return false; }
840        bool isDstOperand(int operandIndex) { return true; }
841        int getOperandSize(int operandIndex) {
842            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
843            return dest.opSize();
844        }
845
846        int
847        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
848        {
849            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
850            return dest.regIndex();
851        }
852
853        int numSrcRegOperands() { return 0; }
854        int numDstRegOperands() { return dest.isVectorRegister(); }
855        int getNumOperands() { return 1; }
856    };
857
858    template<typename DestDataType>
859    class SpecialInstNoSrc :
860        public SpecialInstNoSrcBase<typename DestDataType::OperandType>
861    {
862      public:
863        typedef typename DestDataType::CType DestCType;
864
865        SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj,
866                         const char *_opcode)
867            : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj,
868                                                                       _opcode)
869        {
870        }
871    };
872
873    template<typename DestOperandType>
874    class SpecialInst1SrcBase : public HsailGPUStaticInst
875    {
876      protected:
877        typedef int SrcCType;  // used in execute() template
878
879        typename DestOperandType::DestOperand dest;
880        ImmOperand<SrcCType> src0;
881
882        void
883        generateDisassembly()
884        {
885            disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(),
886                                   src0.disassemble());
887        }
888
889      public:
890        SpecialInst1SrcBase(const Brig::BrigInstBase *ib,
891                            const BrigObject *obj, const char *_opcode)
892            : HsailGPUStaticInst(obj, _opcode)
893        {
894            setFlag(ALU);
895
896            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
897            dest.init(op_offs, obj);
898
899            op_offs = obj->getOperandPtr(ib->operands, 1);
900            src0.init(op_offs, obj);
901        }
902        bool isVectorRegister(int operandIndex) {
903            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
904            return dest.isVectorRegister();
905        }
906        bool isCondRegister(int operandIndex) {
907            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
908            return dest.isCondRegister();
909        }
910        bool isScalarRegister(int operandIndex) {
911            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
912            return dest.isScalarRegister();
913        }
914        bool isSrcOperand(int operandIndex) { return false; }
915        bool isDstOperand(int operandIndex) { return true; }
916        int getOperandSize(int operandIndex) {
917            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
918            return dest.opSize();
919        }
920
921        int
922        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
923        {
924            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
925            return dest.regIndex();
926        }
927
928        int numSrcRegOperands() { return 0; }
929        int numDstRegOperands() { return dest.isVectorRegister(); }
930        int getNumOperands() { return 1; }
931    };
932
933    template<typename DestDataType>
934    class SpecialInst1Src :
935        public SpecialInst1SrcBase<typename DestDataType::OperandType>
936    {
937      public:
938        typedef typename DestDataType::CType DestCType;
939
940        SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj,
941                        const char *_opcode)
942            : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj,
943                                                                      _opcode)
944        {
945        }
946    };
947
948    class Ret : public SpecialInstNoSrcNoDest
949    {
950      public:
951        typedef SpecialInstNoSrcNoDest Base;
952
953        Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
954           : Base(ib, obj, "ret")
955        {
956            setFlag(GPUStaticInst::Return);
957        }
958
959        void execute(GPUDynInstPtr gpuDynInst);
960    };
961
962    class Barrier : public SpecialInstNoSrcNoDest
963    {
964      public:
965        typedef SpecialInstNoSrcNoDest Base;
966        uint8_t width;
967
968        Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
969            : Base(ib, obj, "barrier")
970        {
971            setFlag(GPUStaticInst::MemBarrier);
972            assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
973            width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
974        }
975
976        void execute(GPUDynInstPtr gpuDynInst);
977    };
978
979    class MemFence : public SpecialInstNoSrcNoDest
980    {
981      public:
982        typedef SpecialInstNoSrcNoDest Base;
983
984        Brig::BrigMemoryOrder memFenceMemOrder;
985        Brig::BrigMemoryScope memFenceScopeSegGroup;
986        Brig::BrigMemoryScope memFenceScopeSegGlobal;
987        Brig::BrigMemoryScope memFenceScopeSegImage;
988
989        MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj)
990            : Base(ib, obj, "memfence")
991        {
992            assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE);
993
994            memFenceScopeSegGlobal = (Brig::BrigMemoryScope)
995                ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope;
996
997            memFenceScopeSegGroup = (Brig::BrigMemoryScope)
998                ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope;
999
1000            memFenceScopeSegImage = (Brig::BrigMemoryScope)
1001                ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope;
1002
1003            memFenceMemOrder = (Brig::BrigMemoryOrder)
1004                ((Brig::BrigInstMemFence*)ib)->memoryOrder;
1005
1006            setFlag(MemoryRef);
1007            setFlag(GPUStaticInst::MemFence);
1008
1009            switch (memFenceMemOrder) {
1010              case Brig::BRIG_MEMORY_ORDER_NONE:
1011                setFlag(NoOrder);
1012                break;
1013              case Brig::BRIG_MEMORY_ORDER_RELAXED:
1014                setFlag(RelaxedOrder);
1015                break;
1016              case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
1017                setFlag(Acquire);
1018                break;
1019              case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
1020                setFlag(Release);
1021                break;
1022              case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
1023                setFlag(AcquireRelease);
1024                break;
1025              default:
1026                fatal("MemInst has bad BrigMemoryOrder\n");
1027            }
1028
1029            // set inst flags based on scopes
1030            if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
1031                memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
1032                setFlag(GPUStaticInst::GlobalSegment);
1033
1034                /**
1035                 * A memory fence that has scope for
1036                 * both segments will use the global
1037                 * segment, and be executed in the
1038                 * global memory pipeline, therefore,
1039                 * we set the segment to match the
1040                 * global scope only
1041                 */
1042                switch (memFenceScopeSegGlobal) {
1043                  case Brig::BRIG_MEMORY_SCOPE_NONE:
1044                    setFlag(NoScope);
1045                    break;
1046                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1047                    setFlag(WorkitemScope);
1048                    break;
1049                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1050                    setFlag(WorkgroupScope);
1051                    break;
1052                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
1053                    setFlag(DeviceScope);
1054                    break;
1055                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1056                    setFlag(SystemScope);
1057                    break;
1058                  default:
1059                    fatal("MemFence has bad global scope type\n");
1060                }
1061            } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
1062                setFlag(GPUStaticInst::GlobalSegment);
1063
1064                switch (memFenceScopeSegGlobal) {
1065                  case Brig::BRIG_MEMORY_SCOPE_NONE:
1066                    setFlag(NoScope);
1067                    break;
1068                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1069                    setFlag(WorkitemScope);
1070                    break;
1071                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1072                    setFlag(WorkgroupScope);
1073                    break;
1074                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
1075                    setFlag(DeviceScope);
1076                    break;
1077                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1078                    setFlag(SystemScope);
1079                    break;
1080                  default:
1081                    fatal("MemFence has bad global scope type\n");
1082                }
1083            } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
1084                setFlag(GPUStaticInst::GroupSegment);
1085
1086                switch (memFenceScopeSegGroup) {
1087                  case Brig::BRIG_MEMORY_SCOPE_NONE:
1088                    setFlag(NoScope);
1089                    break;
1090                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1091                    setFlag(WorkitemScope);
1092                    break;
1093                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1094                    setFlag(WorkgroupScope);
1095                    break;
1096                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
1097                    setFlag(DeviceScope);
1098                    break;
1099                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1100                    setFlag(SystemScope);
1101                    break;
1102                  default:
1103                    fatal("MemFence has bad group scope type\n");
1104                }
1105            } else {
1106                fatal("MemFence constructor: bad scope specifiers\n");
1107            }
1108        }
1109
1110        void
1111        initiateAcc(GPUDynInstPtr gpuDynInst)
1112        {
1113            Wavefront *wave = gpuDynInst->wavefront();
1114            wave->computeUnit->injectGlobalMemFence(gpuDynInst);
1115        }
1116
1117        void
1118        execute(GPUDynInstPtr gpuDynInst)
1119        {
1120            Wavefront *w = gpuDynInst->wavefront();
1121            // 2 cases:
1122            //   * memfence to a sequentially consistent memory (e.g., LDS).
1123            //     These can be handled as no-ops.
1124            //   * memfence to a relaxed consistency cache (e.g., Hermes, Viper,
1125            //     etc.). We send a packet, tagged with the memory order and
1126            //     scope, and let the GPU coalescer handle it.
1127
1128            if (isGlobalSeg()) {
1129                gpuDynInst->simdId = w->simdId;
1130                gpuDynInst->wfSlotId = w->wfSlotId;
1131                gpuDynInst->wfDynId = w->wfDynId;
1132                gpuDynInst->kern_id = w->kernId;
1133                gpuDynInst->cu_id = w->computeUnit->cu_id;
1134
1135                gpuDynInst->useContinuation = false;
1136                GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
1137                gmp->issueRequest(gpuDynInst);
1138
1139                w->wrGmReqsInPipe--;
1140                w->rdGmReqsInPipe--;
1141                w->memReqsInPipe--;
1142                w->outstandingReqs++;
1143            } else if (isGroupSeg()) {
1144                // no-op
1145            } else {
1146                fatal("MemFence execute: bad op type\n");
1147            }
1148        }
1149    };
1150
1151    class Call : public HsailGPUStaticInst
1152    {
1153      public:
1154        // private helper functions
1155        void calcAddr(Wavefront* w, GPUDynInstPtr m);
1156
1157        void
1158        generateDisassembly()
1159        {
1160            if (dest.disassemble() == "") {
1161                disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(),
1162                                       src1.disassemble());
1163            } else {
1164                disassembly = csprintf("%s %s (%s) (%s)", opcode,
1165                                       src0.disassemble(), dest.disassemble(),
1166                                       src1.disassemble());
1167            }
1168        }
1169
1170        bool
1171        isPseudoOp()
1172        {
1173            std::string func_name = src0.disassemble();
1174            if (func_name.find("__gem5_hsail_op") != std::string::npos) {
1175                return true;
1176            }
1177            return false;
1178        }
1179
1180        // member variables
1181        ListOperand dest;
1182        FunctionRefOperand src0;
1183        ListOperand src1;
1184        HsailCode *func_ptr;
1185
1186        // exec function for pseudo instructions mapped on top of call opcode
1187        void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst);
1188
1189        // user-defined pseudo instructions
1190        void MagicPrintLane(Wavefront *w);
1191        void MagicPrintLane64(Wavefront *w);
1192        void MagicPrintWF32(Wavefront *w);
1193        void MagicPrintWF64(Wavefront *w);
1194        void MagicPrintWFFloat(Wavefront *w);
1195        void MagicSimBreak(Wavefront *w);
1196        void MagicPrefixSum(Wavefront *w);
1197        void MagicReduction(Wavefront *w);
1198        void MagicMaskLower(Wavefront *w);
1199        void MagicMaskUpper(Wavefront *w);
1200        void MagicJoinWFBar(Wavefront *w);
1201        void MagicWaitWFBar(Wavefront *w);
1202        void MagicPanic(Wavefront *w);
1203
1204        void MagicAtomicNRAddGlobalU32Reg(Wavefront *w,
1205                                          GPUDynInstPtr gpuDynInst);
1206
1207        void MagicAtomicNRAddGroupU32Reg(Wavefront *w,
1208                                         GPUDynInstPtr gpuDynInst);
1209
1210        void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst);
1211
1212        void MagicXactCasLd(Wavefront *w);
1213        void MagicMostSigThread(Wavefront *w);
1214        void MagicMostSigBroadcast(Wavefront *w);
1215
1216        void MagicPrintWF32ID(Wavefront *w);
1217        void MagicPrintWFID64(Wavefront *w);
1218
1219        Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
1220            : HsailGPUStaticInst(obj, "call")
1221        {
1222            setFlag(ALU);
1223            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1224            dest.init(op_offs, obj);
1225            op_offs = obj->getOperandPtr(ib->operands, 1);
1226            src0.init(op_offs, obj);
1227
1228            func_ptr = nullptr;
1229            std::string func_name = src0.disassemble();
1230            if (!isPseudoOp()) {
1231                func_ptr = dynamic_cast<HsailCode*>(obj->
1232                                                    getFunction(func_name));
1233
1234                if (!func_ptr)
1235                    fatal("call::exec cannot find function: %s\n", func_name);
1236            }
1237
1238            op_offs = obj->getOperandPtr(ib->operands, 2);
1239            src1.init(op_offs, obj);
1240        }
1241
1242        bool isVectorRegister(int operandIndex) { return false; }
1243        bool isCondRegister(int operandIndex) { return false; }
1244        bool isScalarRegister(int operandIndex) { return false; }
1245        bool isSrcOperand(int operandIndex) { return false; }
1246        bool isDstOperand(int operandIndex) { return false; }
1247        int getOperandSize(int operandIndex) { return 0; }
1248
1249        int
1250        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
1251        {
1252            return -1;
1253        }
1254
1255        void
1256        execute(GPUDynInstPtr gpuDynInst)
1257        {
1258            Wavefront *w = gpuDynInst->wavefront();
1259
1260            std::string func_name = src0.disassemble();
1261            if (isPseudoOp()) {
1262                execPseudoInst(w, gpuDynInst);
1263            } else {
1264                fatal("Native HSAIL functions are not yet implemented: %s\n",
1265                      func_name);
1266            }
1267        }
1268        int numSrcRegOperands() { return 0; }
1269        int numDstRegOperands() { return 0; }
1270        int getNumOperands() { return 2; }
1271    };
1272
1273    template<typename T> T heynot(T arg) { return ~arg; }
1274    template<> inline bool heynot<bool>(bool arg) { return !arg; }
1275
1276
    /* Explicitly declare template static member variables to avoid
     * warnings in some clang versions.
     *
     * Each line below is an explicit-specialization declaration (there is
     * no initializer) of the static member "label" for one of the HSAIL
     * data types: bit types B1-B64, signed S8-S64, unsigned U8-U64 and
     * floating point F32/F64. The matching definitions are presumably
     * provided in a source file -- TODO confirm.
     */
    template<> const char *B1::label;
    template<> const char *B8::label;
    template<> const char *B16::label;
    template<> const char *B32::label;
    template<> const char *B64::label;
    template<> const char *S8::label;
    template<> const char *S16::label;
    template<> const char *S32::label;
    template<> const char *S64::label;
    template<> const char *U8::label;
    template<> const char *U16::label;
    template<> const char *U32::label;
    template<> const char *U64::label;
    template<> const char *F32::label;
    template<> const char *F64::label;
1295
1296} // namespace HsailISA
1297
1298#endif // __ARCH_HSAIL_INSTS_DECL_HH__
1299