decl.hh revision 11737:50eceddc2286
1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __ARCH_HSAIL_INSTS_DECL_HH__
37#define __ARCH_HSAIL_INSTS_DECL_HH__
38
#include <cmath>
#include <type_traits>

#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"
#include "debug/HSAIL.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"
46
47namespace HsailISA
48{
49    template<typename _DestOperand, typename _SrcOperand>
50    class HsailOperandType
51    {
52      public:
53        typedef _DestOperand DestOperand;
54        typedef _SrcOperand SrcOperand;
55    };
56
57    typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType;
58    typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType;
59    typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType;
60
61    // The IsBits parameter serves only to disambiguate tbhe B* types from
62    // the U* types, which otherwise would be identical (and
63    // indistinguishable).
64    template<typename _OperandType, typename _CType, Enums::MemType _memType,
65             vgpr_type _vgprType, int IsBits=0>
66    class HsailDataType
67    {
68      public:
69        typedef _OperandType OperandType;
70        typedef _CType CType;
71        static const Enums::MemType memType = _memType;
72        static const vgpr_type vgprType = _vgprType;
73        static const char *label;
74    };
75
76    typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1;
77    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8;
78
79    typedef HsailDataType<SRegOperandType, uint16_t,
80                          Enums::M_U16, VT_32, 1> B16;
81
82    typedef HsailDataType<SRegOperandType, uint32_t,
83                          Enums::M_U32, VT_32, 1> B32;
84
85    typedef HsailDataType<DRegOperandType, uint64_t,
86                          Enums::M_U64, VT_64, 1> B64;
87
88    typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8;
89    typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16;
90    typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32;
91    typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64;
92
93    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8;
94    typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16;
95    typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32;
96    typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64;
97
98    typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32;
99    typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64;
100
101    template<typename DestOperandType, typename SrcOperandType,
102             int NumSrcOperands>
103    class CommonInstBase : public HsailGPUStaticInst
104    {
105      protected:
106        typename DestOperandType::DestOperand dest;
107        typename SrcOperandType::SrcOperand src[NumSrcOperands];
108
109        void
110        generateDisassembly()
111        {
112            disassembly = csprintf("%s%s %s", opcode, opcode_suffix(),
113                                   dest.disassemble());
114
115            for (int i = 0; i < NumSrcOperands; ++i) {
116                disassembly += ",";
117                disassembly += src[i].disassemble();
118            }
119        }
120
121        virtual std::string opcode_suffix() = 0;
122
123      public:
124        CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
125                       const char *opcode)
126            : HsailGPUStaticInst(obj, opcode)
127        {
128            setFlag(ALU);
129
130            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
131
132            dest.init(op_offs, obj);
133
134            for (int i = 0; i < NumSrcOperands; ++i) {
135                op_offs = obj->getOperandPtr(ib->operands, i + 1);
136                src[i].init(op_offs, obj);
137            }
138        }
139
140        bool isVectorRegister(int operandIndex) {
141            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
142            if (operandIndex < NumSrcOperands)
143                return src[operandIndex].isVectorRegister();
144            else
145                return dest.isVectorRegister();
146        }
147        bool isCondRegister(int operandIndex) {
148            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
149            if (operandIndex < NumSrcOperands)
150                return src[operandIndex].isCondRegister();
151            else
152                return dest.isCondRegister();
153        }
154        bool isScalarRegister(int operandIndex) {
155            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
156            if (operandIndex < NumSrcOperands)
157                return src[operandIndex].isScalarRegister();
158            else
159                return dest.isScalarRegister();
160        }
161        bool isSrcOperand(int operandIndex) {
162            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
163            if (operandIndex < NumSrcOperands)
164                return true;
165            return false;
166        }
167
168        bool isDstOperand(int operandIndex) {
169            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
170            if (operandIndex >= NumSrcOperands)
171                return true;
172            return false;
173        }
174        int getOperandSize(int operandIndex) {
175            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
176            if (operandIndex < NumSrcOperands)
177                return src[operandIndex].opSize();
178            else
179                return dest.opSize();
180        }
181        int
182        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
183        {
184            assert(operandIndex >= 0 && operandIndex < getNumOperands());
185
186            if (operandIndex < NumSrcOperands)
187                return src[operandIndex].regIndex();
188            else
189                return dest.regIndex();
190        }
191        int numSrcRegOperands() {
192            int operands = 0;
193            for (int i = 0; i < NumSrcOperands; i++) {
194                if (src[i].isVectorRegister()) {
195                    operands++;
196                }
197            }
198            return operands;
199        }
200        int numDstRegOperands() { return dest.isVectorRegister(); }
201        int getNumOperands() { return NumSrcOperands + 1; }
202    };
203
204    template<typename DataType, int NumSrcOperands>
205    class ArithInst : public CommonInstBase<typename DataType::OperandType,
206                                            typename DataType::OperandType,
207                                            NumSrcOperands>
208    {
209      public:
210        std::string opcode_suffix() { return csprintf("_%s", DataType::label); }
211
212        ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
213                  const char *opcode)
214            : CommonInstBase<typename DataType::OperandType,
215                             typename DataType::OperandType,
216                             NumSrcOperands>(ib, obj, opcode)
217        {
218        }
219    };
220
221    template<typename DestOperandType, typename Src0OperandType,
222             typename Src1OperandType, typename Src2OperandType>
223    class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst
224    {
225      protected:
226        typename DestOperandType::DestOperand dest;
227        typename Src0OperandType::SrcOperand  src0;
228        typename Src1OperandType::SrcOperand  src1;
229        typename Src2OperandType::SrcOperand  src2;
230
231        void
232        generateDisassembly()
233        {
234            disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(),
235                                   src0.disassemble(), src1.disassemble(),
236                                   src2.disassemble());
237        }
238
239      public:
240        ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
241                                      const BrigObject *obj,
242                                      const char *opcode)
243            : HsailGPUStaticInst(obj, opcode)
244        {
245            setFlag(ALU);
246
247            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
248            dest.init(op_offs, obj);
249
250            op_offs = obj->getOperandPtr(ib->operands, 1);
251            src0.init(op_offs, obj);
252
253            op_offs = obj->getOperandPtr(ib->operands, 2);
254            src1.init(op_offs, obj);
255
256            op_offs = obj->getOperandPtr(ib->operands, 3);
257            src2.init(op_offs, obj);
258        }
259
260        bool isVectorRegister(int operandIndex) {
261            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
262            if (!operandIndex)
263                return src0.isVectorRegister();
264            else if (operandIndex == 1)
265                return src1.isVectorRegister();
266            else if (operandIndex == 2)
267                return src2.isVectorRegister();
268            else
269                return dest.isVectorRegister();
270        }
271        bool isCondRegister(int operandIndex) {
272            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
273            if (!operandIndex)
274                return src0.isCondRegister();
275            else if (operandIndex == 1)
276                return src1.isCondRegister();
277            else if (operandIndex == 2)
278                return src2.isCondRegister();
279            else
280                return dest.isCondRegister();
281        }
282        bool isScalarRegister(int operandIndex) {
283            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
284            if (!operandIndex)
285                return src0.isScalarRegister();
286            else if (operandIndex == 1)
287                return src1.isScalarRegister();
288            else if (operandIndex == 2)
289                return src2.isScalarRegister();
290            else
291                return dest.isScalarRegister();
292        }
293        bool isSrcOperand(int operandIndex) {
294            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
295            if (operandIndex < 3)
296                return true;
297            else
298                return false;
299        }
300        bool isDstOperand(int operandIndex) {
301            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
302            if (operandIndex >= 3)
303                return true;
304            else
305                return false;
306        }
307        int getOperandSize(int operandIndex) {
308            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
309            if (!operandIndex)
310                return src0.opSize();
311            else if (operandIndex == 1)
312                return src1.opSize();
313            else if (operandIndex == 2)
314                return src2.opSize();
315            else
316                return dest.opSize();
317        }
318
319        int
320        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
321        {
322            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
323            if (!operandIndex)
324                return src0.regIndex();
325            else if (operandIndex == 1)
326                return src1.regIndex();
327            else if (operandIndex == 2)
328                return src2.regIndex();
329            else
330                return dest.regIndex();
331        }
332
333        int numSrcRegOperands() {
334            int operands = 0;
335            if (src0.isVectorRegister()) {
336                operands++;
337            }
338            if (src1.isVectorRegister()) {
339                operands++;
340            }
341            if (src2.isVectorRegister()) {
342                operands++;
343            }
344            return operands;
345        }
346        int numDstRegOperands() { return dest.isVectorRegister(); }
347        int getNumOperands() { return 4; }
348    };
349
350    template<typename DestDataType, typename Src0DataType,
351             typename Src1DataType, typename Src2DataType>
352    class ThreeNonUniformSourceInst :
353        public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
354                                             typename Src0DataType::OperandType,
355                                             typename Src1DataType::OperandType,
356                                             typename Src2DataType::OperandType>
357    {
358      public:
359        typedef typename DestDataType::CType DestCType;
360        typedef typename Src0DataType::CType Src0CType;
361        typedef typename Src1DataType::CType Src1CType;
362        typedef typename Src2DataType::CType Src2CType;
363
364        ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib,
365                                  const BrigObject *obj, const char *opcode)
366            : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
367                                         typename Src0DataType::OperandType,
368                                         typename Src1DataType::OperandType,
369                                         typename Src2DataType::OperandType>(ib,
370                                                                    obj, opcode)
371        {
372        }
373    };
374
375    template<typename DataType>
376    class CmovInst : public ThreeNonUniformSourceInst<DataType, B1,
377                                                      DataType, DataType>
378    {
379      public:
380        CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
381                 const char *opcode)
382            : ThreeNonUniformSourceInst<DataType, B1, DataType,
383                                        DataType>(ib, obj, opcode)
384        {
385        }
386    };
387
388    template<typename DataType>
389    class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType,
390                                                               DataType, U32,
391                                                               U32>
392    {
393      public:
394        ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
395                          const char *opcode)
396            : ThreeNonUniformSourceInst<DataType, DataType, U32,
397                                        U32>(ib, obj, opcode)
398        {
399        }
400    };
401
402    template<typename DestOperandType, typename Src0OperandType,
403             typename Src1OperandType>
404    class TwoNonUniformSourceInstBase : public HsailGPUStaticInst
405    {
406      protected:
407        typename DestOperandType::DestOperand dest;
408        typename Src0OperandType::SrcOperand src0;
409        typename Src1OperandType::SrcOperand src1;
410
411        void
412        generateDisassembly()
413        {
414            disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(),
415                                   src0.disassemble(), src1.disassemble());
416        }
417
418
419      public:
420        TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
421                                    const BrigObject *obj, const char *opcode)
422            : HsailGPUStaticInst(obj, opcode)
423        {
424            setFlag(ALU);
425
426            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
427            dest.init(op_offs, obj);
428
429            op_offs = obj->getOperandPtr(ib->operands, 1);
430            src0.init(op_offs, obj);
431
432            op_offs = obj->getOperandPtr(ib->operands, 2);
433            src1.init(op_offs, obj);
434        }
435        bool isVectorRegister(int operandIndex) {
436            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
437            if (!operandIndex)
438                return src0.isVectorRegister();
439            else if (operandIndex == 1)
440                return src1.isVectorRegister();
441            else
442                return dest.isVectorRegister();
443        }
444        bool isCondRegister(int operandIndex) {
445            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
446            if (!operandIndex)
447                return src0.isCondRegister();
448            else if (operandIndex == 1)
449                return src1.isCondRegister();
450            else
451                return dest.isCondRegister();
452        }
453        bool isScalarRegister(int operandIndex) {
454            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
455            if (!operandIndex)
456                return src0.isScalarRegister();
457            else if (operandIndex == 1)
458                return src1.isScalarRegister();
459            else
460                return dest.isScalarRegister();
461        }
462        bool isSrcOperand(int operandIndex) {
463            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
464            if (operandIndex < 2)
465                return true;
466            else
467                return false;
468        }
469        bool isDstOperand(int operandIndex) {
470            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
471            if (operandIndex >= 2)
472                return true;
473            else
474                return false;
475        }
476        int getOperandSize(int operandIndex) {
477            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
478            if (!operandIndex)
479                return src0.opSize();
480            else if (operandIndex == 1)
481                return src1.opSize();
482            else
483                return dest.opSize();
484        }
485
486        int
487        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
488        {
489            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
490            if (!operandIndex)
491                return src0.regIndex();
492            else if (operandIndex == 1)
493                return src1.regIndex();
494            else
495                return dest.regIndex();
496        }
497
498        int numSrcRegOperands() {
499            int operands = 0;
500            if (src0.isVectorRegister()) {
501                operands++;
502            }
503            if (src1.isVectorRegister()) {
504                operands++;
505            }
506            return operands;
507        }
508        int numDstRegOperands() { return dest.isVectorRegister(); }
509        int getNumOperands() { return 3; }
510    };
511
512    template<typename DestDataType, typename Src0DataType,
513             typename Src1DataType>
514    class TwoNonUniformSourceInst :
515        public TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
516                                           typename Src0DataType::OperandType,
517                                           typename Src1DataType::OperandType>
518    {
519      public:
520        typedef typename DestDataType::CType DestCType;
521        typedef typename Src0DataType::CType Src0CType;
522        typedef typename Src1DataType::CType Src1CType;
523
524        TwoNonUniformSourceInst(const Brig::BrigInstBase *ib,
525                                const BrigObject *obj, const char *opcode)
526            : TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
527                                         typename Src0DataType::OperandType,
528                                         typename Src1DataType::OperandType>(ib,
529                                                                    obj, opcode)
530        {
531        }
532    };
533
534    // helper function for ClassInst
535    template<typename T>
536    bool
537    fpclassify(T src0, uint32_t src1)
538    {
539        int fpclass = std::fpclassify(src0);
540
541        if ((src1 & 0x3) && (fpclass == FP_NAN)) {
542            return true;
543        }
544
545        if (src0 <= -0.0) {
546            if ((src1 & 0x4) && fpclass == FP_INFINITE)
547                return true;
548            if ((src1 & 0x8) && fpclass == FP_NORMAL)
549                return true;
550            if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
551                return true;
552            if ((src1 & 0x20) && fpclass == FP_ZERO)
553                return true;
554        } else {
555            if ((src1 & 0x40) && fpclass == FP_ZERO)
556                return true;
557            if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
558                return true;
559            if ((src1 & 0x100) && fpclass == FP_NORMAL)
560                return true;
561            if ((src1 & 0x200) && fpclass == FP_INFINITE)
562                return true;
563        }
564        return false;
565    }
566
567    template<typename DataType>
568    class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32>
569    {
570      public:
571        ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
572                  const char *opcode)
573            : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode)
574        {
575        }
576    };
577
578    template<typename DataType>
579    class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32>
580    {
581      public:
582        ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
583                  const char *opcode)
584            : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode)
585        {
586        }
587    };
588
589    // helper function for CmpInst
590    template<typename T>
591    bool
592    compare(T src0, T src1, Brig::BrigCompareOperation cmpOp)
593    {
594        using namespace Brig;
595
596        switch (cmpOp) {
597          case BRIG_COMPARE_EQ:
598          case BRIG_COMPARE_EQU:
599          case BRIG_COMPARE_SEQ:
600          case BRIG_COMPARE_SEQU:
601            return (src0 == src1);
602
603          case BRIG_COMPARE_NE:
604          case BRIG_COMPARE_NEU:
605          case BRIG_COMPARE_SNE:
606          case BRIG_COMPARE_SNEU:
607            return (src0 != src1);
608
609          case BRIG_COMPARE_LT:
610          case BRIG_COMPARE_LTU:
611          case BRIG_COMPARE_SLT:
612          case BRIG_COMPARE_SLTU:
613            return (src0 < src1);
614
615          case BRIG_COMPARE_LE:
616          case BRIG_COMPARE_LEU:
617          case BRIG_COMPARE_SLE:
618          case BRIG_COMPARE_SLEU:
619            return (src0 <= src1);
620
621          case BRIG_COMPARE_GT:
622          case BRIG_COMPARE_GTU:
623          case BRIG_COMPARE_SGT:
624          case BRIG_COMPARE_SGTU:
625            return (src0 > src1);
626
627          case BRIG_COMPARE_GE:
628          case BRIG_COMPARE_GEU:
629          case BRIG_COMPARE_SGE:
630          case BRIG_COMPARE_SGEU:
631            return (src0 >= src1);
632
633          case BRIG_COMPARE_NUM:
634          case BRIG_COMPARE_SNUM:
635            return (src0 == src0) || (src1 == src1);
636
637          case BRIG_COMPARE_NAN:
638          case BRIG_COMPARE_SNAN:
639            return (src0 != src0) || (src1 != src1);
640
641          default:
642            fatal("Bad cmpOp value %d\n", (int)cmpOp);
643        }
644    }
645
646    template<typename T>
647    int32_t
648    firstbit(T src0)
649    {
650        if (!src0)
651            return -1;
652
653        //handle positive and negative numbers
654        T tmp = ((int64_t)src0 < 0) ? (~src0) : (src0);
655
656        //the starting pos is MSB
657        int pos = 8 * sizeof(T) - 1;
658        int cnt = 0;
659
660        //search the first bit set to 1
661        while (!(tmp & (1 << pos))) {
662            ++cnt;
663            --pos;
664        }
665        return cnt;
666    }
667
    // Returns a C string for the given compare operation. Only declared
    // here; the definition is not in this part of the file.
    // CmpInst::opcode_suffix() prints the result with "%s" as part of
    // the disassembly suffix.
    const char* cmpOpToString(Brig::BrigCompareOperation cmpOp);
669
670    template<typename DestOperandType, typename SrcOperandType>
671    class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType,
672                                              2>
673    {
674      protected:
675        Brig::BrigCompareOperation cmpOp;
676
677      public:
678        CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
679                    const char *_opcode)
680            : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj,
681                                                                 _opcode)
682        {
683            assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP);
684            Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib;
685            cmpOp = (Brig::BrigCompareOperation)i->compare;
686        }
687    };
688
689    template<typename DestDataType, typename SrcDataType>
690    class CmpInst : public CmpInstBase<typename DestDataType::OperandType,
691                                       typename SrcDataType::OperandType>
692    {
693      public:
694        std::string
695        opcode_suffix()
696        {
697            return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp),
698                            DestDataType::label, SrcDataType::label);
699        }
700
701        CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
702                const char *_opcode)
703            : CmpInstBase<typename DestDataType::OperandType,
704                          typename SrcDataType::OperandType>(ib, obj, _opcode)
705        {
706        }
707    };
708
709    template<typename DestDataType, typename SrcDataType>
710    class CvtInst : public CommonInstBase<typename DestDataType::OperandType,
711                                          typename SrcDataType::OperandType, 1>
712    {
713      public:
714        std::string opcode_suffix()
715        {
716            return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
717        }
718
719        CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
720                const char *_opcode)
721            : CommonInstBase<typename DestDataType::OperandType,
722                             typename SrcDataType::OperandType,
723                             1>(ib, obj, _opcode)
724        {
725        }
726    };
727
728    template<typename DestDataType, typename SrcDataType>
729    class PopcountInst :
730        public CommonInstBase<typename DestDataType::OperandType,
731                              typename SrcDataType::OperandType, 1>
732    {
733      public:
734        std::string opcode_suffix()
735        {
736            return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
737        }
738
739        PopcountInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
740                     const char *_opcode)
741            : CommonInstBase<typename DestDataType::OperandType,
742                             typename SrcDataType::OperandType,
743                             1>(ib, obj, _opcode)
744        {
745        }
746    };
747
748    class SpecialInstNoSrcNoDest : public HsailGPUStaticInst
749    {
750      public:
751        SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib,
752                               const BrigObject *obj, const char *_opcode)
753            : HsailGPUStaticInst(obj, _opcode)
754        {
755        }
756
757        bool isVectorRegister(int operandIndex) { return false; }
758        bool isCondRegister(int operandIndex) { return false; }
759        bool isScalarRegister(int operandIndex) { return false; }
760        bool isSrcOperand(int operandIndex) { return false; }
761        bool isDstOperand(int operandIndex) { return false; }
762        int getOperandSize(int operandIndex) { return 0; }
763
764        int
765        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
766        {
767            return -1;
768        }
769
770        int numSrcRegOperands() { return 0; }
771        int numDstRegOperands() { return 0; }
772        int getNumOperands() { return 0; }
773    };
774
775    template<typename DestOperandType>
776    class SpecialInstNoSrcBase : public HsailGPUStaticInst
777    {
778      protected:
779        typename DestOperandType::DestOperand dest;
780
781        void generateDisassembly()
782        {
783            disassembly = csprintf("%s %s", opcode, dest.disassemble());
784        }
785
786      public:
787        SpecialInstNoSrcBase(const Brig::BrigInstBase *ib,
788                             const BrigObject *obj, const char *_opcode)
789            : HsailGPUStaticInst(obj, _opcode)
790        {
791            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
792            dest.init(op_offs, obj);
793        }
794
795        bool isVectorRegister(int operandIndex) {
796            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
797            return dest.isVectorRegister();
798        }
799        bool isCondRegister(int operandIndex) {
800            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
801            return dest.isCondRegister();
802        }
803        bool isScalarRegister(int operandIndex) {
804            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
805            return dest.isScalarRegister();
806        }
807        bool isSrcOperand(int operandIndex) { return false; }
808        bool isDstOperand(int operandIndex) { return true; }
809        int getOperandSize(int operandIndex) {
810            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
811            return dest.opSize();
812        }
813
814        int
815        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
816        {
817            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
818            return dest.regIndex();
819        }
820
821        int numSrcRegOperands() { return 0; }
822        int numDstRegOperands() { return dest.isVectorRegister(); }
823        int getNumOperands() { return 1; }
824    };
825
826    template<typename DestDataType>
827    class SpecialInstNoSrc :
828        public SpecialInstNoSrcBase<typename DestDataType::OperandType>
829    {
830      public:
831        typedef typename DestDataType::CType DestCType;
832
833        SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj,
834                         const char *_opcode)
835            : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj,
836                                                                       _opcode)
837        {
838        }
839    };
840
841    template<typename DestOperandType>
842    class SpecialInst1SrcBase : public HsailGPUStaticInst
843    {
844      protected:
845        typedef int SrcCType;  // used in execute() template
846
847        typename DestOperandType::DestOperand dest;
848        ImmOperand<SrcCType> src0;
849
850        void
851        generateDisassembly()
852        {
853            disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(),
854                                   src0.disassemble());
855        }
856
857      public:
858        SpecialInst1SrcBase(const Brig::BrigInstBase *ib,
859                            const BrigObject *obj, const char *_opcode)
860            : HsailGPUStaticInst(obj, _opcode)
861        {
862            setFlag(ALU);
863
864            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
865            dest.init(op_offs, obj);
866
867            op_offs = obj->getOperandPtr(ib->operands, 1);
868            src0.init(op_offs, obj);
869        }
870        bool isVectorRegister(int operandIndex) {
871            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
872            return dest.isVectorRegister();
873        }
874        bool isCondRegister(int operandIndex) {
875            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
876            return dest.isCondRegister();
877        }
878        bool isScalarRegister(int operandIndex) {
879            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
880            return dest.isScalarRegister();
881        }
882        bool isSrcOperand(int operandIndex) { return false; }
883        bool isDstOperand(int operandIndex) { return true; }
884        int getOperandSize(int operandIndex) {
885            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
886            return dest.opSize();
887        }
888
889        int
890        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
891        {
892            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
893            return dest.regIndex();
894        }
895
896        int numSrcRegOperands() { return 0; }
897        int numDstRegOperands() { return dest.isVectorRegister(); }
898        int getNumOperands() { return 1; }
899    };
900
901    template<typename DestDataType>
902    class SpecialInst1Src :
903        public SpecialInst1SrcBase<typename DestDataType::OperandType>
904    {
905      public:
906        typedef typename DestDataType::CType DestCType;
907
908        SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj,
909                        const char *_opcode)
910            : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj,
911                                                                      _opcode)
912        {
913        }
914    };
915
916    class Ret : public SpecialInstNoSrcNoDest
917    {
918      public:
919        typedef SpecialInstNoSrcNoDest Base;
920
921        Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
922           : Base(ib, obj, "ret")
923        {
924            setFlag(GPUStaticInst::Return);
925        }
926
927        void execute(GPUDynInstPtr gpuDynInst);
928    };
929
930    class Barrier : public SpecialInstNoSrcNoDest
931    {
932      public:
933        typedef SpecialInstNoSrcNoDest Base;
934        uint8_t width;
935
936        Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
937            : Base(ib, obj, "barrier")
938        {
939            setFlag(GPUStaticInst::MemBarrier);
940            assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
941            width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
942        }
943
944        void execute(GPUDynInstPtr gpuDynInst);
945    };
946
947    class MemFence : public SpecialInstNoSrcNoDest
948    {
949      public:
950        typedef SpecialInstNoSrcNoDest Base;
951
952        Brig::BrigMemoryOrder memFenceMemOrder;
953        Brig::BrigMemoryScope memFenceScopeSegGroup;
954        Brig::BrigMemoryScope memFenceScopeSegGlobal;
955        Brig::BrigMemoryScope memFenceScopeSegImage;
956
957        MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj)
958            : Base(ib, obj, "memfence")
959        {
960            assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE);
961
962            memFenceScopeSegGlobal = (Brig::BrigMemoryScope)
963                ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope;
964
965            memFenceScopeSegGroup = (Brig::BrigMemoryScope)
966                ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope;
967
968            memFenceScopeSegImage = (Brig::BrigMemoryScope)
969                ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope;
970
971            memFenceMemOrder = (Brig::BrigMemoryOrder)
972                ((Brig::BrigInstMemFence*)ib)->memoryOrder;
973
974            setFlag(MemoryRef);
975            setFlag(GPUStaticInst::MemFence);
976
977            switch (memFenceMemOrder) {
978              case Brig::BRIG_MEMORY_ORDER_NONE:
979                setFlag(NoOrder);
980                break;
981              case Brig::BRIG_MEMORY_ORDER_RELAXED:
982                setFlag(RelaxedOrder);
983                break;
984              case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
985                setFlag(Acquire);
986                break;
987              case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
988                setFlag(Release);
989                break;
990              case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
991                setFlag(AcquireRelease);
992                break;
993              default:
994                fatal("MemInst has bad BrigMemoryOrder\n");
995            }
996
997            // set inst flags based on scopes
998            if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
999                memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
1000                setFlag(GPUStaticInst::GlobalSegment);
1001
1002                /**
1003                 * A memory fence that has scope for
1004                 * both segments will use the global
1005                 * segment, and be executed in the
1006                 * global memory pipeline, therefore,
1007                 * we set the segment to match the
1008                 * global scope only
1009                 */
1010                switch (memFenceScopeSegGlobal) {
1011                  case Brig::BRIG_MEMORY_SCOPE_NONE:
1012                    setFlag(NoScope);
1013                    break;
1014                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1015                    setFlag(WorkitemScope);
1016                    break;
1017                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1018                    setFlag(WorkgroupScope);
1019                    break;
1020                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
1021                    setFlag(DeviceScope);
1022                    break;
1023                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1024                    setFlag(SystemScope);
1025                    break;
1026                  default:
1027                    fatal("MemFence has bad global scope type\n");
1028                }
1029            } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
1030                setFlag(GPUStaticInst::GlobalSegment);
1031
1032                switch (memFenceScopeSegGlobal) {
1033                  case Brig::BRIG_MEMORY_SCOPE_NONE:
1034                    setFlag(NoScope);
1035                    break;
1036                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1037                    setFlag(WorkitemScope);
1038                    break;
1039                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1040                    setFlag(WorkgroupScope);
1041                    break;
1042                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
1043                    setFlag(DeviceScope);
1044                    break;
1045                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1046                    setFlag(SystemScope);
1047                    break;
1048                  default:
1049                    fatal("MemFence has bad global scope type\n");
1050                }
1051            } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
1052                setFlag(GPUStaticInst::GroupSegment);
1053
1054                switch (memFenceScopeSegGroup) {
1055                  case Brig::BRIG_MEMORY_SCOPE_NONE:
1056                    setFlag(NoScope);
1057                    break;
1058                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1059                    setFlag(WorkitemScope);
1060                    break;
1061                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1062                    setFlag(WorkgroupScope);
1063                    break;
1064                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
1065                    setFlag(DeviceScope);
1066                    break;
1067                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1068                    setFlag(SystemScope);
1069                    break;
1070                  default:
1071                    fatal("MemFence has bad group scope type\n");
1072                }
1073            } else {
1074                fatal("MemFence constructor: bad scope specifiers\n");
1075            }
1076        }
1077
1078        void
1079        initiateAcc(GPUDynInstPtr gpuDynInst)
1080        {
1081            Wavefront *wave = gpuDynInst->wavefront();
1082            wave->computeUnit->injectGlobalMemFence(gpuDynInst);
1083        }
1084
1085        void
1086        execute(GPUDynInstPtr gpuDynInst)
1087        {
1088            Wavefront *w = gpuDynInst->wavefront();
1089            // 2 cases:
1090            //   * memfence to a sequentially consistent memory (e.g., LDS).
1091            //     These can be handled as no-ops.
1092            //   * memfence to a relaxed consistency cache (e.g., Hermes, Viper,
1093            //     etc.). We send a packet, tagged with the memory order and
1094            //     scope, and let the GPU coalescer handle it.
1095
1096            if (isGlobalSeg()) {
1097                gpuDynInst->simdId = w->simdId;
1098                gpuDynInst->wfSlotId = w->wfSlotId;
1099                gpuDynInst->wfDynId = w->wfDynId;
1100                gpuDynInst->kern_id = w->kernId;
1101                gpuDynInst->cu_id = w->computeUnit->cu_id;
1102
1103                gpuDynInst->useContinuation = false;
1104                GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
1105                gmp->issueRequest(gpuDynInst);
1106
1107                w->wrGmReqsInPipe--;
1108                w->rdGmReqsInPipe--;
1109                w->memReqsInPipe--;
1110                w->outstandingReqs++;
1111            } else if (isGroupSeg()) {
1112                // no-op
1113            } else {
1114                fatal("MemFence execute: bad op type\n");
1115            }
1116        }
1117    };
1118
1119    class Call : public HsailGPUStaticInst
1120    {
1121      public:
1122        // private helper functions
1123        void calcAddr(Wavefront* w, GPUDynInstPtr m);
1124
1125        void
1126        generateDisassembly()
1127        {
1128            if (dest.disassemble() == "") {
1129                disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(),
1130                                       src1.disassemble());
1131            } else {
1132                disassembly = csprintf("%s %s (%s) (%s)", opcode,
1133                                       src0.disassemble(), dest.disassemble(),
1134                                       src1.disassemble());
1135            }
1136        }
1137
1138        bool
1139        isPseudoOp()
1140        {
1141            std::string func_name = src0.disassemble();
1142            if (func_name.find("__gem5_hsail_op") != std::string::npos) {
1143                return true;
1144            }
1145            return false;
1146        }
1147
1148        // member variables
1149        ListOperand dest;
1150        FunctionRefOperand src0;
1151        ListOperand src1;
1152        HsailCode *func_ptr;
1153
1154        // exec function for pseudo instructions mapped on top of call opcode
1155        void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst);
1156
1157        // user-defined pseudo instructions
1158        void MagicPrintLane(Wavefront *w);
1159        void MagicPrintLane64(Wavefront *w);
1160        void MagicPrintWF32(Wavefront *w);
1161        void MagicPrintWF64(Wavefront *w);
1162        void MagicPrintWFFloat(Wavefront *w);
1163        void MagicSimBreak(Wavefront *w);
1164        void MagicPrefixSum(Wavefront *w);
1165        void MagicReduction(Wavefront *w);
1166        void MagicMaskLower(Wavefront *w);
1167        void MagicMaskUpper(Wavefront *w);
1168        void MagicJoinWFBar(Wavefront *w);
1169        void MagicWaitWFBar(Wavefront *w);
1170        void MagicPanic(Wavefront *w);
1171
1172        void MagicAtomicNRAddGlobalU32Reg(Wavefront *w,
1173                                          GPUDynInstPtr gpuDynInst);
1174
1175        void MagicAtomicNRAddGroupU32Reg(Wavefront *w,
1176                                         GPUDynInstPtr gpuDynInst);
1177
1178        void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst);
1179
1180        void MagicXactCasLd(Wavefront *w);
1181        void MagicMostSigThread(Wavefront *w);
1182        void MagicMostSigBroadcast(Wavefront *w);
1183
1184        void MagicPrintWF32ID(Wavefront *w);
1185        void MagicPrintWFID64(Wavefront *w);
1186
1187        Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
1188            : HsailGPUStaticInst(obj, "call")
1189        {
1190            setFlag(ALU);
1191            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1192            dest.init(op_offs, obj);
1193            op_offs = obj->getOperandPtr(ib->operands, 1);
1194            src0.init(op_offs, obj);
1195
1196            func_ptr = nullptr;
1197            std::string func_name = src0.disassemble();
1198            if (!isPseudoOp()) {
1199                func_ptr = dynamic_cast<HsailCode*>(obj->
1200                                                    getFunction(func_name));
1201
1202                if (!func_ptr)
1203                    fatal("call::exec cannot find function: %s\n", func_name);
1204            }
1205
1206            op_offs = obj->getOperandPtr(ib->operands, 2);
1207            src1.init(op_offs, obj);
1208        }
1209
1210        bool isVectorRegister(int operandIndex) { return false; }
1211        bool isCondRegister(int operandIndex) { return false; }
1212        bool isScalarRegister(int operandIndex) { return false; }
1213        bool isSrcOperand(int operandIndex) { return false; }
1214        bool isDstOperand(int operandIndex) { return false; }
1215        int getOperandSize(int operandIndex) { return 0; }
1216
1217        int
1218        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
1219        {
1220            return -1;
1221        }
1222
1223        void
1224        execute(GPUDynInstPtr gpuDynInst)
1225        {
1226            Wavefront *w = gpuDynInst->wavefront();
1227
1228            std::string func_name = src0.disassemble();
1229            if (isPseudoOp()) {
1230                execPseudoInst(w, gpuDynInst);
1231            } else {
1232                fatal("Native HSAIL functions are not yet implemented: %s\n",
1233                      func_name);
1234            }
1235        }
1236        int numSrcRegOperands() { return 0; }
1237        int numDstRegOperands() { return 0; }
1238        int getNumOperands() { return 2; }
1239    };
1240
1241    template<typename T> T heynot(T arg) { return ~arg; }
1242    template<> inline bool heynot<bool>(bool arg) { return !arg; }
1243} // namespace HsailISA
1244
1245#endif // __ARCH_HSAIL_INSTS_DECL_HH__
1246