# gen.py revision 11737:50eceddc2286
#! /usr/bin/python

#
#  Copyright (c) 2015 Advanced Micro Devices, Inc.
#  All rights reserved.
#
#  For use for simulation and test purposes only
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are met:
#
#  1. Redistributions of source code must retain the above copyright notice,
#  this list of conditions and the following disclaimer.
#
#  2. Redistributions in binary form must reproduce the above copyright notice,
#  this list of conditions and the following disclaimer in the documentation
#  and/or other materials provided with the distribution.
#
#  3. Neither the name of the copyright holder nor the names of its contributors
#  may be used to endorse or promote products derived from this software
#  without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#  POSSIBILITY OF SUCH DAMAGE.
#
#  Author: Steve Reinhardt
#

import re
import sys

from m5.util import code_formatter

# Exactly three file names are expected on the command line.
if len(sys.argv) != 4:
    # Report on stderr and exit with a non-zero status so that whatever
    # invoked this script can tell that nothing was generated.
    sys.stderr.write("Error: need 3 args (file names)\n")
    sys.exit(1)

# One formatter accumulates the text of each of the three output files.
header_code = code_formatter()
decoder_code = code_formatter()
exec_code = code_formatter()

###############
#
# Generate file prologs (includes etc.)
#
###############

# Header file: includes, then open the HsailISA namespace.
header_code('''
#include "arch/hsail/insts/decl.hh"
#include "base/bitfield.hh"
#include "gpu-compute/hsail_code.hh"
#include "gpu-compute/wavefront.hh"

namespace HsailISA
{
''')
header_code.indent()

# Decoder file: includes, then the start of Decoder::decode() up to the
# opening of the switch on the Brig opcode.  The per-opcode cases are
# appended later and the switch is closed in the file epilog.
decoder_code('''
#include "arch/hsail/gpu_decoder.hh"
#include "arch/hsail/insts/branch.hh"
#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gen_decl.hh"
#include "arch/hsail/insts/mem.hh"
#include "arch/hsail/insts/mem_impl.hh"
#include "gpu-compute/brig_object.hh"

namespace HsailISA
{
    std::vector<GPUStaticInst*> Decoder::decodedInsts;

    GPUStaticInst*
    Decoder::decode(MachInst machInst)
    {
        using namespace Brig;

        const BrigInstBase *ib = machInst.brigInstBase;
        const BrigObject *obj = machInst.brigObj;

        switch(ib->opcode) {
''')
# Two levels: one for the namespace, one for the function body.
decoder_code.indent()
decoder_code.indent()

# Exec file: includes, then open the HsailISA namespace.
exec_code('''
#include "arch/hsail/insts/gen_decl.hh"
#include "base/intmath.hh"

namespace HsailISA
{
''')
exec_code.indent()

###############
#
# Define code templates for class declarations (for header file)
#
###############

# Basic header template for an instruction with no template parameters.
header_template_nodt = '''
class $class_name : public $base_class
{
  public:
    typedef $base_class Base;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Basic header template for an instruction with a single DataType
# template parameter.
header_template_1dt = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
  public:
    typedef $base_class<DataType> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Same as header_template_1dt, but without the execute() declaration.
header_template_1dt_noexec = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
  public:
    typedef $base_class<DataType> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }
};

'''

# Same as header_template_1dt, except the base class has a second
# template parameter NumSrcOperands to allow a variable number of
# source operands.  Note that since this is implemented with an array,
# it only works for instructions where all sources are of the same
# type (like most arithmetics).
header_template_1dt_varsrcs = '''
template<typename DataType>
class $class_name : public $base_class<DataType, $num_srcs>
{
  public:
    typedef $base_class<DataType, $num_srcs> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Header template for instruction with two DataType template
# parameters, one for the dest and one for the source.  This is used
# by compare and convert.
header_template_2dt = '''
template<typename DestDataType, class SrcDataType>
class $class_name : public $base_class<DestDataType, SrcDataType>
{
  public:
    typedef $base_class<DestDataType, SrcDataType> Base;
    typedef typename DestDataType::CType DestCType;
    typedef typename SrcDataType::CType SrcCType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Maps a base class name (template arguments stripped) to the header
# template used for instructions derived from it.  An empty string
# means no class declaration is generated for that base class.
header_templates = {
    'ArithInst': header_template_1dt_varsrcs,
    'CmovInst': header_template_1dt,
    'ClassInst': header_template_1dt,
    'ShiftInst': header_template_1dt,
    'ExtractInsertInst': header_template_1dt,
    'CmpInst': header_template_2dt,
    'CvtInst': header_template_2dt,
    'PopcountInst': header_template_2dt,
    'LdInst': '',
    'StInst': '',
    'SpecialInstNoSrc': header_template_nodt,
    'SpecialInst1Src': header_template_nodt,
    'SpecialInstNoSrcNoDest': '',
}

###############
#
# Define code templates for exec functions
#
###############

# exec function body for an untemplated instruction with no sources
exec_template_nodt_nosrc = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef Base::DestCType DestCType;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestCType dest_val = $expr;
            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for an untemplated instruction with one source
exec_template_nodt_1src = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef Base::DestCType DestCType;
    typedef Base::SrcCType  SrcCType;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
            DestCType dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for one DataType and $num_srcs sources of that
# type, read into the src_val[] array
exec_template_1dt_varsrcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            CType src_val[$num_srcs];

            for (int i = 0; i < $num_srcs; ++i) {
                src_val[i] = this->src[i].template get<CType>(w, lane);
            }

            dest_val = (CType)($expr);

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for three separately typed sources src0..src2
exec_template_1dt_3srcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename Base::Src0CType Src0T;
    typedef typename Base::Src1CType Src1T;
    typedef typename Base::Src2CType Src2T;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;

            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
            Src2T src_val2 = this->src2.template get<Src2T>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for two sources and a separately typed dest
exec_template_1dt_2src_1dest = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename Base::DestCType DestT;
    typedef CType Src0T;
    typedef typename Base::Src1CType Src1T;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestT dest_val;
            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<DestT>(w, lane);
            }
            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for shifts: src1 (the shift amount) is always read
# as uint32_t.  Not listed in exec_templates; gen() selects it directly.
exec_template_shift = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();
    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;

            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            CType src_val0 = this->src0.template get<CType>(w, lane);
            uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for separate dest and source DataTypes, with
# $num_srcs sources read into the src_val[] array
exec_template_2dt = '''
template<typename DestDataType, class SrcDataType>
void
$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestCType dest_val;
            SrcCType src_val[$num_srcs];

            for (int i = 0; i < $num_srcs; ++i) {
                src_val[i] = this->src[i].template get<SrcCType>(w, lane);
            }

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Maps a base class name (template arguments stripped) to the exec
# template used for instructions derived from it.  An empty string
# means no exec function is generated for that base class.
exec_templates = {
    'ArithInst': exec_template_1dt_varsrcs,
    'CmovInst': exec_template_1dt_3srcs,
    'ExtractInsertInst': exec_template_1dt_3srcs,
    'ClassInst': exec_template_1dt_2src_1dest,
    'CmpInst': exec_template_2dt,
    'CvtInst': exec_template_2dt,
    'PopcountInst': exec_template_2dt,
    'LdInst': '',
    'StInst': '',
    'SpecialInstNoSrc': exec_template_nodt_nosrc,
    'SpecialInst1Src': exec_template_nodt_1src,
    'SpecialInstNoSrcNoDest': '',
}

###############
#
# Define code templates for the decoder cases
#
###############

# decode template for nodt-opcode case
decode_nodt_template = '''
  case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''

# prolog for the Class opcode, which switches on the instruction's
# sourceType instead of ib->type
decode_case_prolog_class_inst = '''
  case BRIG_OPCODE_$brig_opcode_upper:
    {
        //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
        BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
        //switch (baseOp->kind) {
        //    case BRIG_OPERAND_REG:
        //        type = ((const BrigOperandReg*)baseOp)->type;
        //        break;
        //    case BRIG_OPERAND_IMMED:
        //        type = ((const BrigOperandImmed*)baseOp)->type;
        //        break;
        //    default:
        //        fatal("CLASS unrecognized kind of operand %d\\n",
        //               baseOp->kind);
        //}
        switch (type) {'''

# common prolog for 1dt- or 2dt-opcode case: switch on data type
decode_case_prolog = '''
  case BRIG_OPCODE_$brig_opcode_upper:
    {
        switch (ib->type) {'''

# single-level decode case entry (for 1dt opcodes)
decode_case_entry = (
    '      case BRIG_TYPE_$type_name: return $constructor(ib, obj);')

# decode case for stores: each type gets its own braced case body
decode_store_prolog = (
    '      case BRIG_TYPE_$type_name: {')

decode_store_case_epilog = '''
    }'''

decode_store_case_entry = (
    '          return $constructor(ib, obj);')

# common epilog for type switch
decode_case_epilog = '''
          default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
              ib->type);
        }
    }
    break;'''

# Additional templates for nested decode on a second type field (for
# compare and convert).  These are used in place of the
# decode_case_entry template to create a second-level switch on the
# second type field inside each case of the first-level type switch.
# Because the name and location of the second type can vary, the Brig
# instruction type must be provided in $brig_type, and the name of the
# second type field must be provided in $type2_field.
decode_case2_prolog = '''
        case BRIG_TYPE_$type_name:
          switch (((Brig$brig_type*)ib)->$type2_field) {'''

decode_case2_entry = (
    '          case BRIG_TYPE_$type2_name: return $constructor(ib, obj);')

decode_case2_epilog = '''
          default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
                         ((Brig$brig_type*)ib)->$type2_field);
        }
        break;'''

# Figure out how many source operands an expr needs by looking for the
# highest-numbered srcN value referenced.  Since sources are numbered
# starting at 0, the return value is N+1.
def num_src_operands(expr):
    """Return the number of source operands referenced by expr.

    expr is scanned for 'src' followed by a single digit; the result is
    the highest digit found plus one, or 0 if there is no such
    reference.  Only one digit is examined, so 'src10' counts as src1.
    """
    indices = [int(digit) for digit in re.findall(r'src(\d)', expr)]
    if not indices:
        return 0
    return max(indices) + 1

###############
#
# Define final code generation methods
#
# The gen() function is the interface for generating actual
# instructions.
#
###############

# Generate class declaration, exec function, and decode switch case
# for a brig_opcode with a single-level type switch.  The 'types'
# parameter is a list or tuple of types for which the instruction
# should be instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit the code for one Brig opcode into the output formatters.

    brig_opcode -- opcode name; used unchanged as the C++ class name,
        upper-cased for the BRIG_OPCODE_ case label and lower-cased for
        the $opcode string.
    types -- sequence of type names to switch on in the decoder.  If it
        is empty/None or a single string, a plain decode case without a
        type switch is emitted.
    expr -- C++ expression text written in terms of src0, src1, ... and
        dest.  Required when base_class is 'ArithInst'.
    base_class -- name of the C++ base class, optionally with template
        arguments; the arguments are stripped before the header and
        exec templates are looked up.
    type2_info -- optional (type2_field, types2) pair requesting a
        nested switch on a second type field.
    constructor_prefix -- text placed in front of the class name to
        form the $constructor expression of each decode case.
    is_store -- emit the store-style decode entries (one-level type
        switch only).
    """
    # NOTE(review): the templates refer to $-names ($class_name,
    # $opcode, $constructor, $type_name, ...) that coincide with the
    # local variables of this function, so code_formatter presumably
    # resolves them from the calling scope -- confirm before renaming
    # any local or moving a formatter call into a helper function.
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # note that expr must be provided with ArithInst so we can
        # derive num_srcs for the template
        assert expr, 'ArithInst requires expr'

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = 'dest' in expr
        dest_is_src_flag = str(dest_is_src).lower() # for C++
        if base_class in ('ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst'):
            # these exec templates read the sources into an array
            expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
        else:
            # all others use one scalar variable per source
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    if base_class.startswith('SpecialInst'):
        # untemplated classes: the definition goes into the exec file
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        header_code(exec_template_shift)
    else:
        # class templates: the definition goes into the header
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)
        if not type2_info:
            if not is_store:
                # single list of types, to basic one-level decode
                for type_name in types:
                    full_class_name = '%s<%s>' % (class_name, type_name.upper())
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case_entry)
            else:
                # single list of types, to basic one-level decode
                for type_name in types:
                    decoder_code(decode_store_prolog)
                    # second template argument: first letter of the
                    # type name followed by its first run of digits
                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
                    type_type = type_name[0]
                    full_class_name = '%s<%s,%s>' % (
                        class_name, type_name.upper(),
                        '%s%d' % (type_type.upper(), type_size))
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_store_case_entry)
                    decoder_code(decode_store_case_epilog)
        else:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)

                decoder_code(decode_case2_epilog)

        decoder_code(decode_case_epilog)

###############
#
# Generate instructions
#
###############

# handy abbreviations for common sets of types

# arithmetic ops are typically defined only on 32- and 64-bit sizes
arith_int_types = ('S32', 'U32', 'S64', 'U64')
arith_float_types = ('F32', 'F64')
arith_types = arith_int_types + arith_float_types

bit_types = ('B1', 'B32', 'B64')

all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types

# I think you might be able to do 'f16' memory ops too, but we'll
# ignore them for now.
mem_types = all_int_types + arith_float_types
mem_atom_types = all_int_types + ('B32', 'B64')

##### Arithmetic & logical operations
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')

gen('CopySign', arith_float_types,
    'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')

# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
gen('Fract', arith_float_types,
    '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')

gen('Ncos', arith_float_types, 'cos(src0)')
gen('Nsin', arith_float_types, 'sin(src0)')

gen('And', bit_types, 'src0 & src1')
gen('Or', bit_types, 'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')

gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~src0)')
gen('Firstbit', bit_types, 'firstbit(src0)')
gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst',
    ('sourceType', ('B32', 'B64')))

gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')

# gen('Mul_hi', types=('s32','u32', '??'))
# gen('Mul24', types=('s32','u32', '??'))
gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')

gen('Abs', arith_types, 'std::abs(src0)')
gen('Neg', arith_types, '-src0')

gen('Mov', bit_types + arith_types, 'src0')
gen('Not', bit_types, 'heynot(src0)')

# mad and fma differ only in rounding behavior, which we don't emulate
# also there's an integer form of mad, but not of fma
gen('Mad', arith_types, 'src0 * src1 + src2')
gen('Fma', arith_float_types, 'src0 * src1 + src2')

#native floating point operations
gen('Nfma', arith_float_types, 'src0 * src1 + src2')

gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')

# see base/bitfield.hh
gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
    'ExtractInsertInst')

gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
    'ExtractInsertInst')

##### Compare
gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
    'CmpInst', ('sourceType', arith_types + bit_types))
gen('Class', arith_float_types, 'fpclassify(src0,src1)', 'ClassInst')

##### Conversion

# Conversion operations are only defined on B1, not B32 or B64
cvt_types = ('B1',) + mem_types

gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))


##### Load & Store
gen('Lda', mem_types, base_class='LdInst', constructor_prefix='decode')
gen('Ld', mem_types, base_class='LdInst', constructor_prefix='decode')
gen('St', mem_types, base_class='StInst', constructor_prefix='decode',
    is_store=True)
gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
gen('AtomicNoRet', mem_atom_types, base_class='StInst',
    constructor_prefix='decode')

gen('Cbr', base_class='LdInst', constructor_prefix='decode')
gen('Br', base_class='LdInst', constructor_prefix='decode')

##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Generate a special instruction whose value is computed by expr.

    The base class is chosen from the number of source operands that
    expr references: SpecialInstNoSrc<dest_type> for none and
    SpecialInst1Src<dest_type> for one.  The instruction is then
    generated through gen() without a type list.

    Raises ValueError if expr references more than one source operand.
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # Only the two base classes above are available here.
        raise ValueError('%s: special instructions take at most one source '
                         'operand, but expr uses %d'
                         % (brig_opcode, num_srcs))

    gen(brig_opcode, None, expr, base_class)

gen_special('WorkItemId', 'w->workItemId[src0][lane]')
gen_special('WorkItemAbsId',
    'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
gen_special('WorkGroupId', 'w->workGroupId[src0]')
gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
gen_special('GridSize', 'w->gridSz[src0]')
gen_special('GridGroups',
    'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
gen_special('LaneId', 'lane')
gen_special('WaveId', 'w->wfId')
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')

# gen_special('CU'', ')

gen('Ret', base_class='SpecialInstNoSrcNoDest')
gen('Barrier', base_class='SpecialInstNoSrcNoDest')
gen('MemFence', base_class='SpecialInstNoSrcNoDest')

# Map magic instructions to the BrigSyscall opcode
# Magic instructions are defined in magic.hh
#
# In the future, real HSA kernel system calls can be implemented and coexist
# with magic instructions.
gen('Call', base_class='SpecialInstNoSrcNoDest')

###############
#
# Generate file epilogs
#
###############

# Explicit specializations of Abs::execute for the unsigned types: the
# source value is copied to the destination unchanged.
header_code('''
template<>
inline void
Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            CType src_val;

            src_val = this->src[0].template get<CType>(w, lane);

            dest_val = (CType)(src_val);

            this->dest.set(w, lane, dest_val);
        }
    }
}

template<>
inline void
Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            CType src_val;

            src_val = this->src[0].template get<CType>(w, lane);

            dest_val = (CType)(src_val);

            this->dest.set(w, lane, dest_val);
        }
    }
}
''')

header_code.dedent()
header_code('''
} // namespace HsailISA
''')

# close off main decode switch
decoder_code.dedent()
decoder_code.dedent()
decoder_code('''
          default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
        } // end switch(ib->opcode)
    } // end decode()
} // namespace HsailISA
''')

exec_code.dedent()
exec_code('''
} // namespace HsailISA
''')

###############
#
# Output accumulated code to files
#
###############

# The three command-line arguments name, in order, the header file,
# the decoder file and the exec file.
header_code.write(sys.argv[1])
decoder_code.write(sys.argv[2])
exec_code.write(sys.argv[3])
