1#!/usr/bin/env python2.7
2#  Copyright (c) 2015 Advanced Micro Devices, Inc.
3#  All rights reserved.
4#
5#  For use for simulation and test purposes only
6#
7#  Redistribution and use in source and binary forms, with or without
8#  modification, are permitted provided that the following conditions are met:
9#
10#  1. Redistributions of source code must retain the above copyright notice,
11#  this list of conditions and the following disclaimer.
12#
13#  2. Redistributions in binary form must reproduce the above copyright notice,
14#  this list of conditions and the following disclaimer in the documentation
15#  and/or other materials provided with the distribution.
16#
17#  3. Neither the name of the copyright holder nor the names of its contributors
18#  may be used to endorse or promote products derived from this software
19#  without specific prior written permission.
20#
21#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31#  POSSIBILITY OF SUCH DAMAGE.
32#
33#  Author: Steve Reinhardt
34#
35
36from __future__ import print_function
37
38import sys, re
39
40from m5.util import code_formatter
41
# The script takes exactly three arguments.  They are used at the bottom
# of the file as the output paths for the header, decoder and exec code,
# in that order.  A wrong argument count is reported on stderr and
# signalled with a non-zero exit status so that a calling build does not
# mistake the failure for success.
if len(sys.argv) != 4:
    print("Error: need 3 args (file names)", file=sys.stderr)
    sys.exit(1)
45
# One formatter per generated output; each accumulates text until it is
# written out at the end of the script.
header_code = code_formatter()
decoder_code = code_formatter()
exec_code = code_formatter()

###############
#
# Generate file prologs (includes etc.)
#
###############

# Header prolog: includes plus the opening of namespace HsailISA.  The
# namespace is closed by the header epilog near the end of the script.
header_code('''
#include "arch/hsail/insts/decl.hh"
#include "base/bitfield.hh"
#include "gpu-compute/hsail_code.hh"
#include "gpu-compute/wavefront.hh"

namespace HsailISA
{
''')
header_code.indent()

# Decoder prolog: includes, the decodedInsts definition and the start of
# Decoder::decode() up to the opening of the switch on ib->opcode.  The
# per-opcode cases are appended by gen(); the switch, function and
# namespace are closed by the decoder epilog.
decoder_code('''
#include "arch/hsail/gpu_decoder.hh"
#include "arch/hsail/insts/branch.hh"
#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gen_decl.hh"
#include "arch/hsail/insts/mem.hh"
#include "arch/hsail/insts/mem_impl.hh"
#include "gpu-compute/brig_object.hh"

namespace HsailISA
{
    std::vector<GPUStaticInst*> Decoder::decodedInsts;

    GPUStaticInst*
    Decoder::decode(MachInst machInst)
    {
        using namespace Brig;

        const BrigInstBase *ib = machInst.brigInstBase;
        const BrigObject *obj = machInst.brigObj;

        switch(ib->opcode) {
''')
# Two indent levels are pushed here and popped again right before the
# decoder epilog is emitted.
decoder_code.indent()
decoder_code.indent()

# Exec prolog: includes plus the opening of namespace HsailISA.
exec_code('''
#include "arch/hsail/insts/gen_decl.hh"
#include "base/intmath.hh"

namespace HsailISA
{
''')
exec_code.indent()
101
102###############
103#
104# Define code templates for class declarations (for header file)
105#
106###############
107
# NOTE: the $name placeholders in the templates below ($class_name,
# $base_class, $opcode, $num_srcs) are filled in when gen() passes a
# template to header_code; the text inside the triple-quoted strings is
# emitted into the generated header.

# Basic header template for an instruction stub.  The text is currently
# identical to header_template_nodt.
header_template_stub = '''
class $class_name : public $base_class
{
  public:
    typedef $base_class Base;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Basic header template for an instruction with no template parameters.
header_template_nodt = '''
class $class_name : public $base_class
{
  public:
    typedef $base_class Base;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Basic header template for an instruction with a single DataType
# template parameter.
header_template_1dt = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
  public:
    typedef $base_class<DataType> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Same as header_template_1dt but without the execute() declaration.
# Not referenced by the header_templates table in this file.
header_template_1dt_noexec = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
  public:
    typedef $base_class<DataType> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }
};

'''

# Same as header_template_1dt, except the base class has a second
# template parameter NumSrcOperands to allow a variable number of
# source operands.  Note that since this is implemented with an array,
# it only works for instructions where all sources are of the same
# type (like most arithmetics).
header_template_1dt_varsrcs = '''
template<typename DataType>
class $class_name : public $base_class<DataType, $num_srcs>
{
  public:
    typedef $base_class<DataType, $num_srcs> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Header template for instruction with two DataType template
# parameters, one for the dest and one for the source.  This is used
# by compare and convert.
header_template_2dt = '''
template<typename DestDataType, class SrcDataType>
class $class_name : public $base_class<DestDataType, SrcDataType>
{
  public:
    typedef $base_class<DestDataType, SrcDataType> Base;
    typedef typename DestDataType::CType DestCType;
    typedef typename SrcDataType::CType SrcCType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Maps a base class name (with any template arguments stripped, see
# gen()) to the header template used for it.  An empty string means no
# class declaration is generated for that base class.
header_templates = {
    'ArithInst': header_template_1dt_varsrcs,
    'CmovInst': header_template_1dt,
    'ClassInst': header_template_1dt,
    'ShiftInst': header_template_1dt,
    'ExtractInsertInst': header_template_1dt,
    'CmpInst': header_template_2dt,
    'CvtInst': header_template_2dt,
    'PopcountInst': header_template_2dt,
    'LdInst': '',
    'StInst': '',
    'SpecialInstNoSrc': header_template_nodt,
    'SpecialInst1Src': header_template_nodt,
    'SpecialInstNoSrcNoDest': '',
    'Stub': header_template_stub,
}
239
240###############
241#
242# Define code templates for exec functions
243#
244###############
245
# exec function body
#
# Each template below is the C++ definition of $class_name::execute().
# Apart from the stub, they all loop over the lanes of the wavefront,
# skip lanes whose predicate mask bit is clear, evaluate $expr and store
# the result into the destination operand.

# Stub: aborts through fatal() with the instruction's disassembly.
exec_template_stub = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble());
}

'''
# Non-templated instruction with a destination and no source operand.
exec_template_nodt_nosrc = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef Base::DestCType DestCType;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestCType dest_val = $expr;
            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Non-templated instruction with a destination and one source operand,
# made available to $expr as src_val0.
exec_template_nodt_1src = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef Base::DestCType DestCType;
    typedef Base::SrcCType  SrcCType;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
            DestCType dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Single-DataType instruction with $num_srcs sources of the same type,
# read into the src_val[] array.  When $dest_is_src_flag is true the
# current destination value is loaded first so $expr can read it.
exec_template_1dt_varsrcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            CType src_val[$num_srcs];

            for (int i = 0; i < $num_srcs; ++i) {
                src_val[i] = this->src[i].template get<CType>(w, lane);
            }

            dest_val = (CType)($expr);

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Single-DataType instruction with exactly three sources whose C types
# come from the base class (Src0CType, Src1CType, Src2CType); they are
# exposed to $expr as src_val0, src_val1 and src_val2.
exec_template_1dt_3srcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename Base::Src0CType Src0T;
    typedef typename Base::Src1CType Src1T;
    typedef typename Base::Src2CType Src2T;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;

            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
            Src2T src_val2 = this->src2.template get<Src2T>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Single-DataType instruction with two sources and a destination whose
# C type (Base::DestCType) may differ from the source type.
exec_template_1dt_2src_1dest = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename Base::DestCType DestT;
    typedef CType Src0T;
    typedef typename Base::Src1CType Src1T;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestT dest_val;
            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<DestT>(w, lane);
            }
            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Shift instructions: src_val0 has the instruction's CType while the
# second source (src_val1) is always read as uint32_t.  This template is
# selected directly by gen() rather than through the exec_templates
# table.
exec_template_shift = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();
    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;

            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            CType src_val0 = this->src0.template get<CType>(w, lane);
            uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Two-DataType instruction (separate dest and source types) with
# $num_srcs sources of the source type, read into src_val[].
exec_template_2dt = '''
template<typename DestDataType, class SrcDataType>
void
$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestCType dest_val;
            SrcCType src_val[$num_srcs];

            for (int i = 0; i < $num_srcs; ++i) {
                src_val[i] = this->src[i].template get<SrcCType>(w, lane);
            }

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Maps a base class name (template arguments stripped) to its execute()
# template.  An empty string means no execute() body is generated.
# 'ShiftInst' has no entry here because gen() handles it separately
# with exec_template_shift.
exec_templates = {
    'ArithInst': exec_template_1dt_varsrcs,
    'CmovInst': exec_template_1dt_3srcs,
    'ExtractInsertInst': exec_template_1dt_3srcs,
    'ClassInst': exec_template_1dt_2src_1dest,
    'CmpInst': exec_template_2dt,
    'CvtInst': exec_template_2dt,
    'PopcountInst': exec_template_2dt,
    'LdInst': '',
    'StInst': '',
    'SpecialInstNoSrc': exec_template_nodt_nosrc,
    'SpecialInst1Src': exec_template_nodt_1src,
    'SpecialInstNoSrcNoDest': '',
    'Stub': exec_template_stub,
}
464
465###############
466#
467# Define code templates for the decoder cases
468#
469###############
470
# decode template for nodt-opcode case: the opcode maps directly to a
# single constructor call, with no switch on the data type.
decode_nodt_template = '''
  case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''

# Prolog used by gen() for the 'Class' opcode only: the type switch is
# driven by the sourceType field (read through a BrigInstSourceType
# cast) instead of ib->type.  The commented-out C++ inside the template
# is part of the emitted text.
decode_case_prolog_class_inst = '''
  case BRIG_OPCODE_$brig_opcode_upper:
    {
        //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
        BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
        //switch (baseOp->kind) {
        //    case BRIG_OPERAND_REG:
        //        type = ((const BrigOperandReg*)baseOp)->type;
        //        break;
        //    case BRIG_OPERAND_IMMED:
        //        type = ((const BrigOperandImmed*)baseOp)->type;
        //        break;
        //    default:
        //        fatal("CLASS unrecognized kind of operand %d\\n",
        //               baseOp->kind);
        //}
        switch (type) {'''

# common prolog for 1dt- or 2dt-opcode case: switch on data type
decode_case_prolog = '''
  case BRIG_OPCODE_$brig_opcode_upper:
    {
        switch (ib->type) {'''

# single-level decode case entry (for 1dt opcodes)
decode_case_entry = \
'      case BRIG_TYPE_$type_name: return $constructor(ib, obj);'

# Store variant of the single-level entry (used when gen() is called
# with is_store=True): each type gets a braced case built from the
# prolog, the entry and the epilog below.
decode_store_prolog = \
'      case BRIG_TYPE_$type_name: {'

decode_store_case_epilog = '''
    }'''

decode_store_case_entry = \
'          return $constructor(ib, obj);'

# common epilog for type switch
decode_case_epilog = '''
          default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
              ib->type);
        }
    }
    break;'''

# Additional templates for nested decode on a second type field (for
# compare and convert).  These are used in place of the
# decode_case_entry template to create a second-level switch on the
# second type field inside each case of the first-level type switch.
# Because the name and location of the second type can vary, the Brig
# instruction type must be provided in $brig_type, and the name of the
# second type field must be provided in $type2_field.
decode_case2_prolog = '''
        case BRIG_TYPE_$type_name:
          switch (((Brig$brig_type*)ib)->$type2_field) {'''

decode_case2_entry = \
'          case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'

decode_case2_epilog = '''
          default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
                         ((Brig$brig_type*)ib)->$type2_field);
        }
        break;'''
539
540# Figure out how many source operands an expr needs by looking for the
541# highest-numbered srcN value referenced.  Since sources are numbered
542# starting at 0, the return value is N+1.
def num_src_operands(expr):
    """Return the number of source operands referenced by expr.

    Sources are spelled src0, src1, ... and are numbered from 0, so the
    result is the highest referenced index plus one, or 0 when expr does
    not mention any source.  Operands are matched as whole words with a
    single-digit index, i.e. with the same pattern gen() uses when it
    rewrites them, so the count can never disagree with the rewrite.
    """
    indices = [int(digit) for digit in re.findall(r'\bsrc(\d)\b', expr)]
    if not indices:
        return 0
    return max(indices) + 1
552
553###############
554#
555# Define final code generation methods
556#
# The gen() and gen_special() functions are the interface for
# generating actual instructions.
559#
560###############
561
# Generate class declaration, exec function, and decode switch case
# for a brig_opcode.  The 'types' parameter is a list or tuple of
# types for which the instruction should be instantiated; when
# 'type2_info' is given, a second-level type switch is generated too.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit class declaration, execute() body and decoder case for an opcode.

    brig_opcode        -- opcode name; used as the C++ class name, and in
                          upper case as the BRIG_OPCODE_* case label.
    types              -- tuple/list of type names the instruction is
                          instantiated for.  None (or a plain string)
                          produces a single, untyped decoder case.
    expr               -- C++ expression for the result, written in terms
                          of src0, src1, ... and optionally dest.  Must be
                          given when base_class is 'ArithInst'.
    base_class         -- C++ base class, optionally with template
                          arguments; the name without them selects the
                          header and exec templates.
    type2_info         -- optional (field_name, types2) pair requesting a
                          nested switch on a second type field.
    constructor_prefix -- text put in front of the class name to build the
                          expression returned by the decoder.
    is_store           -- use the store flavour of the one-level decode.

    NOTE: the templates are handed to header_code/exec_code/decoder_code
    without arguments, so their $name placeholders have to be resolved
    from the variables of this function.  Locals whose names appear in a
    template (class_name, opcode, brig_opcode_upper, num_srcs,
    dest_is_src_flag, expr, constructor, type_name, type2_name,
    type2_field, brig_type, ...) must therefore keep their names even
    when they look unused here.
    """
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # note that expr must be provided with ArithInst so we can
        # derive num_srcs for the template
        assert expr

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source.  'dest' is
        # matched as a whole word, exactly like the rewrite below.
        dest_is_src = re.search(r'\bdest\b', expr) is not None
        dest_is_src_flag = str(dest_is_src).lower() # for C++
        if base_class in ('ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst'):
            # these templates keep their sources in the src_val[] array
            expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
        else:
            # all other templates use individual src_valN variables
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    # Special and stub instructions are not class templates, so their
    # execute() bodies go to the exec file; everything else is emitted
    # into the header next to the class declaration.
    if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        header_code(exec_template_shift)
    else:
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)
        if not type2_info:
            if not is_store:
                # single list of types, to basic one-level decode
                for type_name in types:
                    full_class_name = '%s<%s>' % (class_name, type_name.upper())
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case_entry)
            else:
                # single list of types, to basic one-level decode; the
                # second template argument is rebuilt from the first
                # letter and the bit width of the type name
                for type_name in types:
                    decoder_code(decode_store_prolog)
                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
                    type_type = type_name[0]
                    full_class_name = '%s<%s,%s%d>' % (class_name,
                                                       type_name.upper(),
                                                       type_type.upper(),
                                                       type_size)
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_store_case_entry)
                    decoder_code(decode_store_case_epilog)
        else:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)

                decoder_code(decode_case2_epilog)

        decoder_code(decode_case_epilog)
653
654###############
655#
656# Generate instructions
657#
658###############
659
660# handy abbreviations for common sets of types
661
# Arithmetic ops are typically defined only on the 32- and 64-bit sizes.
arith_int_types = tuple('%s%d' % (sign, bits)
                        for bits in (32, 64) for sign in 'SU')
arith_float_types = tuple('F%d' % bits for bits in (32, 64))
arith_types = arith_int_types + arith_float_types

bit_types = tuple('B%d' % bits for bits in (1, 32, 64))

# The 8- and 16-bit integer types come first, then the arithmetic ones.
all_int_types = tuple('%s%d' % (sign, bits)
                      for bits in (8, 16) for sign in 'SU') + arith_int_types

# 'f16' memory ops may be possible as well, but they are ignored for
# now.
mem_types = all_int_types + arith_float_types
mem_atom_types = all_int_types + tuple('B%d' % bits for bits in (32, 64))
675
##### Arithmetic & logical operations
# Every call below emits one instruction through gen(); with the default
# base class (ArithInst) the number of sources is derived from the
# expression.
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')

gen('CopySign', arith_float_types,
    'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')

# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
# NOTE(review): for negative inputs this yields floor(src0) - src0, which
# is <= 0; confirm against the HSAIL definition of fract.
gen('Fract', arith_float_types,
    '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')

gen('Ncos', arith_float_types, 'cos(src0)')
gen('Nsin', arith_float_types, 'sin(src0)')

gen('And', bit_types, 'src0 & src1')
gen('Or', bit_types,  'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')

gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)')
# The source may be B64: __builtin_popcount() takes an unsigned int and
# would silently drop the upper 32 bits, so the long long variant is
# used for both source widths.
gen('Popcount', ('U32',), '__builtin_popcountll(src0)', 'PopcountInst',
    ('sourceType', ('B32', 'B64')))

gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')

# gen('Mul_hi', types=('s32','u32', '??'))
# gen('Mul24', types=('s32','u32', '??'))
gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')

gen('Abs', arith_types, 'std::abs(src0)')
gen('Neg', arith_types, '-src0')

gen('Mov', bit_types + arith_types, 'src0')
gen('Not', bit_types, 'heynot(src0)')

# mad and fma differ only in rounding behavior, which we don't emulate
# also there's an integer form of mad, but not of fma
gen('Mad', arith_types, 'src0 * src1 + src2')
gen('Fma', arith_float_types, 'src0 * src1 + src2')

#native floating point operations
gen('Nfma', arith_float_types, 'src0 * src1 + src2')

gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')

# see base/bitfield.hh
gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
    'ExtractInsertInst')

# 'dest' appears in the expression, so the destination is read before
# it is written.
gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
    'ExtractInsertInst')
739
##### Compare
# Both instructions carry a second type: Cmp through an explicit
# second-level switch on 'sourceType', Class through its own prolog
# inside gen().
gen('Cmp', types=('B1', 'S32', 'U32', 'F32'),
    expr='compare(src0, src1, this->cmpOp)', base_class='CmpInst',
    type2_info=('sourceType', arith_types + bit_types))
gen('Class', types=arith_float_types, expr='fpclassify(src0,src1)',
    base_class='ClassInst')

##### Conversion

# Conversions accept B1, but neither of the wider bit types B32 or B64.
cvt_types = ('B1',) + mem_types

gen('Cvt', types=cvt_types, expr='src0', base_class='CvtInst',
    type2_info=('sourceType', cvt_types))


##### Load & Store
# Memory and branch instructions have empty header/exec templates; only
# decoder cases are generated, and those call decode<Opcode>(...)
# instead of 'new <Opcode>'.
gen('Lda', types=mem_types, base_class='LdInst', constructor_prefix='decode')
gen('Ld', types=mem_types, base_class='LdInst', constructor_prefix='decode')
gen('St', types=mem_types, base_class='StInst', constructor_prefix='decode',
    is_store=True)
gen('Atomic', types=mem_atom_types, base_class='StInst',
    constructor_prefix='decode')
gen('AtomicNoRet', types=mem_atom_types, base_class='StInst',
    constructor_prefix='decode')

gen('Cbr', base_class='LdInst', constructor_prefix='decode')
gen('Br', base_class='LdInst', constructor_prefix='decode')
764
765##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Generate a special (non-templated) instruction from expr.

    The base class is picked from the number of source operands that
    expr references: SpecialInstNoSrc for none, SpecialInst1Src for one,
    each parameterised with dest_type.  No special base class is
    selected here for more than one source, so such an expression is
    rejected with a ValueError.
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # An explicit exception (rather than an assert) so the check
        # also survives running the script with python -O.
        raise ValueError("%s: special instructions support at most one "
                         "source operand, expression uses %d: %s"
                         % (brig_opcode, num_srcs, expr))

    gen(brig_opcode, None, expr, base_class)
776
# Special instructions producing a U32 result, as (opcode, expression)
# pairs; they are generated in the order listed.
for _special_name, _special_expr in (
        ('WorkItemId', 'w->workItemId[src0][lane]'),
        ('WorkItemAbsId',
         'w->workItemId[src0][lane] + '
         '(w->workGroupId[src0] * w->workGroupSz[src0])'),
        ('WorkGroupId', 'w->workGroupId[src0]'),
        ('WorkGroupSize', 'w->workGroupSz[src0]'),
        ('CurrentWorkGroupSize', 'w->workGroupSz[src0]'),
        ('GridSize', 'w->gridSz[src0]'),
        ('GridGroups', 'divCeil(w->gridSz[src0],w->workGroupSz[src0])'),
        ('LaneId', 'lane'),
        ('WaveId', 'w->wfId'),
    ):
    gen_special(_special_name, _special_expr)

# Clock is the only special instruction with a 64-bit destination.
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')

# gen_special('CU'', ')

# Instructions without source or destination operands.
#
# 'Call': map magic instructions to the BrigSyscall opcode.
# Magic instructions are defined in magic.hh
#
# In the future, real HSA kernel system calls can be implemented and coexist
# with magic instructions.
for _nosrc_nodest_name in ('Ret', 'Barrier', 'MemFence', 'Call'):
    gen(_nosrc_nodest_name, base_class='SpecialInstNoSrcNoDest')

# Stubs for unimplemented instructions:
# These may need to be implemented at some point in the future, but
# for now we just match the instructions with their operands.
#
# By defining stubs for these instructions, we can work with
# applications that have them in dead/unused code paths.
#
# Needed for rocm-hcc compilations for HSA backends since
# builtins-hsail library is `cat`d onto the generated kernels.
# The builtins-hsail library consists of handcoded hsail functions
# that __might__ be needed by the rocm-hcc compiler in certain binaries.
for _stub_name in (
        'Bitmask', 'Bitrev', 'Firstbit', 'Lastbit',
        'Unpacklo', 'Unpackhi', 'Pack', 'Unpack',
        'Lerp', 'Packcvt', 'Unpackcvt',
        'Sad', 'Sadhi',
        'Activelanecount', 'Activelaneid', 'Activelanemask',
        'Activelanepermute',
        'Groupbaseptr', 'Signalnoret',
    ):
    gen(_stub_name, base_class='Stub')
833
834###############
835#
836# Generate file epilogs
837#
838###############
# Header epilog, part 1: explicit specializations of execute() for
# Abs<U32> and Abs<U64> that copy the source value to the destination
# unchanged.  Presumably this keeps std::abs from being applied to
# unsigned values -- TODO confirm.
header_code('''
template<>
inline void
Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            CType src_val;

            src_val = this->src[0].template get<CType>(w, lane);

            dest_val = (CType)(src_val);

            this->dest.set(w, lane, dest_val);
        }
    }
}

template<>
inline void
Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            CType src_val;

            src_val = this->src[0].template get<CType>(w, lane);

            dest_val = (CType)(src_val);

            this->dest.set(w, lane, dest_val);
        }
    }
}
''')

# Header epilog, part 2: undo the prolog's indent and close the
# namespace.
header_code.dedent()
header_code('''
} // namespace HsailISA
''')

# close off main decode switch
# (both indent levels pushed by the decoder prolog are popped first)
decoder_code.dedent()
decoder_code.dedent()
decoder_code('''
          default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
        } // end switch(ib->opcode)
    } // end decode()
} // namespace HsailISA
''')

# Exec epilog: undo the prolog's indent and close the namespace.
exec_code.dedent()
exec_code('''
} // namespace HsailISA
''')
904
905###############
906#
907# Output accumulated code to files
908#
909###############
# The three command-line arguments name, in order, the files that
# receive the header, decoder and exec code.
for _formatter, _out_path in zip((header_code, decoder_code, exec_code),
                                 sys.argv[1:4]):
    _formatter.write(_out_path)
913