gen.py (12563:8d59ed22ae79) gen.py (13450:32a36390a49e)
1#! /usr/bin/python
2
3#
4# Copyright (c) 2015 Advanced Micro Devices, Inc.
5# All rights reserved.
6#
7# For use for simulation and test purposes only
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions are met:
11#
12# 1. Redistributions of source code must retain the above copyright notice,
13# this list of conditions and the following disclaimer.
14#
15# 2. Redistributions in binary form must reproduce the above copyright notice,
16# this list of conditions and the following disclaimer in the documentation
17# and/or other materials provided with the distribution.
18#
19# 3. Neither the name of the copyright holder nor the names of its contributors
20# may be used to endorse or promote products derived from this software
21# without specific prior written permission.
22#
23# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33# POSSIBILITY OF SUCH DAMAGE.
34#
35# Author: Steve Reinhardt
36#
37
38from __future__ import print_function
39
40import sys, re
41
42from m5.util import code_formatter
43
# Exactly three file-name arguments are required (argv[0] is the
# script itself, hence the comparison against 4).
if len(sys.argv) != 4:
    print("Error: need 3 args (file names)", file=sys.stderr)
    # Exit with a non-zero status so the invoking build step sees the
    # failure instead of treating a misinvocation as success.
    sys.exit(1)
47
48header_code = code_formatter()
49decoder_code = code_formatter()
50exec_code = code_formatter()
51
52###############
53#
54# Generate file prologs (includes etc.)
55#
56###############
57
58header_code('''
59#include "arch/hsail/insts/decl.hh"
60#include "base/bitfield.hh"
61#include "gpu-compute/hsail_code.hh"
62#include "gpu-compute/wavefront.hh"
63
64namespace HsailISA
65{
66''')
67header_code.indent()
68
69decoder_code('''
70#include "arch/hsail/gpu_decoder.hh"
71#include "arch/hsail/insts/branch.hh"
72#include "arch/hsail/insts/decl.hh"
73#include "arch/hsail/insts/gen_decl.hh"
74#include "arch/hsail/insts/mem.hh"
75#include "arch/hsail/insts/mem_impl.hh"
76#include "gpu-compute/brig_object.hh"
77
78namespace HsailISA
79{
80 std::vector<GPUStaticInst*> Decoder::decodedInsts;
81
82 GPUStaticInst*
83 Decoder::decode(MachInst machInst)
84 {
85 using namespace Brig;
86
87 const BrigInstBase *ib = machInst.brigInstBase;
88 const BrigObject *obj = machInst.brigObj;
89
90 switch(ib->opcode) {
91''')
92decoder_code.indent()
93decoder_code.indent()
94
95exec_code('''
96#include "arch/hsail/insts/gen_decl.hh"
97#include "base/intmath.hh"
98
99namespace HsailISA
100{
101''')
102exec_code.indent()
103
104###############
105#
106# Define code templates for class declarations (for header file)
107#
108###############
109
110# Basic header template for an instruction stub.
111header_template_stub = '''
112class $class_name : public $base_class
113{
114 public:
115 typedef $base_class Base;
116
117 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
118 : Base(ib, obj, "$opcode")
119 {
120 }
121
122 void execute(GPUDynInstPtr gpuDynInst);
123};
124
125'''
126
127# Basic header template for an instruction with no template parameters.
128header_template_nodt = '''
129class $class_name : public $base_class
130{
131 public:
132 typedef $base_class Base;
133
134 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
135 : Base(ib, obj, "$opcode")
136 {
137 }
138
139 void execute(GPUDynInstPtr gpuDynInst);
140};
141
142'''
143
144# Basic header template for an instruction with a single DataType
145# template parameter.
146header_template_1dt = '''
147template<typename DataType>
148class $class_name : public $base_class<DataType>
149{
150 public:
151 typedef $base_class<DataType> Base;
152 typedef typename DataType::CType CType;
153
154 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
155 : Base(ib, obj, "$opcode")
156 {
157 }
158
159 void execute(GPUDynInstPtr gpuDynInst);
160};
161
162'''
163
164header_template_1dt_noexec = '''
165template<typename DataType>
166class $class_name : public $base_class<DataType>
167{
168 public:
169 typedef $base_class<DataType> Base;
170 typedef typename DataType::CType CType;
171
172 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
173 : Base(ib, obj, "$opcode")
174 {
175 }
176};
177
178'''
179
180# Same as header_template_1dt, except the base class has a second
181# template parameter NumSrcOperands to allow a variable number of
182# source operands. Note that since this is implemented with an array,
183# it only works for instructions where all sources are of the same
184# type (like most arithmetics).
185header_template_1dt_varsrcs = '''
186template<typename DataType>
187class $class_name : public $base_class<DataType, $num_srcs>
188{
189 public:
190 typedef $base_class<DataType, $num_srcs> Base;
191 typedef typename DataType::CType CType;
192
193 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
194 : Base(ib, obj, "$opcode")
195 {
196 }
197
198 void execute(GPUDynInstPtr gpuDynInst);
199};
200
201'''
202
203# Header template for instruction with two DataType template
204# parameters, one for the dest and one for the source. This is used
205# by compare and convert.
206header_template_2dt = '''
207template<typename DestDataType, class SrcDataType>
208class $class_name : public $base_class<DestDataType, SrcDataType>
209{
210 public:
211 typedef $base_class<DestDataType, SrcDataType> Base;
212 typedef typename DestDataType::CType DestCType;
213 typedef typename SrcDataType::CType SrcCType;
214
215 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
216 : Base(ib, obj, "$opcode")
217 {
218 }
219
220 void execute(GPUDynInstPtr gpuDynInst);
221};
222
223'''
224
# Maps an instruction base-class name to the header (class declaration)
# template used for it.  Base classes mapped to '' have an empty
# template.
header_templates = dict(
    ArithInst=header_template_1dt_varsrcs,
    CmovInst=header_template_1dt,
    ClassInst=header_template_1dt,
    ShiftInst=header_template_1dt,
    ExtractInsertInst=header_template_1dt,
    CmpInst=header_template_2dt,
    CvtInst=header_template_2dt,
    PopcountInst=header_template_2dt,
    LdInst='',
    StInst='',
    SpecialInstNoSrc=header_template_nodt,
    SpecialInst1Src=header_template_nodt,
    SpecialInstNoSrcNoDest='',
    Stub=header_template_stub,
)
241
242###############
243#
244# Define code templates for exec functions
245#
246###############
247
248# exec function body
249exec_template_stub = '''
250void
251$class_name::execute(GPUDynInstPtr gpuDynInst)
252{
253 fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble());
254}
255
256'''
257exec_template_nodt_nosrc = '''
258void
259$class_name::execute(GPUDynInstPtr gpuDynInst)
260{
261 Wavefront *w = gpuDynInst->wavefront();
262
263 typedef Base::DestCType DestCType;
264
265 const VectorMask &mask = w->getPred();
266
267 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
268 if (mask[lane]) {
269 DestCType dest_val = $expr;
270 this->dest.set(w, lane, dest_val);
271 }
272 }
273}
274
275'''
276
277exec_template_nodt_1src = '''
278void
279$class_name::execute(GPUDynInstPtr gpuDynInst)
280{
281 Wavefront *w = gpuDynInst->wavefront();
282
283 typedef Base::DestCType DestCType;
284 typedef Base::SrcCType SrcCType;
285
286 const VectorMask &mask = w->getPred();
287
288 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
289 if (mask[lane]) {
290 SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
291 DestCType dest_val = $expr;
292
293 this->dest.set(w, lane, dest_val);
294 }
295 }
296}
297
298'''
299
300exec_template_1dt_varsrcs = '''
301template<typename DataType>
302void
303$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
304{
305 Wavefront *w = gpuDynInst->wavefront();
306
307 const VectorMask &mask = w->getPred();
308
309 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
310 if (mask[lane]) {
311 CType dest_val;
312 if ($dest_is_src_flag) {
313 dest_val = this->dest.template get<CType>(w, lane);
314 }
315
316 CType src_val[$num_srcs];
317
318 for (int i = 0; i < $num_srcs; ++i) {
319 src_val[i] = this->src[i].template get<CType>(w, lane);
320 }
321
322 dest_val = (CType)($expr);
323
324 this->dest.set(w, lane, dest_val);
325 }
326 }
327}
328
329'''
330
331exec_template_1dt_3srcs = '''
332template<typename DataType>
333void
334$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
335{
336 Wavefront *w = gpuDynInst->wavefront();
337
338 typedef typename Base::Src0CType Src0T;
339 typedef typename Base::Src1CType Src1T;
340 typedef typename Base::Src2CType Src2T;
341
342 const VectorMask &mask = w->getPred();
343
344 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
345 if (mask[lane]) {
346 CType dest_val;
347
348 if ($dest_is_src_flag) {
349 dest_val = this->dest.template get<CType>(w, lane);
350 }
351
352 Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
353 Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
354 Src2T src_val2 = this->src2.template get<Src2T>(w, lane);
355
356 dest_val = $expr;
357
358 this->dest.set(w, lane, dest_val);
359 }
360 }
361}
362
363'''
364
365exec_template_1dt_2src_1dest = '''
366template<typename DataType>
367void
368$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
369{
370 Wavefront *w = gpuDynInst->wavefront();
371
372 typedef typename Base::DestCType DestT;
373 typedef CType Src0T;
374 typedef typename Base::Src1CType Src1T;
375
376 const VectorMask &mask = w->getPred();
377
378 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
379 if (mask[lane]) {
380 DestT dest_val;
381 if ($dest_is_src_flag) {
382 dest_val = this->dest.template get<DestT>(w, lane);
383 }
384 Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
385 Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
386
387 dest_val = $expr;
388
389 this->dest.set(w, lane, dest_val);
390 }
391 }
392}
393
394'''
395
396exec_template_shift = '''
397template<typename DataType>
398void
399$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
400{
401 Wavefront *w = gpuDynInst->wavefront();
402
403 const VectorMask &mask = w->getPred();
404 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
405 if (mask[lane]) {
406 CType dest_val;
407
408 if ($dest_is_src_flag) {
409 dest_val = this->dest.template get<CType>(w, lane);
410 }
411
412 CType src_val0 = this->src0.template get<CType>(w, lane);
413 uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);
414
415 dest_val = $expr;
416
417 this->dest.set(w, lane, dest_val);
418 }
419 }
420}
421
422'''
423
424exec_template_2dt = '''
425template<typename DestDataType, class SrcDataType>
426void
427$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
428{
429 Wavefront *w = gpuDynInst->wavefront();
430
431 const VectorMask &mask = w->getPred();
432
433 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
434 if (mask[lane]) {
435 DestCType dest_val;
436 SrcCType src_val[$num_srcs];
437
438 for (int i = 0; i < $num_srcs; ++i) {
439 src_val[i] = this->src[i].template get<SrcCType>(w, lane);
440 }
441
442 dest_val = $expr;
443
444 this->dest.set(w, lane, dest_val);
445 }
446 }
447}
448
449'''
450
# Maps an instruction base-class name to the template for its execute()
# definition.  Base classes mapped to '' have an empty template.  There
# is no 'ShiftInst' entry here; gen() uses exec_template_shift directly
# for that base class.
exec_templates = dict(
    ArithInst=exec_template_1dt_varsrcs,
    CmovInst=exec_template_1dt_3srcs,
    ExtractInsertInst=exec_template_1dt_3srcs,
    ClassInst=exec_template_1dt_2src_1dest,
    CmpInst=exec_template_2dt,
    CvtInst=exec_template_2dt,
    PopcountInst=exec_template_2dt,
    LdInst='',
    StInst='',
    SpecialInstNoSrc=exec_template_nodt_nosrc,
    SpecialInst1Src=exec_template_nodt_1src,
    SpecialInstNoSrcNoDest='',
    Stub=exec_template_stub,
)
466
467###############
468#
469# Define code templates for the decoder cases
470#
471###############
472
473# decode template for nodt-opcode case
474decode_nodt_template = '''
475 case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''
476
477decode_case_prolog_class_inst = '''
478 case BRIG_OPCODE_$brig_opcode_upper:
479 {
480 //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
481 BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
482 //switch (baseOp->kind) {
483 // case BRIG_OPERAND_REG:
484 // type = ((const BrigOperandReg*)baseOp)->type;
485 // break;
486 // case BRIG_OPERAND_IMMED:
487 // type = ((const BrigOperandImmed*)baseOp)->type;
488 // break;
489 // default:
490 // fatal("CLASS unrecognized kind of operand %d\\n",
491 // baseOp->kind);
492 //}
493 switch (type) {'''
494
495# common prolog for 1dt- or 2dt-opcode case: switch on data type
496decode_case_prolog = '''
497 case BRIG_OPCODE_$brig_opcode_upper:
498 {
499 switch (ib->type) {'''
500
501# single-level decode case entry (for 1dt opcodes)
502decode_case_entry = \
503' case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
504
505decode_store_prolog = \
506' case BRIG_TYPE_$type_name: {'
507
508decode_store_case_epilog = '''
509 }'''
510
511decode_store_case_entry = \
512' return $constructor(ib, obj);'
513
514# common epilog for type switch
515decode_case_epilog = '''
516 default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
517 ib->type);
518 }
519 }
520 break;'''
521
# Additional templates for nested decode on a second type field (for
# compare and convert).  These are used in place of the
# decode_case_entry template to create a second-level switch on the
# second type field inside each case of the first-level type switch.
# Because the name and location of the second type can vary, the Brig
# instruction type must be provided in $brig_type, and the name of the
# second type field must be provided in $type2_field.
529decode_case2_prolog = '''
530 case BRIG_TYPE_$type_name:
531 switch (((Brig$brig_type*)ib)->$type2_field) {'''
532
533decode_case2_entry = \
534' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
535
536decode_case2_epilog = '''
537 default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
538 ((Brig$brig_type*)ib)->$type2_field);
539 }
540 break;'''
541
542# Figure out how many source operands an expr needs by looking for the
543# highest-numbered srcN value referenced. Since sources are numbered
544# starting at 0, the return value is N+1.
def num_src_operands(expr):
    """Return how many source operands *expr* needs.

    The count is derived from the highest-numbered ``srcN`` token found
    in *expr*; since sources are numbered from 0 the result is N + 1.
    An expression that references no ``srcN`` token yields 0.

    The tokens are matched with the same ``\\bsrc(\\d)\\b`` pattern that
    gen() uses when it rewrites the expression, so the count always
    agrees with the operands that actually get substituted, and it is
    no longer capped at three sources.
    """
    indices = [int(digit) for digit in re.findall(r'\bsrc(\d)\b', expr)]
    if not indices:
        return 0
    return max(indices) + 1
554
555###############
556#
557# Define final code generation methods
558#
# The gen() function is the interface for generating actual
# instructions.
561#
562###############
563
# Generate class declaration, exec function, and decode switch case
# for a brig_opcode with a single-level type switch.  The 'types'
# parameter is a list or tuple of types for which the instruction
# should be instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit the class declaration, exec function and decoder case(s)
    for one BRIG opcode.

    Args:
        brig_opcode: opcode name.  Used verbatim as the generated class
            name, lower-cased as the opcode string and upper-cased for
            the BRIG_OPCODE_* case label.
        types: sequence of type names the instruction is instantiated
            for.  If it is empty/None or a plain string, a single
            untemplated decode case is emitted instead of a type switch.
        expr: expression written in terms of src0..srcN and dest.  It
            is mandatory when base_class is 'ArithInst' because the
            number of sources is derived from it.
        base_class: name of the base class (template arguments, if any,
            are stripped before the template lookup).
        type2_info: optional (type2_field, types2) pair.  When given, a
            nested switch on that second type field is generated inside
            every first-level type case.
        constructor_prefix: text placed in front of the class name in
            the decoder's return statement.
        is_store: when true (and type2_info is not given) the
            store-style decode entries are emitted, instantiating the
            class with two template arguments.

    Raises:
        KeyError: if the stripped base class has no entry in
            header_templates (or exec_templates, where that is used).

    NOTE(review): the templates reference $class_name, $opcode,
    $brig_opcode_upper, $num_srcs, $dest_is_src_flag, $expr,
    $constructor, $type_name, $type2_name, $type2_field and $brig_type,
    while the formatter calls below pass no values explicitly.  The
    formatter therefore presumably resolves them from this function's
    local variables -- confirm against code_formatter before renaming
    or removing any of those locals.
    """
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # note that expr must be provided with ArithInst so we can
        # derive num_srcs for the template
        assert expr

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = 'dest' in expr
        dest_is_src_flag = str(dest_is_src).lower()  # for C++
        # Base classes whose exec template keeps the sources in an
        # array index it (src_val[N]); all others use one scalar per
        # source (src_valN).
        if base_class in ('ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst'):
            src_repl = r'src_val[\1]'
        else:
            src_repl = r'src_val\1'
        expr = re.sub(r'\bsrc(\d)\b', src_repl, expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
        # non-template classes: the definition goes to the exec file
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        # exec_templates has no ShiftInst entry; use its template directly
        header_code(exec_template_shift)
    else:
        # templated execute() definitions are emitted into the header
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)
        if not type2_info:
            if not is_store:
                # single list of types, so basic one-level decode
                for type_name in types:
                    full_class_name = '%s<%s>' % (class_name,
                                                  type_name.upper())
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case_entry)
            else:
                # single list of types, one braced case per type; the
                # second template argument is rebuilt from the type's
                # leading letter and its bit width
                for type_name in types:
                    decoder_code(decode_store_prolog)
                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
                    type_type = type_name[0]
                    full_class_name = '%s<%s,%s>' % (
                        class_name, type_name.upper(),
                        '%s%d' % (type_type.upper(), type_size))
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_store_case_entry)
                    decoder_code(decode_store_case_epilog)
        else:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)

                decoder_code(decode_case2_epilog)

        decoder_code(decode_case_epilog)
655
656###############
657#
658# Generate instructions
659#
660###############
661
662# handy abbreviations for common sets of types
663
# Most arithmetic ops exist only in 32- and 64-bit flavours.
arith_float_types = 'F32', 'F64'
arith_int_types = 'S32', 'U32', 'S64', 'U64'
arith_types = arith_int_types + arith_float_types

bit_types = 'B1', 'B32', 'B64'

# Every integer type: the narrow ones followed by the arithmetic ones.
all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types

# 'f16' memory ops might be possible as well, but they are ignored
# for now.
mem_atom_types = all_int_types + ('B32', 'B64')
mem_types = all_int_types + arith_float_types
677
678##### Arithmetic & logical operations
# Two-source arithmetic, instantiated for every arithmetic type.
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')

# Floating-point-only operations.
gen('CopySign', arith_float_types,
    'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')

# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
gen('Fract', arith_float_types,
    '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')

gen('Ncos', arith_float_types, 'cos(src0)')
gen('Nsin', arith_float_types, 'sin(src0)')

# Bitwise logic, instantiated for the bit types.
gen('And', bit_types, 'src0 & src1')
gen('Or', bit_types, 'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')
705
1#! /usr/bin/python
2
3#
4# Copyright (c) 2015 Advanced Micro Devices, Inc.
5# All rights reserved.
6#
7# For use for simulation and test purposes only
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions are met:
11#
12# 1. Redistributions of source code must retain the above copyright notice,
13# this list of conditions and the following disclaimer.
14#
15# 2. Redistributions in binary form must reproduce the above copyright notice,
16# this list of conditions and the following disclaimer in the documentation
17# and/or other materials provided with the distribution.
18#
19# 3. Neither the name of the copyright holder nor the names of its contributors
20# may be used to endorse or promote products derived from this software
21# without specific prior written permission.
22#
23# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33# POSSIBILITY OF SUCH DAMAGE.
34#
35# Author: Steve Reinhardt
36#
37
38from __future__ import print_function
39
40import sys, re
41
42from m5.util import code_formatter
43
# Exactly three file-name arguments are required (argv[0] is the
# script itself, hence the comparison against 4).
if len(sys.argv) != 4:
    print("Error: need 3 args (file names)", file=sys.stderr)
    # Exit with a non-zero status so the invoking build step sees the
    # failure instead of treating a misinvocation as success.
    sys.exit(1)
47
48header_code = code_formatter()
49decoder_code = code_formatter()
50exec_code = code_formatter()
51
52###############
53#
54# Generate file prologs (includes etc.)
55#
56###############
57
58header_code('''
59#include "arch/hsail/insts/decl.hh"
60#include "base/bitfield.hh"
61#include "gpu-compute/hsail_code.hh"
62#include "gpu-compute/wavefront.hh"
63
64namespace HsailISA
65{
66''')
67header_code.indent()
68
69decoder_code('''
70#include "arch/hsail/gpu_decoder.hh"
71#include "arch/hsail/insts/branch.hh"
72#include "arch/hsail/insts/decl.hh"
73#include "arch/hsail/insts/gen_decl.hh"
74#include "arch/hsail/insts/mem.hh"
75#include "arch/hsail/insts/mem_impl.hh"
76#include "gpu-compute/brig_object.hh"
77
78namespace HsailISA
79{
80 std::vector<GPUStaticInst*> Decoder::decodedInsts;
81
82 GPUStaticInst*
83 Decoder::decode(MachInst machInst)
84 {
85 using namespace Brig;
86
87 const BrigInstBase *ib = machInst.brigInstBase;
88 const BrigObject *obj = machInst.brigObj;
89
90 switch(ib->opcode) {
91''')
92decoder_code.indent()
93decoder_code.indent()
94
95exec_code('''
96#include "arch/hsail/insts/gen_decl.hh"
97#include "base/intmath.hh"
98
99namespace HsailISA
100{
101''')
102exec_code.indent()
103
104###############
105#
106# Define code templates for class declarations (for header file)
107#
108###############
109
110# Basic header template for an instruction stub.
111header_template_stub = '''
112class $class_name : public $base_class
113{
114 public:
115 typedef $base_class Base;
116
117 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
118 : Base(ib, obj, "$opcode")
119 {
120 }
121
122 void execute(GPUDynInstPtr gpuDynInst);
123};
124
125'''
126
127# Basic header template for an instruction with no template parameters.
128header_template_nodt = '''
129class $class_name : public $base_class
130{
131 public:
132 typedef $base_class Base;
133
134 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
135 : Base(ib, obj, "$opcode")
136 {
137 }
138
139 void execute(GPUDynInstPtr gpuDynInst);
140};
141
142'''
143
144# Basic header template for an instruction with a single DataType
145# template parameter.
146header_template_1dt = '''
147template<typename DataType>
148class $class_name : public $base_class<DataType>
149{
150 public:
151 typedef $base_class<DataType> Base;
152 typedef typename DataType::CType CType;
153
154 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
155 : Base(ib, obj, "$opcode")
156 {
157 }
158
159 void execute(GPUDynInstPtr gpuDynInst);
160};
161
162'''
163
164header_template_1dt_noexec = '''
165template<typename DataType>
166class $class_name : public $base_class<DataType>
167{
168 public:
169 typedef $base_class<DataType> Base;
170 typedef typename DataType::CType CType;
171
172 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
173 : Base(ib, obj, "$opcode")
174 {
175 }
176};
177
178'''
179
180# Same as header_template_1dt, except the base class has a second
181# template parameter NumSrcOperands to allow a variable number of
182# source operands. Note that since this is implemented with an array,
183# it only works for instructions where all sources are of the same
184# type (like most arithmetics).
185header_template_1dt_varsrcs = '''
186template<typename DataType>
187class $class_name : public $base_class<DataType, $num_srcs>
188{
189 public:
190 typedef $base_class<DataType, $num_srcs> Base;
191 typedef typename DataType::CType CType;
192
193 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
194 : Base(ib, obj, "$opcode")
195 {
196 }
197
198 void execute(GPUDynInstPtr gpuDynInst);
199};
200
201'''
202
203# Header template for instruction with two DataType template
204# parameters, one for the dest and one for the source. This is used
205# by compare and convert.
206header_template_2dt = '''
207template<typename DestDataType, class SrcDataType>
208class $class_name : public $base_class<DestDataType, SrcDataType>
209{
210 public:
211 typedef $base_class<DestDataType, SrcDataType> Base;
212 typedef typename DestDataType::CType DestCType;
213 typedef typename SrcDataType::CType SrcCType;
214
215 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
216 : Base(ib, obj, "$opcode")
217 {
218 }
219
220 void execute(GPUDynInstPtr gpuDynInst);
221};
222
223'''
224
225header_templates = {
226 'ArithInst': header_template_1dt_varsrcs,
227 'CmovInst': header_template_1dt,
228 'ClassInst': header_template_1dt,
229 'ShiftInst': header_template_1dt,
230 'ExtractInsertInst': header_template_1dt,
231 'CmpInst': header_template_2dt,
232 'CvtInst': header_template_2dt,
233 'PopcountInst': header_template_2dt,
234 'LdInst': '',
235 'StInst': '',
236 'SpecialInstNoSrc': header_template_nodt,
237 'SpecialInst1Src': header_template_nodt,
238 'SpecialInstNoSrcNoDest': '',
239 'Stub': header_template_stub,
240}
241
242###############
243#
244# Define code templates for exec functions
245#
246###############
247
248# exec function body
249exec_template_stub = '''
250void
251$class_name::execute(GPUDynInstPtr gpuDynInst)
252{
253 fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble());
254}
255
256'''
257exec_template_nodt_nosrc = '''
258void
259$class_name::execute(GPUDynInstPtr gpuDynInst)
260{
261 Wavefront *w = gpuDynInst->wavefront();
262
263 typedef Base::DestCType DestCType;
264
265 const VectorMask &mask = w->getPred();
266
267 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
268 if (mask[lane]) {
269 DestCType dest_val = $expr;
270 this->dest.set(w, lane, dest_val);
271 }
272 }
273}
274
275'''
276
277exec_template_nodt_1src = '''
278void
279$class_name::execute(GPUDynInstPtr gpuDynInst)
280{
281 Wavefront *w = gpuDynInst->wavefront();
282
283 typedef Base::DestCType DestCType;
284 typedef Base::SrcCType SrcCType;
285
286 const VectorMask &mask = w->getPred();
287
288 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
289 if (mask[lane]) {
290 SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
291 DestCType dest_val = $expr;
292
293 this->dest.set(w, lane, dest_val);
294 }
295 }
296}
297
298'''
299
300exec_template_1dt_varsrcs = '''
301template<typename DataType>
302void
303$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
304{
305 Wavefront *w = gpuDynInst->wavefront();
306
307 const VectorMask &mask = w->getPred();
308
309 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
310 if (mask[lane]) {
311 CType dest_val;
312 if ($dest_is_src_flag) {
313 dest_val = this->dest.template get<CType>(w, lane);
314 }
315
316 CType src_val[$num_srcs];
317
318 for (int i = 0; i < $num_srcs; ++i) {
319 src_val[i] = this->src[i].template get<CType>(w, lane);
320 }
321
322 dest_val = (CType)($expr);
323
324 this->dest.set(w, lane, dest_val);
325 }
326 }
327}
328
329'''
330
331exec_template_1dt_3srcs = '''
332template<typename DataType>
333void
334$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
335{
336 Wavefront *w = gpuDynInst->wavefront();
337
338 typedef typename Base::Src0CType Src0T;
339 typedef typename Base::Src1CType Src1T;
340 typedef typename Base::Src2CType Src2T;
341
342 const VectorMask &mask = w->getPred();
343
344 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
345 if (mask[lane]) {
346 CType dest_val;
347
348 if ($dest_is_src_flag) {
349 dest_val = this->dest.template get<CType>(w, lane);
350 }
351
352 Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
353 Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
354 Src2T src_val2 = this->src2.template get<Src2T>(w, lane);
355
356 dest_val = $expr;
357
358 this->dest.set(w, lane, dest_val);
359 }
360 }
361}
362
363'''
364
365exec_template_1dt_2src_1dest = '''
366template<typename DataType>
367void
368$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
369{
370 Wavefront *w = gpuDynInst->wavefront();
371
372 typedef typename Base::DestCType DestT;
373 typedef CType Src0T;
374 typedef typename Base::Src1CType Src1T;
375
376 const VectorMask &mask = w->getPred();
377
378 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
379 if (mask[lane]) {
380 DestT dest_val;
381 if ($dest_is_src_flag) {
382 dest_val = this->dest.template get<DestT>(w, lane);
383 }
384 Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
385 Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
386
387 dest_val = $expr;
388
389 this->dest.set(w, lane, dest_val);
390 }
391 }
392}
393
394'''
395
396exec_template_shift = '''
397template<typename DataType>
398void
399$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
400{
401 Wavefront *w = gpuDynInst->wavefront();
402
403 const VectorMask &mask = w->getPred();
404 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
405 if (mask[lane]) {
406 CType dest_val;
407
408 if ($dest_is_src_flag) {
409 dest_val = this->dest.template get<CType>(w, lane);
410 }
411
412 CType src_val0 = this->src0.template get<CType>(w, lane);
413 uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);
414
415 dest_val = $expr;
416
417 this->dest.set(w, lane, dest_val);
418 }
419 }
420}
421
422'''
423
# C++ execute() template for instructions parameterized on two data
# types (destination and source).  exec_templates uses it for 'CmpInst',
# 'CvtInst' and 'PopcountInst'.  All sources are read with the source
# type into the array src_val[], so $expr refers to src_val[N].
#
# Placeholders filled in by gen():
#   $class_name - C++ class name of the instruction
#   $num_srcs   - number of source operands to read
#   $expr       - C++ expression assigned to the destination
exec_template_2dt = '''
template<typename DestDataType, class SrcDataType>
void
$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestCType dest_val;
            SrcCType src_val[$num_srcs];

            for (int i = 0; i < $num_srcs; ++i) {
                src_val[i] = this->src[i].template get<SrcCType>(w, lane);
            }

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''
450
# Maps a base-class name (with any '<...>' template arguments already
# stripped by gen()) to the C++ execute() template emitted for it.
# An empty string means no execute() body is generated for that base
# class.  'ShiftInst' is intentionally absent: gen() handles it
# separately with exec_template_shift.
exec_templates = {
    'ArithInst': exec_template_1dt_varsrcs,
    'CmovInst': exec_template_1dt_3srcs,
    'ExtractInsertInst': exec_template_1dt_3srcs,
    'ClassInst': exec_template_1dt_2src_1dest,
    'CmpInst': exec_template_2dt,
    'CvtInst': exec_template_2dt,
    'PopcountInst': exec_template_2dt,
    # memory instructions: no generated execute()
    'LdInst': '',
    'StInst': '',
    'SpecialInstNoSrc': exec_template_nodt_nosrc,
    'SpecialInst1Src': exec_template_nodt_1src,
    'SpecialInstNoSrcNoDest': '',
    'Stub': exec_template_stub,
}
466
467###############
468#
469# Define code templates for the decoder cases
470#
471###############
472
473# decode template for nodt-opcode case
474decode_nodt_template = '''
475 case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''
476
477decode_case_prolog_class_inst = '''
478 case BRIG_OPCODE_$brig_opcode_upper:
479 {
480 //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
481 BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
482 //switch (baseOp->kind) {
483 // case BRIG_OPERAND_REG:
484 // type = ((const BrigOperandReg*)baseOp)->type;
485 // break;
486 // case BRIG_OPERAND_IMMED:
487 // type = ((const BrigOperandImmed*)baseOp)->type;
488 // break;
489 // default:
490 // fatal("CLASS unrecognized kind of operand %d\\n",
491 // baseOp->kind);
492 //}
493 switch (type) {'''
494
495# common prolog for 1dt- or 2dt-opcode case: switch on data type
496decode_case_prolog = '''
497 case BRIG_OPCODE_$brig_opcode_upper:
498 {
499 switch (ib->type) {'''
500
501# single-level decode case entry (for 1dt opcodes)
502decode_case_entry = \
503' case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
504
505decode_store_prolog = \
506' case BRIG_TYPE_$type_name: {'
507
508decode_store_case_epilog = '''
509 }'''
510
511decode_store_case_entry = \
512' return $constructor(ib, obj);'
513
514# common epilog for type switch
515decode_case_epilog = '''
516 default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
517 ib->type);
518 }
519 }
520 break;'''
521
# Additional templates for nested decode on a second type field (for
# compare and convert).  These are used in place of the
# decode_case_entry template to create a second-level switch on the
# second type field inside each case of the first-level type switch.
# Because the name and location of the second type can vary, the Brig
# instruction type must be provided in $brig_type, and the name of the
# second type field must be provided in $type2_field.
529decode_case2_prolog = '''
530 case BRIG_TYPE_$type_name:
531 switch (((Brig$brig_type*)ib)->$type2_field) {'''
532
533decode_case2_entry = \
534' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
535
536decode_case2_epilog = '''
537 default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
538 ((Brig$brig_type*)ib)->$type2_field);
539 }
540 break;'''
541
542# Figure out how many source operands an expr needs by looking for the
543# highest-numbered srcN value referenced. Since sources are numbered
544# starting at 0, the return value is N+1.
def num_src_operands(expr):
    """Return the number of source operands referenced by `expr`.

    `expr` is the C++ expression string passed to gen(), written in terms
    of src0, src1, ... .  The result is N+1, where srcN is the
    highest-numbered source that appears, or 0 if no source is referenced.
    """
    # Use the same word-bounded, single-digit pattern that gen() uses when
    # it rewrites srcN into src_valN, so the operand count always agrees
    # with the operands that actually get substituted (e.g. 'mysrc1' is
    # not a source reference, while 'src3' is).
    indices = [int(n) for n in re.findall(r'\bsrc(\d)\b', expr)]
    if not indices:
        return 0
    return max(indices) + 1
554
555###############
556#
557# Define final code generation methods
558#
# The gen() function (and the gen_special() wrapper defined further
# below) is the interface for generating actual instructions.
561#
562###############
563
# Generate class declaration, exec function, and decode switch case
# for a brig_opcode.  The 'types' parameter is a list or tuple of types
# for which the instruction should be instantiated; when 'type2_info'
# is given, a second-level type switch is generated as well.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit header, execute() and decoder code for one Brig opcode.

    Arguments:
      brig_opcode: opcode name; used verbatim as the C++ class name and
          upper-cased for the BRIG_OPCODE_ case label.
      types: list/tuple of type names to instantiate the instruction for.
          If empty/None or a plain string, a single untyped decode case
          is emitted instead of a type switch.
      expr: C++ expression for the result, written in terms of
          src0..srcN and optionally dest.  Required for 'ArithInst'.
      base_class: name of the C++ base class; selects the header and
          execute() templates (template arguments are stripped before
          the lookup).
      type2_info: optional (type2_field, types2) tuple requesting a
          nested switch on a second type field.
      constructor_prefix: text prepended to the class name to form the
          expression returned by the decoder.
      is_store: if true (and no type2_info), each type case is wrapped
          in braces and the class is instantiated with two template
          arguments.

    NOTE(review): the templates reference names such as $constructor,
    $type_name and $brig_opcode_upper that are never passed explicitly,
    so code_formatter presumably resolves them from this function's
    local variables.  Treat every local name here as part of the
    template interface and do not rename or remove locals without
    checking all templates.
    """
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # note that expr must be provided with ArithInst so we can
        # derive num_srcs for the template
        assert expr

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = expr.find('dest') != -1
        dest_is_src_flag = str(dest_is_src).lower() # for C++
        # Rewrite srcN to the local variable names used by the chosen
        # execute() template: an array (src_val[N]) for the templates
        # that read sources in a loop, separate scalars (src_valN)
        # otherwise.
        if base_class in ['ShiftInst']:
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        elif base_class in ['ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst']:
            expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
        else:
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    # Special/stub execute() bodies go to the exec file; every other
    # execute() body is written to the header (presumably because those
    # are C++ templates and must be visible to all users).
    if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        header_code(exec_template_shift)
    else:
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            # 'Class' switches on the instruction's sourceType rather
            # than ib->type
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)
        if not type2_info:
            if not is_store:
                # single list of types, do basic one-level decode
                for type_name in types:
                    full_class_name = '%s<%s>' % (class_name, type_name.upper())
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case_entry)
            else:
                # single list of types, do basic one-level decode, but
                # with a braced case body and a second template
                # argument rebuilt from the type's leading letter and
                # its bit width
                for type_name in types:
                    decoder_code(decode_store_prolog)
                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
                    # NOTE(review): src_size is not referenced in the
                    # visible code; confirm no template uses it before
                    # removing.
                    src_size = 32
                    type_type = type_name[0]
                    full_class_name = '%s<%s,%s>' % (class_name, \
                                                     type_name.upper(), \
                                                     '%s%d' % \
                                                     (type_type.upper(), \
                                                     type_size))
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_store_case_entry)
                    decoder_code(decode_store_case_epilog)
        else:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                # first template argument is fixed per outer case; the
                # remaining '%s' is filled in with each second type
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)

                decoder_code(decode_case2_epilog)

        decoder_code(decode_case_epilog)
655
656###############
657#
658# Generate instructions
659#
660###############
661
662# handy abbreviations for common sets of types
663
664# arithmetic ops are typically defined only on 32- and 64-bit sizes
665arith_int_types = ('S32', 'U32', 'S64', 'U64')
666arith_float_types = ('F32', 'F64')
667arith_types = arith_int_types + arith_float_types
668
669bit_types = ('B1', 'B32', 'B64')
670
671all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types
672
673# I think you might be able to do 'f16' memory ops too, but we'll
674# ignore them for now.
675mem_types = all_int_types + arith_float_types
676mem_atom_types = all_int_types + ('B32', 'B64')
677
678##### Arithmetic & logical operations
679gen('Add', arith_types, 'src0 + src1')
680gen('Sub', arith_types, 'src0 - src1')
681gen('Mul', arith_types, 'src0 * src1')
682gen('Div', arith_types, 'src0 / src1')
683gen('Min', arith_types, 'std::min(src0, src1)')
684gen('Max', arith_types, 'std::max(src0, src1)')
685gen('Gcnmin', arith_types, 'std::min(src0, src1)')
686
687gen('CopySign', arith_float_types,
688 'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
689gen('Sqrt', arith_float_types, 'sqrt(src0)')
690gen('Floor', arith_float_types, 'floor(src0)')
691
692# "fast" sqrt... same as slow for us
693gen('Nsqrt', arith_float_types, 'sqrt(src0)')
694gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
695gen('Nrcp', arith_float_types, '1.0/src0')
696gen('Fract', arith_float_types,
697 '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')
698
699gen('Ncos', arith_float_types, 'cos(src0)');
700gen('Nsin', arith_float_types, 'sin(src0)');
701
702gen('And', bit_types, 'src0 & src1')
703gen('Or', bit_types, 'src0 | src1')
704gen('Xor', bit_types, 'src0 ^ src1')
705
# Bitselect is generated exactly once (a second gen('Bitselect', ...) call
# would emit the class and its decode case twice).  src0 is cast to
# uint64_t before being complemented so the inverted mask is computed on
# an unsigned 64-bit value rather than on the integer-promoted operand.
gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)')
# __builtin_popcount() takes an unsigned int, which would silently drop
# the upper 32 bits of a B64 source; the "ll" variant covers both the
# B32 and B64 source types.
gen('Popcount', ('U32',), '__builtin_popcountll(src0)', 'PopcountInst',
    ('sourceType', ('B32', 'B64')))
709
710gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
711gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')
712
713# gen('Mul_hi', types=('s32','u32', '??'))
714# gen('Mul24', types=('s32','u32', '??'))
715gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')
716
717gen('Abs', arith_types, 'std::abs(src0)')
718gen('Neg', arith_types, '-src0')
719
720gen('Mov', bit_types + arith_types, 'src0')
721gen('Not', bit_types, 'heynot(src0)')
722
723# mad and fma differ only in rounding behavior, which we don't emulate
724# also there's an integer form of mad, but not of fma
725gen('Mad', arith_types, 'src0 * src1 + src2')
726gen('Fma', arith_float_types, 'src0 * src1 + src2')
727
728#native floating point operations
729gen('Nfma', arith_float_types, 'src0 * src1 + src2')
730
731gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
732gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
733gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')
734
735# see base/bitfield.hh
736gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
737 'ExtractInsertInst')
738
739gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
740 'ExtractInsertInst')
741
742##### Compare
743gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
744 'CmpInst', ('sourceType', arith_types + bit_types))
745gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst')
746
747##### Conversion
748
749# Conversion operations are only defined on B1, not B32 or B64
750cvt_types = ('B1',) + mem_types
751
752gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))
753
754
755##### Load & Store
756gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode')
757gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode')
758gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode',
759 is_store=True)
760gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
761gen('AtomicNoRet', mem_atom_types, base_class='StInst',
762 constructor_prefix='decode')
763
764gen('Cbr', base_class = 'LdInst', constructor_prefix='decode')
765gen('Br', base_class = 'LdInst', constructor_prefix='decode')
766
767##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Generate a 'special' instruction with zero or one source operand.

    Picks the SpecialInstNoSrc or SpecialInst1Src base class (templated
    on `dest_type`) from the number of sources referenced in `expr`, then
    delegates to gen() with no type list.

    Raises AssertionError if `expr` references more than one source.
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # Raise explicitly: 'assert false' is a NameError in Python
        # (the constant is spelled False) and asserts vanish under -O.
        raise AssertionError(
            "gen_special('%s'): expected at most 1 source operand, got %d"
            % (brig_opcode, num_srcs))

    gen(brig_opcode, None, expr, base_class)
778
779gen_special('WorkItemId', 'w->workItemId[src0][lane]')
780gen_special('WorkItemAbsId',
781 'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
782gen_special('WorkGroupId', 'w->workGroupId[src0]')
783gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
784gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
785gen_special('GridSize', 'w->gridSz[src0]')
786gen_special('GridGroups',
787 'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
788gen_special('LaneId', 'lane')
789gen_special('WaveId', 'w->wfId')
790gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
791
792# gen_special('CU'', ')
793
794gen('Ret', base_class='SpecialInstNoSrcNoDest')
795gen('Barrier', base_class='SpecialInstNoSrcNoDest')
796gen('MemFence', base_class='SpecialInstNoSrcNoDest')
797
798# Map magic instructions to the BrigSyscall opcode
799# Magic instructions are defined in magic.hh
800#
801# In the future, real HSA kernel system calls can be implemented and coexist
802# with magic instructions.
803gen('Call', base_class='SpecialInstNoSrcNoDest')
804
805# Stubs for unimplemented instructions:
806# These may need to be implemented at some point in the future, but
807# for now we just match the instructions with their operands.
808#
809# By defining stubs for these instructions, we can work with
810# applications that have them in dead/unused code paths.
811#
812# Needed for rocm-hcc compilations for HSA backends since
813# builtins-hsail library is `cat`d onto the generated kernels.
814# The builtins-hsail library consists of handcoded hsail functions
815# that __might__ be needed by the rocm-hcc compiler in certain binaries.
816gen('Bitmask', base_class='Stub')
817gen('Bitrev', base_class='Stub')
818gen('Firstbit', base_class='Stub')
819gen('Lastbit', base_class='Stub')
820gen('Unpacklo', base_class='Stub')
821gen('Unpackhi', base_class='Stub')
822gen('Pack', base_class='Stub')
823gen('Unpack', base_class='Stub')
824gen('Lerp', base_class='Stub')
825gen('Packcvt', base_class='Stub')
826gen('Unpackcvt', base_class='Stub')
827gen('Sad', base_class='Stub')
828gen('Sadhi', base_class='Stub')
829gen('Activelanecount', base_class='Stub')
830gen('Activelaneid', base_class='Stub')
831gen('Activelanemask', base_class='Stub')
832gen('Activelanepermute', base_class='Stub')
833gen('Groupbaseptr', base_class='Stub')
834gen('Signalnoret', base_class='Stub')
835
836###############
837#
838# Generate file epilogs
839#
840###############
841header_code('''
842template<>
843inline void
844Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
845{
846 Wavefront *w = gpuDynInst->wavefront();
847
848 const VectorMask &mask = w->getPred();
849
850 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
851 if (mask[lane]) {
852 CType dest_val;
853 CType src_val;
854
855 src_val = this->src[0].template get<CType>(w, lane);
856
857 dest_val = (CType)(src_val);
858
859 this->dest.set(w, lane, dest_val);
860 }
861 }
862}
863
864template<>
865inline void
866Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
867{
868 Wavefront *w = gpuDynInst->wavefront();
869
870 const VectorMask &mask = w->getPred();
871
872 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
873 if (mask[lane]) {
874 CType dest_val;
875 CType src_val;
876
877 src_val = this->src[0].template get<CType>(w, lane);
878
879 dest_val = (CType)(src_val);
880
881 this->dest.set(w, lane, dest_val);
882 }
883 }
884}
885''')
886
887header_code.dedent()
888header_code('''
889} // namespace HsailISA
890''')
891
892# close off main decode switch
893decoder_code.dedent()
894decoder_code.dedent()
895decoder_code('''
896 default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
897 } // end switch(ib->opcode)
898 } // end decode()
899} // namespace HsailISA
900''')
901
902exec_code.dedent()
903exec_code('''
904} // namespace HsailISA
905''')
906
907###############
908#
909# Output accumulated code to files
910#
911###############
912header_code.write(sys.argv[1])
913decoder_code.write(sys.argv[2])
914exec_code.write(sys.argv[3])
# __builtin_popcount() takes an unsigned int, which would silently drop
# the upper 32 bits of a B64 source; the "ll" variant covers both the
# B32 and B64 source types.
gen('Popcount', ('U32',), '__builtin_popcountll(src0)', 'PopcountInst',
    ('sourceType', ('B32', 'B64')))
709
710gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
711gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')
712
713# gen('Mul_hi', types=('s32','u32', '??'))
714# gen('Mul24', types=('s32','u32', '??'))
715gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')
716
717gen('Abs', arith_types, 'std::abs(src0)')
718gen('Neg', arith_types, '-src0')
719
720gen('Mov', bit_types + arith_types, 'src0')
721gen('Not', bit_types, 'heynot(src0)')
722
723# mad and fma differ only in rounding behavior, which we don't emulate
724# also there's an integer form of mad, but not of fma
725gen('Mad', arith_types, 'src0 * src1 + src2')
726gen('Fma', arith_float_types, 'src0 * src1 + src2')
727
728#native floating point operations
729gen('Nfma', arith_float_types, 'src0 * src1 + src2')
730
731gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
732gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
733gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')
734
735# see base/bitfield.hh
736gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
737 'ExtractInsertInst')
738
739gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
740 'ExtractInsertInst')
741
742##### Compare
743gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
744 'CmpInst', ('sourceType', arith_types + bit_types))
745gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst')
746
747##### Conversion
748
749# Conversion operations are only defined on B1, not B32 or B64
750cvt_types = ('B1',) + mem_types
751
752gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))
753
754
755##### Load & Store
756gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode')
757gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode')
758gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode',
759 is_store=True)
760gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
761gen('AtomicNoRet', mem_atom_types, base_class='StInst',
762 constructor_prefix='decode')
763
764gen('Cbr', base_class = 'LdInst', constructor_prefix='decode')
765gen('Br', base_class = 'LdInst', constructor_prefix='decode')
766
767##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Generate a 'special' instruction with zero or one source operand.

    Picks the SpecialInstNoSrc or SpecialInst1Src base class (templated
    on `dest_type`) from the number of sources referenced in `expr`, then
    delegates to gen() with no type list.

    Raises AssertionError if `expr` references more than one source.
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # Raise explicitly: 'assert false' is a NameError in Python
        # (the constant is spelled False) and asserts vanish under -O.
        raise AssertionError(
            "gen_special('%s'): expected at most 1 source operand, got %d"
            % (brig_opcode, num_srcs))

    gen(brig_opcode, None, expr, base_class)
778
779gen_special('WorkItemId', 'w->workItemId[src0][lane]')
780gen_special('WorkItemAbsId',
781 'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
782gen_special('WorkGroupId', 'w->workGroupId[src0]')
783gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
784gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
785gen_special('GridSize', 'w->gridSz[src0]')
786gen_special('GridGroups',
787 'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
788gen_special('LaneId', 'lane')
789gen_special('WaveId', 'w->wfId')
790gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
791
792# gen_special('CU'', ')
793
794gen('Ret', base_class='SpecialInstNoSrcNoDest')
795gen('Barrier', base_class='SpecialInstNoSrcNoDest')
796gen('MemFence', base_class='SpecialInstNoSrcNoDest')
797
798# Map magic instructions to the BrigSyscall opcode
799# Magic instructions are defined in magic.hh
800#
801# In the future, real HSA kernel system calls can be implemented and coexist
802# with magic instructions.
803gen('Call', base_class='SpecialInstNoSrcNoDest')
804
805# Stubs for unimplemented instructions:
806# These may need to be implemented at some point in the future, but
807# for now we just match the instructions with their operands.
808#
809# By defining stubs for these instructions, we can work with
810# applications that have them in dead/unused code paths.
811#
812# Needed for rocm-hcc compilations for HSA backends since
813# builtins-hsail library is `cat`d onto the generated kernels.
814# The builtins-hsail library consists of handcoded hsail functions
815# that __might__ be needed by the rocm-hcc compiler in certain binaries.
816gen('Bitmask', base_class='Stub')
817gen('Bitrev', base_class='Stub')
818gen('Firstbit', base_class='Stub')
819gen('Lastbit', base_class='Stub')
820gen('Unpacklo', base_class='Stub')
821gen('Unpackhi', base_class='Stub')
822gen('Pack', base_class='Stub')
823gen('Unpack', base_class='Stub')
824gen('Lerp', base_class='Stub')
825gen('Packcvt', base_class='Stub')
826gen('Unpackcvt', base_class='Stub')
827gen('Sad', base_class='Stub')
828gen('Sadhi', base_class='Stub')
829gen('Activelanecount', base_class='Stub')
830gen('Activelaneid', base_class='Stub')
831gen('Activelanemask', base_class='Stub')
832gen('Activelanepermute', base_class='Stub')
833gen('Groupbaseptr', base_class='Stub')
834gen('Signalnoret', base_class='Stub')
835
836###############
837#
838# Generate file epilogs
839#
840###############
841header_code('''
842template<>
843inline void
844Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
845{
846 Wavefront *w = gpuDynInst->wavefront();
847
848 const VectorMask &mask = w->getPred();
849
850 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
851 if (mask[lane]) {
852 CType dest_val;
853 CType src_val;
854
855 src_val = this->src[0].template get<CType>(w, lane);
856
857 dest_val = (CType)(src_val);
858
859 this->dest.set(w, lane, dest_val);
860 }
861 }
862}
863
864template<>
865inline void
866Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
867{
868 Wavefront *w = gpuDynInst->wavefront();
869
870 const VectorMask &mask = w->getPred();
871
872 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
873 if (mask[lane]) {
874 CType dest_val;
875 CType src_val;
876
877 src_val = this->src[0].template get<CType>(w, lane);
878
879 dest_val = (CType)(src_val);
880
881 this->dest.set(w, lane, dest_val);
882 }
883 }
884}
885''')
886
887header_code.dedent()
888header_code('''
889} // namespace HsailISA
890''')
891
892# close off main decode switch
893decoder_code.dedent()
894decoder_code.dedent()
895decoder_code('''
896 default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
897 } // end switch(ib->opcode)
898 } // end decode()
899} // namespace HsailISA
900''')
901
902exec_code.dedent()
903exec_code('''
904} // namespace HsailISA
905''')
906
907###############
908#
909# Output accumulated code to files
910#
911###############
912header_code.write(sys.argv[1])
913decoder_code.write(sys.argv[2])
914exec_code.write(sys.argv[3])