# gen.py revision 11738:ad7e8afa0dfe
#! /usr/bin/python

#
#  Copyright (c) 2015 Advanced Micro Devices, Inc.
#  All rights reserved.
#
#  For use for simulation and test purposes only
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are met:
#
#  1. Redistributions of source code must retain the above copyright notice,
#  this list of conditions and the following disclaimer.
#
#  2. Redistributions in binary form must reproduce the above copyright notice,
#  this list of conditions and the following disclaimer in the documentation
#  and/or other materials provided with the distribution.
#
#  3. Neither the name of the copyright holder nor the names of its contributors
#  may be used to endorse or promote products derived from this software
#  without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#  POSSIBILITY OF SUCH DAMAGE.
#
#  Author: Steve Reinhardt
#

import re
import sys

from m5.util import code_formatter

# The script takes exactly three output paths: the generated header, the
# decoder source and the exec source (written, in that order, at the very
# end of the script).
if len(sys.argv) != 4:
    # Parenthesised single-argument print behaves the same under
    # Python 2 and Python 3.
    print("Error: need 3 args (file names)")
    # A usage error must be reported as a failure; exiting with 0 would
    # let the calling build step carry on without any generated files.
    sys.exit(1)

# One accumulator per output file.
header_code = code_formatter()
decoder_code = code_formatter()
exec_code = code_formatter()

###############
#
# Generate file prologs (includes etc.)
#
###############

header_code('''
#include "arch/hsail/insts/decl.hh"
#include "base/bitfield.hh"
#include "gpu-compute/hsail_code.hh"
#include "gpu-compute/wavefront.hh"

namespace HsailISA
{
''')
header_code.indent()

# The decoder prolog opens Decoder::decode() and its top-level switch on
# the Brig opcode; gen() appends one case per instruction and the file
# epilog closes the switch, the function and the namespace.
decoder_code('''
#include "arch/hsail/gpu_decoder.hh"
#include "arch/hsail/insts/branch.hh"
#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gen_decl.hh"
#include "arch/hsail/insts/mem.hh"
#include "arch/hsail/insts/mem_impl.hh"
#include "gpu-compute/brig_object.hh"

namespace HsailISA
{
    std::vector<GPUStaticInst*> Decoder::decodedInsts;

    GPUStaticInst*
    Decoder::decode(MachInst machInst)
    {
        using namespace Brig;

        const BrigInstBase *ib = machInst.brigInstBase;
        const BrigObject *obj = machInst.brigObj;

        switch(ib->opcode) {
''')
decoder_code.indent()
decoder_code.indent()

exec_code('''
#include "arch/hsail/insts/gen_decl.hh"
#include "base/intmath.hh"

namespace HsailISA
{
''')
exec_code.indent()

###############
#
# Define code templates for class declarations (for header file)
#
###############

# Basic header template for an instruction stub.
header_template_stub = '''
class $class_name : public $base_class
{
  public:
    typedef $base_class Base;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Basic header template for an instruction with no template parameters.
# Placeholders used below: $class_name, $base_class and $opcode.
header_template_nodt = '''
class $class_name : public $base_class
{
  public:
    typedef $base_class Base;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Basic header template for an instruction with a single DataType
# template parameter.
header_template_1dt = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
  public:
    typedef $base_class<DataType> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Variant of header_template_1dt whose generated class does not declare
# an execute() member.
header_template_1dt_noexec = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
  public:
    typedef $base_class<DataType> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }
};

'''

# Same as header_template_1dt, except the base class has a second
# template parameter NumSrcOperands to allow a variable number of
# source operands.  Note that since this is implemented with an array,
# it only works for instructions where all sources are of the same
# type (like most arithmetics).
# Placeholders: $class_name, $base_class, $num_srcs and $opcode.
header_template_1dt_varsrcs = '''
template<typename DataType>
class $class_name : public $base_class<DataType, $num_srcs>
{
  public:
    typedef $base_class<DataType, $num_srcs> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Header template for instruction with two DataType template
# parameters, one for the dest and one for the source.  This is used
# by compare and convert.
header_template_2dt = '''
template<typename DestDataType, class SrcDataType>
class $class_name : public $base_class<DestDataType, SrcDataType>
{
  public:
    typedef $base_class<DestDataType, SrcDataType> Base;
    typedef typename DestDataType::CType DestCType;
    typedef typename SrcDataType::CType SrcCType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Maps a base-class name (template arguments stripped) to the class
# declaration template emitted for it.  An empty string means nothing is
# emitted for that base class.
header_templates = {
    'ArithInst': header_template_1dt_varsrcs,
    'CmovInst': header_template_1dt,
    'ClassInst': header_template_1dt,
    'ShiftInst': header_template_1dt,
    'ExtractInsertInst': header_template_1dt,
    'CmpInst': header_template_2dt,
    'CvtInst': header_template_2dt,
    'PopcountInst': header_template_2dt,
    'LdInst': '',
    'StInst': '',
    'SpecialInstNoSrc': header_template_nodt,
    'SpecialInst1Src': header_template_nodt,
    'SpecialInstNoSrcNoDest': '',
    'Stub': header_template_stub,
}

###############
#
# Define code templates for exec functions
#
###############

# exec function body for a stub: executing it is a fatal error.
exec_template_stub = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble());
}

'''

# exec function body for a non-templated instruction with a destination
# and no source operand.
exec_template_nodt_nosrc = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef Base::DestCType DestCType;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestCType dest_val = $expr;
            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for a non-templated instruction with a destination
# and a single source operand (available to $expr as src_val0).
exec_template_nodt_1src = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef Base::DestCType DestCType;
    typedef Base::SrcCType  SrcCType;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
            DestCType dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for a single-DataType instruction whose sources are
# held in an array (available to $expr as src_val[N]).
exec_template_1dt_varsrcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            CType src_val[$num_srcs];

            for (int i = 0; i < $num_srcs; ++i) {
                src_val[i] = this->src[i].template get<CType>(w, lane);
            }

            dest_val = (CType)($expr);

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for a single-DataType instruction with three
# individually typed sources (src_val0, src_val1, src_val2).
exec_template_1dt_3srcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename Base::Src0CType Src0T;
    typedef typename Base::Src1CType Src1T;
    typedef typename Base::Src2CType Src2T;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;

            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
            Src2T src_val2 = this->src2.template get<Src2T>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for a single-DataType instruction with two sources
# and a destination whose C type comes from the base class.
exec_template_1dt_2src_1dest = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename Base::DestCType DestT;
    typedef CType Src0T;
    typedef typename Base::Src1CType Src1T;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestT dest_val;
            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<DestT>(w, lane);
            }
            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for shifts: src_val0 has the instruction's CType,
# src_val1 (the shift amount) is always read as uint32_t.
exec_template_shift = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();
    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;

            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            CType src_val0  = this->src0.template get<CType>(w, lane);
            uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for instructions with separate dest and source
# DataTypes; sources are held in an array (src_val[N]).
exec_template_2dt = '''
template<typename DestDataType, class SrcDataType>
void
$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestCType dest_val;
            SrcCType src_val[$num_srcs];

            for (int i = 0; i < $num_srcs; ++i) {
                src_val[i] = this->src[i].template get<SrcCType>(w, lane);
            }

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Maps a base-class name (template arguments stripped) to the execute()
# template emitted for it.  'ShiftInst' is intentionally absent: gen()
# special-cases it and emits exec_template_shift directly.
exec_templates = {
    'ArithInst': exec_template_1dt_varsrcs,
    'CmovInst': exec_template_1dt_3srcs,
    'ExtractInsertInst': exec_template_1dt_3srcs,
    'ClassInst': exec_template_1dt_2src_1dest,
    'CmpInst': exec_template_2dt,
    'CvtInst': exec_template_2dt,
    'PopcountInst': exec_template_2dt,
    'LdInst': '',
    'StInst': '',
    'SpecialInstNoSrc': exec_template_nodt_nosrc,
    'SpecialInst1Src': exec_template_nodt_1src,
    'SpecialInstNoSrcNoDest': '',
    'Stub': exec_template_stub,
}

###############
#
# Define code templates for the decoder cases
#
###############

# decode template for nodt-opcode case
decode_nodt_template = '''
  case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''

# prolog for the Class opcode: the type switch is on the instruction's
# sourceType field rather than on ib->type
decode_case_prolog_class_inst = '''
  case BRIG_OPCODE_$brig_opcode_upper:
    {
        //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
        BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
        //switch (baseOp->kind) {
        //    case BRIG_OPERAND_REG:
        //        type = ((const BrigOperandReg*)baseOp)->type;
        //        break;
        //    case BRIG_OPERAND_IMMED:
        //        type = ((const BrigOperandImmed*)baseOp)->type;
        //        break;
        //    default:
        //        fatal("CLASS unrecognized kind of operand %d\\n",
        //               baseOp->kind);
        //}
        switch (type) {'''

# common prolog for 1dt- or 2dt-opcode case: switch on data type
decode_case_prolog = '''
  case BRIG_OPCODE_$brig_opcode_upper:
    {
        switch (ib->type) {'''

# single-level decode case entry (for 1dt opcodes)
decode_case_entry = (
    '      case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
)

# per-type case used for stores: prolog, body and epilog are emitted
# separately by gen()
decode_store_prolog = (
    '      case BRIG_TYPE_$type_name: {'
)

decode_store_case_epilog = '''
      }'''

decode_store_case_entry = (
    '          return $constructor(ib, obj);'
)

# common epilog for type switch
decode_case_epilog = '''
          default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
              ib->type);
        }
    }
    break;'''

# Additional templates for nested decode on a second type field (for
# compare and convert).  These are used in place of the
# decode_case_entry template to create a second-level switch on the
# second type field inside each case of the first-level type switch.
# Because the name and location of the second type can vary, the Brig
# instruction type must be provided in $brig_type, and the name of the
# second type field must be provided in $type2_field.
decode_case2_prolog = '''
  case BRIG_TYPE_$type_name:
    switch (((Brig$brig_type*)ib)->$type2_field) {'''

decode_case2_entry = (
    '      case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
)

decode_case2_epilog = '''
          default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
              ((Brig$brig_type*)ib)->$type2_field);
        }
        break;'''

# Figure out how many source operands an expr needs by looking for the
# highest-numbered srcN value referenced.  Since sources are numbered
# starting at 0, the return value is N+1.
# Matches a source-operand reference (src0 ... src9) as a whole word.
# Shared by num_src_operands() and gen() so that counting and rewriting
# always agree on what constitutes an operand reference.
_SRC_OPERAND_RE = re.compile(r'\bsrc(\d)\b')

# Base classes whose generated execute() keeps its sources in a
# 'src_val[]' array rather than in individual 'src_valN' variables.
_ARRAY_SRC_BASE_CLASSES = ('ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst')


def num_src_operands(expr):
    """Return the number of source operands referenced by *expr*.

    The result is one more than the highest N for which 'srcN' appears
    in expr as a whole word, or 0 if no source operand is referenced.
    """
    indices = [int(n) for n in _SRC_OPERAND_RE.findall(expr)]
    if not indices:
        return 0
    return max(indices) + 1

###############
#
# Define final code generation methods
#
# The gen() and gen_special() functions are the interface for
# generating actual instructions.
#
###############

# Generate class declaration, exec function, and decode switch case
# for a brig_opcode with a single-level type switch.  The 'types'
# parameter is a list or tuple of types for which the instruction
# should be instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit the declaration, exec function and decode case for one opcode.

    Arguments:
      brig_opcode -- opcode name; used verbatim as the C++ class name,
          upper-cased for the BRIG_OPCODE_* case label and lower-cased
          for the opcode string passed to the base-class constructor.
      types -- tuple/list of data-type names to switch on.  If empty,
          None or a plain string, a single untyped decode case is
          emitted instead.
      expr -- C++ expression over src0..srcN (and optionally dest) that
          computes the result; required when base_class is 'ArithInst'.
      base_class -- name of the C++ base class, optionally with template
          arguments; selects the header and exec templates.
      type2_info -- optional (field_name, types) pair requesting a
          nested switch on a second type field.
      constructor_prefix -- text placed before the class name in the
          decode case ('new ' or the name prefix of a decode helper).
      is_store -- emit the store-style decode cases, whose class takes a
          second template argument.

    Raises ValueError if base_class is 'ArithInst' and expr is missing.
    """
    # NOTE: no values are passed to header_code/exec_code/decoder_code
    # below, so the templates' $names are presumably resolved by
    # code_formatter from this function's local variables.  Locals that
    # look unused (opcode, brig_opcode_upper, num_srcs, dest_is_src_flag,
    # constructor, brig_type, ...) are therefore load-bearing: do not
    # rename or remove them, and do not move the formatter calls into
    # helper functions without confirming how code_formatter looks
    # names up.
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst' and not expr:
        # expr must be provided with ArithInst so we can derive num_srcs
        # for the template
        raise ValueError("%s: ArithInst requires an expr" % brig_opcode)

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = 'dest' in expr
        dest_is_src_flag = str(dest_is_src).lower()  # for C++
        if base_class in _ARRAY_SRC_BASE_CLASSES:
            expr = _SRC_OPERAND_RE.sub(r'src_val[\1]', expr)
        else:
            expr = _SRC_OPERAND_RE.sub(r'src_val\1', expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    # Non-templated classes get their execute() in the exec source file;
    # templated ones have it emitted into the header.
    if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        header_code(exec_template_shift)
    else:
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
        return

    # multiple types, need at least one level of decode
    if brig_opcode == 'Class':
        decoder_code(decode_case_prolog_class_inst)
    else:
        decoder_code(decode_case_prolog)

    if type2_info:
        # need secondary type switch (convert, compare)
        # unpack extra info on second switch
        (type2_field, types2) = type2_info
        brig_type = 'Inst%s' % brig_opcode
        for type_name in types:
            decoder_code(decode_case2_prolog)
            fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
            for type2_name in types2:
                full_class_name = fmt % type2_name.upper()
                constructor = constructor_prefix + full_class_name
                decoder_code(decode_case2_entry)

            decoder_code(decode_case2_epilog)
    elif is_store:
        # single list of types, do basic one-level decode; the store
        # class takes a second template argument rebuilt from the first
        # letter and the numeric size of the type name
        for type_name in types:
            decoder_code(decode_store_prolog)
            type_size = int(re.findall(r'[0-9]+', type_name)[0])
            type_type = type_name[0]
            full_class_name = '%s<%s,%s>' % (
                class_name,
                type_name.upper(),
                '%s%d' % (type_type.upper(), type_size))
            constructor = constructor_prefix + full_class_name
            decoder_code(decode_store_case_entry)
            decoder_code(decode_store_case_epilog)
    else:
        # single list of types, do basic one-level decode
        for type_name in types:
            full_class_name = '%s<%s>' % (class_name, type_name.upper())
            constructor = constructor_prefix + full_class_name
            decoder_code(decode_case_entry)

    decoder_code(decode_case_epilog)

###############
#
# Generate instructions
#
###############

# handy abbreviations for common sets of types

# arithmetic ops are typically defined only on 32- and 64-bit sizes
arith_int_types = ('S32', 'U32', 'S64', 'U64')
arith_float_types = ('F32', 'F64')
arith_types = arith_int_types + arith_float_types

bit_types = ('B1', 'B32', 'B64')

all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types

# I think you might be able to do 'f16' memory ops too, but we'll
# ignore them for now.
mem_types = all_int_types + arith_float_types
mem_atom_types = all_int_types + ('B32', 'B64')

##### Arithmetic & logical operations
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')

gen('CopySign', arith_float_types,
    'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')

# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
gen('Fract', arith_float_types,
    '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')

gen('Ncos', arith_float_types, 'cos(src0)')
gen('Nsin', arith_float_types, 'sin(src0)')

gen('And', bit_types, 'src0 & src1')
gen('Or', bit_types, 'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')

gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~src0)')
# NOTE(review): __builtin_popcount takes an unsigned int, so with a B64
# source the upper 32 bits are presumably ignored -- confirm whether
# __builtin_popcountll is what is wanted here.
gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst',
    ('sourceType', ('B32', 'B64')))

gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')

# gen('Mul_hi', types=('s32','u32', '??'))
# gen('Mul24', types=('s32','u32', '??'))
gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')

gen('Abs', arith_types, 'std::abs(src0)')
gen('Neg', arith_types, '-src0')

gen('Mov', bit_types + arith_types, 'src0')
gen('Not', bit_types, 'heynot(src0)')

# mad and fma differ only in rounding behavior, which we don't emulate
# also there's an integer form of mad, but not of fma
gen('Mad', arith_types, 'src0 * src1 + src2')
gen('Fma', arith_float_types, 'src0 * src1 + src2')

#native floating point operations
gen('Nfma', arith_float_types, 'src0 * src1 + src2')

gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')

# see base/bitfield.hh
gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
    'ExtractInsertInst')

gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
    'ExtractInsertInst')

##### Compare
gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
    'CmpInst', ('sourceType', arith_types + bit_types))
gen('Class', arith_float_types, 'fpclassify(src0,src1)', 'ClassInst')

##### Conversion

# Conversion operations are only defined on B1, not B32 or B64
cvt_types = ('B1',) + mem_types

gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))


##### Load & Store
gen('Lda', mem_types, base_class='LdInst', constructor_prefix='decode')
gen('Ld', mem_types, base_class='LdInst', constructor_prefix='decode')
gen('St', mem_types, base_class='StInst', constructor_prefix='decode',
    is_store=True)
gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
gen('AtomicNoRet', mem_atom_types, base_class='StInst',
    constructor_prefix='decode')

gen('Cbr', base_class='LdInst', constructor_prefix='decode')
gen('Br', base_class='LdInst', constructor_prefix='decode')

##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Generate a 'special' instruction with zero or one source operand.

    The base class is chosen from the number of source operands that
    expr references and is instantiated on dest_type; everything else is
    delegated to gen() with no type switch.

    Raises ValueError if expr references more than one source operand.
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # There is no special-instruction base class for more sources.
        raise ValueError(
            "%s: special instructions take at most one source operand "
            "(expr references %d)" % (brig_opcode, num_srcs))

    gen(brig_opcode, None, expr, base_class)

gen_special('WorkItemId', 'w->workItemId[src0][lane]')
gen_special('WorkItemAbsId',
    'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
gen_special('WorkGroupId', 'w->workGroupId[src0]')
gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
gen_special('GridSize', 'w->gridSz[src0]')
gen_special('GridGroups',
    'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
gen_special('LaneId', 'lane')
gen_special('WaveId', 'w->wfId')
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')

# gen_special('CU'', ')

gen('Ret', base_class='SpecialInstNoSrcNoDest')
gen('Barrier', base_class='SpecialInstNoSrcNoDest')
gen('MemFence', base_class='SpecialInstNoSrcNoDest')

# Map magic instructions to the BrigSyscall opcode
# Magic instructions are defined in magic.hh
#
# In the future, real HSA kernel system calls can be implemented and coexist
# with magic instructions.
gen('Call', base_class='SpecialInstNoSrcNoDest')

# Stubs for unimplemented instructions:
# These may need to be implemented at some point in the future, but
# for now we just match the instructions with their operands.
#
# By defining stubs for these instructions, we can work with
# applications that have them in dead/unused code paths.
#
# Needed for rocm-hcc compilations for HSA backends since
# builtins-hsail library is `cat`d onto the generated kernels.
# The builtins-hsail library consists of handcoded hsail functions
# that __might__ be needed by the rocm-hcc compiler in certain binaries.
# Each stub gets a class declaration, an execute() that reports the
# instruction as unimplemented, and an untyped decode case.  The order
# of this tuple is the order in which the code is emitted.
for stub_opcode in (
        'Bitmask',
        'Bitrev',
        'Firstbit',
        'Lastbit',
        'Unpacklo',
        'Unpackhi',
        'Pack',
        'Unpack',
        'Lerp',
        'Packcvt',
        'Unpackcvt',
        'Sad',
        'Sadhi',
        'Activelanecount',
        'Activelaneid',
        'Activelanemask',
        'Activelanepermute',
        'Groupbaseptr',
        'Signalnoret',
):
    gen(stub_opcode, base_class='Stub')

###############
#
# Generate file epilogs
#
###############

# Explicit specializations of Abs for the unsigned types: the source
# value is copied to the destination unchanged instead of going through
# the std::abs() expression used for the generic Abs template.
header_code('''
template<>
inline void
Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            CType src_val;

            src_val = this->src[0].template get<CType>(w, lane);

            dest_val = (CType)(src_val);

            this->dest.set(w, lane, dest_val);
        }
    }
}

template<>
inline void
Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            CType src_val;

            src_val = this->src[0].template get<CType>(w, lane);

            dest_val = (CType)(src_val);

            this->dest.set(w, lane, dest_val);
        }
    }
}
''')

header_code.dedent()
header_code('''
} // namespace HsailISA
''')

# close off main decode switch
decoder_code.dedent()
decoder_code.dedent()
decoder_code('''
          default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
        } // end switch(ib->opcode)
    } // end decode()
} // namespace HsailISA
''')

exec_code.dedent()
exec_code('''
} // namespace HsailISA
''')

###############
#
# Output accumulated code to files
#
###############
# argv[1..3] are the header, decoder and exec output paths, in that order.
header_code.write(sys.argv[1])
decoder_code.write(sys.argv[2])
exec_code.write(sys.argv[3])