Cross Reference: /gem5/src/arch/hsail/gen.py

Deleted Added

sdiff udiff text old ( 13450:32a36390a49e ) new ( 13754:1345b049ebba )

full compact

gen.py (13450:32a36390a49e)	gen.py (13754:1345b049ebba)
1#! /usr/bin/python 2 3#	1#!/usr/bin/env python2.7
4# Copyright (c) 2015 Advanced Micro Devices, Inc. 5# All rights reserved. 6# 7# For use for simulation and test purposes only 8# 9# Redistribution and use in source and binary forms, with or without 10# modification, are permitted provided that the following conditions are met: 11# 12# 1. Redistributions of source code must retain the above copyright notice, 13# this list of conditions and the following disclaimer. 14# 15# 2. Redistributions in binary form must reproduce the above copyright notice, 16# this list of conditions and the following disclaimer in the documentation 17# and/or other materials provided with the distribution. 18# 19# 3. Neither the name of the copyright holder nor the names of its contributors 20# may be used to endorse or promote products derived from this software 21# without specific prior written permission. 22# 23# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33# POSSIBILITY OF SUCH DAMAGE. 
#
# Author: Steve Reinhardt
#

from __future__ import print_function

import sys
import re

from m5.util import code_formatter

# The generator writes three output files (header, decoder, exec), whose
# names are taken from sys.argv[1:4] at the end of the script.
if len(sys.argv) != 4:
    # Report the usage error on stderr and exit non-zero so that the
    # calling build step fails instead of silently producing nothing.
    print("Error: need 3 args (file names)", file=sys.stderr)
    sys.exit(1)

# One formatter per generated file; code is accumulated in these and
# written out once all instructions have been generated.
header_code = code_formatter()
decoder_code = code_formatter()
exec_code = code_formatter()

###############
#
# Generate file prologs (includes etc.)
#
###############

# NOTE(review): in the C++ text below, 'ib' is dereferenced with '->'
# while being declared without a pointer declarator; the '*' characters
# look like they were lost somewhere upstream of this copy -- confirm
# against the authoritative source before relying on the generated code.

header_code('''
#include "arch/hsail/insts/decl.hh"
#include "base/bitfield.hh"
#include "gpu-compute/hsail_code.hh"
#include "gpu-compute/wavefront.hh"

namespace HsailISA
{
''')
header_code.indent()

decoder_code('''
#include "arch/hsail/gpu_decoder.hh"
#include "arch/hsail/insts/branch.hh"
#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gen_decl.hh"
#include "arch/hsail/insts/mem.hh"
#include "arch/hsail/insts/mem_impl.hh"
#include "gpu-compute/brig_object.hh"

namespace HsailISA
{
    std::vector<GPUStaticInst> Decoder::decodedInsts;

    GPUStaticInst
    Decoder::decode(MachInst machInst)
    {
        using namespace Brig;

        const BrigInstBase ib = machInst.brigInstBase;
        const BrigObject obj = machInst.brigObj;

        switch(ib->opcode) {
''')
# Two levels: inside decode() and inside the opcode switch.
decoder_code.indent()
decoder_code.indent()

exec_code('''
#include "arch/hsail/insts/gen_decl.hh"
#include "base/intmath.hh"

namespace HsailISA
{
''')
exec_code.indent()

###############
#
# Define code templates for class declarations (for header file)
#
###############

# Basic header template for an instruction stub.
header_template_stub = '''
class $class_name : public $base_class
{
  public:
    typedef $base_class Base;

    $class_name(const Brig::BrigInstBase ib, const BrigObject obj)
      : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Basic header template for an instruction with no template parameters.
128header_template_nodt = ''' 129class $class_name : public $base_class 130{ 131 public: 132 typedef $base_class Base; 133 134 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 135 : Base(ib, obj, "$opcode") 136 { 137 } 138 139 void execute(GPUDynInstPtr gpuDynInst); 140}; 141 142''' 143 144# Basic header template for an instruction with a single DataType 145# template parameter. 146header_template_1dt = ''' 147template<typename DataType> 148class $class_name : public $base_class<DataType> 149{ 150 public: 151 typedef $base_class<DataType> Base; 152 typedef typename DataType::CType CType; 153 154 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 155 : Base(ib, obj, "$opcode") 156 { 157 } 158 159 void execute(GPUDynInstPtr gpuDynInst); 160}; 161 162''' 163 164header_template_1dt_noexec = ''' 165template<typename DataType> 166class $class_name : public $base_class<DataType> 167{ 168 public: 169 typedef $base_class<DataType> Base; 170 typedef typename DataType::CType CType; 171 172 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 173 : Base(ib, obj, "$opcode") 174 { 175 } 176}; 177 178''' 179 180# Same as header_template_1dt, except the base class has a second 181# template parameter NumSrcOperands to allow a variable number of 182# source operands. Note that since this is implemented with an array, 183# it only works for instructions where all sources are of the same 184# type (like most arithmetics). 
185header_template_1dt_varsrcs = ''' 186template<typename DataType> 187class $class_name : public $base_class<DataType, $num_srcs> 188{ 189 public: 190 typedef $base_class<DataType, $num_srcs> Base; 191 typedef typename DataType::CType CType; 192 193 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 194 : Base(ib, obj, "$opcode") 195 { 196 } 197 198 void execute(GPUDynInstPtr gpuDynInst); 199}; 200 201''' 202 203# Header template for instruction with two DataType template 204# parameters, one for the dest and one for the source. This is used 205# by compare and convert. 206header_template_2dt = ''' 207template<typename DestDataType, class SrcDataType> 208class $class_name : public $base_class<DestDataType, SrcDataType> 209{ 210 public: 211 typedef $base_class<DestDataType, SrcDataType> Base; 212 typedef typename DestDataType::CType DestCType; 213 typedef typename SrcDataType::CType SrcCType; 214 215 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 216 : Base(ib, obj, "$opcode") 217 { 218 } 219 220 void execute(GPUDynInstPtr gpuDynInst); 221}; 222 223''' 224 225header_templates = { 226 'ArithInst': header_template_1dt_varsrcs, 227 'CmovInst': header_template_1dt, 228 'ClassInst': header_template_1dt, 229 'ShiftInst': header_template_1dt, 230 'ExtractInsertInst': header_template_1dt, 231 'CmpInst': header_template_2dt, 232 'CvtInst': header_template_2dt, 233 'PopcountInst': header_template_2dt, 234 'LdInst': '', 235 'StInst': '', 236 'SpecialInstNoSrc': header_template_nodt, 237 'SpecialInst1Src': header_template_nodt, 238 'SpecialInstNoSrcNoDest': '', 239 'Stub': header_template_stub, 240} 241 242############### 243# 244# Define code templates for exec functions 245# 246############### 247 248# exec function body 249exec_template_stub = ''' 250void 251$class_name::execute(GPUDynInstPtr gpuDynInst) 252{ 253 fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble()); 254} 255 256''' 257exec_template_nodt_nosrc = ''' 258void 
259$class_name::execute(GPUDynInstPtr gpuDynInst) 260{ 261 Wavefront w = gpuDynInst->wavefront(); 262* 263 typedef Base::DestCType DestCType; 264 265 const VectorMask &mask = w->getPred(); 266 267 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 268 if (mask[lane]) { 269 DestCType dest_val = $expr; 270 this->dest.set(w, lane, dest_val); 271 } 272 } 273} 274 275''' 276 277exec_template_nodt_1src = ''' 278void 279$class_name::execute(GPUDynInstPtr gpuDynInst) 280{ 281 Wavefront w = gpuDynInst->wavefront(); 282* 283 typedef Base::DestCType DestCType; 284 typedef Base::SrcCType SrcCType; 285 286 const VectorMask &mask = w->getPred(); 287 288 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 289 if (mask[lane]) { 290 SrcCType src_val0 = this->src0.get<SrcCType>(w, lane); 291 DestCType dest_val = $expr; 292 293 this->dest.set(w, lane, dest_val); 294 } 295 } 296} 297 298''' 299 300exec_template_1dt_varsrcs = ''' 301template<typename DataType> 302void 303$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 304{ 305 Wavefront w = gpuDynInst->wavefront(); 306* 307 const VectorMask &mask = w->getPred(); 308 309 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 310 if (mask[lane]) { 311 CType dest_val; 312 if ($dest_is_src_flag) { 313 dest_val = this->dest.template get<CType>(w, lane); 314 } 315 316 CType src_val[$num_srcs]; 317 318 for (int i = 0; i < $num_srcs; ++i) { 319 src_val[i] = this->src[i].template get<CType>(w, lane); 320 } 321 322 dest_val = (CType)($expr); 323 324 this->dest.set(w, lane, dest_val); 325 } 326 } 327} 328 329''' 330 331exec_template_1dt_3srcs = ''' 332template<typename DataType> 333void 334$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 335{ 336 Wavefront w = gpuDynInst->wavefront(); 337* 338 typedef typename Base::Src0CType Src0T; 339 typedef typename Base::Src1CType Src1T; 340 typedef typename Base::Src2CType Src2T; 341 342 const VectorMask &mask = w->getPred(); 343 344 for (int lane = 0; lane < 
w->computeUnit->wfSize(); ++lane) { 345 if (mask[lane]) { 346 CType dest_val; 347 348 if ($dest_is_src_flag) { 349 dest_val = this->dest.template get<CType>(w, lane); 350 } 351 352 Src0T src_val0 = this->src0.template get<Src0T>(w, lane); 353 Src1T src_val1 = this->src1.template get<Src1T>(w, lane); 354 Src2T src_val2 = this->src2.template get<Src2T>(w, lane); 355 356 dest_val = $expr; 357 358 this->dest.set(w, lane, dest_val); 359 } 360 } 361} 362 363''' 364 365exec_template_1dt_2src_1dest = ''' 366template<typename DataType> 367void 368$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 369{ 370 Wavefront w = gpuDynInst->wavefront(); 371* 372 typedef typename Base::DestCType DestT; 373 typedef CType Src0T; 374 typedef typename Base::Src1CType Src1T; 375 376 const VectorMask &mask = w->getPred(); 377 378 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 379 if (mask[lane]) { 380 DestT dest_val; 381 if ($dest_is_src_flag) { 382 dest_val = this->dest.template get<DestT>(w, lane); 383 } 384 Src0T src_val0 = this->src0.template get<Src0T>(w, lane); 385 Src1T src_val1 = this->src1.template get<Src1T>(w, lane); 386 387 dest_val = $expr; 388 389 this->dest.set(w, lane, dest_val); 390 } 391 } 392} 393 394''' 395 396exec_template_shift = ''' 397template<typename DataType> 398void 399$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 400{ 401 Wavefront w = gpuDynInst->wavefront(); 402* 403 const VectorMask &mask = w->getPred(); 404 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 405 if (mask[lane]) { 406 CType dest_val; 407 408 if ($dest_is_src_flag) { 409 dest_val = this->dest.template get<CType>(w, lane); 410 } 411 412 CType src_val0 = this->src0.template get<CType>(w, lane); 413 uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane); 414 415 dest_val = $expr; 416 417 this->dest.set(w, lane, dest_val); 418 } 419 } 420} 421 422''' 423 424exec_template_2dt = ''' 425template<typename DestDataType, class SrcDataType> 426void 
427$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst) 428{ 429 Wavefront w = gpuDynInst->wavefront(); 430* 431 const VectorMask &mask = w->getPred(); 432 433 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 434 if (mask[lane]) { 435 DestCType dest_val; 436 SrcCType src_val[$num_srcs]; 437 438 for (int i = 0; i < $num_srcs; ++i) { 439 src_val[i] = this->src[i].template get<SrcCType>(w, lane); 440 } 441 442 dest_val = $expr; 443 444 this->dest.set(w, lane, dest_val); 445 } 446 } 447} 448 449''' 450 451exec_templates = { 452 'ArithInst': exec_template_1dt_varsrcs, 453 'CmovInst': exec_template_1dt_3srcs, 454 'ExtractInsertInst': exec_template_1dt_3srcs, 455 'ClassInst': exec_template_1dt_2src_1dest, 456 'CmpInst': exec_template_2dt, 457 'CvtInst': exec_template_2dt, 458 'PopcountInst': exec_template_2dt, 459 'LdInst': '', 460 'StInst': '', 461 'SpecialInstNoSrc': exec_template_nodt_nosrc, 462 'SpecialInst1Src': exec_template_nodt_1src, 463 'SpecialInstNoSrcNoDest': '', 464 'Stub': exec_template_stub, 465} 466 467############### 468# 469# Define code templates for the decoder cases 470# 471############### 472 473# decode template for nodt-opcode case 474decode_nodt_template = ''' 475 case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);''' 476 477decode_case_prolog_class_inst = ''' 478 case BRIG_OPCODE_$brig_opcode_upper: 479 { 480 //const BrigOperandBase baseOp = obj->getOperand(ib->operands[1]); 481* BrigType16_t type = ((BrigInstSourceType)ib)->sourceType; 482* //switch (baseOp->kind) { 483 // case BRIG_OPERAND_REG: 484 // type = ((const BrigOperandReg)baseOp)->type; 485* // break; 486 // case BRIG_OPERAND_IMMED: 487 // type = ((const BrigOperandImmed)baseOp)->type; 488* // break; 489 // default: 490 // fatal("CLASS unrecognized kind of operand %d\\n", 491 // baseOp->kind); 492 //} 493 switch (type) {''' 494 495# common prolog for 1dt- or 2dt-opcode case: switch on data type 496decode_case_prolog = ''' 497 case 
BRIG_OPCODE_$brig_opcode_upper: 498 { 499 switch (ib->type) {''' 500 501# single-level decode case entry (for 1dt opcodes) 502decode_case_entry = \ 503' case BRIG_TYPE_$type_name: return $constructor(ib, obj);' 504 505decode_store_prolog = \ 506' case BRIG_TYPE_$type_name: {' 507 508decode_store_case_epilog = ''' 509 }''' 510 511decode_store_case_entry = \ 512' return $constructor(ib, obj);' 513 514# common epilog for type switch 515decode_case_epilog = ''' 516 default: fatal("$brig_opcode_upper: unrecognized type %d\\n", 517 ib->type); 518 } 519 } 520 break;''' 521 522# Additional templates for nested decode on a second type field (for 523# compare and convert). These are used in place of the 524# decode_case_entry template to create a second-level switch on on the 525# second type field inside each case of the first-level type switch. 526# Because the name and location of the second type can vary, the Brig 527# instruction type must be provided in $brig_type, and the name of the 528# second type field must be provided in $type_field. 529decode_case2_prolog = ''' 530 case BRIG_TYPE_$type_name: 531 switch (((Brig$brig_type)ib)->$type2_field) {''' 532* 533decode_case2_entry = \ 534' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);' 535 536decode_case2_epilog = ''' 537 default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n", 538 ((Brig$brig_type)ib)->$type2_field); 539* } 540 break;''' 541 542# Figure out how many source operands an expr needs by looking for the 543# highest-numbered srcN value referenced. Since sources are numbered 544# starting at 0, the return value is N+1. 
def num_src_operands(expr):
    """Return how many source operands the C++ expression text needs.

    The count is derived from the highest-numbered srcN name (src0..src2)
    that appears in 'expr'.  Since sources are numbered starting at 0,
    the return value is N+1, or 0 when no srcN name is present.
    """
    if 'src2' in expr:
        return 3
    elif 'src1' in expr:
        return 2
    elif 'src0' in expr:
        return 1
    else:
        return 0

###############
#
# Define final code generation methods
#
# gen() and gen_special() are the interface for generating actual
# instructions.
#
###############

# Generate class declaration, exec function, and decode switch case
# for an brig_opcode with a single-level type switch.  The 'types'
# parameter is a list or tuple of types for which the instruction
# should be instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit header, exec and decoder code for one Brig opcode.

    brig_opcode: opcode name; used as the generated class name, in lower
        case as the opcode string, and in upper case for the decoder's
        case label.
    types: None (or a plain string) for an opcode decoded without a type
        switch; otherwise a list or tuple of type names, each of which
        gets its own case in the decoder's type switch.
    expr: C++ expression text written in terms of src0..src2 and dest;
        those names are rewritten to the src_val/dest_val names used by
        the exec templates.  Required when base_class is 'ArithInst'.
    base_class: C++ base class; with any trailing template arguments
        stripped it selects the entries of header_templates and
        exec_templates.
    type2_info: optional (field name, types) pair requesting a nested
        switch on a second type field (used for compare and convert).
    constructor_prefix: text placed before the class name to form the
        expression returned from the decoder.
    is_store: when true (and type2_info is not given), each type case is
        wrapped in its own block and the class is instantiated with two
        template arguments.

    NOTE(review): the formatter objects are called with a template only,
    and locals such as 'opcode' are otherwise unused, so the '$name'
    placeholders are presumably resolved from this function's local
    variables.  Do not rename or remove locals here without checking the
    templates.
    """
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # note that expr must be provided with ArithInst so we can
        # derive num_srcs for the template
        assert expr

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = 'dest' in expr
        dest_is_src_flag = str(dest_is_src).lower() # for C++
        if base_class in ('ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst'):
            # these exec templates keep their sources in an array
            expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
        else:
            # all other exec templates use one scalar per source
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates.  The pattern must match an argument list of
    # any length (e.g. '<U32>'), otherwise the lookups below fail.
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    # Only the special and stub instructions get their exec function in
    # the exec file; everything else is emitted into the header.
    if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        header_code(exec_template_shift)
    else:
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)
        if not type2_info:
            if not is_store:
                # single list of types, to basic one-level decode
                for type_name in types:
                    full_class_name = '%s<%s>' % (class_name,
                                                  type_name.upper())
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case_entry)
            else:
                # single list of types, to basic one-level decode
                for type_name in types:
                    decoder_code(decode_store_prolog)
                    # the second template argument is rebuilt from the
                    # first letter and the numeric size of the type name
                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
                    src_size = 32
                    type_type = type_name[0]
                    full_class_name = '%s<%s,%s>' % (
                        class_name,
                        type_name.upper(),
                        '%s%d' % (type_type.upper(), type_size))
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_store_case_entry)
                    decoder_code(decode_store_case_epilog)
        else:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)

                decoder_code(decode_case2_epilog)

        decoder_code(decode_case_epilog)

###############
#
# Generate instructions
#
###############

# handy abbreviations for common sets of types

# arithmetic ops are typically defined only on 32- and 64-bit sizes
arith_int_types = ('S32', 'U32', 'S64', 'U64')
arith_float_types = ('F32', 'F64')
arith_types = arith_int_types + arith_float_types

bit_types = ('B1', 'B32', 'B64')

all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types

# I think you might be able to do 'f16' memory ops too, but we'll
# ignore them for now.
mem_types = all_int_types + arith_float_types
mem_atom_types = all_int_types + ('B32', 'B64')

##### Arithmetic & logical operations
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')

gen('CopySign', arith_float_types,
    'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')

# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
gen('Fract', arith_float_types,
    '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')

gen('Ncos', arith_float_types, 'cos(src0)')
gen('Nsin', arith_float_types, 'sin(src0)')

gen('And', bit_types, 'src0 & src1')
gen('Or', bit_types, 'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')

gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)')
gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst',
    ('sourceType', ('B32', 'B64')))

gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')

# gen('Mul_hi', types=('s32','u32', '??'))
# gen('Mul24', types=('s32','u32', '??'))
gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')

gen('Abs', arith_types, 'std::abs(src0)')
gen('Neg', arith_types, '-src0')

gen('Mov', bit_types + arith_types, 'src0')
gen('Not', bit_types, 'heynot(src0)')

# mad and fma differ only in rounding behavior, which we don't emulate
# also there's an integer form of mad, but not of fma
gen('Mad', arith_types, 'src0 * src1 + src2')
gen('Fma', arith_float_types, 'src0 * src1 + src2')

#native floating point operations
gen('Nfma', arith_float_types, 'src0 * src1 + src2')

gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')

# see base/bitfield.hh
gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
    'ExtractInsertInst')

gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
    'ExtractInsertInst')

##### Compare
gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
    'CmpInst', ('sourceType', arith_types + bit_types))
gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst')

##### Conversion

# Conversion operations are only defined on B1, not B32 or B64
cvt_types = ('B1',) + mem_types

gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))


##### Load & Store
gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode')
gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode')
gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode',
    is_store=True)
gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
gen('AtomicNoRet', mem_atom_types, base_class='StInst',
    constructor_prefix='decode')

gen('Cbr', base_class = 'LdInst', constructor_prefix='decode')
gen('Br', base_class = 'LdInst', constructor_prefix='decode')

##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Generate a special instruction whose value is computed by 'expr'.

    The base class is chosen from the number of sources that 'expr'
    references (none or one) and is instantiated on 'dest_type'; the
    actual generation is delegated to gen() without a type list.

    Raises ValueError when 'expr' references more than one source, since
    no special-instruction base class is selected for that case.
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        raise ValueError(
            "gen_special('%s'): expression uses %d source operands, "
            "only 0 or 1 are supported" % (brig_opcode, num_srcs))

    gen(brig_opcode, None, expr, base_class)

gen_special('WorkItemId', 'w->workItemId[src0][lane]')
gen_special('WorkItemAbsId',
    'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
gen_special('WorkGroupId', 'w->workGroupId[src0]')
gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
gen_special('GridSize', 'w->gridSz[src0]')
gen_special('GridGroups',
    'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
gen_special('LaneId', 'lane')
gen_special('WaveId', 'w->wfId')
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')

# gen_special('CU'', ')

gen('Ret', base_class='SpecialInstNoSrcNoDest')
gen('Barrier', base_class='SpecialInstNoSrcNoDest')
gen('MemFence', base_class='SpecialInstNoSrcNoDest')

# Map magic instructions to the BrigSyscall opcode
# Magic instructions are defined in magic.hh
#
# In the future, real HSA kernel system calls can be implemented and coexist
# with magic instructions.
gen('Call', base_class='SpecialInstNoSrcNoDest')

# Stubs for unimplemented instructions:
# These may need to be implemented at some point in the future, but
# for now we just match the instructions with their operands.
#
# By defining stubs for these instructions, we can work with
# applications that have them in dead/unused code paths.
#
# Needed for rocm-hcc compilations for HSA backends since
# builtins-hsail library is `cat`d onto the generated kernels.
# The builtins-hsail library consists of handcoded hsail functions
# that __might__ be needed by the rocm-hcc compiler in certain binaries.
816gen('Bitmask', base_class='Stub') 817gen('Bitrev', base_class='Stub') 818gen('Firstbit', base_class='Stub') 819gen('Lastbit', base_class='Stub') 820gen('Unpacklo', base_class='Stub') 821gen('Unpackhi', base_class='Stub') 822gen('Pack', base_class='Stub') 823gen('Unpack', base_class='Stub') 824gen('Lerp', base_class='Stub') 825gen('Packcvt', base_class='Stub') 826gen('Unpackcvt', base_class='Stub') 827gen('Sad', base_class='Stub') 828gen('Sadhi', base_class='Stub') 829gen('Activelanecount', base_class='Stub') 830gen('Activelaneid', base_class='Stub') 831gen('Activelanemask', base_class='Stub') 832gen('Activelanepermute', base_class='Stub') 833gen('Groupbaseptr', base_class='Stub') 834gen('Signalnoret', base_class='Stub') 835 836############### 837# 838# Generate file epilogs 839# 840############### 841header_code(''' 842template<> 843inline void 844Abs<U32>::execute(GPUDynInstPtr gpuDynInst) 845{ 846 Wavefront w = gpuDynInst->wavefront(); 847* 848 const VectorMask &mask = w->getPred(); 849 850 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 851 if (mask[lane]) { 852 CType dest_val; 853 CType src_val; 854 855 src_val = this->src[0].template get<CType>(w, lane); 856 857 dest_val = (CType)(src_val); 858 859 this->dest.set(w, lane, dest_val); 860 } 861 } 862} 863 864template<> 865inline void 866Abs<U64>::execute(GPUDynInstPtr gpuDynInst) 867{ 868 Wavefront w = gpuDynInst->wavefront(); 869* 870 const VectorMask &mask = w->getPred(); 871 872 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 873 if (mask[lane]) { 874 CType dest_val; 875 CType src_val; 876 877 src_val = this->src[0].template get<CType>(w, lane); 878 879 dest_val = (CType)(src_val); 880 881 this->dest.set(w, lane, dest_val); 882 } 883 } 884} 885''') 886 887header_code.dedent() 888header_code(''' 889} // namespace HsailISA 890''') 891 892# close off main decode switch 893decoder_code.dedent() 894decoder_code.dedent() 895decoder_code(''' 896 default: fatal("unrecognized Brig opcode 
%d\\n", ib->opcode); 897 } // end switch(ib->opcode) 898 } // end decode() 899} // namespace HsailISA 900''') 901 902exec_code.dedent() 903exec_code(''' 904} // namespace HsailISA 905''') 906 907############### 908# 909# Output accumulated code to files 910# 911############### 912header_code.write(sys.argv[1]) 913decoder_code.write(sys.argv[2]) 914exec_code.write(sys.argv[3])	2# Copyright (c) 2015 Advanced Micro Devices, Inc. 3# All rights reserved. 4# 5# For use for simulation and test purposes only 6# 7# Redistribution and use in source and binary forms, with or without 8# modification, are permitted provided that the following conditions are met: 9# 10# 1. Redistributions of source code must retain the above copyright notice, 11# this list of conditions and the following disclaimer. 12# 13# 2. Redistributions in binary form must reproduce the above copyright notice, 14# this list of conditions and the following disclaimer in the documentation 15# and/or other materials provided with the distribution. 16# 17# 3. Neither the name of the copyright holder nor the names of its contributors 18# may be used to endorse or promote products derived from this software 19# without specific prior written permission. 20# 21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31# POSSIBILITY OF SUCH DAMAGE. 32# 33# Author: Steve Reinhardt 34# 35 36from __future__ import print_function 37 38import sys, re 39 40from m5.util import code_formatter 41 42if len(sys.argv) != 4: 43 print("Error: need 3 args (file names)") 44 sys.exit(0) 45 46header_code = code_formatter() 47decoder_code = code_formatter() 48exec_code = code_formatter() 49 50############### 51# 52# Generate file prologs (includes etc.) 53# 54############### 55 56header_code(''' 57#include "arch/hsail/insts/decl.hh" 58#include "base/bitfield.hh" 59#include "gpu-compute/hsail_code.hh" 60#include "gpu-compute/wavefront.hh" 61 62namespace HsailISA 63{ 64''') 65header_code.indent() 66 67decoder_code(''' 68#include "arch/hsail/gpu_decoder.hh" 69#include "arch/hsail/insts/branch.hh" 70#include "arch/hsail/insts/decl.hh" 71#include "arch/hsail/insts/gen_decl.hh" 72#include "arch/hsail/insts/mem.hh" 73#include "arch/hsail/insts/mem_impl.hh" 74#include "gpu-compute/brig_object.hh" 75 76namespace HsailISA 77{ 78 std::vector<GPUStaticInst> Decoder::decodedInsts; 79 80 GPUStaticInst 81 Decoder::decode(MachInst machInst) 82 { 83 using namespace Brig; 84 85 const BrigInstBase ib = machInst.brigInstBase; 86 const BrigObject obj = machInst.brigObj; 87 88 switch(ib->opcode) { 89''') 90decoder_code.indent() 91decoder_code.indent() 92 93exec_code(''' 94#include "arch/hsail/insts/gen_decl.hh" 95#include "base/intmath.hh" 96 97namespace HsailISA 98{ 99''') 100exec_code.indent() 101 
102############### 103# 104# Define code templates for class declarations (for header file) 105# 106############### 107 108# Basic header template for an instruction stub. 109header_template_stub = ''' 110class $class_name : public $base_class 111{ 112 public: 113 typedef $base_class Base; 114 115 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 116 : Base(ib, obj, "$opcode") 117 { 118 } 119 120 void execute(GPUDynInstPtr gpuDynInst); 121}; 122 123''' 124 125# Basic header template for an instruction with no template parameters. 126header_template_nodt = ''' 127class $class_name : public $base_class 128{ 129 public: 130 typedef $base_class Base; 131 132 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 133 : Base(ib, obj, "$opcode") 134 { 135 } 136 137 void execute(GPUDynInstPtr gpuDynInst); 138}; 139 140''' 141 142# Basic header template for an instruction with a single DataType 143# template parameter. 144header_template_1dt = ''' 145template<typename DataType> 146class $class_name : public $base_class<DataType> 147{ 148 public: 149 typedef $base_class<DataType> Base; 150 typedef typename DataType::CType CType; 151 152 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 153 : Base(ib, obj, "$opcode") 154 { 155 } 156 157 void execute(GPUDynInstPtr gpuDynInst); 158}; 159 160''' 161 162header_template_1dt_noexec = ''' 163template<typename DataType> 164class $class_name : public $base_class<DataType> 165{ 166 public: 167 typedef $base_class<DataType> Base; 168 typedef typename DataType::CType CType; 169 170 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 171 : Base(ib, obj, "$opcode") 172 { 173 } 174}; 175 176''' 177 178# Same as header_template_1dt, except the base class has a second 179# template parameter NumSrcOperands to allow a variable number of 180# source operands. 
Note that since this is implemented with an array, 181# it only works for instructions where all sources are of the same 182# type (like most arithmetics). 183header_template_1dt_varsrcs = ''' 184template<typename DataType> 185class $class_name : public $base_class<DataType, $num_srcs> 186{ 187 public: 188 typedef $base_class<DataType, $num_srcs> Base; 189 typedef typename DataType::CType CType; 190 191 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 192 : Base(ib, obj, "$opcode") 193 { 194 } 195 196 void execute(GPUDynInstPtr gpuDynInst); 197}; 198 199''' 200 201# Header template for instruction with two DataType template 202# parameters, one for the dest and one for the source. This is used 203# by compare and convert. 204header_template_2dt = ''' 205template<typename DestDataType, class SrcDataType> 206class $class_name : public $base_class<DestDataType, SrcDataType> 207{ 208 public: 209 typedef $base_class<DestDataType, SrcDataType> Base; 210 typedef typename DestDataType::CType DestCType; 211 typedef typename SrcDataType::CType SrcCType; 212 213 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 214 : Base(ib, obj, "$opcode") 215 { 216 } 217 218 void execute(GPUDynInstPtr gpuDynInst); 219}; 220 221''' 222 223header_templates = { 224 'ArithInst': header_template_1dt_varsrcs, 225 'CmovInst': header_template_1dt, 226 'ClassInst': header_template_1dt, 227 'ShiftInst': header_template_1dt, 228 'ExtractInsertInst': header_template_1dt, 229 'CmpInst': header_template_2dt, 230 'CvtInst': header_template_2dt, 231 'PopcountInst': header_template_2dt, 232 'LdInst': '', 233 'StInst': '', 234 'SpecialInstNoSrc': header_template_nodt, 235 'SpecialInst1Src': header_template_nodt, 236 'SpecialInstNoSrcNoDest': '', 237 'Stub': header_template_stub, 238} 239 240############### 241# 242# Define code templates for exec functions 243# 244############### 245 246# exec function body 247exec_template_stub = ''' 248void 249$class_name::execute(GPUDynInstPtr 
gpuDynInst) 250{ 251 fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble()); 252} 253 254''' 255exec_template_nodt_nosrc = ''' 256void 257$class_name::execute(GPUDynInstPtr gpuDynInst) 258{ 259 Wavefront w = gpuDynInst->wavefront(); 260* 261 typedef Base::DestCType DestCType; 262 263 const VectorMask &mask = w->getPred(); 264 265 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 266 if (mask[lane]) { 267 DestCType dest_val = $expr; 268 this->dest.set(w, lane, dest_val); 269 } 270 } 271} 272 273''' 274 275exec_template_nodt_1src = ''' 276void 277$class_name::execute(GPUDynInstPtr gpuDynInst) 278{ 279 Wavefront w = gpuDynInst->wavefront(); 280* 281 typedef Base::DestCType DestCType; 282 typedef Base::SrcCType SrcCType; 283 284 const VectorMask &mask = w->getPred(); 285 286 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 287 if (mask[lane]) { 288 SrcCType src_val0 = this->src0.get<SrcCType>(w, lane); 289 DestCType dest_val = $expr; 290 291 this->dest.set(w, lane, dest_val); 292 } 293 } 294} 295 296''' 297 298exec_template_1dt_varsrcs = ''' 299template<typename DataType> 300void 301$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 302{ 303 Wavefront w = gpuDynInst->wavefront(); 304* 305 const VectorMask &mask = w->getPred(); 306 307 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 308 if (mask[lane]) { 309 CType dest_val; 310 if ($dest_is_src_flag) { 311 dest_val = this->dest.template get<CType>(w, lane); 312 } 313 314 CType src_val[$num_srcs]; 315 316 for (int i = 0; i < $num_srcs; ++i) { 317 src_val[i] = this->src[i].template get<CType>(w, lane); 318 } 319 320 dest_val = (CType)($expr); 321 322 this->dest.set(w, lane, dest_val); 323 } 324 } 325} 326 327''' 328 329exec_template_1dt_3srcs = ''' 330template<typename DataType> 331void 332$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 333{ 334 Wavefront w = gpuDynInst->wavefront(); 335* 336 typedef typename Base::Src0CType Src0T; 337 typedef typename 
Base::Src1CType Src1T; 338 typedef typename Base::Src2CType Src2T; 339 340 const VectorMask &mask = w->getPred(); 341 342 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 343 if (mask[lane]) { 344 CType dest_val; 345 346 if ($dest_is_src_flag) { 347 dest_val = this->dest.template get<CType>(w, lane); 348 } 349 350 Src0T src_val0 = this->src0.template get<Src0T>(w, lane); 351 Src1T src_val1 = this->src1.template get<Src1T>(w, lane); 352 Src2T src_val2 = this->src2.template get<Src2T>(w, lane); 353 354 dest_val = $expr; 355 356 this->dest.set(w, lane, dest_val); 357 } 358 } 359} 360 361''' 362 363exec_template_1dt_2src_1dest = ''' 364template<typename DataType> 365void 366$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 367{ 368 Wavefront w = gpuDynInst->wavefront(); 369* 370 typedef typename Base::DestCType DestT; 371 typedef CType Src0T; 372 typedef typename Base::Src1CType Src1T; 373 374 const VectorMask &mask = w->getPred(); 375 376 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 377 if (mask[lane]) { 378 DestT dest_val; 379 if ($dest_is_src_flag) { 380 dest_val = this->dest.template get<DestT>(w, lane); 381 } 382 Src0T src_val0 = this->src0.template get<Src0T>(w, lane); 383 Src1T src_val1 = this->src1.template get<Src1T>(w, lane); 384 385 dest_val = $expr; 386 387 this->dest.set(w, lane, dest_val); 388 } 389 } 390} 391 392''' 393 394exec_template_shift = ''' 395template<typename DataType> 396void 397$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 398{ 399 Wavefront w = gpuDynInst->wavefront(); 400* 401 const VectorMask &mask = w->getPred(); 402 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 403 if (mask[lane]) { 404 CType dest_val; 405 406 if ($dest_is_src_flag) { 407 dest_val = this->dest.template get<CType>(w, lane); 408 } 409 410 CType src_val0 = this->src0.template get<CType>(w, lane); 411 uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane); 412 413 dest_val = $expr; 414 415 
this->dest.set(w, lane, dest_val); 416 } 417 } 418} 419 420''' 421 422exec_template_2dt = ''' 423template<typename DestDataType, class SrcDataType> 424void 425$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst) 426{ 427 Wavefront w = gpuDynInst->wavefront(); 428* 429 const VectorMask &mask = w->getPred(); 430 431 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 432 if (mask[lane]) { 433 DestCType dest_val; 434 SrcCType src_val[$num_srcs]; 435 436 for (int i = 0; i < $num_srcs; ++i) { 437 src_val[i] = this->src[i].template get<SrcCType>(w, lane); 438 } 439 440 dest_val = $expr; 441 442 this->dest.set(w, lane, dest_val); 443 } 444 } 445} 446 447''' 448 449exec_templates = { 450 'ArithInst': exec_template_1dt_varsrcs, 451 'CmovInst': exec_template_1dt_3srcs, 452 'ExtractInsertInst': exec_template_1dt_3srcs, 453 'ClassInst': exec_template_1dt_2src_1dest, 454 'CmpInst': exec_template_2dt, 455 'CvtInst': exec_template_2dt, 456 'PopcountInst': exec_template_2dt, 457 'LdInst': '', 458 'StInst': '', 459 'SpecialInstNoSrc': exec_template_nodt_nosrc, 460 'SpecialInst1Src': exec_template_nodt_1src, 461 'SpecialInstNoSrcNoDest': '', 462 'Stub': exec_template_stub, 463} 464 465############### 466# 467# Define code templates for the decoder cases 468# 469############### 470 471# decode template for nodt-opcode case 472decode_nodt_template = ''' 473 case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);''' 474 475decode_case_prolog_class_inst = ''' 476 case BRIG_OPCODE_$brig_opcode_upper: 477 { 478 //const BrigOperandBase baseOp = obj->getOperand(ib->operands[1]); 479* BrigType16_t type = ((BrigInstSourceType)ib)->sourceType; 480* //switch (baseOp->kind) { 481 // case BRIG_OPERAND_REG: 482 // type = ((const BrigOperandReg)baseOp)->type; 483* // break; 484 // case BRIG_OPERAND_IMMED: 485 // type = ((const BrigOperandImmed)baseOp)->type; 486* // break; 487 // default: 488 // fatal("CLASS unrecognized kind of operand %d\\n", 489 // 
baseOp->kind); 490 //} 491 switch (type) {''' 492 493# common prolog for 1dt- or 2dt-opcode case: switch on data type 494decode_case_prolog = ''' 495 case BRIG_OPCODE_$brig_opcode_upper: 496 { 497 switch (ib->type) {''' 498 499# single-level decode case entry (for 1dt opcodes) 500decode_case_entry = \ 501' case BRIG_TYPE_$type_name: return $constructor(ib, obj);' 502 503decode_store_prolog = \ 504' case BRIG_TYPE_$type_name: {' 505 506decode_store_case_epilog = ''' 507 }''' 508 509decode_store_case_entry = \ 510' return $constructor(ib, obj);' 511 512# common epilog for type switch 513decode_case_epilog = ''' 514 default: fatal("$brig_opcode_upper: unrecognized type %d\\n", 515 ib->type); 516 } 517 } 518 break;''' 519 520# Additional templates for nested decode on a second type field (for 521# compare and convert). These are used in place of the 522# decode_case_entry template to create a second-level switch on on the 523# second type field inside each case of the first-level type switch. 524# Because the name and location of the second type can vary, the Brig 525# instruction type must be provided in $brig_type, and the name of the 526# second type field must be provided in $type_field. 527decode_case2_prolog = ''' 528 case BRIG_TYPE_$type_name: 529 switch (((Brig$brig_type)ib)->$type2_field) {''' 530* 531decode_case2_entry = \ 532' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);' 533 534decode_case2_epilog = ''' 535 default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n", 536 ((Brig$brig_type)ib)->$type2_field); 537* } 538 break;''' 539 540# Figure out how many source operands an expr needs by looking for the 541# highest-numbered srcN value referenced. Since sources are numbered 542# starting at 0, the return value is N+1. 
def num_src_operands(expr):
    """Return how many source operands the expression string needs.

    The count is derived from the highest-numbered ``srcN`` name that
    appears in ``expr``.  Sources are numbered from 0, so the result is
    N + 1, or 0 when no ``src0``/``src1``/``src2`` occurs at all.  Only
    ``src0`` through ``src2`` are recognised.
    """
    # Look for the highest operand first: an expression that mentions
    # src2 needs three sources even if it never names src0 or src1.
    for index in (2, 1, 0):
        if 'src%d' % index in expr:
            return index + 1
    return 0

###############
#
# Define final code generation methods
#
# The gen_nodt, and gen_1dt, and gen_2dt methods are the interface for
# generating actual instructions.
#
###############

# Generate class declaration, exec function, and decode switch case
# for an brig_opcode with a single-level type switch.  The 'types'
# parameter is a list or tuple of types for which the instruction
# should be instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit the declaration, execute() body and decoder case of one opcode.

    Parameters:
      brig_opcode        -- opcode name; used as the C++ class name, in
                            upper case for the BRIG_OPCODE_* case label and
                            in lower case as the "$opcode" string.
      types              -- tuple/list of type names to instantiate the
                            instruction for; None (or a plain string)
                            selects the single-constructor decode case.
      expr               -- C++ right-hand side written in terms of
                            src0..src2 and dest; required for 'ArithInst'.
      base_class         -- base-class name, optionally with template
                            arguments (e.g. 'SpecialInstNoSrc<U32>'); the
                            name without arguments selects the header and
                            exec templates.
      type2_info         -- optional (field_name, types) pair requesting a
                            nested switch on a second type field.
      constructor_prefix -- text put in front of the class name in the
                            decoder ('new ' or 'decode').
      is_store           -- selects the store-specific decode entries.

    Raises ValueError if base_class is 'ArithInst' and expr is missing.

    NOTE(review): the templates contain $name placeholders that
    code_formatter presumably resolves from this function's local
    variables at the time of each header_code()/exec_code()/decoder_code()
    call.  For that reason the local names below are kept exactly as the
    templates spell them and the body is not split into helpers -- confirm
    against m5.util.code_formatter before refactoring.
    """
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst' and not expr:
        # expr must be provided with ArithInst so we can derive num_srcs
        # for the template
        raise ValueError("gen('%s'): ArithInst requires an expr"
                         % brig_opcode)

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = 'dest' in expr
        dest_is_src_flag = str(dest_is_src).lower() # for C++
        if base_class in ('ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst'):
            # these exec templates read their sources into an array
            expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
        else:
            # every other exec template uses one scalar per source
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
        # non-template classes: execute() goes into the exec source file
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        # ShiftInst has no exec_templates entry; it has its own template
        header_code(exec_template_shift)
    else:
        # templated execute() bodies are emitted into the header
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)
        if not type2_info:
            if not is_store:
                # single list of types, to basic one-level decode
                for type_name in types:
                    full_class_name = '%s<%s>' % (class_name,
                                                  type_name.upper())
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case_entry)
            else:
                # single list of types, to basic one-level decode
                for type_name in types:
                    decoder_code(decode_store_prolog)
                    # e.g. 'S16' -> second template argument 'S16', built
                    # from the leading type letter and the first number
                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
                    type_type = type_name[0]
                    full_class_name = '%s<%s,%s>' % (
                        class_name, type_name.upper(),
                        '%s%d' % (type_type.upper(), type_size))
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_store_case_entry)
                    decoder_code(decode_store_case_epilog)
        else:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)

                decoder_code(decode_case2_epilog)

        decoder_code(decode_case_epilog)

###############
#
# Generate instructions
#
###############

# handy abbreviations for common sets of types

# arithmetic ops are typically defined only on 32- and 64-bit sizes
arith_int_types = ('S32', 'U32', 'S64', 'U64')
arith_float_types = ('F32', 'F64')
arith_types = arith_int_types + arith_float_types

bit_types = ('B1', 'B32', 'B64')

all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types

# I think you might be able to do 'f16' memory ops too, but we'll
# ignore them for now.
mem_types = all_int_types + arith_float_types
mem_atom_types = all_int_types + ('B32', 'B64')

##### Arithmetic & logical operations
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')

gen('CopySign', arith_float_types,
    'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')

# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
gen('Fract', arith_float_types,
    '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')

gen('Ncos', arith_float_types, 'cos(src0)')
gen('Nsin', arith_float_types, 'sin(src0)')

gen('And', bit_types, 'src0 & src1')
gen('Or', bit_types, 'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')

gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)')
gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst',
    ('sourceType', ('B32', 'B64')))

gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')

# gen('Mul_hi', types=('s32','u32', '??'))
# gen('Mul24', types=('s32','u32', '??'))
gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')

gen('Abs', arith_types, 'std::abs(src0)')
gen('Neg', arith_types, '-src0')

gen('Mov', bit_types + arith_types, 'src0')
gen('Not', bit_types, 'heynot(src0)')

# mad and fma differ only in rounding behavior, which we don't emulate
# also there's an integer form of mad, but not of fma
gen('Mad', arith_types, 'src0 * src1 + src2')
gen('Fma', arith_float_types, 'src0 * src1 + src2')

#native floating point operations
gen('Nfma', arith_float_types, 'src0 * src1 + src2')

gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')

# see base/bitfield.hh
gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
    'ExtractInsertInst')

gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
    'ExtractInsertInst')

##### Compare
gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
    'CmpInst', ('sourceType', arith_types + bit_types))
gen('Class', arith_float_types, 'fpclassify(src0,src1)', 'ClassInst')

##### Conversion

# Conversion operations are only defined on B1, not B32 or B64
cvt_types = ('B1',) + mem_types

gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))


##### Load & Store
gen('Lda', mem_types, base_class='LdInst', constructor_prefix='decode')
gen('Ld', mem_types, base_class='LdInst', constructor_prefix='decode')
gen('St', mem_types, base_class='StInst', constructor_prefix='decode',
    is_store=True)
gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
gen('AtomicNoRet', mem_atom_types, base_class='StInst',
    constructor_prefix='decode')

gen('Cbr', base_class='LdInst', constructor_prefix='decode')
gen('Br', base_class='LdInst', constructor_prefix='decode')

##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Generate a 'special' instruction that takes zero or one source.

    The base class is SpecialInstNoSrc<dest_type> or
    SpecialInst1Src<dest_type>, chosen from the number of source operands
    referenced by ``expr``.  Raises ValueError when ``expr`` references
    more than one source, since no special base class exists for that.
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        raise ValueError(
            "gen_special('%s'): at most one source operand is supported, "
            "expression uses %d" % (brig_opcode, num_srcs))

    gen(brig_opcode, None, expr, base_class)

gen_special('WorkItemId', 'w->workItemId[src0][lane]')
gen_special('WorkItemAbsId',
    'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
gen_special('WorkGroupId', 'w->workGroupId[src0]')
gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
gen_special('GridSize', 'w->gridSz[src0]')
gen_special('GridGroups',
    'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
gen_special('LaneId', 'lane')
gen_special('WaveId', 'w->wfId')
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')

# gen_special('CU'', ')

gen('Ret', base_class='SpecialInstNoSrcNoDest')
gen('Barrier', base_class='SpecialInstNoSrcNoDest')
gen('MemFence', base_class='SpecialInstNoSrcNoDest')

# Map magic instructions to the BrigSyscall opcode
# Magic instructions are defined in magic.hh
#
# In the future, real HSA kernel system calls can be implemented and coexist
# with magic instructions.
gen('Call', base_class='SpecialInstNoSrcNoDest')

# Stubs for unimplemented instructions:
# These may need to be implemented at some point in the future, but
# for now we just match the instructions with their operands.
#
# By defining stubs for these instructions, we can work with
# applications that have them in dead/unused code paths.
#
# Needed for rocm-hcc compilations for HSA backends since
# builtins-hsail library is `cat`d onto the generated kernels.
# The builtins-hsail library consists of handcoded hsail functions
# that __might__ be needed by the rocm-hcc compiler in certain binaries.
gen('Bitmask', base_class='Stub')
gen('Bitrev', base_class='Stub')
gen('Firstbit', base_class='Stub')
gen('Lastbit', base_class='Stub')
gen('Unpacklo', base_class='Stub')
gen('Unpackhi', base_class='Stub')
gen('Pack', base_class='Stub')
gen('Unpack', base_class='Stub')
gen('Lerp', base_class='Stub')
gen('Packcvt', base_class='Stub')
gen('Unpackcvt', base_class='Stub')
gen('Sad', base_class='Stub')
gen('Sadhi', base_class='Stub')
gen('Activelanecount', base_class='Stub')
gen('Activelaneid', base_class='Stub')
gen('Activelanemask', base_class='Stub')
gen('Activelanepermute', base_class='Stub')
gen('Groupbaseptr', base_class='Stub')
gen('Signalnoret', base_class='Stub')

###############
#
# Generate file epilogs
#
###############

# Explicit specializations of Abs::execute() for the unsigned types: they
# copy the source to the destination instead of calling std::abs().
header_code('''
template<>
inline void
Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            CType src_val;

            src_val = this->src[0].template get<CType>(w, lane);

            dest_val = (CType)(src_val);

            this->dest.set(w, lane, dest_val);
        }
    }
}

template<>
inline void
Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            CType src_val;

            src_val = this->src[0].template get<CType>(w, lane);

            dest_val = (CType)(src_val);

            this->dest.set(w, lane, dest_val);
        }
    }
}
''')

header_code.dedent()
header_code('''
} // namespace HsailISA
''')

# close off main decode switch
decoder_code.dedent()
decoder_code.dedent()
decoder_code('''
          default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
        } // end switch(ib->opcode)
    } // end decode()
} // namespace HsailISA
''')

exec_code.dedent()
exec_code('''
} // namespace HsailISA
''')

###############
#
# Output accumulated code to files
#
###############
# argv[1] receives the declarations header, argv[2] the decoder source and
# argv[3] the execute() definitions.
header_code.write(sys.argv[1])
decoder_code.write(sys.argv[2])
exec_code.write(sys.argv[3])

gen.py (13450:32a36390a49e)

gen.py (13754:1345b049ebba)

1#! /usr/bin/python
2
3#

1#!/usr/bin/env python2.7

4# Copyright (c) 2015 Advanced Micro Devices, Inc.
5# All rights reserved.
6#
7# For use for simulation and test purposes only
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions are met:
11#
12# 1. Redistributions of source code must retain the above copyright notice,
13# this list of conditions and the following disclaimer.
14#
15# 2. Redistributions in binary form must reproduce the above copyright notice,
16# this list of conditions and the following disclaimer in the documentation
17# and/or other materials provided with the distribution.
18#
19# 3. Neither the name of the copyright holder nor the names of its contributors
20# may be used to endorse or promote products derived from this software
21# without specific prior written permission.
22#
23# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33# POSSIBILITY OF SUCH DAMAGE.
34#
35# Author: Steve Reinhardt
36#
37
38from __future__ import print_function
39
40import sys, re
41
42from m5.util import code_formatter
43
# The three positional arguments are the output paths handed to the
# write() calls at the end of this script (header, decoder, exec code).
if len(sys.argv) != 4:
    # Report on stderr and exit non-zero so that the invoking build step
    # sees the failure instead of carrying on without generated files.
    print("Error: need 3 args (file names)", file=sys.stderr)
    sys.exit(1)
47
# One independent code accumulator per output file: the instruction class
# declarations (header), the decoder switch, and the execute() definitions.
header_code, decoder_code, exec_code = (
    code_formatter(), code_formatter(), code_formatter())
51
52###############
53#
54# Generate file prologs (includes etc.)
55#
56###############
57
# Prolog of the generated declarations header: the includes used by the
# generated instruction classes, followed by the opening of namespace
# HsailISA.  The namespace is closed again by the header epilog near the
# end of this script.
header_code('''
#include "arch/hsail/insts/decl.hh"
#include "base/bitfield.hh"
#include "gpu-compute/hsail_code.hh"
#include "gpu-compute/wavefront.hh"

namespace HsailISA
{
''')
# Everything emitted into the header from here on is nested one level
# deeper (matched by header_code.dedent() in the epilog).
header_code.indent()
68
# Prolog of the generated decoder source: includes, the definition of the
# static Decoder::decodedInsts vector, and the start of Decoder::decode()
# up to and including the opening of the switch on ib->opcode.  The
# per-opcode cases are appended by the decode templates and the switch,
# the function and the namespace are closed by the decoder epilog.
decoder_code('''
#include "arch/hsail/gpu_decoder.hh"
#include "arch/hsail/insts/branch.hh"
#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gen_decl.hh"
#include "arch/hsail/insts/mem.hh"
#include "arch/hsail/insts/mem_impl.hh"
#include "gpu-compute/brig_object.hh"

namespace HsailISA
{
    std::vector<GPUStaticInst*> Decoder::decodedInsts;

    GPUStaticInst*
    Decoder::decode(MachInst machInst)
    {
        using namespace Brig;

        const BrigInstBase *ib = machInst.brigInstBase;
        const BrigObject *obj = machInst.brigObj;

        switch(ib->opcode) {
''')
# Two levels: the case labels emitted later sit inside both the namespace
# and the decode() function (undone by two dedent() calls in the epilog).
decoder_code.indent()
decoder_code.indent()
94
# Prolog of the generated exec source: includes the generated declarations
# (gen_decl.hh) plus base/intmath.hh, then opens namespace HsailISA.  The
# namespace is closed by the exec epilog near the end of this script.
exec_code('''
#include "arch/hsail/insts/gen_decl.hh"
#include "base/intmath.hh"

namespace HsailISA
{
''')
# Definitions emitted into the exec file are nested inside the namespace.
exec_code.indent()
103
104###############
105#
106# Define code templates for class declarations (for header file)
107#
108###############
109
# Basic header template for an instruction stub.
# Declares a non-template class derived from $base_class whose constructor
# forwards the BRIG instruction, the BRIG object and the "$opcode" string
# to the base class; execute() is only declared here.  Selected through
# header_templates['Stub'].
# NOTE(review): the $class_name/$base_class/$opcode placeholders are
# presumably substituted by code_formatter when gen() emits the template
# -- confirm against m5.util.code_formatter.
header_template_stub = '''
class $class_name : public $base_class
{
  public:
    typedef $base_class Base;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
        : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''
126
# Basic header template for an instruction with no template parameters.
# The generated class is not itself a template; header_templates selects
# this for 'SpecialInstNoSrc' and 'SpecialInst1Src'.  The text is currently
# identical to header_template_stub.
# ($-placeholders are presumably filled in by code_formatter when the
# template is emitted -- confirm.)
header_template_nodt = '''
class $class_name : public $base_class
{
  public:
    typedef $base_class Base;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
        : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''
143
# Basic header template for an instruction with a single DataType
# template parameter.
# The class derives from $base_class<DataType> and exposes DataType::CType
# as CType.  header_templates selects this for 'CmovInst', 'ClassInst',
# 'ShiftInst' and 'ExtractInsertInst'.
# ($-placeholders are presumably filled in by code_formatter when the
# template is emitted -- confirm.)
header_template_1dt = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
  public:
    typedef $base_class<DataType> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
        : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''
163
# Variant of header_template_1dt that does not declare execute(), so the
# generated class only adds the typedefs and the forwarding constructor.
# NOTE(review): no entry of header_templates refers to this template in
# this file -- it looks unused; confirm before removing.
header_template_1dt_noexec = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
  public:
    typedef $base_class<DataType> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
        : Base(ib, obj, "$opcode")
    {
    }
};

'''
179
# Same as header_template_1dt, except the base class has a second
# template parameter NumSrcOperands to allow a variable number of
# source operands.  Note that since this is implemented with an array,
# it only works for instructions where all sources are of the same
# type (like most arithmetics).
# $num_srcs is the operand count gen() derives from the instruction's
# expression; header_templates selects this template for 'ArithInst'.
header_template_1dt_varsrcs = '''
template<typename DataType>
class $class_name : public $base_class<DataType, $num_srcs>
{
  public:
    typedef $base_class<DataType, $num_srcs> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
        : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''
202
# Header template for instruction with two DataType template
# parameters, one for the dest and one for the source.  This is used
# by compare and convert.
# header_templates also selects it for 'PopcountInst'.  The class exposes
# the C types of both parameters as DestCType and SrcCType.
header_template_2dt = '''
template<typename DestDataType, class SrcDataType>
class $class_name : public $base_class<DestDataType, SrcDataType>
{
  public:
    typedef $base_class<DestDataType, SrcDataType> Base;
    typedef typename DestDataType::CType DestCType;
    typedef typename SrcDataType::CType SrcCType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
        : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''
224
# Class-declaration template for each instruction base class.  gen() looks
# a template up by the base-class name with any template arguments
# stripped.  Entries are grouped here by the template they share.
header_templates = dict.fromkeys(
    ('CmovInst', 'ClassInst', 'ShiftInst', 'ExtractInsertInst'),
    header_template_1dt)
header_templates.update(dict.fromkeys(
    ('CmpInst', 'CvtInst', 'PopcountInst'), header_template_2dt))
header_templates.update(dict.fromkeys(
    ('SpecialInstNoSrc', 'SpecialInst1Src'), header_template_nodt))
# These base classes map to an empty template: nothing is declared for
# them in the generated header.
header_templates.update(dict.fromkeys(
    ('LdInst', 'StInst', 'SpecialInstNoSrcNoDest'), ''))
header_templates['ArithInst'] = header_template_1dt_varsrcs
header_templates['Stub'] = header_template_stub
241
242###############
243#
244# Define code templates for exec functions
245#
246###############
247
# exec function body
# execute() definition for stub instructions: the generated function calls
# fatal() with the instruction's disassembly, i.e. a stub may be decoded
# but not executed.  Selected through exec_templates['Stub'].
exec_template_stub = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble());
}

'''
# execute() definition for non-template instructions that read no source
# operand (exec_templates['SpecialInstNoSrc']).  For every lane below
# wfSize() whose bit is set in the mask returned by w->getPred(), $expr is
# evaluated and stored to the destination.  $expr may refer to 'w' and
# 'lane', which are in scope in the generated loop.
exec_template_nodt_nosrc = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef Base::DestCType DestCType;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestCType dest_val = $expr;
            this->dest.set(w, lane, dest_val);
        }
    }
}

'''
276
# execute() definition for non-template instructions with exactly one
# source operand (exec_templates['SpecialInst1Src']).  Per enabled lane the
# source is read into src_val0 -- the name gen() rewrites 'src0' to for
# this base class -- then $expr is evaluated and written to the
# destination.
exec_template_nodt_1src = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef Base::DestCType DestCType;
    typedef Base::SrcCType SrcCType;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
            DestCType dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''
299
# execute() definition for single-DataType instructions whose sources all
# share one C type (exec_templates['ArithInst']).  Per enabled lane the
# $num_srcs sources are read into the src_val[] array, which is what gen()
# rewrites srcN to for this base class.  $dest_is_src_flag is the C++
# literal 'true' or 'false'; when true, the current destination value is
# read first so that $expr can use it.  The result is cast to CType before
# it is stored.
exec_template_1dt_varsrcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            CType src_val[$num_srcs];

            for (int i = 0; i < $num_srcs; ++i) {
                src_val[i] = this->src[i].template get<CType>(w, lane);
            }

            dest_val = (CType)($expr);

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''
330
# execute() definition for single-DataType instructions with three source
# operands that may each have their own C type (exec_templates['CmovInst']
# and ['ExtractInsertInst']).  The sources are read into the scalars
# src_val0..src_val2 using the Src0CType..Src2CType typedefs of the base
# class.  When $dest_is_src_flag is 'true' the old destination value is
# loaded before $expr is evaluated.
exec_template_1dt_3srcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename Base::Src0CType Src0T;
    typedef typename Base::Src1CType Src1T;
    typedef typename Base::Src2CType Src2T;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;

            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
            Src2T src_val2 = this->src2.template get<Src2T>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''
364
# execute() definition for single-DataType instructions with two sources
# and a destination whose C type comes from the base class
# (exec_templates['ClassInst']).  Source 0 is read as CType, source 1 as
# Base::Src1CType and the destination is handled as Base::DestCType.  When
# $dest_is_src_flag is 'true' the old destination value is loaded before
# $expr is evaluated.
exec_template_1dt_2src_1dest = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename Base::DestCType DestT;
    typedef CType Src0T;
    typedef typename Base::Src1CType Src1T;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestT dest_val;
            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<DestT>(w, lane);
            }
            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''
395
396exec_template_shift = '''
397template<typename DataType>
398void
399$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
400{
401 Wavefront *w = gpuDynInst->wavefront();
402
403 const VectorMask &mask = w->getPred();
404 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
405 if (mask[lane]) {
406 CType dest_val;
407
408 if ($dest_is_src_flag) {
409 dest_val = this->dest.template get<CType>(w, lane);
410 }
411
412 CType src_val0 = this->src0.template get<CType>(w, lane);
413 uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);
414
415 dest_val = $expr;
416
417 this->dest.set(w, lane, dest_val);
418 }
419 }
420}
421
422'''
423
424exec_template_2dt = '''
425template<typename DestDataType, class SrcDataType>
426void
427$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
428{
429 Wavefront *w = gpuDynInst->wavefront();
430
431 const VectorMask &mask = w->getPred();
432
433 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
434 if (mask[lane]) {
435 DestCType dest_val;
436 SrcCType src_val[$num_srcs];
437
438 for (int i = 0; i < $num_srcs; ++i) {
439 src_val[i] = this->src[i].template get<SrcCType>(w, lane);
440 }
441
442 dest_val = $expr;
443
444 this->dest.set(w, lane, dest_val);
445 }
446 }
447}
448
449'''
450
451exec_templates = {
452 'ArithInst': exec_template_1dt_varsrcs,
453 'CmovInst': exec_template_1dt_3srcs,
454 'ExtractInsertInst': exec_template_1dt_3srcs,
455 'ClassInst': exec_template_1dt_2src_1dest,
456 'CmpInst': exec_template_2dt,
457 'CvtInst': exec_template_2dt,
458 'PopcountInst': exec_template_2dt,
459 'LdInst': '',
460 'StInst': '',
461 'SpecialInstNoSrc': exec_template_nodt_nosrc,
462 'SpecialInst1Src': exec_template_nodt_1src,
463 'SpecialInstNoSrcNoDest': '',
464 'Stub': exec_template_stub,
465}
466
467###############
468#
469# Define code templates for the decoder cases
470#
471###############
472
473# decode template for nodt-opcode case
474decode_nodt_template = '''
475 case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''
476
477decode_case_prolog_class_inst = '''
478 case BRIG_OPCODE_$brig_opcode_upper:
479 {
480 //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
481 BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
482 //switch (baseOp->kind) {
483 // case BRIG_OPERAND_REG:
484 // type = ((const BrigOperandReg*)baseOp)->type;
485 // break;
486 // case BRIG_OPERAND_IMMED:
487 // type = ((const BrigOperandImmed*)baseOp)->type;
488 // break;
489 // default:
490 // fatal("CLASS unrecognized kind of operand %d\\n",
491 // baseOp->kind);
492 //}
493 switch (type) {'''
494
495# common prolog for 1dt- or 2dt-opcode case: switch on data type
496decode_case_prolog = '''
497 case BRIG_OPCODE_$brig_opcode_upper:
498 {
499 switch (ib->type) {'''
500
501# single-level decode case entry (for 1dt opcodes)
502decode_case_entry = \
503' case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
504
505decode_store_prolog = \
506' case BRIG_TYPE_$type_name: {'
507
508decode_store_case_epilog = '''
509 }'''
510
511decode_store_case_entry = \
512' return $constructor(ib, obj);'
513
514# common epilog for type switch
515decode_case_epilog = '''
516 default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
517 ib->type);
518 }
519 }
520 break;'''
521
# Additional templates for nested decode on a second type field (for
# compare and convert).  These are used in place of the
# decode_case_entry template to create a second-level switch on the
# second type field inside each case of the first-level type switch.
# Because the name and location of the second type can vary, the Brig
# instruction type must be provided in $brig_type, and the name of the
# second type field must be provided in $type2_field.
529decode_case2_prolog = '''
530 case BRIG_TYPE_$type_name:
531 switch (((Brig$brig_type*)ib)->$type2_field) {'''
532
533decode_case2_entry = \
534' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
535
536decode_case2_epilog = '''
537 default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
538 ((Brig$brig_type*)ib)->$type2_field);
539 }
540 break;'''
541
542# Figure out how many source operands an expr needs by looking for the
543# highest-numbered srcN value referenced. Since sources are numbered
544# starting at 0, the return value is N+1.
def num_src_operands(expr):
    """Return the number of source operands referenced by *expr*.

    Operands are written as ``src0``, ``src1``, ... in the expression
    text.  The result is the highest referenced index plus one, or 0
    when the expression references no source operand at all.

    The same word-bounded pattern that gen() uses to rewrite operands
    is used here, so the count always agrees with what actually gets
    substituted (e.g. ``src_val1`` or ``mysrc1`` are not counted).
    """
    indices = [int(n) for n in re.findall(r'\bsrc(\d)\b', expr)]
    if not indices:
        return 0
    return max(indices) + 1
554
###############
#
# Define final code generation methods
#
# The gen() and gen_special() functions are the interface for
# generating actual instructions.
#
###############
563
# Generate class declaration, exec function, and decode switch case
# for a brig_opcode with a single-level type switch (or, when
# type2_info is given, a two-level type switch).  The 'types'
# parameter is a list or tuple of types for which the instruction
# should be instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit declaration, exec function and decode case(s) for one opcode.

    Arguments:
      brig_opcode -- opcode name; used verbatim as the generated class
          name, upper-cased for the BRIG_OPCODE_* case label and
          lower-cased for the opcode string.
      types -- None or a single string for an untyped instruction (one
          decode case), otherwise a list/tuple of type names; one decode
          case is emitted per type.
      expr -- text of the expression computing the result, written in
          terms of src0..srcN and dest.  Required when base_class is
          'ArithInst'.
      base_class -- name of the base class; with any trailing '<...>'
          removed it is the key into header_templates / exec_templates.
      type2_info -- optional (type2_field, types2) pair requesting a
          nested switch on a second type field.
      constructor_prefix -- text placed in front of the class name in
          the decode case.
      is_store -- when true (and type2_info is not given) emit the
          store-style decode entries, whose class takes two template
          arguments.
    """
    # NOTE(review): the templates handed to header_code / exec_code /
    # decoder_code contain $name placeholders and no values are passed
    # explicitly, so they are presumably resolved from this function's
    # local variables.  Local names referenced by templates
    # (brig_opcode_upper, class_name, opcode, num_srcs, dest_is_src_flag,
    # expr, constructor, type_name, type2_name, type2_field, brig_type)
    # must therefore keep exactly these spellings.
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # note that expr must be provided with ArithInst so we can
        # derive num_srcs for the template
        assert expr

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = expr.find('dest') != -1
        dest_is_src_flag = str(dest_is_src).lower()  # for C++
        # These base classes read their sources through a src_val[]
        # array; every other one uses individual src_valN variables.
        if base_class in ('ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst'):
            src_repl = r'src_val[\1]'
        else:
            src_repl = r'src_val\1'
        expr = re.sub(r'\bsrc(\d)\b', src_repl, expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        # ShiftInst has no entry in exec_templates; its exec body goes
        # into the header like the other templated instructions.
        header_code(exec_template_shift)
    else:
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)
        if not type2_info:
            if not is_store:
                # single list of types, to basic one-level decode
                for type_name in types:
                    full_class_name = '%s<%s>' % (class_name,
                                                  type_name.upper())
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case_entry)
            else:
                # single list of types; each case gets its own braces
                # and the class takes a second template argument built
                # from the type's leading letter and its bit width
                for type_name in types:
                    decoder_code(decode_store_prolog)
                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
                    type_type = type_name[0]
                    src_type = '%s%d' % (type_type.upper(), type_size)
                    full_class_name = '%s<%s,%s>' % (class_name,
                                                     type_name.upper(),
                                                     src_type)
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_store_case_entry)
                    decoder_code(decode_store_case_epilog)
        else:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)

                # close the inner (second type) switch for this type
                decoder_code(decode_case2_epilog)

        # close the outer type switch opened by the case prolog
        decoder_code(decode_case_epilog)
655
656###############
657#
658# Generate instructions
659#
660###############
661
662# handy abbreviations for common sets of types
663
664# arithmetic ops are typically defined only on 32- and 64-bit sizes
665arith_int_types = ('S32', 'U32', 'S64', 'U64')
666arith_float_types = ('F32', 'F64')
667arith_types = arith_int_types + arith_float_types
668
669bit_types = ('B1', 'B32', 'B64')
670
671all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types
672
673# I think you might be able to do 'f16' memory ops too, but we'll
674# ignore them for now.
675mem_types = all_int_types + arith_float_types
676mem_atom_types = all_int_types + ('B32', 'B64')
677
678##### Arithmetic & logical operations
679gen('Add', arith_types, 'src0 + src1')
680gen('Sub', arith_types, 'src0 - src1')
681gen('Mul', arith_types, 'src0 * src1')
682gen('Div', arith_types, 'src0 / src1')
683gen('Min', arith_types, 'std::min(src0, src1)')
684gen('Max', arith_types, 'std::max(src0, src1)')
685gen('Gcnmin', arith_types, 'std::min(src0, src1)')
686
687gen('CopySign', arith_float_types,
688 'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
689gen('Sqrt', arith_float_types, 'sqrt(src0)')
690gen('Floor', arith_float_types, 'floor(src0)')
691
692# "fast" sqrt... same as slow for us
693gen('Nsqrt', arith_float_types, 'sqrt(src0)')
694gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
695gen('Nrcp', arith_float_types, '1.0/src0')
696gen('Fract', arith_float_types,
697 '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')
698
699gen('Ncos', arith_float_types, 'cos(src0)');
700gen('Nsin', arith_float_types, 'sin(src0)');
701
702gen('And', bit_types, 'src0 & src1')
703gen('Or', bit_types, 'src0 | src1')
704gen('Xor', bit_types, 'src0 ^ src1')
705
706gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)')
707gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst', \
708 ('sourceType', ('B32', 'B64')))
709
710gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
711gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')
712
713# gen('Mul_hi', types=('s32','u32', '??'))
714# gen('Mul24', types=('s32','u32', '??'))
715gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')
716
717gen('Abs', arith_types, 'std::abs(src0)')
718gen('Neg', arith_types, '-src0')
719
720gen('Mov', bit_types + arith_types, 'src0')
721gen('Not', bit_types, 'heynot(src0)')
722
723# mad and fma differ only in rounding behavior, which we don't emulate
724# also there's an integer form of mad, but not of fma
725gen('Mad', arith_types, 'src0 * src1 + src2')
726gen('Fma', arith_float_types, 'src0 * src1 + src2')
727
728#native floating point operations
729gen('Nfma', arith_float_types, 'src0 * src1 + src2')
730
731gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
732gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
733gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')
734
735# see base/bitfield.hh
736gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
737 'ExtractInsertInst')
738
739gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
740 'ExtractInsertInst')
741
742##### Compare
743gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
744 'CmpInst', ('sourceType', arith_types + bit_types))
745gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst')
746
747##### Conversion
748
749# Conversion operations are only defined on B1, not B32 or B64
750cvt_types = ('B1',) + mem_types
751
752gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))
753
754
755##### Load & Store
756gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode')
757gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode')
758gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode',
759 is_store=True)
760gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
761gen('AtomicNoRet', mem_atom_types, base_class='StInst',
762 constructor_prefix='decode')
763
764gen('Cbr', base_class = 'LdInst', constructor_prefix='decode')
765gen('Br', base_class = 'LdInst', constructor_prefix='decode')
766
767##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Generate a special (non-arithmetic) instruction from *expr*.

    The base class is chosen from the number of source operands that
    *expr* references: none selects SpecialInstNoSrc, one selects
    SpecialInst1Src, each instantiated with *dest_type*.  The actual
    code generation is delegated to gen() with no type list.

    Raises ValueError when *expr* references more than one source
    operand, since no matching base class is handled here.
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # The original 'assert false' raised NameError ('false' is not a
        # Python name) and would vanish under -O; fail explicitly.
        raise ValueError(
            "gen_special(%s): expression uses %d source operands, "
            "at most 1 is supported" % (brig_opcode, num_srcs))

    gen(brig_opcode, None, expr, base_class)
778
779gen_special('WorkItemId', 'w->workItemId[src0][lane]')
780gen_special('WorkItemAbsId',
781 'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
782gen_special('WorkGroupId', 'w->workGroupId[src0]')
783gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
784gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
785gen_special('GridSize', 'w->gridSz[src0]')
786gen_special('GridGroups',
787 'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
788gen_special('LaneId', 'lane')
789gen_special('WaveId', 'w->wfId')
790gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
791
792# gen_special('CU'', ')
793
794gen('Ret', base_class='SpecialInstNoSrcNoDest')
795gen('Barrier', base_class='SpecialInstNoSrcNoDest')
796gen('MemFence', base_class='SpecialInstNoSrcNoDest')
797
798# Map magic instructions to the BrigSyscall opcode
799# Magic instructions are defined in magic.hh
800#
801# In the future, real HSA kernel system calls can be implemented and coexist
802# with magic instructions.
803gen('Call', base_class='SpecialInstNoSrcNoDest')
804
805# Stubs for unimplemented instructions:
806# These may need to be implemented at some point in the future, but
807# for now we just match the instructions with their operands.
808#
809# By defining stubs for these instructions, we can work with
810# applications that have them in dead/unused code paths.
811#
812# Needed for rocm-hcc compilations for HSA backends since
813# builtins-hsail library is `cat`d onto the generated kernels.
814# The builtins-hsail library consists of handcoded hsail functions
815# that __might__ be needed by the rocm-hcc compiler in certain binaries.
816gen('Bitmask', base_class='Stub')
817gen('Bitrev', base_class='Stub')
818gen('Firstbit', base_class='Stub')
819gen('Lastbit', base_class='Stub')
820gen('Unpacklo', base_class='Stub')
821gen('Unpackhi', base_class='Stub')
822gen('Pack', base_class='Stub')
823gen('Unpack', base_class='Stub')
824gen('Lerp', base_class='Stub')
825gen('Packcvt', base_class='Stub')
826gen('Unpackcvt', base_class='Stub')
827gen('Sad', base_class='Stub')
828gen('Sadhi', base_class='Stub')
829gen('Activelanecount', base_class='Stub')
830gen('Activelaneid', base_class='Stub')
831gen('Activelanemask', base_class='Stub')
832gen('Activelanepermute', base_class='Stub')
833gen('Groupbaseptr', base_class='Stub')
834gen('Signalnoret', base_class='Stub')
835
836###############
837#
838# Generate file epilogs
839#
840###############
841header_code('''
842template<>
843inline void
844Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
845{
846 Wavefront *w = gpuDynInst->wavefront();
847
848 const VectorMask &mask = w->getPred();
849
850 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
851 if (mask[lane]) {
852 CType dest_val;
853 CType src_val;
854
855 src_val = this->src[0].template get<CType>(w, lane);
856
857 dest_val = (CType)(src_val);
858
859 this->dest.set(w, lane, dest_val);
860 }
861 }
862}
863
864template<>
865inline void
866Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
867{
868 Wavefront *w = gpuDynInst->wavefront();
869
870 const VectorMask &mask = w->getPred();
871
872 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
873 if (mask[lane]) {
874 CType dest_val;
875 CType src_val;
876
877 src_val = this->src[0].template get<CType>(w, lane);
878
879 dest_val = (CType)(src_val);
880
881 this->dest.set(w, lane, dest_val);
882 }
883 }
884}
885''')
886
887header_code.dedent()
888header_code('''
889} // namespace HsailISA
890''')
891
892# close off main decode switch
893decoder_code.dedent()
894decoder_code.dedent()
895decoder_code('''
896 default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
897 } // end switch(ib->opcode)
898 } // end decode()
899} // namespace HsailISA
900''')
901
902exec_code.dedent()
903exec_code('''
904} // namespace HsailISA
905''')
906
907###############
908#
909# Output accumulated code to files
910#
911###############
912header_code.write(sys.argv[1])
913decoder_code.write(sys.argv[2])
914exec_code.write(sys.argv[3])

2# Copyright (c) 2015 Advanced Micro Devices, Inc.
3# All rights reserved.
4#
5# For use for simulation and test purposes only
6#
7# Redistribution and use in source and binary forms, with or without
8# modification, are permitted provided that the following conditions are met:
9#
10# 1. Redistributions of source code must retain the above copyright notice,
11# this list of conditions and the following disclaimer.
12#
13# 2. Redistributions in binary form must reproduce the above copyright notice,
14# this list of conditions and the following disclaimer in the documentation
15# and/or other materials provided with the distribution.
16#
17# 3. Neither the name of the copyright holder nor the names of its contributors
18# may be used to endorse or promote products derived from this software
19# without specific prior written permission.
20#
21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31# POSSIBILITY OF SUCH DAMAGE.
32#
33# Author: Steve Reinhardt
34#
35
36from __future__ import print_function
37
38import sys, re
39
40from m5.util import code_formatter
41
# Three output file names are required on the command line; they are
# written, in order, from header_code, decoder_code and exec_code at the
# end of this script.
if len(sys.argv) != 4:
    print("Error: need 3 args (file names)", file=sys.stderr)
    # Exit with a non-zero status so the caller sees the failure instead
    # of silently continuing without the generated files.
    sys.exit(1)
45
46header_code = code_formatter()
47decoder_code = code_formatter()
48exec_code = code_formatter()
49
50###############
51#
52# Generate file prologs (includes etc.)
53#
54###############
55
56header_code('''
57#include "arch/hsail/insts/decl.hh"
58#include "base/bitfield.hh"
59#include "gpu-compute/hsail_code.hh"
60#include "gpu-compute/wavefront.hh"
61
62namespace HsailISA
63{
64''')
65header_code.indent()
66
67decoder_code('''
68#include "arch/hsail/gpu_decoder.hh"
69#include "arch/hsail/insts/branch.hh"
70#include "arch/hsail/insts/decl.hh"
71#include "arch/hsail/insts/gen_decl.hh"
72#include "arch/hsail/insts/mem.hh"
73#include "arch/hsail/insts/mem_impl.hh"
74#include "gpu-compute/brig_object.hh"
75
76namespace HsailISA
77{
78 std::vector<GPUStaticInst*> Decoder::decodedInsts;
79
80 GPUStaticInst*
81 Decoder::decode(MachInst machInst)
82 {
83 using namespace Brig;
84
85 const BrigInstBase *ib = machInst.brigInstBase;
86 const BrigObject *obj = machInst.brigObj;
87
88 switch(ib->opcode) {
89''')
90decoder_code.indent()
91decoder_code.indent()
92
93exec_code('''
94#include "arch/hsail/insts/gen_decl.hh"
95#include "base/intmath.hh"
96
97namespace HsailISA
98{
99''')
100exec_code.indent()
101
102###############
103#
104# Define code templates for class declarations (for header file)
105#
106###############
107
108# Basic header template for an instruction stub.
109header_template_stub = '''
110class $class_name : public $base_class
111{
112 public:
113 typedef $base_class Base;
114
115 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
116 : Base(ib, obj, "$opcode")
117 {
118 }
119
120 void execute(GPUDynInstPtr gpuDynInst);
121};
122
123'''
124
125# Basic header template for an instruction with no template parameters.
126header_template_nodt = '''
127class $class_name : public $base_class
128{
129 public:
130 typedef $base_class Base;
131
132 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
133 : Base(ib, obj, "$opcode")
134 {
135 }
136
137 void execute(GPUDynInstPtr gpuDynInst);
138};
139
140'''
141
142# Basic header template for an instruction with a single DataType
143# template parameter.
144header_template_1dt = '''
145template<typename DataType>
146class $class_name : public $base_class<DataType>
147{
148 public:
149 typedef $base_class<DataType> Base;
150 typedef typename DataType::CType CType;
151
152 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
153 : Base(ib, obj, "$opcode")
154 {
155 }
156
157 void execute(GPUDynInstPtr gpuDynInst);
158};
159
160'''
161
162header_template_1dt_noexec = '''
163template<typename DataType>
164class $class_name : public $base_class<DataType>
165{
166 public:
167 typedef $base_class<DataType> Base;
168 typedef typename DataType::CType CType;
169
170 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
171 : Base(ib, obj, "$opcode")
172 {
173 }
174};
175
176'''
177
178# Same as header_template_1dt, except the base class has a second
179# template parameter NumSrcOperands to allow a variable number of
180# source operands. Note that since this is implemented with an array,
181# it only works for instructions where all sources are of the same
182# type (like most arithmetics).
183header_template_1dt_varsrcs = '''
184template<typename DataType>
185class $class_name : public $base_class<DataType, $num_srcs>
186{
187 public:
188 typedef $base_class<DataType, $num_srcs> Base;
189 typedef typename DataType::CType CType;
190
191 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
192 : Base(ib, obj, "$opcode")
193 {
194 }
195
196 void execute(GPUDynInstPtr gpuDynInst);
197};
198
199'''
200
201# Header template for instruction with two DataType template
202# parameters, one for the dest and one for the source. This is used
203# by compare and convert.
204header_template_2dt = '''
205template<typename DestDataType, class SrcDataType>
206class $class_name : public $base_class<DestDataType, SrcDataType>
207{
208 public:
209 typedef $base_class<DestDataType, SrcDataType> Base;
210 typedef typename DestDataType::CType DestCType;
211 typedef typename SrcDataType::CType SrcCType;
212
213 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
214 : Base(ib, obj, "$opcode")
215 {
216 }
217
218 void execute(GPUDynInstPtr gpuDynInst);
219};
220
221'''
222
223header_templates = {
224 'ArithInst': header_template_1dt_varsrcs,
225 'CmovInst': header_template_1dt,
226 'ClassInst': header_template_1dt,
227 'ShiftInst': header_template_1dt,
228 'ExtractInsertInst': header_template_1dt,
229 'CmpInst': header_template_2dt,
230 'CvtInst': header_template_2dt,
231 'PopcountInst': header_template_2dt,
232 'LdInst': '',
233 'StInst': '',
234 'SpecialInstNoSrc': header_template_nodt,
235 'SpecialInst1Src': header_template_nodt,
236 'SpecialInstNoSrcNoDest': '',
237 'Stub': header_template_stub,
238}
239
240###############
241#
242# Define code templates for exec functions
243#
244###############
245
246# exec function body
247exec_template_stub = '''
248void
249$class_name::execute(GPUDynInstPtr gpuDynInst)
250{
251 fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble());
252}
253
254'''
255exec_template_nodt_nosrc = '''
256void
257$class_name::execute(GPUDynInstPtr gpuDynInst)
258{
259 Wavefront *w = gpuDynInst->wavefront();
260
261 typedef Base::DestCType DestCType;
262
263 const VectorMask &mask = w->getPred();
264
265 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
266 if (mask[lane]) {
267 DestCType dest_val = $expr;
268 this->dest.set(w, lane, dest_val);
269 }
270 }
271}
272
273'''
274
275exec_template_nodt_1src = '''
276void
277$class_name::execute(GPUDynInstPtr gpuDynInst)
278{
279 Wavefront *w = gpuDynInst->wavefront();
280
281 typedef Base::DestCType DestCType;
282 typedef Base::SrcCType SrcCType;
283
284 const VectorMask &mask = w->getPred();
285
286 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
287 if (mask[lane]) {
288 SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
289 DestCType dest_val = $expr;
290
291 this->dest.set(w, lane, dest_val);
292 }
293 }
294}
295
296'''
297
298exec_template_1dt_varsrcs = '''
299template<typename DataType>
300void
301$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
302{
303 Wavefront *w = gpuDynInst->wavefront();
304
305 const VectorMask &mask = w->getPred();
306
307 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
308 if (mask[lane]) {
309 CType dest_val;
310 if ($dest_is_src_flag) {
311 dest_val = this->dest.template get<CType>(w, lane);
312 }
313
314 CType src_val[$num_srcs];
315
316 for (int i = 0; i < $num_srcs; ++i) {
317 src_val[i] = this->src[i].template get<CType>(w, lane);
318 }
319
320 dest_val = (CType)($expr);
321
322 this->dest.set(w, lane, dest_val);
323 }
324 }
325}
326
327'''
328
329exec_template_1dt_3srcs = '''
330template<typename DataType>
331void
332$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
333{
334 Wavefront *w = gpuDynInst->wavefront();
335
336 typedef typename Base::Src0CType Src0T;
337 typedef typename Base::Src1CType Src1T;
338 typedef typename Base::Src2CType Src2T;
339
340 const VectorMask &mask = w->getPred();
341
342 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
343 if (mask[lane]) {
344 CType dest_val;
345
346 if ($dest_is_src_flag) {
347 dest_val = this->dest.template get<CType>(w, lane);
348 }
349
350 Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
351 Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
352 Src2T src_val2 = this->src2.template get<Src2T>(w, lane);
353
354 dest_val = $expr;
355
356 this->dest.set(w, lane, dest_val);
357 }
358 }
359}
360
361'''
362
363exec_template_1dt_2src_1dest = '''
364template<typename DataType>
365void
366$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
367{
368 Wavefront *w = gpuDynInst->wavefront();
369
370 typedef typename Base::DestCType DestT;
371 typedef CType Src0T;
372 typedef typename Base::Src1CType Src1T;
373
374 const VectorMask &mask = w->getPred();
375
376 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
377 if (mask[lane]) {
378 DestT dest_val;
379 if ($dest_is_src_flag) {
380 dest_val = this->dest.template get<DestT>(w, lane);
381 }
382 Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
383 Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
384
385 dest_val = $expr;
386
387 this->dest.set(w, lane, dest_val);
388 }
389 }
390}
391
392'''
393
394exec_template_shift = '''
395template<typename DataType>
396void
397$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
398{
399 Wavefront *w = gpuDynInst->wavefront();
400
401 const VectorMask &mask = w->getPred();
402 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
403 if (mask[lane]) {
404 CType dest_val;
405
406 if ($dest_is_src_flag) {
407 dest_val = this->dest.template get<CType>(w, lane);
408 }
409
410 CType src_val0 = this->src0.template get<CType>(w, lane);
411 uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);
412
413 dest_val = $expr;
414
415 this->dest.set(w, lane, dest_val);
416 }
417 }
418}
419
420'''
421
422exec_template_2dt = '''
423template<typename DestDataType, class SrcDataType>
424void
425$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
426{
427 Wavefront *w = gpuDynInst->wavefront();
428
429 const VectorMask &mask = w->getPred();
430
431 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
432 if (mask[lane]) {
433 DestCType dest_val;
434 SrcCType src_val[$num_srcs];
435
436 for (int i = 0; i < $num_srcs; ++i) {
437 src_val[i] = this->src[i].template get<SrcCType>(w, lane);
438 }
439
440 dest_val = $expr;
441
442 this->dest.set(w, lane, dest_val);
443 }
444 }
445}
446
447'''
448
449exec_templates = {
450 'ArithInst': exec_template_1dt_varsrcs,
451 'CmovInst': exec_template_1dt_3srcs,
452 'ExtractInsertInst': exec_template_1dt_3srcs,
453 'ClassInst': exec_template_1dt_2src_1dest,
454 'CmpInst': exec_template_2dt,
455 'CvtInst': exec_template_2dt,
456 'PopcountInst': exec_template_2dt,
457 'LdInst': '',
458 'StInst': '',
459 'SpecialInstNoSrc': exec_template_nodt_nosrc,
460 'SpecialInst1Src': exec_template_nodt_1src,
461 'SpecialInstNoSrcNoDest': '',
462 'Stub': exec_template_stub,
463}
464
465###############
466#
467# Define code templates for the decoder cases
468#
469###############
470
471# decode template for nodt-opcode case
472decode_nodt_template = '''
473 case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''
474
475decode_case_prolog_class_inst = '''
476 case BRIG_OPCODE_$brig_opcode_upper:
477 {
478 //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
479 BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
480 //switch (baseOp->kind) {
481 // case BRIG_OPERAND_REG:
482 // type = ((const BrigOperandReg*)baseOp)->type;
483 // break;
484 // case BRIG_OPERAND_IMMED:
485 // type = ((const BrigOperandImmed*)baseOp)->type;
486 // break;
487 // default:
488 // fatal("CLASS unrecognized kind of operand %d\\n",
489 // baseOp->kind);
490 //}
491 switch (type) {'''
492
493# common prolog for 1dt- or 2dt-opcode case: switch on data type
494decode_case_prolog = '''
495 case BRIG_OPCODE_$brig_opcode_upper:
496 {
497 switch (ib->type) {'''
498
499# single-level decode case entry (for 1dt opcodes)
500decode_case_entry = \
501' case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
502
503decode_store_prolog = \
504' case BRIG_TYPE_$type_name: {'
505
506decode_store_case_epilog = '''
507 }'''
508
509decode_store_case_entry = \
510' return $constructor(ib, obj);'
511
512# common epilog for type switch
513decode_case_epilog = '''
514 default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
515 ib->type);
516 }
517 }
518 break;'''
519
# Additional templates for nested decode on a second type field (for
# compare and convert).  These are used in place of the
# decode_case_entry template to create a second-level switch on the
# second type field inside each case of the first-level type switch.
# Because the name and location of the second type can vary, the Brig
# instruction type must be provided in $brig_type, and the name of the
# second type field must be provided in $type2_field.
527decode_case2_prolog = '''
528 case BRIG_TYPE_$type_name:
529 switch (((Brig$brig_type*)ib)->$type2_field) {'''
530
531decode_case2_entry = \
532' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
533
534decode_case2_epilog = '''
535 default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
536 ((Brig$brig_type*)ib)->$type2_field);
537 }
538 break;'''
539
540# Figure out how many source operands an expr needs by looking for the
541# highest-numbered srcN value referenced. Since sources are numbered
542# starting at 0, the return value is N+1.
def num_src_operands(expr):
    # Probe for src2, then src1, then src0; the first one that occurs
    # anywhere in the expression text decides the operand count.
    for count in (3, 2, 1):
        if 'src%d' % (count - 1) in expr:
            return count
    # no srcN reference at all
    return 0
552
553###############
554#
555# Define final code generation methods
556#
# The gen() function (with gen_special() as a thin wrapper around it)
# is the interface for generating actual instructions.
559#
560###############
561
# Generate class declaration, exec function, and decode switch case
# for a brig_opcode, using a single-level type switch or, when
# type2_info is given, a nested two-level one. The 'types' parameter
# is a list or tuple of types for which the instruction should be
# instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    # Emit the header template, the exec template and the decoder
    # case(s) for one Brig opcode into header_code / exec_code /
    # decoder_code.
    #
    #   brig_opcode         opcode name; also used as the class name
    #   types               type names to instantiate for; None/empty or
    #                       a plain string selects the untyped decode
    #                       template
    #   expr                RHS expression written in terms of srcN/dest;
    #                       required when base_class is 'ArithInst'
    #   base_class          base class name (template arguments allowed);
    #                       selects the header and exec templates
    #   type2_info          (field_name, types) pair that adds a nested
    #                       switch on a second type field
    #   constructor_prefix  text placed before the class name in the
    #                       decoder's return statement
    #   is_store            use the store-style decode entries
    #
    # NOTE(review): the templates refer to values by $name (for example
    # $constructor, $type_name, $brig_opcode_upper) while the formatters
    # are called with only the template.  The values are presumably
    # picked up from this function's local variables, so do not rename
    # or remove locals without checking m5.util.code_formatter and every
    # template.
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # note that expr must be provided with ArithInst so we can
        # derive num_srcs for the template
        assert expr

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = expr.find('dest') != -1
        dest_is_src_flag = str(dest_is_src).lower() # 'true'/'false' for C++
        # Rewrite srcN as either indexed (src_val[N]) or suffixed
        # (src_valN) names depending on the base class.  The ShiftInst
        # branch and the fallback branch perform the same substitution.
        if base_class in ['ShiftInst']:
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        elif base_class in ['ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst']:
            expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
        else:
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    # SpecialInst* and Stub exec templates go to exec_code; ShiftInst
    # uses its own template; every other exec template is emitted into
    # header_code.
    if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        header_code(exec_template_shift)
    else:
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            # 'Class' gets a prolog that switches on the source type
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)
        if not type2_info:
            if not is_store:
                # single list of types, so do a basic one-level decode
                for type_name in types:
                    full_class_name = '%s<%s>' % (class_name, type_name.upper())
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case_entry)
            else:
                # store variant: one braced case per type, with the class
                # instantiated on two template arguments
                for type_name in types:
                    decoder_code(decode_store_prolog)
                    # second template argument is rebuilt from the type's
                    # leading letter and its first run of digits
                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
                    # NOTE(review): src_size is not referenced by any of
                    # the decode templates used in this loop; possibly
                    # unused -- confirm before removing
                    src_size = 32
                    type_type = type_name[0]
                    full_class_name = '%s<%s,%s>' % (class_name, \
                                                     type_name.upper(), \
                                                     '%s%d' % \
                                                     (type_type.upper(), \
                                                     type_size))
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_store_case_entry)
                    decoder_code(decode_store_case_epilog)
        else:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                # class name with the second template argument left as a
                # %s placeholder to be filled in per second-level type
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)

                # close the second-level switch for this first-level type
                decoder_code(decode_case2_epilog)

        # close the first-level type switch opened by the prolog
        decoder_code(decode_case_epilog)
653
654###############
655#
656# Generate instructions
657#
658###############
659
660# handy abbreviations for common sets of types
661
662# arithmetic ops are typically defined only on 32- and 64-bit sizes
arith_int_types = (
    'S32', 'U32',
    'S64', 'U64',
)
arith_float_types = (
    'F32',
    'F64',
)
# integer types first, then floating point
arith_types = arith_int_types + arith_float_types

bit_types = (
    'B1',
    'B32',
    'B64',
)

# the 8- and 16-bit integers followed by the arithmetic integer sizes
all_int_types = (
    'S8', 'U8',
    'S16', 'U16',
) + arith_int_types

# 'f16' memory ops may also be possible, but they are ignored for now.
mem_types = all_int_types + arith_float_types
# atomics: every integer type plus the 32- and 64-bit bit types
mem_atom_types = all_int_types + (
    'B32',
    'B64',
)
675
676##### Arithmetic & logical operations
# Basic two-operand arithmetic, instantiated for every arithmetic type.
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')

# Floating-point-only operations.
gen('CopySign', arith_float_types,
    'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')

# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
gen('Fract', arith_float_types,
    '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')

# (stray C-style ';' statement terminators removed from these two calls)
gen('Ncos', arith_float_types, 'cos(src0)')
gen('Nsin', arith_float_types, 'sin(src0)')

# Bitwise operations on the bit types.
gen('And', bit_types, 'src0 & src1')
gen('Or', bit_types, 'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')

gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)')
# Popcount needs a second-level switch on the source type; the call is
# already inside parentheses, so no backslash continuation is needed.
gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst',
    ('sourceType', ('B32', 'B64')))

gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')

# gen('Mul_hi', types=('s32','u32', '??'))
# gen('Mul24', types=('s32','u32', '??'))
gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')

gen('Abs', arith_types, 'std::abs(src0)')
gen('Neg', arith_types, '-src0')

gen('Mov', bit_types + arith_types, 'src0')
gen('Not', bit_types, 'heynot(src0)')

# mad and fma differ only in rounding behavior, which we don't emulate
# also there's an integer form of mad, but not of fma
gen('Mad', arith_types, 'src0 * src1 + src2')
gen('Fma', arith_float_types, 'src0 * src1 + src2')

# native floating point operations
gen('Nfma', arith_float_types, 'src0 * src1 + src2')

gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')

# see base/bitfield.hh
gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
    'ExtractInsertInst')

# expression reads 'dest', so gen() treats the destination as a source
gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
    'ExtractInsertInst')
739
740##### Compare
# Cmp decodes on the destination type first and then on sourceType.
gen('Cmp',
    types=('B1', 'S32', 'U32', 'F32'),
    expr='compare(src0, src1, this->cmpOp)',
    base_class='CmpInst',
    type2_info=('sourceType', arith_types + bit_types))
gen('Class',
    types=arith_float_types,
    expr='fpclassify(src0,src1)',
    base_class='ClassInst')

##### Conversion

# Of the bit types only B1 takes part in conversions (not B32 or B64);
# the remaining conversion types are the memory types.
cvt_types = ('B1',) + mem_types

gen('Cvt',
    types=cvt_types,
    expr='src0',
    base_class='CvtInst',
    type2_info=('sourceType', cvt_types))


##### Load & Store
# All memory and branch opcodes are built through 'decode' helpers
# rather than a plain 'new'.
gen('Lda', types=mem_types, base_class='LdInst',
    constructor_prefix='decode')
gen('Ld', types=mem_types, base_class='LdInst',
    constructor_prefix='decode')
gen('St', types=mem_types, base_class='StInst',
    constructor_prefix='decode', is_store=True)
gen('Atomic', types=mem_atom_types, base_class='StInst',
    constructor_prefix='decode')
gen('AtomicNoRet', types=mem_atom_types, base_class='StInst',
    constructor_prefix='decode')

# branches carry no type list, so they use the untyped decode template
gen('Cbr', base_class='LdInst', constructor_prefix='decode')
gen('Br', base_class='LdInst', constructor_prefix='decode')
764
765##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    # Generate a "special" instruction, choosing its base class from the
    # number of source operands that expr references.
    #
    #   brig_opcode  opcode name, passed through to gen()
    #   expr         expression for the destination value; may reference
    #                at most one source operand (src0)
    #   dest_type    type name used as the base class template argument
    #
    # Raises AssertionError if expr references more than one source.
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # This used to be 'assert false', which fails with a NameError
        # ('false' is not a Python name) and would be skipped entirely
        # under -O.  Raise explicitly so the failure is always reported
        # and says what went wrong.
        raise AssertionError(
            "gen_special('%s'): unsupported number of source operands (%d) "
            "in expression '%s'" % (brig_opcode, num_srcs, expr))

    gen(brig_opcode, None, expr, base_class)
776
# Work-item / work-group queries; src0 is used as the index in each one.
gen_special('WorkItemId', expr='w->workItemId[src0][lane]')
gen_special('WorkItemAbsId',
            expr='w->workItemId[src0][lane] + '
                 '(w->workGroupId[src0] * w->workGroupSz[src0])')
gen_special('WorkGroupId', expr='w->workGroupId[src0]')
gen_special('WorkGroupSize', expr='w->workGroupSz[src0]')
gen_special('CurrentWorkGroupSize', expr='w->workGroupSz[src0]')
gen_special('GridSize', expr='w->gridSz[src0]')
gen_special('GridGroups',
            expr='divCeil(w->gridSz[src0],w->workGroupSz[src0])')
# No-source queries.
gen_special('LaneId', expr='lane')
gen_special('WaveId', expr='w->wfId')
# Clock is the only special op here with a non-default destination type.
gen_special('Clock', expr='w->computeUnit->shader->tick_cnt',
            dest_type='U64')
789
790# gen_special('CU'', ')
791
# Opcodes with neither sources nor a destination.
for nodest_opcode in ('Ret', 'Barrier', 'MemFence'):
    gen(nodest_opcode, base_class='SpecialInstNoSrcNoDest')

# Map magic instructions to the BrigSyscall opcode
# Magic instructions are defined in magic.hh
#
# In the future, real HSA kernel system calls can be implemented and coexist
# with magic instructions.
gen('Call', base_class='SpecialInstNoSrcNoDest')
802
803# Stubs for unimplemented instructions:
804# These may need to be implemented at some point in the future, but
805# for now we just match the instructions with their operands.
806#
807# By defining stubs for these instructions, we can work with
808# applications that have them in dead/unused code paths.
809#
810# Needed for rocm-hcc compilations for HSA backends since
811# builtins-hsail library is `cat`d onto the generated kernels.
812# The builtins-hsail library consists of handcoded hsail functions
813# that __might__ be needed by the rocm-hcc compiler in certain binaries.
# One gen() call per stub opcode; the tuple order is the order in which
# the stubs are generated.
for stub_opcode in (
        'Bitmask', 'Bitrev', 'Firstbit', 'Lastbit',
        'Unpacklo', 'Unpackhi', 'Pack', 'Unpack',
        'Lerp', 'Packcvt', 'Unpackcvt',
        'Sad', 'Sadhi',
        'Activelanecount', 'Activelaneid', 'Activelanemask',
        'Activelanepermute',
        'Groupbaseptr', 'Signalnoret'):
    gen(stub_opcode, base_class='Stub')
833
834###############
835#
836# Generate file epilogs
837#
838###############
839header_code('''
840template<>
841inline void
842Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
843{
844 Wavefront *w = gpuDynInst->wavefront();
845
846 const VectorMask &mask = w->getPred();
847
848 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
849 if (mask[lane]) {
850 CType dest_val;
851 CType src_val;
852
853 src_val = this->src[0].template get<CType>(w, lane);
854
855 dest_val = (CType)(src_val);
856
857 this->dest.set(w, lane, dest_val);
858 }
859 }
860}
861
862template<>
863inline void
864Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
865{
866 Wavefront *w = gpuDynInst->wavefront();
867
868 const VectorMask &mask = w->getPred();
869
870 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
871 if (mask[lane]) {
872 CType dest_val;
873 CType src_val;
874
875 src_val = this->src[0].template get<CType>(w, lane);
876
877 dest_val = (CType)(src_val);
878
879 this->dest.set(w, lane, dest_val);
880 }
881 }
882}
883''')
884
885header_code.dedent()
886header_code('''
887} // namespace HsailISA
888''')
889
890# close off main decode switch
891decoder_code.dedent()
892decoder_code.dedent()
893decoder_code('''
894 default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
895 } // end switch(ib->opcode)
896 } // end decode()
897} // namespace HsailISA
898''')
899
900exec_code.dedent()
901exec_code('''
902} // namespace HsailISA
903''')
904
905###############
906#
907# Output accumulated code to files
908#
909###############
# argv[1..3] name the header, decoder and exec output files, in that order
for formatter, out_path in zip((header_code, decoder_code, exec_code),
                               sys.argv[1:4]):
    formatter.write(out_path)