Cross Reference: /gem5/src/arch/hsail/gen.py

Deleted Added

sdiff udiff text old ( 12563:8d59ed22ae79 ) new ( 13450:32a36390a49e )

full compact

gen.py (12563:8d59ed22ae79)	gen.py (13450:32a36390a49e)
1#! /usr/bin/python 2 3# 4# Copyright (c) 2015 Advanced Micro Devices, Inc. 5# All rights reserved. 6# 7# For use for simulation and test purposes only 8# 9# Redistribution and use in source and binary forms, with or without 10# modification, are permitted provided that the following conditions are met: 11# 12# 1. Redistributions of source code must retain the above copyright notice, 13# this list of conditions and the following disclaimer. 14# 15# 2. Redistributions in binary form must reproduce the above copyright notice, 16# this list of conditions and the following disclaimer in the documentation 17# and/or other materials provided with the distribution. 18# 19# 3. Neither the name of the copyright holder nor the names of its contributors 20# may be used to endorse or promote products derived from this software 21# without specific prior written permission. 22# 23# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33# POSSIBILITY OF SUCH DAMAGE. 
34# 35# Author: Steve Reinhardt 36# 37 38from __future__ import print_function 39 40import sys, re 41 42from m5.util import code_formatter 43 44if len(sys.argv) != 4: 45 print("Error: need 3 args (file names)") 46 sys.exit(0) 47 48header_code = code_formatter() 49decoder_code = code_formatter() 50exec_code = code_formatter() 51 52############### 53# 54# Generate file prologs (includes etc.) 55# 56############### 57 58header_code(''' 59#include "arch/hsail/insts/decl.hh" 60#include "base/bitfield.hh" 61#include "gpu-compute/hsail_code.hh" 62#include "gpu-compute/wavefront.hh" 63 64namespace HsailISA 65{ 66''') 67header_code.indent() 68 69decoder_code(''' 70#include "arch/hsail/gpu_decoder.hh" 71#include "arch/hsail/insts/branch.hh" 72#include "arch/hsail/insts/decl.hh" 73#include "arch/hsail/insts/gen_decl.hh" 74#include "arch/hsail/insts/mem.hh" 75#include "arch/hsail/insts/mem_impl.hh" 76#include "gpu-compute/brig_object.hh" 77 78namespace HsailISA 79{ 80 std::vector<GPUStaticInst> Decoder::decodedInsts; 81 82 GPUStaticInst 83 Decoder::decode(MachInst machInst) 84 { 85 using namespace Brig; 86 87 const BrigInstBase ib = machInst.brigInstBase; 88 const BrigObject obj = machInst.brigObj; 89 90 switch(ib->opcode) { 91''') 92decoder_code.indent() 93decoder_code.indent() 94 95exec_code(''' 96#include "arch/hsail/insts/gen_decl.hh" 97#include "base/intmath.hh" 98 99namespace HsailISA 100{ 101''') 102exec_code.indent() 103 104############### 105# 106# Define code templates for class declarations (for header file) 107# 108############### 109 110# Basic header template for an instruction stub. 111header_template_stub = ''' 112class $class_name : public $base_class 113{ 114 public: 115 typedef $base_class Base; 116 117 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 118 : Base(ib, obj, "$opcode") 119 { 120 } 121 122 void execute(GPUDynInstPtr gpuDynInst); 123}; 124 125''' 126 127# Basic header template for an instruction with no template parameters. 
128header_template_nodt = ''' 129class $class_name : public $base_class 130{ 131 public: 132 typedef $base_class Base; 133 134 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 135 : Base(ib, obj, "$opcode") 136 { 137 } 138 139 void execute(GPUDynInstPtr gpuDynInst); 140}; 141 142''' 143 144# Basic header template for an instruction with a single DataType 145# template parameter. 146header_template_1dt = ''' 147template<typename DataType> 148class $class_name : public $base_class<DataType> 149{ 150 public: 151 typedef $base_class<DataType> Base; 152 typedef typename DataType::CType CType; 153 154 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 155 : Base(ib, obj, "$opcode") 156 { 157 } 158 159 void execute(GPUDynInstPtr gpuDynInst); 160}; 161 162''' 163 164header_template_1dt_noexec = ''' 165template<typename DataType> 166class $class_name : public $base_class<DataType> 167{ 168 public: 169 typedef $base_class<DataType> Base; 170 typedef typename DataType::CType CType; 171 172 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 173 : Base(ib, obj, "$opcode") 174 { 175 } 176}; 177 178''' 179 180# Same as header_template_1dt, except the base class has a second 181# template parameter NumSrcOperands to allow a variable number of 182# source operands. Note that since this is implemented with an array, 183# it only works for instructions where all sources are of the same 184# type (like most arithmetics). 
185header_template_1dt_varsrcs = ''' 186template<typename DataType> 187class $class_name : public $base_class<DataType, $num_srcs> 188{ 189 public: 190 typedef $base_class<DataType, $num_srcs> Base; 191 typedef typename DataType::CType CType; 192 193 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 194 : Base(ib, obj, "$opcode") 195 { 196 } 197 198 void execute(GPUDynInstPtr gpuDynInst); 199}; 200 201''' 202 203# Header template for instruction with two DataType template 204# parameters, one for the dest and one for the source. This is used 205# by compare and convert. 206header_template_2dt = ''' 207template<typename DestDataType, class SrcDataType> 208class $class_name : public $base_class<DestDataType, SrcDataType> 209{ 210 public: 211 typedef $base_class<DestDataType, SrcDataType> Base; 212 typedef typename DestDataType::CType DestCType; 213 typedef typename SrcDataType::CType SrcCType; 214 215 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 216 : Base(ib, obj, "$opcode") 217 { 218 } 219 220 void execute(GPUDynInstPtr gpuDynInst); 221}; 222 223''' 224 225header_templates = { 226 'ArithInst': header_template_1dt_varsrcs, 227 'CmovInst': header_template_1dt, 228 'ClassInst': header_template_1dt, 229 'ShiftInst': header_template_1dt, 230 'ExtractInsertInst': header_template_1dt, 231 'CmpInst': header_template_2dt, 232 'CvtInst': header_template_2dt, 233 'PopcountInst': header_template_2dt, 234 'LdInst': '', 235 'StInst': '', 236 'SpecialInstNoSrc': header_template_nodt, 237 'SpecialInst1Src': header_template_nodt, 238 'SpecialInstNoSrcNoDest': '', 239 'Stub': header_template_stub, 240} 241 242############### 243# 244# Define code templates for exec functions 245# 246############### 247 248# exec function body 249exec_template_stub = ''' 250void 251$class_name::execute(GPUDynInstPtr gpuDynInst) 252{ 253 fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble()); 254} 255 256''' 257exec_template_nodt_nosrc = ''' 258void 
259$class_name::execute(GPUDynInstPtr gpuDynInst) 260{ 261 Wavefront w = gpuDynInst->wavefront(); 262* 263 typedef Base::DestCType DestCType; 264 265 const VectorMask &mask = w->getPred(); 266 267 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 268 if (mask[lane]) { 269 DestCType dest_val = $expr; 270 this->dest.set(w, lane, dest_val); 271 } 272 } 273} 274 275''' 276 277exec_template_nodt_1src = ''' 278void 279$class_name::execute(GPUDynInstPtr gpuDynInst) 280{ 281 Wavefront w = gpuDynInst->wavefront(); 282* 283 typedef Base::DestCType DestCType; 284 typedef Base::SrcCType SrcCType; 285 286 const VectorMask &mask = w->getPred(); 287 288 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 289 if (mask[lane]) { 290 SrcCType src_val0 = this->src0.get<SrcCType>(w, lane); 291 DestCType dest_val = $expr; 292 293 this->dest.set(w, lane, dest_val); 294 } 295 } 296} 297 298''' 299 300exec_template_1dt_varsrcs = ''' 301template<typename DataType> 302void 303$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 304{ 305 Wavefront w = gpuDynInst->wavefront(); 306* 307 const VectorMask &mask = w->getPred(); 308 309 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 310 if (mask[lane]) { 311 CType dest_val; 312 if ($dest_is_src_flag) { 313 dest_val = this->dest.template get<CType>(w, lane); 314 } 315 316 CType src_val[$num_srcs]; 317 318 for (int i = 0; i < $num_srcs; ++i) { 319 src_val[i] = this->src[i].template get<CType>(w, lane); 320 } 321 322 dest_val = (CType)($expr); 323 324 this->dest.set(w, lane, dest_val); 325 } 326 } 327} 328 329''' 330 331exec_template_1dt_3srcs = ''' 332template<typename DataType> 333void 334$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 335{ 336 Wavefront w = gpuDynInst->wavefront(); 337* 338 typedef typename Base::Src0CType Src0T; 339 typedef typename Base::Src1CType Src1T; 340 typedef typename Base::Src2CType Src2T; 341 342 const VectorMask &mask = w->getPred(); 343 344 for (int lane = 0; lane < 
w->computeUnit->wfSize(); ++lane) { 345 if (mask[lane]) { 346 CType dest_val; 347 348 if ($dest_is_src_flag) { 349 dest_val = this->dest.template get<CType>(w, lane); 350 } 351 352 Src0T src_val0 = this->src0.template get<Src0T>(w, lane); 353 Src1T src_val1 = this->src1.template get<Src1T>(w, lane); 354 Src2T src_val2 = this->src2.template get<Src2T>(w, lane); 355 356 dest_val = $expr; 357 358 this->dest.set(w, lane, dest_val); 359 } 360 } 361} 362 363''' 364 365exec_template_1dt_2src_1dest = ''' 366template<typename DataType> 367void 368$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 369{ 370 Wavefront w = gpuDynInst->wavefront(); 371* 372 typedef typename Base::DestCType DestT; 373 typedef CType Src0T; 374 typedef typename Base::Src1CType Src1T; 375 376 const VectorMask &mask = w->getPred(); 377 378 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 379 if (mask[lane]) { 380 DestT dest_val; 381 if ($dest_is_src_flag) { 382 dest_val = this->dest.template get<DestT>(w, lane); 383 } 384 Src0T src_val0 = this->src0.template get<Src0T>(w, lane); 385 Src1T src_val1 = this->src1.template get<Src1T>(w, lane); 386 387 dest_val = $expr; 388 389 this->dest.set(w, lane, dest_val); 390 } 391 } 392} 393 394''' 395 396exec_template_shift = ''' 397template<typename DataType> 398void 399$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 400{ 401 Wavefront w = gpuDynInst->wavefront(); 402* 403 const VectorMask &mask = w->getPred(); 404 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 405 if (mask[lane]) { 406 CType dest_val; 407 408 if ($dest_is_src_flag) { 409 dest_val = this->dest.template get<CType>(w, lane); 410 } 411 412 CType src_val0 = this->src0.template get<CType>(w, lane); 413 uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane); 414 415 dest_val = $expr; 416 417 this->dest.set(w, lane, dest_val); 418 } 419 } 420} 421 422''' 423 424exec_template_2dt = ''' 425template<typename DestDataType, class SrcDataType> 426void 
427$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst) 428{ 429 Wavefront w = gpuDynInst->wavefront(); 430* 431 const VectorMask &mask = w->getPred(); 432 433 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 434 if (mask[lane]) { 435 DestCType dest_val; 436 SrcCType src_val[$num_srcs]; 437 438 for (int i = 0; i < $num_srcs; ++i) { 439 src_val[i] = this->src[i].template get<SrcCType>(w, lane); 440 } 441 442 dest_val = $expr; 443 444 this->dest.set(w, lane, dest_val); 445 } 446 } 447} 448 449''' 450 451exec_templates = { 452 'ArithInst': exec_template_1dt_varsrcs, 453 'CmovInst': exec_template_1dt_3srcs, 454 'ExtractInsertInst': exec_template_1dt_3srcs, 455 'ClassInst': exec_template_1dt_2src_1dest, 456 'CmpInst': exec_template_2dt, 457 'CvtInst': exec_template_2dt, 458 'PopcountInst': exec_template_2dt, 459 'LdInst': '', 460 'StInst': '', 461 'SpecialInstNoSrc': exec_template_nodt_nosrc, 462 'SpecialInst1Src': exec_template_nodt_1src, 463 'SpecialInstNoSrcNoDest': '', 464 'Stub': exec_template_stub, 465} 466 467############### 468# 469# Define code templates for the decoder cases 470# 471############### 472 473# decode template for nodt-opcode case 474decode_nodt_template = ''' 475 case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);''' 476 477decode_case_prolog_class_inst = ''' 478 case BRIG_OPCODE_$brig_opcode_upper: 479 { 480 //const BrigOperandBase baseOp = obj->getOperand(ib->operands[1]); 481* BrigType16_t type = ((BrigInstSourceType)ib)->sourceType; 482* //switch (baseOp->kind) { 483 // case BRIG_OPERAND_REG: 484 // type = ((const BrigOperandReg)baseOp)->type; 485* // break; 486 // case BRIG_OPERAND_IMMED: 487 // type = ((const BrigOperandImmed)baseOp)->type; 488* // break; 489 // default: 490 // fatal("CLASS unrecognized kind of operand %d\\n", 491 // baseOp->kind); 492 //} 493 switch (type) {''' 494 495# common prolog for 1dt- or 2dt-opcode case: switch on data type 496decode_case_prolog = ''' 497 case 
BRIG_OPCODE_$brig_opcode_upper: 498 { 499 switch (ib->type) {''' 500 501# single-level decode case entry (for 1dt opcodes) 502decode_case_entry = \ 503' case BRIG_TYPE_$type_name: return $constructor(ib, obj);' 504 505decode_store_prolog = \ 506' case BRIG_TYPE_$type_name: {' 507 508decode_store_case_epilog = ''' 509 }''' 510 511decode_store_case_entry = \ 512' return $constructor(ib, obj);' 513 514# common epilog for type switch 515decode_case_epilog = ''' 516 default: fatal("$brig_opcode_upper: unrecognized type %d\\n", 517 ib->type); 518 } 519 } 520 break;''' 521 522# Additional templates for nested decode on a second type field (for 523# compare and convert). These are used in place of the 524# decode_case_entry template to create a second-level switch on on the 525# second type field inside each case of the first-level type switch. 526# Because the name and location of the second type can vary, the Brig 527# instruction type must be provided in $brig_type, and the name of the 528# second type field must be provided in $type_field. 529decode_case2_prolog = ''' 530 case BRIG_TYPE_$type_name: 531 switch (((Brig$brig_type)ib)->$type2_field) {''' 532* 533decode_case2_entry = \ 534' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);' 535 536decode_case2_epilog = ''' 537 default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n", 538 ((Brig$brig_type)ib)->$type2_field); 539* } 540 break;''' 541 542# Figure out how many source operands an expr needs by looking for the 543# highest-numbered srcN value referenced. Since sources are numbered 544# starting at 0, the return value is N+1. 
def num_src_operands(expr):
    """Return how many source operands the C++ expression `expr` needs.

    Operands are written src0, src1, ... in `expr`.  The result is the
    highest index referenced plus one (sources are numbered from 0), or
    0 when no srcN token appears at all.

    The same word-bounded pattern that gen() uses to rewrite operands is
    used here, so the count always agrees with what actually gets
    substituted, and it is not capped at three operands.
    """
    indices = [int(n) for n in re.findall(r'\bsrc(\d)\b', expr)]
    return max(indices) + 1 if indices else 0

###############
#
# Define final code generation methods
#
# gen() below is the interface for generating actual instructions.
#
###############

# Generate class declaration, exec function, and decode switch case
# for a brig_opcode.  The 'types' parameter is a list or tuple of types
# for which the instruction should be instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit header, exec and decoder code for one HSAIL opcode.

    Arguments:
      brig_opcode -- opcode name; used as the C++ class name, upper-cased
          for the BRIG_OPCODE_* case label and lower-cased for the
          opcode string handed to the base class constructor.
      types -- tuple/list of type names to instantiate the instruction
          for.  None, empty, or a plain string selects the untyped
          (single constructor) decode case.
      expr -- C++ expression computing the result, written in terms of
          src0/src1/... and optionally dest.  Required for 'ArithInst'.
      base_class -- C++ base class name, optionally carrying template
          arguments (e.g. 'Foo<Bar>'); the bare name selects the
          header/exec templates.
      type2_info -- optional (type2_field, types2) pair that requests a
          nested switch on a second type field.
      constructor_prefix -- text placed before the class name in the
          generated 'return ...(ib, obj);' statement.
      is_store -- when true (and type2_info is not given), each type
          case is wrapped in its own block and the class is
          instantiated with two template arguments.

    NOTE(review): the templates refer to this function's locals by name
    ($class_name, $opcode, $expr, $num_srcs, $dest_is_src_flag,
    $constructor, $type_name, $type2_name, $type2_field, $brig_type,
    $brig_opcode_upper, $base_class) and no values are passed
    explicitly, so the formatter presumably resolves them from this
    frame.  Do not rename those locals.
    """
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # expr must be provided with ArithInst so we can derive
        # num_srcs for the template
        assert expr, "ArithInst requires an expr to derive num_srcs"

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = 'dest' in expr
        dest_is_src_flag = str(dest_is_src).lower() # for C++
        # These base classes keep their sources in an array (src_val[N]);
        # every other base class uses individually named src_valN locals.
        if base_class in ('ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst'):
            src_repl = r'src_val[\1]'
        else:
            src_repl = r'src_val\1'
        expr = re.sub(r'\bsrc(\d)\b', src_repl, expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates.  The pattern must accept arguments of any
    # length ('<.*>'); matching a single character would leave names
    # such as 'Foo<U32>' intact and the table lookups below would fail.
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
        # non-template classes: the definition goes to the exec file
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        header_code(exec_template_shift)
    else:
        # class templates: the definition is emitted into the header
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)

        if type2_info:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)

                decoder_code(decode_case2_epilog)
        elif is_store:
            # single list of types; each case gets its own block and
            # the class takes a second template argument built from the
            # type's leading letter and its bit width
            for type_name in types:
                decoder_code(decode_store_prolog)
                type_size = int(re.findall(r'[0-9]+', type_name)[0])
                type_type = type_name[0]
                full_class_name = '%s<%s,%s>' % (
                    class_name, type_name.upper(),
                    '%s%d' % (type_type.upper(), type_size))
                constructor = constructor_prefix + full_class_name
                decoder_code(decode_store_case_entry)
                decoder_code(decode_store_case_epilog)
        else:
            # single list of types, do basic one-level decode
            for type_name in types:
                full_class_name = '%s<%s>' % (class_name, type_name.upper())
                constructor = constructor_prefix + full_class_name
                decoder_code(decode_case_entry)

        decoder_code(decode_case_epilog)

###############
#
# Generate instructions
#
###############

# handy abbreviations for common sets of types

# arithmetic ops are typically defined only on 32- and 64-bit sizes
arith_int_types = ('S32', 'U32', 'S64', 'U64')
arith_float_types = ('F32', 'F64')
arith_types = arith_int_types + arith_float_types

bit_types = ('B1', 'B32', 'B64')

all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types

# I think you might be able to do 'f16' memory ops too, but we'll
# ignore them for now.
mem_types = all_int_types + arith_float_types
mem_atom_types = all_int_types + ('B32', 'B64')

##### Arithmetic & logical operations
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')

gen('CopySign', arith_float_types,
    'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')

# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
gen('Fract', arith_float_types,
    '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')

gen('Ncos', arith_float_types, 'cos(src0)')
gen('Nsin', arith_float_types, 'sin(src0)')

gen('And', bit_types, 'src0 & src1')
# plain '|': a backslash here would be an invalid Python escape and
# would be copied verbatim into the generated C++ expression
gen('Or', bit_types, 'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')

# ---- right-hand pane of the side-by-side diff (new revision) starts here ----
#! /usr/bin/python

#
# Copyright (c) 2015 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33# POSSIBILITY OF SUCH DAMAGE. 34# 35# Author: Steve Reinhardt 36# 37 38from __future__ import print_function 39 40import sys, re 41 42from m5.util import code_formatter 43 44if len(sys.argv) != 4: 45 print("Error: need 3 args (file names)") 46 sys.exit(0) 47 48header_code = code_formatter() 49decoder_code = code_formatter() 50exec_code = code_formatter() 51 52############### 53# 54# Generate file prologs (includes etc.) 55# 56############### 57 58header_code(''' 59#include "arch/hsail/insts/decl.hh" 60#include "base/bitfield.hh" 61#include "gpu-compute/hsail_code.hh" 62#include "gpu-compute/wavefront.hh" 63 64namespace HsailISA 65{ 66''') 67header_code.indent() 68 69decoder_code(''' 70#include "arch/hsail/gpu_decoder.hh" 71#include "arch/hsail/insts/branch.hh" 72#include "arch/hsail/insts/decl.hh" 73#include "arch/hsail/insts/gen_decl.hh" 74#include "arch/hsail/insts/mem.hh" 75#include "arch/hsail/insts/mem_impl.hh" 76#include "gpu-compute/brig_object.hh" 77 78namespace HsailISA 79{ 80 std::vector<GPUStaticInst> Decoder::decodedInsts; 81 82 GPUStaticInst 83 Decoder::decode(MachInst machInst) 84 { 85 using namespace Brig; 86 87 const BrigInstBase ib = machInst.brigInstBase; 88 const BrigObject obj = machInst.brigObj; 89 90 switch(ib->opcode) { 91''') 92decoder_code.indent() 93decoder_code.indent() 94 95exec_code(''' 96#include "arch/hsail/insts/gen_decl.hh" 97#include "base/intmath.hh" 98 99namespace HsailISA 100{ 101''') 102exec_code.indent() 103 
104############### 105# 106# Define code templates for class declarations (for header file) 107# 108############### 109 110# Basic header template for an instruction stub. 111header_template_stub = ''' 112class $class_name : public $base_class 113{ 114 public: 115 typedef $base_class Base; 116 117 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 118 : Base(ib, obj, "$opcode") 119 { 120 } 121 122 void execute(GPUDynInstPtr gpuDynInst); 123}; 124 125''' 126 127# Basic header template for an instruction with no template parameters. 128header_template_nodt = ''' 129class $class_name : public $base_class 130{ 131 public: 132 typedef $base_class Base; 133 134 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 135 : Base(ib, obj, "$opcode") 136 { 137 } 138 139 void execute(GPUDynInstPtr gpuDynInst); 140}; 141 142''' 143 144# Basic header template for an instruction with a single DataType 145# template parameter. 146header_template_1dt = ''' 147template<typename DataType> 148class $class_name : public $base_class<DataType> 149{ 150 public: 151 typedef $base_class<DataType> Base; 152 typedef typename DataType::CType CType; 153 154 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 155 : Base(ib, obj, "$opcode") 156 { 157 } 158 159 void execute(GPUDynInstPtr gpuDynInst); 160}; 161 162''' 163 164header_template_1dt_noexec = ''' 165template<typename DataType> 166class $class_name : public $base_class<DataType> 167{ 168 public: 169 typedef $base_class<DataType> Base; 170 typedef typename DataType::CType CType; 171 172 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 173 : Base(ib, obj, "$opcode") 174 { 175 } 176}; 177 178''' 179 180# Same as header_template_1dt, except the base class has a second 181# template parameter NumSrcOperands to allow a variable number of 182# source operands. 
Note that since this is implemented with an array, 183# it only works for instructions where all sources are of the same 184# type (like most arithmetics). 185header_template_1dt_varsrcs = ''' 186template<typename DataType> 187class $class_name : public $base_class<DataType, $num_srcs> 188{ 189 public: 190 typedef $base_class<DataType, $num_srcs> Base; 191 typedef typename DataType::CType CType; 192 193 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 194 : Base(ib, obj, "$opcode") 195 { 196 } 197 198 void execute(GPUDynInstPtr gpuDynInst); 199}; 200 201''' 202 203# Header template for instruction with two DataType template 204# parameters, one for the dest and one for the source. This is used 205# by compare and convert. 206header_template_2dt = ''' 207template<typename DestDataType, class SrcDataType> 208class $class_name : public $base_class<DestDataType, SrcDataType> 209{ 210 public: 211 typedef $base_class<DestDataType, SrcDataType> Base; 212 typedef typename DestDataType::CType DestCType; 213 typedef typename SrcDataType::CType SrcCType; 214 215 $class_name(const Brig::BrigInstBase ib, const BrigObject obj) 216 : Base(ib, obj, "$opcode") 217 { 218 } 219 220 void execute(GPUDynInstPtr gpuDynInst); 221}; 222 223''' 224 225header_templates = { 226 'ArithInst': header_template_1dt_varsrcs, 227 'CmovInst': header_template_1dt, 228 'ClassInst': header_template_1dt, 229 'ShiftInst': header_template_1dt, 230 'ExtractInsertInst': header_template_1dt, 231 'CmpInst': header_template_2dt, 232 'CvtInst': header_template_2dt, 233 'PopcountInst': header_template_2dt, 234 'LdInst': '', 235 'StInst': '', 236 'SpecialInstNoSrc': header_template_nodt, 237 'SpecialInst1Src': header_template_nodt, 238 'SpecialInstNoSrcNoDest': '', 239 'Stub': header_template_stub, 240} 241 242############### 243# 244# Define code templates for exec functions 245# 246############### 247 248# exec function body 249exec_template_stub = ''' 250void 251$class_name::execute(GPUDynInstPtr 
gpuDynInst) 252{ 253 fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble()); 254} 255 256''' 257exec_template_nodt_nosrc = ''' 258void 259$class_name::execute(GPUDynInstPtr gpuDynInst) 260{ 261 Wavefront w = gpuDynInst->wavefront(); 262* 263 typedef Base::DestCType DestCType; 264 265 const VectorMask &mask = w->getPred(); 266 267 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 268 if (mask[lane]) { 269 DestCType dest_val = $expr; 270 this->dest.set(w, lane, dest_val); 271 } 272 } 273} 274 275''' 276 277exec_template_nodt_1src = ''' 278void 279$class_name::execute(GPUDynInstPtr gpuDynInst) 280{ 281 Wavefront w = gpuDynInst->wavefront(); 282* 283 typedef Base::DestCType DestCType; 284 typedef Base::SrcCType SrcCType; 285 286 const VectorMask &mask = w->getPred(); 287 288 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 289 if (mask[lane]) { 290 SrcCType src_val0 = this->src0.get<SrcCType>(w, lane); 291 DestCType dest_val = $expr; 292 293 this->dest.set(w, lane, dest_val); 294 } 295 } 296} 297 298''' 299 300exec_template_1dt_varsrcs = ''' 301template<typename DataType> 302void 303$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 304{ 305 Wavefront w = gpuDynInst->wavefront(); 306* 307 const VectorMask &mask = w->getPred(); 308 309 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 310 if (mask[lane]) { 311 CType dest_val; 312 if ($dest_is_src_flag) { 313 dest_val = this->dest.template get<CType>(w, lane); 314 } 315 316 CType src_val[$num_srcs]; 317 318 for (int i = 0; i < $num_srcs; ++i) { 319 src_val[i] = this->src[i].template get<CType>(w, lane); 320 } 321 322 dest_val = (CType)($expr); 323 324 this->dest.set(w, lane, dest_val); 325 } 326 } 327} 328 329''' 330 331exec_template_1dt_3srcs = ''' 332template<typename DataType> 333void 334$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 335{ 336 Wavefront w = gpuDynInst->wavefront(); 337* 338 typedef typename Base::Src0CType Src0T; 339 typedef typename 
Base::Src1CType Src1T; 340 typedef typename Base::Src2CType Src2T; 341 342 const VectorMask &mask = w->getPred(); 343 344 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 345 if (mask[lane]) { 346 CType dest_val; 347 348 if ($dest_is_src_flag) { 349 dest_val = this->dest.template get<CType>(w, lane); 350 } 351 352 Src0T src_val0 = this->src0.template get<Src0T>(w, lane); 353 Src1T src_val1 = this->src1.template get<Src1T>(w, lane); 354 Src2T src_val2 = this->src2.template get<Src2T>(w, lane); 355 356 dest_val = $expr; 357 358 this->dest.set(w, lane, dest_val); 359 } 360 } 361} 362 363''' 364 365exec_template_1dt_2src_1dest = ''' 366template<typename DataType> 367void 368$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 369{ 370 Wavefront w = gpuDynInst->wavefront(); 371* 372 typedef typename Base::DestCType DestT; 373 typedef CType Src0T; 374 typedef typename Base::Src1CType Src1T; 375 376 const VectorMask &mask = w->getPred(); 377 378 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 379 if (mask[lane]) { 380 DestT dest_val; 381 if ($dest_is_src_flag) { 382 dest_val = this->dest.template get<DestT>(w, lane); 383 } 384 Src0T src_val0 = this->src0.template get<Src0T>(w, lane); 385 Src1T src_val1 = this->src1.template get<Src1T>(w, lane); 386 387 dest_val = $expr; 388 389 this->dest.set(w, lane, dest_val); 390 } 391 } 392} 393 394''' 395 396exec_template_shift = ''' 397template<typename DataType> 398void 399$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) 400{ 401 Wavefront w = gpuDynInst->wavefront(); 402* 403 const VectorMask &mask = w->getPred(); 404 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 405 if (mask[lane]) { 406 CType dest_val; 407 408 if ($dest_is_src_flag) { 409 dest_val = this->dest.template get<CType>(w, lane); 410 } 411 412 CType src_val0 = this->src0.template get<CType>(w, lane); 413 uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane); 414 415 dest_val = $expr; 416 417 
this->dest.set(w, lane, dest_val); 418 } 419 } 420} 421 422''' 423 424exec_template_2dt = ''' 425template<typename DestDataType, class SrcDataType> 426void 427$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst) 428{ 429 Wavefront w = gpuDynInst->wavefront(); 430* 431 const VectorMask &mask = w->getPred(); 432 433 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 434 if (mask[lane]) { 435 DestCType dest_val; 436 SrcCType src_val[$num_srcs]; 437 438 for (int i = 0; i < $num_srcs; ++i) { 439 src_val[i] = this->src[i].template get<SrcCType>(w, lane); 440 } 441 442 dest_val = $expr; 443 444 this->dest.set(w, lane, dest_val); 445 } 446 } 447} 448 449''' 450 451exec_templates = { 452 'ArithInst': exec_template_1dt_varsrcs, 453 'CmovInst': exec_template_1dt_3srcs, 454 'ExtractInsertInst': exec_template_1dt_3srcs, 455 'ClassInst': exec_template_1dt_2src_1dest, 456 'CmpInst': exec_template_2dt, 457 'CvtInst': exec_template_2dt, 458 'PopcountInst': exec_template_2dt, 459 'LdInst': '', 460 'StInst': '', 461 'SpecialInstNoSrc': exec_template_nodt_nosrc, 462 'SpecialInst1Src': exec_template_nodt_1src, 463 'SpecialInstNoSrcNoDest': '', 464 'Stub': exec_template_stub, 465} 466 467############### 468# 469# Define code templates for the decoder cases 470# 471############### 472 473# decode template for nodt-opcode case 474decode_nodt_template = ''' 475 case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);''' 476 477decode_case_prolog_class_inst = ''' 478 case BRIG_OPCODE_$brig_opcode_upper: 479 { 480 //const BrigOperandBase baseOp = obj->getOperand(ib->operands[1]); 481* BrigType16_t type = ((BrigInstSourceType)ib)->sourceType; 482* //switch (baseOp->kind) { 483 // case BRIG_OPERAND_REG: 484 // type = ((const BrigOperandReg)baseOp)->type; 485* // break; 486 // case BRIG_OPERAND_IMMED: 487 // type = ((const BrigOperandImmed)baseOp)->type; 488* // break; 489 // default: 490 // fatal("CLASS unrecognized kind of operand %d\\n", 491 // 
baseOp->kind); 492 //} 493 switch (type) {''' 494 495# common prolog for 1dt- or 2dt-opcode case: switch on data type 496decode_case_prolog = ''' 497 case BRIG_OPCODE_$brig_opcode_upper: 498 { 499 switch (ib->type) {''' 500 501# single-level decode case entry (for 1dt opcodes) 502decode_case_entry = \ 503' case BRIG_TYPE_$type_name: return $constructor(ib, obj);' 504 505decode_store_prolog = \ 506' case BRIG_TYPE_$type_name: {' 507 508decode_store_case_epilog = ''' 509 }''' 510 511decode_store_case_entry = \ 512' return $constructor(ib, obj);' 513 514# common epilog for type switch 515decode_case_epilog = ''' 516 default: fatal("$brig_opcode_upper: unrecognized type %d\\n", 517 ib->type); 518 } 519 } 520 break;''' 521 522# Additional templates for nested decode on a second type field (for 523# compare and convert). These are used in place of the 524# decode_case_entry template to create a second-level switch on on the 525# second type field inside each case of the first-level type switch. 526# Because the name and location of the second type can vary, the Brig 527# instruction type must be provided in $brig_type, and the name of the 528# second type field must be provided in $type_field. 529decode_case2_prolog = ''' 530 case BRIG_TYPE_$type_name: 531 switch (((Brig$brig_type)ib)->$type2_field) {''' 532* 533decode_case2_entry = \ 534' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);' 535 536decode_case2_epilog = ''' 537 default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n", 538 ((Brig$brig_type)ib)->$type2_field); 539* } 540 break;''' 541 542# Figure out how many source operands an expr needs by looking for the 543# highest-numbered srcN value referenced. Since sources are numbered 544# starting at 0, the return value is N+1. 
def num_src_operands(expr):
    # Probe for the highest-numbered source first so that an expression
    # mentioning src2 reports three operands even if it also uses src0.
    for index in (2, 1, 0):
        if 'src%d' % index in expr:
            return index + 1
    return 0

###############
#
# Define final code generation methods
#
# gen() is the interface for generating actual instructions; it covers
# opcodes with no type switch, a one-level type switch, and a two-level
# type switch.
#
###############

# Generate class declaration, exec function, and decode switch case
# for a brig_opcode.
#
#   brig_opcode        - opcode name; used as the C++ class name, and
#                        upper-cased for the BRIG_OPCODE_* case label.
#   types              - list or tuple of types for which the instruction
#                        should be instantiated.  None (or a plain string)
#                        produces a single untyped decode case.
#   expr               - C++ expression computing the result; 'srcN' and
#                        'dest' are rewritten to the local variable names
#                        used by the exec templates.
#   base_class         - key selecting the header and exec templates.
#   type2_info         - optional (field_name, types) pair requesting a
#                        nested switch on a second type field.
#   constructor_prefix - text placed in front of the class name in the
#                        generated 'return ...(ib, obj);' statement.
#   is_store           - emit the two-parameter <TYPE,TYPE> instantiation
#                        wrapped in its own case block.
#
# NOTE(review): the templates emitted below are never handed their
# $-substitution values explicitly, so code_formatter presumably resolves
# names such as $class_name, $opcode, $expr, $num_srcs and $constructor
# from this function's local variables.  Do not rename or remove locals
# without checking m5.util.code_formatter.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # note that expr must be provided with ArithInst so we can
        # derive num_srcs for the template
        assert expr

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = 'dest' in expr
        dest_is_src_flag = str(dest_is_src).lower() # for C++
        # These base classes keep their sources in an array (src_val[N]);
        # every other base class uses individually named src_valN locals.
        if base_class in ('ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst'):
            src_repl = r'src_val[\1]'
        else:
            src_repl = r'src_val\1'
        expr = re.sub(r'\bsrc(\d)\b', src_repl, expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates.  The match must be greedy over the whole
    # argument list: 'SpecialInstNoSrc<U32>' has to become
    # 'SpecialInstNoSrc', which is the key used by the template tables.
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    if base_class.startswith(('SpecialInst', 'Stub')):
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        header_code(exec_template_shift)
    else:
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
        return

    # multiple types, need at least one level of decode
    if brig_opcode == 'Class':
        decoder_code(decode_case_prolog_class_inst)
    else:
        decoder_code(decode_case_prolog)

    if type2_info:
        # need secondary type switch (convert, compare)
        # unpack extra info on second switch
        (type2_field, types2) = type2_info
        brig_type = 'Inst%s' % brig_opcode
        for type_name in types:
            decoder_code(decode_case2_prolog)
            fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
            for type2_name in types2:
                full_class_name = fmt % type2_name.upper()
                constructor = constructor_prefix + full_class_name
                decoder_code(decode_case2_entry)

            decoder_code(decode_case2_epilog)
    elif is_store:
        # single list of types, one-level decode with each case wrapped
        # in its own block
        for type_name in types:
            decoder_code(decode_store_prolog)
            type_size = int(re.findall(r'[0-9]+', type_name)[0])
            type_type = type_name[0]
            # second template argument: type letter + bit width
            full_class_name = '%s<%s,%s>' % (
                class_name, type_name.upper(),
                '%s%d' % (type_type.upper(), type_size))
            constructor = constructor_prefix + full_class_name
            decoder_code(decode_store_case_entry)
            decoder_code(decode_store_case_epilog)
    else:
        # single list of types, do basic one-level decode
        for type_name in types:
            full_class_name = '%s<%s>' % (class_name, type_name.upper())
            constructor = constructor_prefix + full_class_name
            decoder_code(decode_case_entry)

    decoder_code(decode_case_epilog)

###############
#
# Generate instructions
#
###############

# handy abbreviations for common sets of types

# arithmetic ops are typically defined only on 32- and 64-bit sizes
arith_int_types = ('S32', 'U32', 'S64', 'U64')
arith_float_types = ('F32', 'F64')
arith_types = arith_int_types + arith_float_types

bit_types = ('B1', 'B32', 'B64')

all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types

# I think you might be able to do 'f16' memory ops too, but we'll
# ignore them for now.
mem_types = all_int_types + arith_float_types
mem_atom_types = all_int_types + ('B32', 'B64')

##### Arithmetic & logical operations
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')

gen('CopySign', arith_float_types,
    'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')

# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
gen('Fract', arith_float_types,
    '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')

gen('Ncos', arith_float_types, 'cos(src0)')
gen('Nsin', arith_float_types, 'sin(src0)')

gen('And', bit_types, 'src0 & src1')
gen('Or', bit_types, 'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')
# NOTE(review): the cast widens src0 before it is complemented, presumably
# so that '~' is not applied to a narrower (e.g. B1) operand after integer
# promotion -- confirm against the operand C types.
gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)')
gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst',
    ('sourceType', ('B32', 'B64')))

gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')

# gen('Mul_hi', types=('s32','u32', '??'))
# gen('Mul24', types=('s32','u32', '??'))
gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')

gen('Abs', arith_types, 'std::abs(src0)')
gen('Neg', arith_types, '-src0')

gen('Mov', bit_types + arith_types, 'src0')
gen('Not', bit_types, 'heynot(src0)')

# mad and fma differ only in rounding behavior, which we don't emulate
# also there's an integer form of mad, but not of fma
gen('Mad', arith_types, 'src0 * src1 + src2')
gen('Fma', arith_float_types, 'src0 * src1 + src2')

# native floating point operations
gen('Nfma', arith_float_types, 'src0 * src1 + src2')

gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')

# see base/bitfield.hh
gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
    'ExtractInsertInst')

gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
    'ExtractInsertInst')

##### Compare
gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
    'CmpInst', ('sourceType', arith_types + bit_types))
gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst')

##### Conversion

# Conversion operations are only defined on B1, not B32 or B64
cvt_types = ('B1',) + mem_types

gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))


##### Load & Store
gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode')
gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode')
gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode',
    is_store=True)
gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
gen('AtomicNoRet', mem_atom_types, base_class='StInst',
    constructor_prefix='decode')

gen('Cbr', base_class = 'LdInst', constructor_prefix='decode')
gen('Br', base_class = 'LdInst', constructor_prefix='decode')

##### Special operations

# Generate a "special" instruction: one with no data-type switch whose
# result is computed from 'expr'.  The base class is chosen from the number
# of source operands that expr references (zero or one); dest_type becomes
# the template argument of that base class.
#
# Raises ValueError when expr references more than one source operand,
# since no SpecialInst base class is selected for that case.
def gen_special(brig_opcode, expr, dest_type='U32'):
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # An explicit raise (rather than an assert) so the check survives
        # 'python -O' and base_class can never be used unbound.
        raise ValueError(
            'gen_special(%s): expr uses %d source operands, at most 1 '
            'is supported' % (brig_opcode, num_srcs))

    gen(brig_opcode, None, expr, base_class)

gen_special('WorkItemId', 'w->workItemId[src0][lane]')
gen_special('WorkItemAbsId',
     'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
gen_special('WorkGroupId', 'w->workGroupId[src0]')
gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
gen_special('GridSize', 'w->gridSz[src0]')
gen_special('GridGroups',
    'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
gen_special('LaneId', 'lane')
gen_special('WaveId', 'w->wfId')
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')

# gen_special('CU'', ')

gen('Ret', base_class='SpecialInstNoSrcNoDest')
gen('Barrier', base_class='SpecialInstNoSrcNoDest')
gen('MemFence', base_class='SpecialInstNoSrcNoDest')

# Map magic instructions to the BrigSyscall opcode
# Magic instructions are defined in magic.hh
#
# In the future, real HSA kernel system calls can be implemented and coexist
# with magic instructions.
gen('Call', base_class='SpecialInstNoSrcNoDest')

# Stubs for unimplemented instructions:
# These may need to be implemented at some point in the future, but
# for now we just match the instructions with their operands.
#
# By defining stubs for these instructions, we can work with
# applications that have them in dead/unused code paths.
#
# Needed for rocm-hcc compilations for HSA backends since
# builtins-hsail library is `cat`d onto the generated kernels.
# The builtins-hsail library consists of handcoded hsail functions
# that __might__ be needed by the rocm-hcc compiler in certain binaries.
gen('Bitmask', base_class='Stub')
gen('Bitrev', base_class='Stub')
gen('Firstbit', base_class='Stub')
gen('Lastbit', base_class='Stub')
gen('Unpacklo', base_class='Stub')
gen('Unpackhi', base_class='Stub')
gen('Pack', base_class='Stub')
gen('Unpack', base_class='Stub')
gen('Lerp', base_class='Stub')
gen('Packcvt', base_class='Stub')
gen('Unpackcvt', base_class='Stub')
gen('Sad', base_class='Stub')
gen('Sadhi', base_class='Stub')
gen('Activelanecount', base_class='Stub')
gen('Activelaneid', base_class='Stub')
gen('Activelanemask', base_class='Stub')
gen('Activelanepermute', base_class='Stub')
gen('Groupbaseptr', base_class='Stub')
gen('Signalnoret', base_class='Stub')

###############
#
# Generate file epilogs
#
###############

# Explicit specializations of Abs for the unsigned types: the value is
# passed through unchanged instead of going through std::abs().
header_code('''
template<>
inline void
Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            CType src_val;

            src_val = this->src[0].template get<CType>(w, lane);

            dest_val = (CType)(src_val);

            this->dest.set(w, lane, dest_val);
        }
    }
}

template<>
inline void
Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            CType src_val;

            src_val = this->src[0].template get<CType>(w, lane);

            dest_val = (CType)(src_val);

            this->dest.set(w, lane, dest_val);
        }
    }
}
''')

header_code.dedent()
header_code('''
} // namespace HsailISA
''')

# close off main decode switch
decoder_code.dedent()
decoder_code.dedent()
decoder_code('''
  default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
  } // end switch(ib->opcode)
  } // end decode()
} // namespace HsailISA
''')

exec_code.dedent()
exec_code('''
} // namespace HsailISA
''')

###############
#
# Output accumulated code to files
#
###############
header_code.write(sys.argv[1])
decoder_code.write(sys.argv[2])
exec_code.write(sys.argv[3])