gen.py revision 11737:50eceddc2286
1#! /usr/bin/python 2 3# 4# Copyright (c) 2015 Advanced Micro Devices, Inc. 5# All rights reserved. 6# 7# For use for simulation and test purposes only 8# 9# Redistribution and use in source and binary forms, with or without 10# modification, are permitted provided that the following conditions are met: 11# 12# 1. Redistributions of source code must retain the above copyright notice, 13# this list of conditions and the following disclaimer. 14# 15# 2. Redistributions in binary form must reproduce the above copyright notice, 16# this list of conditions and the following disclaimer in the documentation 17# and/or other materials provided with the distribution. 18# 19# 3. Neither the name of the copyright holder nor the names of its contributors 20# may be used to endorse or promote products derived from this software 21# without specific prior written permission. 22# 23# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33# POSSIBILITY OF SUCH DAMAGE. 
#
# Author: Steve Reinhardt
#

import sys, re

from m5.util import code_formatter

# Exactly three output paths are required; they receive, in order, the
# generated header, decoder, and exec code (see the write() calls at the
# end of this script).
if len(sys.argv) != 4:
    # Report usage errors on stderr with a non-zero status so a calling
    # process cannot mistake a failed invocation for success.
    sys.stderr.write("Error: need 3 args (file names)\n")
    sys.exit(1)

# One accumulator per output file.  Code is appended by calling the
# formatter with a template string.
header_code = code_formatter()
decoder_code = code_formatter()
exec_code = code_formatter()

###############
#
# Generate file prologs (includes etc.)
#
###############

# Header file: includes, then open the HsailISA namespace.
header_code('''
#include "arch/hsail/insts/decl.hh"
#include "base/bitfield.hh"
#include "gpu-compute/hsail_code.hh"
#include "gpu-compute/wavefront.hh"

namespace HsailISA
{
''')
header_code.indent()

# Decoder file: includes, then open Decoder::decode() up to the top-level
# switch on the Brig opcode.  The individual cases are appended by gen().
decoder_code('''
#include "arch/hsail/gpu_decoder.hh"
#include "arch/hsail/insts/branch.hh"
#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gen_decl.hh"
#include "arch/hsail/insts/mem.hh"
#include "arch/hsail/insts/mem_impl.hh"
#include "gpu-compute/brig_object.hh"

namespace HsailISA
{
    std::vector<GPUStaticInst*> Decoder::decodedInsts;

    GPUStaticInst*
    Decoder::decode(MachInst machInst)
    {
        using namespace Brig;

        const BrigInstBase *ib = machInst.brigInstBase;
        const BrigObject *obj = machInst.brigObj;

        switch(ib->opcode) {
''')
decoder_code.indent()
decoder_code.indent()

# Exec file: includes, then open the HsailISA namespace.
exec_code('''
#include "arch/hsail/insts/gen_decl.hh"
#include "base/intmath.hh"

namespace HsailISA
{
''')
exec_code.indent()

###############
#
# Define code templates for class declarations (for header file)
#
###############

# Basic header template for an instruction with no template parameters.
header_template_nodt = '''
class $class_name : public $base_class
{
  public:
    typedef $base_class Base;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Basic header template for an instruction with a single DataType
# template parameter.
# Declares a class templated on DataType that derives from
# $base_class<DataType> and declares (but does not define) execute().
header_template_1dt = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
  public:
    typedef $base_class<DataType> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Same as header_template_1dt, but without the execute() declaration.
# NOTE(review): no entry in header_templates appears to reference this
# template -- confirm whether it is still needed.
header_template_1dt_noexec = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
  public:
    typedef $base_class<DataType> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }
};

'''

# Same as header_template_1dt, except the base class has a second
# template parameter NumSrcOperands to allow a variable number of
# source operands.  Note that since this is implemented with an array,
# it only works for instructions where all sources are of the same
# type (like most arithmetics).
header_template_1dt_varsrcs = '''
template<typename DataType>
class $class_name : public $base_class<DataType, $num_srcs>
{
  public:
    typedef $base_class<DataType, $num_srcs> Base;
    typedef typename DataType::CType CType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Header template for instruction with two DataType template
# parameters, one for the dest and one for the source.  This is used
# by compare and convert.
header_template_2dt = '''
template<typename DestDataType, class SrcDataType>
class $class_name : public $base_class<DestDataType, SrcDataType>
{
  public:
    typedef $base_class<DestDataType, SrcDataType> Base;
    typedef typename DestDataType::CType DestCType;
    typedef typename SrcDataType::CType SrcCType;

    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
       : Base(ib, obj, "$opcode")
    {
    }

    void execute(GPUDynInstPtr gpuDynInst);
};

'''

# Maps a base class name (with any template arguments stripped) to the
# class-declaration template that gen() emits for it.  An empty string
# means no declaration is generated for that base class.
header_templates = {
    'ArithInst': header_template_1dt_varsrcs,
    'CmovInst': header_template_1dt,
    'ClassInst': header_template_1dt,
    'ShiftInst': header_template_1dt,
    'ExtractInsertInst': header_template_1dt,
    'CmpInst': header_template_2dt,
    'CvtInst': header_template_2dt,
    'PopcountInst': header_template_2dt,
    'LdInst': '',
    'StInst': '',
    'SpecialInstNoSrc': header_template_nodt,
    'SpecialInst1Src': header_template_nodt,
    'SpecialInstNoSrcNoDest': '',
}

###############
#
# Define code templates for exec functions
#
###############

# Every exec template below has the same shape: loop over the lanes of
# the wavefront and, for each lane whose predicate mask bit is set, read
# the operands, evaluate $expr, and write the result to the destination.

# exec function body for a non-templated instruction with no sources
exec_template_nodt_nosrc = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef Base::DestCType DestCType;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestCType dest_val = $expr;
            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for a non-templated instruction with one source,
# available to $expr as src_val0
exec_template_nodt_1src = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef Base::DestCType DestCType;
    typedef Base::SrcCType SrcCType;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
            DestCType dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for a single-DataType instruction with $num_srcs
# same-typed sources, available to $expr as src_val[i].  The old
# destination value is loaded first when $dest_is_src_flag is true.
exec_template_1dt_varsrcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            CType src_val[$num_srcs];

            for (int i = 0; i < $num_srcs; ++i) {
                src_val[i] = this->src[i].template get<CType>(w, lane);
            }

            dest_val = (CType)($expr);

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for a single-DataType instruction with three
# individually typed sources, available to $expr as src_val0..src_val2
exec_template_1dt_3srcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename Base::Src0CType Src0T;
    typedef typename Base::Src1CType Src1T;
    typedef typename Base::Src2CType Src2T;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;

            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
            Src2T src_val2 = this->src2.template get<Src2T>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for a single-DataType instruction with two sources
# and a destination whose type (Base::DestCType) may differ from CType
exec_template_1dt_2src_1dest = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    typedef typename Base::DestCType DestT;
    typedef CType Src0T;
    typedef typename Base::Src1CType Src1T;

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestT dest_val;
            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<DestT>(w, lane);
            }
            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for shifts: src_val0 has the instruction's CType
# while the shift amount src_val1 is always read as uint32_t
exec_template_shift = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();
    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;

            if ($dest_is_src_flag) {
                dest_val = this->dest.template get<CType>(w, lane);
            }

            CType src_val0 = this->src0.template get<CType>(w, lane);
            uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# exec function body for instructions with separate dest and source
# data types; the $num_srcs sources are available as src_val[i]
exec_template_2dt = '''
template<typename DestDataType, class SrcDataType>
void
$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            DestCType dest_val;
            SrcCType src_val[$num_srcs];

            for (int i = 0; i < $num_srcs; ++i) {
                src_val[i] = this->src[i].template get<SrcCType>(w, lane);
            }

            dest_val = $expr;

            this->dest.set(w, lane, dest_val);
        }
    }
}

'''

# Maps a base class name (with any template arguments stripped) to the
# exec-function template that gen() emits for it.  An empty string means
# no exec function is generated.  'ShiftInst' has no entry here because
# gen() emits exec_template_shift for it directly.
exec_templates = {
    'ArithInst': exec_template_1dt_varsrcs,
    'CmovInst': exec_template_1dt_3srcs,
    'ExtractInsertInst': exec_template_1dt_3srcs,
    'ClassInst': exec_template_1dt_2src_1dest,
    'CmpInst': exec_template_2dt,
    'CvtInst': exec_template_2dt,
    'PopcountInst': exec_template_2dt,
    'LdInst': '',
    'StInst': '',
    'SpecialInstNoSrc': exec_template_nodt_nosrc,
    'SpecialInst1Src': exec_template_nodt_1src,
    'SpecialInstNoSrcNoDest': '',
}

###############
#
# Define code templates for the decoder cases
#
###############

# decode template for nodt-opcode case
decode_nodt_template = '''
  case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''

# prolog used by gen() only for the 'Class' opcode: the type switched on
# is the instruction's sourceType rather than ib->type
decode_case_prolog_class_inst = '''
  case BRIG_OPCODE_$brig_opcode_upper:
    {
        //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
        BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
        //switch (baseOp->kind) {
        //    case BRIG_OPERAND_REG:
        //        type = ((const BrigOperandReg*)baseOp)->type;
        //        break;
        //    case BRIG_OPERAND_IMMED:
        //        type = ((const BrigOperandImmed*)baseOp)->type;
        //        break;
        //    default:
        //        fatal("CLASS unrecognized kind of operand %d\\n",
        //               baseOp->kind);
        //}
        switch (type) {'''

# common prolog for 1dt- or 2dt-opcode case: switch on data type
decode_case_prolog = '''
  case BRIG_OPCODE_$brig_opcode_upper:
    {
        switch (ib->type) {'''

# single-level decode case entry (for 1dt opcodes)
decode_case_entry = \
'      case BRIG_TYPE_$type_name: return $constructor(ib, obj);'

# store variant of the single-level case: gen() emits prolog, entry and
# epilog in that order, wrapping the return statement in braces
decode_store_prolog = \
'      case BRIG_TYPE_$type_name: {'

decode_store_case_epilog = '''
      }'''

decode_store_case_entry = \
'          return $constructor(ib, obj);'

# common epilog for type switch
decode_case_epilog = '''
          default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
                         ib->type);
        }
    }
    break;'''

# Additional templates for nested decode on a second type field (for
# compare and convert).
# These are used in place of the
# decode_case_entry template to create a second-level switch on the
# second type field inside each case of the first-level type switch.
# Because the name and location of the second type can vary, the Brig
# instruction type must be provided in $brig_type, and the name of the
# second type field must be provided in $type2_field.
decode_case2_prolog = '''
        case BRIG_TYPE_$type_name:
          switch (((Brig$brig_type*)ib)->$type2_field) {'''

decode_case2_entry = \
'          case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'

decode_case2_epilog = '''
          default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
                         ((Brig$brig_type*)ib)->$type2_field);
        }
        break;'''

# Figure out how many source operands an expr needs by looking for the
# highest-numbered srcN value referenced.  Since sources are numbered
# starting at 0, the return value is N+1.
def num_src_operands(expr):
    """Return the number of source operands referenced by expr.

    expr is a C++ expression string in which operands are written as
    src0, src1, ...  The result is one more than the highest operand
    index found, or 0 when no operand is referenced.

    Operands are recognized with the same word-bounded pattern that
    gen() uses when it rewrites srcN into src_val names, so the count
    always agrees with what gen() actually substitutes; an identifier
    that merely contains 'src1' is not counted as an operand.
    """
    src_indices = [int(index) for index in re.findall(r'\bsrc(\d)\b', expr)]
    if not src_indices:
        return 0
    return max(src_indices) + 1

###############
#
# Define final code generation methods
#
# The gen() and gen_special() functions are the interface for
# generating actual instructions.
#
###############

# Generate class declaration, exec function, and decode switch case
# for a brig_opcode with a single-level type switch.  The 'types'
# parameter is a list or tuple of types for which the instruction
# should be instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit the class declaration, exec function and decoder case(s)
    for one Brig opcode.

    brig_opcode        -- opcode name; used as the C++ class name and,
                          upper-cased, in the BRIG_OPCODE_* case label
    types              -- list/tuple of data type names to instantiate
                          for; None or a plain string produces a single
                          untyped decoder case
    expr               -- C++ expression computing the result in terms
                          of src0, src1, ... and optionally dest;
                          required when base_class is 'ArithInst'
    base_class         -- C++ base class, optionally with template
                          arguments; selects the header/exec templates
    type2_info         -- (field_name, types) pair requesting a nested
                          switch on a second type field
    constructor_prefix -- text placed before the class name in the
                          decoder's return statement
    is_store           -- use the store-style decoder case layout

    NOTE(review): no values are passed to the code_formatter calls, so
    the $name placeholders in the templates are presumably resolved from
    this function's local variables.  Locals such as class_name, opcode,
    constructor, type_name or brig_type must therefore keep their names
    even where they look unused -- confirm against m5.util.code_formatter.
    """
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # note that expr must be provided with ArithInst so we can
        # derive num_srcs for the template
        assert expr

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source.  Use the same
        # word-bounded match as the dest -> dest_val rewrite below so the
        # flag and the substitution can never disagree.
        dest_is_src = re.search(r'\bdest\b', expr) is not None
        dest_is_src_flag = str(dest_is_src).lower() # for C++
        # Base classes that keep their sources in an array get indexed
        # names; every other base class gets one variable per source.
        if base_class in ['ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst']:
            src_replacement = r'src_val[\1]'
        else:
            src_replacement = r'src_val\1'
        expr = re.sub(r'\bsrc(\d)\b', src_replacement, expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    # Exec functions of the non-templated special instructions go to the
    # exec file; the templated ones are emitted into the header.
    if base_class.startswith('SpecialInst'):
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        header_code(exec_template_shift)
    else:
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)
        if not type2_info:
            if not is_store:
                # single list of types, do basic one-level decode
                for type_name in types:
                    full_class_name = '%s<%s>' % (class_name,
                                                  type_name.upper())
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case_entry)
            else:
                # single list of types, do basic one-level decode, with
                # a second template argument built from the first letter
                # of the type name and its bit width
                for type_name in types:
                    decoder_code(decode_store_prolog)
                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
                    type_type = type_name[0]
                    store_src_type = '%s%d' % (type_type.upper(), type_size)
                    full_class_name = '%s<%s,%s>' % (class_name,
                                                     type_name.upper(),
                                                     store_src_type)
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_store_case_entry)
                    decoder_code(decode_store_case_epilog)
        else:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)

                decoder_code(decode_case2_epilog)

        decoder_code(decode_case_epilog)

###############
#
# Generate instructions
#
###############

# handy abbreviations for common sets of types

# arithmetic ops are typically defined only on 32- and 64-bit sizes
arith_int_types = ('S32', 'U32', 'S64', 'U64')
arith_float_types = ('F32', 'F64')
arith_types = arith_int_types + arith_float_types

bit_types = ('B1', 'B32', 'B64')

all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types

# I think you might be able to do 'f16' memory ops too, but we'll
# ignore them for now.
mem_types = all_int_types + arith_float_types
mem_atom_types = all_int_types + ('B32', 'B64')

##### Arithmetic & logical operations
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')

gen('CopySign', arith_float_types,
    'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')

# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
gen('Fract', arith_float_types,
    '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')

gen('Ncos', arith_float_types, 'cos(src0)')
gen('Nsin', arith_float_types, 'sin(src0)')

gen('And', bit_types, 'src0 & src1')
gen('Or', bit_types, 'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')

gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~src0)')
gen('Firstbit',bit_types, 'firstbit(src0)')
# The source may be B64, and __builtin_popcount takes an unsigned int,
# which would drop the upper 32 bits; the long long variant counts all
# 64 bits and gives the same answer for zero-extended 32-bit sources.
gen('Popcount', ('U32',), '__builtin_popcountll(src0)', 'PopcountInst',
    ('sourceType', ('B32', 'B64')))

gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')

# gen('Mul_hi', types=('s32','u32', '??'))
# gen('Mul24', types=('s32','u32', '??'))
gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')

gen('Abs', arith_types, 'std::abs(src0)')
gen('Neg', arith_types, '-src0')

gen('Mov', bit_types + arith_types, 'src0')
gen('Not', bit_types, 'heynot(src0)')

# mad and fma differ only in rounding behavior, which we don't emulate
# also there's an integer form of mad, but not of fma
gen('Mad', arith_types, 'src0 * src1 + src2')
gen('Fma', arith_float_types, 'src0 * src1 + src2')

#native floating point operations
gen('Nfma', arith_float_types, 'src0 * src1 + src2')

gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')

# see base/bitfield.hh
gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
    'ExtractInsertInst')

gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
    'ExtractInsertInst')

##### Compare
gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
    'CmpInst', ('sourceType', arith_types + bit_types))
gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst')

##### Conversion

# Conversion operations are only defined on B1, not B32 or B64
cvt_types = ('B1',) + mem_types

gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))


##### Load & Store
gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode')
gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode')
gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode',
    is_store=True)
gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
gen('AtomicNoRet', mem_atom_types, base_class='StInst',
    constructor_prefix='decode')

gen('Cbr', base_class = 'LdInst', constructor_prefix='decode')
gen('Br', base_class = 'LdInst', constructor_prefix='decode')

##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Generate a special (non-templated) instruction.

    brig_opcode -- opcode name, passed through to gen()
    expr        -- C++ expression for the result; it may reference at
                   most one source operand (src0)
    dest_type   -- destination data type used as the template argument
                   of the SpecialInst base class

    The base class is chosen from the number of source operands that
    expr references.  Raises AssertionError if expr references more
    than one source, since no matching base class is handled here.
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # Raised explicitly so the check also survives running with
        # assertions disabled.
        raise AssertionError(
            "%s: special instructions support at most 1 source operand, "
            "but expr uses %d" % (brig_opcode, num_srcs))

    gen(brig_opcode, None, expr, base_class)

gen_special('WorkItemId', 'w->workItemId[src0][lane]')
gen_special('WorkItemAbsId',
    'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
gen_special('WorkGroupId', 'w->workGroupId[src0]')
gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
gen_special('GridSize', 'w->gridSz[src0]')
gen_special('GridGroups',
    'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
gen_special('LaneId', 'lane')
gen_special('WaveId', 'w->wfId')
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')

# gen_special('CU'', ')

gen('Ret', base_class='SpecialInstNoSrcNoDest')
gen('Barrier', base_class='SpecialInstNoSrcNoDest')
gen('MemFence', base_class='SpecialInstNoSrcNoDest')

# Map magic instructions to the BrigSyscall opcode
# Magic instructions are defined in magic.hh
#
# In the future, real HSA kernel system calls can be implemented and coexist
# with magic instructions.
gen('Call', base_class='SpecialInstNoSrcNoDest')

###############
#
# Generate file epilogs
#
###############

# Explicit specializations of Abs::execute() for the unsigned types U32
# and U64: the source value is copied to the destination unchanged
# instead of going through the generic std::abs(src0) expression.
# NOTE(review): presumably needed because std::abs has no unsigned
# overloads -- confirm.
header_code('''
template<>
inline void
Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            CType src_val;

            src_val = this->src[0].template get<CType>(w, lane);

            dest_val = (CType)(src_val);

            this->dest.set(w, lane, dest_val);
        }
    }
}

template<>
inline void
Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *w = gpuDynInst->wavefront();

    const VectorMask &mask = w->getPred();

    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
        if (mask[lane]) {
            CType dest_val;
            CType src_val;

            src_val = this->src[0].template get<CType>(w, lane);

            dest_val = (CType)(src_val);

            this->dest.set(w, lane, dest_val);
        }
    }
}
''')

# close the HsailISA namespace opened in the header prolog
header_code.dedent()
header_code('''
} // namespace HsailISA
''')

# close off main decode switch
# (two dedents undo the two indents applied after the decoder prolog)
decoder_code.dedent()
decoder_code.dedent()
decoder_code('''
          default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
        } // end switch(ib->opcode)
    } // end decode()
} // namespace HsailISA
''')

# close the HsailISA namespace opened in the exec prolog
exec_code.dedent()
exec_code('''
} // namespace HsailISA
''')

###############
#
# Output accumulated code to files
#
###############
# argv[1], argv[2] and argv[3] name the header, decoder and exec output
# files respectively.
header_code.write(sys.argv[1])
decoder_code.write(sys.argv[2])
exec_code.write(sys.argv[3])