decl.hh (11737:50eceddc2286) decl.hh (11738:ad7e8afa0dfe)
1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __ARCH_HSAIL_INSTS_DECL_HH__
37#define __ARCH_HSAIL_INSTS_DECL_HH__
38
39#include <cmath>
40
41#include "arch/hsail/insts/gpu_static_inst.hh"
42#include "arch/hsail/operand.hh"
43#include "debug/HSAIL.hh"
44#include "gpu-compute/gpu_dyn_inst.hh"
45#include "gpu-compute/shader.hh"
46
47namespace HsailISA
48{
/**
 * Bundles two operand classes: the one used for an instruction's
 * destination and the one used for its sources.
 */
template<typename _DestOperand, typename _SrcOperand>
class HsailOperandType
{
  public:
    // Operand class used for the destination.
    using DestOperand = _DestOperand;
    // Operand class used for each source.
    using SrcOperand = _SrcOperand;
};
56
57 typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType;
58 typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType;
59 typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType;
60
61 // The IsBits parameter serves only to disambiguate tbhe B* types from
62 // the U* types, which otherwise would be identical (and
63 // indistinguishable).
64 template<typename _OperandType, typename _CType, Enums::MemType _memType,
65 vgpr_type _vgprType, int IsBits=0>
66 class HsailDataType
67 {
68 public:
69 typedef _OperandType OperandType;
70 typedef _CType CType;
71 static const Enums::MemType memType = _memType;
72 static const vgpr_type vgprType = _vgprType;
73 static const char *label;
74 };
75
76 typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1;
77 typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8;
78
79 typedef HsailDataType<SRegOperandType, uint16_t,
80 Enums::M_U16, VT_32, 1> B16;
81
82 typedef HsailDataType<SRegOperandType, uint32_t,
83 Enums::M_U32, VT_32, 1> B32;
84
85 typedef HsailDataType<DRegOperandType, uint64_t,
86 Enums::M_U64, VT_64, 1> B64;
87
88 typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8;
89 typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16;
90 typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32;
91 typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64;
92
93 typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8;
94 typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16;
95 typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32;
96 typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64;
97
98 typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32;
99 typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64;
100
101 template<typename DestOperandType, typename SrcOperandType,
102 int NumSrcOperands>
103 class CommonInstBase : public HsailGPUStaticInst
104 {
105 protected:
106 typename DestOperandType::DestOperand dest;
107 typename SrcOperandType::SrcOperand src[NumSrcOperands];
108
109 void
110 generateDisassembly()
111 {
112 disassembly = csprintf("%s%s %s", opcode, opcode_suffix(),
113 dest.disassemble());
114
115 for (int i = 0; i < NumSrcOperands; ++i) {
116 disassembly += ",";
117 disassembly += src[i].disassemble();
118 }
119 }
120
121 virtual std::string opcode_suffix() = 0;
122
123 public:
124 CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
125 const char *opcode)
126 : HsailGPUStaticInst(obj, opcode)
127 {
128 setFlag(ALU);
129
130 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
131
132 dest.init(op_offs, obj);
133
134 for (int i = 0; i < NumSrcOperands; ++i) {
135 op_offs = obj->getOperandPtr(ib->operands, i + 1);
136 src[i].init(op_offs, obj);
137 }
138 }
139
140 bool isVectorRegister(int operandIndex) {
141 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
142 if (operandIndex < NumSrcOperands)
143 return src[operandIndex].isVectorRegister();
144 else
145 return dest.isVectorRegister();
146 }
147 bool isCondRegister(int operandIndex) {
148 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
149 if (operandIndex < NumSrcOperands)
150 return src[operandIndex].isCondRegister();
151 else
152 return dest.isCondRegister();
153 }
154 bool isScalarRegister(int operandIndex) {
155 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
156 if (operandIndex < NumSrcOperands)
157 return src[operandIndex].isScalarRegister();
158 else
159 return dest.isScalarRegister();
160 }
161 bool isSrcOperand(int operandIndex) {
162 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
163 if (operandIndex < NumSrcOperands)
164 return true;
165 return false;
166 }
167
168 bool isDstOperand(int operandIndex) {
169 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
170 if (operandIndex >= NumSrcOperands)
171 return true;
172 return false;
173 }
174 int getOperandSize(int operandIndex) {
175 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
176 if (operandIndex < NumSrcOperands)
177 return src[operandIndex].opSize();
178 else
179 return dest.opSize();
180 }
181 int
182 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
183 {
184 assert(operandIndex >= 0 && operandIndex < getNumOperands());
185
186 if (operandIndex < NumSrcOperands)
187 return src[operandIndex].regIndex();
188 else
189 return dest.regIndex();
190 }
191 int numSrcRegOperands() {
192 int operands = 0;
193 for (int i = 0; i < NumSrcOperands; i++) {
194 if (src[i].isVectorRegister()) {
195 operands++;
196 }
197 }
198 return operands;
199 }
200 int numDstRegOperands() { return dest.isVectorRegister(); }
201 int getNumOperands() { return NumSrcOperands + 1; }
202 };
203
204 template<typename DataType, int NumSrcOperands>
205 class ArithInst : public CommonInstBase<typename DataType::OperandType,
206 typename DataType::OperandType,
207 NumSrcOperands>
208 {
209 public:
210 std::string opcode_suffix() { return csprintf("_%s", DataType::label); }
211
212 ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
213 const char *opcode)
214 : CommonInstBase<typename DataType::OperandType,
215 typename DataType::OperandType,
216 NumSrcOperands>(ib, obj, opcode)
217 {
218 }
219 };
220
221 template<typename DestOperandType, typename Src0OperandType,
222 typename Src1OperandType, typename Src2OperandType>
223 class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst
224 {
225 protected:
226 typename DestOperandType::DestOperand dest;
227 typename Src0OperandType::SrcOperand src0;
228 typename Src1OperandType::SrcOperand src1;
229 typename Src2OperandType::SrcOperand src2;
230
231 void
232 generateDisassembly()
233 {
234 disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(),
235 src0.disassemble(), src1.disassemble(),
236 src2.disassemble());
237 }
238
239 public:
240 ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
241 const BrigObject *obj,
242 const char *opcode)
243 : HsailGPUStaticInst(obj, opcode)
244 {
245 setFlag(ALU);
246
247 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
248 dest.init(op_offs, obj);
249
250 op_offs = obj->getOperandPtr(ib->operands, 1);
251 src0.init(op_offs, obj);
252
253 op_offs = obj->getOperandPtr(ib->operands, 2);
254 src1.init(op_offs, obj);
255
256 op_offs = obj->getOperandPtr(ib->operands, 3);
257 src2.init(op_offs, obj);
258 }
259
260 bool isVectorRegister(int operandIndex) {
261 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
262 if (!operandIndex)
263 return src0.isVectorRegister();
264 else if (operandIndex == 1)
265 return src1.isVectorRegister();
266 else if (operandIndex == 2)
267 return src2.isVectorRegister();
268 else
269 return dest.isVectorRegister();
270 }
271 bool isCondRegister(int operandIndex) {
272 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
273 if (!operandIndex)
274 return src0.isCondRegister();
275 else if (operandIndex == 1)
276 return src1.isCondRegister();
277 else if (operandIndex == 2)
278 return src2.isCondRegister();
279 else
280 return dest.isCondRegister();
281 }
282 bool isScalarRegister(int operandIndex) {
283 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
284 if (!operandIndex)
285 return src0.isScalarRegister();
286 else if (operandIndex == 1)
287 return src1.isScalarRegister();
288 else if (operandIndex == 2)
289 return src2.isScalarRegister();
290 else
291 return dest.isScalarRegister();
292 }
293 bool isSrcOperand(int operandIndex) {
294 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
295 if (operandIndex < 3)
296 return true;
297 else
298 return false;
299 }
300 bool isDstOperand(int operandIndex) {
301 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
302 if (operandIndex >= 3)
303 return true;
304 else
305 return false;
306 }
307 int getOperandSize(int operandIndex) {
308 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
309 if (!operandIndex)
310 return src0.opSize();
311 else if (operandIndex == 1)
312 return src1.opSize();
313 else if (operandIndex == 2)
314 return src2.opSize();
315 else
316 return dest.opSize();
317 }
318
319 int
320 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
321 {
322 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
323 if (!operandIndex)
324 return src0.regIndex();
325 else if (operandIndex == 1)
326 return src1.regIndex();
327 else if (operandIndex == 2)
328 return src2.regIndex();
329 else
330 return dest.regIndex();
331 }
332
333 int numSrcRegOperands() {
334 int operands = 0;
335 if (src0.isVectorRegister()) {
336 operands++;
337 }
338 if (src1.isVectorRegister()) {
339 operands++;
340 }
341 if (src2.isVectorRegister()) {
342 operands++;
343 }
344 return operands;
345 }
346 int numDstRegOperands() { return dest.isVectorRegister(); }
347 int getNumOperands() { return 4; }
348 };
349
350 template<typename DestDataType, typename Src0DataType,
351 typename Src1DataType, typename Src2DataType>
352 class ThreeNonUniformSourceInst :
353 public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
354 typename Src0DataType::OperandType,
355 typename Src1DataType::OperandType,
356 typename Src2DataType::OperandType>
357 {
358 public:
359 typedef typename DestDataType::CType DestCType;
360 typedef typename Src0DataType::CType Src0CType;
361 typedef typename Src1DataType::CType Src1CType;
362 typedef typename Src2DataType::CType Src2CType;
363
364 ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib,
365 const BrigObject *obj, const char *opcode)
366 : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
367 typename Src0DataType::OperandType,
368 typename Src1DataType::OperandType,
369 typename Src2DataType::OperandType>(ib,
370 obj, opcode)
371 {
372 }
373 };
374
375 template<typename DataType>
376 class CmovInst : public ThreeNonUniformSourceInst<DataType, B1,
377 DataType, DataType>
378 {
379 public:
380 CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
381 const char *opcode)
382 : ThreeNonUniformSourceInst<DataType, B1, DataType,
383 DataType>(ib, obj, opcode)
384 {
385 }
386 };
387
388 template<typename DataType>
389 class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType,
390 DataType, U32,
391 U32>
392 {
393 public:
394 ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
395 const char *opcode)
396 : ThreeNonUniformSourceInst<DataType, DataType, U32,
397 U32>(ib, obj, opcode)
398 {
399 }
400 };
401
402 template<typename DestOperandType, typename Src0OperandType,
403 typename Src1OperandType>
404 class TwoNonUniformSourceInstBase : public HsailGPUStaticInst
405 {
406 protected:
407 typename DestOperandType::DestOperand dest;
408 typename Src0OperandType::SrcOperand src0;
409 typename Src1OperandType::SrcOperand src1;
410
411 void
412 generateDisassembly()
413 {
414 disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(),
415 src0.disassemble(), src1.disassemble());
416 }
417
418
419 public:
420 TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
421 const BrigObject *obj, const char *opcode)
422 : HsailGPUStaticInst(obj, opcode)
423 {
424 setFlag(ALU);
425
426 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
427 dest.init(op_offs, obj);
428
429 op_offs = obj->getOperandPtr(ib->operands, 1);
430 src0.init(op_offs, obj);
431
432 op_offs = obj->getOperandPtr(ib->operands, 2);
433 src1.init(op_offs, obj);
434 }
435 bool isVectorRegister(int operandIndex) {
436 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
437 if (!operandIndex)
438 return src0.isVectorRegister();
439 else if (operandIndex == 1)
440 return src1.isVectorRegister();
441 else
442 return dest.isVectorRegister();
443 }
444 bool isCondRegister(int operandIndex) {
445 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
446 if (!operandIndex)
447 return src0.isCondRegister();
448 else if (operandIndex == 1)
449 return src1.isCondRegister();
450 else
451 return dest.isCondRegister();
452 }
453 bool isScalarRegister(int operandIndex) {
454 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
455 if (!operandIndex)
456 return src0.isScalarRegister();
457 else if (operandIndex == 1)
458 return src1.isScalarRegister();
459 else
460 return dest.isScalarRegister();
461 }
462 bool isSrcOperand(int operandIndex) {
463 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
464 if (operandIndex < 2)
465 return true;
466 else
467 return false;
468 }
469 bool isDstOperand(int operandIndex) {
470 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
471 if (operandIndex >= 2)
472 return true;
473 else
474 return false;
475 }
476 int getOperandSize(int operandIndex) {
477 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
478 if (!operandIndex)
479 return src0.opSize();
480 else if (operandIndex == 1)
481 return src1.opSize();
482 else
483 return dest.opSize();
484 }
485
486 int
487 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
488 {
489 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
490 if (!operandIndex)
491 return src0.regIndex();
492 else if (operandIndex == 1)
493 return src1.regIndex();
494 else
495 return dest.regIndex();
496 }
497
498 int numSrcRegOperands() {
499 int operands = 0;
500 if (src0.isVectorRegister()) {
501 operands++;
502 }
503 if (src1.isVectorRegister()) {
504 operands++;
505 }
506 return operands;
507 }
508 int numDstRegOperands() { return dest.isVectorRegister(); }
509 int getNumOperands() { return 3; }
510 };
511
512 template<typename DestDataType, typename Src0DataType,
513 typename Src1DataType>
514 class TwoNonUniformSourceInst :
515 public TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
516 typename Src0DataType::OperandType,
517 typename Src1DataType::OperandType>
518 {
519 public:
520 typedef typename DestDataType::CType DestCType;
521 typedef typename Src0DataType::CType Src0CType;
522 typedef typename Src1DataType::CType Src1CType;
523
524 TwoNonUniformSourceInst(const Brig::BrigInstBase *ib,
525 const BrigObject *obj, const char *opcode)
526 : TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
527 typename Src0DataType::OperandType,
528 typename Src1DataType::OperandType>(ib,
529 obj, opcode)
530 {
531 }
532 };
533
// helper function for ClassInst
/**
 * Tests whether a floating-point value belongs to any of the classes
 * selected by a bit mask.
 *
 * Mask bits, as tested below:
 *   0x001, 0x002  NaN (either bit selects any NaN)
 *   0x004  negative infinity      0x200  positive infinity
 *   0x008  negative normal        0x100  positive normal
 *   0x010  negative subnormal     0x080  positive subnormal
 *   0x020  negative zero          0x040  positive zero
 *
 * @param src0 value to classify
 * @param src1 mask of classes to test for
 * @return true if src0 falls in at least one selected class
 */
template<typename T>
bool
fpclassify(T src0, uint32_t src1)
{
    const int fpclass = std::fpclassify(src0);

    if ((src1 & 0x3) && (fpclass == FP_NAN)) {
        return true;
    }

    // Decide the sign from the sign bit rather than by comparing against
    // -0.0: +0.0 and -0.0 compare equal, so a comparison cannot tell the
    // two zeros apart and would file +0.0 under "negative zero".
    if (std::signbit(src0)) {
        if ((src1 & 0x4) && fpclass == FP_INFINITE)
            return true;
        if ((src1 & 0x8) && fpclass == FP_NORMAL)
            return true;
        if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
            return true;
        if ((src1 & 0x20) && fpclass == FP_ZERO)
            return true;
    } else {
        if ((src1 & 0x40) && fpclass == FP_ZERO)
            return true;
        if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
            return true;
        if ((src1 & 0x100) && fpclass == FP_NORMAL)
            return true;
        if ((src1 & 0x200) && fpclass == FP_INFINITE)
            return true;
    }
    return false;
}
566
567 template<typename DataType>
568 class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32>
569 {
570 public:
571 ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
572 const char *opcode)
573 : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode)
574 {
575 }
576 };
577
578 template<typename DataType>
579 class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32>
580 {
581 public:
582 ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
583 const char *opcode)
584 : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode)
585 {
586 }
587 };
588
589 // helper function for CmpInst
590 template<typename T>
591 bool
592 compare(T src0, T src1, Brig::BrigCompareOperation cmpOp)
593 {
594 using namespace Brig;
595
596 switch (cmpOp) {
597 case BRIG_COMPARE_EQ:
598 case BRIG_COMPARE_EQU:
599 case BRIG_COMPARE_SEQ:
600 case BRIG_COMPARE_SEQU:
601 return (src0 == src1);
602
603 case BRIG_COMPARE_NE:
604 case BRIG_COMPARE_NEU:
605 case BRIG_COMPARE_SNE:
606 case BRIG_COMPARE_SNEU:
607 return (src0 != src1);
608
609 case BRIG_COMPARE_LT:
610 case BRIG_COMPARE_LTU:
611 case BRIG_COMPARE_SLT:
612 case BRIG_COMPARE_SLTU:
613 return (src0 < src1);
614
615 case BRIG_COMPARE_LE:
616 case BRIG_COMPARE_LEU:
617 case BRIG_COMPARE_SLE:
618 case BRIG_COMPARE_SLEU:
619 return (src0 <= src1);
620
621 case BRIG_COMPARE_GT:
622 case BRIG_COMPARE_GTU:
623 case BRIG_COMPARE_SGT:
624 case BRIG_COMPARE_SGTU:
625 return (src0 > src1);
626
627 case BRIG_COMPARE_GE:
628 case BRIG_COMPARE_GEU:
629 case BRIG_COMPARE_SGE:
630 case BRIG_COMPARE_SGEU:
631 return (src0 >= src1);
632
633 case BRIG_COMPARE_NUM:
634 case BRIG_COMPARE_SNUM:
635 return (src0 == src0) || (src1 == src1);
636
637 case BRIG_COMPARE_NAN:
638 case BRIG_COMPARE_SNAN:
639 return (src0 != src0) || (src1 != src1);
640
641 default:
642 fatal("Bad cmpOp value %d\n", (int)cmpOp);
643 }
644 }
645
/**
 * Counts the bits above the first significant bit of an integer,
 * scanning from the most significant bit downwards.
 *
 * For non-negative values the first significant bit is the highest 1
 * bit; for negative signed values it is the highest 0 bit.
 *
 * @param src0 integer to scan
 * @return number of bits above the first significant bit, or -1 when
 *         there is none (src0 is 0, or src0 is a signed -1)
 */
template<typename T>
int32_t
firstbit(T src0)
{
    if (!src0)
        return -1;

    // src0 is nonzero here, so "not greater than zero" means negative.
    // Testing in T itself (not after widening to int64_t) keeps unsigned
    // 64-bit values with the top bit set from being taken as negative.
    const T tmp = (src0 > T(0)) ? src0 : static_cast<T>(~src0);

    // tmp is never negative, so widening leaves the upper bits clear.
    const uint64_t bits = static_cast<uint64_t>(tmp);

    // A signed -1 inverts to 0: there is no bit to find.
    if (!bits)
        return -1;

    // Start at the MSB of T. The mask is 64 bits wide so the shift is
    // well defined for every supported width.
    int pos = 8 * sizeof(T) - 1;
    int32_t cnt = 0;

    while (!(bits & (static_cast<uint64_t>(1) << pos))) {
        ++cnt;
        --pos;
    }
    return cnt;
}
667
668 const char* cmpOpToString(Brig::BrigCompareOperation cmpOp);
669
670 template<typename DestOperandType, typename SrcOperandType>
671 class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType,
672 2>
673 {
674 protected:
675 Brig::BrigCompareOperation cmpOp;
676
677 public:
678 CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
679 const char *_opcode)
680 : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj,
681 _opcode)
682 {
683 assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP);
684 Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib;
685 cmpOp = (Brig::BrigCompareOperation)i->compare;
686 }
687 };
688
689 template<typename DestDataType, typename SrcDataType>
690 class CmpInst : public CmpInstBase<typename DestDataType::OperandType,
691 typename SrcDataType::OperandType>
692 {
693 public:
694 std::string
695 opcode_suffix()
696 {
697 return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp),
698 DestDataType::label, SrcDataType::label);
699 }
700
701 CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
702 const char *_opcode)
703 : CmpInstBase<typename DestDataType::OperandType,
704 typename SrcDataType::OperandType>(ib, obj, _opcode)
705 {
706 }
707 };
708
709 template<typename DestDataType, typename SrcDataType>
710 class CvtInst : public CommonInstBase<typename DestDataType::OperandType,
711 typename SrcDataType::OperandType, 1>
712 {
713 public:
714 std::string opcode_suffix()
715 {
716 return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
717 }
718
719 CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
720 const char *_opcode)
721 : CommonInstBase<typename DestDataType::OperandType,
722 typename SrcDataType::OperandType,
723 1>(ib, obj, _opcode)
724 {
725 }
726 };
727
728 template<typename DestDataType, typename SrcDataType>
729 class PopcountInst :
730 public CommonInstBase<typename DestDataType::OperandType,
731 typename SrcDataType::OperandType, 1>
732 {
733 public:
734 std::string opcode_suffix()
735 {
736 return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
737 }
738
739 PopcountInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
740 const char *_opcode)
741 : CommonInstBase<typename DestDataType::OperandType,
742 typename SrcDataType::OperandType,
743 1>(ib, obj, _opcode)
744 {
745 }
746 };
747
1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __ARCH_HSAIL_INSTS_DECL_HH__
37#define __ARCH_HSAIL_INSTS_DECL_HH__
38
39#include <cmath>
40
41#include "arch/hsail/insts/gpu_static_inst.hh"
42#include "arch/hsail/operand.hh"
43#include "debug/HSAIL.hh"
44#include "gpu-compute/gpu_dyn_inst.hh"
45#include "gpu-compute/shader.hh"
46
47namespace HsailISA
48{
/**
 * Bundles two operand classes: the one used for an instruction's
 * destination and the one used for its sources.
 */
template<typename _DestOperand, typename _SrcOperand>
class HsailOperandType
{
  public:
    // Operand class used for the destination.
    using DestOperand = _DestOperand;
    // Operand class used for each source.
    using SrcOperand = _SrcOperand;
};
56
57 typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType;
58 typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType;
59 typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType;
60
61 // The IsBits parameter serves only to disambiguate tbhe B* types from
62 // the U* types, which otherwise would be identical (and
63 // indistinguishable).
64 template<typename _OperandType, typename _CType, Enums::MemType _memType,
65 vgpr_type _vgprType, int IsBits=0>
66 class HsailDataType
67 {
68 public:
69 typedef _OperandType OperandType;
70 typedef _CType CType;
71 static const Enums::MemType memType = _memType;
72 static const vgpr_type vgprType = _vgprType;
73 static const char *label;
74 };
75
76 typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1;
77 typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8;
78
79 typedef HsailDataType<SRegOperandType, uint16_t,
80 Enums::M_U16, VT_32, 1> B16;
81
82 typedef HsailDataType<SRegOperandType, uint32_t,
83 Enums::M_U32, VT_32, 1> B32;
84
85 typedef HsailDataType<DRegOperandType, uint64_t,
86 Enums::M_U64, VT_64, 1> B64;
87
88 typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8;
89 typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16;
90 typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32;
91 typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64;
92
93 typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8;
94 typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16;
95 typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32;
96 typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64;
97
98 typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32;
99 typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64;
100
101 template<typename DestOperandType, typename SrcOperandType,
102 int NumSrcOperands>
103 class CommonInstBase : public HsailGPUStaticInst
104 {
105 protected:
106 typename DestOperandType::DestOperand dest;
107 typename SrcOperandType::SrcOperand src[NumSrcOperands];
108
109 void
110 generateDisassembly()
111 {
112 disassembly = csprintf("%s%s %s", opcode, opcode_suffix(),
113 dest.disassemble());
114
115 for (int i = 0; i < NumSrcOperands; ++i) {
116 disassembly += ",";
117 disassembly += src[i].disassemble();
118 }
119 }
120
121 virtual std::string opcode_suffix() = 0;
122
123 public:
124 CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
125 const char *opcode)
126 : HsailGPUStaticInst(obj, opcode)
127 {
128 setFlag(ALU);
129
130 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
131
132 dest.init(op_offs, obj);
133
134 for (int i = 0; i < NumSrcOperands; ++i) {
135 op_offs = obj->getOperandPtr(ib->operands, i + 1);
136 src[i].init(op_offs, obj);
137 }
138 }
139
140 bool isVectorRegister(int operandIndex) {
141 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
142 if (operandIndex < NumSrcOperands)
143 return src[operandIndex].isVectorRegister();
144 else
145 return dest.isVectorRegister();
146 }
147 bool isCondRegister(int operandIndex) {
148 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
149 if (operandIndex < NumSrcOperands)
150 return src[operandIndex].isCondRegister();
151 else
152 return dest.isCondRegister();
153 }
154 bool isScalarRegister(int operandIndex) {
155 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
156 if (operandIndex < NumSrcOperands)
157 return src[operandIndex].isScalarRegister();
158 else
159 return dest.isScalarRegister();
160 }
161 bool isSrcOperand(int operandIndex) {
162 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
163 if (operandIndex < NumSrcOperands)
164 return true;
165 return false;
166 }
167
168 bool isDstOperand(int operandIndex) {
169 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
170 if (operandIndex >= NumSrcOperands)
171 return true;
172 return false;
173 }
174 int getOperandSize(int operandIndex) {
175 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
176 if (operandIndex < NumSrcOperands)
177 return src[operandIndex].opSize();
178 else
179 return dest.opSize();
180 }
181 int
182 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
183 {
184 assert(operandIndex >= 0 && operandIndex < getNumOperands());
185
186 if (operandIndex < NumSrcOperands)
187 return src[operandIndex].regIndex();
188 else
189 return dest.regIndex();
190 }
191 int numSrcRegOperands() {
192 int operands = 0;
193 for (int i = 0; i < NumSrcOperands; i++) {
194 if (src[i].isVectorRegister()) {
195 operands++;
196 }
197 }
198 return operands;
199 }
200 int numDstRegOperands() { return dest.isVectorRegister(); }
201 int getNumOperands() { return NumSrcOperands + 1; }
202 };
203
204 template<typename DataType, int NumSrcOperands>
205 class ArithInst : public CommonInstBase<typename DataType::OperandType,
206 typename DataType::OperandType,
207 NumSrcOperands>
208 {
209 public:
210 std::string opcode_suffix() { return csprintf("_%s", DataType::label); }
211
212 ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
213 const char *opcode)
214 : CommonInstBase<typename DataType::OperandType,
215 typename DataType::OperandType,
216 NumSrcOperands>(ib, obj, opcode)
217 {
218 }
219 };
220
221 template<typename DestOperandType, typename Src0OperandType,
222 typename Src1OperandType, typename Src2OperandType>
223 class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst
224 {
225 protected:
226 typename DestOperandType::DestOperand dest;
227 typename Src0OperandType::SrcOperand src0;
228 typename Src1OperandType::SrcOperand src1;
229 typename Src2OperandType::SrcOperand src2;
230
231 void
232 generateDisassembly()
233 {
234 disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(),
235 src0.disassemble(), src1.disassemble(),
236 src2.disassemble());
237 }
238
239 public:
240 ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
241 const BrigObject *obj,
242 const char *opcode)
243 : HsailGPUStaticInst(obj, opcode)
244 {
245 setFlag(ALU);
246
247 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
248 dest.init(op_offs, obj);
249
250 op_offs = obj->getOperandPtr(ib->operands, 1);
251 src0.init(op_offs, obj);
252
253 op_offs = obj->getOperandPtr(ib->operands, 2);
254 src1.init(op_offs, obj);
255
256 op_offs = obj->getOperandPtr(ib->operands, 3);
257 src2.init(op_offs, obj);
258 }
259
260 bool isVectorRegister(int operandIndex) {
261 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
262 if (!operandIndex)
263 return src0.isVectorRegister();
264 else if (operandIndex == 1)
265 return src1.isVectorRegister();
266 else if (operandIndex == 2)
267 return src2.isVectorRegister();
268 else
269 return dest.isVectorRegister();
270 }
271 bool isCondRegister(int operandIndex) {
272 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
273 if (!operandIndex)
274 return src0.isCondRegister();
275 else if (operandIndex == 1)
276 return src1.isCondRegister();
277 else if (operandIndex == 2)
278 return src2.isCondRegister();
279 else
280 return dest.isCondRegister();
281 }
282 bool isScalarRegister(int operandIndex) {
283 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
284 if (!operandIndex)
285 return src0.isScalarRegister();
286 else if (operandIndex == 1)
287 return src1.isScalarRegister();
288 else if (operandIndex == 2)
289 return src2.isScalarRegister();
290 else
291 return dest.isScalarRegister();
292 }
293 bool isSrcOperand(int operandIndex) {
294 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
295 if (operandIndex < 3)
296 return true;
297 else
298 return false;
299 }
300 bool isDstOperand(int operandIndex) {
301 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
302 if (operandIndex >= 3)
303 return true;
304 else
305 return false;
306 }
307 int getOperandSize(int operandIndex) {
308 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
309 if (!operandIndex)
310 return src0.opSize();
311 else if (operandIndex == 1)
312 return src1.opSize();
313 else if (operandIndex == 2)
314 return src2.opSize();
315 else
316 return dest.opSize();
317 }
318
319 int
320 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
321 {
322 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
323 if (!operandIndex)
324 return src0.regIndex();
325 else if (operandIndex == 1)
326 return src1.regIndex();
327 else if (operandIndex == 2)
328 return src2.regIndex();
329 else
330 return dest.regIndex();
331 }
332
333 int numSrcRegOperands() {
334 int operands = 0;
335 if (src0.isVectorRegister()) {
336 operands++;
337 }
338 if (src1.isVectorRegister()) {
339 operands++;
340 }
341 if (src2.isVectorRegister()) {
342 operands++;
343 }
344 return operands;
345 }
346 int numDstRegOperands() { return dest.isVectorRegister(); }
347 int getNumOperands() { return 4; }
348 };
349
350 template<typename DestDataType, typename Src0DataType,
351 typename Src1DataType, typename Src2DataType>
352 class ThreeNonUniformSourceInst :
353 public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
354 typename Src0DataType::OperandType,
355 typename Src1DataType::OperandType,
356 typename Src2DataType::OperandType>
357 {
358 public:
359 typedef typename DestDataType::CType DestCType;
360 typedef typename Src0DataType::CType Src0CType;
361 typedef typename Src1DataType::CType Src1CType;
362 typedef typename Src2DataType::CType Src2CType;
363
364 ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib,
365 const BrigObject *obj, const char *opcode)
366 : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
367 typename Src0DataType::OperandType,
368 typename Src1DataType::OperandType,
369 typename Src2DataType::OperandType>(ib,
370 obj, opcode)
371 {
372 }
373 };
374
375 template<typename DataType>
376 class CmovInst : public ThreeNonUniformSourceInst<DataType, B1,
377 DataType, DataType>
378 {
379 public:
380 CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
381 const char *opcode)
382 : ThreeNonUniformSourceInst<DataType, B1, DataType,
383 DataType>(ib, obj, opcode)
384 {
385 }
386 };
387
388 template<typename DataType>
389 class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType,
390 DataType, U32,
391 U32>
392 {
393 public:
394 ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
395 const char *opcode)
396 : ThreeNonUniformSourceInst<DataType, DataType, U32,
397 U32>(ib, obj, opcode)
398 {
399 }
400 };
401
402 template<typename DestOperandType, typename Src0OperandType,
403 typename Src1OperandType>
404 class TwoNonUniformSourceInstBase : public HsailGPUStaticInst
405 {
406 protected:
407 typename DestOperandType::DestOperand dest;
408 typename Src0OperandType::SrcOperand src0;
409 typename Src1OperandType::SrcOperand src1;
410
411 void
412 generateDisassembly()
413 {
414 disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(),
415 src0.disassemble(), src1.disassemble());
416 }
417
418
419 public:
420 TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
421 const BrigObject *obj, const char *opcode)
422 : HsailGPUStaticInst(obj, opcode)
423 {
424 setFlag(ALU);
425
426 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
427 dest.init(op_offs, obj);
428
429 op_offs = obj->getOperandPtr(ib->operands, 1);
430 src0.init(op_offs, obj);
431
432 op_offs = obj->getOperandPtr(ib->operands, 2);
433 src1.init(op_offs, obj);
434 }
435 bool isVectorRegister(int operandIndex) {
436 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
437 if (!operandIndex)
438 return src0.isVectorRegister();
439 else if (operandIndex == 1)
440 return src1.isVectorRegister();
441 else
442 return dest.isVectorRegister();
443 }
444 bool isCondRegister(int operandIndex) {
445 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
446 if (!operandIndex)
447 return src0.isCondRegister();
448 else if (operandIndex == 1)
449 return src1.isCondRegister();
450 else
451 return dest.isCondRegister();
452 }
453 bool isScalarRegister(int operandIndex) {
454 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
455 if (!operandIndex)
456 return src0.isScalarRegister();
457 else if (operandIndex == 1)
458 return src1.isScalarRegister();
459 else
460 return dest.isScalarRegister();
461 }
462 bool isSrcOperand(int operandIndex) {
463 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
464 if (operandIndex < 2)
465 return true;
466 else
467 return false;
468 }
469 bool isDstOperand(int operandIndex) {
470 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
471 if (operandIndex >= 2)
472 return true;
473 else
474 return false;
475 }
476 int getOperandSize(int operandIndex) {
477 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
478 if (!operandIndex)
479 return src0.opSize();
480 else if (operandIndex == 1)
481 return src1.opSize();
482 else
483 return dest.opSize();
484 }
485
486 int
487 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
488 {
489 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
490 if (!operandIndex)
491 return src0.regIndex();
492 else if (operandIndex == 1)
493 return src1.regIndex();
494 else
495 return dest.regIndex();
496 }
497
498 int numSrcRegOperands() {
499 int operands = 0;
500 if (src0.isVectorRegister()) {
501 operands++;
502 }
503 if (src1.isVectorRegister()) {
504 operands++;
505 }
506 return operands;
507 }
508 int numDstRegOperands() { return dest.isVectorRegister(); }
509 int getNumOperands() { return 3; }
510 };
511
512 template<typename DestDataType, typename Src0DataType,
513 typename Src1DataType>
514 class TwoNonUniformSourceInst :
515 public TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
516 typename Src0DataType::OperandType,
517 typename Src1DataType::OperandType>
518 {
519 public:
520 typedef typename DestDataType::CType DestCType;
521 typedef typename Src0DataType::CType Src0CType;
522 typedef typename Src1DataType::CType Src1CType;
523
524 TwoNonUniformSourceInst(const Brig::BrigInstBase *ib,
525 const BrigObject *obj, const char *opcode)
526 : TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
527 typename Src0DataType::OperandType,
528 typename Src1DataType::OperandType>(ib,
529 obj, opcode)
530 {
531 }
532 };
533
// helper function for ClassInst
/**
 * Test a floating-point value against a bit mask of value classes.
 *
 * Mask bits checked by this function:
 *   0x001 / 0x002  NaN (both bits are treated alike here)
 *   0x004  negative infinity     0x200  positive infinity
 *   0x008  negative normal       0x100  positive normal
 *   0x010  negative subnormal    0x080  positive subnormal
 *   0x020  negative zero         0x040  positive zero
 *
 * @param src0 value to classify
 * @param src1 mask of classes to test for
 * @return true if src0 belongs to at least one class selected in src1
 */
template<typename T>
bool
fpclassify(T src0, uint32_t src1)
{
    const int fpclass = std::fpclassify(src0);

    // a NaN can only ever match the NaN bits, whatever its sign bit is
    if (fpclass == FP_NAN)
        return (src1 & 0x3) != 0;

    // Use the sign bit rather than a comparison against -0.0: since
    // +0.0 == -0.0, a comparison cannot tell the two zeros apart and
    // would report +0.0 as a negative zero.
    if (std::signbit(src0)) {
        if ((src1 & 0x4) && fpclass == FP_INFINITE)
            return true;
        if ((src1 & 0x8) && fpclass == FP_NORMAL)
            return true;
        if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
            return true;
        if ((src1 & 0x20) && fpclass == FP_ZERO)
            return true;
    } else {
        if ((src1 & 0x40) && fpclass == FP_ZERO)
            return true;
        if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
            return true;
        if ((src1 & 0x100) && fpclass == FP_NORMAL)
            return true;
        if ((src1 & 0x200) && fpclass == FP_INFINITE)
            return true;
    }
    return false;
}
566
567 template<typename DataType>
568 class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32>
569 {
570 public:
571 ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
572 const char *opcode)
573 : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode)
574 {
575 }
576 };
577
578 template<typename DataType>
579 class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32>
580 {
581 public:
582 ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
583 const char *opcode)
584 : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode)
585 {
586 }
587 };
588
589 // helper function for CmpInst
590 template<typename T>
591 bool
592 compare(T src0, T src1, Brig::BrigCompareOperation cmpOp)
593 {
594 using namespace Brig;
595
596 switch (cmpOp) {
597 case BRIG_COMPARE_EQ:
598 case BRIG_COMPARE_EQU:
599 case BRIG_COMPARE_SEQ:
600 case BRIG_COMPARE_SEQU:
601 return (src0 == src1);
602
603 case BRIG_COMPARE_NE:
604 case BRIG_COMPARE_NEU:
605 case BRIG_COMPARE_SNE:
606 case BRIG_COMPARE_SNEU:
607 return (src0 != src1);
608
609 case BRIG_COMPARE_LT:
610 case BRIG_COMPARE_LTU:
611 case BRIG_COMPARE_SLT:
612 case BRIG_COMPARE_SLTU:
613 return (src0 < src1);
614
615 case BRIG_COMPARE_LE:
616 case BRIG_COMPARE_LEU:
617 case BRIG_COMPARE_SLE:
618 case BRIG_COMPARE_SLEU:
619 return (src0 <= src1);
620
621 case BRIG_COMPARE_GT:
622 case BRIG_COMPARE_GTU:
623 case BRIG_COMPARE_SGT:
624 case BRIG_COMPARE_SGTU:
625 return (src0 > src1);
626
627 case BRIG_COMPARE_GE:
628 case BRIG_COMPARE_GEU:
629 case BRIG_COMPARE_SGE:
630 case BRIG_COMPARE_SGEU:
631 return (src0 >= src1);
632
633 case BRIG_COMPARE_NUM:
634 case BRIG_COMPARE_SNUM:
635 return (src0 == src0) || (src1 == src1);
636
637 case BRIG_COMPARE_NAN:
638 case BRIG_COMPARE_SNAN:
639 return (src0 != src0) || (src1 != src1);
640
641 default:
642 fatal("Bad cmpOp value %d\n", (int)cmpOp);
643 }
644 }
645
/**
 * Count the bits that precede the first significant bit of an integer,
 * scanning from the most significant bit downwards.
 *
 * For non-negative values (and every value of an unsigned type) the
 * first significant bit is the highest 1 bit. For negative values of a
 * signed type it is the highest 0 bit, i.e. the value is inverted before
 * the scan.
 *
 * @param src0 integer to scan (at most 64 bits wide)
 * @return number of bits above the first significant bit, or -1 if there
 *         is none (src0 is 0, or src0 is a signed -1)
 */
template<typename T>
int32_t
firstbit(T src0)
{
    static_assert(sizeof(T) <= sizeof(uint64_t),
                  "firstbit supports operands of at most 64 bits");

    if (!src0)
        return -1;

    const int num_bits = 8 * sizeof(T);

    // true only for signed T; written without a "< 0" comparison so that
    // unsigned instantiations do not trigger always-false warnings
    const bool is_signed_type = static_cast<T>(-1) < static_cast<T>(1);

    // Work on a 64-bit unsigned copy so that the shifts below are well
    // defined for every supported width. Signed values are sign extended
    // by the conversion, unsigned values are zero extended.
    uint64_t bits = static_cast<uint64_t>(src0);

    // handle negative numbers: look for the first 0 bit instead
    if (is_signed_type && ((bits >> (num_bits - 1)) & 1))
        bits = ~bits;

    // a signed -1 has no 0 bit at all
    if (!bits)
        return -1;

    // search the first bit set to 1, starting at the MSB of T
    int32_t cnt = 0;
    for (uint64_t probe = uint64_t(1) << (num_bits - 1); !(bits & probe);
         probe >>= 1) {
        ++cnt;
    }
    return cnt;
}
667
668 const char* cmpOpToString(Brig::BrigCompareOperation cmpOp);
669
670 template<typename DestOperandType, typename SrcOperandType>
671 class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType,
672 2>
673 {
674 protected:
675 Brig::BrigCompareOperation cmpOp;
676
677 public:
678 CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
679 const char *_opcode)
680 : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj,
681 _opcode)
682 {
683 assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP);
684 Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib;
685 cmpOp = (Brig::BrigCompareOperation)i->compare;
686 }
687 };
688
689 template<typename DestDataType, typename SrcDataType>
690 class CmpInst : public CmpInstBase<typename DestDataType::OperandType,
691 typename SrcDataType::OperandType>
692 {
693 public:
694 std::string
695 opcode_suffix()
696 {
697 return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp),
698 DestDataType::label, SrcDataType::label);
699 }
700
701 CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
702 const char *_opcode)
703 : CmpInstBase<typename DestDataType::OperandType,
704 typename SrcDataType::OperandType>(ib, obj, _opcode)
705 {
706 }
707 };
708
709 template<typename DestDataType, typename SrcDataType>
710 class CvtInst : public CommonInstBase<typename DestDataType::OperandType,
711 typename SrcDataType::OperandType, 1>
712 {
713 public:
714 std::string opcode_suffix()
715 {
716 return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
717 }
718
719 CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
720 const char *_opcode)
721 : CommonInstBase<typename DestDataType::OperandType,
722 typename SrcDataType::OperandType,
723 1>(ib, obj, _opcode)
724 {
725 }
726 };
727
728 template<typename DestDataType, typename SrcDataType>
729 class PopcountInst :
730 public CommonInstBase<typename DestDataType::OperandType,
731 typename SrcDataType::OperandType, 1>
732 {
733 public:
734 std::string opcode_suffix()
735 {
736 return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
737 }
738
739 PopcountInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
740 const char *_opcode)
741 : CommonInstBase<typename DestDataType::OperandType,
742 typename SrcDataType::OperandType,
743 1>(ib, obj, _opcode)
744 {
745 }
746 };
747
748 class Stub : public HsailGPUStaticInst
749 {
750 public:
751 Stub(const Brig::BrigInstBase *ib, const BrigObject *obj,
752 const char *_opcode)
753 : HsailGPUStaticInst(obj, _opcode)
754 {
755 }
756
757 void generateDisassembly() override
758 {
759 disassembly = csprintf("%s", opcode);
760 }
761
762 bool isVectorRegister(int operandIndex) override { return false; }
763 bool isCondRegister(int operandIndex) override { return false; }
764 bool isScalarRegister(int operandIndex) override { return false; }
765 bool isSrcOperand(int operandIndex) override { return false; }
766 bool isDstOperand(int operandIndex) override { return false; }
767 int getOperandSize(int operandIndex) override { return 0; }
768
769 int
770 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
771 {
772 return -1;
773 }
774
775 int numSrcRegOperands() override { return 0; }
776 int numDstRegOperands() override { return 0; }
777 int getNumOperands() override { return 0; }
778 };
779
748 class SpecialInstNoSrcNoDest : public HsailGPUStaticInst
749 {
750 public:
751 SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib,
752 const BrigObject *obj, const char *_opcode)
753 : HsailGPUStaticInst(obj, _opcode)
754 {
755 }
756
780 class SpecialInstNoSrcNoDest : public HsailGPUStaticInst
781 {
782 public:
783 SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib,
784 const BrigObject *obj, const char *_opcode)
785 : HsailGPUStaticInst(obj, _opcode)
786 {
787 }
788
757 bool isVectorRegister(int operandIndex) { return false; }
758 bool isCondRegister(int operandIndex) { return false; }
759 bool isScalarRegister(int operandIndex) { return false; }
760 bool isSrcOperand(int operandIndex) { return false; }
761 bool isDstOperand(int operandIndex) { return false; }
762 int getOperandSize(int operandIndex) { return 0; }
789 bool isVectorRegister(int operandIndex) override { return false; }
790 bool isCondRegister(int operandIndex) override { return false; }
791 bool isScalarRegister(int operandIndex) override { return false; }
792 bool isSrcOperand(int operandIndex) override { return false; }
793 bool isDstOperand(int operandIndex) override { return false; }
794 int getOperandSize(int operandIndex) override { return 0; }
763
764 int
795
796 int
765 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
797 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
766 {
767 return -1;
768 }
769
798 {
799 return -1;
800 }
801
770 int numSrcRegOperands() { return 0; }
771 int numDstRegOperands() { return 0; }
772 int getNumOperands() { return 0; }
802 int numSrcRegOperands() override { return 0; }
803 int numDstRegOperands() override { return 0; }
804 int getNumOperands() override { return 0; }
773 };
774
775 template<typename DestOperandType>
776 class SpecialInstNoSrcBase : public HsailGPUStaticInst
777 {
778 protected:
779 typename DestOperandType::DestOperand dest;
780
781 void generateDisassembly()
782 {
783 disassembly = csprintf("%s %s", opcode, dest.disassemble());
784 }
785
786 public:
787 SpecialInstNoSrcBase(const Brig::BrigInstBase *ib,
788 const BrigObject *obj, const char *_opcode)
789 : HsailGPUStaticInst(obj, _opcode)
790 {
791 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
792 dest.init(op_offs, obj);
793 }
794
795 bool isVectorRegister(int operandIndex) {
796 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
797 return dest.isVectorRegister();
798 }
799 bool isCondRegister(int operandIndex) {
800 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
801 return dest.isCondRegister();
802 }
803 bool isScalarRegister(int operandIndex) {
804 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
805 return dest.isScalarRegister();
806 }
807 bool isSrcOperand(int operandIndex) { return false; }
808 bool isDstOperand(int operandIndex) { return true; }
809 int getOperandSize(int operandIndex) {
810 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
811 return dest.opSize();
812 }
813
814 int
815 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
816 {
817 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
818 return dest.regIndex();
819 }
820
821 int numSrcRegOperands() { return 0; }
822 int numDstRegOperands() { return dest.isVectorRegister(); }
823 int getNumOperands() { return 1; }
824 };
825
826 template<typename DestDataType>
827 class SpecialInstNoSrc :
828 public SpecialInstNoSrcBase<typename DestDataType::OperandType>
829 {
830 public:
831 typedef typename DestDataType::CType DestCType;
832
833 SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj,
834 const char *_opcode)
835 : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj,
836 _opcode)
837 {
838 }
839 };
840
841 template<typename DestOperandType>
842 class SpecialInst1SrcBase : public HsailGPUStaticInst
843 {
844 protected:
845 typedef int SrcCType; // used in execute() template
846
847 typename DestOperandType::DestOperand dest;
848 ImmOperand<SrcCType> src0;
849
850 void
851 generateDisassembly()
852 {
853 disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(),
854 src0.disassemble());
855 }
856
857 public:
858 SpecialInst1SrcBase(const Brig::BrigInstBase *ib,
859 const BrigObject *obj, const char *_opcode)
860 : HsailGPUStaticInst(obj, _opcode)
861 {
862 setFlag(ALU);
863
864 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
865 dest.init(op_offs, obj);
866
867 op_offs = obj->getOperandPtr(ib->operands, 1);
868 src0.init(op_offs, obj);
869 }
870 bool isVectorRegister(int operandIndex) {
871 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
872 return dest.isVectorRegister();
873 }
874 bool isCondRegister(int operandIndex) {
875 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
876 return dest.isCondRegister();
877 }
878 bool isScalarRegister(int operandIndex) {
879 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
880 return dest.isScalarRegister();
881 }
882 bool isSrcOperand(int operandIndex) { return false; }
883 bool isDstOperand(int operandIndex) { return true; }
884 int getOperandSize(int operandIndex) {
885 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
886 return dest.opSize();
887 }
888
889 int
890 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
891 {
892 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
893 return dest.regIndex();
894 }
895
896 int numSrcRegOperands() { return 0; }
897 int numDstRegOperands() { return dest.isVectorRegister(); }
898 int getNumOperands() { return 1; }
899 };
900
901 template<typename DestDataType>
902 class SpecialInst1Src :
903 public SpecialInst1SrcBase<typename DestDataType::OperandType>
904 {
905 public:
906 typedef typename DestDataType::CType DestCType;
907
908 SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj,
909 const char *_opcode)
910 : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj,
911 _opcode)
912 {
913 }
914 };
915
916 class Ret : public SpecialInstNoSrcNoDest
917 {
918 public:
919 typedef SpecialInstNoSrcNoDest Base;
920
921 Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
922 : Base(ib, obj, "ret")
923 {
924 setFlag(GPUStaticInst::Return);
925 }
926
927 void execute(GPUDynInstPtr gpuDynInst);
928 };
929
930 class Barrier : public SpecialInstNoSrcNoDest
931 {
932 public:
933 typedef SpecialInstNoSrcNoDest Base;
934 uint8_t width;
935
936 Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
937 : Base(ib, obj, "barrier")
938 {
939 setFlag(GPUStaticInst::MemBarrier);
940 assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
941 width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
942 }
943
944 void execute(GPUDynInstPtr gpuDynInst);
945 };
946
947 class MemFence : public SpecialInstNoSrcNoDest
948 {
949 public:
950 typedef SpecialInstNoSrcNoDest Base;
951
952 Brig::BrigMemoryOrder memFenceMemOrder;
953 Brig::BrigMemoryScope memFenceScopeSegGroup;
954 Brig::BrigMemoryScope memFenceScopeSegGlobal;
955 Brig::BrigMemoryScope memFenceScopeSegImage;
956
957 MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj)
958 : Base(ib, obj, "memfence")
959 {
960 assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE);
961
962 memFenceScopeSegGlobal = (Brig::BrigMemoryScope)
963 ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope;
964
965 memFenceScopeSegGroup = (Brig::BrigMemoryScope)
966 ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope;
967
968 memFenceScopeSegImage = (Brig::BrigMemoryScope)
969 ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope;
970
971 memFenceMemOrder = (Brig::BrigMemoryOrder)
972 ((Brig::BrigInstMemFence*)ib)->memoryOrder;
973
974 setFlag(MemoryRef);
975 setFlag(GPUStaticInst::MemFence);
976
977 switch (memFenceMemOrder) {
978 case Brig::BRIG_MEMORY_ORDER_NONE:
979 setFlag(NoOrder);
980 break;
981 case Brig::BRIG_MEMORY_ORDER_RELAXED:
982 setFlag(RelaxedOrder);
983 break;
984 case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
985 setFlag(Acquire);
986 break;
987 case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
988 setFlag(Release);
989 break;
990 case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
991 setFlag(AcquireRelease);
992 break;
993 default:
994 fatal("MemInst has bad BrigMemoryOrder\n");
995 }
996
997 // set inst flags based on scopes
998 if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
999 memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
1000 setFlag(GPUStaticInst::GlobalSegment);
1001
1002 /**
1003 * A memory fence that has scope for
1004 * both segments will use the global
1005 * segment, and be executed in the
1006 * global memory pipeline, therefore,
1007 * we set the segment to match the
1008 * global scope only
1009 */
1010 switch (memFenceScopeSegGlobal) {
1011 case Brig::BRIG_MEMORY_SCOPE_NONE:
1012 setFlag(NoScope);
1013 break;
1014 case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1015 setFlag(WorkitemScope);
1016 break;
1017 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1018 setFlag(WorkgroupScope);
1019 break;
1020 case Brig::BRIG_MEMORY_SCOPE_AGENT:
1021 setFlag(DeviceScope);
1022 break;
1023 case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1024 setFlag(SystemScope);
1025 break;
1026 default:
1027 fatal("MemFence has bad global scope type\n");
1028 }
1029 } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
1030 setFlag(GPUStaticInst::GlobalSegment);
1031
1032 switch (memFenceScopeSegGlobal) {
1033 case Brig::BRIG_MEMORY_SCOPE_NONE:
1034 setFlag(NoScope);
1035 break;
1036 case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1037 setFlag(WorkitemScope);
1038 break;
1039 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1040 setFlag(WorkgroupScope);
1041 break;
1042 case Brig::BRIG_MEMORY_SCOPE_AGENT:
1043 setFlag(DeviceScope);
1044 break;
1045 case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1046 setFlag(SystemScope);
1047 break;
1048 default:
1049 fatal("MemFence has bad global scope type\n");
1050 }
1051 } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
1052 setFlag(GPUStaticInst::GroupSegment);
1053
1054 switch (memFenceScopeSegGroup) {
1055 case Brig::BRIG_MEMORY_SCOPE_NONE:
1056 setFlag(NoScope);
1057 break;
1058 case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1059 setFlag(WorkitemScope);
1060 break;
1061 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1062 setFlag(WorkgroupScope);
1063 break;
1064 case Brig::BRIG_MEMORY_SCOPE_AGENT:
1065 setFlag(DeviceScope);
1066 break;
1067 case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1068 setFlag(SystemScope);
1069 break;
1070 default:
1071 fatal("MemFence has bad group scope type\n");
1072 }
1073 } else {
1074 fatal("MemFence constructor: bad scope specifiers\n");
1075 }
1076 }
1077
1078 void
1079 initiateAcc(GPUDynInstPtr gpuDynInst)
1080 {
1081 Wavefront *wave = gpuDynInst->wavefront();
1082 wave->computeUnit->injectGlobalMemFence(gpuDynInst);
1083 }
1084
1085 void
1086 execute(GPUDynInstPtr gpuDynInst)
1087 {
1088 Wavefront *w = gpuDynInst->wavefront();
1089 // 2 cases:
1090 // * memfence to a sequentially consistent memory (e.g., LDS).
1091 // These can be handled as no-ops.
1092 // * memfence to a relaxed consistency cache (e.g., Hermes, Viper,
1093 // etc.). We send a packet, tagged with the memory order and
1094 // scope, and let the GPU coalescer handle it.
1095
1096 if (isGlobalSeg()) {
1097 gpuDynInst->simdId = w->simdId;
1098 gpuDynInst->wfSlotId = w->wfSlotId;
1099 gpuDynInst->wfDynId = w->wfDynId;
1100 gpuDynInst->kern_id = w->kernId;
1101 gpuDynInst->cu_id = w->computeUnit->cu_id;
1102
1103 gpuDynInst->useContinuation = false;
1104 GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
1105 gmp->issueRequest(gpuDynInst);
1106
1107 w->wrGmReqsInPipe--;
1108 w->rdGmReqsInPipe--;
1109 w->memReqsInPipe--;
1110 w->outstandingReqs++;
1111 } else if (isGroupSeg()) {
1112 // no-op
1113 } else {
1114 fatal("MemFence execute: bad op type\n");
1115 }
1116 }
1117 };
1118
1119 class Call : public HsailGPUStaticInst
1120 {
1121 public:
1122 // private helper functions
1123 void calcAddr(Wavefront* w, GPUDynInstPtr m);
1124
1125 void
1126 generateDisassembly()
1127 {
1128 if (dest.disassemble() == "") {
1129 disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(),
1130 src1.disassemble());
1131 } else {
1132 disassembly = csprintf("%s %s (%s) (%s)", opcode,
1133 src0.disassemble(), dest.disassemble(),
1134 src1.disassemble());
1135 }
1136 }
1137
1138 bool
1139 isPseudoOp()
1140 {
1141 std::string func_name = src0.disassemble();
1142 if (func_name.find("__gem5_hsail_op") != std::string::npos) {
1143 return true;
1144 }
1145 return false;
1146 }
1147
1148 // member variables
1149 ListOperand dest;
1150 FunctionRefOperand src0;
1151 ListOperand src1;
1152 HsailCode *func_ptr;
1153
1154 // exec function for pseudo instructions mapped on top of call opcode
1155 void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst);
1156
1157 // user-defined pseudo instructions
1158 void MagicPrintLane(Wavefront *w);
1159 void MagicPrintLane64(Wavefront *w);
1160 void MagicPrintWF32(Wavefront *w);
1161 void MagicPrintWF64(Wavefront *w);
1162 void MagicPrintWFFloat(Wavefront *w);
1163 void MagicSimBreak(Wavefront *w);
1164 void MagicPrefixSum(Wavefront *w);
1165 void MagicReduction(Wavefront *w);
1166 void MagicMaskLower(Wavefront *w);
1167 void MagicMaskUpper(Wavefront *w);
1168 void MagicJoinWFBar(Wavefront *w);
1169 void MagicWaitWFBar(Wavefront *w);
1170 void MagicPanic(Wavefront *w);
1171
1172 void MagicAtomicNRAddGlobalU32Reg(Wavefront *w,
1173 GPUDynInstPtr gpuDynInst);
1174
1175 void MagicAtomicNRAddGroupU32Reg(Wavefront *w,
1176 GPUDynInstPtr gpuDynInst);
1177
1178 void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst);
1179
1180 void MagicXactCasLd(Wavefront *w);
1181 void MagicMostSigThread(Wavefront *w);
1182 void MagicMostSigBroadcast(Wavefront *w);
1183
1184 void MagicPrintWF32ID(Wavefront *w);
1185 void MagicPrintWFID64(Wavefront *w);
1186
1187 Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
1188 : HsailGPUStaticInst(obj, "call")
1189 {
1190 setFlag(ALU);
1191 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1192 dest.init(op_offs, obj);
1193 op_offs = obj->getOperandPtr(ib->operands, 1);
1194 src0.init(op_offs, obj);
1195
1196 func_ptr = nullptr;
1197 std::string func_name = src0.disassemble();
1198 if (!isPseudoOp()) {
1199 func_ptr = dynamic_cast<HsailCode*>(obj->
1200 getFunction(func_name));
1201
1202 if (!func_ptr)
1203 fatal("call::exec cannot find function: %s\n", func_name);
1204 }
1205
1206 op_offs = obj->getOperandPtr(ib->operands, 2);
1207 src1.init(op_offs, obj);
1208 }
1209
1210 bool isVectorRegister(int operandIndex) { return false; }
1211 bool isCondRegister(int operandIndex) { return false; }
1212 bool isScalarRegister(int operandIndex) { return false; }
1213 bool isSrcOperand(int operandIndex) { return false; }
1214 bool isDstOperand(int operandIndex) { return false; }
1215 int getOperandSize(int operandIndex) { return 0; }
1216
1217 int
1218 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
1219 {
1220 return -1;
1221 }
1222
1223 void
1224 execute(GPUDynInstPtr gpuDynInst)
1225 {
1226 Wavefront *w = gpuDynInst->wavefront();
1227
1228 std::string func_name = src0.disassemble();
1229 if (isPseudoOp()) {
1230 execPseudoInst(w, gpuDynInst);
1231 } else {
1232 fatal("Native HSAIL functions are not yet implemented: %s\n",
1233 func_name);
1234 }
1235 }
1236 int numSrcRegOperands() { return 0; }
1237 int numDstRegOperands() { return 0; }
1238 int getNumOperands() { return 2; }
1239 };
1240
/** Bitwise complement of arg for every type except bool. */
template<typename T>
T
heynot(T arg)
{
    return static_cast<T>(~arg);
}

/** For bool the complement is the logical negation. */
template<>
inline bool
heynot<bool>(bool arg)
{
    return !arg;
}
1243} // namespace HsailISA
1244
1245#endif // __ARCH_HSAIL_INSTS_DECL_HH__
805 };
806
807 template<typename DestOperandType>
808 class SpecialInstNoSrcBase : public HsailGPUStaticInst
809 {
810 protected:
811 typename DestOperandType::DestOperand dest;
812
813 void generateDisassembly()
814 {
815 disassembly = csprintf("%s %s", opcode, dest.disassemble());
816 }
817
818 public:
819 SpecialInstNoSrcBase(const Brig::BrigInstBase *ib,
820 const BrigObject *obj, const char *_opcode)
821 : HsailGPUStaticInst(obj, _opcode)
822 {
823 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
824 dest.init(op_offs, obj);
825 }
826
827 bool isVectorRegister(int operandIndex) {
828 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
829 return dest.isVectorRegister();
830 }
831 bool isCondRegister(int operandIndex) {
832 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
833 return dest.isCondRegister();
834 }
835 bool isScalarRegister(int operandIndex) {
836 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
837 return dest.isScalarRegister();
838 }
839 bool isSrcOperand(int operandIndex) { return false; }
840 bool isDstOperand(int operandIndex) { return true; }
841 int getOperandSize(int operandIndex) {
842 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
843 return dest.opSize();
844 }
845
846 int
847 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
848 {
849 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
850 return dest.regIndex();
851 }
852
853 int numSrcRegOperands() { return 0; }
854 int numDstRegOperands() { return dest.isVectorRegister(); }
855 int getNumOperands() { return 1; }
856 };
857
858 template<typename DestDataType>
859 class SpecialInstNoSrc :
860 public SpecialInstNoSrcBase<typename DestDataType::OperandType>
861 {
862 public:
863 typedef typename DestDataType::CType DestCType;
864
865 SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj,
866 const char *_opcode)
867 : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj,
868 _opcode)
869 {
870 }
871 };
872
873 template<typename DestOperandType>
874 class SpecialInst1SrcBase : public HsailGPUStaticInst
875 {
876 protected:
877 typedef int SrcCType; // used in execute() template
878
879 typename DestOperandType::DestOperand dest;
880 ImmOperand<SrcCType> src0;
881
882 void
883 generateDisassembly()
884 {
885 disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(),
886 src0.disassemble());
887 }
888
889 public:
890 SpecialInst1SrcBase(const Brig::BrigInstBase *ib,
891 const BrigObject *obj, const char *_opcode)
892 : HsailGPUStaticInst(obj, _opcode)
893 {
894 setFlag(ALU);
895
896 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
897 dest.init(op_offs, obj);
898
899 op_offs = obj->getOperandPtr(ib->operands, 1);
900 src0.init(op_offs, obj);
901 }
902 bool isVectorRegister(int operandIndex) {
903 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
904 return dest.isVectorRegister();
905 }
906 bool isCondRegister(int operandIndex) {
907 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
908 return dest.isCondRegister();
909 }
910 bool isScalarRegister(int operandIndex) {
911 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
912 return dest.isScalarRegister();
913 }
914 bool isSrcOperand(int operandIndex) { return false; }
915 bool isDstOperand(int operandIndex) { return true; }
916 int getOperandSize(int operandIndex) {
917 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
918 return dest.opSize();
919 }
920
921 int
922 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
923 {
924 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
925 return dest.regIndex();
926 }
927
928 int numSrcRegOperands() { return 0; }
929 int numDstRegOperands() { return dest.isVectorRegister(); }
930 int getNumOperands() { return 1; }
931 };
932
933 template<typename DestDataType>
934 class SpecialInst1Src :
935 public SpecialInst1SrcBase<typename DestDataType::OperandType>
936 {
937 public:
938 typedef typename DestDataType::CType DestCType;
939
940 SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj,
941 const char *_opcode)
942 : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj,
943 _opcode)
944 {
945 }
946 };
947
948 class Ret : public SpecialInstNoSrcNoDest
949 {
950 public:
951 typedef SpecialInstNoSrcNoDest Base;
952
953 Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
954 : Base(ib, obj, "ret")
955 {
956 setFlag(GPUStaticInst::Return);
957 }
958
959 void execute(GPUDynInstPtr gpuDynInst);
960 };
961
962 class Barrier : public SpecialInstNoSrcNoDest
963 {
964 public:
965 typedef SpecialInstNoSrcNoDest Base;
966 uint8_t width;
967
968 Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
969 : Base(ib, obj, "barrier")
970 {
971 setFlag(GPUStaticInst::MemBarrier);
972 assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
973 width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
974 }
975
976 void execute(GPUDynInstPtr gpuDynInst);
977 };
978
979 class MemFence : public SpecialInstNoSrcNoDest
980 {
981 public:
982 typedef SpecialInstNoSrcNoDest Base;
983
984 Brig::BrigMemoryOrder memFenceMemOrder;
985 Brig::BrigMemoryScope memFenceScopeSegGroup;
986 Brig::BrigMemoryScope memFenceScopeSegGlobal;
987 Brig::BrigMemoryScope memFenceScopeSegImage;
988
989 MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj)
990 : Base(ib, obj, "memfence")
991 {
992 assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE);
993
994 memFenceScopeSegGlobal = (Brig::BrigMemoryScope)
995 ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope;
996
997 memFenceScopeSegGroup = (Brig::BrigMemoryScope)
998 ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope;
999
1000 memFenceScopeSegImage = (Brig::BrigMemoryScope)
1001 ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope;
1002
1003 memFenceMemOrder = (Brig::BrigMemoryOrder)
1004 ((Brig::BrigInstMemFence*)ib)->memoryOrder;
1005
1006 setFlag(MemoryRef);
1007 setFlag(GPUStaticInst::MemFence);
1008
1009 switch (memFenceMemOrder) {
1010 case Brig::BRIG_MEMORY_ORDER_NONE:
1011 setFlag(NoOrder);
1012 break;
1013 case Brig::BRIG_MEMORY_ORDER_RELAXED:
1014 setFlag(RelaxedOrder);
1015 break;
1016 case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
1017 setFlag(Acquire);
1018 break;
1019 case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
1020 setFlag(Release);
1021 break;
1022 case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
1023 setFlag(AcquireRelease);
1024 break;
1025 default:
1026 fatal("MemInst has bad BrigMemoryOrder\n");
1027 }
1028
1029 // set inst flags based on scopes
1030 if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
1031 memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
1032 setFlag(GPUStaticInst::GlobalSegment);
1033
1034 /**
1035 * A memory fence that has scope for
1036 * both segments will use the global
1037 * segment, and be executed in the
1038 * global memory pipeline, therefore,
1039 * we set the segment to match the
1040 * global scope only
1041 */
1042 switch (memFenceScopeSegGlobal) {
1043 case Brig::BRIG_MEMORY_SCOPE_NONE:
1044 setFlag(NoScope);
1045 break;
1046 case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1047 setFlag(WorkitemScope);
1048 break;
1049 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1050 setFlag(WorkgroupScope);
1051 break;
1052 case Brig::BRIG_MEMORY_SCOPE_AGENT:
1053 setFlag(DeviceScope);
1054 break;
1055 case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1056 setFlag(SystemScope);
1057 break;
1058 default:
1059 fatal("MemFence has bad global scope type\n");
1060 }
1061 } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
1062 setFlag(GPUStaticInst::GlobalSegment);
1063
1064 switch (memFenceScopeSegGlobal) {
1065 case Brig::BRIG_MEMORY_SCOPE_NONE:
1066 setFlag(NoScope);
1067 break;
1068 case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1069 setFlag(WorkitemScope);
1070 break;
1071 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1072 setFlag(WorkgroupScope);
1073 break;
1074 case Brig::BRIG_MEMORY_SCOPE_AGENT:
1075 setFlag(DeviceScope);
1076 break;
1077 case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1078 setFlag(SystemScope);
1079 break;
1080 default:
1081 fatal("MemFence has bad global scope type\n");
1082 }
1083 } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
1084 setFlag(GPUStaticInst::GroupSegment);
1085
1086 switch (memFenceScopeSegGroup) {
1087 case Brig::BRIG_MEMORY_SCOPE_NONE:
1088 setFlag(NoScope);
1089 break;
1090 case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1091 setFlag(WorkitemScope);
1092 break;
1093 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1094 setFlag(WorkgroupScope);
1095 break;
1096 case Brig::BRIG_MEMORY_SCOPE_AGENT:
1097 setFlag(DeviceScope);
1098 break;
1099 case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1100 setFlag(SystemScope);
1101 break;
1102 default:
1103 fatal("MemFence has bad group scope type\n");
1104 }
1105 } else {
1106 fatal("MemFence constructor: bad scope specifiers\n");
1107 }
1108 }
1109
1110 void
1111 initiateAcc(GPUDynInstPtr gpuDynInst)
1112 {
1113 Wavefront *wave = gpuDynInst->wavefront();
1114 wave->computeUnit->injectGlobalMemFence(gpuDynInst);
1115 }
1116
1117 void
1118 execute(GPUDynInstPtr gpuDynInst)
1119 {
1120 Wavefront *w = gpuDynInst->wavefront();
1121 // 2 cases:
1122 // * memfence to a sequentially consistent memory (e.g., LDS).
1123 // These can be handled as no-ops.
1124 // * memfence to a relaxed consistency cache (e.g., Hermes, Viper,
1125 // etc.). We send a packet, tagged with the memory order and
1126 // scope, and let the GPU coalescer handle it.
1127
1128 if (isGlobalSeg()) {
1129 gpuDynInst->simdId = w->simdId;
1130 gpuDynInst->wfSlotId = w->wfSlotId;
1131 gpuDynInst->wfDynId = w->wfDynId;
1132 gpuDynInst->kern_id = w->kernId;
1133 gpuDynInst->cu_id = w->computeUnit->cu_id;
1134
1135 gpuDynInst->useContinuation = false;
1136 GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
1137 gmp->issueRequest(gpuDynInst);
1138
1139 w->wrGmReqsInPipe--;
1140 w->rdGmReqsInPipe--;
1141 w->memReqsInPipe--;
1142 w->outstandingReqs++;
1143 } else if (isGroupSeg()) {
1144 // no-op
1145 } else {
1146 fatal("MemFence execute: bad op type\n");
1147 }
1148 }
1149 };
1150
1151 class Call : public HsailGPUStaticInst
1152 {
1153 public:
1154 // private helper functions
1155 void calcAddr(Wavefront* w, GPUDynInstPtr m);
1156
1157 void
1158 generateDisassembly()
1159 {
1160 if (dest.disassemble() == "") {
1161 disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(),
1162 src1.disassemble());
1163 } else {
1164 disassembly = csprintf("%s %s (%s) (%s)", opcode,
1165 src0.disassemble(), dest.disassemble(),
1166 src1.disassemble());
1167 }
1168 }
1169
1170 bool
1171 isPseudoOp()
1172 {
1173 std::string func_name = src0.disassemble();
1174 if (func_name.find("__gem5_hsail_op") != std::string::npos) {
1175 return true;
1176 }
1177 return false;
1178 }
1179
1180 // member variables
1181 ListOperand dest;
1182 FunctionRefOperand src0;
1183 ListOperand src1;
1184 HsailCode *func_ptr;
1185
1186 // exec function for pseudo instructions mapped on top of call opcode
1187 void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst);
1188
1189 // user-defined pseudo instructions
1190 void MagicPrintLane(Wavefront *w);
1191 void MagicPrintLane64(Wavefront *w);
1192 void MagicPrintWF32(Wavefront *w);
1193 void MagicPrintWF64(Wavefront *w);
1194 void MagicPrintWFFloat(Wavefront *w);
1195 void MagicSimBreak(Wavefront *w);
1196 void MagicPrefixSum(Wavefront *w);
1197 void MagicReduction(Wavefront *w);
1198 void MagicMaskLower(Wavefront *w);
1199 void MagicMaskUpper(Wavefront *w);
1200 void MagicJoinWFBar(Wavefront *w);
1201 void MagicWaitWFBar(Wavefront *w);
1202 void MagicPanic(Wavefront *w);
1203
1204 void MagicAtomicNRAddGlobalU32Reg(Wavefront *w,
1205 GPUDynInstPtr gpuDynInst);
1206
1207 void MagicAtomicNRAddGroupU32Reg(Wavefront *w,
1208 GPUDynInstPtr gpuDynInst);
1209
1210 void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst);
1211
1212 void MagicXactCasLd(Wavefront *w);
1213 void MagicMostSigThread(Wavefront *w);
1214 void MagicMostSigBroadcast(Wavefront *w);
1215
1216 void MagicPrintWF32ID(Wavefront *w);
1217 void MagicPrintWFID64(Wavefront *w);
1218
1219 Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
1220 : HsailGPUStaticInst(obj, "call")
1221 {
1222 setFlag(ALU);
1223 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1224 dest.init(op_offs, obj);
1225 op_offs = obj->getOperandPtr(ib->operands, 1);
1226 src0.init(op_offs, obj);
1227
1228 func_ptr = nullptr;
1229 std::string func_name = src0.disassemble();
1230 if (!isPseudoOp()) {
1231 func_ptr = dynamic_cast<HsailCode*>(obj->
1232 getFunction(func_name));
1233
1234 if (!func_ptr)
1235 fatal("call::exec cannot find function: %s\n", func_name);
1236 }
1237
1238 op_offs = obj->getOperandPtr(ib->operands, 2);
1239 src1.init(op_offs, obj);
1240 }
1241
1242 bool isVectorRegister(int operandIndex) { return false; }
1243 bool isCondRegister(int operandIndex) { return false; }
1244 bool isScalarRegister(int operandIndex) { return false; }
1245 bool isSrcOperand(int operandIndex) { return false; }
1246 bool isDstOperand(int operandIndex) { return false; }
1247 int getOperandSize(int operandIndex) { return 0; }
1248
1249 int
1250 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
1251 {
1252 return -1;
1253 }
1254
1255 void
1256 execute(GPUDynInstPtr gpuDynInst)
1257 {
1258 Wavefront *w = gpuDynInst->wavefront();
1259
1260 std::string func_name = src0.disassemble();
1261 if (isPseudoOp()) {
1262 execPseudoInst(w, gpuDynInst);
1263 } else {
1264 fatal("Native HSAIL functions are not yet implemented: %s\n",
1265 func_name);
1266 }
1267 }
1268 int numSrcRegOperands() { return 0; }
1269 int numDstRegOperands() { return 0; }
1270 int getNumOperands() { return 2; }
1271 };
1272
/**
 * Generic "not" helper: bitwise complement of the argument, converted
 * back to T on return.
 */
template<typename T>
T
heynot(T arg)
{
    return ~arg;
}

/** For bool, "not" means logical negation rather than complement. */
template<>
inline bool
heynot<bool>(bool arg)
{
    return !arg;
}
1275} // namespace HsailISA
1276
1277#endif // __ARCH_HSAIL_INSTS_DECL_HH__