/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#ifndef __ARCH_HSAIL_INSTS_DECL_HH__
#define __ARCH_HSAIL_INSTS_DECL_HH__

#include <cmath>

#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"
#include "debug/HSAIL.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"

namespace HsailISA
{
    template<typename _DestOperand, typename _SrcOperand>
    class HsailOperandType
    {
      public:
        typedef _DestOperand DestOperand;
        typedef _SrcOperand SrcOperand;
    };

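    // Each of the operand-type bundles below pairs a destination register
    // class with the matching register-or-immediate source class; these
    // correspond to the HSAIL $c (1-bit condition), $s (32-bit), and $d
    // (64-bit) register files, respectively.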
    typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType;
    typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType;
    typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType;

    // The IsBits parameter serves only to disambiguate the B* types from
    // the U* types, which otherwise would be identical (and
    // indistinguishable).
    template<typename _OperandType, typename _CType, Enums::MemType _memType,
             vgpr_type _vgprType, int IsBits=0>
    class HsailDataType
    {
      public:
        typedef _OperandType OperandType;
        typedef _CType CType;
        static const Enums::MemType memType = _memType;
        static const vgpr_type vgprType = _vgprType;
        static const char *label;
    };

    typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1;
    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8;

    typedef HsailDataType<SRegOperandType, uint16_t,
                          Enums::M_U16, VT_32, 1> B16;

    typedef HsailDataType<SRegOperandType, uint32_t,
                          Enums::M_U32, VT_32, 1> B32;

    typedef HsailDataType<DRegOperandType, uint64_t,
                          Enums::M_U64, VT_64, 1> B64;

    typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8;
    typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16;
    typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32;
    typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64;

    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8;
    typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16;
    typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32;
    typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64;

    typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32;
    typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64;

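    // CommonInstBase is the generic ALU format: a single destination plus
    // NumSrcOperands sources of one operand type. In the operand-query
    // methods, indices 0..NumSrcOperands-1 name the sources and index
    // NumSrcOperands names the destination. A plain integer add, for
    // example, would be built as something like ArithInst<U32, 2> (defined
    // below), which disassembles along the lines of "add_u32 $s1,$s2,$s3".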
    template<typename DestOperandType, typename SrcOperandType,
             int NumSrcOperands>
    class CommonInstBase : public HsailGPUStaticInst
    {
      protected:
        typename DestOperandType::DestOperand dest;
        typename SrcOperandType::SrcOperand src[NumSrcOperands];

        void
        generateDisassembly()
        {
            disassembly = csprintf("%s%s %s", opcode, opcode_suffix(),
                                   dest.disassemble());

            for (int i = 0; i < NumSrcOperands; ++i) {
                disassembly += ",";
                disassembly += src[i].disassemble();
            }
        }

        virtual std::string opcode_suffix() = 0;

      public:
        CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                       const char *opcode)
            : HsailGPUStaticInst(obj, opcode)
        {
            setFlag(ALU);

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);

            dest.init(op_offs, obj);

            for (int i = 0; i < NumSrcOperands; ++i) {
                op_offs = obj->getOperandPtr(ib->operands, i + 1);
                src[i].init(op_offs, obj);
            }
        }

        bool isVectorRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isVectorRegister();
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isCondRegister();
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isScalarRegister();
            else
                return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return true;
            return false;
        }

        bool isDstOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex >= NumSrcOperands)
                return true;
            return false;
        }
        int getOperandSize(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].opSize();
            else
                return dest.opSize();
        }
        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());

            if (operandIndex < NumSrcOperands)
                return src[operandIndex].regIndex();
            else
                return dest.regIndex();
        }
        int numSrcRegOperands() {
            int operands = 0;
            for (int i = 0; i < NumSrcOperands; i++) {
                if (src[i].isVectorRegister()) {
                    operands++;
                }
            }
            return operands;
        }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands() { return NumSrcOperands + 1; }
    };

    template<typename DataType, int NumSrcOperands>
    class ArithInst : public CommonInstBase<typename DataType::OperandType,
                                            typename DataType::OperandType,
                                            NumSrcOperands>
    {
      public:
        std::string opcode_suffix() { return csprintf("_%s", DataType::label); }

        ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                  const char *opcode)
            : CommonInstBase<typename DataType::OperandType,
                             typename DataType::OperandType,
                             NumSrcOperands>(ib, obj, opcode)
        {
        }
    };

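    // Base format for three-source instructions whose sources may have
    // different operand types (e.g., a condition plus two data values).
    // Operand indices follow the same convention as above: 0-2 are the
    // sources, 3 is the destination.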
    template<typename DestOperandType, typename Src0OperandType,
             typename Src1OperandType, typename Src2OperandType>
    class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst
    {
      protected:
        typename DestOperandType::DestOperand dest;
        typename Src0OperandType::SrcOperand src0;
        typename Src1OperandType::SrcOperand src1;
        typename Src2OperandType::SrcOperand src2;

        void
        generateDisassembly()
        {
            disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(),
                                   src0.disassemble(), src1.disassemble(),
                                   src2.disassemble());
        }

      public:
        ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
                                      const BrigObject *obj,
                                      const char *opcode)
            : HsailGPUStaticInst(obj, opcode)
        {
            setFlag(ALU);

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            src0.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 2);
            src1.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 3);
            src2.init(op_offs, obj);
        }

        bool isVectorRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.isVectorRegister();
            else if (operandIndex == 1)
                return src1.isVectorRegister();
            else if (operandIndex == 2)
                return src2.isVectorRegister();
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.isCondRegister();
            else if (operandIndex == 1)
                return src1.isCondRegister();
            else if (operandIndex == 2)
                return src2.isCondRegister();
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.isScalarRegister();
            else if (operandIndex == 1)
                return src1.isScalarRegister();
            else if (operandIndex == 2)
                return src2.isScalarRegister();
            else
                return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < 3)
                return true;
            else
                return false;
        }
        bool isDstOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex >= 3)
                return true;
            else
                return false;
        }
        int getOperandSize(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.opSize();
            else if (operandIndex == 1)
                return src1.opSize();
            else if (operandIndex == 2)
                return src2.opSize();
            else
                return dest.opSize();
        }

        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.regIndex();
            else if (operandIndex == 1)
                return src1.regIndex();
            else if (operandIndex == 2)
                return src2.regIndex();
            else
                return dest.regIndex();
        }

        int numSrcRegOperands() {
            int operands = 0;
            if (src0.isVectorRegister()) {
                operands++;
            }
            if (src1.isVectorRegister()) {
                operands++;
            }
            if (src2.isVectorRegister()) {
                operands++;
            }
            return operands;
        }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands() { return 4; }
    };

    template<typename DestDataType, typename Src0DataType,
             typename Src1DataType, typename Src2DataType>
    class ThreeNonUniformSourceInst :
        public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
                                             typename Src0DataType::OperandType,
                                             typename Src1DataType::OperandType,
                                             typename Src2DataType::OperandType>
    {
      public:
        typedef typename DestDataType::CType DestCType;
        typedef typename Src0DataType::CType Src0CType;
        typedef typename Src1DataType::CType Src1CType;
        typedef typename Src2DataType::CType Src2CType;

        ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib,
                                  const BrigObject *obj, const char *opcode)
            : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
                                            typename Src0DataType::OperandType,
                                            typename Src1DataType::OperandType,
                                            typename Src2DataType::OperandType>(ib,
                                                                                obj, opcode)
        {
        }
    };

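    // cmov: dest = src0 ? src1 : src2, with src0 a 1-bit (B1) condition and
    // src1/src2 sharing the instruction's data type.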
    template<typename DataType>
    class CmovInst : public ThreeNonUniformSourceInst<DataType, B1,
                                                      DataType, DataType>
    {
      public:
        CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                 const char *opcode)
            : ThreeNonUniformSourceInst<DataType, B1, DataType,
                                        DataType>(ib, obj, opcode)
        {
        }
    };

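    // Used for the bit-field style operations (e.g., bitextract), where src0
    // is the value being operated on and src1/src2 are a U32 bit offset and
    // bit width.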
    template<typename DataType>
    class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType,
                                                               DataType, U32,
                                                               U32>
    {
      public:
        ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                          const char *opcode)
            : ThreeNonUniformSourceInst<DataType, DataType, U32,
                                        U32>(ib, obj, opcode)
        {
        }
    };

    template<typename DestOperandType, typename Src0OperandType,
             typename Src1OperandType>
    class TwoNonUniformSourceInstBase : public HsailGPUStaticInst
    {
      protected:
        typename DestOperandType::DestOperand dest;
        typename Src0OperandType::SrcOperand src0;
        typename Src1OperandType::SrcOperand src1;

        void
        generateDisassembly()
        {
            disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(),
                                   src0.disassemble(), src1.disassemble());
        }

      public:
        TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
                                    const BrigObject *obj, const char *opcode)
            : HsailGPUStaticInst(obj, opcode)
        {
            setFlag(ALU);

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            src0.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 2);
            src1.init(op_offs, obj);
        }
        bool isVectorRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.isVectorRegister();
            else if (operandIndex == 1)
                return src1.isVectorRegister();
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.isCondRegister();
            else if (operandIndex == 1)
                return src1.isCondRegister();
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.isScalarRegister();
            else if (operandIndex == 1)
                return src1.isScalarRegister();
            else
                return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < 2)
                return true;
            else
                return false;
        }
        bool isDstOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex >= 2)
                return true;
            else
                return false;
        }
        int getOperandSize(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.opSize();
            else if (operandIndex == 1)
                return src1.opSize();
            else
                return dest.opSize();
        }

        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.regIndex();
            else if (operandIndex == 1)
                return src1.regIndex();
            else
                return dest.regIndex();
        }

        int numSrcRegOperands() {
            int operands = 0;
            if (src0.isVectorRegister()) {
                operands++;
            }
            if (src1.isVectorRegister()) {
                operands++;
            }
            return operands;
        }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands() { return 3; }
    };

    template<typename DestDataType, typename Src0DataType,
             typename Src1DataType>
    class TwoNonUniformSourceInst :
        public TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
                                           typename Src0DataType::OperandType,
                                           typename Src1DataType::OperandType>
    {
      public:
        typedef typename DestDataType::CType DestCType;
        typedef typename Src0DataType::CType Src0CType;
        typedef typename Src1DataType::CType Src1CType;

        TwoNonUniformSourceInst(const Brig::BrigInstBase *ib,
                                const BrigObject *obj, const char *opcode)
            : TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
                                          typename Src0DataType::OperandType,
                                          typename Src1DataType::OperandType>(ib,
                                                                              obj, opcode)
        {
        }
    };

    // helper function for ClassInst
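    //
    // The class_* instruction tests src0 against a condition mask held in
    // src1. The mask bits assumed below are: 0x1/0x2 signaling/quiet NaN
    // (not distinguished here), 0x4 -inf, 0x8 negative normal, 0x10 negative
    // subnormal, 0x20 -0.0, 0x40 +0.0, 0x80 positive subnormal, 0x100
    // positive normal, and 0x200 +inf; the sign of src0 selects the negative
    // or positive group of checks.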
    template<typename T>
    bool
    fpclassify(T src0, uint32_t src1)
    {
        int fpclass = std::fpclassify(src0);

        if ((src1 & 0x3) && (fpclass == FP_NAN)) {
            return true;
        }

        if (std::signbit(src0)) {
            if ((src1 & 0x4) && fpclass == FP_INFINITE)
                return true;
            if ((src1 & 0x8) && fpclass == FP_NORMAL)
                return true;
            if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
                return true;
            if ((src1 & 0x20) && fpclass == FP_ZERO)
                return true;
        } else {
            if ((src1 & 0x40) && fpclass == FP_ZERO)
                return true;
            if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
                return true;
            if ((src1 & 0x100) && fpclass == FP_NORMAL)
                return true;
            if ((src1 & 0x200) && fpclass == FP_INFINITE)
                return true;
        }
        return false;
    }

    template<typename DataType>
    class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32>
    {
      public:
        ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                  const char *opcode)
            : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode)
        {
        }
    };

    template<typename DataType>
    class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32>
    {
      public:
        ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                  const char *opcode)
            : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode)
        {
        }
    };

    // helper function for CmpInst
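    //
    // Maps a BRIG compare operation onto the corresponding C++ comparison.
    // The ordered and unordered ("U") variants, as well as the "S"-prefixed
    // forms, are treated identically here, so NaN operands simply follow the
    // IEEE-754 behavior of the C++ operators; NUM/NAN test for
    // ordered/unordered operands explicitly via self-comparison.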
    template<typename T>
    bool
    compare(T src0, T src1, Brig::BrigCompareOperation cmpOp)
    {
        using namespace Brig;

        switch (cmpOp) {
          case BRIG_COMPARE_EQ:
          case BRIG_COMPARE_EQU:
          case BRIG_COMPARE_SEQ:
          case BRIG_COMPARE_SEQU:
            return (src0 == src1);

          case BRIG_COMPARE_NE:
          case BRIG_COMPARE_NEU:
          case BRIG_COMPARE_SNE:
          case BRIG_COMPARE_SNEU:
            return (src0 != src1);

          case BRIG_COMPARE_LT:
          case BRIG_COMPARE_LTU:
          case BRIG_COMPARE_SLT:
          case BRIG_COMPARE_SLTU:
            return (src0 < src1);

          case BRIG_COMPARE_LE:
          case BRIG_COMPARE_LEU:
          case BRIG_COMPARE_SLE:
          case BRIG_COMPARE_SLEU:
            return (src0 <= src1);

          case BRIG_COMPARE_GT:
          case BRIG_COMPARE_GTU:
          case BRIG_COMPARE_SGT:
          case BRIG_COMPARE_SGTU:
            return (src0 > src1);

          case BRIG_COMPARE_GE:
          case BRIG_COMPARE_GEU:
          case BRIG_COMPARE_SGE:
          case BRIG_COMPARE_SGEU:
            return (src0 >= src1);

          case BRIG_COMPARE_NUM:
          case BRIG_COMPARE_SNUM:
            return (src0 == src0) && (src1 == src1);

          case BRIG_COMPARE_NAN:
          case BRIG_COMPARE_SNAN:
            return (src0 != src0) || (src1 != src1);

          default:
            fatal("Bad cmpOp value %d\n", (int)cmpOp);
        }
    }

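    // Returns the offset from the MSB of the first significant bit of src0
    // (the first set bit for non-negative values, the first cleared bit for
    // negative ones); returns -1 when src0 is zero.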
    template<typename T>
    int32_t
    firstbit(T src0)
    {
        if (!src0)
            return -1;

        // handle positive and negative numbers
        T tmp = ((int64_t)src0 < 0) ? (~src0) : (src0);

        // the starting pos is MSB
        int pos = 8 * sizeof(T) - 1;
        int cnt = 0;

        // search for the first bit set to 1
        while (!(tmp & (1ULL << pos))) {
            ++cnt;
            --pos;
        }
        return cnt;
    }

    const char* cmpOpToString(Brig::BrigCompareOperation cmpOp);

    template<typename DestOperandType, typename SrcOperandType>
    class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType,
                                              2>
    {
      protected:
        Brig::BrigCompareOperation cmpOp;

      public:
        CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                    const char *_opcode)
            : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj,
                                                                 _opcode)
        {
            assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP);
            Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib;
            cmpOp = (Brig::BrigCompareOperation)i->compare;
        }
    };

    template<typename DestDataType, typename SrcDataType>
    class CmpInst : public CmpInstBase<typename DestDataType::OperandType,
                                       typename SrcDataType::OperandType>
    {
      public:
        std::string
        opcode_suffix()
        {
            return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp),
                            DestDataType::label, SrcDataType::label);
        }

        CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                const char *_opcode)
            : CmpInstBase<typename DestDataType::OperandType,
                          typename SrcDataType::OperandType>(ib, obj, _opcode)
        {
        }
    };

    template<typename DestDataType, typename SrcDataType>
    class CvtInst : public CommonInstBase<typename DestDataType::OperandType,
                                          typename SrcDataType::OperandType, 1>
    {
      public:
        std::string opcode_suffix()
        {
            return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
        }

        CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                const char *_opcode)
            : CommonInstBase<typename DestDataType::OperandType,
                             typename SrcDataType::OperandType,
                             1>(ib, obj, _opcode)
        {
        }
    };

    template<typename DestDataType, typename SrcDataType>
    class PopcountInst :
        public CommonInstBase<typename DestDataType::OperandType,
                              typename SrcDataType::OperandType, 1>
    {
      public:
        std::string opcode_suffix()
        {
            return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
        }

        PopcountInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
            : CommonInstBase<typename DestDataType::OperandType,
                             typename SrcDataType::OperandType,
                             1>(ib, obj, _opcode)
        {
        }
    };

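    // "Special" instructions (ret, barrier, memfence, ...) carry few or no
    // explicit register operands, so the operand-query interface reduces to
    // trivial implementations in the bases below.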
    class SpecialInstNoSrcNoDest : public HsailGPUStaticInst
    {
      public:
        SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib,
                               const BrigObject *obj, const char *_opcode)
            : HsailGPUStaticInst(obj, _opcode)
        {
        }

        bool isVectorRegister(int operandIndex) { return false; }
        bool isCondRegister(int operandIndex) { return false; }
        bool isScalarRegister(int operandIndex) { return false; }
        bool isSrcOperand(int operandIndex) { return false; }
        bool isDstOperand(int operandIndex) { return false; }
        int getOperandSize(int operandIndex) { return 0; }

        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
        {
            return -1;
        }

        int numSrcRegOperands() { return 0; }
        int numDstRegOperands() { return 0; }
        int getNumOperands() { return 0; }
    };

    template<typename DestOperandType>
    class SpecialInstNoSrcBase : public HsailGPUStaticInst
    {
      protected:
        typename DestOperandType::DestOperand dest;

        void generateDisassembly()
        {
            disassembly = csprintf("%s %s", opcode, dest.disassemble());
        }

      public:
        SpecialInstNoSrcBase(const Brig::BrigInstBase *ib,
                             const BrigObject *obj, const char *_opcode)
            : HsailGPUStaticInst(obj, _opcode)
        {
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);
        }

        bool isVectorRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex) { return false; }
        bool isDstOperand(int operandIndex) { return true; }
        int getOperandSize(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.opSize();
        }

        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.regIndex();
        }

        int numSrcRegOperands() { return 0; }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands() { return 1; }
    };

    template<typename DestDataType>
    class SpecialInstNoSrc :
        public SpecialInstNoSrcBase<typename DestDataType::OperandType>
    {
      public:
        typedef typename DestDataType::CType DestCType;

        SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj,
                         const char *_opcode)
            : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj,
                                                                       _opcode)
        {
        }
    };

    template<typename DestOperandType>
    class SpecialInst1SrcBase : public HsailGPUStaticInst
    {
      protected:
        typedef int SrcCType;  // used in execute() template

        typename DestOperandType::DestOperand dest;
        ImmOperand<SrcCType> src0;

        void
        generateDisassembly()
        {
            disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(),
                                   src0.disassemble());
        }

      public:
        SpecialInst1SrcBase(const Brig::BrigInstBase *ib,
                            const BrigObject *obj, const char *_opcode)
            : HsailGPUStaticInst(obj, _opcode)
        {
            setFlag(ALU);

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            src0.init(op_offs, obj);
        }
        bool isVectorRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex) { return false; }
        bool isDstOperand(int operandIndex) { return true; }
        int getOperandSize(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.opSize();
        }

        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.regIndex();
        }

        int numSrcRegOperands() { return 0; }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands() { return 1; }
    };

    template<typename DestDataType>
    class SpecialInst1Src :
        public SpecialInst1SrcBase<typename DestDataType::OperandType>
    {
      public:
        typedef typename DestDataType::CType DestCType;

        SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj,
                        const char *_opcode)
            : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj,
                                                                      _opcode)
        {
        }
    };

    class Ret : public SpecialInstNoSrcNoDest
    {
      public:
        typedef SpecialInstNoSrcNoDest Base;

        Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
            : Base(ib, obj, "ret")
        {
            setFlag(GPUStaticInst::Return);
        }

        void execute(GPUDynInstPtr gpuDynInst);
    };

    class Barrier : public SpecialInstNoSrcNoDest
    {
      public:
        typedef SpecialInstNoSrcNoDest Base;
        uint8_t width;

        Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
            : Base(ib, obj, "barrier")
        {
            setFlag(GPUStaticInst::MemBarrier);
            assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
            width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
        }

        void execute(GPUDynInstPtr gpuDynInst);
    };

    class MemFence : public SpecialInstNoSrcNoDest
    {
      public:
        typedef SpecialInstNoSrcNoDest Base;

        Brig::BrigMemoryOrder memFenceMemOrder;
        Brig::BrigMemoryScope memFenceScopeSegGroup;
        Brig::BrigMemoryScope memFenceScopeSegGlobal;
        Brig::BrigMemoryScope memFenceScopeSegImage;

        MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj)
            : Base(ib, obj, "memfence")
        {
            assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE);

            memFenceScopeSegGlobal = (Brig::BrigMemoryScope)
                ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope;

            memFenceScopeSegGroup = (Brig::BrigMemoryScope)
                ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope;

            memFenceScopeSegImage = (Brig::BrigMemoryScope)
                ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope;

            memFenceMemOrder = (Brig::BrigMemoryOrder)
                ((Brig::BrigInstMemFence*)ib)->memoryOrder;

            setFlag(MemoryRef);
            setFlag(GPUStaticInst::MemFence);

            switch (memFenceMemOrder) {
              case Brig::BRIG_MEMORY_ORDER_NONE:
                setFlag(NoOrder);
                break;
              case Brig::BRIG_MEMORY_ORDER_RELAXED:
                setFlag(RelaxedOrder);
                break;
              case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
                setFlag(Acquire);
                break;
              case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
                setFlag(Release);
                break;
              case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
                setFlag(AcquireRelease);
                break;
              default:
                fatal("MemInst has bad BrigMemoryOrder\n");
            }

            // set inst flags based on scopes
            if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
                memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
                setFlag(GPUStaticInst::GlobalSegment);

                /**
                 * A memory fence that has scope for both segments will use
                 * the global segment and be executed in the global memory
                 * pipeline; we therefore set the scope flag to match the
                 * global scope only.
                 */
                switch (memFenceScopeSegGlobal) {
                  case Brig::BRIG_MEMORY_SCOPE_NONE:
                    setFlag(NoScope);
                    break;
                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
                    setFlag(WorkitemScope);
                    break;
                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
                    setFlag(WorkgroupScope);
                    break;
                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
                    setFlag(DeviceScope);
                    break;
                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
                    setFlag(SystemScope);
                    break;
                  default:
                    fatal("MemFence has bad global scope type\n");
                }
            } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
                setFlag(GPUStaticInst::GlobalSegment);

                switch (memFenceScopeSegGlobal) {
                  case Brig::BRIG_MEMORY_SCOPE_NONE:
                    setFlag(NoScope);
                    break;
                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
                    setFlag(WorkitemScope);
                    break;
                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
                    setFlag(WorkgroupScope);
                    break;
                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
                    setFlag(DeviceScope);
                    break;
                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
                    setFlag(SystemScope);
                    break;
                  default:
                    fatal("MemFence has bad global scope type\n");
                }
            } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
                setFlag(GPUStaticInst::GroupSegment);

                switch (memFenceScopeSegGroup) {
                  case Brig::BRIG_MEMORY_SCOPE_NONE:
                    setFlag(NoScope);
                    break;
                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
                    setFlag(WorkitemScope);
                    break;
                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
                    setFlag(WorkgroupScope);
                    break;
                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
                    setFlag(DeviceScope);
                    break;
                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
                    setFlag(SystemScope);
                    break;
                  default:
                    fatal("MemFence has bad group scope type\n");
                }
            } else {
                fatal("MemFence constructor: bad scope specifiers\n");
            }
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst)
        {
            Wavefront *wave = gpuDynInst->wavefront();
            wave->computeUnit->injectGlobalMemFence(gpuDynInst);
        }

        void
        execute(GPUDynInstPtr gpuDynInst)
        {
            Wavefront *w = gpuDynInst->wavefront();
            // 2 cases:
            //   * memfence to a sequentially consistent memory (e.g., LDS).
            //     These can be handled as no-ops.
            //   * memfence to a relaxed consistency cache (e.g., Hermes,
            //     Viper, etc.). We send a packet, tagged with the memory
            //     order and scope, and let the GPU coalescer handle it.

            if (isGlobalSeg()) {
                gpuDynInst->simdId = w->simdId;
                gpuDynInst->wfSlotId = w->wfSlotId;
                gpuDynInst->wfDynId = w->wfDynId;
                gpuDynInst->kern_id = w->kernId;
                gpuDynInst->cu_id = w->computeUnit->cu_id;

                gpuDynInst->useContinuation = false;
                GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
                gmp->issueRequest(gpuDynInst);

                w->wrGmReqsInPipe--;
                w->rdGmReqsInPipe--;
                w->memReqsInPipe--;
                w->outstandingReqs++;
            } else if (isGroupSeg()) {
                // no-op
            } else {
                fatal("MemFence execute: bad op type\n");
            }
        }
    };

    class Call : public HsailGPUStaticInst
    {
      public:
        // private helper functions
        void calcAddr(Wavefront* w, GPUDynInstPtr m);

        void
        generateDisassembly()
        {
            if (dest.disassemble() == "") {
                disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(),
                                       src1.disassemble());
            } else {
                disassembly = csprintf("%s %s (%s) (%s)", opcode,
                                       src0.disassemble(), dest.disassemble(),
                                       src1.disassemble());
            }
        }

        bool
        isPseudoOp()
        {
            std::string func_name = src0.disassemble();
            if (func_name.find("__gem5_hsail_op") != std::string::npos) {
                return true;
            }
            return false;
        }

        // member variables
        ListOperand dest;
        FunctionRefOperand src0;
        ListOperand src1;
        HsailCode *func_ptr;

        // exec function for pseudo instructions mapped on top of call opcode
        void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst);

        // user-defined pseudo instructions
        void MagicPrintLane(Wavefront *w);
        void MagicPrintLane64(Wavefront *w);
        void MagicPrintWF32(Wavefront *w);
        void MagicPrintWF64(Wavefront *w);
        void MagicPrintWFFloat(Wavefront *w);
        void MagicSimBreak(Wavefront *w);
        void MagicPrefixSum(Wavefront *w);
        void MagicReduction(Wavefront *w);
        void MagicMaskLower(Wavefront *w);
        void MagicMaskUpper(Wavefront *w);
        void MagicJoinWFBar(Wavefront *w);
        void MagicWaitWFBar(Wavefront *w);
        void MagicPanic(Wavefront *w);

        void MagicAtomicNRAddGlobalU32Reg(Wavefront *w,
                                          GPUDynInstPtr gpuDynInst);

        void MagicAtomicNRAddGroupU32Reg(Wavefront *w,
                                         GPUDynInstPtr gpuDynInst);

        void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst);

        void MagicXactCasLd(Wavefront *w);
        void MagicMostSigThread(Wavefront *w);
        void MagicMostSigBroadcast(Wavefront *w);

        void MagicPrintWF32ID(Wavefront *w);
        void MagicPrintWFID64(Wavefront *w);

        Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
            : HsailGPUStaticInst(obj, "call")
        {
            setFlag(ALU);
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);
            op_offs = obj->getOperandPtr(ib->operands, 1);
            src0.init(op_offs, obj);

            func_ptr = nullptr;
            std::string func_name = src0.disassemble();
            if (!isPseudoOp()) {
                func_ptr = dynamic_cast<HsailCode*>(obj->
                                                    getFunction(func_name));

                if (!func_ptr)
                    fatal("call::exec cannot find function: %s\n", func_name);
            }

            op_offs = obj->getOperandPtr(ib->operands, 2);
            src1.init(op_offs, obj);
        }

        bool isVectorRegister(int operandIndex) { return false; }
        bool isCondRegister(int operandIndex) { return false; }
        bool isScalarRegister(int operandIndex) { return false; }
        bool isSrcOperand(int operandIndex) { return false; }
        bool isDstOperand(int operandIndex) { return false; }
        int getOperandSize(int operandIndex) { return 0; }

        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
        {
            return -1;
        }

        void
        execute(GPUDynInstPtr gpuDynInst)
        {
            Wavefront *w = gpuDynInst->wavefront();

            std::string func_name = src0.disassemble();
            if (isPseudoOp()) {
                execPseudoInst(w, gpuDynInst);
            } else {
                fatal("Native HSAIL functions are not yet implemented: %s\n",
                      func_name);
            }
        }
        int numSrcRegOperands() { return 0; }
        int numDstRegOperands() { return 0; }
        int getNumOperands() { return 2; }
    };

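    // Bitwise-not helper; the bool specialization avoids operator~, which
    // would promote the value to int (~true == -2, which converts back to
    // true) rather than logically negating it.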
    template<typename T> T heynot(T arg) { return ~arg; }
    template<> inline bool heynot<bool>(bool arg) { return !arg; }
} // namespace HsailISA

#endif // __ARCH_HSAIL_INSTS_DECL_HH__