Deleted Added
sdiff udiff text old ( 11700:7d4d424c9f17 ) new ( 11737:50eceddc2286 )
full compact
1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __ARCH_HSAIL_INSTS_DECL_HH__
37#define __ARCH_HSAIL_INSTS_DECL_HH__
38
39#include <cmath>
40
41#include "arch/hsail/insts/gpu_static_inst.hh"
42#include "arch/hsail/operand.hh"
43#include "debug/HSAIL.hh"
44#include "gpu-compute/gpu_dyn_inst.hh"
45#include "gpu-compute/shader.hh"
46
47namespace HsailISA
48{
49 template<typename _DestOperand, typename _SrcOperand>
50 class HsailOperandType
51 {
52 public:
53 typedef _DestOperand DestOperand;
54 typedef _SrcOperand SrcOperand;
55 };
56
57 typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType;
58 typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType;
59 typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType;
60
    // The IsBits parameter serves only to disambiguate the B* types from
    // the U* types, which otherwise would be identical (and
    // indistinguishable).
64 template<typename _OperandType, typename _CType, Enums::MemType _memType,
65 vgpr_type _vgprType, int IsBits=0>
66 class HsailDataType
67 {
68 public:
69 typedef _OperandType OperandType;
70 typedef _CType CType;
71 static const Enums::MemType memType = _memType;
72 static const vgpr_type vgprType = _vgprType;
73 static const char *label;
74 };
75
76 typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1;
77 typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8;
78
79 typedef HsailDataType<SRegOperandType, uint16_t,
80 Enums::M_U16, VT_32, 1> B16;
81
82 typedef HsailDataType<SRegOperandType, uint32_t,
83 Enums::M_U32, VT_32, 1> B32;
84
85 typedef HsailDataType<DRegOperandType, uint64_t,
86 Enums::M_U64, VT_64, 1> B64;
87
88 typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8;
89 typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16;
90 typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32;
91 typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64;
92
93 typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8;
94 typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16;
95 typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32;
96 typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64;
97
98 typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32;
99 typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64;
100
    // Common base for ALU instructions with one destination and a fixed
    // number of same-typed sources.  BRIG operand slot 0 is the
    // destination; slots 1..NumSrcOperands are the sources.
    //
    // Accessor index convention used throughout: indices
    // 0..NumSrcOperands-1 name the sources, and the final index
    // (NumSrcOperands) names the destination.
    template<typename DestOperandType, typename SrcOperandType,
             int NumSrcOperands>
    class CommonInstBase : public HsailGPUStaticInst
    {
      protected:
        typename DestOperandType::DestOperand dest;
        typename SrcOperandType::SrcOperand src[NumSrcOperands];

        // Builds "<opcode><suffix> dest,src0,...,srcN-1".
        void
        generateDisassembly()
        {
            disassembly = csprintf("%s%s %s", opcode, opcode_suffix(),
                                   dest.disassemble());

            for (int i = 0; i < NumSrcOperands; ++i) {
                disassembly += ",";
                disassembly += src[i].disassemble();
            }
        }

        // Type-dependent opcode suffix (e.g. "_u32"); supplied by the
        // concrete instruction class.
        virtual std::string opcode_suffix() = 0;

      public:
        CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                       const char *opcode)
            : HsailGPUStaticInst(obj, opcode)
        {
            setFlag(ALU);

            // Destination first (BRIG slot 0), then the sources in order.
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);

            dest.init(op_offs, obj);

            for (int i = 0; i < NumSrcOperands; ++i) {
                op_offs = obj->getOperandPtr(ib->operands, i + 1);
                src[i].init(op_offs, obj);
            }
        }

        bool isVectorRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isVectorRegister();
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isCondRegister();
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isScalarRegister();
            else
                return dest.isScalarRegister();
        }
        // True for the source indices, false for the destination index.
        bool isSrcOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return true;
            return false;
        }

        bool isDstOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex >= NumSrcOperands)
                return true;
            return false;
        }
        int getOperandSize(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].opSize();
            else
                return dest.opSize();
        }
        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());

            if (operandIndex < NumSrcOperands)
                return src[operandIndex].regIndex();
            else
                return dest.regIndex();
        }
        // Counts only vector-register sources; immediates do not count.
        int numSrcRegOperands() {
            int operands = 0;
            for (int i = 0; i < NumSrcOperands; i++) {
                if (src[i].isVectorRegister()) {
                    operands++;
                }
            }
            return operands;
        }
        // 1 if the destination is a vector register, else 0.
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands() { return NumSrcOperands + 1; }
    };
203
204 template<typename DataType, int NumSrcOperands>
205 class ArithInst : public CommonInstBase<typename DataType::OperandType,
206 typename DataType::OperandType,
207 NumSrcOperands>
208 {
209 public:
210 std::string opcode_suffix() { return csprintf("_%s", DataType::label); }
211
212 ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
213 const char *opcode)
214 : CommonInstBase<typename DataType::OperandType,
215 typename DataType::OperandType,
216 NumSrcOperands>(ib, obj, opcode)
217 {
218 }
219 };
220
    // Base for instructions whose three sources may each have a
    // different operand type (e.g. cmov's b1 selector, bitextract's u32
    // offset/width).  BRIG operand slot 0 is the destination; slots 1-3
    // are src0-src2.
    //
    // Accessor index convention: 0 -> src0, 1 -> src1, 2 -> src2, any
    // other index (i.e. 3) -> dest.
    template<typename DestOperandType, typename Src0OperandType,
             typename Src1OperandType, typename Src2OperandType>
    class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst
    {
      protected:
        typename DestOperandType::DestOperand dest;
        typename Src0OperandType::SrcOperand src0;
        typename Src1OperandType::SrcOperand src1;
        typename Src2OperandType::SrcOperand src2;

        // Builds "<opcode> dest,src0,src1,src2".
        void
        generateDisassembly()
        {
            disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(),
                                   src0.disassemble(), src1.disassemble(),
                                   src2.disassemble());
        }

      public:
        ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
                                      const BrigObject *obj,
                                      const char *opcode)
            : HsailGPUStaticInst(obj, opcode)
        {
            setFlag(ALU);

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            src0.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 2);
            src1.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 3);
            src2.init(op_offs, obj);
        }

        bool isVectorRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.isVectorRegister();
            else if (operandIndex == 1)
                return src1.isVectorRegister();
            else if (operandIndex == 2)
                return src2.isVectorRegister();
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.isCondRegister();
            else if (operandIndex == 1)
                return src1.isCondRegister();
            else if (operandIndex == 2)
                return src2.isCondRegister();
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.isScalarRegister();
            else if (operandIndex == 1)
                return src1.isScalarRegister();
            else if (operandIndex == 2)
                return src2.isScalarRegister();
            else
                return dest.isScalarRegister();
        }
        // Indices 0-2 are sources; index 3 is the destination.
        bool isSrcOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < 3)
                return true;
            else
                return false;
        }
        bool isDstOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex >= 3)
                return true;
            else
                return false;
        }
        int getOperandSize(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.opSize();
            else if (operandIndex == 1)
                return src1.opSize();
            else if (operandIndex == 2)
                return src2.opSize();
            else
                return dest.opSize();
        }

        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.regIndex();
            else if (operandIndex == 1)
                return src1.regIndex();
            else if (operandIndex == 2)
                return src2.regIndex();
            else
                return dest.regIndex();
        }

        // Counts only vector-register sources; immediates do not count.
        int numSrcRegOperands() {
            int operands = 0;
            if (src0.isVectorRegister()) {
                operands++;
            }
            if (src1.isVectorRegister()) {
                operands++;
            }
            if (src2.isVectorRegister()) {
                operands++;
            }
            return operands;
        }
        // 1 if the destination is a vector register, else 0.
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands() { return 4; }
    };
349
350 template<typename DestDataType, typename Src0DataType,
351 typename Src1DataType, typename Src2DataType>
352 class ThreeNonUniformSourceInst :
353 public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
354 typename Src0DataType::OperandType,
355 typename Src1DataType::OperandType,
356 typename Src2DataType::OperandType>
357 {
358 public:
359 typedef typename DestDataType::CType DestCType;
360 typedef typename Src0DataType::CType Src0CType;
361 typedef typename Src1DataType::CType Src1CType;
362 typedef typename Src2DataType::CType Src2CType;
363
364 ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib,
365 const BrigObject *obj, const char *opcode)
366 : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
367 typename Src0DataType::OperandType,
368 typename Src1DataType::OperandType,
369 typename Src2DataType::OperandType>(ib,
370 obj, opcode)
371 {
372 }
373 };
374
375 template<typename DataType>
376 class CmovInst : public ThreeNonUniformSourceInst<DataType, B1,
377 DataType, DataType>
378 {
379 public:
380 CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
381 const char *opcode)
382 : ThreeNonUniformSourceInst<DataType, B1, DataType,
383 DataType>(ib, obj, opcode)
384 {
385 }
386 };
387
388 template<typename DataType>
389 class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType,
390 DataType, U32,
391 U32>
392 {
393 public:
394 ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
395 const char *opcode)
396 : ThreeNonUniformSourceInst<DataType, DataType, U32,
397 U32>(ib, obj, opcode)
398 {
399 }
400 };
401
    // Base for instructions whose two sources may have different
    // operand types (e.g. shifts with a u32 amount, class with a u32
    // mask).  BRIG operand slot 0 is the destination; slots 1-2 are
    // src0-src1.
    //
    // Accessor index convention: 0 -> src0, 1 -> src1, any other index
    // (i.e. 2) -> dest.
    template<typename DestOperandType, typename Src0OperandType,
             typename Src1OperandType>
    class TwoNonUniformSourceInstBase : public HsailGPUStaticInst
    {
      protected:
        typename DestOperandType::DestOperand dest;
        typename Src0OperandType::SrcOperand src0;
        typename Src1OperandType::SrcOperand src1;

        // Builds "<opcode> dest,src0,src1".
        void
        generateDisassembly()
        {
            disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(),
                                   src0.disassemble(), src1.disassemble());
        }


      public:
        TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
                                    const BrigObject *obj, const char *opcode)
            : HsailGPUStaticInst(obj, opcode)
        {
            setFlag(ALU);

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            src0.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 2);
            src1.init(op_offs, obj);
        }
        bool isVectorRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.isVectorRegister();
            else if (operandIndex == 1)
                return src1.isVectorRegister();
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.isCondRegister();
            else if (operandIndex == 1)
                return src1.isCondRegister();
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.isScalarRegister();
            else if (operandIndex == 1)
                return src1.isScalarRegister();
            else
                return dest.isScalarRegister();
        }
        // Indices 0-1 are sources; index 2 is the destination.
        bool isSrcOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < 2)
                return true;
            else
                return false;
        }
        bool isDstOperand(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex >= 2)
                return true;
            else
                return false;
        }
        int getOperandSize(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.opSize();
            else if (operandIndex == 1)
                return src1.opSize();
            else
                return dest.opSize();
        }

        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return src0.regIndex();
            else if (operandIndex == 1)
                return src1.regIndex();
            else
                return dest.regIndex();
        }

        // Counts only vector-register sources; immediates do not count.
        int numSrcRegOperands() {
            int operands = 0;
            if (src0.isVectorRegister()) {
                operands++;
            }
            if (src1.isVectorRegister()) {
                operands++;
            }
            return operands;
        }
        // 1 if the destination is a vector register, else 0.
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands() { return 3; }
    };
511
512 template<typename DestDataType, typename Src0DataType,
513 typename Src1DataType>
514 class TwoNonUniformSourceInst :
515 public TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
516 typename Src0DataType::OperandType,
517 typename Src1DataType::OperandType>
518 {
519 public:
520 typedef typename DestDataType::CType DestCType;
521 typedef typename Src0DataType::CType Src0CType;
522 typedef typename Src1DataType::CType Src1CType;
523
524 TwoNonUniformSourceInst(const Brig::BrigInstBase *ib,
525 const BrigObject *obj, const char *opcode)
526 : TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
527 typename Src0DataType::OperandType,
528 typename Src1DataType::OperandType>(ib,
529 obj, opcode)
530 {
531 }
532 };
533
534 // helper function for ClassInst
535 template<typename T>
536 bool
537 fpclassify(T src0, uint32_t src1)
538 {
539 int fpclass = std::fpclassify(src0);
540
541 if ((src1 & 0x3) && (fpclass == FP_NAN)) {
542 return true;
543 }
544
545 if (src0 <= -0.0) {
546 if ((src1 & 0x4) && fpclass == FP_INFINITE)
547 return true;
548 if ((src1 & 0x8) && fpclass == FP_NORMAL)
549 return true;
550 if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
551 return true;
552 if ((src1 & 0x20) && fpclass == FP_ZERO)
553 return true;
554 } else {
555 if ((src1 & 0x40) && fpclass == FP_ZERO)
556 return true;
557 if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
558 return true;
559 if ((src1 & 0x100) && fpclass == FP_NORMAL)
560 return true;
561 if ((src1 & 0x200) && fpclass == FP_INFINITE)
562 return true;
563 }
564 return false;
565 }
566
567 template<typename DataType>
568 class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32>
569 {
570 public:
571 ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
572 const char *opcode)
573 : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode)
574 {
575 }
576 };
577
578 template<typename DataType>
579 class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32>
580 {
581 public:
582 ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
583 const char *opcode)
584 : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode)
585 {
586 }
587 };
588
589 // helper function for CmpInst
590 template<typename T>
591 bool
592 compare(T src0, T src1, Brig::BrigCompareOperation cmpOp)
593 {
594 using namespace Brig;
595
596 switch (cmpOp) {
597 case BRIG_COMPARE_EQ:
598 case BRIG_COMPARE_EQU:
599 case BRIG_COMPARE_SEQ:
600 case BRIG_COMPARE_SEQU:
601 return (src0 == src1);
602
603 case BRIG_COMPARE_NE:
604 case BRIG_COMPARE_NEU:
605 case BRIG_COMPARE_SNE:
606 case BRIG_COMPARE_SNEU:
607 return (src0 != src1);
608
609 case BRIG_COMPARE_LT:
610 case BRIG_COMPARE_LTU:
611 case BRIG_COMPARE_SLT:
612 case BRIG_COMPARE_SLTU:
613 return (src0 < src1);
614
615 case BRIG_COMPARE_LE:
616 case BRIG_COMPARE_LEU:
617 case BRIG_COMPARE_SLE:
618 case BRIG_COMPARE_SLEU:
619 return (src0 <= src1);
620
621 case BRIG_COMPARE_GT:
622 case BRIG_COMPARE_GTU:
623 case BRIG_COMPARE_SGT:
624 case BRIG_COMPARE_SGTU:
625 return (src0 > src1);
626
627 case BRIG_COMPARE_GE:
628 case BRIG_COMPARE_GEU:
629 case BRIG_COMPARE_SGE:
630 case BRIG_COMPARE_SGEU:
631 return (src0 >= src1);
632
633 case BRIG_COMPARE_NUM:
634 case BRIG_COMPARE_SNUM:
635 return (src0 == src0) || (src1 == src1);
636
637 case BRIG_COMPARE_NAN:
638 case BRIG_COMPARE_SNAN:
639 return (src0 != src0) || (src1 != src1);
640
641 default:
642 fatal("Bad cmpOp value %d\n", (int)cmpOp);
643 }
644 }
645
646 template<typename T>
647 int32_t
648 firstbit(T src0)
649 {
650 if (!src0)
651 return -1;
652
653 //handle positive and negative numbers
654 T tmp = ((int64_t)src0 < 0) ? (~src0) : (src0);
655
656 //the starting pos is MSB
657 int pos = 8 * sizeof(T) - 1;
658 int cnt = 0;
659
660 //search the first bit set to 1
661 while (!(tmp & (1 << pos))) {
662 ++cnt;
663 --pos;
664 }
665 return cnt;
666 }
667
668 const char* cmpOpToString(Brig::BrigCompareOperation cmpOp);
669
670 template<typename DestOperandType, typename SrcOperandType>
671 class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType,
672 2>
673 {
674 protected:
675 Brig::BrigCompareOperation cmpOp;
676
677 public:
678 CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
679 const char *_opcode)
680 : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj,
681 _opcode)
682 {
683 assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP);
684 Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib;
685 cmpOp = (Brig::BrigCompareOperation)i->compare;
686 }
687 };
688
689 template<typename DestDataType, typename SrcDataType>
690 class CmpInst : public CmpInstBase<typename DestDataType::OperandType,
691 typename SrcDataType::OperandType>
692 {
693 public:
694 std::string
695 opcode_suffix()
696 {
697 return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp),
698 DestDataType::label, SrcDataType::label);
699 }
700
701 CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
702 const char *_opcode)
703 : CmpInstBase<typename DestDataType::OperandType,
704 typename SrcDataType::OperandType>(ib, obj, _opcode)
705 {
706 }
707 };
708
709 template<typename DestDataType, typename SrcDataType>
710 class CvtInst : public CommonInstBase<typename DestDataType::OperandType,
711 typename SrcDataType::OperandType, 1>
712 {
713 public:
714 std::string opcode_suffix()
715 {
716 return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
717 }
718
719 CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
720 const char *_opcode)
721 : CommonInstBase<typename DestDataType::OperandType,
722 typename SrcDataType::OperandType,
723 1>(ib, obj, _opcode)
724 {
725 }
726 };
727
728 class SpecialInstNoSrcNoDest : public HsailGPUStaticInst
729 {
730 public:
731 SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib,
732 const BrigObject *obj, const char *_opcode)
733 : HsailGPUStaticInst(obj, _opcode)
734 {
735 }
736
737 bool isVectorRegister(int operandIndex) { return false; }
738 bool isCondRegister(int operandIndex) { return false; }
739 bool isScalarRegister(int operandIndex) { return false; }
740 bool isSrcOperand(int operandIndex) { return false; }
741 bool isDstOperand(int operandIndex) { return false; }
742 int getOperandSize(int operandIndex) { return 0; }
743
744 int
745 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
746 {
747 return -1;
748 }
749
750 int numSrcRegOperands() { return 0; }
751 int numDstRegOperands() { return 0; }
752 int getNumOperands() { return 0; }
753 };
754
    // Special instruction with a destination register but no sources.
    // The single operand (index 0) is the destination.
    template<typename DestOperandType>
    class SpecialInstNoSrcBase : public HsailGPUStaticInst
    {
      protected:
        typename DestOperandType::DestOperand dest;

        // Builds "<opcode> dest".
        void generateDisassembly()
        {
            disassembly = csprintf("%s %s", opcode, dest.disassemble());
        }

      public:
        SpecialInstNoSrcBase(const Brig::BrigInstBase *ib,
                             const BrigObject *obj, const char *_opcode)
            : HsailGPUStaticInst(obj, _opcode)
        {
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);
        }

        // The only operand is the destination, so all register-kind
        // queries are answered by dest.
        bool isVectorRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex) { return false; }
        bool isDstOperand(int operandIndex) { return true; }
        int getOperandSize(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.opSize();
        }

        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.regIndex();
        }

        int numSrcRegOperands() { return 0; }
        // 1 if the destination is a vector register, else 0.
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands() { return 1; }
    };
805
806 template<typename DestDataType>
807 class SpecialInstNoSrc :
808 public SpecialInstNoSrcBase<typename DestDataType::OperandType>
809 {
810 public:
811 typedef typename DestDataType::CType DestCType;
812
813 SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj,
814 const char *_opcode)
815 : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj,
816 _opcode)
817 {
818 }
819 };
820
    // Special instruction with a destination register and one immediate
    // source.  Only the destination is exposed through the operand
    // accessors (getNumOperands() == 1); the immediate is not counted
    // as a register operand, which is why isSrcOperand() is always
    // false here.
    template<typename DestOperandType>
    class SpecialInst1SrcBase : public HsailGPUStaticInst
    {
      protected:
        typedef int SrcCType;  // used in execute() template

        typename DestOperandType::DestOperand dest;
        ImmOperand<SrcCType> src0;

        // Builds "<opcode> dest,src0".
        void
        generateDisassembly()
        {
            disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(),
                                   src0.disassemble());
        }

      public:
        SpecialInst1SrcBase(const Brig::BrigInstBase *ib,
                            const BrigObject *obj, const char *_opcode)
            : HsailGPUStaticInst(obj, _opcode)
        {
            setFlag(ALU);

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            src0.init(op_offs, obj);
        }
        // All register-kind queries are answered by dest (the only
        // exposed operand).
        bool isVectorRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex) { return false; }
        bool isDstOperand(int operandIndex) { return true; }
        int getOperandSize(int operandIndex) {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.opSize();
        }

        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return dest.regIndex();
        }

        int numSrcRegOperands() { return 0; }
        // 1 if the destination is a vector register, else 0.
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands() { return 1; }
    };
880
881 template<typename DestDataType>
882 class SpecialInst1Src :
883 public SpecialInst1SrcBase<typename DestDataType::OperandType>
884 {
885 public:
886 typedef typename DestDataType::CType DestCType;
887
888 SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj,
889 const char *_opcode)
890 : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj,
891 _opcode)
892 {
893 }
894 };
895
896 class Ret : public SpecialInstNoSrcNoDest
897 {
898 public:
899 typedef SpecialInstNoSrcNoDest Base;
900
901 Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
902 : Base(ib, obj, "ret")
903 {
904 setFlag(GPUStaticInst::Return);
905 }
906
907 void execute(GPUDynInstPtr gpuDynInst);
908 };
909
910 class Barrier : public SpecialInstNoSrcNoDest
911 {
912 public:
913 typedef SpecialInstNoSrcNoDest Base;
914 uint8_t width;
915
916 Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
917 : Base(ib, obj, "barrier")
918 {
919 setFlag(GPUStaticInst::MemBarrier);
920 assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
921 width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
922 }
923
924 void execute(GPUDynInstPtr gpuDynInst);
925 };
926
927 class MemFence : public SpecialInstNoSrcNoDest
928 {
929 public:
930 typedef SpecialInstNoSrcNoDest Base;
931
932 Brig::BrigMemoryOrder memFenceMemOrder;
933 Brig::BrigMemoryScope memFenceScopeSegGroup;
934 Brig::BrigMemoryScope memFenceScopeSegGlobal;
935 Brig::BrigMemoryScope memFenceScopeSegImage;
936
937 MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj)
938 : Base(ib, obj, "memfence")
939 {
940 assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE);
941
942 memFenceScopeSegGlobal = (Brig::BrigMemoryScope)
943 ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope;
944
945 memFenceScopeSegGroup = (Brig::BrigMemoryScope)
946 ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope;
947
948 memFenceScopeSegImage = (Brig::BrigMemoryScope)
949 ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope;
950
951 memFenceMemOrder = (Brig::BrigMemoryOrder)
952 ((Brig::BrigInstMemFence*)ib)->memoryOrder;
953
954 setFlag(MemoryRef);
955 setFlag(GPUStaticInst::MemFence);
956
957 switch (memFenceMemOrder) {
958 case Brig::BRIG_MEMORY_ORDER_NONE:
959 setFlag(NoOrder);
960 break;
961 case Brig::BRIG_MEMORY_ORDER_RELAXED:
962 setFlag(RelaxedOrder);
963 break;
964 case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
965 setFlag(Acquire);
966 break;
967 case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
968 setFlag(Release);
969 break;
970 case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
971 setFlag(AcquireRelease);
972 break;
973 default:
974 fatal("MemInst has bad BrigMemoryOrder\n");
975 }
976
977 // set inst flags based on scopes
978 if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
979 memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
980 setFlag(GPUStaticInst::GlobalSegment);
981
982 /**
983 * A memory fence that has scope for
984 * both segments will use the global
985 * segment, and be executed in the
986 * global memory pipeline, therefore,
987 * we set the segment to match the
988 * global scope only
989 */
990 switch (memFenceScopeSegGlobal) {
991 case Brig::BRIG_MEMORY_SCOPE_NONE:
992 setFlag(NoScope);
993 break;
994 case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
995 setFlag(WorkitemScope);
996 break;
997 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
998 setFlag(WorkgroupScope);
999 break;
1000 case Brig::BRIG_MEMORY_SCOPE_AGENT:
1001 setFlag(DeviceScope);
1002 break;
1003 case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1004 setFlag(SystemScope);
1005 break;
1006 default:
1007 fatal("MemFence has bad global scope type\n");
1008 }
1009 } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
1010 setFlag(GPUStaticInst::GlobalSegment);
1011
1012 switch (memFenceScopeSegGlobal) {
1013 case Brig::BRIG_MEMORY_SCOPE_NONE:
1014 setFlag(NoScope);
1015 break;
1016 case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1017 setFlag(WorkitemScope);
1018 break;
1019 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1020 setFlag(WorkgroupScope);
1021 break;
1022 case Brig::BRIG_MEMORY_SCOPE_AGENT:
1023 setFlag(DeviceScope);
1024 break;
1025 case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1026 setFlag(SystemScope);
1027 break;
1028 default:
1029 fatal("MemFence has bad global scope type\n");
1030 }
1031 } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
1032 setFlag(GPUStaticInst::GroupSegment);
1033
1034 switch (memFenceScopeSegGroup) {
1035 case Brig::BRIG_MEMORY_SCOPE_NONE:
1036 setFlag(NoScope);
1037 break;
1038 case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
1039 setFlag(WorkitemScope);
1040 break;
1041 case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
1042 setFlag(WorkgroupScope);
1043 break;
1044 case Brig::BRIG_MEMORY_SCOPE_AGENT:
1045 setFlag(DeviceScope);
1046 break;
1047 case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
1048 setFlag(SystemScope);
1049 break;
1050 default:
1051 fatal("MemFence has bad group scope type\n");
1052 }
1053 } else {
1054 fatal("MemFence constructor: bad scope specifiers\n");
1055 }
1056 }
1057
1058 void
1059 initiateAcc(GPUDynInstPtr gpuDynInst)
1060 {
1061 Wavefront *wave = gpuDynInst->wavefront();
1062 wave->computeUnit->injectGlobalMemFence(gpuDynInst);
1063 }
1064
1065 void
1066 execute(GPUDynInstPtr gpuDynInst)
1067 {
1068 Wavefront *w = gpuDynInst->wavefront();
1069 // 2 cases:
1070 // * memfence to a sequentially consistent memory (e.g., LDS).
1071 // These can be handled as no-ops.
1072 // * memfence to a relaxed consistency cache (e.g., Hermes, Viper,
1073 // etc.). We send a packet, tagged with the memory order and
1074 // scope, and let the GPU coalescer handle it.
1075
1076 if (isGlobalSeg()) {
1077 gpuDynInst->simdId = w->simdId;
1078 gpuDynInst->wfSlotId = w->wfSlotId;
1079 gpuDynInst->wfDynId = w->wfDynId;
1080 gpuDynInst->kern_id = w->kernId;
1081 gpuDynInst->cu_id = w->computeUnit->cu_id;
1082
1083 gpuDynInst->useContinuation = false;
1084 GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
1085 gmp->issueRequest(gpuDynInst);
1086
1087 w->wrGmReqsInPipe--;
1088 w->rdGmReqsInPipe--;
1089 w->memReqsInPipe--;
1090 w->outstandingReqs++;
1091 } else if (isGroupSeg()) {
1092 // no-op
1093 } else {
1094 fatal("MemFence execute: bad op type\n");
1095 }
1096 }
1097 };
1098
1099 class Call : public HsailGPUStaticInst
1100 {
1101 public:
1102 // private helper functions
1103 void calcAddr(Wavefront* w, GPUDynInstPtr m);
1104
1105 void
1106 generateDisassembly()
1107 {
1108 if (dest.disassemble() == "") {
1109 disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(),
1110 src1.disassemble());
1111 } else {
1112 disassembly = csprintf("%s %s (%s) (%s)", opcode,
1113 src0.disassemble(), dest.disassemble(),
1114 src1.disassemble());
1115 }
1116 }
1117
1118 bool
1119 isPseudoOp()
1120 {
1121 std::string func_name = src0.disassemble();
1122 if (func_name.find("__gem5_hsail_op") != std::string::npos) {
1123 return true;
1124 }
1125 return false;
1126 }
1127
1128 // member variables
1129 ListOperand dest;
1130 FunctionRefOperand src0;
1131 ListOperand src1;
1132 HsailCode *func_ptr;
1133
1134 // exec function for pseudo instructions mapped on top of call opcode
1135 void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst);
1136
1137 // user-defined pseudo instructions
1138 void MagicPrintLane(Wavefront *w);
1139 void MagicPrintLane64(Wavefront *w);
1140 void MagicPrintWF32(Wavefront *w);
1141 void MagicPrintWF64(Wavefront *w);
1142 void MagicPrintWFFloat(Wavefront *w);
1143 void MagicSimBreak(Wavefront *w);
1144 void MagicPrefixSum(Wavefront *w);
1145 void MagicReduction(Wavefront *w);
1146 void MagicMaskLower(Wavefront *w);
1147 void MagicMaskUpper(Wavefront *w);
1148 void MagicJoinWFBar(Wavefront *w);
1149 void MagicWaitWFBar(Wavefront *w);
1150 void MagicPanic(Wavefront *w);
1151
1152 void MagicAtomicNRAddGlobalU32Reg(Wavefront *w,
1153 GPUDynInstPtr gpuDynInst);
1154
1155 void MagicAtomicNRAddGroupU32Reg(Wavefront *w,
1156 GPUDynInstPtr gpuDynInst);
1157
1158 void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst);
1159
1160 void MagicXactCasLd(Wavefront *w);
1161 void MagicMostSigThread(Wavefront *w);
1162 void MagicMostSigBroadcast(Wavefront *w);
1163
1164 void MagicPrintWF32ID(Wavefront *w);
1165 void MagicPrintWFID64(Wavefront *w);
1166
1167 Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
1168 : HsailGPUStaticInst(obj, "call")
1169 {
1170 setFlag(ALU);
1171 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1172 dest.init(op_offs, obj);
1173 op_offs = obj->getOperandPtr(ib->operands, 1);
1174 src0.init(op_offs, obj);
1175
1176 func_ptr = nullptr;
1177 std::string func_name = src0.disassemble();
1178 if (!isPseudoOp()) {
1179 func_ptr = dynamic_cast<HsailCode*>(obj->
1180 getFunction(func_name));
1181
1182 if (!func_ptr)
1183 fatal("call::exec cannot find function: %s\n", func_name);
1184 }
1185
1186 op_offs = obj->getOperandPtr(ib->operands, 2);
1187 src1.init(op_offs, obj);
1188 }
1189
1190 bool isVectorRegister(int operandIndex) { return false; }
1191 bool isCondRegister(int operandIndex) { return false; }
1192 bool isScalarRegister(int operandIndex) { return false; }
1193 bool isSrcOperand(int operandIndex) { return false; }
1194 bool isDstOperand(int operandIndex) { return false; }
1195 int getOperandSize(int operandIndex) { return 0; }
1196
1197 int
1198 getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
1199 {
1200 return -1;
1201 }
1202
1203 void
1204 execute(GPUDynInstPtr gpuDynInst)
1205 {
1206 Wavefront *w = gpuDynInst->wavefront();
1207
1208 std::string func_name = src0.disassemble();
1209 if (isPseudoOp()) {
1210 execPseudoInst(w, gpuDynInst);
1211 } else {
1212 fatal("Native HSAIL functions are not yet implemented: %s\n",
1213 func_name);
1214 }
1215 }
1216 int numSrcRegOperands() { return 0; }
1217 int numDstRegOperands() { return 0; }
1218 int getNumOperands() { return 2; }
1219 };
1220
1221 template<typename T> T heynot(T arg) { return ~arg; }
1222 template<> inline bool heynot<bool>(bool arg) { return !arg; }
1223} // namespace HsailISA
1224
1225#endif // __ARCH_HSAIL_INSTS_DECL_HH__