1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __ARCH_HSAIL_INSTS_MEM_HH__
37#define __ARCH_HSAIL_INSTS_MEM_HH__
38
39#include "arch/hsail/insts/decl.hh"
40#include "arch/hsail/insts/gpu_static_inst.hh"
41#include "arch/hsail/operand.hh"
42
43namespace HsailISA
44{
45 class MemInst
46 {
47 public:
48 MemInst() : size(0), addr_operand(nullptr) { }
49
50 MemInst(Enums::MemType m_type)
51 {
52 if (m_type == Enums::M_U64 ||
53 m_type == Enums::M_S64 ||
54 m_type == Enums::M_F64) {
55 size = 8;
56 } else if (m_type == Enums::M_U32 ||
57 m_type == Enums::M_S32 ||
58 m_type == Enums::M_F32) {
59 size = 4;
60 } else if (m_type == Enums::M_U16 ||
61 m_type == Enums::M_S16 ||
62 m_type == Enums::M_F16) {
63 size = 2;
64 } else {
65 size = 1;
66 }
67
68 addr_operand = nullptr;
69 }
70
71 void
72 init_addr(AddrOperandBase *_addr_operand)
73 {
74 addr_operand = _addr_operand;
75 }
76
77 private:
78 int size;
79 AddrOperandBase *addr_operand;
80
81 public:
82 int getMemOperandSize() { return size; }
83 AddrOperandBase *getAddressOperand() { return addr_operand; }
84 };
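
    // A minimal usage sketch (hypothetical values, for illustration only):
    // a derived memory instruction mixes in MemInst and registers its
    // address operand so generic code can query operand size and address:
    //
    //   MemInst mi(Enums::M_U32);           // 4-byte memory operand
    //   assert(mi.getMemOperandSize() == 4);
    //   mi.init_addr(&someAddrOperand);     // someAddrOperand is assumed to
    //                                       // be an AddrOperandBase instance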
85
86 template<typename DestOperandType, typename AddrOperandType>
87 class LdaInstBase : public HsailGPUStaticInst
88 {
89 public:
90 typename DestOperandType::DestOperand dest;
91 AddrOperandType addr;
92
93 LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
94 const char *_opcode)
95 : HsailGPUStaticInst(obj, _opcode)
96 {
97 using namespace Brig;
98
99 setFlag(ALU);
100
101 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
102 dest.init(op_offs, obj);
103 op_offs = obj->getOperandPtr(ib->operands, 1);
104 addr.init(op_offs, obj);
105 }
106
107 int numSrcRegOperands() override
108 { return(this->addr.isVectorRegister()); }
109 int numDstRegOperands() override
110 { return dest.isVectorRegister(); }
111 bool isVectorRegister(int operandIndex) override
112 {
113 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
114 return((operandIndex == 0) ? dest.isVectorRegister() :
115 this->addr.isVectorRegister());
116 }
117 bool isCondRegister(int operandIndex) override
118 {
119 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
120 return((operandIndex == 0) ? dest.isCondRegister() :
121 this->addr.isCondRegister());
122 }
123 bool isScalarRegister(int operandIndex) override
124 {
125 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
126 return((operandIndex == 0) ? dest.isScalarRegister() :
127 this->addr.isScalarRegister());
128 }
129 bool isSrcOperand(int operandIndex) override
130 {
131 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
132 if (operandIndex > 0)
133 return(this->addr.isVectorRegister());
134 return false;
135 }
136 bool isDstOperand(int operandIndex) override {
137 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
138 return(operandIndex == 0);
139 }
140 int getOperandSize(int operandIndex) override
141 {
142 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
143 return((operandIndex == 0) ? dest.opSize() :
144 this->addr.opSize());
145 }
146 int getRegisterIndex(int operandIndex) override
147 {
148 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
149 return((operandIndex == 0) ? dest.regIndex() :
150 this->addr.regIndex());
151 }
152 int getNumOperands() override
153 {
154 if (this->addr.isVectorRegister())
155 return 2;
156 return 1;
157 }
158 };
159
160 template<typename DestDataType, typename AddrOperandType>
161 class LdaInst :
162 public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
163 public MemInst
164 {
165 public:
166 void generateDisassembly();
167
168 LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
169 const char *_opcode)
170 : LdaInstBase<typename DestDataType::OperandType,
171 AddrOperandType>(ib, obj, _opcode)
172 {
173 init_addr(&this->addr);
174 }
175
176 void execute(GPUDynInstPtr gpuDynInst);
177 };
178
179 template<typename DataType>
180 GPUStaticInst*
181 decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
182 {
183 unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
184 BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);
185
186 if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
187 return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
188 } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
189 // V2/V4 not allowed
190 switch (regDataType.regKind) {
191 case Brig::BRIG_REGISTER_KIND_SINGLE:
192 return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
193 case Brig::BRIG_REGISTER_KIND_DOUBLE:
194 return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
195 default:
196                fatal("Bad ldas register operand type %d\n", regDataType.regKind);
197 }
198 } else {
199 fatal("Bad ldas register operand kind %d\n", regDataType.kind);
200 }
201 }
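
    // Decode sketch (illustrative; the concrete dispatch lives in the
    // generated decoder tables, and the data type here is an assumption):
    // a 64-bit lda would typically be materialized as
    //
    //   GPUStaticInst *i = decodeLda<U64>(ib, obj);
    //
    // with the register kind of operand 1 selecting the NoReg/SReg/DReg
    // address-operand specialization above.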
202
203 template<typename MemOperandType, typename DestOperandType,
204 typename AddrOperandType>
205 class LdInstBase : public HsailGPUStaticInst
206 {
207 public:
208 Brig::BrigWidth8_t width;
209 typename DestOperandType::DestOperand dest;
210 AddrOperandType addr;
211
212 Brig::BrigSegment segment;
213 Brig::BrigMemoryOrder memoryOrder;
214 Brig::BrigMemoryScope memoryScope;
215 unsigned int equivClass;
216
217 LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
218 const char *_opcode)
219 : HsailGPUStaticInst(obj, _opcode)
220 {
221 using namespace Brig;
222
223 setFlag(MemoryRef);
224 setFlag(Load);
225
226 if (ib->opcode == BRIG_OPCODE_LD) {
227 const BrigInstMem *ldst = (const BrigInstMem*)ib;
228
229 segment = (BrigSegment)ldst->segment;
230 memoryOrder = BRIG_MEMORY_ORDER_NONE;
231 memoryScope = BRIG_MEMORY_SCOPE_NONE;
232 equivClass = ldst->equivClass;
233
234 width = ldst->width;
235 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
236 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
237 if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
238 dest.init(op_offs, obj);
239
240 op_offs = obj->getOperandPtr(ib->operands, 1);
241 addr.init(op_offs, obj);
242 } else {
243 const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
244
245 segment = (BrigSegment)at->segment;
246 memoryOrder = (BrigMemoryOrder)at->memoryOrder;
247 memoryScope = (BrigMemoryScope)at->memoryScope;
248 equivClass = 0;
249
250 width = BRIG_WIDTH_1;
251 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
252 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
253
254 if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
255 dest.init(op_offs, obj);
256
257 op_offs = obj->getOperandPtr(ib->operands,1);
258 addr.init(op_offs, obj);
259 }
260
261 switch (memoryOrder) {
262 case BRIG_MEMORY_ORDER_NONE:
263 setFlag(NoOrder);
264 break;
265 case BRIG_MEMORY_ORDER_RELAXED:
266 setFlag(RelaxedOrder);
267 break;
268 case BRIG_MEMORY_ORDER_SC_ACQUIRE:
269 setFlag(Acquire);
270 break;
271 case BRIG_MEMORY_ORDER_SC_RELEASE:
272 setFlag(Release);
273 break;
274 case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
275 setFlag(AcquireRelease);
276 break;
277 default:
278 fatal("LdInst has bad memory order type\n");
279 }
280
281 switch (memoryScope) {
282 case BRIG_MEMORY_SCOPE_NONE:
283 setFlag(NoScope);
284 break;
285 case BRIG_MEMORY_SCOPE_WORKITEM:
286 setFlag(WorkitemScope);
287 break;
288 case BRIG_MEMORY_SCOPE_WORKGROUP:
289 setFlag(WorkgroupScope);
290 break;
291 case BRIG_MEMORY_SCOPE_AGENT:
292 setFlag(DeviceScope);
293 break;
294 case BRIG_MEMORY_SCOPE_SYSTEM:
295 setFlag(SystemScope);
296 break;
297 default:
298 fatal("LdInst has bad memory scope type\n");
299 }
300
301 switch (segment) {
302 case BRIG_SEGMENT_GLOBAL:
303 setFlag(GlobalSegment);
304 break;
305 case BRIG_SEGMENT_GROUP:
306 setFlag(GroupSegment);
307 break;
308 case BRIG_SEGMENT_PRIVATE:
309 setFlag(PrivateSegment);
310 break;
311 case BRIG_SEGMENT_READONLY:
312 setFlag(ReadOnlySegment);
313 break;
314 case BRIG_SEGMENT_SPILL:
315 setFlag(SpillSegment);
316 break;
317 case BRIG_SEGMENT_FLAT:
318 setFlag(Flat);
319 break;
320 case BRIG_SEGMENT_KERNARG:
321 setFlag(KernArgSegment);
322 break;
323 case BRIG_SEGMENT_ARG:
324 setFlag(ArgSegment);
325 break;
326 default:
327 panic("Ld: segment %d not supported\n", segment);
328 }
329 }
330
331 int numSrcRegOperands() override
332 { return(this->addr.isVectorRegister()); }
333 int numDstRegOperands() override { return dest.isVectorRegister(); }
334 int getNumOperands() override
335 {
336 if (this->addr.isVectorRegister())
337 return 2;
338 else
339 return 1;
340 }
341 bool isVectorRegister(int operandIndex) override
342 {
343 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
344 return((operandIndex == 0) ? dest.isVectorRegister() :
345 this->addr.isVectorRegister());
346 }
347 bool isCondRegister(int operandIndex) override
348 {
349 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
350 return((operandIndex == 0) ? dest.isCondRegister() :
351 this->addr.isCondRegister());
352 }
353 bool isScalarRegister(int operandIndex) override
354 {
355 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
356 return((operandIndex == 0) ? dest.isScalarRegister() :
357 this->addr.isScalarRegister());
358 }
359 bool isSrcOperand(int operandIndex) override
360 {
361 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
362 if (operandIndex > 0)
363 return(this->addr.isVectorRegister());
364 return false;
365 }
366 bool isDstOperand(int operandIndex) override
367 {
368 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
369 return(operandIndex == 0);
370 }
371 int getOperandSize(int operandIndex) override
372 {
373 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
374 return((operandIndex == 0) ? dest.opSize() :
375 this->addr.opSize());
376 }
377 int getRegisterIndex(int operandIndex) override
378 {
379 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
380 return((operandIndex == 0) ? dest.regIndex() :
381 this->addr.regIndex());
382 }
383 };
384
385 template<typename MemDataType, typename DestDataType,
386 typename AddrOperandType>
387 class LdInst :
388 public LdInstBase<typename MemDataType::CType,
389 typename DestDataType::OperandType, AddrOperandType>,
390 public MemInst
391 {
392 typename DestDataType::OperandType::DestOperand dest_vect[4];
393 uint16_t num_dest_operands;
394 void generateDisassembly() override;
395
396 public:
397 LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
398 const char *_opcode)
399 : LdInstBase<typename MemDataType::CType,
400 typename DestDataType::OperandType,
401 AddrOperandType>(ib, obj, _opcode),
402 MemInst(MemDataType::memType)
403 {
404 init_addr(&this->addr);
405
406 unsigned op_offs = obj->getOperandPtr(ib->operands,0);
407 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
408
409 if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
410 const Brig::BrigOperandOperandList *brigRegVecOp =
411 (const Brig::BrigOperandOperandList*)brigOp;
412
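                // the operand list's data blob leads with its byte count;
                // each element handle is assumed to occupy 4 bytes, which
                // is why the count below is divided by 4.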
413 num_dest_operands =
414 *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
415
416 assert(num_dest_operands <= 4);
417 } else {
418 num_dest_operands = 1;
419 }
420
421 if (num_dest_operands > 1) {
422 assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
423
424 for (int i = 0; i < num_dest_operands; ++i) {
425 dest_vect[i].init_from_vect(op_offs, obj, i);
426 }
427 }
428 }
429
430 void
431 initiateAcc(GPUDynInstPtr gpuDynInst) override
432 {
433 typedef typename MemDataType::CType c0;
434
435 gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
436
437 if (num_dest_operands > 1) {
438 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
439 if (gpuDynInst->exec_mask[i])
440 gpuDynInst->statusVector.push_back(num_dest_operands);
441 else
442 gpuDynInst->statusVector.push_back(0);
443 }
444
445 for (int k = 0; k < num_dest_operands; ++k) {
446
447 c0 *d = &((c0*)gpuDynInst->d_data)
448 [k * gpuDynInst->computeUnit()->wfSize()];
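                // d_data is laid out component-major: component k of every
                // lane occupies one contiguous wfSize()-wide slice, so lane
                // i's value for component k ends up at
                // d_data[k * wfSize() + i] (d is bumped per lane below).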
449
450 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
451 if (gpuDynInst->exec_mask[i]) {
452 Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
453
454 if (this->isLocalMem()) {
455 // load from shared memory
456 *d = gpuDynInst->wavefront()->ldsChunk->
457 read<c0>(vaddr);
458 } else {
459 Request *req = new Request(0, vaddr, sizeof(c0), 0,
460 gpuDynInst->computeUnit()->masterId(),
461 0, gpuDynInst->wfDynId);
462
463 gpuDynInst->setRequestFlags(req);
464 PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
465 pkt->dataStatic(d);
466
467 if (gpuDynInst->computeUnit()->shader->
468 separate_acquire_release &&
469 gpuDynInst->isAcquire()) {
470 // if this load has acquire semantics,
471 // set the response continuation function
472 // to perform an Acquire request
473 gpuDynInst->execContinuation =
474 &GPUStaticInst::execLdAcq;
475
476 gpuDynInst->useContinuation = true;
477 } else {
478 // the request will be finished when
479 // the load completes
480 gpuDynInst->useContinuation = false;
481 }
482 // translation is performed in sendRequest()
483 gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
484 i, pkt);
485 }
486 }
487 ++d;
488 }
489 }
490
491 gpuDynInst->updateStats();
492 }
493
494 private:
495 void
496 execLdAcq(GPUDynInstPtr gpuDynInst) override
497 {
498 // after the load has complete and if the load has acquire
499 // semantics, issue an acquire request.
500 if (!this->isLocalMem()) {
501 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
502 && gpuDynInst->isAcquire()) {
503 gpuDynInst->statusBitVector = VectorMask(1);
504 gpuDynInst->useContinuation = false;
505 // create request
506 Request *req = new Request(0, 0, 0, 0,
507 gpuDynInst->computeUnit()->masterId(),
508 0, gpuDynInst->wfDynId);
509 req->setFlags(Request::ACQUIRE);
510 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
511 }
512 }
513 }
514
515 public:
516 bool isVectorRegister(int operandIndex) override
517 {
518 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
519 if ((num_dest_operands != getNumOperands()) &&
520 (operandIndex == (getNumOperands()-1)))
521 return(this->addr.isVectorRegister());
522 if (num_dest_operands > 1) {
523 return dest_vect[operandIndex].isVectorRegister();
524 }
525 else if (num_dest_operands == 1) {
526 return LdInstBase<typename MemDataType::CType,
527 typename DestDataType::OperandType,
528 AddrOperandType>::dest.isVectorRegister();
529 }
530 return false;
531 }
532 bool isCondRegister(int operandIndex) override
533 {
534 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
535 if ((num_dest_operands != getNumOperands()) &&
536 (operandIndex == (getNumOperands()-1)))
537 return(this->addr.isCondRegister());
538 if (num_dest_operands > 1)
539 return dest_vect[operandIndex].isCondRegister();
540 else if (num_dest_operands == 1)
541 return LdInstBase<typename MemDataType::CType,
542 typename DestDataType::OperandType,
543 AddrOperandType>::dest.isCondRegister();
544 return false;
545 }
546 bool isScalarRegister(int operandIndex) override
547 {
548 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
549 if ((num_dest_operands != getNumOperands()) &&
550 (operandIndex == (getNumOperands()-1)))
551 return(this->addr.isScalarRegister());
552 if (num_dest_operands > 1)
553 return dest_vect[operandIndex].isScalarRegister();
554 else if (num_dest_operands == 1)
555 return LdInstBase<typename MemDataType::CType,
556 typename DestDataType::OperandType,
557 AddrOperandType>::dest.isScalarRegister();
558 return false;
559 }
560 bool isSrcOperand(int operandIndex) override
561 {
562 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
563 if ((num_dest_operands != getNumOperands()) &&
564 (operandIndex == (getNumOperands()-1)))
565 return(this->addr.isVectorRegister());
566 return false;
567 }
568 bool isDstOperand(int operandIndex) override
569 {
570 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
571 if ((num_dest_operands != getNumOperands()) &&
572 (operandIndex == (getNumOperands()-1)))
573 return false;
574 return true;
575 }
576 int getOperandSize(int operandIndex) override
577 {
578 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
579 if ((num_dest_operands != getNumOperands()) &&
580 (operandIndex == (getNumOperands()-1)))
581 return(this->addr.opSize());
582 if (num_dest_operands > 1)
583 return(dest_vect[operandIndex].opSize());
584 else if (num_dest_operands == 1)
585 return(LdInstBase<typename MemDataType::CType,
586 typename DestDataType::OperandType,
587 AddrOperandType>::dest.opSize());
588 return 0;
589 }
590 int getRegisterIndex(int operandIndex) override
591 {
592 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
593 if ((num_dest_operands != getNumOperands()) &&
594 (operandIndex == (getNumOperands()-1)))
595 return(this->addr.regIndex());
596 if (num_dest_operands > 1)
597 return(dest_vect[operandIndex].regIndex());
598 else if (num_dest_operands == 1)
599 return(LdInstBase<typename MemDataType::CType,
600 typename DestDataType::OperandType,
601 AddrOperandType>::dest.regIndex());
602 return -1;
603 }
604 int getNumOperands() override
605 {
606 if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
607 return(num_dest_operands+1);
608 else
609 return(num_dest_operands);
610 }
611 void execute(GPUDynInstPtr gpuDynInst) override;
612 };
613
614 template<typename MemDT, typename DestDT>
615 GPUStaticInst*
616 decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
617 {
618 unsigned op_offs = obj->getOperandPtr(ib->operands,1);
619 BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
620
621 if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
622 return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
623 } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
624 tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
625 switch (tmp.regKind) {
626 case Brig::BRIG_REGISTER_KIND_SINGLE:
627 return new LdInst<MemDT, DestDT,
628 SRegAddrOperand>(ib, obj, "ld");
629 case Brig::BRIG_REGISTER_KIND_DOUBLE:
630 return new LdInst<MemDT, DestDT,
631 DRegAddrOperand>(ib, obj, "ld");
632 default:
633 fatal("Bad ld register operand type %d\n", tmp.regKind);
634 }
635 } else {
636 fatal("Bad ld register operand kind %d\n", tmp.kind);
637 }
638 }
639
640 template<typename MemDT>
641 GPUStaticInst*
642 decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
643 {
644 unsigned op_offs = obj->getOperandPtr(ib->operands,0);
645 BrigRegOperandInfo dest = findRegDataType(op_offs, obj);
646
647 assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
648 dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
649 switch(dest.regKind) {
650 case Brig::BRIG_REGISTER_KIND_SINGLE:
651 switch (ib->type) {
652 case Brig::BRIG_TYPE_B8:
653 case Brig::BRIG_TYPE_B16:
654 case Brig::BRIG_TYPE_B32:
655 return decodeLd2<MemDT, B32>(ib, obj);
656 case Brig::BRIG_TYPE_U8:
657 case Brig::BRIG_TYPE_U16:
658 case Brig::BRIG_TYPE_U32:
659 return decodeLd2<MemDT, U32>(ib, obj);
660 case Brig::BRIG_TYPE_S8:
661 case Brig::BRIG_TYPE_S16:
662 case Brig::BRIG_TYPE_S32:
663 return decodeLd2<MemDT, S32>(ib, obj);
664 case Brig::BRIG_TYPE_F16:
665 case Brig::BRIG_TYPE_F32:
666 return decodeLd2<MemDT, U32>(ib, obj);
667 default:
668 fatal("Bad ld register operand type %d, %d\n",
669 dest.regKind, ib->type);
670 };
671 case Brig::BRIG_REGISTER_KIND_DOUBLE:
672 switch (ib->type) {
673 case Brig::BRIG_TYPE_B64:
674 return decodeLd2<MemDT, B64>(ib, obj);
675 case Brig::BRIG_TYPE_U64:
676 return decodeLd2<MemDT, U64>(ib, obj);
677 case Brig::BRIG_TYPE_S64:
678 return decodeLd2<MemDT, S64>(ib, obj);
679 case Brig::BRIG_TYPE_F64:
680 return decodeLd2<MemDT, U64>(ib, obj);
681 default:
682 fatal("Bad ld register operand type %d, %d\n",
683 dest.regKind, ib->type);
684 };
685 default:
686 fatal("Bad ld register operand type %d, %d\n", dest.regKind,
687 ib->type);
688 }
689 }
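
    // Illustrative call chain (the widths are assumptions for the example,
    // not taken from a decode table): a 16-bit load into a 32-bit
    // destination register reaches decodeLd<U16>(), which widens the
    // destination type and forwards to decodeLd2<U16, U32>(); the address
    // operand kind then picks the final LdInst specialization.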
690
691 template<typename MemDataType, typename SrcOperandType,
692 typename AddrOperandType>
693 class StInstBase : public HsailGPUStaticInst
694 {
695 public:
696 typename SrcOperandType::SrcOperand src;
697 AddrOperandType addr;
698
699 Brig::BrigSegment segment;
700 Brig::BrigMemoryScope memoryScope;
701 Brig::BrigMemoryOrder memoryOrder;
702 unsigned int equivClass;
703
704 StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
705 const char *_opcode)
706 : HsailGPUStaticInst(obj, _opcode)
707 {
708 using namespace Brig;
709
710 setFlag(MemoryRef);
711 setFlag(Store);
712
713 if (ib->opcode == BRIG_OPCODE_ST) {
714 const BrigInstMem *ldst = (const BrigInstMem*)ib;
715
716 segment = (BrigSegment)ldst->segment;
717 memoryOrder = BRIG_MEMORY_ORDER_NONE;
718 memoryScope = BRIG_MEMORY_SCOPE_NONE;
719 equivClass = ldst->equivClass;
720
721 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
722 const BrigOperand *baseOp = obj->getOperand(op_offs);
723
724 if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
725 (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
726 src.init(op_offs, obj);
727 }
728
729 op_offs = obj->getOperandPtr(ib->operands, 1);
730 addr.init(op_offs, obj);
731 } else {
732 const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
733
734 segment = (BrigSegment)at->segment;
735 memoryScope = (BrigMemoryScope)at->memoryScope;
736 memoryOrder = (BrigMemoryOrder)at->memoryOrder;
737 equivClass = 0;
738
739 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
740 addr.init(op_offs, obj);
741
742 op_offs = obj->getOperandPtr(ib->operands, 1);
743 src.init(op_offs, obj);
744 }
745
746 switch (memoryOrder) {
747 case BRIG_MEMORY_ORDER_NONE:
748 setFlag(NoOrder);
749 break;
750 case BRIG_MEMORY_ORDER_RELAXED:
751 setFlag(RelaxedOrder);
752 break;
753 case BRIG_MEMORY_ORDER_SC_ACQUIRE:
754 setFlag(Acquire);
755 break;
756 case BRIG_MEMORY_ORDER_SC_RELEASE:
757 setFlag(Release);
758 break;
759 case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
760 setFlag(AcquireRelease);
761 break;
762 default:
763 fatal("StInst has bad memory order type\n");
764 }
765
766 switch (memoryScope) {
767 case BRIG_MEMORY_SCOPE_NONE:
768 setFlag(NoScope);
769 break;
770 case BRIG_MEMORY_SCOPE_WORKITEM:
771 setFlag(WorkitemScope);
772 break;
773 case BRIG_MEMORY_SCOPE_WORKGROUP:
774 setFlag(WorkgroupScope);
775 break;
776 case BRIG_MEMORY_SCOPE_AGENT:
777 setFlag(DeviceScope);
778 break;
779 case BRIG_MEMORY_SCOPE_SYSTEM:
780 setFlag(SystemScope);
781 break;
782 default:
783 fatal("StInst has bad memory scope type\n");
784 }
785
786 switch (segment) {
787 case BRIG_SEGMENT_GLOBAL:
788 setFlag(GlobalSegment);
789 break;
790 case BRIG_SEGMENT_GROUP:
791 setFlag(GroupSegment);
792 break;
793 case BRIG_SEGMENT_PRIVATE:
794 setFlag(PrivateSegment);
795 break;
796 case BRIG_SEGMENT_READONLY:
797 setFlag(ReadOnlySegment);
798 break;
799 case BRIG_SEGMENT_SPILL:
800 setFlag(SpillSegment);
801 break;
802 case BRIG_SEGMENT_FLAT:
803 setFlag(Flat);
804 break;
805 case BRIG_SEGMENT_ARG:
806 setFlag(ArgSegment);
807 break;
808 default:
809 panic("St: segment %d not supported\n", segment);
810 }
811 }
812
813 int numDstRegOperands() override { return 0; }
814 int numSrcRegOperands() override
815 {
816 return src.isVectorRegister() + this->addr.isVectorRegister();
817 }
818 int getNumOperands() override
819 {
820 if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
821 return 2;
822 else
823 return 1;
824 }
825 bool isVectorRegister(int operandIndex) override
826 {
827 assert(operandIndex >= 0 && operandIndex < getNumOperands());
828 return !operandIndex ? src.isVectorRegister() :
829 this->addr.isVectorRegister();
830 }
831 bool isCondRegister(int operandIndex) override
832 {
833 assert(operandIndex >= 0 && operandIndex < getNumOperands());
834 return !operandIndex ? src.isCondRegister() :
835 this->addr.isCondRegister();
836 }
837 bool isScalarRegister(int operandIndex) override
838 {
839 assert(operandIndex >= 0 && operandIndex < getNumOperands());
840 return !operandIndex ? src.isScalarRegister() :
841 this->addr.isScalarRegister();
842 }
843 bool isSrcOperand(int operandIndex) override
844 {
845 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
846 return true;
847 }
848 bool isDstOperand(int operandIndex) override { return false; }
849 int getOperandSize(int operandIndex) override
850 {
851 assert(operandIndex >= 0 && operandIndex < getNumOperands());
852 return !operandIndex ? src.opSize() : this->addr.opSize();
853 }
854 int getRegisterIndex(int operandIndex) override
855 {
856 assert(operandIndex >= 0 && operandIndex < getNumOperands());
857 return !operandIndex ? src.regIndex() : this->addr.regIndex();
858 }
859 };
860
861
862 template<typename MemDataType, typename SrcDataType,
863 typename AddrOperandType>
864 class StInst :
865 public StInstBase<MemDataType, typename SrcDataType::OperandType,
866 AddrOperandType>,
867 public MemInst
868 {
869 public:
870 typename SrcDataType::OperandType::SrcOperand src_vect[4];
871 uint16_t num_src_operands;
872 void generateDisassembly() override;
873
874 StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
875 const char *_opcode, int srcIdx)
876 : StInstBase<MemDataType, typename SrcDataType::OperandType,
877 AddrOperandType>(ib, obj, _opcode),
878 MemInst(SrcDataType::memType)
879 {
880 init_addr(&this->addr);
881
882 BrigRegOperandInfo rinfo;
883 unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx);
884 const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);
885
886 if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
887 const Brig::BrigOperandConstantBytes *op =
888 (Brig::BrigOperandConstantBytes*)baseOp;
889
890 rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
891 Brig::BRIG_TYPE_NONE);
892 } else {
893 rinfo = findRegDataType(op_offs, obj);
894 }
895
896 if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
897 const Brig::BrigOperandOperandList *brigRegVecOp =
898 (const Brig::BrigOperandOperandList*)baseOp;
899
900 num_src_operands =
901 *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
902
903 assert(num_src_operands <= 4);
904 } else {
905 num_src_operands = 1;
906 }
907
908 if (num_src_operands > 1) {
909 assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
910
911 for (int i = 0; i < num_src_operands; ++i) {
912 src_vect[i].init_from_vect(op_offs, obj, i);
913 }
914 }
915 }
916
917 void
918 initiateAcc(GPUDynInstPtr gpuDynInst) override
919 {
920 // before performing a store, check if this store has
921 // release semantics, and if so issue a release first
922 if (!this->isLocalMem()) {
923 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
924 && gpuDynInst->isRelease()) {
925
926 gpuDynInst->statusBitVector = VectorMask(1);
927 gpuDynInst->execContinuation = &GPUStaticInst::execSt;
928 gpuDynInst->useContinuation = true;
929 // create request
930 Request *req = new Request(0, 0, 0, 0,
931 gpuDynInst->computeUnit()->masterId(),
932 0, gpuDynInst->wfDynId);
933 req->setFlags(Request::RELEASE);
934 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
935
936 return;
937 }
938 }
939
940            // if there are no release semantics, perform stores immediately
941 execSt(gpuDynInst);
942 }
943
944 private:
945 // execSt may be called through a continuation
946 // if the store had release semantics. see comment for
947 // execSt in gpu_static_inst.hh
948 void
949 execSt(GPUDynInstPtr gpuDynInst) override
950 {
951 typedef typename MemDataType::CType c0;
952
953 gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
954
955 if (num_src_operands > 1) {
956 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
957 if (gpuDynInst->exec_mask[i])
958 gpuDynInst->statusVector.push_back(num_src_operands);
959 else
960 gpuDynInst->statusVector.push_back(0);
961 }
962
963 for (int k = 0; k < num_src_operands; ++k) {
964 c0 *d = &((c0*)gpuDynInst->d_data)
965 [k * gpuDynInst->computeUnit()->wfSize()];
966
967 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
968 if (gpuDynInst->exec_mask[i]) {
969 Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
970
971 if (this->isLocalMem()) {
972                        // store to shared memory
973 gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
974 *d);
975 } else {
976 Request *req =
977 new Request(0, vaddr, sizeof(c0), 0,
978 gpuDynInst->computeUnit()->masterId(),
979 0, gpuDynInst->wfDynId);
980
981 gpuDynInst->setRequestFlags(req);
982 PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
983 pkt->dataStatic<c0>(d);
984
985 // translation is performed in sendRequest()
986 // the request will be finished when the store completes
987 gpuDynInst->useContinuation = false;
988 gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
989 i, pkt);
990
991 }
992 }
993 ++d;
994 }
995 }
996
997 gpuDynInst->updateStats();
998 }
999
1000 public:
1001 bool isVectorRegister(int operandIndex) override
1002 {
1003 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1004 if (operandIndex == num_src_operands)
1005 return this->addr.isVectorRegister();
1006 if (num_src_operands > 1)
1007 return src_vect[operandIndex].isVectorRegister();
1008 else if (num_src_operands == 1)
1009 return StInstBase<MemDataType,
1010 typename SrcDataType::OperandType,
1011 AddrOperandType>::src.isVectorRegister();
1012 return false;
1013 }
1014 bool isCondRegister(int operandIndex) override
1015 {
1016 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1017 if (operandIndex == num_src_operands)
1018 return this->addr.isCondRegister();
1019 if (num_src_operands > 1)
1020 return src_vect[operandIndex].isCondRegister();
1021 else if (num_src_operands == 1)
1022 return StInstBase<MemDataType,
1023 typename SrcDataType::OperandType,
1024 AddrOperandType>::src.isCondRegister();
1025 return false;
1026 }
1027 bool isScalarRegister(int operandIndex) override
1028 {
1029 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1030 if (operandIndex == num_src_operands)
1031 return this->addr.isScalarRegister();
1032 if (num_src_operands > 1)
1033 return src_vect[operandIndex].isScalarRegister();
1034 else if (num_src_operands == 1)
1035 return StInstBase<MemDataType,
1036 typename SrcDataType::OperandType,
1037 AddrOperandType>::src.isScalarRegister();
1038 return false;
1039 }
1040 bool isSrcOperand(int operandIndex) override
1041 {
1042 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1043 return true;
1044 }
1045 bool isDstOperand(int operandIndex) override { return false; }
1046 int getOperandSize(int operandIndex) override
1047 {
1048 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1049 if (operandIndex == num_src_operands)
1050 return this->addr.opSize();
1051 if (num_src_operands > 1)
1052 return src_vect[operandIndex].opSize();
1053 else if (num_src_operands == 1)
1054 return StInstBase<MemDataType,
1055 typename SrcDataType::OperandType,
1056 AddrOperandType>::src.opSize();
1057 return 0;
1058 }
1059 int getRegisterIndex(int operandIndex) override
1060 {
1061 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1062 if (operandIndex == num_src_operands)
1063 return this->addr.regIndex();
1064 if (num_src_operands > 1)
1065 return src_vect[operandIndex].regIndex();
1066 else if (num_src_operands == 1)
1067 return StInstBase<MemDataType,
1068 typename SrcDataType::OperandType,
1069 AddrOperandType>::src.regIndex();
1070 return -1;
1071 }
1072 int getNumOperands() override
1073 {
1074 if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
1075 return num_src_operands + 1;
1076 else
1077 return num_src_operands;
1078 }
1079 void execute(GPUDynInstPtr gpuDynInst) override;
1080 };
1081
1082 template<typename DataType, typename SrcDataType>
1083 GPUStaticInst*
1084 decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
1085 {
1086 int srcIdx = 0;
1087 int destIdx = 1;
1088 if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
1089 ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
1090 srcIdx = 1;
1091 destIdx = 0;
1092 }
1093 unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx);
1094
1095 BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
1096
1097 if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
1098 return new StInst<DataType, SrcDataType,
1099 NoRegAddrOperand>(ib, obj, "st", srcIdx);
1100 } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
1101 // V2/V4 not allowed
1102 switch (tmp.regKind) {
1103 case Brig::BRIG_REGISTER_KIND_SINGLE:
1104 return new StInst<DataType, SrcDataType,
1105 SRegAddrOperand>(ib, obj, "st", srcIdx);
1106 case Brig::BRIG_REGISTER_KIND_DOUBLE:
1107 return new StInst<DataType, SrcDataType,
1108 DRegAddrOperand>(ib, obj, "st", srcIdx);
1109 default:
1110            fatal("Bad st register operand type %d\n", tmp.regKind);
1111 }
1112 } else {
1113 fatal("Bad st register operand kind %d\n", tmp.kind);
1114 }
1115 }
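
    // For a plain BRIG_OPCODE_ST the stored value is operand 0 and the
    // address is operand 1; the atomic store forms reverse that order,
    // which is why srcIdx/destIdx are swapped above. A hypothetical
    // instantiation for a 32-bit store:
    //
    //   GPUStaticInst *st = decodeSt<U32, U32>(ib, obj);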
1116
1117 template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
1118 bool HasDst>
1119 class AtomicInstBase : public HsailGPUStaticInst
1120 {
1121 public:
1122 typename OperandType::DestOperand dest;
1123 typename OperandType::SrcOperand src[NumSrcOperands];
1124 AddrOperandType addr;
1125
1126 Brig::BrigSegment segment;
1127 Brig::BrigMemoryOrder memoryOrder;
1128 Brig::BrigAtomicOperation atomicOperation;
1129 Brig::BrigMemoryScope memoryScope;
1130 Brig::BrigOpcode opcode;
1131
1132 AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
1133 const char *_opcode)
1134 : HsailGPUStaticInst(obj, _opcode)
1135 {
1136 using namespace Brig;
1137
1138 const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
1139
1140 segment = (BrigSegment)at->segment;
1141 memoryScope = (BrigMemoryScope)at->memoryScope;
1142 memoryOrder = (BrigMemoryOrder)at->memoryOrder;
1143 atomicOperation = (BrigAtomicOperation)at->atomicOperation;
1144 opcode = (BrigOpcode)ib->opcode;
1145
1146 assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET ||
1147 opcode == Brig::BRIG_OPCODE_ATOMIC);
1148
1149 setFlag(MemoryRef);
1150
1151 if (opcode == Brig::BRIG_OPCODE_ATOMIC) {
1152 setFlag(AtomicReturn);
1153 } else {
1154 setFlag(AtomicNoReturn);
1155 }
1156
1157 switch (memoryOrder) {
1158 case BRIG_MEMORY_ORDER_NONE:
1159 setFlag(NoOrder);
1160 break;
1161 case BRIG_MEMORY_ORDER_RELAXED:
1162 setFlag(RelaxedOrder);
1163 break;
1164 case BRIG_MEMORY_ORDER_SC_ACQUIRE:
1165 setFlag(Acquire);
1166 break;
1167 case BRIG_MEMORY_ORDER_SC_RELEASE:
1168 setFlag(Release);
1169 break;
1170 case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
1171 setFlag(AcquireRelease);
1172 break;
1173 default:
1174 fatal("AtomicInst has bad memory order type\n");
1175 }
1176
1177 switch (memoryScope) {
1178 case BRIG_MEMORY_SCOPE_NONE:
1179 setFlag(NoScope);
1180 break;
1181 case BRIG_MEMORY_SCOPE_WORKITEM:
1182 setFlag(WorkitemScope);
1183 break;
1184 case BRIG_MEMORY_SCOPE_WORKGROUP:
1185 setFlag(WorkgroupScope);
1186 break;
1187 case BRIG_MEMORY_SCOPE_AGENT:
1188 setFlag(DeviceScope);
1189 break;
1190 case BRIG_MEMORY_SCOPE_SYSTEM:
1191 setFlag(SystemScope);
1192 break;
1193 default:
1194 fatal("AtomicInst has bad memory scope type\n");
1195 }
1196
1197 switch (atomicOperation) {
1198 case Brig::BRIG_ATOMIC_AND:
1199 setFlag(AtomicAnd);
1200 break;
1201 case Brig::BRIG_ATOMIC_OR:
1202 setFlag(AtomicOr);
1203 break;
1204 case Brig::BRIG_ATOMIC_XOR:
1205 setFlag(AtomicXor);
1206 break;
1207 case Brig::BRIG_ATOMIC_CAS:
1208 setFlag(AtomicCAS);
1209 break;
1210 case Brig::BRIG_ATOMIC_EXCH:
1211 setFlag(AtomicExch);
1212 break;
1213 case Brig::BRIG_ATOMIC_ADD:
1214 setFlag(AtomicAdd);
1215 break;
1216 case Brig::BRIG_ATOMIC_WRAPINC:
1217 setFlag(AtomicInc);
1218 break;
1219 case Brig::BRIG_ATOMIC_WRAPDEC:
1220 setFlag(AtomicDec);
1221 break;
1222 case Brig::BRIG_ATOMIC_MIN:
1223 setFlag(AtomicMin);
1224 break;
1225 case Brig::BRIG_ATOMIC_MAX:
1226 setFlag(AtomicMax);
1227 break;
1228 case Brig::BRIG_ATOMIC_SUB:
1229 setFlag(AtomicSub);
1230 break;
1231 default:
1232 fatal("Bad BrigAtomicOperation code %d\n", atomicOperation);
1233 }
1234
1235 switch (segment) {
1236 case BRIG_SEGMENT_GLOBAL:
1237 setFlag(GlobalSegment);
1238 break;
1239 case BRIG_SEGMENT_GROUP:
1240 setFlag(GroupSegment);
1241 break;
1242 case BRIG_SEGMENT_FLAT:
1243 setFlag(Flat);
1244 break;
1245 default:
1246 panic("Atomic: segment %d not supported\n", segment);
1247 }
1248
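            // returning atomics (BRIG_OPCODE_ATOMIC) encode their operands
            // as dest, addr, src0[, src1]; the no-return form drops the
            // dest, so every operand index below shifts down by one.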
1249 if (HasDst) {
1250 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1251 dest.init(op_offs, obj);
1252
1253 op_offs = obj->getOperandPtr(ib->operands, 1);
1254 addr.init(op_offs, obj);
1255
1256 for (int i = 0; i < NumSrcOperands; ++i) {
1257 op_offs = obj->getOperandPtr(ib->operands, i + 2);
1258 src[i].init(op_offs, obj);
1259 }
1260 } else {
1261
1262 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1263 addr.init(op_offs, obj);
1264
1265 for (int i = 0; i < NumSrcOperands; ++i) {
1266 op_offs = obj->getOperandPtr(ib->operands, i + 1);
1267 src[i].init(op_offs, obj);
1268 }
1269 }
1270 }
1271
1272 int numSrcRegOperands()
1273 {
1274 int operands = 0;
1275 for (int i = 0; i < NumSrcOperands; i++) {
1276 if (src[i].isVectorRegister()) {
1277 operands++;
1278 }
1279 }
1280 if (addr.isVectorRegister())
1281 operands++;
1282 return operands;
1283 }
1284 int numDstRegOperands() { return dest.isVectorRegister(); }
1285 int getNumOperands()
1286 {
1287 if (addr.isVectorRegister())
1288 return(NumSrcOperands + 2);
1289 return(NumSrcOperands + 1);
1290 }
1291 bool isVectorRegister(int operandIndex)
1292 {
1293 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1294 if (operandIndex < NumSrcOperands)
1295 return src[operandIndex].isVectorRegister();
1296 else if (operandIndex == NumSrcOperands)
1297 return(addr.isVectorRegister());
1298 else
1299 return dest.isVectorRegister();
1300 }
1301 bool isCondRegister(int operandIndex)
1302 {
1303 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1304 if (operandIndex < NumSrcOperands)
1305 return src[operandIndex].isCondRegister();
1306 else if (operandIndex == NumSrcOperands)
1307 return(addr.isCondRegister());
1308 else
1309 return dest.isCondRegister();
1310 }
1311 bool isScalarRegister(int operandIndex)
1312 {
1313 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1314 if (operandIndex < NumSrcOperands)
1315 return src[operandIndex].isScalarRegister();
1316 else if (operandIndex == NumSrcOperands)
1317 return(addr.isScalarRegister());
1318 else
1319 return dest.isScalarRegister();
1320 }
1321 bool isSrcOperand(int operandIndex)
1322 {
1323 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1324 if (operandIndex < NumSrcOperands)
1325 return true;
1326 else if (operandIndex == NumSrcOperands)
1327 return(addr.isVectorRegister());
1328 else
1329 return false;
1330 }
1331 bool isDstOperand(int operandIndex)
1332 {
1333 if (operandIndex <= NumSrcOperands)
1334 return false;
1335 else
1336 return true;
1337 }
1338 int getOperandSize(int operandIndex)
1339 {
1340 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1341 if (operandIndex < NumSrcOperands)
1342 return(src[operandIndex].opSize());
1343 else if (operandIndex == NumSrcOperands)
1344 return(addr.opSize());
1345 else
1346 return(dest.opSize());
1347 }
1348 int getRegisterIndex(int operandIndex)
1349 {
1350 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1351 if (operandIndex < NumSrcOperands)
1352 return(src[operandIndex].regIndex());
1353 else if (operandIndex == NumSrcOperands)
1354 return(addr.regIndex());
1355 else
1356 return(dest.regIndex());
1357 return -1;
1358 }
1359 };
1360
1361 template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
1362 bool HasDst>
1363 class AtomicInst :
1364 public AtomicInstBase<typename MemDataType::OperandType,
1365 AddrOperandType, NumSrcOperands, HasDst>,
1366 public MemInst
1367 {
1368 public:
1369 void generateDisassembly() override;
1370
1371 AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
1372 const char *_opcode)
1373 : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
1374 NumSrcOperands, HasDst>
1375 (ib, obj, _opcode),
1376 MemInst(MemDataType::memType)
1377 {
1378 init_addr(&this->addr);
1379 }
1380
1381 void
1382 initiateAcc(GPUDynInstPtr gpuDynInst) override
1383 {
1384 // before doing the RMW, check if this atomic has
1385 // release semantics, and if so issue a release first
1386 if (!this->isLocalMem()) {
1387 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1388 && (gpuDynInst->isRelease()
1389 || gpuDynInst->isAcquireRelease())) {
1390
1391 gpuDynInst->statusBitVector = VectorMask(1);
1392
1393 gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
1394 gpuDynInst->useContinuation = true;
1395
1396 // create request
1397 Request *req = new Request(0, 0, 0, 0,
1398 gpuDynInst->computeUnit()->masterId(),
1399 0, gpuDynInst->wfDynId);
1400 req->setFlags(Request::RELEASE);
1401 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
1402
1403 return;
1404 }
1405 }
1406
1407            // if there are no release semantics, execute the RMW immediately
1408 execAtomic(gpuDynInst);
1409
1410 }
1411
1412 void execute(GPUDynInstPtr gpuDynInst) override;
1413
1414 private:
1415 // execAtomic may be called through a continuation
1416 // if the RMW had release semantics. see comment for
1417 // execContinuation in gpu_dyn_inst.hh
1418 void
1419 execAtomic(GPUDynInstPtr gpuDynInst) override
1420 {
1421 gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
1422
1423 typedef typename MemDataType::CType c0;
1424
1425 c0 *d = &((c0*) gpuDynInst->d_data)[0];
1426 c0 *e = &((c0*) gpuDynInst->a_data)[0];
1427 c0 *f = &((c0*) gpuDynInst->x_data)[0];
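            // per lane: d receives the old memory value, e holds the first
            // source operand (a_data), and f the second source operand
            // (x_data), which only matters for CAS.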
1428
1429 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
1430 if (gpuDynInst->exec_mask[i]) {
1431 Addr vaddr = gpuDynInst->addr[i];
1432
1433 if (this->isLocalMem()) {
1434 Wavefront *wavefront = gpuDynInst->wavefront();
1435 *d = wavefront->ldsChunk->read<c0>(vaddr);
1436
1437 if (this->isAtomicAdd()) {
1438 wavefront->ldsChunk->write<c0>(vaddr,
1439 wavefront->ldsChunk->read<c0>(vaddr) + (*e));
1440 } else if (this->isAtomicSub()) {
1441 wavefront->ldsChunk->write<c0>(vaddr,
1442 wavefront->ldsChunk->read<c0>(vaddr) - (*e));
1443 } else if (this->isAtomicMax()) {
1444 wavefront->ldsChunk->write<c0>(vaddr,
1445 std::max(wavefront->ldsChunk->read<c0>(vaddr),
1446 (*e)));
1447 } else if (this->isAtomicMin()) {
1448 wavefront->ldsChunk->write<c0>(vaddr,
1449 std::min(wavefront->ldsChunk->read<c0>(vaddr),
1450 (*e)));
1451 } else if (this->isAtomicAnd()) {
1452 wavefront->ldsChunk->write<c0>(vaddr,
1453 wavefront->ldsChunk->read<c0>(vaddr) & (*e));
1454 } else if (this->isAtomicOr()) {
1455 wavefront->ldsChunk->write<c0>(vaddr,
1456 wavefront->ldsChunk->read<c0>(vaddr) | (*e));
1457 } else if (this->isAtomicXor()) {
1458 wavefront->ldsChunk->write<c0>(vaddr,
1459 wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
1460 } else if (this->isAtomicInc()) {
1461 wavefront->ldsChunk->write<c0>(vaddr,
1462 wavefront->ldsChunk->read<c0>(vaddr) + 1);
1463 } else if (this->isAtomicDec()) {
1464 wavefront->ldsChunk->write<c0>(vaddr,
1465 wavefront->ldsChunk->read<c0>(vaddr) - 1);
1466 } else if (this->isAtomicExch()) {
1467 wavefront->ldsChunk->write<c0>(vaddr, (*e));
1468 } else if (this->isAtomicCAS()) {
1469 wavefront->ldsChunk->write<c0>(vaddr,
1470 (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
1471 (*f) : wavefront->ldsChunk->read<c0>(vaddr));
1472 } else {
1473 fatal("Unrecognized or invalid HSAIL atomic op "
1474 "type.\n");
1475 }
1476 } else {
1477 Request *req =
1478 new Request(0, vaddr, sizeof(c0), 0,
1479 gpuDynInst->computeUnit()->masterId(),
1480 0, gpuDynInst->wfDynId,
1481 gpuDynInst->makeAtomicOpFunctor<c0>(e,
1482 f));
1483
1484 gpuDynInst->setRequestFlags(req);
1485 PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
1486 pkt->dataStatic(d);
1487
1488 if (gpuDynInst->computeUnit()->shader->
1489 separate_acquire_release &&
1490 (gpuDynInst->isAcquire())) {
1491 // if this atomic has acquire semantics,
1492 // schedule the continuation to perform an
1493 // acquire after the RMW completes
1494 gpuDynInst->execContinuation =
1495 &GPUStaticInst::execAtomicAcq;
1496
1497 gpuDynInst->useContinuation = true;
1498 } else {
1499 // the request will be finished when the RMW completes
1500 gpuDynInst->useContinuation = false;
1501 }
1502 // translation is performed in sendRequest()
1503 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
1504 pkt);
1505 }
1506 }
1507
1508 ++d;
1509 ++e;
1510 ++f;
1511 }
1512
1513 gpuDynInst->updateStats();
1514 }
1515
1516        // execAtomicAcq will always be called through a continuation.
1517 // see comment for execContinuation in gpu_dyn_inst.hh
1518 void
1519 execAtomicAcq(GPUDynInstPtr gpuDynInst) override
1520 {
1521 // after performing the RMW, check to see if this instruction
1522 // has acquire semantics, and if so, issue an acquire
1523 if (!this->isLocalMem()) {
1524 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1525 && gpuDynInst->isAcquire()) {
1526 gpuDynInst->statusBitVector = VectorMask(1);
1527
1528 // the request will be finished when
1529 // the acquire completes
1530 gpuDynInst->useContinuation = false;
1531 // create request
1532 Request *req = new Request(0, 0, 0, 0,
1533 gpuDynInst->computeUnit()->masterId(),
1534 0, gpuDynInst->wfDynId);
1535 req->setFlags(Request::ACQUIRE);
1536 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
1537 }
1538 }
1539 }
1540 };
1541
1542 template<typename DataType, typename AddrOperandType, int NumSrcOperands>
1543 GPUStaticInst*
1544 constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
1545 {
1546 const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1547
1548 if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
1549 return decodeLd<DataType>(ib, obj);
1550 } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
1551 switch (ib->type) {
1552 case Brig::BRIG_TYPE_B8:
1553 return decodeSt<S8,S8>(ib, obj);
1554 case Brig::BRIG_TYPE_B16:
1555 return decodeSt<S16,S16>(ib, obj);
1556 case Brig::BRIG_TYPE_B32:
1557 return decodeSt<S32,S32>(ib, obj);
1558 case Brig::BRIG_TYPE_B64:
1559 return decodeSt<S64,S64>(ib, obj);
1560 default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
1561 }
1562 } else {
1563 if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
1564 return new AtomicInst<DataType, AddrOperandType,
1565 NumSrcOperands, false>(ib, obj, "atomicnoret");
1566 else
1567 return new AtomicInst<DataType, AddrOperandType,
1568 NumSrcOperands, true>(ib, obj, "atomic");
1569 }
1570 }
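
    // ld/st expressed as atomics (BRIG_ATOMIC_LD / BRIG_ATOMIC_ST) are
    // routed back to the plain load/store decoders; the acquire/release
    // ordering and scope they carry are still picked up there from the
    // BrigInstAtomic fields.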
1571
1572 template<typename DataType, int NumSrcOperands>
1573 GPUStaticInst*
1574 decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
1575 {
1576 unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
1577 Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;
1578
1579 unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex);
1580
1581 BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
1582
1583 if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
1584 return constructAtomic<DataType, NoRegAddrOperand,
1585 NumSrcOperands>(ib, obj);
1586 } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
1587 // V2/V4 not allowed
1588 switch (tmp.regKind) {
1589 case Brig::BRIG_REGISTER_KIND_SINGLE:
1590 return constructAtomic<DataType, SRegAddrOperand,
1591 NumSrcOperands>(ib, obj);
1592 case Brig::BRIG_REGISTER_KIND_DOUBLE:
1593 return constructAtomic<DataType, DRegAddrOperand,
1594 NumSrcOperands>(ib, obj);
1595 default:
1596            fatal("Bad atomic register operand type %d\n", tmp.regKind);
1597 }
1598 } else {
1599 fatal("Bad atomic register operand kind %d\n", tmp.kind);
1600 }
1601 }
1602
1603
1604 template<typename DataType>
1605 GPUStaticInst*
1606 decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
1607 {
1608 const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1609
1610 if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
1611 return decodeAtomicHelper<DataType, 2>(ib, obj);
1612 } else {
1613 return decodeAtomicHelper<DataType, 1>(ib, obj);
1614 }
1615 }
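
    // CAS carries two source operands (the compare value and the swap
    // value); every other atomic operation carries one, hence the 2-vs-1
    // template argument above and in decodeAtomicNoRet below.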
1616
1617 template<typename DataType>
1618 GPUStaticInst*
1619 decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
1620 {
1621 const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1622 if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
1623 return decodeAtomicHelper<DataType, 2>(ib, obj);
1624 } else {
1625 return decodeAtomicHelper<DataType, 1>(ib, obj);
1626 }
1627 }
1628} // namespace HsailISA
1629
1630#endif // __ARCH_HSAIL_INSTS_MEM_HH__