mem.hh (11693:bc1f702c25b9)
1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __ARCH_HSAIL_INSTS_MEM_HH__
37#define __ARCH_HSAIL_INSTS_MEM_HH__
38
39#include <type_traits>
40
41#include "arch/hsail/insts/decl.hh"
42#include "arch/hsail/insts/gpu_static_inst.hh"
43#include "arch/hsail/operand.hh"
44#include "gpu-compute/compute_unit.hh"
45
46namespace HsailISA
47{
48 class MemInst
49 {
50 public:
51 MemInst() : size(0), addr_operand(nullptr) { }
52
53 MemInst(Enums::MemType m_type)
54 {
55 if (m_type == Enums::M_U64 ||
56 m_type == Enums::M_S64 ||
57 m_type == Enums::M_F64) {
58 size = 8;
59 } else if (m_type == Enums::M_U32 ||
60 m_type == Enums::M_S32 ||
61 m_type == Enums::M_F32) {
62 size = 4;
63 } else if (m_type == Enums::M_U16 ||
64 m_type == Enums::M_S16 ||
65 m_type == Enums::M_F16) {
66 size = 2;
67 } else {
68 size = 1;
69 }
70
71 addr_operand = nullptr;
72 }
73
74 void
75 init_addr(AddrOperandBase *_addr_operand)
76 {
77 addr_operand = _addr_operand;
78 }
79
80 private:
81 int size;
82 AddrOperandBase *addr_operand;
83
84 public:
85 int getMemOperandSize() { return size; }
86 AddrOperandBase *getAddressOperand() { return addr_operand; }
87 };
88
89 template<typename DestOperandType, typename AddrOperandType>
90 class LdaInstBase : public HsailGPUStaticInst
91 {
92 public:
93 typename DestOperandType::DestOperand dest;
94 AddrOperandType addr;
95
96 LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
97 const char *_opcode)
98 : HsailGPUStaticInst(obj, _opcode)
99 {
100 using namespace Brig;
101
102 setFlag(ALU);
103
104 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
105 dest.init(op_offs, obj);
106 op_offs = obj->getOperandPtr(ib->operands, 1);
107 addr.init(op_offs, obj);
108 }
109
110 int numSrcRegOperands() override
111 { return(this->addr.isVectorRegister()); }
112 int numDstRegOperands() override
113 { return dest.isVectorRegister(); }
114 bool isVectorRegister(int operandIndex) override
115 {
116 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
117 return((operandIndex == 0) ? dest.isVectorRegister() :
118 this->addr.isVectorRegister());
119 }
120 bool isCondRegister(int operandIndex) override
121 {
122 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
123 return((operandIndex == 0) ? dest.isCondRegister() :
124 this->addr.isCondRegister());
125 }
126 bool isScalarRegister(int operandIndex) override
127 {
128 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
129 return((operandIndex == 0) ? dest.isScalarRegister() :
130 this->addr.isScalarRegister());
131 }
132 bool isSrcOperand(int operandIndex) override
133 {
134 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
135 if (operandIndex > 0)
136 return(this->addr.isVectorRegister());
137 return false;
138 }
139 bool isDstOperand(int operandIndex) override {
140 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
141 return(operandIndex == 0);
142 }
143 int getOperandSize(int operandIndex) override
144 {
145 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
146 return((operandIndex == 0) ? dest.opSize() :
147 this->addr.opSize());
148 }
149 int getRegisterIndex(int operandIndex) override
150 {
151 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
152 return((operandIndex == 0) ? dest.regIndex() :
153 this->addr.regIndex());
154 }
155 int getNumOperands() override
156 {
157 if (this->addr.isVectorRegister())
158 return 2;
159 return 1;
160 }
161 };
162
163 template<typename DestDataType, typename AddrOperandType>
164 class LdaInst :
165 public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
166 public MemInst
167 {
168 public:
169 void generateDisassembly();
170
171 LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
172 const char *_opcode)
173 : LdaInstBase<typename DestDataType::OperandType,
174 AddrOperandType>(ib, obj, _opcode)
175 {
176 init_addr(&this->addr);
177 }
178
179 void execute(GPUDynInstPtr gpuDynInst);
180 };
181
182 template<typename DataType>
183 GPUStaticInst*
184 decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
185 {
186 unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
187 BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);
188
189 if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
190 return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
191 } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
192 // V2/V4 not allowed
193 switch (regDataType.regKind) {
194 case Brig::BRIG_REGISTER_KIND_SINGLE:
195 return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
196 case Brig::BRIG_REGISTER_KIND_DOUBLE:
197 return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
198 default:
199 fatal("Bad ldas register operand type %d\n", regDataType.type);
200 }
201 } else {
202 fatal("Bad ldas register operand kind %d\n", regDataType.kind);
203 }
204 }
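    // Dispatch example (illustrative, following the cases above): an ldas whose
    // address operand is held in a $d (64-bit) register decodes to
    // LdaInst<DataType, DRegAddrOperand>, while a plain address operand with no
    // register decodes to LdaInst<DataType, NoRegAddrOperand>.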
205
206 template<typename MemOperandType, typename DestOperandType,
207 typename AddrOperandType>
208 class LdInstBase : public HsailGPUStaticInst
209 {
210 public:
211 Brig::BrigWidth8_t width;
212 typename DestOperandType::DestOperand dest;
213 AddrOperandType addr;
214
215 Brig::BrigSegment segment;
216 Brig::BrigMemoryOrder memoryOrder;
217 Brig::BrigMemoryScope memoryScope;
218 unsigned int equivClass;
219
220 LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
221 const char *_opcode)
222 : HsailGPUStaticInst(obj, _opcode)
223 {
224 using namespace Brig;
225
226 setFlag(MemoryRef);
227 setFlag(Load);
228
229 if (ib->opcode == BRIG_OPCODE_LD) {
230 const BrigInstMem *ldst = (const BrigInstMem*)ib;
231
232 segment = (BrigSegment)ldst->segment;
233 memoryOrder = BRIG_MEMORY_ORDER_NONE;
234 memoryScope = BRIG_MEMORY_SCOPE_NONE;
235 equivClass = ldst->equivClass;
236
237 width = ldst->width;
238 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
239 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
240 if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
241 dest.init(op_offs, obj);
242
243 op_offs = obj->getOperandPtr(ib->operands, 1);
244 addr.init(op_offs, obj);
245 } else {
246 const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
247
248 segment = (BrigSegment)at->segment;
249 memoryOrder = (BrigMemoryOrder)at->memoryOrder;
250 memoryScope = (BrigMemoryScope)at->memoryScope;
251 equivClass = 0;
252
253 width = BRIG_WIDTH_1;
254 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
255 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
256
257 if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
258 dest.init(op_offs, obj);
259
260 op_offs = obj->getOperandPtr(ib->operands,1);
261 addr.init(op_offs, obj);
262 }
263
264 switch (memoryOrder) {
265 case BRIG_MEMORY_ORDER_NONE:
266 setFlag(NoOrder);
267 break;
268 case BRIG_MEMORY_ORDER_RELAXED:
269 setFlag(RelaxedOrder);
270 break;
271 case BRIG_MEMORY_ORDER_SC_ACQUIRE:
272 setFlag(Acquire);
273 break;
274 case BRIG_MEMORY_ORDER_SC_RELEASE:
275 setFlag(Release);
276 break;
277 case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
278 setFlag(AcquireRelease);
279 break;
280 default:
281 fatal("LdInst has bad memory order type\n");
282 }
283
284 switch (memoryScope) {
285 case BRIG_MEMORY_SCOPE_NONE:
286 setFlag(NoScope);
287 break;
288 case BRIG_MEMORY_SCOPE_WORKITEM:
289 setFlag(WorkitemScope);
290 break;
291 case BRIG_MEMORY_SCOPE_WORKGROUP:
292 setFlag(WorkgroupScope);
293 break;
294 case BRIG_MEMORY_SCOPE_AGENT:
295 setFlag(DeviceScope);
296 break;
297 case BRIG_MEMORY_SCOPE_SYSTEM:
298 setFlag(SystemScope);
299 break;
300 default:
301 fatal("LdInst has bad memory scope type\n");
302 }
303
304 switch (segment) {
305 case BRIG_SEGMENT_GLOBAL:
306 setFlag(GlobalSegment);
307 break;
308 case BRIG_SEGMENT_GROUP:
309 setFlag(GroupSegment);
310 break;
311 case BRIG_SEGMENT_PRIVATE:
312 setFlag(PrivateSegment);
313 break;
314 case BRIG_SEGMENT_READONLY:
315 setFlag(ReadOnlySegment);
316 break;
317 case BRIG_SEGMENT_SPILL:
318 setFlag(SpillSegment);
319 break;
320 case BRIG_SEGMENT_FLAT:
321 setFlag(Flat);
322 break;
323 case BRIG_SEGMENT_KERNARG:
324 setFlag(KernArgSegment);
325 break;
326 case BRIG_SEGMENT_ARG:
327 setFlag(ArgSegment);
328 break;
329 default:
330 panic("Ld: segment %d not supported\n", segment);
331 }
332 }
333
334 int numSrcRegOperands() override
335 { return(this->addr.isVectorRegister()); }
336 int numDstRegOperands() override { return dest.isVectorRegister(); }
337 int getNumOperands() override
338 {
339 if (this->addr.isVectorRegister())
340 return 2;
341 else
342 return 1;
343 }
344 bool isVectorRegister(int operandIndex) override
345 {
346 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
347 return((operandIndex == 0) ? dest.isVectorRegister() :
348 this->addr.isVectorRegister());
349 }
350 bool isCondRegister(int operandIndex) override
351 {
352 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
353 return((operandIndex == 0) ? dest.isCondRegister() :
354 this->addr.isCondRegister());
355 }
356 bool isScalarRegister(int operandIndex) override
357 {
358 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
359 return((operandIndex == 0) ? dest.isScalarRegister() :
360 this->addr.isScalarRegister());
361 }
362 bool isSrcOperand(int operandIndex) override
363 {
364 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
365 if (operandIndex > 0)
366 return(this->addr.isVectorRegister());
367 return false;
368 }
369 bool isDstOperand(int operandIndex) override
370 {
371 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
372 return(operandIndex == 0);
373 }
374 int getOperandSize(int operandIndex) override
375 {
376 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
377 return((operandIndex == 0) ? dest.opSize() :
378 this->addr.opSize());
379 }
380 int getRegisterIndex(int operandIndex) override
381 {
382 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
383 return((operandIndex == 0) ? dest.regIndex() :
384 this->addr.regIndex());
385 }
386 };
387
388 template<typename MemDataType, typename DestDataType,
389 typename AddrOperandType>
390 class LdInst :
391 public LdInstBase<typename MemDataType::CType,
392 typename DestDataType::OperandType, AddrOperandType>,
393 public MemInst
394 {
395 typename DestDataType::OperandType::DestOperand dest_vect[4];
396 uint16_t num_dest_operands;
397 void generateDisassembly() override;
398
399 public:
400 LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
401 const char *_opcode)
402 : LdInstBase<typename MemDataType::CType,
403 typename DestDataType::OperandType,
404 AddrOperandType>(ib, obj, _opcode),
405 MemInst(MemDataType::memType)
406 {
407 init_addr(&this->addr);
408
409 unsigned op_offs = obj->getOperandPtr(ib->operands,0);
410 const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
411
412 if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
413 const Brig::BrigOperandOperandList *brigRegVecOp =
414 (const Brig::BrigOperandOperandList*)brigOp;
415
416 num_dest_operands =
417 *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
418
419 assert(num_dest_operands <= 4);
420 } else {
421 num_dest_operands = 1;
422 }
423
424 if (num_dest_operands > 1) {
425 assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
426
427 for (int i = 0; i < num_dest_operands; ++i) {
428 dest_vect[i].init_from_vect(op_offs, obj, i);
429 }
430 }
431 }
432
433 void
434 initiateAcc(GPUDynInstPtr gpuDynInst) override
435 {
436 typedef typename MemDataType::CType c0;
437
438 gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
439
440 if (num_dest_operands > 1) {
441 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
442 if (gpuDynInst->exec_mask[i])
443 gpuDynInst->statusVector.push_back(num_dest_operands);
444 else
445 gpuDynInst->statusVector.push_back(0);
446 }
447
448 for (int k = 0; k < num_dest_operands; ++k) {
449
450 c0 *d = &((c0*)gpuDynInst->d_data)
451 [k * gpuDynInst->computeUnit()->wfSize()];
452
453 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
454 if (gpuDynInst->exec_mask[i]) {
455 Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
456
457 if (this->isLocalMem()) {
458 // load from shared memory
459 *d = gpuDynInst->wavefront()->ldsChunk->
460 read<c0>(vaddr);
461 } else {
462 Request *req = new Request(0, vaddr, sizeof(c0), 0,
463 gpuDynInst->computeUnit()->masterId(),
464 0, gpuDynInst->wfDynId);
465
466 gpuDynInst->setRequestFlags(req);
467 PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
468 pkt->dataStatic(d);
469
470 if (gpuDynInst->computeUnit()->shader->
471 separate_acquire_release &&
472 gpuDynInst->isAcquire()) {
473 // if this load has acquire semantics,
474 // set the response continuation function
475 // to perform an Acquire request
476 gpuDynInst->execContinuation =
477 &GPUStaticInst::execLdAcq;
478
479 gpuDynInst->useContinuation = true;
480 } else {
481 // the request will be finished when
482 // the load completes
483 gpuDynInst->useContinuation = false;
484 }
485 // translation is performed in sendRequest()
486 gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
487 i, pkt);
488 }
489 }
490 ++d;
491 }
492 }
493
494 gpuDynInst->updateStats();
495 }
496
497 void
498 completeAcc(GPUDynInstPtr gpuDynInst) override
499 {
500 typedef typename MemDataType::CType c1;
501
502 constexpr bool is_vt_32 = DestDataType::vgprType == VT_32;
503
504 /**
505 * this code essentially replaces the long if-else chain
506 * that was used in GlobalMemPipeline::exec() to infer the
507 * size (single/double) and type (floating point/integer) of
508 * the destination register. this is needed for load
509 * instructions because the loaded value and the
510 * destination type can be of different sizes, and we also
511 * need to know if the value we're writing back is floating
512 * point and signed/unsigned, so we can properly cast the
513 * writeback value
514 */
515 typedef typename std::conditional<is_vt_32,
516 typename std::conditional<std::is_floating_point<c1>::value,
517 float, typename std::conditional<std::is_signed<c1>::value,
518 int32_t, uint32_t>::type>::type,
519 typename std::conditional<std::is_floating_point<c1>::value,
520 double, typename std::conditional<std::is_signed<c1>::value,
521 int64_t, uint64_t>::type>::type>::type c0;
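            // Worked example of the selection above (illustrative): for a
            // signed 16-bit load into a 32-bit destination register,
            // c1 = int16_t and is_vt_32 is true, so c0 resolves to int32_t;
            // for an f64 load into a 64-bit destination, c1 = double and c0
            // resolves to double.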
522
523
524 Wavefront *w = gpuDynInst->wavefront();
525
526 std::vector<uint32_t> regVec;
527 // iterate over the number of destination register operands since
528 // this is a load
529 for (int k = 0; k < num_dest_operands; ++k) {
530 assert((sizeof(c1) * num_dest_operands)
531 <= MAX_WIDTH_FOR_MEM_INST);
532
533 int dst = this->dest.regIndex() + k;
534 if (num_dest_operands > MAX_REGS_FOR_NON_VEC_MEM_INST)
535 dst = dest_vect[k].regIndex();
536 // virtual->physical VGPR mapping
537 int physVgpr = w->remap(dst, sizeof(c0), 1);
538 // save the physical VGPR index
539 regVec.push_back(physVgpr);
540
541 c1 *p1 =
542 &((c1*)gpuDynInst->d_data)[k * w->computeUnit->wfSize()];
543
544 for (int i = 0; i < w->computeUnit->wfSize(); ++i) {
545 if (gpuDynInst->exec_mask[i]) {
546 DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: "
547 "$%s%d <- %d global ld done (src = wavefront "
548 "ld inst)\n", w->computeUnit->cu_id, w->simdId,
549 w->wfSlotId, i, sizeof(c0) == 4 ? "s" : "d",
550 dst, *p1);
551 // write the value into the physical VGPR. This is a
552 // purely functional operation. No timing is modeled.
553 w->computeUnit->vrf[w->simdId]->write<c0>(physVgpr,
554 *p1, i);
555 }
556 ++p1;
557 }
558 }
559
560 // Schedule the write operation of the load data on the VRF.
561 // This simply models the timing aspect of the VRF write operation.
562 // It does not modify the physical VGPR.
563 int loadVrfBankConflictCycles = gpuDynInst->computeUnit()->
564 vrf[w->simdId]->exec(gpuDynInst->seqNum(), w, regVec,
565 sizeof(c0), gpuDynInst->time);
566
567 if (this->isGlobalMem()) {
568 gpuDynInst->computeUnit()->globalMemoryPipe
569 .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
570 } else {
571 assert(this->isLocalMem());
572 gpuDynInst->computeUnit()->localMemoryPipe
573 .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
574 }
575 }
576
494 private:
495 void
496 execLdAcq(GPUDynInstPtr gpuDynInst) override
497 {
498 // after the load has completed and if the load has acquire
499 // semantics, issue an acquire request.
500 if (!this->isLocalMem()) {
501 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
502 && gpuDynInst->isAcquire()) {
503 gpuDynInst->statusBitVector = VectorMask(1);
504 gpuDynInst->useContinuation = false;
505 // create request
506 Request *req = new Request(0, 0, 0, 0,
507 gpuDynInst->computeUnit()->masterId(),
508 0, gpuDynInst->wfDynId);
509 req->setFlags(Request::ACQUIRE);
510 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
511 }
512 }
513 }
514
515 public:
516 bool isVectorRegister(int operandIndex) override
517 {
518 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
519 if ((num_dest_operands != getNumOperands()) &&
520 (operandIndex == (getNumOperands()-1)))
521 return(this->addr.isVectorRegister());
522 if (num_dest_operands > 1) {
523 return dest_vect[operandIndex].isVectorRegister();
524 }
525 else if (num_dest_operands == 1) {
526 return LdInstBase<typename MemDataType::CType,
527 typename DestDataType::OperandType,
528 AddrOperandType>::dest.isVectorRegister();
529 }
530 return false;
531 }
532 bool isCondRegister(int operandIndex) override
533 {
534 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
535 if ((num_dest_operands != getNumOperands()) &&
536 (operandIndex == (getNumOperands()-1)))
537 return(this->addr.isCondRegister());
538 if (num_dest_operands > 1)
539 return dest_vect[operandIndex].isCondRegister();
540 else if (num_dest_operands == 1)
541 return LdInstBase<typename MemDataType::CType,
542 typename DestDataType::OperandType,
543 AddrOperandType>::dest.isCondRegister();
544 return false;
545 }
546 bool isScalarRegister(int operandIndex) override
547 {
548 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
549 if ((num_dest_operands != getNumOperands()) &&
550 (operandIndex == (getNumOperands()-1)))
551 return(this->addr.isScalarRegister());
552 if (num_dest_operands > 1)
553 return dest_vect[operandIndex].isScalarRegister();
554 else if (num_dest_operands == 1)
555 return LdInstBase<typename MemDataType::CType,
556 typename DestDataType::OperandType,
557 AddrOperandType>::dest.isScalarRegister();
558 return false;
559 }
560 bool isSrcOperand(int operandIndex) override
561 {
562 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
563 if ((num_dest_operands != getNumOperands()) &&
564 (operandIndex == (getNumOperands()-1)))
565 return(this->addr.isVectorRegister());
566 return false;
567 }
568 bool isDstOperand(int operandIndex) override
569 {
570 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
571 if ((num_dest_operands != getNumOperands()) &&
572 (operandIndex == (getNumOperands()-1)))
573 return false;
574 return true;
575 }
576 int getOperandSize(int operandIndex) override
577 {
578 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
579 if ((num_dest_operands != getNumOperands()) &&
580 (operandIndex == (getNumOperands()-1)))
581 return(this->addr.opSize());
582 if (num_dest_operands > 1)
583 return(dest_vect[operandIndex].opSize());
584 else if (num_dest_operands == 1)
585 return(LdInstBase<typename MemDataType::CType,
586 typename DestDataType::OperandType,
587 AddrOperandType>::dest.opSize());
588 return 0;
589 }
590 int getRegisterIndex(int operandIndex) override
591 {
592 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
593 if ((num_dest_operands != getNumOperands()) &&
594 (operandIndex == (getNumOperands()-1)))
595 return(this->addr.regIndex());
596 if (num_dest_operands > 1)
597 return(dest_vect[operandIndex].regIndex());
598 else if (num_dest_operands == 1)
599 return(LdInstBase<typename MemDataType::CType,
600 typename DestDataType::OperandType,
601 AddrOperandType>::dest.regIndex());
602 return -1;
603 }
604 int getNumOperands() override
605 {
606 if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
607 return(num_dest_operands+1);
608 else
609 return(num_dest_operands);
610 }
611 void execute(GPUDynInstPtr gpuDynInst) override;
612 };
613
614 template<typename MemDT, typename DestDT>
615 GPUStaticInst*
616 decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
617 {
618 unsigned op_offs = obj->getOperandPtr(ib->operands,1);
619 BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
620
621 if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
622 return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
623 } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
624 tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
625 switch (tmp.regKind) {
626 case Brig::BRIG_REGISTER_KIND_SINGLE:
627 return new LdInst<MemDT, DestDT,
628 SRegAddrOperand>(ib, obj, "ld");
629 case Brig::BRIG_REGISTER_KIND_DOUBLE:
630 return new LdInst<MemDT, DestDT,
631 DRegAddrOperand>(ib, obj, "ld");
632 default:
633 fatal("Bad ld register operand type %d\n", tmp.regKind);
634 }
635 } else {
636 fatal("Bad ld register operand kind %d\n", tmp.kind);
637 }
638 }
639
640 template<typename MemDT>
641 GPUStaticInst*
642 decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
643 {
644 unsigned op_offs = obj->getOperandPtr(ib->operands,0);
645 BrigRegOperandInfo dest = findRegDataType(op_offs, obj);
646
647 assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
648 dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
649 switch(dest.regKind) {
650 case Brig::BRIG_REGISTER_KIND_SINGLE:
651 switch (ib->type) {
652 case Brig::BRIG_TYPE_B8:
653 case Brig::BRIG_TYPE_B16:
654 case Brig::BRIG_TYPE_B32:
655 return decodeLd2<MemDT, B32>(ib, obj);
656 case Brig::BRIG_TYPE_U8:
657 case Brig::BRIG_TYPE_U16:
658 case Brig::BRIG_TYPE_U32:
659 return decodeLd2<MemDT, U32>(ib, obj);
660 case Brig::BRIG_TYPE_S8:
661 case Brig::BRIG_TYPE_S16:
662 case Brig::BRIG_TYPE_S32:
663 return decodeLd2<MemDT, S32>(ib, obj);
664 case Brig::BRIG_TYPE_F16:
665 case Brig::BRIG_TYPE_F32:
666 return decodeLd2<MemDT, U32>(ib, obj);
667 default:
668 fatal("Bad ld register operand type %d, %d\n",
669 dest.regKind, ib->type);
670 };
671 case Brig::BRIG_REGISTER_KIND_DOUBLE:
672 switch (ib->type) {
673 case Brig::BRIG_TYPE_B64:
674 return decodeLd2<MemDT, B64>(ib, obj);
675 case Brig::BRIG_TYPE_U64:
676 return decodeLd2<MemDT, U64>(ib, obj);
677 case Brig::BRIG_TYPE_S64:
678 return decodeLd2<MemDT, S64>(ib, obj);
679 case Brig::BRIG_TYPE_F64:
680 return decodeLd2<MemDT, U64>(ib, obj);
681 default:
682 fatal("Bad ld register operand type %d, %d\n",
683 dest.regKind, ib->type);
684 };
685 default:
686 fatal("Bad ld register operand type %d, %d\n", dest.regKind,
687 ib->type);
688 }
689 }
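    // Decode example (illustrative): ld_u8, ld_u16, and ld_u32 with a single
    // ($s) destination all dispatch to decodeLd2<MemDT, U32>, i.e. the loaded
    // value is written back through a 32-bit unsigned destination container;
    // f16/f32 use U32 as well, and the $d cases map to B64/U64/S64.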
690
691 template<typename MemDataType, typename SrcOperandType,
692 typename AddrOperandType>
693 class StInstBase : public HsailGPUStaticInst
694 {
695 public:
696 typename SrcOperandType::SrcOperand src;
697 AddrOperandType addr;
698
699 Brig::BrigSegment segment;
700 Brig::BrigMemoryScope memoryScope;
701 Brig::BrigMemoryOrder memoryOrder;
702 unsigned int equivClass;
703
704 StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
705 const char *_opcode)
706 : HsailGPUStaticInst(obj, _opcode)
707 {
708 using namespace Brig;
709
710 setFlag(MemoryRef);
711 setFlag(Store);
712
713 if (ib->opcode == BRIG_OPCODE_ST) {
714 const BrigInstMem *ldst = (const BrigInstMem*)ib;
715
716 segment = (BrigSegment)ldst->segment;
717 memoryOrder = BRIG_MEMORY_ORDER_NONE;
718 memoryScope = BRIG_MEMORY_SCOPE_NONE;
719 equivClass = ldst->equivClass;
720
721 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
722 const BrigOperand *baseOp = obj->getOperand(op_offs);
723
724 if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
725 (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
726 src.init(op_offs, obj);
727 }
728
729 op_offs = obj->getOperandPtr(ib->operands, 1);
730 addr.init(op_offs, obj);
731 } else {
732 const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
733
734 segment = (BrigSegment)at->segment;
735 memoryScope = (BrigMemoryScope)at->memoryScope;
736 memoryOrder = (BrigMemoryOrder)at->memoryOrder;
737 equivClass = 0;
738
739 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
740 addr.init(op_offs, obj);
741
742 op_offs = obj->getOperandPtr(ib->operands, 1);
743 src.init(op_offs, obj);
744 }
745
746 switch (memoryOrder) {
747 case BRIG_MEMORY_ORDER_NONE:
748 setFlag(NoOrder);
749 break;
750 case BRIG_MEMORY_ORDER_RELAXED:
751 setFlag(RelaxedOrder);
752 break;
753 case BRIG_MEMORY_ORDER_SC_ACQUIRE:
754 setFlag(Acquire);
755 break;
756 case BRIG_MEMORY_ORDER_SC_RELEASE:
757 setFlag(Release);
758 break;
759 case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
760 setFlag(AcquireRelease);
761 break;
762 default:
763 fatal("StInst has bad memory order type\n");
764 }
765
766 switch (memoryScope) {
767 case BRIG_MEMORY_SCOPE_NONE:
768 setFlag(NoScope);
769 break;
770 case BRIG_MEMORY_SCOPE_WORKITEM:
771 setFlag(WorkitemScope);
772 break;
773 case BRIG_MEMORY_SCOPE_WORKGROUP:
774 setFlag(WorkgroupScope);
775 break;
776 case BRIG_MEMORY_SCOPE_AGENT:
777 setFlag(DeviceScope);
778 break;
779 case BRIG_MEMORY_SCOPE_SYSTEM:
780 setFlag(SystemScope);
781 break;
782 default:
783 fatal("StInst has bad memory scope type\n");
784 }
785
786 switch (segment) {
787 case BRIG_SEGMENT_GLOBAL:
788 setFlag(GlobalSegment);
789 break;
790 case BRIG_SEGMENT_GROUP:
791 setFlag(GroupSegment);
792 break;
793 case BRIG_SEGMENT_PRIVATE:
794 setFlag(PrivateSegment);
795 break;
796 case BRIG_SEGMENT_READONLY:
797 setFlag(ReadOnlySegment);
798 break;
799 case BRIG_SEGMENT_SPILL:
800 setFlag(SpillSegment);
801 break;
802 case BRIG_SEGMENT_FLAT:
803 setFlag(Flat);
804 break;
805 case BRIG_SEGMENT_ARG:
806 setFlag(ArgSegment);
807 break;
808 default:
809 panic("St: segment %d not supported\n", segment);
810 }
811 }
812
813 int numDstRegOperands() override { return 0; }
814 int numSrcRegOperands() override
815 {
816 return src.isVectorRegister() + this->addr.isVectorRegister();
817 }
818 int getNumOperands() override
819 {
820 if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
821 return 2;
822 else
823 return 1;
824 }
825 bool isVectorRegister(int operandIndex) override
826 {
827 assert(operandIndex >= 0 && operandIndex < getNumOperands());
828 return !operandIndex ? src.isVectorRegister() :
829 this->addr.isVectorRegister();
830 }
831 bool isCondRegister(int operandIndex) override
832 {
833 assert(operandIndex >= 0 && operandIndex < getNumOperands());
834 return !operandIndex ? src.isCondRegister() :
835 this->addr.isCondRegister();
836 }
837 bool isScalarRegister(int operandIndex) override
838 {
839 assert(operandIndex >= 0 && operandIndex < getNumOperands());
840 return !operandIndex ? src.isScalarRegister() :
841 this->addr.isScalarRegister();
842 }
843 bool isSrcOperand(int operandIndex) override
844 {
845 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
846 return true;
847 }
848 bool isDstOperand(int operandIndex) override { return false; }
849 int getOperandSize(int operandIndex) override
850 {
851 assert(operandIndex >= 0 && operandIndex < getNumOperands());
852 return !operandIndex ? src.opSize() : this->addr.opSize();
853 }
854 int getRegisterIndex(int operandIndex) override
855 {
856 assert(operandIndex >= 0 && operandIndex < getNumOperands());
857 return !operandIndex ? src.regIndex() : this->addr.regIndex();
858 }
859 };
860
861
862 template<typename MemDataType, typename SrcDataType,
863 typename AddrOperandType>
864 class StInst :
865 public StInstBase<MemDataType, typename SrcDataType::OperandType,
866 AddrOperandType>,
867 public MemInst
868 {
869 public:
870 typename SrcDataType::OperandType::SrcOperand src_vect[4];
871 uint16_t num_src_operands;
872 void generateDisassembly() override;
873
874 StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
875 const char *_opcode, int srcIdx)
876 : StInstBase<MemDataType, typename SrcDataType::OperandType,
877 AddrOperandType>(ib, obj, _opcode),
878 MemInst(SrcDataType::memType)
879 {
880 init_addr(&this->addr);
881
882 BrigRegOperandInfo rinfo;
883 unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx);
884 const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);
885
886 if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
887 const Brig::BrigOperandConstantBytes *op =
888 (Brig::BrigOperandConstantBytes*)baseOp;
889
890 rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
891 Brig::BRIG_TYPE_NONE);
892 } else {
893 rinfo = findRegDataType(op_offs, obj);
894 }
895
896 if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
897 const Brig::BrigOperandOperandList *brigRegVecOp =
898 (const Brig::BrigOperandOperandList*)baseOp;
899
900 num_src_operands =
901 *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
902
903 assert(num_src_operands <= 4);
904 } else {
905 num_src_operands = 1;
906 }
907
908 if (num_src_operands > 1) {
909 assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
910
911 for (int i = 0; i < num_src_operands; ++i) {
912 src_vect[i].init_from_vect(op_offs, obj, i);
913 }
914 }
915 }
916
917 void
918 initiateAcc(GPUDynInstPtr gpuDynInst) override
919 {
920 // before performing a store, check if this store has
921 // release semantics, and if so issue a release first
922 if (!this->isLocalMem()) {
923 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
924 && gpuDynInst->isRelease()) {
925
926 gpuDynInst->statusBitVector = VectorMask(1);
927 gpuDynInst->execContinuation = &GPUStaticInst::execSt;
928 gpuDynInst->useContinuation = true;
929 // create request
930 Request *req = new Request(0, 0, 0, 0,
931 gpuDynInst->computeUnit()->masterId(),
932 0, gpuDynInst->wfDynId);
933 req->setFlags(Request::RELEASE);
934 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
935
936 return;
937 }
938 }
939
940 // if there are no release semantics, perform the stores immediately
941 execSt(gpuDynInst);
942 }
943
577 private:
578 void
579 execLdAcq(GPUDynInstPtr gpuDynInst) override
580 {
581 // after the load has complete and if the load has acquire
582 // semantics, issue an acquire request.
583 if (!this->isLocalMem()) {
584 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
585 && gpuDynInst->isAcquire()) {
586 gpuDynInst->statusBitVector = VectorMask(1);
587 gpuDynInst->useContinuation = false;
588 // create request
589 Request *req = new Request(0, 0, 0, 0,
590 gpuDynInst->computeUnit()->masterId(),
591 0, gpuDynInst->wfDynId);
592 req->setFlags(Request::ACQUIRE);
593 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
594 }
595 }
596 }
597
598 public:
599 bool isVectorRegister(int operandIndex) override
600 {
601 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
602 if ((num_dest_operands != getNumOperands()) &&
603 (operandIndex == (getNumOperands()-1)))
604 return(this->addr.isVectorRegister());
605 if (num_dest_operands > 1) {
606 return dest_vect[operandIndex].isVectorRegister();
607 }
608 else if (num_dest_operands == 1) {
609 return LdInstBase<typename MemDataType::CType,
610 typename DestDataType::OperandType,
611 AddrOperandType>::dest.isVectorRegister();
612 }
613 return false;
614 }
615 bool isCondRegister(int operandIndex) override
616 {
617 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
618 if ((num_dest_operands != getNumOperands()) &&
619 (operandIndex == (getNumOperands()-1)))
620 return(this->addr.isCondRegister());
621 if (num_dest_operands > 1)
622 return dest_vect[operandIndex].isCondRegister();
623 else if (num_dest_operands == 1)
624 return LdInstBase<typename MemDataType::CType,
625 typename DestDataType::OperandType,
626 AddrOperandType>::dest.isCondRegister();
627 return false;
628 }
629 bool isScalarRegister(int operandIndex) override
630 {
631 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
632 if ((num_dest_operands != getNumOperands()) &&
633 (operandIndex == (getNumOperands()-1)))
634 return(this->addr.isScalarRegister());
635 if (num_dest_operands > 1)
636 return dest_vect[operandIndex].isScalarRegister();
637 else if (num_dest_operands == 1)
638 return LdInstBase<typename MemDataType::CType,
639 typename DestDataType::OperandType,
640 AddrOperandType>::dest.isScalarRegister();
641 return false;
642 }
643 bool isSrcOperand(int operandIndex) override
644 {
645 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
646 if ((num_dest_operands != getNumOperands()) &&
647 (operandIndex == (getNumOperands()-1)))
648 return(this->addr.isVectorRegister());
649 return false;
650 }
651 bool isDstOperand(int operandIndex) override
652 {
653 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
654 if ((num_dest_operands != getNumOperands()) &&
655 (operandIndex == (getNumOperands()-1)))
656 return false;
657 return true;
658 }
659 int getOperandSize(int operandIndex) override
660 {
661 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
662 if ((num_dest_operands != getNumOperands()) &&
663 (operandIndex == (getNumOperands()-1)))
664 return(this->addr.opSize());
665 if (num_dest_operands > 1)
666 return(dest_vect[operandIndex].opSize());
667 else if (num_dest_operands == 1)
668 return(LdInstBase<typename MemDataType::CType,
669 typename DestDataType::OperandType,
670 AddrOperandType>::dest.opSize());
671 return 0;
672 }
673 int getRegisterIndex(int operandIndex) override
674 {
675 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
676 if ((num_dest_operands != getNumOperands()) &&
677 (operandIndex == (getNumOperands()-1)))
678 return(this->addr.regIndex());
679 if (num_dest_operands > 1)
680 return(dest_vect[operandIndex].regIndex());
681 else if (num_dest_operands == 1)
682 return(LdInstBase<typename MemDataType::CType,
683 typename DestDataType::OperandType,
684 AddrOperandType>::dest.regIndex());
685 return -1;
686 }
687 int getNumOperands() override
688 {
689 if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
690 return(num_dest_operands+1);
691 else
692 return(num_dest_operands);
693 }
694 void execute(GPUDynInstPtr gpuDynInst) override;
695 };
696
697 template<typename MemDT, typename DestDT>
698 GPUStaticInst*
699 decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
700 {
701 unsigned op_offs = obj->getOperandPtr(ib->operands,1);
702 BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
703
704 if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
705 return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
706 } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
707 tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
708 switch (tmp.regKind) {
709 case Brig::BRIG_REGISTER_KIND_SINGLE:
710 return new LdInst<MemDT, DestDT,
711 SRegAddrOperand>(ib, obj, "ld");
712 case Brig::BRIG_REGISTER_KIND_DOUBLE:
713 return new LdInst<MemDT, DestDT,
714 DRegAddrOperand>(ib, obj, "ld");
715 default:
716 fatal("Bad ld register operand type %d\n", tmp.regKind);
717 }
718 } else {
719 fatal("Bad ld register operand kind %d\n", tmp.kind);
720 }
721 }
722
723 template<typename MemDT>
724 GPUStaticInst*
725 decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
726 {
727 unsigned op_offs = obj->getOperandPtr(ib->operands,0);
728 BrigRegOperandInfo dest = findRegDataType(op_offs, obj);
729
730 assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
731 dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
732 switch(dest.regKind) {
733 case Brig::BRIG_REGISTER_KIND_SINGLE:
734 switch (ib->type) {
735 case Brig::BRIG_TYPE_B8:
736 case Brig::BRIG_TYPE_B16:
737 case Brig::BRIG_TYPE_B32:
738 return decodeLd2<MemDT, B32>(ib, obj);
739 case Brig::BRIG_TYPE_U8:
740 case Brig::BRIG_TYPE_U16:
741 case Brig::BRIG_TYPE_U32:
742 return decodeLd2<MemDT, U32>(ib, obj);
743 case Brig::BRIG_TYPE_S8:
744 case Brig::BRIG_TYPE_S16:
745 case Brig::BRIG_TYPE_S32:
746 return decodeLd2<MemDT, S32>(ib, obj);
747 case Brig::BRIG_TYPE_F16:
748 case Brig::BRIG_TYPE_F32:
749 return decodeLd2<MemDT, U32>(ib, obj);
750 default:
751 fatal("Bad ld register operand type %d, %d\n",
752 dest.regKind, ib->type);
753 };
754 case Brig::BRIG_REGISTER_KIND_DOUBLE:
755 switch (ib->type) {
756 case Brig::BRIG_TYPE_B64:
757 return decodeLd2<MemDT, B64>(ib, obj);
758 case Brig::BRIG_TYPE_U64:
759 return decodeLd2<MemDT, U64>(ib, obj);
760 case Brig::BRIG_TYPE_S64:
761 return decodeLd2<MemDT, S64>(ib, obj);
762 case Brig::BRIG_TYPE_F64:
763 return decodeLd2<MemDT, U64>(ib, obj);
764 default:
765 fatal("Bad ld register operand type %d, %d\n",
766 dest.regKind, ib->type);
767 };
768 default:
769 fatal("Bad ld register operand type %d, %d\n", dest.regKind,
770 ib->type);
771 }
772 }
773
774 template<typename MemDataType, typename SrcOperandType,
775 typename AddrOperandType>
776 class StInstBase : public HsailGPUStaticInst
777 {
778 public:
779 typename SrcOperandType::SrcOperand src;
780 AddrOperandType addr;
781
782 Brig::BrigSegment segment;
783 Brig::BrigMemoryScope memoryScope;
784 Brig::BrigMemoryOrder memoryOrder;
785 unsigned int equivClass;
786
787 StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
788 const char *_opcode)
789 : HsailGPUStaticInst(obj, _opcode)
790 {
791 using namespace Brig;
792
793 setFlag(MemoryRef);
794 setFlag(Store);
795
796 if (ib->opcode == BRIG_OPCODE_ST) {
797 const BrigInstMem *ldst = (const BrigInstMem*)ib;
798
799 segment = (BrigSegment)ldst->segment;
800 memoryOrder = BRIG_MEMORY_ORDER_NONE;
801 memoryScope = BRIG_MEMORY_SCOPE_NONE;
802 equivClass = ldst->equivClass;
803
804 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
805 const BrigOperand *baseOp = obj->getOperand(op_offs);
806
807 if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
808 (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
809 src.init(op_offs, obj);
810 }
811
812 op_offs = obj->getOperandPtr(ib->operands, 1);
813 addr.init(op_offs, obj);
814 } else {
815 const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
816
817 segment = (BrigSegment)at->segment;
818 memoryScope = (BrigMemoryScope)at->memoryScope;
819 memoryOrder = (BrigMemoryOrder)at->memoryOrder;
820 equivClass = 0;
821
822 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
823 addr.init(op_offs, obj);
824
825 op_offs = obj->getOperandPtr(ib->operands, 1);
826 src.init(op_offs, obj);
827 }
828
829 switch (memoryOrder) {
830 case BRIG_MEMORY_ORDER_NONE:
831 setFlag(NoOrder);
832 break;
833 case BRIG_MEMORY_ORDER_RELAXED:
834 setFlag(RelaxedOrder);
835 break;
836 case BRIG_MEMORY_ORDER_SC_ACQUIRE:
837 setFlag(Acquire);
838 break;
839 case BRIG_MEMORY_ORDER_SC_RELEASE:
840 setFlag(Release);
841 break;
842 case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
843 setFlag(AcquireRelease);
844 break;
845 default:
846 fatal("StInst has bad memory order type\n");
847 }
848
849 switch (memoryScope) {
850 case BRIG_MEMORY_SCOPE_NONE:
851 setFlag(NoScope);
852 break;
853 case BRIG_MEMORY_SCOPE_WORKITEM:
854 setFlag(WorkitemScope);
855 break;
856 case BRIG_MEMORY_SCOPE_WORKGROUP:
857 setFlag(WorkgroupScope);
858 break;
859 case BRIG_MEMORY_SCOPE_AGENT:
860 setFlag(DeviceScope);
861 break;
862 case BRIG_MEMORY_SCOPE_SYSTEM:
863 setFlag(SystemScope);
864 break;
865 default:
866 fatal("StInst has bad memory scope type\n");
867 }
868
869 switch (segment) {
870 case BRIG_SEGMENT_GLOBAL:
871 setFlag(GlobalSegment);
872 break;
873 case BRIG_SEGMENT_GROUP:
874 setFlag(GroupSegment);
875 break;
876 case BRIG_SEGMENT_PRIVATE:
877 setFlag(PrivateSegment);
878 break;
879 case BRIG_SEGMENT_READONLY:
880 setFlag(ReadOnlySegment);
881 break;
882 case BRIG_SEGMENT_SPILL:
883 setFlag(SpillSegment);
884 break;
885 case BRIG_SEGMENT_FLAT:
886 setFlag(Flat);
887 break;
888 case BRIG_SEGMENT_ARG:
889 setFlag(ArgSegment);
890 break;
891 default:
892 panic("St: segment %d not supported\n", segment);
893 }
894 }
895
896 int numDstRegOperands() override { return 0; }
897 int numSrcRegOperands() override
898 {
899 return src.isVectorRegister() + this->addr.isVectorRegister();
900 }
901 int getNumOperands() override
902 {
903 if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
904 return 2;
905 else
906 return 1;
907 }
908 bool isVectorRegister(int operandIndex) override
909 {
910 assert(operandIndex >= 0 && operandIndex < getNumOperands());
911 return !operandIndex ? src.isVectorRegister() :
912 this->addr.isVectorRegister();
913 }
914 bool isCondRegister(int operandIndex) override
915 {
916 assert(operandIndex >= 0 && operandIndex < getNumOperands());
917 return !operandIndex ? src.isCondRegister() :
918 this->addr.isCondRegister();
919 }
920 bool isScalarRegister(int operandIndex) override
921 {
922 assert(operandIndex >= 0 && operandIndex < getNumOperands());
923 return !operandIndex ? src.isScalarRegister() :
924 this->addr.isScalarRegister();
925 }
926 bool isSrcOperand(int operandIndex) override
927 {
928 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
929 return true;
930 }
931 bool isDstOperand(int operandIndex) override { return false; }
932 int getOperandSize(int operandIndex) override
933 {
934 assert(operandIndex >= 0 && operandIndex < getNumOperands());
935 return !operandIndex ? src.opSize() : this->addr.opSize();
936 }
937 int getRegisterIndex(int operandIndex) override
938 {
939 assert(operandIndex >= 0 && operandIndex < getNumOperands());
940 return !operandIndex ? src.regIndex() : this->addr.regIndex();
941 }
942 };
943
944
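    // StInst specializes StInstBase for a concrete memory data type and
    // address operand type. It adds support for vector stores of up to four
    // source registers (src_vect), disassembly, and the pipeline hooks
    // initiateAcc/completeAcc together with the functional store routine
    // execSt.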
945 template<typename MemDataType, typename SrcDataType,
946 typename AddrOperandType>
947 class StInst :
948 public StInstBase<MemDataType, typename SrcDataType::OperandType,
949 AddrOperandType>,
950 public MemInst
951 {
952 public:
953 typename SrcDataType::OperandType::SrcOperand src_vect[4];
954 uint16_t num_src_operands;
955 void generateDisassembly() override;
956
957 StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
958 const char *_opcode, int srcIdx)
959 : StInstBase<MemDataType, typename SrcDataType::OperandType,
960 AddrOperandType>(ib, obj, _opcode),
961 MemInst(SrcDataType::memType)
962 {
963 init_addr(&this->addr);
964
965 BrigRegOperandInfo rinfo;
966 unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx);
967 const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);
968
969 if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
970 const Brig::BrigOperandConstantBytes *op =
971 (Brig::BrigOperandConstantBytes*)baseOp;
972
973 rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
974 Brig::BRIG_TYPE_NONE);
975 } else {
976 rinfo = findRegDataType(op_offs, obj);
977 }
978
979 if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
980 const Brig::BrigOperandOperandList *brigRegVecOp =
981 (const Brig::BrigOperandOperandList*)baseOp;
982
983 num_src_operands =
984 *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
985
986 assert(num_src_operands <= 4);
987 } else {
988 num_src_operands = 1;
989 }
990
991 if (num_src_operands > 1) {
992 assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
993
994 for (int i = 0; i < num_src_operands; ++i) {
995 src_vect[i].init_from_vect(op_offs, obj, i);
996 }
997 }
998 }
999
1000 void
1001 initiateAcc(GPUDynInstPtr gpuDynInst) override
1002 {
1003 // before performing a store, check if this store has
1004 // release semantics, and if so issue a release first
1005 if (!this->isLocalMem()) {
1006 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1007 && gpuDynInst->isRelease()) {
1008
1009 gpuDynInst->statusBitVector = VectorMask(1);
1010 gpuDynInst->execContinuation = &GPUStaticInst::execSt;
1011 gpuDynInst->useContinuation = true;
1012 // create request
1013 Request *req = new Request(0, 0, 0, 0,
1014 gpuDynInst->computeUnit()->masterId(),
1015 0, gpuDynInst->wfDynId);
1016 req->setFlags(Request::RELEASE);
1017 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
1018
1019 return;
1020 }
1021 }
1022
1023 // if there is no release semantic, perform stores immediately
1024 execSt(gpuDynInst);
1025 }
1026
1027 // stores don't write anything back, so there is nothing
1028 // to do here. we only override this method to avoid the
1029 // fatal in the base class implementation
1030 void completeAcc(GPUDynInstPtr gpuDynInst) override { }
1031
1032 private:
1033 // execSt may be called through a continuation
1034 // if the store had release semantics. see comment for
1035 // execSt in gpu_static_inst.hh
1036 void
1037 execSt(GPUDynInstPtr gpuDynInst) override
1038 {
1039 typedef typename MemDataType::CType c0;
1040
1041 gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
1042
1043 if (num_src_operands > 1) {
1044 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
1045 if (gpuDynInst->exec_mask[i])
1046 gpuDynInst->statusVector.push_back(num_src_operands);
1047 else
1048 gpuDynInst->statusVector.push_back(0);
1049 }
1050
1051 for (int k = 0; k < num_src_operands; ++k) {
1052 c0 *d = &((c0*)gpuDynInst->d_data)
1053 [k * gpuDynInst->computeUnit()->wfSize()];
1054
1055 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
1056 if (gpuDynInst->exec_mask[i]) {
1057 Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
1058
1059 if (this->isLocalMem()) {
1060 //store to shared memory
1061 gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
1062 *d);
1063 } else {
1064 Request *req =
1065 new Request(0, vaddr, sizeof(c0), 0,
1066 gpuDynInst->computeUnit()->masterId(),
1067 0, gpuDynInst->wfDynId);
1068
1069 gpuDynInst->setRequestFlags(req);
1070 PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
1071 pkt->dataStatic<c0>(d);
1072
1073 // translation is performed in sendRequest()
1074 // the request will be finished when the store completes
1075 gpuDynInst->useContinuation = false;
1076 gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
1077 i, pkt);
1078
1079 }
1080 }
1081 ++d;
1082 }
1083 }
1084
1085 gpuDynInst->updateStats();
1086 }
1087
1088 public:
1089 bool isVectorRegister(int operandIndex) override
1090 {
1091 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1092 if (operandIndex == num_src_operands)
1093 return this->addr.isVectorRegister();
1094 if (num_src_operands > 1)
1095 return src_vect[operandIndex].isVectorRegister();
1096 else if (num_src_operands == 1)
1097 return StInstBase<MemDataType,
1098 typename SrcDataType::OperandType,
1099 AddrOperandType>::src.isVectorRegister();
1100 return false;
1101 }
1102 bool isCondRegister(int operandIndex) override
1103 {
1104 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1105 if (operandIndex == num_src_operands)
1106 return this->addr.isCondRegister();
1107 if (num_src_operands > 1)
1108 return src_vect[operandIndex].isCondRegister();
1109 else if (num_src_operands == 1)
1110 return StInstBase<MemDataType,
1111 typename SrcDataType::OperandType,
1112 AddrOperandType>::src.isCondRegister();
1113 return false;
1114 }
1115 bool isScalarRegister(int operandIndex) override
1116 {
1117 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1118 if (operandIndex == num_src_operands)
1119 return this->addr.isScalarRegister();
1120 if (num_src_operands > 1)
1121 return src_vect[operandIndex].isScalarRegister();
1122 else if (num_src_operands == 1)
1123 return StInstBase<MemDataType,
1124 typename SrcDataType::OperandType,
1125 AddrOperandType>::src.isScalarRegister();
1126 return false;
1127 }
1128 bool isSrcOperand(int operandIndex) override
1129 {
1130 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1131 return true;
1132 }
1133 bool isDstOperand(int operandIndex) override { return false; }
1134 int getOperandSize(int operandIndex) override
1135 {
1136 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1137 if (operandIndex == num_src_operands)
1138 return this->addr.opSize();
1139 if (num_src_operands > 1)
1140 return src_vect[operandIndex].opSize();
1141 else if (num_src_operands == 1)
1142 return StInstBase<MemDataType,
1143 typename SrcDataType::OperandType,
1144 AddrOperandType>::src.opSize();
1145 return 0;
1146 }
1147 int getRegisterIndex(int operandIndex) override
1148 {
1149 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1150 if (operandIndex == num_src_operands)
1151 return this->addr.regIndex();
1152 if (num_src_operands > 1)
1153 return src_vect[operandIndex].regIndex();
1154 else if (num_src_operands == 1)
1155 return StInstBase<MemDataType,
1156 typename SrcDataType::OperandType,
1157 AddrOperandType>::src.regIndex();
1158 return -1;
1159 }
1160 int getNumOperands() override
1161 {
1162 if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
1163 return num_src_operands + 1;
1164 else
1165 return num_src_operands;
1166 }
1167 void execute(GPUDynInstPtr gpuDynInst) override;
1168 };
1169
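    // decodeSt inspects the address operand of a store (or store-like
    // atomic) and instantiates the StInst specialization matching its
    // register kind: no register, a single (s) register, or a double (d)
    // register. For atomic opcodes the source and address operand indices
    // are swapped relative to a plain st.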
1170 template<typename DataType, typename SrcDataType>
1171 GPUStaticInst*
1172 decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
1173 {
1174 int srcIdx = 0;
1175 int destIdx = 1;
1176 if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
1177 ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
1178 srcIdx = 1;
1179 destIdx = 0;
1180 }
1181 unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx);
1182
1183 BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
1184
1185 if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
1186 return new StInst<DataType, SrcDataType,
1187 NoRegAddrOperand>(ib, obj, "st", srcIdx);
1188 } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
1189 // V2/V4 not allowed
1190 switch (tmp.regKind) {
1191 case Brig::BRIG_REGISTER_KIND_SINGLE:
1192 return new StInst<DataType, SrcDataType,
1193 SRegAddrOperand>(ib, obj, "st", srcIdx);
1194 case Brig::BRIG_REGISTER_KIND_DOUBLE:
1195 return new StInst<DataType, SrcDataType,
1196 DRegAddrOperand>(ib, obj, "st", srcIdx);
1197 default:
1198 fatal("Bad st register operand type %d\n", tmp.type);
1199 }
1200 } else {
1201 fatal("Bad st register operand kind %d\n", tmp.kind);
1202 }
1203 }
1204
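    // AtomicInstBase decodes an HSAIL atomic (atomic or atomicnoret): the
    // destination (when HasDst is true), up to NumSrcOperands source
    // operands, the address operand, and the memory order, scope, segment,
    // and atomic operation, all of which are mapped onto instruction flags.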
1205 template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
1206 bool HasDst>
1207 class AtomicInstBase : public HsailGPUStaticInst
1208 {
1209 public:
1210 typename OperandType::DestOperand dest;
1211 typename OperandType::SrcOperand src[NumSrcOperands];
1212 AddrOperandType addr;
1213
1214 Brig::BrigSegment segment;
1215 Brig::BrigMemoryOrder memoryOrder;
1216 Brig::BrigAtomicOperation atomicOperation;
1217 Brig::BrigMemoryScope memoryScope;
1218 Brig::BrigOpcode opcode;
1219
1220 AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
1221 const char *_opcode)
1222 : HsailGPUStaticInst(obj, _opcode)
1223 {
1224 using namespace Brig;
1225
1226 const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
1227
1228 segment = (BrigSegment)at->segment;
1229 memoryScope = (BrigMemoryScope)at->memoryScope;
1230 memoryOrder = (BrigMemoryOrder)at->memoryOrder;
1231 atomicOperation = (BrigAtomicOperation)at->atomicOperation;
1232 opcode = (BrigOpcode)ib->opcode;
1233
1234 assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET ||
1235 opcode == Brig::BRIG_OPCODE_ATOMIC);
1236
1237 setFlag(MemoryRef);
1238
1239 if (opcode == Brig::BRIG_OPCODE_ATOMIC) {
1240 setFlag(AtomicReturn);
1241 } else {
1242 setFlag(AtomicNoReturn);
1243 }
1244
1245 switch (memoryOrder) {
1246 case BRIG_MEMORY_ORDER_NONE:
1247 setFlag(NoOrder);
1248 break;
1249 case BRIG_MEMORY_ORDER_RELAXED:
1250 setFlag(RelaxedOrder);
1251 break;
1252 case BRIG_MEMORY_ORDER_SC_ACQUIRE:
1253 setFlag(Acquire);
1254 break;
1255 case BRIG_MEMORY_ORDER_SC_RELEASE:
1256 setFlag(Release);
1257 break;
1258 case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
1259 setFlag(AcquireRelease);
1260 break;
1261 default:
1262 fatal("AtomicInst has bad memory order type\n");
1263 }
1264
1265 switch (memoryScope) {
1266 case BRIG_MEMORY_SCOPE_NONE:
1267 setFlag(NoScope);
1268 break;
1269 case BRIG_MEMORY_SCOPE_WORKITEM:
1270 setFlag(WorkitemScope);
1271 break;
1272 case BRIG_MEMORY_SCOPE_WORKGROUP:
1273 setFlag(WorkgroupScope);
1274 break;
1275 case BRIG_MEMORY_SCOPE_AGENT:
1276 setFlag(DeviceScope);
1277 break;
1278 case BRIG_MEMORY_SCOPE_SYSTEM:
1279 setFlag(SystemScope);
1280 break;
1281 default:
1282 fatal("AtomicInst has bad memory scope type\n");
1283 }
1284
1285 switch (atomicOperation) {
1286 case Brig::BRIG_ATOMIC_AND:
1287 setFlag(AtomicAnd);
1288 break;
1289 case Brig::BRIG_ATOMIC_OR:
1290 setFlag(AtomicOr);
1291 break;
1292 case Brig::BRIG_ATOMIC_XOR:
1293 setFlag(AtomicXor);
1294 break;
1295 case Brig::BRIG_ATOMIC_CAS:
1296 setFlag(AtomicCAS);
1297 break;
1298 case Brig::BRIG_ATOMIC_EXCH:
1299 setFlag(AtomicExch);
1300 break;
1301 case Brig::BRIG_ATOMIC_ADD:
1302 setFlag(AtomicAdd);
1303 break;
1304 case Brig::BRIG_ATOMIC_WRAPINC:
1305 setFlag(AtomicInc);
1306 break;
1307 case Brig::BRIG_ATOMIC_WRAPDEC:
1308 setFlag(AtomicDec);
1309 break;
1310 case Brig::BRIG_ATOMIC_MIN:
1311 setFlag(AtomicMin);
1312 break;
1313 case Brig::BRIG_ATOMIC_MAX:
1314 setFlag(AtomicMax);
1315 break;
1316 case Brig::BRIG_ATOMIC_SUB:
1317 setFlag(AtomicSub);
1318 break;
1319 default:
1320 fatal("Bad BrigAtomicOperation code %d\n", atomicOperation);
1321 }
1322
1323 switch (segment) {
1324 case BRIG_SEGMENT_GLOBAL:
1325 setFlag(GlobalSegment);
1326 break;
1327 case BRIG_SEGMENT_GROUP:
1328 setFlag(GroupSegment);
1329 break;
1330 case BRIG_SEGMENT_FLAT:
1331 setFlag(Flat);
1332 break;
1333 default:
1334 panic("Atomic: segment %d not supported\n", segment);
1335 }
1336
1337 if (HasDst) {
1338 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1339 dest.init(op_offs, obj);
1340
1341 op_offs = obj->getOperandPtr(ib->operands, 1);
1342 addr.init(op_offs, obj);
1343
1344 for (int i = 0; i < NumSrcOperands; ++i) {
1345 op_offs = obj->getOperandPtr(ib->operands, i + 2);
1346 src[i].init(op_offs, obj);
1347 }
1348 } else {
1349
1350 unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1351 addr.init(op_offs, obj);
1352
1353 for (int i = 0; i < NumSrcOperands; ++i) {
1354 op_offs = obj->getOperandPtr(ib->operands, i + 1);
1355 src[i].init(op_offs, obj);
1356 }
1357 }
1358 }
1359
1360 int numSrcRegOperands()
1361 {
1362 int operands = 0;
1363 for (int i = 0; i < NumSrcOperands; i++) {
1364 if (src[i].isVectorRegister()) {
1365 operands++;
1366 }
1367 }
1368 if (addr.isVectorRegister())
1369 operands++;
1370 return operands;
1371 }
1372 int numDstRegOperands() { return dest.isVectorRegister(); }
1373 int getNumOperands()
1374 {
1375 if (addr.isVectorRegister())
1376 return(NumSrcOperands + 2);
1377 return(NumSrcOperands + 1);
1378 }
1379 bool isVectorRegister(int operandIndex)
1380 {
1381 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1382 if (operandIndex < NumSrcOperands)
1383 return src[operandIndex].isVectorRegister();
1384 else if (operandIndex == NumSrcOperands)
1385 return(addr.isVectorRegister());
1386 else
1387 return dest.isVectorRegister();
1388 }
1389 bool isCondRegister(int operandIndex)
1390 {
1391 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1392 if (operandIndex < NumSrcOperands)
1393 return src[operandIndex].isCondRegister();
1394 else if (operandIndex == NumSrcOperands)
1395 return(addr.isCondRegister());
1396 else
1397 return dest.isCondRegister();
1398 }
1399 bool isScalarRegister(int operandIndex)
1400 {
1401 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1402 if (operandIndex < NumSrcOperands)
1403 return src[operandIndex].isScalarRegister();
1404 else if (operandIndex == NumSrcOperands)
1405 return(addr.isScalarRegister());
1406 else
1407 return dest.isScalarRegister();
1408 }
1409 bool isSrcOperand(int operandIndex)
1410 {
1411 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1412 if (operandIndex < NumSrcOperands)
1413 return true;
1414 else if (operandIndex == NumSrcOperands)
1415 return(addr.isVectorRegister());
1416 else
1417 return false;
1418 }
1419 bool isDstOperand(int operandIndex)
1420 {
1421 if (operandIndex <= NumSrcOperands)
1422 return false;
1423 else
1424 return true;
1425 }
1426 int getOperandSize(int operandIndex)
1427 {
1428 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1429 if (operandIndex < NumSrcOperands)
1430 return(src[operandIndex].opSize());
1431 else if (operandIndex == NumSrcOperands)
1432 return(addr.opSize());
1433 else
1434 return(dest.opSize());
1435 }
1436 int getRegisterIndex(int operandIndex)
1437 {
1438 assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1439 if (operandIndex < NumSrcOperands)
1440 return(src[operandIndex].regIndex());
1441 else if (operandIndex == NumSrcOperands)
1442 return(addr.regIndex());
1443 else
1444 return(dest.regIndex());
1445 return -1;
1446 }
1447 };
1448
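    // AtomicInst combines AtomicInstBase with MemInst and provides the
    // pipeline entry points: initiateAcc issues an optional release fence
    // before the RMW, execAtomic performs the RMW itself, completeAcc
    // writes the returned value back to the destination VGPR for atomics
    // that return a value, and execAtomicAcq issues the trailing acquire
    // when acquire semantics are handled separately.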
1449 template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
1450 bool HasDst>
1451 class AtomicInst :
1452 public AtomicInstBase<typename MemDataType::OperandType,
1453 AddrOperandType, NumSrcOperands, HasDst>,
1454 public MemInst
1455 {
1456 public:
1457 void generateDisassembly() override;
1458
1459 AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
1460 const char *_opcode)
1461 : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
1462 NumSrcOperands, HasDst>
1463 (ib, obj, _opcode),
1464 MemInst(MemDataType::memType)
1465 {
1466 init_addr(&this->addr);
1467 }
1468
1469 void
1470 initiateAcc(GPUDynInstPtr gpuDynInst) override
1471 {
1472 // before doing the RMW, check if this atomic has
1473 // release semantics, and if so issue a release first
1474 if (!this->isLocalMem()) {
1475 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1476 && (gpuDynInst->isRelease()
1477 || gpuDynInst->isAcquireRelease())) {
1478
1479 gpuDynInst->statusBitVector = VectorMask(1);
1480
1481 gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
1482 gpuDynInst->useContinuation = true;
1483
1484 // create request
1485 Request *req = new Request(0, 0, 0, 0,
1486 gpuDynInst->computeUnit()->masterId(),
1487 0, gpuDynInst->wfDynId);
1488 req->setFlags(Request::RELEASE);
1489 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
1490
1491 return;
1492 }
1493 }
1494
1495 // if there is no release semantic, execute the RMW immediately
1496 execAtomic(gpuDynInst);
1497
1498 }
1499
1500 void
1501 completeAcc(GPUDynInstPtr gpuDynInst) override
1502 {
1503 // if this is not an atomic return op, then we
1504 // have nothing more to do.
1505 if (this->isAtomicRet()) {
1506 // the size of the src operands and the
1507 // memory being operated on must match
1508 // for HSAIL atomics - this assumption may
1509 // not apply to all ISAs
1510 typedef typename MemDataType::CType CType;
1511
1512 Wavefront *w = gpuDynInst->wavefront();
1513 int dst = this->dest.regIndex();
1514 std::vector<uint32_t> regVec;
1515 // virtual->physical VGPR mapping
1516 int physVgpr = w->remap(dst, sizeof(CType), 1);
1517 regVec.push_back(physVgpr);
1518 CType *p1 = &((CType*)gpuDynInst->d_data)[0];
1519
1520 for (int i = 0; i < w->computeUnit->wfSize(); ++i) {
1521 if (gpuDynInst->exec_mask[i]) {
1522 DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: "
1523 "$%s%d <- %d global ld done (src = wavefront "
1524 "ld inst)\n", w->computeUnit->cu_id, w->simdId,
1525 w->wfSlotId, i, sizeof(CType) == 4 ? "s" : "d",
1526 dst, *p1);
1527 // write the value into the physical VGPR. This is a
1528 // purely functional operation. No timing is modeled.
1529 w->computeUnit->vrf[w->simdId]->write<CType>(physVgpr, *p1, i);
1530 }
1531 ++p1;
1532 }
1533
1534 // Schedule the write operation of the load data on the VRF.
1535 // This simply models the timing aspect of the VRF write operation.
1536 // It does not modify the physical VGPR.
1537 int loadVrfBankConflictCycles = gpuDynInst->computeUnit()->
1538 vrf[w->simdId]->exec(gpuDynInst->seqNum(), w, regVec,
1539 sizeof(CType), gpuDynInst->time);
1540
1541 if (this->isGlobalMem()) {
1542 gpuDynInst->computeUnit()->globalMemoryPipe
1543 .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
1544 } else {
1545 assert(this->isLocalMem());
1546 gpuDynInst->computeUnit()->localMemoryPipe
1547 .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
1548 }
1549 }
1550 }
1551
1552 void execute(GPUDynInstPtr gpuDynInst) override;
1553
1554 private:
1555 // execAtomic may be called through a continuation
1556 // if the RMW had release semantics. see comment for
1557 // execContinuation in gpu_dyn_inst.hh
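        // for group (LDS) memory the read-modify-write is performed
        // functionally right here; for global memory a SwapReq packet
        // carrying an AtomicOpFunctor is sent to the memory system and,
        // if needed, an acquire is scheduled as a continuation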
1558 void
1559 execAtomic(GPUDynInstPtr gpuDynInst) override
1560 {
1561 gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
1562
1563 typedef typename MemDataType::CType c0;
1564
1565 c0 *d = &((c0*) gpuDynInst->d_data)[0];
1566 c0 *e = &((c0*) gpuDynInst->a_data)[0];
1567 c0 *f = &((c0*) gpuDynInst->x_data)[0];
1568
1569 for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
1570 if (gpuDynInst->exec_mask[i]) {
1571 Addr vaddr = gpuDynInst->addr[i];
1572
1573 if (this->isLocalMem()) {
1574 Wavefront *wavefront = gpuDynInst->wavefront();
1575 *d = wavefront->ldsChunk->read<c0>(vaddr);
1576
1577 if (this->isAtomicAdd()) {
1578 wavefront->ldsChunk->write<c0>(vaddr,
1579 wavefront->ldsChunk->read<c0>(vaddr) + (*e));
1580 } else if (this->isAtomicSub()) {
1581 wavefront->ldsChunk->write<c0>(vaddr,
1582 wavefront->ldsChunk->read<c0>(vaddr) - (*e));
1583 } else if (this->isAtomicMax()) {
1584 wavefront->ldsChunk->write<c0>(vaddr,
1585 std::max(wavefront->ldsChunk->read<c0>(vaddr),
1586 (*e)));
1587 } else if (this->isAtomicMin()) {
1588 wavefront->ldsChunk->write<c0>(vaddr,
1589 std::min(wavefront->ldsChunk->read<c0>(vaddr),
1590 (*e)));
1591 } else if (this->isAtomicAnd()) {
1592 wavefront->ldsChunk->write<c0>(vaddr,
1593 wavefront->ldsChunk->read<c0>(vaddr) & (*e));
1594 } else if (this->isAtomicOr()) {
1595 wavefront->ldsChunk->write<c0>(vaddr,
1596 wavefront->ldsChunk->read<c0>(vaddr) | (*e));
1597 } else if (this->isAtomicXor()) {
1598 wavefront->ldsChunk->write<c0>(vaddr,
1599 wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
1600 } else if (this->isAtomicInc()) {
1601 wavefront->ldsChunk->write<c0>(vaddr,
1602 wavefront->ldsChunk->read<c0>(vaddr) + 1);
1603 } else if (this->isAtomicDec()) {
1604 wavefront->ldsChunk->write<c0>(vaddr,
1605 wavefront->ldsChunk->read<c0>(vaddr) - 1);
1606 } else if (this->isAtomicExch()) {
1607 wavefront->ldsChunk->write<c0>(vaddr, (*e));
1608 } else if (this->isAtomicCAS()) {
1609 wavefront->ldsChunk->write<c0>(vaddr,
1610 (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
1611 (*f) : wavefront->ldsChunk->read<c0>(vaddr));
1612 } else {
1613 fatal("Unrecognized or invalid HSAIL atomic op "
1614 "type.\n");
1615 }
1616 } else {
1617 Request *req =
1618 new Request(0, vaddr, sizeof(c0), 0,
1619 gpuDynInst->computeUnit()->masterId(),
1620 0, gpuDynInst->wfDynId,
1621 gpuDynInst->makeAtomicOpFunctor<c0>(e,
1622 f));
1623
1624 gpuDynInst->setRequestFlags(req);
1625 PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
1626 pkt->dataStatic(d);
1627
1628 if (gpuDynInst->computeUnit()->shader->
1629 separate_acquire_release &&
1630 (gpuDynInst->isAcquire())) {
1631 // if this atomic has acquire semantics,
1632 // schedule the continuation to perform an
1633 // acquire after the RMW completes
1634 gpuDynInst->execContinuation =
1635 &GPUStaticInst::execAtomicAcq;
1636
1637 gpuDynInst->useContinuation = true;
1638 } else {
1639 // the request will be finished when the RMW completes
1640 gpuDynInst->useContinuation = false;
1641 }
1642 // translation is performed in sendRequest()
1643 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
1644 pkt);
1645 }
1646 }
1647
1648 ++d;
1649 ++e;
1650 ++f;
1651 }
1652
1653 gpuDynInst->updateStats();
1654 }
1655
1656        // execAtomicAcq will always be called through a continuation.
1657 // see comment for execContinuation in gpu_dyn_inst.hh
1658 void
1659 execAtomicAcq(GPUDynInstPtr gpuDynInst) override
1660 {
1661 // after performing the RMW, check to see if this instruction
1662 // has acquire semantics, and if so, issue an acquire
1663 if (!this->isLocalMem()) {
1664 if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1665 && gpuDynInst->isAcquire()) {
1666 gpuDynInst->statusBitVector = VectorMask(1);
1667
1668 // the request will be finished when
1669 // the acquire completes
1670 gpuDynInst->useContinuation = false;
1671 // create request
1672 Request *req = new Request(0, 0, 0, 0,
1673 gpuDynInst->computeUnit()->masterId(),
1674 0, gpuDynInst->wfDynId);
1675 req->setFlags(Request::ACQUIRE);
1676 gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
1677 }
1678 }
1679 }
1680 };
1681
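    // constructAtomic builds the GPUStaticInst for an atomic encoding.
    // Atomic ld and atomic st are routed to the regular load/store
    // decoders; everything else becomes an AtomicInst, with HasDst chosen
    // by whether the opcode returns a value (atomic vs. atomicnoret).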
1682 template<typename DataType, typename AddrOperandType, int NumSrcOperands>
1683 GPUStaticInst*
1684 constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
1685 {
1686 const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1687
1688 if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
1689 return decodeLd<DataType>(ib, obj);
1690 } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
1691 switch (ib->type) {
1692 case Brig::BRIG_TYPE_B8:
1693 return decodeSt<S8,S8>(ib, obj);
1694 case Brig::BRIG_TYPE_B16:
1695 return decodeSt<S16,S16>(ib, obj);
1696 case Brig::BRIG_TYPE_B32:
1697 return decodeSt<S32,S32>(ib, obj);
1698 case Brig::BRIG_TYPE_B64:
1699 return decodeSt<S64,S64>(ib, obj);
1700 default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
1701 }
1702 } else {
1703 if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
1704 return new AtomicInst<DataType, AddrOperandType,
1705 NumSrcOperands, false>(ib, obj, "atomicnoret");
1706 else
1707 return new AtomicInst<DataType, AddrOperandType,
1708 NumSrcOperands, true>(ib, obj, "atomic");
1709 }
1710 }
1711
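    // decodeAtomicHelper locates the address operand (operand 0 for
    // atomicnoret, operand 1 for atomic) and picks the address-operand
    // specialization from its register kind, as decodeSt does for stores.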
1712 template<typename DataType, int NumSrcOperands>
1713 GPUStaticInst*
1714 decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
1715 {
1716 unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
1717 Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;
1718
1719 unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex);
1720
1721 BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
1722
1723 if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
1724 return constructAtomic<DataType, NoRegAddrOperand,
1725 NumSrcOperands>(ib, obj);
1726 } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
1727 // V2/V4 not allowed
1728 switch (tmp.regKind) {
1729 case Brig::BRIG_REGISTER_KIND_SINGLE:
1730 return constructAtomic<DataType, SRegAddrOperand,
1731 NumSrcOperands>(ib, obj);
1732 case Brig::BRIG_REGISTER_KIND_DOUBLE:
1733 return constructAtomic<DataType, DRegAddrOperand,
1734 NumSrcOperands>(ib, obj);
1735 default:
1736 fatal("Bad atomic register operand type %d\n", tmp.type);
1737 }
1738 } else {
1739 fatal("Bad atomic register operand kind %d\n", tmp.kind);
1740 }
1741 }
1742
1743
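    // decodeAtomic: CAS carries two source operands (the compare and swap
    // values); every other atomic operation carries one.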
1744 template<typename DataType>
1745 GPUStaticInst*
1746 decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
1747 {
1748 const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1749
1750 if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
1751 return decodeAtomicHelper<DataType, 2>(ib, obj);
1752 } else {
1753 return decodeAtomicHelper<DataType, 1>(ib, obj);
1754 }
1755 }
1756
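    // decodeAtomicNoRet mirrors decodeAtomic for the non-returning form.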
1757 template<typename DataType>
1758 GPUStaticInst*
1759 decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
1760 {
1761 const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1762 if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
1763 return decodeAtomicHelper<DataType, 2>(ib, obj);
1764 } else {
1765 return decodeAtomicHelper<DataType, 1>(ib, obj);
1766 }
1767 }
1768} // namespace HsailISA
1769
1770#endif // __ARCH_HSAIL_INSTS_MEM_HH__