mem_impl.hh (11645:44ca2fc730eb)
/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#include "arch/hsail/generic_types.hh"
#include "gpu-compute/hsail_code.hh"

// defined in code.cc, but not worth sucking in all of code.h for this
// at this point
extern const char *segmentNames[];

namespace HsailISA
{
    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
    {
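        // Illustrative output (hypothetical operands): an address-of
        // instruction with a 64-bit destination would print as something
        // like "lda_u64 $d1,addr", where both operand strings come from
        // the operands' disassemble() methods.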
        this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
                                     DestDataType::label,
                                     this->dest.disassemble(),
                                     this->addr.disassemble());
    }

    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename DestDataType::CType CType M5_VAR_USED;
        const VectorMask &mask = w->getPred();
        std::vector<Addr> addr_vec;
        addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
        this->addr.calcVector(w, addr_vec);

        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                this->dest.set(w, lane, addr_vec[lane]);
            }
        }
        addr_vec.clear();
    }

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly()
    {
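        // Illustrative output (hypothetical operands): a two-register vector
        // load prints its destinations as a parenthesized list, e.g.
        // "ld_global_u32 ($s0,$s1), addr".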
        switch (num_dest_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 3:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->dest_vect[2].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->dest_vect[2].disassemble(),
                                         this->dest_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default:
            fatal("Bad ld register dest operand, num vector operands: %d\n",
                  num_dest_operands);
            break;
        }
    }

    static Addr
    calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
    {
        // what is the size of the object we are accessing??
        // NOTE: the compiler doesn't generate enough information
        // to do this yet... have to just line up all the private
        // work-item spaces back to back for now
        /*
          StorageElement* se =
              i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
          assert(se);

          return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() +
              se->offset * w->computeUnit->wfSize() +
              lane * se->size;
        */

        // addressing strategy: interleave the private spaces of
        // work-items in a wavefront on an 8-byte granularity.
        // this won't be perfect coalescing like the spill space
        // strategy, but it's better than nothing. The spill space
        // strategy won't work with private because the same address
        // may be accessed by different sized loads/stores.

        // Note: I'm assuming that the largest load/store to private
        // is 8 bytes. If it is larger, the stride will have to increase
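        // Worked example (illustrative numbers, assuming a 64-lane
        // wavefront): a private address of 0x12 gives addr_div8 == 2 and
        // addr_mod8 == 2, so lane 5 ends up at
        // privBase + 2*8*64 + 5*8 + 2. Lanes touching the same private
        // address are therefore spread 8 bytes apart in the backing store.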

        Addr addr_div8 = addr / 8;
        Addr addr_mod8 = addr % 8;

        Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 +
            addr_mod8 + w->privBase;

        assert(ret < w->privBase +
               (w->privSizePerItem * w->computeUnit->wfSize()));

        return ret;
    }

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename MemDataType::CType MemCType;
        const VectorMask &mask = w->getPred();

        // Kernarg references are handled uniquely for now (no Memory Request
        // is used), so special-case them up front. Someday we should
        // make this more realistic, at which point we should get rid of this
        // block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
            MemCType val;

            // I assume no vector ld for kernargs
            assert(num_dest_operands == 1);

            // assuming for the moment that we'll never do register
            // offsets into kernarg space... just to make life simpler
            uint64_t address = this->addr.calcUniform();

            val = *(MemCType*)&w->kernelArgs[address];

            DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);

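            // the kernarg value is uniform across the wavefront, so
            // broadcast the single loaded value to every active lane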
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    this->dest.set(w, lane, val);
                }
            }

            return;
        } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    MemCType val = w->readCallArgMem<MemCType>(lane, address);

                    DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
                            (unsigned long long)val);

                    this->dest.set(w, lane, val);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

        m->m_op = Enums::MO_LD;
        m->m_type = MemDataType::memType;
        m->v_type = DestDataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = this->equivClass;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        if (num_dest_operands == 1) {
            m->dst_reg = this->dest.regIndex();
            m->n_reg = 1;
        } else {
            m->n_reg = num_dest_operands;
            for (int i = 0; i < num_dest_operands; ++i) {
                m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
            }
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug:
            // the compiler currently generates global accesses for private
            // addresses (starting from 0), so we need to add the private offset
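            // any address below privSizePerItem is assumed to really be a
            // private (per-work-item) address and is remapped through
            // calcPrivAddr() below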
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (m->addr[lane] < w->privSizePerItem) {
                    if (mask[lane]) {
                        // what is the size of the object we are accessing?
                        // find the base for this wavefront

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(MemCType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_dest_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    // note: this calculation will NOT WORK if the compiler
                    // ever generates loads/stores to the same address with
                    // different widths (e.g., a ld_u32 addr and a ld_u16 addr)
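                    // Illustrative example (made-up numbers): for a 4-byte
                    // spill slot at per-work-item offset 8, lane 3 is sent to
                    // spillBase + 8 * spillWidth + 3 * 4, so the lanes of a
                    // wavefront land in consecutive 4-byte locations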
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                            lane * sizeof(MemCType) + w->spillBase;

                        w->lastAddr[lane] = m->addr[lane];
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsRdLm++;
            w->rdLmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_READONLY:
            m->s_type = SEG_READONLY;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
                    m->addr[lane] += w->roBase;
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
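                // for the explicit private segment, each work-item's copy of
                // a given private location is simply placed sizeof(MemCType)
                // bytes apart starting at privBase (no 8-byte interleaving as
                // in calcPrivAddr() above)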
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);

                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(MemCType) + w->privBase;
                    }
                }
            }
            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          default:
            fatal("Load to unsupported segment %d %llx\n", this->segment,
                  m->addr[0]);
        }

        w->outstandingReqs++;
        w->memReqsInPipe--;
    }

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename OperationType::CType CType;

        const VectorMask &mask = w->getPred();

        // arg references are handled uniquely for now (no Memory Request
        // is used), so special-case them up front. Someday we should
        // make this more realistic, at which point we should get rid of this
        // block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();

            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    CType data = this->src.template get<CType>(w, lane);
                    DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
                    w->writeCallArgMem<CType>(lane, address, data);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        m->exec_mask = w->execMask();

        this->addr.calcVector(w, m->addr);

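        // pack the store data into d_data: a scalar store occupies one CType
        // slot per lane, while a vector store lays its operands out back to
        // back, with operand k for a given lane going to index
        // k * wfSize() + lane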
        if (num_src_operands == 1) {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    ((CType*)m->d_data)[lane] =
                        this->src.template get<CType>(w, lane);
                }
            }
        } else {
            for (int k = 0; k < num_src_operands; ++k) {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] =
                            this->src_vect[k].template get<CType>(w, lane);
                    }
                }
            }
        }

        m->m_op = Enums::MO_ST;
        m->m_type = OperationType::memType;
        m->v_type = OperationType::vgprType;

        m->statusBitVector = 0;
        m->equiv = this->equivClass;

        if (num_src_operands == 1) {
            m->n_reg = 1;
        } else {
            m->n_reg = num_src_operands;
        }

        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug:
            // the compiler currently generates global accesses for private
            // addresses (starting from 0), so we need to add the private offset
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    if (m->addr[lane] < w->privSizePerItem) {

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(CType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_src_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                            lane * sizeof(CType) + w->spillBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsWrLm++;
            w->wrLmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);
                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(CType) + w->privBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          default:
            fatal("Store to unsupported segment %d\n", this->segment);
        }

        w->outstandingReqs++;
        w->memReqsInPipe--;
    }

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::generateDisassembly()
    {
        switch (num_src_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->src_vect[2].disassemble(),
                                         this->src_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default:
            fatal("Bad st register src operand, num vector operands: %d\n",
                  num_src_operands);
            break;
        }
    }

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
               HasDst>::execute(GPUDynInstPtr gpuDynInst)
    {
        typedef typename DataType::CType CType;

        Wavefront *w = gpuDynInst->wavefront();

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            ((CType *)m->a_data)[lane] =
                this->src[0].template get<CType>(w, lane);
        }

        // load second source operand for CAS
        if (NumSrcOperands > 1) {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                ((CType*)m->x_data)[lane] =
                    this->src[1].template get<CType>(w, lane);
            }
        }

        assert(NumSrcOperands <= 2);

        m->m_op = this->opType;
        m->m_type = DataType::memType;
        m->v_type = DataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = 0;  // atomics don't have an equivalence class operand
        m->n_reg = 1;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        if (HasDst) {
            m->dst_reg = this->dest.regIndex();
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->latency.set(w->computeUnit->shader->ticks(64));
            m->pipeId = GLBMEM_PIPE;

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsWrLm++;
            w->wrLmReqsInPipe--;
            w->outstandingReqsRdLm++;
            w->rdLmReqsInPipe--;
            break;

          default:
            fatal("Atomic op to unsupported segment %d\n",
                  this->segment);
        }

        w->outstandingReqs++;
        w->memReqsInPipe--;
    }

    const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
               HasDst>::generateDisassembly()
    {
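        // Illustrative output (hypothetical operands): an atomic add to the
        // group segment with a destination might print as
        // "atomic_add_group_u32 $s2,addr,$s1"; the source operands are
        // appended after the address by the loop below.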
        if (HasDst) {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s,%s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->dest.disassemble(),
                         this->addr.disassemble());
        } else {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->addr.disassemble());
        }

        for (int i = 0; i < NumSrcOperands; ++i) {
            this->disassembly += ",";
            this->disassembly += this->src[i].disassemble();
        }
    }
} // namespace HsailISA