/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#include "arch/hsail/generic_types.hh"
#include "gpu-compute/hsail_code.hh"

// defined in code.cc, but not worth sucking in all of code.h for this
// at this point
extern const char *segmentNames[];

namespace HsailISA
{
    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
    {
        this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
                                     DestDataType::label,
                                     this->dest.disassemble(),
                                     this->addr.disassemble());
    }

    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
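        // Note: lda writes the computed effective address itself (not the
        // memory contents at that address) into each active lane's
        // destination register.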
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename DestDataType::CType CType M5_VAR_USED;
        const VectorMask &mask = w->getPred();
        std::vector<Addr> addr_vec;
        addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
        this->addr.calcVector(w, addr_vec);

        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                this->dest.set(w, lane, addr_vec[lane]);
            }
        }
        addr_vec.clear();
    }

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly()
    {
        switch (num_dest_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->dest_vect[2].disassemble(),
                                         this->dest_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default:
            fatal("Bad ld register dest operand, num vector operands: %d\n",
                  num_dest_operands);
            break;
        }
    }

    static Addr
    calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
    {
        // what is the size of the object we are accessing?
        // NOTE: the compiler doesn't generate enough information
        // to do this yet... have to just line up all the private
        // work-item spaces back to back for now
        /*
        StorageElement* se =
            i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
        assert(se);

        return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() +
            se->offset * w->computeUnit->wfSize() +
            lane * se->size;
        */

        // addressing strategy: interleave the private spaces of
        // work-items in a wavefront on an 8-byte granularity.
        // this won't give perfect coalescing like the spill space
        // strategy does, but it's better than nothing. The spill space
        // strategy won't work for private because the same address
        // may be accessed by different sized loads/stores.

        // Note: I'm assuming that the largest load/store to private
        // is 8 bytes. If it is larger, the stride will have to increase.

        Addr addr_div8 = addr / 8;
        Addr addr_mod8 = addr % 8;

        Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 +
            addr_mod8 + w->privBase;
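
        // Worked example (hypothetical numbers): with wfSize() == 64,
        // addr == 19 and lane == 5, addr_div8 == 2 and addr_mod8 == 3,
        // so ret == 2 * 8 * 64 + 5 * 8 + 3 + privBase == privBase + 1067:
        // 8-byte chunk 2 of lane 5's private space, byte 3 within the chunk.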

        assert(ret < w->privBase +
               (w->privSizePerItem * w->computeUnit->wfSize()));

        return ret;
    }

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename MemDataType::CType MemCType;
        const VectorMask &mask = w->getPred();

        // Kernarg references are handled uniquely for now (no Memory Request
        // is used), so special-case them up front. Someday we should
        // make this more realistic, at which point we should get rid of this
        // block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
            MemCType val;

            // I assume no vector ld for kernargs
            assert(num_dest_operands == 1);

            // assuming for the moment that we'll never do register
            // offsets into kernarg space... just to make life simpler
            uint64_t address = this->addr.calcUniform();

            val = *(MemCType*)&w->kernelArgs[address];

            DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);

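            // the kernarg value is wave-uniform, so broadcast it to the
            // destination register of every active lane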
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    this->dest.set(w, lane, val);
                }
            }

            return;
        } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    MemCType val = w->readCallArgMem<MemCType>(lane, address);

                    DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
                            (unsigned long long)val);

                    this->dest.set(w, lane, val);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

        m->m_op = Enums::MO_LD;
        m->m_type = MemDataType::memType;
        m->v_type = DestDataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = this->equivClass;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        if (num_dest_operands == 1) {
            m->dst_reg = this->dest.regIndex();
            m->n_reg = 1;
        } else {
            m->n_reg = num_dest_operands;
            for (int i = 0; i < num_dest_operands; ++i) {
                m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
            }
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug:
            // the compiler currently generates global accesses for private
            // addresses (starting from 0), so we need to add the private
            // offset here
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (m->addr[lane] < w->privSizePerItem) {
                    if (mask[lane]) {
                        // what is the size of the object we are accessing?
                        // find the base for this wavefront

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(MemCType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_dest_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    // note: this calculation will NOT WORK if the compiler
                    // ever generates loads/stores to the same address with
                    // different widths (e.g., a ld_u32 addr and a ld_u16 addr)
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                                        lane * sizeof(MemCType) + w->spillBase;
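
                        // Worked example (hypothetical numbers): if
                        // spillWidth == 256 and sizeof(MemCType) == 4, then
                        // per-item spill offset 8 for lane 2 maps to
                        // 8 * 256 + 2 * 4 + spillBase == spillBase + 2056,
                        // so consecutive lanes' accesses to the same spill
                        // slot land on consecutive 4-byte words.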

                        w->lastAddr[lane] = m->addr[lane];
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsRdLm++;
            w->rdLmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_READONLY:
            m->s_type = SEG_READONLY;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
                    m->addr[lane] += w->roBase;
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);

                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(MemCType) + w->privBase;
                    }
                }
            }
            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          default:
            fatal("Load to unsupported segment %d %llx\n", this->segment,
                  m->addr[0]);
        }

        w->outstandingReqs++;
        w->memReqsInPipe--;
    }

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename OperationType::CType CType;

        const VectorMask &mask = w->getPred();

        // Arg references are handled uniquely for now (no Memory Request
        // is used), so special-case them up front. Someday we should
        // make this more realistic, at which point we should get rid of this
        // block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();

            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    CType data = this->src.template get<CType>(w, lane);
                    DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
                    w->writeCallArgMem<CType>(lane, address, data);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        m->exec_mask = w->execMask();

        this->addr.calcVector(w, m->addr);

        if (num_src_operands == 1) {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    ((CType*)m->d_data)[lane] =
                        this->src.template get<CType>(w, lane);
                }
            }
        } else {
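            // pack vector store data into d_data operand-major: all lanes
            // of src_vect[0] first, then all lanes of src_vect[1], etc.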
            for (int k = 0; k < num_src_operands; ++k) {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] =
                            this->src_vect[k].template get<CType>(w, lane);
                    }
                }
            }
        }

        m->m_op = Enums::MO_ST;
        m->m_type = OperationType::memType;
        m->v_type = OperationType::vgprType;

        m->statusBitVector = 0;
        m->equiv = this->equivClass;

        if (num_src_operands == 1) {
            m->n_reg = 1;
        } else {
            m->n_reg = num_src_operands;
        }

        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug:
            // the compiler currently generates global accesses for private
            // addresses (starting from 0), so we need to add the private
            // offset here
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    if (m->addr[lane] < w->privSizePerItem) {

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(CType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_src_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                                        lane * sizeof(CType) + w->spillBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsWrLm++;
            w->wrLmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);
                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(CType) + w->privBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          default:
            fatal("Store to unsupported segment %d\n", this->segment);
        }

        w->outstandingReqs++;
        w->memReqsInPipe--;
    }

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::generateDisassembly()
    {
        switch (num_src_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->src_vect[2].disassemble(),
                                         this->src_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default:
            fatal("Bad st register src operand, num vector operands: %d\n",
                  num_src_operands);
            break;
        }
    }

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
               HasDst>::execute(GPUDynInstPtr gpuDynInst)
    {
        typedef typename DataType::CType CType;

        Wavefront *w = gpuDynInst->wavefront();

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

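        // gather each lane's first source operand (the atomic operand)
        // into a_data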
        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            ((CType *)m->a_data)[lane] =
                this->src[0].template get<CType>(w, lane);
        }

        // load second source operand for CAS
        if (NumSrcOperands > 1) {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                ((CType*)m->x_data)[lane] =
                    this->src[1].template get<CType>(w, lane);
            }
        }

        assert(NumSrcOperands <= 2);

        m->m_op = this->opType;
        m->m_type = DataType::memType;
        m->v_type = DataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = 0;  // atomics don't have an equivalence class operand
        m->n_reg = 1;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        if (HasDst) {
            m->dst_reg = this->dest.regIndex();
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->latency.set(w->computeUnit->shader->ticks(64));
            m->pipeId = GLBMEM_PIPE;

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsWrLm++;
            w->wrLmReqsInPipe--;
            w->outstandingReqsRdLm++;
            w->rdLmReqsInPipe--;
            break;

          default:
            fatal("Atomic op to unsupported segment %d\n",
                  this->segment);
        }

        w->outstandingReqs++;
        w->memReqsInPipe--;
    }

    const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
               HasDst>::generateDisassembly()
    {
        if (HasDst) {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s,%s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->dest.disassemble(),
                         this->addr.disassemble());
        } else {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->addr.disassemble());
        }

        for (int i = 0; i < NumSrcOperands; ++i) {
            this->disassembly += ",";
            this->disassembly += this->src[i].disassemble();
        }
    }
} // namespace HsailISA