mem_impl.hh revision 11308:7d8836fd043d
/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#include "arch/hsail/generic_types.hh"
#include "gpu-compute/hsail_code.hh"

// defined in code.cc, but not worth sucking in all of code.h for this
// at this point
extern const char *segmentNames[];

namespace HsailISA
{
    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
    {
        this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
                                     DestDataType::label,
                                     this->dest.disassemble(),
                                     this->addr.disassemble());
    }

    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename DestDataType::CType CType M5_VAR_USED;
        const VectorMask &mask = w->get_pred();
        uint64_t addr_vec[VSZ];
        this->addr.calcVector(w, addr_vec);

        for (int lane = 0; lane < VSZ; ++lane) {
            if (mask[lane]) {
                this->dest.set(w, lane, addr_vec[lane]);
            }
        }
    }

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly()
    {
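        // a sketch of the result: a scalar load disassembles to something
        // like "ld_global_u32 $s1,[$d0]" (the register and address operands
        // here are illustrative, not taken from a real trace)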
        switch (num_dest_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->dest_vect[2].disassemble(),
                                         this->dest_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default:
            fatal("Bad ld register dest operand, num vector operands: %d\n",
                  num_dest_operands);
            break;
        }
    }

    static Addr
    calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
    {
        // what is the size of the object we are accessing?
        // NOTE: the compiler doesn't generate enough information
        // to do this yet, so we have to just line up all the private
        // work-item spaces back to back for now
        /*
        StorageElement* se =
            i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
        assert(se);

        return w->wfSlotId * w->privSizePerItem * VSZ +
            se->offset * VSZ +
            lane * se->size;
        */

        // addressing strategy: interleave the private spaces of
        // work-items in a wave-front on 8 byte granularity.
        // this won't be perfect coalescing like the spill space
        // strategy, but it's better than nothing. The spill space
        // strategy won't work with private because the same address
        // may be accessed by different sized loads/stores.

        // Note: I'm assuming that the largest load/store to private
        // is 8 bytes. If it is larger, the stride will have to increase

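        // a worked example (assuming VSZ == 64): a 4-byte access by lane 3
        // to private address 20 gives addr_div8 = 2 and addr_mod8 = 4, so
        //   ret = 2 * 8 * VSZ + 3 * 8 + 4 + privBase = 1052 + privBase
        // i.e., each 8-byte chunk of a work-item's private space sits next
        // to the corresponding chunks of the other lanes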
        Addr addr_div8 = addr / 8;
        Addr addr_mod8 = addr % 8;

        Addr ret = addr_div8 * 8 * VSZ + lane * 8 + addr_mod8 + w->privBase;

        assert(ret < w->privBase + (w->privSizePerItem * VSZ));

        return ret;
    }

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename MemDataType::CType MemCType;
        const VectorMask &mask = w->get_pred();

        // Kernarg references are handled uniquely for now (no Memory Request
        // is used), so special-case them up front.  Someday we should
        // make this more realistic, at which point we should get rid of
        // this block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
            MemCType val;

            // I assume no vector ld for kernargs
            assert(num_dest_operands == 1);

            // assuming for the moment that we'll never do register
            // offsets into kernarg space... just to make life simpler
            uint64_t address = this->addr.calcUniform();

            val = *(MemCType*)&w->kernelArgs[address];

            DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);

            for (int lane = 0; lane < VSZ; ++lane) {
                if (mask[lane]) {
                    this->dest.set(w, lane, val);
                }
            }

            return;
        } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();
            for (int lane = 0; lane < VSZ; ++lane) {
                if (mask[lane]) {
                    MemCType val = w->readCallArgMem<MemCType>(lane, address);

                    DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
                            (unsigned long long)val);

                    this->dest.set(w, lane, val);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

        m->m_op = Enums::MO_LD;
        m->m_type = MemDataType::memType;
        m->v_type = DestDataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = this->equivClass;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        if (num_dest_operands == 1) {
            m->dst_reg = this->dest.regIndex();
            m->n_reg = 1;
        } else {
            m->n_reg = num_dest_operands;
            for (int i = 0; i < num_dest_operands; ++i) {
                m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
            }
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kern_id;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug: the
            // compiler currently generates global accesses for private
            // addresses (starting from 0), so we need to add the private
            // offset
            for (int lane = 0; lane < VSZ; ++lane) {
                if (m->addr[lane] < w->privSizePerItem) {
                    if (mask[lane]) {
                        // what is the size of the object we are accessing?
                        // find the base for this wavefront

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(MemCType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_dest_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < VSZ; ++lane) {
                    //  note: this calculation will NOT WORK if the compiler
                    //  ever generates loads/stores to the same address with
                    //  different widths (e.g., a ld_u32 addr and a ld_u16 addr)
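                    //  a sketch of the layout (assuming spillWidth == VSZ
                    //  == 64): a 4-byte load by lane 2 of per-item spill
                    //  offset 16 maps to 16 * 64 + 2 * 4 + spillBase, so
                    //  the lanes touching one spill slot form a contiguous,
                    //  fully coalesced block of memory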
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                                        lane * sizeof(MemCType) + w->spillBase;

                        w->last_addr[lane] = m->addr[lane];
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstanding_reqs_rd_lm++;
            w->rd_lm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_READONLY:
            m->s_type = SEG_READONLY;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            for (int lane = 0; lane < VSZ; ++lane) {
                if (mask[lane]) {
                    assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
                    m->addr[lane] += w->roBase;
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < VSZ; ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);

                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(MemCType) + w->privBase;
                    }
                }
            }
            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          default:
            fatal("Load to unsupported segment %d %llx\n", this->segment,
                  m->addr[0]);
        }

        w->outstanding_reqs++;
        w->mem_reqs_in_pipe--;
    }

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename OperationType::CType CType;

        const VectorMask &mask = w->get_pred();

        // arg references are handled uniquely for now (no Memory Request
        // is used), so special-case them up front.  Someday we should
        // make this more realistic, at which point we should get rid of
        // this block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();

            for (int lane = 0; lane < VSZ; ++lane) {
                if (mask[lane]) {
                    CType data = this->src.template get<CType>(w, lane);
                    DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
                    w->writeCallArgMem<CType>(lane, address, data);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        m->exec_mask = w->execMask();

        this->addr.calcVector(w, m->addr);

        if (num_src_operands == 1) {
            for (int lane = 0; lane < VSZ; ++lane) {
                if (mask[lane]) {
                    ((CType*)m->d_data)[lane] =
                        this->src.template get<CType>(w, lane);
                }
            }
        } else {
            for (int k = 0; k < num_src_operands; ++k) {
                for (int lane = 0; lane < VSZ; ++lane) {
                    if (mask[lane]) {
                        ((CType*)m->d_data)[k * VSZ + lane] =
                            this->src_vect[k].template get<CType>(w, lane);
                    }
                }
            }
        }

        m->m_op = Enums::MO_ST;
        m->m_type = OperationType::memType;
        m->v_type = OperationType::vgprType;

        m->statusBitVector = 0;
        m->equiv = this->equivClass;

        if (num_src_operands == 1) {
            m->n_reg = 1;
        } else {
            m->n_reg = num_src_operands;
        }

        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kern_id;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug: the
            // compiler currently generates global accesses for private
            // addresses (starting from 0), so we need to add the private
            // offset
            for (int lane = 0; lane < VSZ; ++lane) {
                if (mask[lane]) {
                    if (m->addr[lane] < w->privSizePerItem) {

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(CType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_wr_gm++;
            w->wr_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_src_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < VSZ; ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                                        lane * sizeof(CType) + w->spillBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_wr_gm++;
            w->wr_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstanding_reqs_wr_lm++;
            w->wr_lm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < VSZ; ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);
                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(CType) + w->privBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_wr_gm++;
            w->wr_gm_reqs_in_pipe--;
            break;

          default:
            fatal("Store to unsupported segment %d\n", this->segment);
        }

        w->outstanding_reqs++;
        w->mem_reqs_in_pipe--;
    }

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::generateDisassembly()
    {
        switch (num_src_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->src_vect[2].disassemble(),
                                         this->src_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default:
            fatal("Bad st register src operand, num vector operands: %d\n",
                  num_src_operands);
            break;
        }
    }

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
        HasDst>::execute(GPUDynInstPtr gpuDynInst)
    {
        typedef typename DataType::CType CType;

        Wavefront *w = gpuDynInst->wavefront();

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

        for (int lane = 0; lane < VSZ; ++lane) {
            ((CType *)m->a_data)[lane] =
                this->src[0].template get<CType>(w, lane);
        }

        // load second source operand for CAS
        if (NumSrcOperands > 1) {
            for (int lane = 0; lane < VSZ; ++lane) {
                ((CType*)m->x_data)[lane] =
                    this->src[1].template get<CType>(w, lane);
            }
        }

        assert(NumSrcOperands <= 2);

        m->m_op = this->opType;
        m->m_type = DataType::memType;
        m->v_type = DataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = 0;  // atomics don't have an equivalence class operand
        m->n_reg = 1;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        if (HasDst) {
            m->dst_reg = this->dest.regIndex();
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kern_id;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->latency.set(w->computeUnit->shader->ticks(64));
            m->pipeId = GLBMEM_PIPE;

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_wr_gm++;
            w->wr_gm_reqs_in_pipe--;
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstanding_reqs_wr_lm++;
            w->wr_lm_reqs_in_pipe--;
            w->outstanding_reqs_rd_lm++;
            w->rd_lm_reqs_in_pipe--;
            break;

          default:
            fatal("Atomic op to unsupported segment %d\n",
                  this->segment);
        }

        w->outstanding_reqs++;
        w->mem_reqs_in_pipe--;
    }

    const char *atomicOpToString(Brig::BrigAtomicOperation atomicOp);

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
               HasDst>::generateDisassembly()
    {
        if (HasDst) {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s,%s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->dest.disassemble(),
                         this->addr.disassemble());
        } else {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->addr.disassemble());
        }

        for (int i = 0; i < NumSrcOperands; ++i) {
            this->disassembly += ",";
            this->disassembly += this->src[i].disassemble();
        }
    }
} // namespace HsailISA