mem_impl.hh revision 11645:44ca2fc730eb
1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#include "arch/hsail/generic_types.hh"
37#include "gpu-compute/hsail_code.hh"
38
39// defined in code.cc, but not worth sucking in all of code.h for this
40// at this point
41extern const char *segmentNames[];
42
43namespace HsailISA
44{
45    template<typename DestDataType, typename AddrRegOperandType>
46    void
47    LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
48    {
49        this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
50                                     DestDataType::label,
51                                     this->dest.disassemble(),
52                                     this->addr.disassemble());
53    }
54
55    template<typename DestDataType, typename AddrRegOperandType>
56    void
57    LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
58    {
59        Wavefront *w = gpuDynInst->wavefront();
60
61        typedef typename DestDataType::CType CType M5_VAR_USED;
62        const VectorMask &mask = w->getPred();
63        std::vector<Addr> addr_vec;
64        addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
65        this->addr.calcVector(w, addr_vec);
66
67        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
68            if (mask[lane]) {
69                this->dest.set(w, lane, addr_vec[lane]);
70            }
71        }
72        addr_vec.clear();
73    }
74
75    template<typename MemDataType, typename DestDataType,
76             typename AddrRegOperandType>
77    void
78    LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly()
79    {
80        switch (num_dest_operands) {
81          case 1:
82            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
83                                         segmentNames[this->segment],
84                                         MemDataType::label,
85                                         this->dest.disassemble(),
86                                         this->addr.disassemble());
87            break;
88          case 2:
89            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
90                                         segmentNames[this->segment],
91                                         MemDataType::label,
92                                         this->dest_vect[0].disassemble(),
93                                         this->dest_vect[1].disassemble(),
94                                         this->addr.disassemble());
95            break;
96          case 3:
97            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s), %s", this->opcode,
98                                         segmentNames[this->segment],
99                                         MemDataType::label,
100                                         this->dest_vect[0].disassemble(),
101                                         this->dest_vect[1].disassemble(),
102                                         this->dest_vect[2].disassemble(),
103                                         this->addr.disassemble());
104            break;
105          case 4:
106            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
107                                         this->opcode,
108                                         segmentNames[this->segment],
109                                         MemDataType::label,
110                                         this->dest_vect[0].disassemble(),
111                                         this->dest_vect[1].disassemble(),
112                                         this->dest_vect[2].disassemble(),
113                                         this->dest_vect[3].disassemble(),
114                                         this->addr.disassemble());
115            break;
116          default:
117            fatal("Bad ld register dest operand, num vector operands: %d \n",
118                  num_dest_operands);
119            break;
120        }
121    }
122
123    static Addr
124    calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
125    {
126        // what is the size of the object we are accessing??
127        // NOTE: the compiler doesn't generate enough information
128        // to do this yet..have to just line up all the private
129        // work-item spaces back to back for now
130        /*
131        StorageElement* se =
132            i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
133        assert(se);
134
135        return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() +
136            se->offset * w->computeUnit->wfSize() +
137            lane * se->size;
138        */
139
140        // addressing strategy: interleave the private spaces of
141        // work-items in a wave-front on 8 byte granularity.
142        // this won't be perfect coalescing like the spill space
143        // strategy, but it's better than nothing. The spill space
144        // strategy won't work with private because the same address
145        // may be accessed by different sized loads/stores.
146
147        // Note: I'm assuming that the largest load/store to private
148        // is 8 bytes. If it is larger, the stride will have to increase
149
150        Addr addr_div8 = addr / 8;
151        Addr addr_mod8 = addr % 8;
152
153        Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 +
154            addr_mod8 + w->privBase;
155
156        assert(ret < w->privBase +
157               (w->privSizePerItem * w->computeUnit->wfSize()));
158
159        return ret;
160    }
161
    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        // Issue an HSAIL load: translate the per-lane addresses for the
        // target segment, fill in the memory-request fields of the dynamic
        // instruction, and push it to the appropriate memory pipeline.
        // Kernarg and arg segment loads complete immediately instead.
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename MemDataType::CType MemCType;
        const VectorMask &mask = w->getPred();

        // Kernarg references are handled uniquely for now (no Memory Request
        // is used), so special-case them up front.  Someday we should
        // make this more realistic, at which we should get rid of this
        // block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
            MemCType val;

            // I assume no vector ld for kernargs
            assert(num_dest_operands == 1);

            // assuming for the moment that we'll never do register
            // offsets into kernarg space... just to make life simpler
            uint64_t address = this->addr.calcUniform();

            val = *(MemCType*)&w->kernelArgs[address];

            DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);

            // broadcast the (uniform) kernarg value to all active lanes
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    this->dest.set(w, lane, val);
                }
            }

            return;
        } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            // Call-argument reads also bypass the memory system: read
            // each active lane's value directly from the wavefront's
            // call-argument storage.
            uint64_t address = this->addr.calcUniform();
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    MemCType val = w->readCallArgMem<MemCType>(lane, address);

                    DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
                            (unsigned long long)val);

                    this->dest.set(w, lane, val);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        // compute the per-lane effective addresses into m->addr
        this->addr.calcVector(w, m->addr);

        m->m_op = Enums::MO_LD;
        m->m_type = MemDataType::memType;
        m->v_type = DestDataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = this->equivClass;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        // record the destination register(s) so returning data can be
        // written back when the request completes
        if (num_dest_operands == 1) {
            m->dst_reg = this->dest.regIndex();
            m->n_reg = 1;
        } else {
            m->n_reg = num_dest_operands;
            for (int i = 0; i < num_dest_operands; ++i) {
                m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
            }
        }

        // identify the issuing wavefront/compute unit for the request
        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        // Per-segment address translation, latency, and pipeline selection.
        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug
            // (the compiler currently generates global access for private
            //  addresses (starting from 0). We need to add the private offset)
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (m->addr[lane] < w->privSizePerItem) {
                    if (mask[lane]) {
                        // what is the size of the object we are accessing?
                        // find base for for this wavefront

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(MemCType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_dest_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                // spill addresses are strided so consecutive lanes hit
                // consecutive memory locations (coalescing-friendly)
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    //  note: this calculation will NOT WORK if the compiler
                    //  ever generates loads/stores to the same address with
                    //  different widths (e.g., a ld_u32 addr and a ld_u16 addr)
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                                        lane * sizeof(MemCType) + w->spillBase;

                        w->lastAddr[lane] = m->addr[lane];
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            // group (LDS) accesses go to the local memory pipeline
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsRdLm++;
            w->rdLmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_READONLY:
            m->s_type = SEG_READONLY;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // rebase lane addresses onto the readonly segment's backing
            // store after a bounds check
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
                    m->addr[lane] += w->roBase;
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                // NOTE(review): private here uses a simple lane-offset
                // layout, unlike the 8-byte interleave in calcPrivAddr
                // used for the global-segment hack above — presumably
                // intentional; confirm before unifying.
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);

                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(MemCType) + w->privBase;
                    }
                }
            }
            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          default:
            fatal("Load to unsupported segment %d %llxe\n", this->segment,
                  m->addr[0]);
        }

        // bookkeeping common to all issued (non-arg/kernarg) loads
        w->outstandingReqs++;
        w->memReqsInPipe--;
    }
356
    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        // Issue an HSAIL store: stage the per-lane source data into the
        // request buffer, translate the per-lane addresses for the target
        // segment, and push the request to the appropriate memory
        // pipeline. Arg-segment stores complete immediately instead.
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename OperationType::CType CType;

        const VectorMask &mask = w->getPred();

        // arg references are handled uniquely for now (no Memory Request
        // is used), so special-case them up front.  Someday we should
        // make this more realistic, at which we should get rid of this
        // block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();

            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    CType data = this->src.template get<CType>(w, lane);
                    DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
                    w->writeCallArgMem<CType>(lane, address, data);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        m->exec_mask = w->execMask();

        // compute the per-lane effective addresses into m->addr
        this->addr.calcVector(w, m->addr);

        // stage the source register data; vector stores lay out operand k
        // at d_data[k * wfSize + lane]
        if (num_src_operands == 1) {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    ((CType*)m->d_data)[lane] =
                        this->src.template get<CType>(w, lane);
                }
            }
        } else {
            for (int k= 0; k < num_src_operands; ++k) {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] =
                            this->src_vect[k].template get<CType>(w, lane);
                    }
                }
            }
        }

        m->m_op = Enums::MO_ST;
        m->m_type = OperationType::memType;
        m->v_type = OperationType::vgprType;

        m->statusBitVector = 0;
        m->equiv = this->equivClass;

        // number of registers being stored per lane
        if (num_src_operands == 1) {
            m->n_reg = 1;
        } else {
            m->n_reg = num_src_operands;
        }

        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        // identify the issuing wavefront/compute unit for the request
        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        // Per-segment address translation, latency, and pipeline selection.
        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug
            // (the compiler currently generates global access for private
            //  addresses (starting from 0). We need to add the private offset)
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    if (m->addr[lane] < w->privSizePerItem) {

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(CType)-1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_src_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                // spill addresses are strided so consecutive lanes hit
                // consecutive memory locations (coalescing-friendly)
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                                        lane * sizeof(CType) + w->spillBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            // group (LDS) accesses go to the local memory pipeline
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsWrLm++;
            w->wrLmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);
                        m->addr[lane] = m->addr[lane] + lane *
                            sizeof(CType)+w->privBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          default:
            fatal("Store to unsupported segment %d\n", this->segment);
        }

        // bookkeeping common to all issued (non-arg) stores
        w->outstandingReqs++;
        w->memReqsInPipe--;
    }
520
521    template<typename OperationType, typename SrcDataType,
522             typename AddrRegOperandType>
523    void
524    StInst<OperationType, SrcDataType,
525           AddrRegOperandType>::generateDisassembly()
526    {
527        switch (num_src_operands) {
528          case 1:
529            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
530                                         segmentNames[this->segment],
531                                         OperationType::label,
532                                         this->src.disassemble(),
533                                         this->addr.disassemble());
534            break;
535          case 2:
536            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
537                                         segmentNames[this->segment],
538                                         OperationType::label,
539                                         this->src_vect[0].disassemble(),
540                                         this->src_vect[1].disassemble(),
541                                         this->addr.disassemble());
542            break;
543          case 4:
544            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
545                                         this->opcode,
546                                         segmentNames[this->segment],
547                                         OperationType::label,
548                                         this->src_vect[0].disassemble(),
549                                         this->src_vect[1].disassemble(),
550                                         this->src_vect[2].disassemble(),
551                                         this->src_vect[3].disassemble(),
552                                         this->addr.disassemble());
553            break;
554          default: fatal("Bad ld register src operand, num vector operands: "
555                         "%d \n", num_src_operands);
556            break;
557        }
558    }
559
    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
        HasDst>::execute(GPUDynInstPtr gpuDynInst)
    {
        // Issue an HSAIL atomic: stage the operand value(s), fill in the
        // memory-request fields, and push the request to the global or
        // local memory pipeline. An atomic is tracked as both a read and
        // a write in the wavefront's request bookkeeping.
        typedef typename DataType::CType CType;

        Wavefront *w = gpuDynInst->wavefront();

        GPUDynInstPtr m = gpuDynInst;

        // compute the per-lane effective addresses into m->addr
        this->addr.calcVector(w, m->addr);

        // first source operand (the atomic's data value), for every lane
        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            ((CType *)m->a_data)[lane] =
                this->src[0].template get<CType>(w, lane);
        }

        // load second source operand for CAS
        if (NumSrcOperands > 1) {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                ((CType*)m->x_data)[lane] =
                    this->src[1].template get<CType>(w, lane);
            }
        }

        assert(NumSrcOperands <= 2);

        m->m_op = this->opType;
        m->m_type = DataType::memType;
        m->v_type = DataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = 0;  // atomics don't have an equivalence class operand
        m->n_reg = 1;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        // destination register for the returned (pre-op) value, if any
        if (HasDst) {
            m->dst_reg = this->dest.regIndex();
        }

        // identify the issuing wavefront/compute unit for the request
        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        // Per-segment latency and pipeline selection; note both the
        // read- and write-side counters are updated below.
        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->latency.set(w->computeUnit->shader->ticks(64));
            m->pipeId = GLBMEM_PIPE;

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsWrLm++;
            w->wrLmReqsInPipe--;
            w->outstandingReqsRdLm++;
            w->rdLmReqsInPipe--;
            break;

          default:
            fatal("Atomic op to unsupported segment %d\n",
                  this->segment);
        }

        w->outstandingReqs++;
        w->memReqsInPipe--;
    }
644
645    const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);
646
647    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
648             bool HasDst>
649    void
650    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
651               HasDst>::generateDisassembly()
652    {
653        if (HasDst) {
654            this->disassembly =
655                csprintf("%s_%s_%s_%s %s,%s", this->opcode,
656                         atomicOpToString(this->atomicOperation),
657                         segmentNames[this->segment],
658                         DataType::label, this->dest.disassemble(),
659                         this->addr.disassemble());
660        } else {
661            this->disassembly =
662                csprintf("%s_%s_%s_%s %s", this->opcode,
663                         atomicOpToString(this->atomicOperation),
664                         segmentNames[this->segment],
665                         DataType::label, this->addr.disassemble());
666        }
667
668        for (int i = 0; i < NumSrcOperands; ++i) {
669            this->disassembly += ",";
670            this->disassembly += this->src[i].disassemble();
671        }
672    }
673} // namespace HsailISA
674