/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#include "arch/hsail/generic_types.hh"
#include "gpu-compute/hsail_code.hh"

// defined in code.cc, but not worth sucking in all of code.h for this
// at this point
extern const char *segmentNames[];

namespace HsailISA
{
    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
    {
        this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
                                     DestDataType::label,
                                     this->dest.disassemble(),
                                     this->addr.disassemble());
    }

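    // lda computes each active lane's effective address and writes it
    // directly to the destination register; no memory request is issued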
    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename DestDataType::CType CType M5_VAR_USED;
        const VectorMask &mask = w->get_pred();
        std::vector<Addr> addr_vec;
        addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
        this->addr.calcVector(w, addr_vec);

        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                this->dest.set(w, lane, addr_vec[lane]);
            }
        }
        addr_vec.clear();
    }

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly()
    {
        switch (num_dest_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->dest_vect[2].disassemble(),
                                         this->dest_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default:
            fatal("Bad ld register dest operand, num vector operands: %d\n",
                  num_dest_operands);
            break;
        }
    }

    static Addr
    calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
    {
        // What is the size of the object we are accessing?
        // NOTE: the compiler doesn't generate enough information
        // to do this yet... we have to just line up all the private
        // work-item spaces back to back for now
        /*
        StorageElement* se =
            i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
        assert(se);

        return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() +
            se->offset * w->computeUnit->wfSize() +
            lane * se->size;
        */

        // addressing strategy: interleave the private spaces of
        // work-items in a wavefront at 8-byte granularity.
        // this won't be perfect coalescing like the spill space
        // strategy, but it's better than nothing. The spill space
        // strategy won't work with private because the same address
        // may be accessed by different sized loads/stores.

        // Note: I'm assuming that the largest load/store to private
        // is 8 bytes. If it is larger, the stride will have to increase

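        // illustrative example (assuming a 64-lane wavefront): flat private
        // address 13 gives addr_div8 = 1 and addr_mod8 = 5, so lane L maps
        // to privBase + (1 * 8 * 64) + (L * 8) + 5; each work-item's private
        // space is thus striped across the wavefront in 8-byte blocks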
        Addr addr_div8 = addr / 8;
        Addr addr_mod8 = addr % 8;

        Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 +
            addr_mod8 + w->privBase;

        assert(ret < w->privBase +
               (w->privSizePerItem * w->computeUnit->wfSize()));

        return ret;
    }

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename MemDataType::CType MemCType;
        const VectorMask &mask = w->get_pred();

        // Kernarg references are handled uniquely for now (no memory request
        // is used), so special-case them up front.  Someday we should
        // make this more realistic, at which point we should get rid of this
        // block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
            MemCType val;

            // I assume no vector ld for kernargs
            assert(num_dest_operands == 1);

            // assuming for the moment that we'll never do register
            // offsets into kernarg space... just to make life simpler
            uint64_t address = this->addr.calcUniform();

            val = *(MemCType*)&w->kernelArgs[address];

            DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);

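            // the kernarg value is uniform, so read it once and broadcast
            // it to every active lane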
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    this->dest.set(w, lane, val);
                }
            }

            return;
        } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    MemCType val = w->readCallArgMem<MemCType>(lane, address);

                    DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
                            (unsigned long long)val);

                    this->dest.set(w, lane, val);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

        m->m_op = Enums::MO_LD;
        m->m_type = MemDataType::memType;
        m->v_type = DestDataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = this->equivClass;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        if (num_dest_operands == 1) {
            m->dst_reg = this->dest.regIndex();
            m->n_reg = 1;
        } else {
            m->n_reg = num_dest_operands;
            for (int i = 0; i < num_dest_operands; ++i) {
                m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
            }
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kern_id;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

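        // dispatch the request by segment: global, spill, readonly, and
        // private traffic all use the global memory pipeline, while group
        // (LDS) traffic uses the local memory pipeline; the latencies set
        // below are the per-segment issue latencies this model charges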
        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug
            // (the compiler currently generates global accesses for private
            // addresses, starting from 0, so we need to add the private
            // offset)
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (m->addr[lane] < w->privSizePerItem) {
                    if (mask[lane]) {
                        // what is the size of the object we are accessing?
                        // find the base for this wavefront

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(MemCType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_dest_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    //  note: this calculation will NOT WORK if the compiler
                    //  ever generates loads/stores to the same address with
                    //  different widths (e.g., a ld_u32 addr and a ld_u16 addr)
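                    //  illustrative example: with 4-byte elements, per-item
                    //  offset A for lane L maps to
                    //  spillBase + A * spillWidth + L * 4, so all lanes'
                    //  accesses to a given offset are adjacent and coalesce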
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                                        lane * sizeof(MemCType) + w->spillBase;

                        w->last_addr[lane] = m->addr[lane];
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstanding_reqs_rd_lm++;
            w->rd_lm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_READONLY:
            m->s_type = SEG_READONLY;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
                    m->addr[lane] += w->roBase;
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);

                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(MemCType) + w->privBase;
                    }
                }
            }
            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          default:
            fatal("Load to unsupported segment %d %llx\n", this->segment,
                  m->addr[0]);
        }

        w->outstanding_reqs++;
        w->mem_reqs_in_pipe--;
    }

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename OperationType::CType CType;

        const VectorMask &mask = w->get_pred();

        // Arg references are handled uniquely for now (no memory request
        // is used), so special-case them up front.  Someday we should
        // make this more realistic, at which point we should get rid of this
        // block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();

            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    CType data = this->src.template get<CType>(w, lane);
                    DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
                    w->writeCallArgMem<CType>(lane, address, data);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        m->exec_mask = w->execMask();

        this->addr.calcVector(w, m->addr);

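        // gather the store data into the request's d_data buffer; vector
        // stores pack component k of every lane contiguously at index
        // k * wfSize() + lane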
        if (num_src_operands == 1) {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    ((CType*)m->d_data)[lane] =
                        this->src.template get<CType>(w, lane);
                }
            }
        } else {
            for (int k = 0; k < num_src_operands; ++k) {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] =
                            this->src_vect[k].template get<CType>(w, lane);
                    }
                }
            }
        }

        m->m_op = Enums::MO_ST;
        m->m_type = OperationType::memType;
        m->v_type = OperationType::vgprType;

        m->statusBitVector = 0;
        m->equiv = this->equivClass;

        if (num_src_operands == 1) {
            m->n_reg = 1;
        } else {
            m->n_reg = num_src_operands;
        }

        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kern_id;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug
            // (the compiler currently generates global accesses for private
            // addresses, starting from 0, so we need to add the private
            // offset)
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    if (m->addr[lane] < w->privSizePerItem) {
                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(CType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_wr_gm++;
            w->wr_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_src_operands == 1);
            m->s_type = SEG_SPILL;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                                        lane * sizeof(CType) + w->spillBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_wr_gm++;
            w->wr_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstanding_reqs_wr_lm++;
            w->wr_lm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->s_type = SEG_PRIVATE;
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);
                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(CType) + w->privBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_wr_gm++;
            w->wr_gm_reqs_in_pipe--;
            break;

          default:
            fatal("Store to unsupported segment %d\n", this->segment);
        }

        w->outstanding_reqs++;
        w->mem_reqs_in_pipe--;
    }

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::generateDisassembly()
    {
        switch (num_src_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->src_vect[2].disassemble(),
                                         this->src_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default: fatal("Bad st register src operand, num vector operands: "
                         "%d\n", num_src_operands);
            break;
        }
    }

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
        HasDst>::execute(GPUDynInstPtr gpuDynInst)
    {
        typedef typename DataType::CType CType;

        Wavefront *w = gpuDynInst->wavefront();

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

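        // per-lane operand packing: src[0] always supplies the request's
        // a_data; a second source operand (src[1], used by CAS) supplies
        // x_data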
        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            ((CType *)m->a_data)[lane] =
                this->src[0].template get<CType>(w, lane);
        }

        // load second source operand for CAS
        if (NumSrcOperands > 1) {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                ((CType*)m->x_data)[lane] =
                    this->src[1].template get<CType>(w, lane);
            }
        }

        assert(NumSrcOperands <= 2);

        m->m_op = this->opType;
        m->m_type = DataType::memType;
        m->v_type = DataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = 0;  // atomics don't have an equivalence class operand
        m->n_reg = 1;
        m->memoryOrder = getGenericMemoryOrder(this->memoryOrder);

        m->scope = getGenericMemoryScope(this->memoryScope);

        if (HasDst) {
            m->dst_reg = this->dest.regIndex();
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kern_id;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->s_type = SEG_GLOBAL;
            m->latency.set(w->computeUnit->shader->ticks(64));
            m->pipeId = GLBMEM_PIPE;

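            // an atomic is a read-modify-write, so it counts against both
            // the read and write request counters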
            w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
            w->outstanding_reqs_wr_gm++;
            w->wr_gm_reqs_in_pipe--;
            w->outstanding_reqs_rd_gm++;
            w->rd_gm_reqs_in_pipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->s_type = SEG_SHARED;
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstanding_reqs_wr_lm++;
            w->wr_lm_reqs_in_pipe--;
            w->outstanding_reqs_rd_lm++;
            w->rd_lm_reqs_in_pipe--;
            break;

          default:
            fatal("Atomic op to unsupported segment %d\n",
                  this->segment);
        }

        w->outstanding_reqs++;
        w->mem_reqs_in_pipe--;
    }

    const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
               HasDst>::generateDisassembly()
    {
        if (HasDst) {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s,%s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->dest.disassemble(),
                         this->addr.disassemble());
        } else {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->addr.disassemble());
        }

        for (int i = 0; i < NumSrcOperands; ++i) {
            this->disassembly += ",";
            this->disassembly += this->src[i].disassemble();
        }
    }
} // namespace HsailISA