mem.hh revision 11435:0f1b46dde3fa
/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#ifndef __ARCH_HSAIL_INSTS_MEM_HH__
#define __ARCH_HSAIL_INSTS_MEM_HH__

#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"

namespace HsailISA
{
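    // MemInst is a mixin for the memory instruction classes below. It
    // records the memory operand size implied by the BRIG memory type and
    // keeps a pointer to the instruction's address operand, so common code
    // can query both without knowing the concrete instruction type.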
    class MemInst
    {
      public:
        MemInst() : size(0), addr_operand(nullptr) { }

        MemInst(Enums::MemType m_type)
        {
            if (m_type == Enums::M_U64 ||
                m_type == Enums::M_S64 ||
                m_type == Enums::M_F64) {
                size = 8;
            } else if (m_type == Enums::M_U32 ||
                       m_type == Enums::M_S32 ||
                       m_type == Enums::M_F32) {
                size = 4;
            } else if (m_type == Enums::M_U16 ||
                       m_type == Enums::M_S16 ||
                       m_type == Enums::M_F16) {
                size = 2;
            } else {
                size = 1;
            }

            addr_operand = nullptr;
        }

        void
        init_addr(AddrOperandBase *_addr_operand)
        {
            addr_operand = _addr_operand;
        }

      private:
        int size;
        AddrOperandBase *addr_operand;

      public:
        int getMemOperandSize() { return size; }
        AddrOperandBase *getAddressOperand() { return addr_operand; }
    };

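    // Base class for lda (load address) instructions: a destination
    // register plus an address operand, along with the operand-query
    // methods (register kind, size, index) the GPU model uses for
    // register accounting.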
    template<typename DestOperandType, typename AddrOperandType>
    class LdaInstBase : public HsailGPUStaticInst
    {
      public:
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                    const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);
            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        int numSrcRegOperands() override
        { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() override
        { return dest.isVectorRegister(); }
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister())
                return 2;
            return 1;
        }
    };

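    // Concrete lda instruction; the MemInst mixin supplies the operand
    // size and address-operand bookkeeping shared by the memory ops.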
    template<typename DestDataType, typename AddrOperandType>
    class LdaInst :
        public LdaInstBase<typename DestDataType::OperandType,
                           AddrOperandType>,
        public MemInst
    {
      public:
        void generateDisassembly() override;

        LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                const char *_opcode)
            : LdaInstBase<typename DestDataType::OperandType,
                          AddrOperandType>(ib, obj, _opcode)
        {
            init_addr(&this->addr);
        }

        void execute(GPUDynInstPtr gpuDynInst) override;
    };

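    // Instantiates the LdaInst specialization matching the address
    // operand: a plain address, or a single- or double-register base.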
    template<typename DataType>
    GPUStaticInst*
    decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
        BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);

        if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
        } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (regDataType.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
              default:
                fatal("Bad ldas register operand type %d\n",
                      regDataType.regKind);
            }
        } else {
            fatal("Bad ldas register operand kind %d\n", regDataType.kind);
        }
    }

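    // Base class for ld; also used for the load half of atomics, which
    // BRIG encodes differently (hence the separate initLd/initAtomicLd
    // paths). Decodes the segment, memory order and scope, access width,
    // and the destination and address operands.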
    template<typename MemOperandType, typename DestOperandType,
             typename AddrOperandType>
    class LdInstBase : public HsailGPUStaticInst
    {
      public:
        Brig::BrigWidth8_t width;
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigMemoryScope memoryScope;
        unsigned int equivClass;
        bool isArgLoad()
        {
            return segment == Brig::BRIG_SEGMENT_KERNARG ||
                   segment == Brig::BRIG_SEGMENT_ARG;
        }
        void
        initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
        {
            using namespace Brig;

            const BrigInstMem *ldst = (const BrigInstMem*)ib;

            segment = (BrigSegment)ldst->segment;
            memoryOrder = BRIG_MEMORY_ORDER_NONE;
            memoryScope = BRIG_MEMORY_SCOPE_NONE;
            equivClass = ldst->equivClass;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_READ;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_READ;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_READ;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_READ;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_READ;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_READ;
                break;

              case BRIG_SEGMENT_KERNARG:
                o_type = Enums::OT_KERN_READ;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("Ld: segment %d not supported\n", segment);
            }

            width = ldst->width;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        void
        initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            equivClass = 0;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_READ;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_READ;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_READ;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_READ;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_READ;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_READ;
                break;

              case BRIG_SEGMENT_KERNARG:
                o_type = Enums::OT_KERN_READ;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("Ld: segment %d not supported\n", segment);
            }

            width = BRIG_WIDTH_1;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            if (ib->opcode == BRIG_OPCODE_LD) {
                initLd(ib, obj, _opcode);
            } else {
                initAtomicLd(ib, obj, _opcode);
            }
        }

        int numSrcRegOperands() override
        { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() override { return dest.isVectorRegister(); }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
    };

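    // Concrete ld instruction. Handles vector destinations (v2/v4 loads
    // write up to four registers) and issues one memory request per
    // active lane; VSZ here is the model's wavefront width.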
    template<typename MemDataType, typename DestDataType,
             typename AddrOperandType>
    class LdInst :
        public LdInstBase<typename MemDataType::CType,
                          typename DestDataType::OperandType, AddrOperandType>,
        public MemInst
    {
        typename DestDataType::OperandType::DestOperand dest_vect[4];
        uint16_t num_dest_operands;
        void generateDisassembly() override;

      public:
        LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
            : LdInstBase<typename MemDataType::CType,
                         typename DestDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

            if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)brigOp;

                num_dest_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_dest_operands <= 4);
            } else {
                num_dest_operands = 1;
            }

            if (num_dest_operands > 1) {
                assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_dest_operands; ++i) {
                    dest_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_dest_operands > 1) {
                for (int i = 0; i < VSZ; ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_dest_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_dest_operands; ++k) {
                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];

                for (int i = 0; i < VSZ; ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (isLocalMem()) {
                            // load from shared memory
                            *d = gpuDynInst->wavefront()->ldsChunk->
                                read<c0>(vaddr);
                        } else {
                            Request *req = new Request(0, vaddr, sizeof(c0), 0,
                                          gpuDynInst->computeUnit()->masterId(),
                                          0, gpuDynInst->wfDynId);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
                            pkt->dataStatic(d);

                            if (gpuDynInst->computeUnit()->shader->
                                separate_acquire_release &&
                                gpuDynInst->memoryOrder ==
                                Enums::MEMORY_ORDER_SC_ACQUIRE) {
                                // if this load has acquire semantics,
                                // set the response continuation function
                                // to perform an Acquire request
                                gpuDynInst->execContinuation =
                                    &GPUStaticInst::execLdAcq;

                                gpuDynInst->useContinuation = true;
                            } else {
                                // the request will be finished when
                                // the load completes
                                gpuDynInst->useContinuation = false;
                            }
                            // translation is performed in sendRequest()
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);
                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      private:
        void
        execLdAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after the load has completed, and if the load has acquire
            // semantics, issue an acquire request.
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->
                        injectGlobalMemFence(gpuDynInst, false, req);
                }
            }
        }

      public:
        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }

        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            if (num_dest_operands > 1) {
                return dest_vect[operandIndex].isVectorRegister();
            } else if (num_dest_operands == 1) {
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isVectorRegister();
            }
            return false;
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isCondRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isCondRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isScalarRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isScalarRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return false;
            return true;
        }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.opSize());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].opSize());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.opSize());
            return 0;
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.regIndex());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].regIndex());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.regIndex());
            return -1;
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return(num_dest_operands+1);
            else
                return(num_dest_operands);
        }
        void execute(GPUDynInstPtr gpuDynInst) override;
    };

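    // Second stage of ld decoding: with the memory and destination data
    // types fixed, selects the address-operand specialization.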
    template<typename MemDT, typename DestDT>
    GPUStaticInst*
    decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
                   tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdInst<MemDT, DestDT,
                                  SRegAddrOperand>(ib, obj, "ld");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdInst<MemDT, DestDT,
                                  DRegAddrOperand>(ib, obj, "ld");
              default:
                fatal("Bad ld register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad ld register operand kind %d\n", tmp.kind);
        }
    }

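    // First stage of ld decoding: dispatches on the destination register
    // kind and the BRIG type to choose DestDT, then defers to decodeLd2.
    // As a sketch (the BRIG itself comes from the HSAIL finalizer), an
    //
    //     ld_global_u32 $s1, [$s0]
    //
    // would decode via decodeLd<U32> -> decodeLd2<U32, U32> and yield an
    // LdInst<U32, U32, SRegAddrOperand>.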
    template<typename MemDT>
    GPUStaticInst*
    decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
        BrigRegOperandInfo dest = findRegDataType(op_offs, obj);

        assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
               dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
        switch (dest.regKind) {
          case Brig::BRIG_REGISTER_KIND_SINGLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
              case Brig::BRIG_TYPE_B16:
              case Brig::BRIG_TYPE_B32:
                return decodeLd2<MemDT, B32>(ib, obj);
              case Brig::BRIG_TYPE_U8:
              case Brig::BRIG_TYPE_U16:
              case Brig::BRIG_TYPE_U32:
                return decodeLd2<MemDT, U32>(ib, obj);
              case Brig::BRIG_TYPE_S8:
              case Brig::BRIG_TYPE_S16:
              case Brig::BRIG_TYPE_S32:
                return decodeLd2<MemDT, S32>(ib, obj);
              case Brig::BRIG_TYPE_F16:
              case Brig::BRIG_TYPE_F32:
                return decodeLd2<MemDT, U32>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            }
          case Brig::BRIG_REGISTER_KIND_DOUBLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B64:
                return decodeLd2<MemDT, B64>(ib, obj);
              case Brig::BRIG_TYPE_U64:
                return decodeLd2<MemDT, U64>(ib, obj);
              case Brig::BRIG_TYPE_S64:
                return decodeLd2<MemDT, S64>(ib, obj);
              case Brig::BRIG_TYPE_F64:
                return decodeLd2<MemDT, U64>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            }
          default:
            fatal("Bad ld register operand type %d, %d\n", dest.regKind,
                  ib->type);
        }
    }

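    // Base class for st; also used for the store half of atomics. Note
    // the operand order difference: plain stores put the source first in
    // the BRIG operand list, atomics put the address first (see initSt
    // vs. initAtomicSt).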
    template<typename MemDataType, typename SrcOperandType,
             typename AddrOperandType>
    class StInstBase : public HsailGPUStaticInst
    {
      public:
        typename SrcOperandType::SrcOperand src;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigMemoryOrder memoryOrder;
        unsigned int equivClass;

        void
        initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
        {
            using namespace Brig;

            const BrigInstMem *ldst = (const BrigInstMem*)ib;

            segment = (BrigSegment)ldst->segment;
            memoryOrder = BRIG_MEMORY_ORDER_NONE;
            memoryScope = BRIG_MEMORY_SCOPE_NONE;
            equivClass = ldst->equivClass;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_WRITE;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_WRITE;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_WRITE;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_WRITE;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_WRITE;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_WRITE;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("St: segment %d not supported\n", segment);
            }

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const BrigOperand *baseOp = obj->getOperand(op_offs);

            if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
                (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
                src.init(op_offs, obj);
            }

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        void
        initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            equivClass = 0;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_WRITE;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_WRITE;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_WRITE;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_WRITE;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_WRITE;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_WRITE;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("St: segment %d not supported\n", segment);
            }

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            addr.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            src.init(op_offs, obj);
        }

        StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            if (ib->opcode == BRIG_OPCODE_ST) {
                initSt(ib, obj, _opcode);
            } else {
                initAtomicSt(ib, obj, _opcode);
            }
        }

        int numDstRegOperands() override { return 0; }
        int numSrcRegOperands() override
        {
            return src.isVectorRegister() + this->addr.isVectorRegister();
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isVectorRegister() :
                   this->addr.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isCondRegister() :
                   this->addr.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isScalarRegister() :
                   this->addr.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) override { return false; }
        int getOperandSize(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.opSize() : this->addr.opSize();
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.regIndex() : this->addr.regIndex();
        }
    };

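    // Concrete st instruction. srcIdx selects which BRIG operand holds
    // the store data (0 for st, 1 for atomics); vector sources (v2/v4
    // stores) read up to four registers.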
    template<typename MemDataType, typename SrcDataType,
             typename AddrOperandType>
    class StInst :
        public StInstBase<MemDataType, typename SrcDataType::OperandType,
                          AddrOperandType>,
        public MemInst
    {
      public:
        typename SrcDataType::OperandType::SrcOperand src_vect[4];
        uint16_t num_src_operands;
        void generateDisassembly() override;

        StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode, int srcIdx)
            : StInstBase<MemDataType, typename SrcDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(SrcDataType::memType)
        {
            init_addr(&this->addr);

            BrigRegOperandInfo rinfo;
            unsigned op_offs = obj->getOperandPtr(ib->operands, srcIdx);
            const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
                const Brig::BrigOperandConstantBytes *op =
                    (Brig::BrigOperandConstantBytes*)baseOp;

                rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
                                           Brig::BRIG_TYPE_NONE);
            } else {
                rinfo = findRegDataType(op_offs, obj);
            }

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)baseOp;

                num_src_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_src_operands <= 4);
            } else {
                num_src_operands = 1;
            }

            if (num_src_operands > 1) {
                assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_src_operands; ++i) {
                    src_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before performing a store, check if this store has
            // release semantics, and if so issue a release first
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_RELEASE) {

                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->execContinuation = &GPUStaticInst::execSt;
                    gpuDynInst->useContinuation = true;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->
                        injectGlobalMemFence(gpuDynInst, false, req);

                    return;
                }
            }

            // if there is no release semantic, perform stores immediately
            execSt(gpuDynInst);
        }

        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }

      private:
        // execSt may be called through a continuation
        // if the store had release semantics. see comment for
        // execSt in gpu_static_inst.hh
        void
        execSt(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_src_operands > 1) {
                for (int i = 0; i < VSZ; ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_src_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_src_operands; ++k) {
                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];

                for (int i = 0; i < VSZ; ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (isLocalMem()) {
                            // store to shared memory
                            gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
                                                                         *d);
                        } else {
                            Request *req =
                              new Request(0, vaddr, sizeof(c0), 0,
                                          gpuDynInst->computeUnit()->masterId(),
                                          0, gpuDynInst->wfDynId);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
                            pkt->dataStatic<c0>(d);

                            // translation is performed in sendRequest();
                            // the request will be finished when the store
                            // completes
                            gpuDynInst->useContinuation = false;
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);
                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      public:
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isVectorRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isVectorRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isVectorRegister();
            return false;
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isCondRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isCondRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isScalarRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isScalarRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) override { return false; }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.opSize();
            if (num_src_operands > 1)
                return src_vect[operandIndex].opSize();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.opSize();
            return 0;
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.regIndex();
            if (num_src_operands > 1)
                return src_vect[operandIndex].regIndex();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.regIndex();
            return -1;
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return num_src_operands + 1;
            else
                return num_src_operands;
        }
        void execute(GPUDynInstPtr gpuDynInst) override;
    };

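    // Decodes st, and the store side of atomics (where the source and
    // address operand positions are swapped), into the matching StInst
    // address-operand specialization.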
    template<typename DataType, typename SrcDataType>
    GPUStaticInst*
    decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        int srcIdx = 0;
        int destIdx = 1;
        if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
            ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
            srcIdx = 1;
            destIdx = 0;
        }
        unsigned op_offs = obj->getOperandPtr(ib->operands, destIdx);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new StInst<DataType, SrcDataType,
                              NoRegAddrOperand>(ib, obj, "st", srcIdx);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new StInst<DataType, SrcDataType,
                                  SRegAddrOperand>(ib, obj, "st", srcIdx);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new StInst<DataType, SrcDataType,
                                  DRegAddrOperand>(ib, obj, "st", srcIdx);
              default:
                fatal("Bad st register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad st register operand kind %d\n", tmp.kind);
        }
    }

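    // Maps a BRIG atomic opcode/operation pair onto the model's MemOpType
    // enum; implemented out of line.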
    Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
                                           Brig::BrigAtomicOperation brigOp);

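    // Base class for atomics. NumSrcOperands is 1 for most RMW ops and 2
    // for compare-and-swap; HasDst distinguishes returning atomics from
    // their _noret variants, which also shifts where the address sits in
    // the BRIG operand list.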
    template<typename OperandType, typename AddrOperandType,
             int NumSrcOperands, bool HasDst>
    class AtomicInstBase : public HsailGPUStaticInst
    {
      public:
        typename OperandType::DestOperand dest;
        typename OperandType::SrcOperand src[NumSrcOperands];
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigAtomicOperation atomicOperation;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigOpcode opcode;
        Enums::MemOpType opType;

        AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                       const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            atomicOperation = (BrigAtomicOperation)at->atomicOperation;
            opcode = (BrigOpcode)ib->opcode;
            opType = brigAtomicToMemOpType(opcode, atomicOperation);

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_ATOMIC;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_ATOMIC;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_ATOMIC;
                break;

              default:
                panic("Atomic: segment %d not supported\n", segment);
            }

            if (HasDst) {
                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                dest.init(op_offs, obj);

                op_offs = obj->getOperandPtr(ib->operands, 1);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 2);
                    src[i].init(op_offs, obj);
                }
            } else {
                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 1);
                    src[i].init(op_offs, obj);
                }
            }
        }

        int numSrcRegOperands() override
        {
            int operands = 0;
            for (int i = 0; i < NumSrcOperands; i++) {
                if (src[i].isVectorRegister()) {
                    operands++;
                }
            }
            if (addr.isVectorRegister())
                operands++;
            return operands;
        }
        int numDstRegOperands() override { return dest.isVectorRegister(); }
        int getNumOperands() override
        {
            if (addr.isVectorRegister())
                return(NumSrcOperands + 2);
            return(NumSrcOperands + 1);
        }
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isVectorRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isCondRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isCondRegister());
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isScalarRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isScalarRegister());
            else
                return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return true;
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return false;
        }
        bool isDstOperand(int operandIndex) override
        {
            if (operandIndex <= NumSrcOperands)
                return false;
            else
                return true;
        }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].opSize());
            else if (operandIndex == NumSrcOperands)
                return(addr.opSize());
            else
                return(dest.opSize());
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].regIndex());
            else if (operandIndex == NumSrcOperands)
                return(addr.regIndex());
            else
                return(dest.regIndex());
        }
    };

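    // Concrete atomic instruction. initiateAcc issues a release fence
    // before the RMW when required; execAtomic then performs the
    // per-lane operation, directly on the LDS chunk for group-segment
    // atomics.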
1330    template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
1331             bool HasDst>
1332    class AtomicInst :
1333        public AtomicInstBase<typename MemDataType::OperandType,
1334                              AddrOperandType, NumSrcOperands, HasDst>,
1335        public MemInst
1336    {
1337      public:
1338        void generateDisassembly() override;
1339
1340        AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
1341                   const char *_opcode)
1342            : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
1343                             NumSrcOperands, HasDst>
1344                (ib, obj, _opcode),
1345              MemInst(MemDataType::memType)
1346        {
1347            init_addr(&this->addr);
1348        }
1349
1350        void
1351        initiateAcc(GPUDynInstPtr gpuDynInst) override
1352        {
1353            // before doing the RMW, check if this atomic has
1354            // release semantics, and if so issue a release first
1355            if (!isLocalMem()) {
1356                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1357                    && (gpuDynInst->memoryOrder ==
1358                    Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder ==
1359                    Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {
1360
1361                    gpuDynInst->statusBitVector = VectorMask(1);
1362
1363                    gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
1364                    gpuDynInst->useContinuation = true;
1365
1366                    // create request
1367                    Request *req = new Request(0, 0, 0, 0,
1368                                  gpuDynInst->computeUnit()->masterId(),
1369                                  0, gpuDynInst->wfDynId);
1370                    req->setFlags(Request::RELEASE);
1371                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
1372
1373                    return;
1374                }
1375            }
1376
1377            // if there is no release semantic, execute the RMW immediately
1378            execAtomic(gpuDynInst);
1379
1380        }
1381
1382        void execute(GPUDynInstPtr gpuDynInst) override;
1383
1384        bool
1385        isLocalMem() const override
1386        {
1387            return this->segment == Brig::BRIG_SEGMENT_GROUP;
1388        }
1389
1390      private:
1391        // execAtomic may be called through a continuation
1392        // if the RMW had release semantics. see comment for
1393        // execContinuation in gpu_dyn_inst.hh
1394        void
1395        execAtomic(GPUDynInstPtr gpuDynInst) override
1396        {
1397            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
1398
1399            typedef typename MemDataType::CType c0;
1400
            c0 *d = &((c0*) gpuDynInst->d_data)[0];
            c0 *e = &((c0*) gpuDynInst->a_data)[0];
            c0 *f = &((c0*) gpuDynInst->x_data)[0];

            for (int i = 0; i < VSZ; ++i) {
                if (gpuDynInst->exec_mask[i]) {
                    Addr vaddr = gpuDynInst->addr[i];

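                    // group-segment (LDS) atomics are performed functionally,
                    // in place, on the wavefront's LDS chunk; global atomics
                    // are issued to the memory system as SwapReq packets below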
                    if (isLocalMem()) {
                        Wavefront *wavefront = gpuDynInst->wavefront();
                        *d = wavefront->ldsChunk->read<c0>(vaddr);

                        switch (this->opType) {
                          case Enums::MO_AADD:
                          case Enums::MO_ANRADD:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) + (*e));
                            break;
                          case Enums::MO_ASUB:
                          case Enums::MO_ANRSUB:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) - (*e));
                            break;
                          case Enums::MO_AMAX:
                          case Enums::MO_ANRMAX:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::max(wavefront->ldsChunk->read<c0>(vaddr),
                                         (*e)));
                            break;
                          case Enums::MO_AMIN:
                          case Enums::MO_ANRMIN:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::min(wavefront->ldsChunk->read<c0>(vaddr),
                                         (*e)));
                            break;
                          case Enums::MO_AAND:
                          case Enums::MO_ANRAND:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) & (*e));
                            break;
                          case Enums::MO_AOR:
                          case Enums::MO_ANROR:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) | (*e));
                            break;
                          case Enums::MO_AXOR:
                          case Enums::MO_ANRXOR:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
                            break;
                          case Enums::MO_AINC:
                          case Enums::MO_ANRINC:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) + 1);
                            break;
                          case Enums::MO_ADEC:
                          case Enums::MO_ANRDEC:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) - 1);
                            break;
                          case Enums::MO_AEXCH:
                          case Enums::MO_ANREXCH:
                            wavefront->ldsChunk->write<c0>(vaddr, (*e));
                            break;
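                          // compare-and-swap: store the swap value (*f)
                          // only if the current value equals the compare
                          // value (*e)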
                          case Enums::MO_ACAS:
                          case Enums::MO_ANRCAS:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
                                (*f) : wavefront->ldsChunk->read<c0>(vaddr));
                            break;
                          default:
                            fatal("Unrecognized or invalid HSAIL atomic op "
                                  "type.\n");
                            break;
                        }
                    } else {
                        Request *req =
                            new Request(0, vaddr, sizeof(c0), 0,
                                        gpuDynInst->computeUnit()->masterId(),
                                        0, gpuDynInst->wfDynId,
                                        gpuDynInst->makeAtomicOpFunctor<c0>(e,
                                        f, this->opType));

                        gpuDynInst->setRequestFlags(req);
                        PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
                        pkt->dataStatic(d);
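                        // dataStatic() makes the packet point directly at d,
                        // so the old memory value is deposited there when the
                        // atomic response returns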

                        if (gpuDynInst->computeUnit()->shader->
                            separate_acquire_release &&
                            (gpuDynInst->memoryOrder ==
                             Enums::MEMORY_ORDER_SC_ACQUIRE)) {
                            // if this atomic has acquire semantics,
                            // schedule the continuation to perform an
                            // acquire after the RMW completes
                            gpuDynInst->execContinuation =
                                &GPUStaticInst::execAtomicAcq;

                            gpuDynInst->useContinuation = true;
                        } else {
                            // the request will be finished when
                            // the RMW completes
                            gpuDynInst->useContinuation = false;
                        }
                        // translation is performed in sendRequest()
                        gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
                                                               pkt);
                    }
                }

                ++d;
                ++e;
                ++f;
            }

            gpuDynInst->updateStats();
        }

        // execAtomicAcq will always be called through a continuation.
        // see comment for execContinuation in gpu_dyn_inst.hh
        void
        execAtomicAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after performing the RMW, check to see if this instruction
            // has acquire semantics, and if so, issue an acquire
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                     && gpuDynInst->memoryOrder ==
                     Enums::MEMORY_ORDER_SC_ACQUIRE) {
                    gpuDynInst->statusBitVector = VectorMask(1);

                    // the request will be finished when
                    // the acquire completes
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst,
                                                                    false, req);
                }
            }
        }
    };
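
    // Sequencing of a global atomic when separate_acquire_release is set:
    // initiateAcc() first injects a release fence for the release and
    // acquire-release orderings, registering execAtomic() as the fence's
    // continuation; execAtomic() then issues the RMW and, for the acquire
    // ordering, registers execAtomicAcq() to run when the RMW completes;
    // execAtomicAcq() finally injects the acquire fence. Group-segment
    // atomics skip the fences entirely.

    // constructAtomic() routes the BRIG atomic opcodes that are really
    // loads or stores (BRIG_ATOMIC_LD / BRIG_ATOMIC_ST) onto the regular
    // ld/st decode paths, and builds an AtomicInst, with or without a
    // destination operand, for everything else.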
    template<typename DataType, typename AddrOperandType, int NumSrcOperands>
    GPUStaticInst*
    constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
            return decodeLd<DataType>(ib, obj);
        } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
                return decodeSt<S8,S8>(ib, obj);
              case Brig::BRIG_TYPE_B16:
                return decodeSt<S8,S16>(ib, obj);
              case Brig::BRIG_TYPE_B32:
                return decodeSt<S8,S32>(ib, obj);
              case Brig::BRIG_TYPE_B64:
                return decodeSt<S8,S64>(ib, obj);
              default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
            }
        } else {
            if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
                return new AtomicInst<DataType, AddrOperandType,
                    NumSrcOperands, false>(ib, obj, "atomicnoret");
            else
                return new AtomicInst<DataType, AddrOperandType,
                    NumSrcOperands, true>(ib, obj, "atomic");
        }
    }
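
    // the address is operand 0 for atomicnoret (there is no destination
    // operand) and operand 1 otherwise; the operand's kind selects the
    // address-operand type to instantiate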
    template<typename DataType, int NumSrcOperands>
    GPUStaticInst*
    decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
            Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;

        unsigned op_offs = obj->getOperandPtr(ib->operands, addrIndex);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return constructAtomic<DataType, NoRegAddrOperand,
                                   NumSrcOperands>(ib, obj);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return constructAtomic<DataType, SRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return constructAtomic<DataType, DRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              default:
                fatal("Bad atomic register operand type %d\n", tmp.type);
            }
        } else {
            fatal("Bad atomic register operand kind %d\n", tmp.kind);
        }
    }
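
    // CAS carries two source operands (the compare value and the swap
    // value); every other atomic operation carries one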
    template<typename DataType>
    GPUStaticInst*
    decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }
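
    // identical to decodeAtomic(): the source-operand count depends only on
    // whether the operation is a CAS, not on whether it returns a value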
    template<typename DataType>
    GPUStaticInst*
    decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }
} // namespace HsailISA

#endif // __ARCH_HSAIL_INSTS_MEM_HH__