/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#ifndef __ARCH_HSAIL_INSTS_MEM_HH__
#define __ARCH_HSAIL_INSTS_MEM_HH__

#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"

namespace HsailISA
{
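    // MemInst is a mixin inherited by all of the memory instruction
    // classes below. It records the access size (in bytes) implied by
    // the BRIG memory type and keeps a pointer to the instruction's
    // address operand.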
    class MemInst
    {
      public:
        MemInst() : size(0), addr_operand(nullptr) { }

        MemInst(Enums::MemType m_type)
        {
            if (m_type == Enums::M_U64 ||
                m_type == Enums::M_S64 ||
                m_type == Enums::M_F64) {
                size = 8;
            } else if (m_type == Enums::M_U32 ||
                       m_type == Enums::M_S32 ||
                       m_type == Enums::M_F32) {
                size = 4;
            } else if (m_type == Enums::M_U16 ||
                       m_type == Enums::M_S16 ||
                       m_type == Enums::M_F16) {
                size = 2;
            } else {
                size = 1;
            }

            addr_operand = nullptr;
        }

        void
        init_addr(AddrOperandBase *_addr_operand)
        {
            addr_operand = _addr_operand;
        }

      private:
        int size;
        AddrOperandBase *addr_operand;

      public:
        int getMemOperandSize() { return size; }
        AddrOperandBase *getAddressOperand() { return addr_operand; }
    };

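    // lda computes the address of a location in a memory segment and
    // writes it into a register; no memory is accessed. Illustrative
    // HSAIL (modifiers omitted, sketch only):
    //
    //     lda_global_u64 $d0, [&array];
    //
    // Operand 0 is the destination register, operand 1 the address
    // expression.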
    template<typename DestOperandType, typename AddrOperandType>
    class LdaInstBase : public HsailGPUStaticInst
    {
      public:
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                    const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            setFlag(ALU);

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);
            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        int numSrcRegOperands() override
        { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() override
        { return dest.isVectorRegister(); }
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) override {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister())
                return 2;
            return 1;
        }
    };

    template<typename DestDataType, typename AddrOperandType>
    class LdaInst :
        public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
        public MemInst
    {
      public:
        void generateDisassembly();

        LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                const char *_opcode)
            : LdaInstBase<typename DestDataType::OperandType,
                          AddrOperandType>(ib, obj, _opcode)
        {
            init_addr(&this->addr);
        }

        void execute(GPUDynInstPtr gpuDynInst);
    };

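    // Decoder for lda: chooses the address-operand template argument
    // (no register, s-register, or d-register) from the register kind
    // of the BRIG address operand.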
    template<typename DataType>
    GPUStaticInst*
    decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
        BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);

        if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
        } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (regDataType.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
              default:
                fatal("Bad ldas register operand type %d\n",
                      regDataType.regKind);
            }
        } else {
            fatal("Bad ldas register operand kind %d\n", regDataType.kind);
        }
    }

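    // Base class for ld. The constructor decodes the segment, memory
    // order, memory scope, equivalence class, and width from the BRIG
    // instruction (plain ld uses BrigInstMem, atomic loads use
    // BrigInstAtomic) and translates each into instruction flags.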
    template<typename MemOperandType, typename DestOperandType,
             typename AddrOperandType>
    class LdInstBase : public HsailGPUStaticInst
    {
      public:
        Brig::BrigWidth8_t width;
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigMemoryScope memoryScope;
        unsigned int equivClass;

        LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            setFlag(MemoryRef);
            setFlag(Load);

            if (ib->opcode == BRIG_OPCODE_LD) {
                const BrigInstMem *ldst = (const BrigInstMem*)ib;

                segment = (BrigSegment)ldst->segment;
                memoryOrder = BRIG_MEMORY_ORDER_NONE;
                memoryScope = BRIG_MEMORY_SCOPE_NONE;
                equivClass = ldst->equivClass;

                width = ldst->width;
                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
                if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                    dest.init(op_offs, obj);

                op_offs = obj->getOperandPtr(ib->operands, 1);
                addr.init(op_offs, obj);
            } else {
                const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

                segment = (BrigSegment)at->segment;
                memoryOrder = (BrigMemoryOrder)at->memoryOrder;
                memoryScope = (BrigMemoryScope)at->memoryScope;
                equivClass = 0;

                width = BRIG_WIDTH_1;
                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

                if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                    dest.init(op_offs, obj);

                op_offs = obj->getOperandPtr(ib->operands, 1);
                addr.init(op_offs, obj);
            }

            switch (memoryOrder) {
              case BRIG_MEMORY_ORDER_NONE:
                setFlag(NoOrder);
                break;
              case BRIG_MEMORY_ORDER_RELAXED:
                setFlag(RelaxedOrder);
                break;
              case BRIG_MEMORY_ORDER_SC_ACQUIRE:
                setFlag(Acquire);
                break;
              case BRIG_MEMORY_ORDER_SC_RELEASE:
                setFlag(Release);
                break;
              case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
                setFlag(AcquireRelease);
                break;
              default:
                fatal("LdInst has bad memory order type\n");
            }

            switch (memoryScope) {
              case BRIG_MEMORY_SCOPE_NONE:
                setFlag(NoScope);
                break;
              case BRIG_MEMORY_SCOPE_WORKITEM:
                setFlag(WorkitemScope);
                break;
              case BRIG_MEMORY_SCOPE_WORKGROUP:
                setFlag(WorkgroupScope);
                break;
              case BRIG_MEMORY_SCOPE_AGENT:
                setFlag(DeviceScope);
                break;
              case BRIG_MEMORY_SCOPE_SYSTEM:
                setFlag(SystemScope);
                break;
              default:
                fatal("LdInst has bad memory scope type\n");
            }

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                setFlag(GlobalSegment);
                break;
              case BRIG_SEGMENT_GROUP:
                setFlag(GroupSegment);
                break;
              case BRIG_SEGMENT_PRIVATE:
                setFlag(PrivateSegment);
                break;
              case BRIG_SEGMENT_READONLY:
                setFlag(ReadOnlySegment);
                break;
              case BRIG_SEGMENT_SPILL:
                setFlag(SpillSegment);
                break;
              case BRIG_SEGMENT_FLAT:
                setFlag(Flat);
                break;
              case BRIG_SEGMENT_KERNARG:
                setFlag(KernArgSegment);
                break;
              case BRIG_SEGMENT_ARG:
                setFlag(ArgSegment);
                break;
              default:
                panic("Ld: segment %d not supported\n", segment);
            }
        }

        int numSrcRegOperands() override
        { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() override { return dest.isVectorRegister(); }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
    };

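    // Concrete load instruction. In addition to the scalar destination
    // in the base class, it supports v2/v4 vector destinations (up to
    // four registers) and issues one memory request per active lane of
    // the wavefront. Illustrative HSAIL (modifiers omitted, sketch
    // only):
    //
    //     ld_global_u32 $s1, [$d0];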
    template<typename MemDataType, typename DestDataType,
             typename AddrOperandType>
    class LdInst :
        public LdInstBase<typename MemDataType::CType,
                          typename DestDataType::OperandType, AddrOperandType>,
        public MemInst
    {
        typename DestDataType::OperandType::DestOperand dest_vect[4];
        uint16_t num_dest_operands;
        void generateDisassembly() override;

      public:
        LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
            : LdInstBase<typename MemDataType::CType,
                         typename DestDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

            if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)brigOp;

                num_dest_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_dest_operands <= 4);
            } else {
                num_dest_operands = 1;
            }

            if (num_dest_operands > 1) {
                assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_dest_operands; ++i) {
                    dest_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_dest_operands > 1) {
                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_dest_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_dest_operands; ++k) {
                c0 *d = &((c0*)gpuDynInst->d_data)
                    [k * gpuDynInst->computeUnit()->wfSize()];

                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (this->isLocalMem()) {
                            // load from shared memory
                            *d = gpuDynInst->wavefront()->ldsChunk->
                                read<c0>(vaddr);
                        } else {
                            Request *req = new Request(0, vaddr, sizeof(c0), 0,
                                          gpuDynInst->computeUnit()->masterId(),
                                          0, gpuDynInst->wfDynId);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
                            pkt->dataStatic(d);

                            if (gpuDynInst->computeUnit()->shader->
                                separate_acquire_release &&
                                gpuDynInst->isAcquire()) {
                                // if this load has acquire semantics,
                                // set the response continuation function
                                // to perform an Acquire request
                                gpuDynInst->execContinuation =
                                    &GPUStaticInst::execLdAcq;

                                gpuDynInst->useContinuation = true;
                            } else {
                                // the request will be finished when
                                // the load completes
                                gpuDynInst->useContinuation = false;
                            }
                            // translation is performed in sendRequest()
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);
                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      private:
        void
        execLdAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after the load has completed, if the load has acquire
            // semantics, issue an acquire request.
            if (!this->isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->isAcquire()) {
                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->
                        injectGlobalMemFence(gpuDynInst, false, req);
                }
            }
        }

      public:
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            if (num_dest_operands > 1) {
                return dest_vect[operandIndex].isVectorRegister();
            } else if (num_dest_operands == 1) {
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isVectorRegister();
            }
            return false;
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isCondRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isCondRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isScalarRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isScalarRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return false;
            return true;
        }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.opSize());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].opSize());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.opSize());
            return 0;
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.regIndex());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].regIndex());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.regIndex());
            return -1;
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return(num_dest_operands+1);
            else
                return(num_dest_operands);
        }
        void execute(GPUDynInstPtr gpuDynInst) override;
    };

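    // Second-stage ld decoder: once the memory data type and the
    // destination data type are known, select the address-operand type
    // from the address operand's register kind.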
    template<typename MemDT, typename DestDT>
    GPUStaticInst*
    decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
                   tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdInst<MemDT, DestDT,
                                  SRegAddrOperand>(ib, obj, "ld");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdInst<MemDT, DestDT,
                                  DRegAddrOperand>(ib, obj, "ld");
              default:
                fatal("Bad ld register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad ld register operand kind %d\n", tmp.kind);
        }
    }

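    // First-stage ld decoder: maps the destination register kind and
    // the BRIG instruction type onto a 32- or 64-bit destination data
    // type, then defers to decodeLd2. Note that f16/f32 destinations
    // are carried in U32 register containers and f64 in U64.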
    template<typename MemDT>
    GPUStaticInst*
    decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
        BrigRegOperandInfo dest = findRegDataType(op_offs, obj);

        assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
               dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
        switch (dest.regKind) {
          case Brig::BRIG_REGISTER_KIND_SINGLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
              case Brig::BRIG_TYPE_B16:
              case Brig::BRIG_TYPE_B32:
                return decodeLd2<MemDT, B32>(ib, obj);
              case Brig::BRIG_TYPE_U8:
              case Brig::BRIG_TYPE_U16:
              case Brig::BRIG_TYPE_U32:
                return decodeLd2<MemDT, U32>(ib, obj);
              case Brig::BRIG_TYPE_S8:
              case Brig::BRIG_TYPE_S16:
              case Brig::BRIG_TYPE_S32:
                return decodeLd2<MemDT, S32>(ib, obj);
              case Brig::BRIG_TYPE_F16:
              case Brig::BRIG_TYPE_F32:
                return decodeLd2<MemDT, U32>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            }
          case Brig::BRIG_REGISTER_KIND_DOUBLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B64:
                return decodeLd2<MemDT, B64>(ib, obj);
              case Brig::BRIG_TYPE_U64:
                return decodeLd2<MemDT, U64>(ib, obj);
              case Brig::BRIG_TYPE_S64:
                return decodeLd2<MemDT, S64>(ib, obj);
              case Brig::BRIG_TYPE_F64:
                return decodeLd2<MemDT, U64>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            }
          default:
            fatal("Bad ld register operand type %d, %d\n", dest.regKind,
                  ib->type);
        }
    }

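    // Base class for st. Mirrors LdInstBase: the constructor decodes
    // segment, order, scope, and equivalence class from the BRIG
    // instruction and sets the matching flags. For plain st the source
    // is operand 0 and the address operand 1; for atomics the two are
    // swapped.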
    template<typename MemDataType, typename SrcOperandType,
             typename AddrOperandType>
    class StInstBase : public HsailGPUStaticInst
    {
      public:
        typename SrcOperandType::SrcOperand src;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigMemoryOrder memoryOrder;
        unsigned int equivClass;

        StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            setFlag(MemoryRef);
            setFlag(Store);

            if (ib->opcode == BRIG_OPCODE_ST) {
                const BrigInstMem *ldst = (const BrigInstMem*)ib;

                segment = (BrigSegment)ldst->segment;
                memoryOrder = BRIG_MEMORY_ORDER_NONE;
                memoryScope = BRIG_MEMORY_SCOPE_NONE;
                equivClass = ldst->equivClass;

                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                const BrigOperand *baseOp = obj->getOperand(op_offs);

                if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
                    (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
                    src.init(op_offs, obj);
                }

                op_offs = obj->getOperandPtr(ib->operands, 1);
                addr.init(op_offs, obj);
            } else {
                const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

                segment = (BrigSegment)at->segment;
                memoryScope = (BrigMemoryScope)at->memoryScope;
                memoryOrder = (BrigMemoryOrder)at->memoryOrder;
                equivClass = 0;

                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                addr.init(op_offs, obj);

                op_offs = obj->getOperandPtr(ib->operands, 1);
                src.init(op_offs, obj);
            }

            switch (memoryOrder) {
              case BRIG_MEMORY_ORDER_NONE:
                setFlag(NoOrder);
                break;
              case BRIG_MEMORY_ORDER_RELAXED:
                setFlag(RelaxedOrder);
                break;
              case BRIG_MEMORY_ORDER_SC_ACQUIRE:
                setFlag(Acquire);
                break;
              case BRIG_MEMORY_ORDER_SC_RELEASE:
                setFlag(Release);
                break;
              case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
                setFlag(AcquireRelease);
                break;
              default:
                fatal("StInst has bad memory order type\n");
            }

            switch (memoryScope) {
              case BRIG_MEMORY_SCOPE_NONE:
                setFlag(NoScope);
                break;
              case BRIG_MEMORY_SCOPE_WORKITEM:
                setFlag(WorkitemScope);
                break;
              case BRIG_MEMORY_SCOPE_WORKGROUP:
                setFlag(WorkgroupScope);
                break;
              case BRIG_MEMORY_SCOPE_AGENT:
                setFlag(DeviceScope);
                break;
              case BRIG_MEMORY_SCOPE_SYSTEM:
                setFlag(SystemScope);
                break;
              default:
                fatal("StInst has bad memory scope type\n");
            }

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                setFlag(GlobalSegment);
                break;
              case BRIG_SEGMENT_GROUP:
                setFlag(GroupSegment);
                break;
              case BRIG_SEGMENT_PRIVATE:
                setFlag(PrivateSegment);
                break;
              case BRIG_SEGMENT_READONLY:
                setFlag(ReadOnlySegment);
                break;
              case BRIG_SEGMENT_SPILL:
                setFlag(SpillSegment);
                break;
              case BRIG_SEGMENT_FLAT:
                setFlag(Flat);
                break;
              case BRIG_SEGMENT_ARG:
                setFlag(ArgSegment);
                break;
              default:
                panic("St: segment %d not supported\n", segment);
            }
        }

        int numDstRegOperands() override { return 0; }
        int numSrcRegOperands() override
        {
            return src.isVectorRegister() + this->addr.isVectorRegister();
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isVectorRegister() :
                   this->addr.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isCondRegister() :
                   this->addr.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isScalarRegister() :
                   this->addr.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) override { return false; }
        int getOperandSize(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.opSize() : this->addr.opSize();
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.regIndex() : this->addr.regIndex();
        }
    };

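    // Concrete store instruction, supporting scalar and v2/v4 vector
    // sources (up to four registers). If the store carries release
    // semantics and the shader models separate acquire/release,
    // initiateAcc issues a release fence first and performs the store
    // in the execSt continuation. Illustrative HSAIL (modifiers
    // omitted, sketch only):
    //
    //     st_global_u32 $s1, [$d0];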
    template<typename MemDataType, typename SrcDataType,
             typename AddrOperandType>
    class StInst :
        public StInstBase<MemDataType, typename SrcDataType::OperandType,
                          AddrOperandType>,
        public MemInst
    {
      public:
        typename SrcDataType::OperandType::SrcOperand src_vect[4];
        uint16_t num_src_operands;
        void generateDisassembly() override;

        StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode, int srcIdx)
            : StInstBase<MemDataType, typename SrcDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(SrcDataType::memType)
        {
            init_addr(&this->addr);

            BrigRegOperandInfo rinfo;
            unsigned op_offs = obj->getOperandPtr(ib->operands, srcIdx);
            const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
                const Brig::BrigOperandConstantBytes *op =
                    (Brig::BrigOperandConstantBytes*)baseOp;

                rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
                                           Brig::BRIG_TYPE_NONE);
            } else {
                rinfo = findRegDataType(op_offs, obj);
            }

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)baseOp;

                num_src_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_src_operands <= 4);
            } else {
                num_src_operands = 1;
            }

            if (num_src_operands > 1) {
                assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_src_operands; ++i) {
                    src_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before performing a store, check if this store has
            // release semantics, and if so issue a release first
            if (!this->isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->isRelease()) {

                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->execContinuation = &GPUStaticInst::execSt;
                    gpuDynInst->useContinuation = true;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->
                        injectGlobalMemFence(gpuDynInst, false, req);

                    return;
                }
            }

            // if there is no release semantic, perform stores immediately
            execSt(gpuDynInst);
        }

      private:
        // execSt may be called through a continuation
        // if the store had release semantics. see comment for
        // execSt in gpu_static_inst.hh
        void
        execSt(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_src_operands > 1) {
                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_src_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_src_operands; ++k) {
                c0 *d = &((c0*)gpuDynInst->d_data)
                    [k * gpuDynInst->computeUnit()->wfSize()];

                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (this->isLocalMem()) {
                            // store to shared memory
                            gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
                                                                         *d);
                        } else {
                            Request *req =
                              new Request(0, vaddr, sizeof(c0), 0,
                                          gpuDynInst->computeUnit()->masterId(),
                                          0, gpuDynInst->wfDynId);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
                            pkt->dataStatic<c0>(d);

                            // translation is performed in sendRequest()
                            // the request will be finished when the
                            // store completes
                            gpuDynInst->useContinuation = false;
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);
                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      public:
        bool isVectorRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isVectorRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isVectorRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isVectorRegister();
            return false;
        }
        bool isCondRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isCondRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isCondRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isScalarRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isScalarRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) override { return false; }
        int getOperandSize(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.opSize();
            if (num_src_operands > 1)
                return src_vect[operandIndex].opSize();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.opSize();
            return 0;
        }
        int getRegisterIndex(int operandIndex) override
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.regIndex();
            if (num_src_operands > 1)
                return src_vect[operandIndex].regIndex();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.regIndex();
            return -1;
        }
        int getNumOperands() override
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return num_src_operands + 1;
            else
                return num_src_operands;
        }
        void execute(GPUDynInstPtr gpuDynInst) override;
    };

    template<typename DataType, typename SrcDataType>
    GPUStaticInst*
    decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        int srcIdx = 0;
        int destIdx = 1;
        if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
            ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
            srcIdx = 1;
            destIdx = 0;
        }
        unsigned op_offs = obj->getOperandPtr(ib->operands, destIdx);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new StInst<DataType, SrcDataType,
                              NoRegAddrOperand>(ib, obj, "st", srcIdx);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new StInst<DataType, SrcDataType,
                                  SRegAddrOperand>(ib, obj, "st", srcIdx);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new StInst<DataType, SrcDataType,
                                  DRegAddrOperand>(ib, obj, "st", srcIdx);
              default:
                fatal("Bad st register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad st register operand kind %d\n", tmp.kind);
        }
    }

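    // Base class for atomics. Decodes the segment, memory order,
    // memory scope, and atomic operation from the BrigInstAtomic and
    // sets the matching flags. The atomic form returns the old value
    // in a destination register; atomicnoret has no destination, which
    // shifts the operand indices by one.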
    template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
             bool HasDst>
    class AtomicInstBase : public HsailGPUStaticInst
    {
      public:
        typename OperandType::DestOperand dest;
        typename OperandType::SrcOperand src[NumSrcOperands];
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigAtomicOperation atomicOperation;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigOpcode opcode;

        AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                       const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            atomicOperation = (BrigAtomicOperation)at->atomicOperation;
            opcode = (BrigOpcode)ib->opcode;

            assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET ||
                   opcode == Brig::BRIG_OPCODE_ATOMIC);

            setFlag(MemoryRef);

            if (opcode == Brig::BRIG_OPCODE_ATOMIC) {
                setFlag(AtomicReturn);
            } else {
                setFlag(AtomicNoReturn);
            }

            switch (memoryOrder) {
              case BRIG_MEMORY_ORDER_NONE:
                setFlag(NoOrder);
                break;
              case BRIG_MEMORY_ORDER_RELAXED:
                setFlag(RelaxedOrder);
                break;
              case BRIG_MEMORY_ORDER_SC_ACQUIRE:
                setFlag(Acquire);
                break;
              case BRIG_MEMORY_ORDER_SC_RELEASE:
                setFlag(Release);
                break;
              case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
                setFlag(AcquireRelease);
                break;
              default:
                fatal("AtomicInst has bad memory order type\n");
            }

            switch (memoryScope) {
              case BRIG_MEMORY_SCOPE_NONE:
                setFlag(NoScope);
                break;
              case BRIG_MEMORY_SCOPE_WORKITEM:
                setFlag(WorkitemScope);
                break;
              case BRIG_MEMORY_SCOPE_WORKGROUP:
                setFlag(WorkgroupScope);
                break;
              case BRIG_MEMORY_SCOPE_AGENT:
                setFlag(DeviceScope);
                break;
              case BRIG_MEMORY_SCOPE_SYSTEM:
                setFlag(SystemScope);
                break;
              default:
                fatal("AtomicInst has bad memory scope type\n");
            }

            switch (atomicOperation) {
              case Brig::BRIG_ATOMIC_AND:
                setFlag(AtomicAnd);
                break;
              case Brig::BRIG_ATOMIC_OR:
                setFlag(AtomicOr);
                break;
              case Brig::BRIG_ATOMIC_XOR:
                setFlag(AtomicXor);
                break;
              case Brig::BRIG_ATOMIC_CAS:
                setFlag(AtomicCAS);
                break;
              case Brig::BRIG_ATOMIC_EXCH:
                setFlag(AtomicExch);
                break;
              case Brig::BRIG_ATOMIC_ADD:
                setFlag(AtomicAdd);
                break;
              case Brig::BRIG_ATOMIC_WRAPINC:
                setFlag(AtomicInc);
                break;
              case Brig::BRIG_ATOMIC_WRAPDEC:
                setFlag(AtomicDec);
                break;
              case Brig::BRIG_ATOMIC_MIN:
                setFlag(AtomicMin);
                break;
              case Brig::BRIG_ATOMIC_MAX:
                setFlag(AtomicMax);
                break;
              case Brig::BRIG_ATOMIC_SUB:
                setFlag(AtomicSub);
                break;
              default:
                fatal("Bad BrigAtomicOperation code %d\n", atomicOperation);
            }

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                setFlag(GlobalSegment);
                break;
              case BRIG_SEGMENT_GROUP:
                setFlag(GroupSegment);
                break;
              case BRIG_SEGMENT_FLAT:
                setFlag(Flat);
                break;
              default:
                panic("Atomic: segment %d not supported\n", segment);
            }

            if (HasDst) {
                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                dest.init(op_offs, obj);

                op_offs = obj->getOperandPtr(ib->operands, 1);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 2);
                    src[i].init(op_offs, obj);
                }
            } else {
                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 1);
                    src[i].init(op_offs, obj);
                }
            }
        }

        int numSrcRegOperands()
        {
            int operands = 0;
            for (int i = 0; i < NumSrcOperands; i++) {
                if (src[i].isVectorRegister()) {
                    operands++;
                }
            }
            if (addr.isVectorRegister())
                operands++;
            return operands;
        }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands()
        {
            if (addr.isVectorRegister())
                return(NumSrcOperands + 2);
            return(NumSrcOperands + 1);
        }
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isVectorRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isCondRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isCondRegister());
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isScalarRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isScalarRegister());
            else
                return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return true;
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return false;
        }
        bool isDstOperand(int operandIndex)
        {
            if (operandIndex <= NumSrcOperands)
                return false;
            else
                return true;
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].opSize());
            else if (operandIndex == NumSrcOperands)
                return(addr.opSize());
            else
                return(dest.opSize());
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].regIndex());
            else if (operandIndex == NumSrcOperands)
                return(addr.regIndex());
            else
                return(dest.regIndex());
        }
    };

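    // Concrete atomic RMW instruction. If the atomic carries release
    // semantics and the shader models separate acquire/release,
    // initiateAcc issues a release fence first and performs the RMW in
    // the execAtomic continuation. Local (LDS) atomics are executed
    // directly against the workgroup's LDS chunk.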
    template<typename MemDataType, typename AddrOperandType,
             int NumSrcOperands, bool HasDst>
    class AtomicInst :
        public AtomicInstBase<typename MemDataType::OperandType,
                              AddrOperandType, NumSrcOperands, HasDst>,
        public MemInst
    {
      public:
        void generateDisassembly() override;

        AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
            : AtomicInstBase<typename MemDataType::OperandType,
                             AddrOperandType, NumSrcOperands, HasDst>
                (ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before doing the RMW, check if this atomic has
            // release semantics, and if so issue a release first
            if (!this->isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && (gpuDynInst->isRelease()
                    || gpuDynInst->isAcquireRelease())) {

                    gpuDynInst->statusBitVector = VectorMask(1);

                    gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
                    gpuDynInst->useContinuation = true;

                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst,
                                                                    false, req);

                    return;
                }
            }

            // if there is no release semantic, execute the RMW immediately
            execAtomic(gpuDynInst);
        }

        void execute(GPUDynInstPtr gpuDynInst) override;

      private:
        // execAtomic may be called through a continuation
        // if the RMW had release semantics. see comment for
        // execContinuation in gpu_dyn_inst.hh
        void
        execAtomic(GPUDynInstPtr gpuDynInst) override
        {
            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            typedef typename MemDataType::CType c0;

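            // per-lane data pointers: d receives the value returned by
            // the RMW (the pre-op memory value), e is the first source
            // operand (the compare value for CAS), and f is the second
            // source operand (the swap value for CAS)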
            c0 *d = &((c0*) gpuDynInst->d_data)[0];
            c0 *e = &((c0*) gpuDynInst->a_data)[0];
            c0 *f = &((c0*) gpuDynInst->x_data)[0];

            for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
                if (gpuDynInst->exec_mask[i]) {
                    Addr vaddr = gpuDynInst->addr[i];

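                    // LDS (group-segment) accesses complete functionally
                    // right here; global accesses are packaged into
                    // requests and handed to the memory system below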
                    if (this->isLocalMem()) {
                        Wavefront *wavefront = gpuDynInst->wavefront();
                        *d = wavefront->ldsChunk->read<c0>(vaddr);

                        if (this->isAtomicAdd()) {
                            wavefront->ldsChunk->write<c0>(vaddr, *d + *e);
                        } else if (this->isAtomicSub()) {
                            wavefront->ldsChunk->write<c0>(vaddr, *d - *e);
                        } else if (this->isAtomicMax()) {
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::max(*d, *e));
                        } else if (this->isAtomicMin()) {
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::min(*d, *e));
                        } else if (this->isAtomicAnd()) {
                            wavefront->ldsChunk->write<c0>(vaddr, *d & *e);
                        } else if (this->isAtomicOr()) {
                            wavefront->ldsChunk->write<c0>(vaddr, *d | *e);
                        } else if (this->isAtomicXor()) {
                            wavefront->ldsChunk->write<c0>(vaddr, *d ^ *e);
                        } else if (this->isAtomicInc()) {
                            wavefront->ldsChunk->write<c0>(vaddr, *d + 1);
                        } else if (this->isAtomicDec()) {
                            wavefront->ldsChunk->write<c0>(vaddr, *d - 1);
                        } else if (this->isAtomicExch()) {
                            wavefront->ldsChunk->write<c0>(vaddr, *e);
                        } else if (this->isAtomicCAS()) {
                            wavefront->ldsChunk->write<c0>(vaddr,
                                (*d == *e) ? *f : *d);
                        } else {
                            fatal("Unrecognized or invalid HSAIL atomic op "
                                  "type.\n");
                        }
                    } else {
                        Request *req =
                            new Request(0, vaddr, sizeof(c0), 0,
                                        gpuDynInst->computeUnit()->masterId(),
                                        0, gpuDynInst->wfDynId,
                                        gpuDynInst->makeAtomicOpFunctor<c0>(
                                            e, f));

                        gpuDynInst->setRequestFlags(req);
                        PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
                        pkt->dataStatic(d);

                        if (gpuDynInst->computeUnit()->shader->
                            separate_acquire_release &&
                            gpuDynInst->isAcquire()) {
                            // if this atomic has acquire semantics,
                            // schedule the continuation to perform an
                            // acquire after the RMW completes
                            gpuDynInst->execContinuation =
                                &GPUStaticInst::execAtomicAcq;

                            gpuDynInst->useContinuation = true;
                        } else {
                            // the request will be finished when the RMW
                            // completes
                            gpuDynInst->useContinuation = false;
                        }
                        // translation is performed in sendRequest()
                        gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
                                                               pkt);
                    }
                }

                ++d;
                ++e;
                ++f;
            }

            gpuDynInst->updateStats();
        }
        // execAtomicAcq will always be called through a continuation.
        // see comment for execContinuation in gpu_dyn_inst.hh
        void
        execAtomicAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after performing the RMW, check to see if this instruction
            // has acquire semantics, and if so, issue an acquire
            if (!this->isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->isAcquire()) {
                    gpuDynInst->statusBitVector = VectorMask(1);

                    // the request will be finished when
                    // the acquire completes
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst,
                                                                    false, req);
                }
            }
        }
    };

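    // pick the concrete instruction for an atomic BRIG op: the ld/st
    // variants decode to plain loads and stores, everything else becomes
    // an AtomicInst with or without a destination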
    template<typename DataType, typename AddrOperandType, int NumSrcOperands>
    GPUStaticInst*
    constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
            return decodeLd<DataType>(ib, obj);
        } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
                return decodeSt<S8,S8>(ib, obj);
              case Brig::BRIG_TYPE_B16:
                return decodeSt<S16,S16>(ib, obj);
              case Brig::BRIG_TYPE_B32:
                return decodeSt<S32,S32>(ib, obj);
              case Brig::BRIG_TYPE_B64:
                return decodeSt<S64,S64>(ib, obj);
              default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
            }
        } else {
            if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
                return new AtomicInst<DataType, AddrOperandType,
                    NumSrcOperands, false>(ib, obj, "atomicnoret");
            else
                return new AtomicInst<DataType, AddrOperandType,
                    NumSrcOperands, true>(ib, obj, "atomic");
        }
    }

    template<typename DataType, int NumSrcOperands>
    GPUStaticInst*
    decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
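        // the no-return form has no destination operand, so its address
        // is operand 0; the returning form keeps its destination in
        // operand 0 and its address in operand 1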
        unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
            Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;

        unsigned op_offs = obj->getOperandPtr(ib->operands, addrIndex);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return constructAtomic<DataType, NoRegAddrOperand,
                                   NumSrcOperands>(ib, obj);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return constructAtomic<DataType, SRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return constructAtomic<DataType, DRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              default:
                fatal("Bad atomic register kind %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad atomic address operand kind %d\n", tmp.kind);
        }
    }

    template<typename DataType>
    GPUStaticInst*
    decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

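        // CAS takes two source operands (the compare value and the swap
        // value); every other atomic operation takes one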
        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }

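    // same operand-count selection as decodeAtomic above, for the
    // no-return opcode's decode entry point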
    template<typename DataType>
    GPUStaticInst*
    decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }
} // namespace HsailISA

#endif // __ARCH_HSAIL_INSTS_MEM_HH__