mem.hh revision 11325:67cc559d513a
/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#ifndef __ARCH_HSAIL_INSTS_MEM_HH__
#define __ARCH_HSAIL_INSTS_MEM_HH__

#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"

namespace HsailISA
{
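    // MemInst is a mixin for all HSAIL memory instructions. It records
    // the size in bytes of the memory operand (derived from the BRIG
    // memory type) and a pointer to the instruction's address operand.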
    class MemInst
    {
      public:
        MemInst() : size(0), addr_operand(nullptr) { }

        MemInst(Enums::MemType m_type)
        {
            if (m_type == Enums::M_U64 ||
                m_type == Enums::M_S64 ||
                m_type == Enums::M_F64) {
                size = 8;
            } else if (m_type == Enums::M_U32 ||
                       m_type == Enums::M_S32 ||
                       m_type == Enums::M_F32) {
                size = 4;
            } else if (m_type == Enums::M_U16 ||
                       m_type == Enums::M_S16 ||
                       m_type == Enums::M_F16) {
                size = 2;
            } else {
                size = 1;
            }

            addr_operand = nullptr;
        }

        void
        init_addr(AddrOperandBase *_addr_operand)
        {
            addr_operand = _addr_operand;
        }

      private:
        int size;
        AddrOperandBase *addr_operand;

      public:
        int getMemOperandSize() { return size; }
        AddrOperandBase *getAddressOperand() { return addr_operand; }
    };

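    // LdaInstBase is the base class for the HSAIL lda instruction, which
    // computes the address of a segment variable and writes it to a
    // register; it never touches memory. Operand 0 is the destination and
    // operand 1 is the address expression.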
    template<typename DestOperandType, typename AddrOperandType>
    class LdaInstBase : public HsailGPUStaticInst
    {
      public:
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                    const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            dest.init(op_offs, obj);
            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        int numSrcRegOperands() { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister())
                return 2;
            return 1;
        }
    };

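    // LdaInst binds LdaInstBase to concrete destination and address
    // operand types and registers the address operand with the MemInst
    // mixin via init_addr().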
    template<typename DestDataType, typename AddrOperandType>
    class LdaInst :
        public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
        public MemInst
    {
      public:
        void generateDisassembly();

        LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                const char *_opcode)
            : LdaInstBase<typename DestDataType::OperandType,
                          AddrOperandType>(ib, obj, _opcode)
        {
            init_addr(&this->addr);
        }

        void execute(GPUDynInstPtr gpuDynInst);
    };

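    // decodeLda inspects the address operand (operand 1) to select the
    // address-register flavor of LdaInst to build. For example, assuming
    // the decode tables map lda_u64 to DataType = U64, an instruction
    // such as
    //     lda_global_u64 $d1, [&myvar];   // &myvar: hypothetical symbol
    // has a plain address operand and would decode to
    //     LdaInst<U64, NoRegAddrOperand>.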
    template<typename DataType>
    GPUStaticInst*
    decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
        BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);

        if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
        } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (regDataType.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
              default:
192                fatal("Bad ldas register operand type %d\n", regDataType.type);
            }
        } else {
            fatal("Bad ldas register operand kind %d\n", regDataType.kind);
        }
    }

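    // LdInstBase holds the state common to all HSAIL loads: segment,
    // memory order and scope, equivalence class, and width. A plain ld is
    // decoded by initLd() from a BrigInstMem; a load with acquire
    // semantics arrives as a BrigInstAtomic and is decoded by
    // initAtomicLd().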
    template<typename MemOperandType, typename DestOperandType,
             typename AddrOperandType>
    class LdInstBase : public HsailGPUStaticInst
    {
      public:
        Brig::BrigWidth8_t width;
        typename DestOperandType::DestOperand dest;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigMemoryScope memoryScope;
        unsigned int equivClass;
        bool isArgLoad()
        {
            return segment == Brig::BRIG_SEGMENT_KERNARG ||
                   segment == Brig::BRIG_SEGMENT_ARG;
        }
        void
        initLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
        {
            using namespace Brig;

            const BrigInstMem *ldst = (const BrigInstMem*)ib;

            segment = (BrigSegment)ldst->segment;
            memoryOrder = BRIG_MEMORY_ORDER_NONE;
            memoryScope = BRIG_MEMORY_SCOPE_NONE;
            equivClass = ldst->equivClass;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_READ;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_READ;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_READ;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_READ;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_READ;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_READ;
                break;

              case BRIG_SEGMENT_KERNARG:
                o_type = Enums::OT_KERN_READ;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("Ld: segment %d not supported\n", segment);
            }

            width = ldst->width;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        void
        initAtomicLd(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            equivClass = 0;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_READ;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_READ;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_READ;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_READ;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_READ;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_READ;
                break;

              case BRIG_SEGMENT_KERNARG:
                o_type = Enums::OT_KERN_READ;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("Ld: segment %d not supported\n", segment);
            }

            width = BRIG_WIDTH_1;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

            if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
                dest.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            if (ib->opcode == BRIG_OPCODE_LD) {
                initLd(ib, obj, _opcode);
            } else {
                initAtomicLd(ib, obj, _opcode);
            }
        }

        int numSrcRegOperands() { return(this->addr.isVectorRegister()); }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isVectorRegister() :
                   this->addr.isVectorRegister());
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isCondRegister() :
                   this->addr.isCondRegister());
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.isScalarRegister() :
                   this->addr.isScalarRegister());
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex > 0)
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return(operandIndex == 0);
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.opSize() :
                   this->addr.opSize());
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return((operandIndex == 0) ? dest.regIndex() :
                   this->addr.regIndex());
        }
    };

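    // LdInst adds the destination operand(s) and the execution path. A
    // load may write up to four registers (ld_v2/ld_v3/ld_v4); the
    // constructor derives their count from the BRIG operand list. For
    // example, assuming the decode tables map ld_v2_u32 to
    // MemDataType = U32, an instruction such as
    //     ld_v2_global_u32 ($s2, $s3), [$s0];
    // would set num_dest_operands to 2 and initialize dest_vect[0..1].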
    template<typename MemDataType, typename DestDataType,
             typename AddrOperandType>
    class LdInst :
        public LdInstBase<typename MemDataType::CType,
                          typename DestDataType::OperandType, AddrOperandType>,
        public MemInst
    {
        typename DestDataType::OperandType::DestOperand dest_vect[4];
        uint16_t num_dest_operands;
        void generateDisassembly();

      public:
        LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
            : LdInstBase<typename MemDataType::CType,
                         typename DestDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);

            if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)brigOp;

                num_dest_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_dest_operands <= 4);
            } else {
                num_dest_operands = 1;
            }

            if (num_dest_operands > 1) {
                assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_dest_operands; ++i) {
                    dest_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }

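        // initiateAcc issues the per-lane accesses for this load. Group
        // segment (LDS) reads complete immediately through the
        // wavefront's ldsChunk; all other segments send one
        // Request/Packet per active lane to the compute unit, which
        // translates the address in sendRequest(). An acquire load
        // registers execLdAcq() as the response continuation so the
        // acquire fence is issued after the data returns.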
        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_dest_operands > 1) {
                for (int i = 0; i < VSZ; ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_dest_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_dest_operands; ++k) {

                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];

                for (int i = 0; i < VSZ; ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (isLocalMem()) {
                            // load from shared memory
                            *d = gpuDynInst->wavefront()->ldsChunk->
                                read<c0>(vaddr);
                        } else {
                            Request *req = new Request(0, vaddr, sizeof(c0), 0,
                                          gpuDynInst->computeUnit()->masterId(),
                                          0, gpuDynInst->wfDynId, i);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
                            pkt->dataStatic(d);

                            if (gpuDynInst->computeUnit()->shader->
                                separate_acquire_release &&
                                gpuDynInst->memoryOrder ==
                                Enums::MEMORY_ORDER_SC_ACQUIRE) {
                                // if this load has acquire semantics,
                                // set the response continuation function
                                // to perform an Acquire request
                                gpuDynInst->execContinuation =
                                    &GPUStaticInst::execLdAcq;

                                gpuDynInst->useContinuation = true;
                            } else {
                                // the request will be finished when
                                // the load completes
                                gpuDynInst->useContinuation = false;
                            }
                            // translation is performed in sendRequest()
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);
                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      private:
        void
        execLdAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after the load has completed, if the load has acquire
            // semantics, issue an acquire request.
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(
                        gpuDynInst, false, req);
                }
            }
        }

      public:
        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }

        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            if (num_dest_operands > 1) {
                return dest_vect[operandIndex].isVectorRegister();
            } else if (num_dest_operands == 1) {
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isVectorRegister();
            }
            return false;
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isCondRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isCondRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isScalarRegister());
            if (num_dest_operands > 1)
                return dest_vect[operandIndex].isScalarRegister();
            else if (num_dest_operands == 1)
                return LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.isVectorRegister());
            return false;
        }
        bool isDstOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return false;
            return true;
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.opSize());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].opSize());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.opSize());
            return 0;
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if ((num_dest_operands != getNumOperands()) &&
                (operandIndex == (getNumOperands()-1)))
                return(this->addr.regIndex());
            if (num_dest_operands > 1)
                return(dest_vect[operandIndex].regIndex());
            else if (num_dest_operands == 1)
                return(LdInstBase<typename MemDataType::CType,
                       typename DestDataType::OperandType,
                       AddrOperandType>::dest.regIndex());
            return -1;
        }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return(num_dest_operands + 1);
            else
                return(num_dest_operands);
        }
        void execute(GPUDynInstPtr gpuDynInst);
    };

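    // decodeLd2 selects the address operand flavor for a load once the
    // memory and destination data types are known; decodeLd (below) maps
    // the BRIG destination register kind and instruction type onto those
    // data types first.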
    template<typename MemDT, typename DestDT>
    GPUStaticInst*
    decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
                   tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new LdInst<MemDT, DestDT,
                                  SRegAddrOperand>(ib, obj, "ld");
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new LdInst<MemDT, DestDT,
                                  DRegAddrOperand>(ib, obj, "ld");
              default:
                fatal("Bad ld register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad ld register operand kind %d\n", tmp.kind);
        }
    }

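    // decodeLd dispatches on the destination register kind ($s vs. $d)
    // and the BRIG instruction type. Sub-word types (b8/u8/s8, b16/u16/
    // s16, f16) widen to a 32-bit destination, and f32/f64 reuse U32/U64
    // since the destination register only cares about width. For example,
    // assuming the decode tables map ld_u8 to MemDT = U8, an instruction
    // such as
    //     ld_global_u8 $s1, [$s0];
    // would decode through decodeLd2<U8, U32>.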
    template<typename MemDT>
    GPUStaticInst*
    decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
        BrigRegOperandInfo dest = findRegDataType(op_offs, obj);

        assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
               dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
        switch (dest.regKind) {
          case Brig::BRIG_REGISTER_KIND_SINGLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
              case Brig::BRIG_TYPE_B16:
              case Brig::BRIG_TYPE_B32:
                return decodeLd2<MemDT, B32>(ib, obj);
              case Brig::BRIG_TYPE_U8:
              case Brig::BRIG_TYPE_U16:
              case Brig::BRIG_TYPE_U32:
                return decodeLd2<MemDT, U32>(ib, obj);
              case Brig::BRIG_TYPE_S8:
              case Brig::BRIG_TYPE_S16:
              case Brig::BRIG_TYPE_S32:
                return decodeLd2<MemDT, S32>(ib, obj);
              case Brig::BRIG_TYPE_F16:
              case Brig::BRIG_TYPE_F32:
                return decodeLd2<MemDT, U32>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            };
          case Brig::BRIG_REGISTER_KIND_DOUBLE:
            switch (ib->type) {
              case Brig::BRIG_TYPE_B64:
                return decodeLd2<MemDT, B64>(ib, obj);
              case Brig::BRIG_TYPE_U64:
                return decodeLd2<MemDT, U64>(ib, obj);
              case Brig::BRIG_TYPE_S64:
                return decodeLd2<MemDT, S64>(ib, obj);
              case Brig::BRIG_TYPE_F64:
                return decodeLd2<MemDT, U64>(ib, obj);
              default:
                fatal("Bad ld register operand type %d, %d\n",
                      dest.regKind, ib->type);
            };
          default:
            fatal("Bad ld register operand type %d, %d\n", dest.regKind,
                  ib->type);
        }
    }

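    // StInstBase mirrors LdInstBase for stores. For a plain st the stored
    // value is operand 0 and the address operand 1; for an atomic store
    // (BrigInstAtomic) the order is reversed, with the address first and
    // the stored value second.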
    template<typename MemDataType, typename SrcOperandType,
             typename AddrOperandType>
    class StInstBase : public HsailGPUStaticInst
    {
      public:
        typename SrcOperandType::SrcOperand src;
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigMemoryOrder memoryOrder;
        unsigned int equivClass;

        void
        initSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode)
        {
            using namespace Brig;

            const BrigInstMem *ldst = (const BrigInstMem*)ib;

            segment = (BrigSegment)ldst->segment;
            memoryOrder = BRIG_MEMORY_ORDER_NONE;
            memoryScope = BRIG_MEMORY_SCOPE_NONE;
            equivClass = ldst->equivClass;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_WRITE;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_WRITE;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_WRITE;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_WRITE;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_WRITE;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_WRITE;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("St: segment %d not supported\n", segment);
            }

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            const BrigOperand *baseOp = obj->getOperand(op_offs);

            if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
                (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
                src.init(op_offs, obj);
            }

            op_offs = obj->getOperandPtr(ib->operands, 1);
            addr.init(op_offs, obj);
        }

        void
        initAtomicSt(const Brig::BrigInstBase *ib, const BrigObject *obj,
                     const char *_opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            equivClass = 0;

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_WRITE;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_WRITE;
                break;

              case BRIG_SEGMENT_PRIVATE:
                o_type = Enums::OT_PRIVATE_WRITE;
                break;

              case BRIG_SEGMENT_READONLY:
                o_type = Enums::OT_READONLY_WRITE;
                break;

              case BRIG_SEGMENT_SPILL:
                o_type = Enums::OT_SPILL_WRITE;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_WRITE;
                break;

              case BRIG_SEGMENT_ARG:
                o_type = Enums::OT_ARG;
                break;

              default:
                panic("St: segment %d not supported\n", segment);
            }

            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            addr.init(op_offs, obj);

            op_offs = obj->getOperandPtr(ib->operands, 1);
            src.init(op_offs, obj);
        }

        StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            if (ib->opcode == BRIG_OPCODE_ST) {
                initSt(ib, obj, _opcode);
            } else {
                initAtomicSt(ib, obj, _opcode);
            }
        }

        int numDstRegOperands() { return 0; }
        int numSrcRegOperands()
        {
            return src.isVectorRegister() + this->addr.isVectorRegister();
        }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return 2;
            else
                return 1;
        }
        bool isVectorRegister(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isVectorRegister() :
                   this->addr.isVectorRegister();
        }
        bool isCondRegister(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isCondRegister() :
                   this->addr.isCondRegister();
        }
        bool isScalarRegister(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.isScalarRegister() :
                   this->addr.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) { return false; }
        int getOperandSize(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.opSize() : this->addr.opSize();
        }
        int getRegisterIndex(int operandIndex)
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return !operandIndex ? src.regIndex() : this->addr.regIndex();
        }
    };


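    // StInst adds up to four source operands (st_v2/st_v3/st_v4) and the
    // store execution path. srcIdx tells the constructor where the stored
    // value sits in the operand list: 0 for a plain st, 1 for the atomic
    // form (see decodeSt below).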
    template<typename MemDataType, typename SrcDataType,
             typename AddrOperandType>
    class StInst :
        public StInstBase<MemDataType, typename SrcDataType::OperandType,
                          AddrOperandType>,
        public MemInst
    {
      public:
        typename SrcDataType::OperandType::SrcOperand src_vect[4];
        uint16_t num_src_operands;
        void generateDisassembly();

        StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
               const char *_opcode, int srcIdx)
            : StInstBase<MemDataType, typename SrcDataType::OperandType,
                         AddrOperandType>(ib, obj, _opcode),
              MemInst(SrcDataType::memType)
        {
            init_addr(&this->addr);

            BrigRegOperandInfo rinfo;
            unsigned op_offs = obj->getOperandPtr(ib->operands, srcIdx);
            const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
                const Brig::BrigOperandConstantBytes *op =
                    (Brig::BrigOperandConstantBytes*)baseOp;

                rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
                                           Brig::BRIG_TYPE_NONE);
            } else {
                rinfo = findRegDataType(op_offs, obj);
            }

            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
                const Brig::BrigOperandOperandList *brigRegVecOp =
                    (const Brig::BrigOperandOperandList*)baseOp;

                num_src_operands =
                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;

                assert(num_src_operands <= 4);
            } else {
                num_src_operands = 1;
            }

            if (num_src_operands > 1) {
                assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);

                for (int i = 0; i < num_src_operands; ++i) {
                    src_vect[i].init_from_vect(op_offs, obj, i);
                }
            }
        }

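        // A release store is performed in two steps: initiateAcc first
        // injects a release fence and defers the memory writes to
        // execSt() through the continuation mechanism; a store without
        // release semantics calls execSt() directly.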
        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before performing a store, check if this store has
            // release semantics, and if so issue a release first
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_RELEASE) {

                    gpuDynInst->statusBitVector = VectorMask(1);
                    gpuDynInst->execContinuation = &GPUStaticInst::execSt;
                    gpuDynInst->useContinuation = true;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(
                        gpuDynInst, false, req);

                    return;
                }
            }

            // if there are no release semantics, perform the store
            // immediately
            execSt(gpuDynInst);
        }

        bool
        isLocalMem() const override
        {
            return this->segment == Brig::BRIG_SEGMENT_GROUP;
        }

      private:
        // execSt may be called through a continuation
        // if the store had release semantics. see comment for
        // execSt in gpu_static_inst.hh
        void
        execSt(GPUDynInstPtr gpuDynInst) override
        {
            typedef typename MemDataType::CType c0;

            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;

            if (num_src_operands > 1) {
                for (int i = 0; i < VSZ; ++i)
                    if (gpuDynInst->exec_mask[i])
                        gpuDynInst->statusVector.push_back(num_src_operands);
                    else
                        gpuDynInst->statusVector.push_back(0);
            }

            for (int k = 0; k < num_src_operands; ++k) {
                c0 *d = &((c0*)gpuDynInst->d_data)[k * VSZ];

                for (int i = 0; i < VSZ; ++i) {
                    if (gpuDynInst->exec_mask[i]) {
                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);

                        if (isLocalMem()) {
                            // store to shared memory
                            gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
                                                                         *d);
                        } else {
                            Request *req =
                              new Request(0, vaddr, sizeof(c0), 0,
                                          gpuDynInst->computeUnit()->masterId(),
                                          0, gpuDynInst->wfDynId, i);

                            gpuDynInst->setRequestFlags(req);
                            PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
                            pkt->dataStatic<c0>(d);

                            // translation is performed in sendRequest();
                            // the request will be finished when the store
                            // completes
                            gpuDynInst->useContinuation = false;
                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
                                                                   i, pkt);
                        }
                    }
                    ++d;
                }
            }

            gpuDynInst->updateStats();
        }

      public:
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isVectorRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isVectorRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isVectorRegister();
            return false;
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isCondRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isCondRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isCondRegister();
            return false;
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.isScalarRegister();
            if (num_src_operands > 1)
                return src_vect[operandIndex].isScalarRegister();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.isScalarRegister();
            return false;
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) { return false; }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.opSize();
            if (num_src_operands > 1)
                return src_vect[operandIndex].opSize();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.opSize();
            return 0;
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == num_src_operands)
                return this->addr.regIndex();
            if (num_src_operands > 1)
                return src_vect[operandIndex].regIndex();
            else if (num_src_operands == 1)
                return StInstBase<MemDataType,
                       typename SrcDataType::OperandType,
                       AddrOperandType>::src.regIndex();
            return -1;
        }
        int getNumOperands()
        {
            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
                return num_src_operands + 1;
            else
                return num_src_operands;
        }
        void execute(GPUDynInstPtr gpuDynInst);
    };

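    // decodeSt selects the address operand flavor for a store. Atomics
    // decoded through this path have their value and address operands
    // swapped relative to a plain st, hence the srcIdx/destIdx shuffle
    // below.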
    template<typename DataType, typename SrcDataType>
    GPUStaticInst*
    decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        int srcIdx = 0;
        int destIdx = 1;
        if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
            ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
            srcIdx = 1;
            destIdx = 0;
        }
        unsigned op_offs = obj->getOperandPtr(ib->operands, destIdx);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return new StInst<DataType, SrcDataType,
                              NoRegAddrOperand>(ib, obj, "st", srcIdx);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return new StInst<DataType, SrcDataType,
                                  SRegAddrOperand>(ib, obj, "st", srcIdx);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return new StInst<DataType, SrcDataType,
                                  DRegAddrOperand>(ib, obj, "st", srcIdx);
              default:
1157                fatal("Bad st register operand type %d\n", tmp.type);
            }
        } else {
            fatal("Bad st register operand kind %d\n", tmp.kind);
        }
    }

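    // brigAtomicToMemOpType maps a BRIG atomic opcode/operation pair to
    // the simulator's MemOpType enum (MO_AADD, MO_ACAS, ...); the MO_ANR*
    // variants correspond to atomicNoRet operations, which return no
    // value.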
    Enums::MemOpType brigAtomicToMemOpType(Brig::BrigOpcode brigOpCode,
                                           Brig::BrigAtomicOperation brigOp);

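    // AtomicInstBase covers both atomic (HasDst = true) and atomicNoRet
    // (HasDst = false) instructions. NumSrcOperands is 1 for most
    // operations and 2 for compare-and-swap, which carries both the
    // compare value and the swap value.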
    template<typename OperandType, typename AddrOperandType,
             int NumSrcOperands, bool HasDst>
    class AtomicInstBase : public HsailGPUStaticInst
    {
      public:
        typename OperandType::DestOperand dest;
        typename OperandType::SrcOperand src[NumSrcOperands];
        AddrOperandType addr;

        Brig::BrigSegment segment;
        Brig::BrigMemoryOrder memoryOrder;
        Brig::BrigAtomicOperation atomicOperation;
        Brig::BrigMemoryScope memoryScope;
        Brig::BrigOpcode opcode;
        Enums::MemOpType opType;

        AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
                       const char *_opcode)
           : HsailGPUStaticInst(obj, _opcode)
        {
            using namespace Brig;

            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;

            segment = (BrigSegment)at->segment;
            memoryScope = (BrigMemoryScope)at->memoryScope;
            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
            atomicOperation = (BrigAtomicOperation)at->atomicOperation;
            opcode = (BrigOpcode)ib->opcode;
            opType = brigAtomicToMemOpType(opcode, atomicOperation);

            switch (segment) {
              case BRIG_SEGMENT_GLOBAL:
                o_type = Enums::OT_GLOBAL_ATOMIC;
                break;

              case BRIG_SEGMENT_GROUP:
                o_type = Enums::OT_SHARED_ATOMIC;
                break;

              case BRIG_SEGMENT_FLAT:
                o_type = Enums::OT_FLAT_ATOMIC;
                break;

              default:
                panic("Atomic: segment %d not supported\n", segment);
            }

            if (HasDst) {
                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                dest.init(op_offs, obj);

                op_offs = obj->getOperandPtr(ib->operands, 1);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 2);
                    src[i].init(op_offs, obj);
                }
            } else {
                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
                addr.init(op_offs, obj);

                for (int i = 0; i < NumSrcOperands; ++i) {
                    op_offs = obj->getOperandPtr(ib->operands, i + 1);
                    src[i].init(op_offs, obj);
                }
            }
        }

        int numSrcRegOperands()
        {
            int operands = 0;
            for (int i = 0; i < NumSrcOperands; i++) {
                if (src[i].isVectorRegister()) {
                    operands++;
                }
            }
            if (addr.isVectorRegister())
                operands++;
            return operands;
        }
        int numDstRegOperands() { return dest.isVectorRegister(); }
        int getNumOperands()
        {
            if (addr.isVectorRegister())
                return(NumSrcOperands + 2);
            return(NumSrcOperands + 1);
        }
        bool isVectorRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isVectorRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return dest.isVectorRegister();
        }
        bool isCondRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isCondRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isCondRegister());
            else
                return dest.isCondRegister();
        }
        bool isScalarRegister(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return src[operandIndex].isScalarRegister();
            else if (operandIndex == NumSrcOperands)
                return(addr.isScalarRegister());
            else
                return dest.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return true;
            else if (operandIndex == NumSrcOperands)
                return(addr.isVectorRegister());
            else
                return false;
        }
        bool isDstOperand(int operandIndex)
        {
            if (operandIndex <= NumSrcOperands)
                return false;
            else
                return true;
        }
        int getOperandSize(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].opSize());
            else if (operandIndex == NumSrcOperands)
                return(addr.opSize());
            else
                return(dest.opSize());
        }
        int getRegisterIndex(int operandIndex)
        {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex < NumSrcOperands)
                return(src[operandIndex].regIndex());
            else if (operandIndex == NumSrcOperands)
                return(addr.regIndex());
            else
                return(dest.regIndex());
        }
    };

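    // AtomicInst is the concrete atomic instruction type. For example,
    // assuming the decode tables map atomic_add_u32 to MemDataType = U32,
    // an instruction such as
    //     atomic_add_global_u32 $s1, [$s0], $s2;
    // would instantiate AtomicInst<U32, SRegAddrOperand, 1, true>.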
    template<typename MemDataType, typename AddrOperandType,
             int NumSrcOperands, bool HasDst>
    class AtomicInst :
        public AtomicInstBase<typename MemDataType::OperandType,
                              AddrOperandType, NumSrcOperands, HasDst>,
        public MemInst
    {
      public:
        void generateDisassembly();

        AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
                   const char *_opcode)
            : AtomicInstBase<typename MemDataType::OperandType,
                             AddrOperandType, NumSrcOperands, HasDst>
                (ib, obj, _opcode),
              MemInst(MemDataType::memType)
        {
            init_addr(&this->addr);
        }

        void
        initiateAcc(GPUDynInstPtr gpuDynInst) override
        {
            // before doing the RMW, check if this atomic has
            // release semantics, and if so issue a release first
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && (gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_RELEASE || gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE_RELEASE)) {

                    gpuDynInst->statusBitVector = VectorMask(1);

                    gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
                    gpuDynInst->useContinuation = true;

                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::RELEASE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(
                        gpuDynInst, false, req);

                    return;
                }
            }

            // if there are no release semantics, execute the RMW immediately
            execAtomic(gpuDynInst);
        }
1378
1379        void execute(GPUDynInstPtr gpuDynInst);
1380
1381        bool
1382        isLocalMem() const override
1383        {
1384            return this->segment == Brig::BRIG_SEGMENT_GROUP;
1385        }
1386
1387      private:
1388        // execAtomic may be called through a continuation
1389        // if the RMW had release semantics. see comment for
1390        // execContinuation in gpu_dyn_inst.hh
1391        void
1392        execAtomic(GPUDynInstPtr gpuDynInst) override
1393        {
1394            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
1395
1396            typedef typename MemDataType::CType c0;
1397
1398            c0 *d = &((c0*) gpuDynInst->d_data)[0];
1399            c0 *e = &((c0*) gpuDynInst->a_data)[0];
1400            c0 *f = &((c0*) gpuDynInst->x_data)[0];
1401
1402            for (int i = 0; i < VSZ; ++i) {
1403                if (gpuDynInst->exec_mask[i]) {
1404                    Addr vaddr = gpuDynInst->addr[i];
1405
                    if (isLocalMem()) {
                        Wavefront *wavefront = gpuDynInst->wavefront();
                        *d = wavefront->ldsChunk->read<c0>(vaddr);

                        switch (this->opType) {
                          case Enums::MO_AADD:
                          case Enums::MO_ANRADD:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) + (*e));
                            break;
                          case Enums::MO_ASUB:
                          case Enums::MO_ANRSUB:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) - (*e));
                            break;
                          case Enums::MO_AMAX:
                          case Enums::MO_ANRMAX:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::max(wavefront->ldsChunk->read<c0>(vaddr),
                                         (*e)));
                            break;
                          case Enums::MO_AMIN:
                          case Enums::MO_ANRMIN:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                std::min(wavefront->ldsChunk->read<c0>(vaddr),
                                         (*e)));
                            break;
                          case Enums::MO_AAND:
                          case Enums::MO_ANRAND:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) & (*e));
                            break;
                          case Enums::MO_AOR:
                          case Enums::MO_ANROR:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) | (*e));
                            break;
                          case Enums::MO_AXOR:
                          case Enums::MO_ANRXOR:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
                            break;
                          case Enums::MO_AINC:
                          case Enums::MO_ANRINC:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) + 1);
                            break;
                          case Enums::MO_ADEC:
                          case Enums::MO_ANRDEC:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                wavefront->ldsChunk->read<c0>(vaddr) - 1);
                            break;
                          case Enums::MO_AEXCH:
                          case Enums::MO_ANREXCH:
                            wavefront->ldsChunk->write<c0>(vaddr, (*e));
                            break;
                          case Enums::MO_ACAS:
                          case Enums::MO_ANRCAS:
                            wavefront->ldsChunk->write<c0>(vaddr,
                                (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
                                (*f) : wavefront->ldsChunk->read<c0>(vaddr));
                            break;
                          default:
                            fatal("Unrecognized or invalid HSAIL atomic op "
                                  "type.\n");
                            break;
                        }
                    } else {
                        Request *req =
                            new Request(0, vaddr, sizeof(c0), 0,
                                        gpuDynInst->computeUnit()->masterId(),
                                        0, gpuDynInst->wfDynId, i,
                                        gpuDynInst->makeAtomicOpFunctor<c0>(e,
                                            f, this->opType));

                        gpuDynInst->setRequestFlags(req);
                        PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
                        pkt->dataStatic(d);

                        if (gpuDynInst->computeUnit()->shader->
                            separate_acquire_release &&
                            (gpuDynInst->memoryOrder ==
                             Enums::MEMORY_ORDER_SC_ACQUIRE)) {
                            // if this atomic has acquire semantics,
                            // schedule the continuation to perform an
                            // acquire after the RMW completes
                            gpuDynInst->execContinuation =
                                &GPUStaticInst::execAtomicAcq;

                            gpuDynInst->useContinuation = true;
                        } else {
                            // the request will be finished when
                            // the RMW completes
                            gpuDynInst->useContinuation = false;
                        }
                        // translation is performed in sendRequest()
                        gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
                                                               pkt);
                    }
                }

                ++d;
                ++e;
                ++f;
            }

            gpuDynInst->updateStats();
        }

        // execAtomicAcq will always be called through a continuation.
        // see comment for execContinuation in gpu_dyn_inst.hh
        void
        execAtomicAcq(GPUDynInstPtr gpuDynInst) override
        {
            // after performing the RMW, check to see if this instruction
            // has acquire semantics, and if so, issue an acquire
            if (!isLocalMem()) {
                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
                    && gpuDynInst->memoryOrder ==
                    Enums::MEMORY_ORDER_SC_ACQUIRE) {
                    gpuDynInst->statusBitVector = VectorMask(1);

                    // the request will be finished when
                    // the acquire completes
                    gpuDynInst->useContinuation = false;
                    // create request
                    Request *req = new Request(0, 0, 0, 0,
                                  gpuDynInst->computeUnit()->masterId(),
                                  0, gpuDynInst->wfDynId, -1);
                    req->setFlags(Request::ACQUIRE);
                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst,
                                                                    false, req);
                }
            }
        }
    };

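    // constructAtomic dispatches on the BRIG atomic operation: atomic_ld
    // and atomic_st are decoded as ordinary loads and stores, while every
    // other operation becomes an AtomicInst whose HasDst parameter is
    // chosen by whether the opcode is the returning "atomic" form or the
    // non-returning "atomicnoret" form.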
    template<typename DataType, typename AddrOperandType, int NumSrcOperands>
    GPUStaticInst*
    constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
            return decodeLd<DataType>(ib, obj);
        } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
            switch (ib->type) {
              case Brig::BRIG_TYPE_B8:
                return decodeSt<S8,S8>(ib, obj);
              case Brig::BRIG_TYPE_B16:
                return decodeSt<S8,S16>(ib, obj);
              case Brig::BRIG_TYPE_B32:
                return decodeSt<S8,S32>(ib, obj);
              case Brig::BRIG_TYPE_B64:
                return decodeSt<S8,S64>(ib, obj);
              default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
            }
        } else {
            if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
                return new AtomicInst<DataType, AddrOperandType,
                    NumSrcOperands, false>(ib, obj, "atomicnoret");
            else
                return new AtomicInst<DataType, AddrOperandType,
                    NumSrcOperands, true>(ib, obj, "atomic");
        }
    }

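    // decodeAtomicHelper locates the address operand (index 0 for
    // atomicnoret, which has no destination register; index 1 otherwise)
    // and picks the addressing template that matches its register kind:
    // no register, single register, or double register.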
    template<typename DataType, int NumSrcOperands>
    GPUStaticInst*
    decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
            Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;

        unsigned op_offs = obj->getOperandPtr(ib->operands, addrIndex);

        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);

        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
            return constructAtomic<DataType, NoRegAddrOperand,
                                   NumSrcOperands>(ib, obj);
        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
            // V2/V4 not allowed
            switch (tmp.regKind) {
              case Brig::BRIG_REGISTER_KIND_SINGLE:
                return constructAtomic<DataType, SRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              case Brig::BRIG_REGISTER_KIND_DOUBLE:
                return constructAtomic<DataType, DRegAddrOperand,
                                       NumSrcOperands>(ib, obj);
              default:
                fatal("Bad atomic register operand type %d\n", tmp.regKind);
            }
        } else {
            fatal("Bad atomic register operand kind %d\n", tmp.kind);
        }
    }

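    // CAS carries two source operands (the compare value and the swap
    // value); every other atomic operation carries one.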
    template<typename DataType>
    GPUStaticInst*
    decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }

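    // The non-returning form decodes identically; it is the opcode,
    // examined later in decodeAtomicHelper and constructAtomic, that
    // distinguishes atomicnoret.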
    template<typename DataType>
    GPUStaticInst*
    decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
    {
        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;

        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
            return decodeAtomicHelper<DataType, 2>(ib, obj);
        } else {
            return decodeAtomicHelper<DataType, 1>(ib, obj);
        }
    }
} // namespace HsailISA

#endif // __ARCH_HSAIL_INSTS_MEM_HH__