1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
37#define __ARCH_HSAIL_INSTS_BRANCH_HH__
38
39#include "arch/hsail/insts/gpu_static_inst.hh"
40#include "arch/hsail/operand.hh"
41#include "gpu-compute/gpu_dyn_inst.hh"
42#include "gpu-compute/wavefront.hh"
43
44namespace HsailISA
45{
46
47    // The main difference between a direct branch and an indirect branch
48    // is whether the target is a register or a label, so we can share a
49    // lot of code if we template the base implementation on that type.
50    template<typename TargetType>
51    class BrnInstBase : public HsailGPUStaticInst
52    {
53    public:
54        void generateDisassembly() override;
55
56        Brig::BrigWidth8_t width;
57        TargetType target;
58
59        BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
60           : HsailGPUStaticInst(obj, "brn")
61        {
62            setFlag(Branch);
63            setFlag(UnconditionalJump);
64            width = ((Brig::BrigInstBr*)ib)->width;
65            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
66            target.init(op_offs, obj);
67        }
68
69        uint32_t getTargetPc()  override { return target.getTarget(0, 0); }
70
71        bool isVectorRegister(int operandIndex) override {
72            assert(operandIndex >= 0 && operandIndex < getNumOperands());
73            return target.isVectorRegister();
74        }
75        bool isCondRegister(int operandIndex) override {
76            assert(operandIndex >= 0 && operandIndex < getNumOperands());
77            return target.isCondRegister();
78        }
79        bool isScalarRegister(int operandIndex) override {
80            assert(operandIndex >= 0 && operandIndex < getNumOperands());
81            return target.isScalarRegister();
82        }
83
84        bool isSrcOperand(int operandIndex) override {
85            assert(operandIndex >= 0 && operandIndex < getNumOperands());
86            return true;
87        }
88
89        bool isDstOperand(int operandIndex) override {
90            return false;
91        }
92
93        int getOperandSize(int operandIndex) override {
94            assert(operandIndex >= 0 && operandIndex < getNumOperands());
95            return target.opSize();
96        }
97
98        int
99        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
100        {
101            assert(operandIndex >= 0 && operandIndex < getNumOperands());
102            return target.regIndex();
103        }
104
105        int getNumOperands() override {
106            return 1;
107        }
108
109        void execute(GPUDynInstPtr gpuDynInst) override;
110    };
111
112    template<typename TargetType>
113    void
114    BrnInstBase<TargetType>::generateDisassembly()
115    {
116        std::string widthClause;
117
118        if (width != 1) {
119            widthClause = csprintf("_width(%d)", width);
120        }
121
122        disassembly = csprintf("%s%s %s", opcode, widthClause,
123                               target.disassemble());
124    }
125
126    template<typename TargetType>
127    void
128    BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
129    {
130        Wavefront *w = gpuDynInst->wavefront();
131
132        if (getTargetPc() == w->rpc()) {
133            w->popFromReconvergenceStack();
134        } else {
135            // Rpc and execution mask remain the same
136            w->pc(getTargetPc());
137        }
138    }
139
140    class BrnDirectInst : public BrnInstBase<LabelOperand>
141    {
142      public:
143        BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
144            : BrnInstBase<LabelOperand>(ib, obj)
145        {
146        }
147        int numSrcRegOperands() { return 0; }
148        int numDstRegOperands() { return 0; }
149    };
150
151    class BrnIndirectInst : public BrnInstBase<SRegOperand>
152    {
153      public:
154        BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
155            : BrnInstBase<SRegOperand>(ib, obj)
156        {
157        }
158        int numSrcRegOperands() { return target.isVectorRegister(); }
159        int numDstRegOperands() { return 0; }
160    };
161
    // Decoder entry point for "brn" instructions. Only the declaration
    // lives in this header; the definition is elsewhere.
    // NOTE(review): presumably returns a BrnDirectInst or a
    // BrnIndirectInst depending on the kind of target operand -- confirm
    // against the definition.
    GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib,
                             const BrigObject *obj);
164
165    template<typename TargetType>
166    class CbrInstBase : public HsailGPUStaticInst
167    {
168      public:
169        void generateDisassembly() override;
170
171        Brig::BrigWidth8_t width;
172        CRegOperand cond;
173        TargetType target;
174
175        CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
176           : HsailGPUStaticInst(obj, "cbr")
177        {
178            setFlag(Branch);
179            width = ((Brig::BrigInstBr *)ib)->width;
180            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
181            cond.init(op_offs, obj);
182            op_offs = obj->getOperandPtr(ib->operands, 1);
183            target.init(op_offs, obj);
184        }
185
186        uint32_t getTargetPc() override { return target.getTarget(0, 0); }
187
188        void execute(GPUDynInstPtr gpuDynInst) override;
189        // Assumption: Target is operand 0, Condition Register is operand 1
190        bool isVectorRegister(int operandIndex) override {
191            assert(operandIndex >= 0 && operandIndex < getNumOperands());
192            if (!operandIndex)
193                return target.isVectorRegister();
194            else
195                return false;
196        }
197        bool isCondRegister(int operandIndex) override {
198            assert(operandIndex >= 0 && operandIndex < getNumOperands());
199            if (!operandIndex)
200                return target.isCondRegister();
201            else
202                return true;
203        }
204        bool isScalarRegister(int operandIndex) override {
205            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
206            if (!operandIndex)
207                return target.isScalarRegister();
208            else
209                return false;
210        }
211        bool isSrcOperand(int operandIndex) override {
212            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
213            if (operandIndex == 0)
214                return true;
215            return false;
216        }
217        // both Condition Register and Target are source operands
218        bool isDstOperand(int operandIndex) override {
219            return false;
220        }
221        int getOperandSize(int operandIndex) override {
222            assert(operandIndex >= 0 && operandIndex < getNumOperands());
223            if (!operandIndex)
224                return target.opSize();
225            else
226                return 1;
227        }
228        int
229        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
230        {
231            assert(operandIndex >= 0 && operandIndex < getNumOperands());
232            if (!operandIndex)
233                return target.regIndex();
234            else
235                return -1;
236         }
237
238        // Operands = Target, Condition Register
239        int getNumOperands() override {
240            return 2;
241        }
242    };
243
244    template<typename TargetType>
245    void
246    CbrInstBase<TargetType>::generateDisassembly()
247    {
248        std::string widthClause;
249
250        if (width != 1) {
251            widthClause = csprintf("_width(%d)", width);
252        }
253
254        disassembly = csprintf("%s%s %s,%s", opcode, widthClause,
255                               cond.disassemble(), target.disassemble());
256    }
257
258    template<typename TargetType>
259    void
260    CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
261    {
262        Wavefront *w = gpuDynInst->wavefront();
263
264        const uint32_t curr_pc M5_VAR_USED = w->pc();
265        const uint32_t curr_rpc = w->rpc();
266        const VectorMask curr_mask = w->execMask();
267
268        /**
269         * TODO: can we move this pop outside the instruction, and
270         * into the wavefront?
271         */
272        w->popFromReconvergenceStack();
273
274        // immediate post-dominator instruction
275        const uint32_t rpc = static_cast<uint32_t>(ipdInstNum());
276        if (curr_rpc != rpc) {
277            w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask);
278        }
279
280        // taken branch
281        const uint32_t true_pc = getTargetPc();
282        VectorMask true_mask;
283        for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
284            true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
285        }
286
287        // not taken branch
288        const uint32_t false_pc = nextInstAddr();
289        assert(true_pc != false_pc);
290        if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
291            VectorMask false_mask = curr_mask & ~true_mask;
292            w->pushToReconvergenceStack(false_pc, rpc, false_mask);
293        }
294
295        if (true_pc != rpc && true_mask.count()) {
296            w->pushToReconvergenceStack(true_pc, rpc, true_mask);
297        }
298        assert(w->pc() != curr_pc);
299    }
300
301
302    class CbrDirectInst : public CbrInstBase<LabelOperand>
303    {
304      public:
305        CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
306            : CbrInstBase<LabelOperand>(ib, obj)
307        {
308        }
309        // the source operand of a conditional branch is a Condition
310        // Register which is not stored in the VRF
311        // so we do not count it as a source-register operand
312        // even though, formally, it is one.
313        int numSrcRegOperands() { return 0; }
314        int numDstRegOperands() { return 0; }
315    };
316
317    class CbrIndirectInst : public CbrInstBase<SRegOperand>
318    {
319      public:
320        CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
321            : CbrInstBase<SRegOperand>(ib, obj)
322        {
323        }
324        // one source operand of the conditional indirect branch is a Condition
325        // register which is not stored in the VRF so we do not count it
326        // as a source-register operand even though, formally, it is one.
327        int numSrcRegOperands() { return target.isVectorRegister(); }
328        int numDstRegOperands() { return 0; }
329    };
330
    // Decoder entry point for "cbr" instructions. Only the declaration
    // lives in this header; the definition is elsewhere.
    // NOTE(review): presumably returns a CbrDirectInst or a
    // CbrIndirectInst depending on the kind of target operand -- confirm
    // against the definition.
    GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib,
                             const BrigObject *obj);
333
334    template<typename TargetType>
335    class BrInstBase : public HsailGPUStaticInst
336    {
337      public:
338        void generateDisassembly() override;
339
340        ImmOperand<uint32_t> width;
341        TargetType target;
342
343        BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
344           : HsailGPUStaticInst(obj, "br")
345        {
346            setFlag(Branch);
347            setFlag(UnconditionalJump);
348            width.init(((Brig::BrigInstBr *)ib)->width, obj);
349            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
350            target.init(op_offs, obj);
351        }
352
353        uint32_t getTargetPc() override { return target.getTarget(0, 0); }
354
355        void execute(GPUDynInstPtr gpuDynInst) override;
356        bool isVectorRegister(int operandIndex) override {
357            assert(operandIndex >= 0 && operandIndex < getNumOperands());
358            return target.isVectorRegister();
359        }
360        bool isCondRegister(int operandIndex) override {
361            assert(operandIndex >= 0 && operandIndex < getNumOperands());
362            return target.isCondRegister();
363        }
364        bool isScalarRegister(int operandIndex) override {
365            assert(operandIndex >= 0 && operandIndex < getNumOperands());
366            return target.isScalarRegister();
367        }
368        bool isSrcOperand(int operandIndex) override {
369            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
370            return true;
371        }
372        bool isDstOperand(int operandIndex) override { return false; }
373        int getOperandSize(int operandIndex) override {
374            assert(operandIndex >= 0 && operandIndex < getNumOperands());
375            return target.opSize();
376        }
377        int
378        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
379        {
380            assert(operandIndex >= 0 && operandIndex < getNumOperands());
381            return target.regIndex();
382        }
383        int getNumOperands() override { return 1; }
384    };
385
386    template<typename TargetType>
387    void
388    BrInstBase<TargetType>::generateDisassembly()
389    {
390        std::string widthClause;
391
392        if (width.bits != 1) {
393            widthClause = csprintf("_width(%d)", width.bits);
394        }
395
396        disassembly = csprintf("%s%s %s", opcode, widthClause,
397                               target.disassemble());
398    }
399
400    template<typename TargetType>
401    void
402    BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
403    {
404        Wavefront *w = gpuDynInst->wavefront();
405
406        if (getTargetPc() == w->rpc()) {
407            w->popFromReconvergenceStack();
408        } else {
409            // Rpc and execution mask remain the same
410            w->pc(getTargetPc());
411        }
412    }
413
414    class BrDirectInst : public BrInstBase<LabelOperand>
415    {
416      public:
417        BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
418            : BrInstBase<LabelOperand>(ib, obj)
419        {
420        }
421
422        int numSrcRegOperands() { return 0; }
423        int numDstRegOperands() { return 0; }
424    };
425
426    class BrIndirectInst : public BrInstBase<SRegOperand>
427    {
428      public:
429        BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
430            : BrInstBase<SRegOperand>(ib, obj)
431        {
432        }
433        int numSrcRegOperands() { return target.isVectorRegister(); }
434        int numDstRegOperands() { return 0; }
435    };
436
    // Decoder entry point for "br" instructions. Only the declaration
    // lives in this header; the definition is elsewhere.
    // NOTE(review): presumably returns a BrDirectInst or a
    // BrIndirectInst depending on the kind of target operand -- confirm
    // against the definition.
    GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib,
                            const BrigObject *obj);
439} // namespace HsailISA
440
441#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__
442