branch.hh revision 11534:7106f550afad
1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
37#define __ARCH_HSAIL_INSTS_BRANCH_HH__
38
39#include "arch/hsail/insts/gpu_static_inst.hh"
40#include "arch/hsail/operand.hh"
41#include "gpu-compute/gpu_dyn_inst.hh"
42#include "gpu-compute/wavefront.hh"
43
44namespace HsailISA
45{
46
47    // The main difference between a direct branch and an indirect branch
48    // is whether the target is a register or a label, so we can share a
49    // lot of code if we template the base implementation on that type.
50    template<typename TargetType>
51    class BrnInstBase : public HsailGPUStaticInst
52    {
53    public:
54        void generateDisassembly() override;
55
56        Brig::BrigWidth8_t width;
57        TargetType target;
58
59        BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
60           : HsailGPUStaticInst(obj, "brn")
61        {
62            o_type = Enums::OT_BRANCH;
63            width = ((Brig::BrigInstBr*)ib)->width;
64            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
65            target.init(op_offs, obj);
66            o_type = Enums::OT_BRANCH;
67        }
68
69        uint32_t getTargetPc()  override { return target.getTarget(0, 0); }
70
71        bool unconditionalJumpInstruction() override { return true; }
72        bool isVectorRegister(int operandIndex) override {
73            assert(operandIndex >= 0 && operandIndex < getNumOperands());
74            return target.isVectorRegister();
75        }
76        bool isCondRegister(int operandIndex) override {
77            assert(operandIndex >= 0 && operandIndex < getNumOperands());
78            return target.isCondRegister();
79        }
80        bool isScalarRegister(int operandIndex) override {
81            assert(operandIndex >= 0 && operandIndex < getNumOperands());
82            return target.isScalarRegister();
83        }
84
85        bool isSrcOperand(int operandIndex) override {
86            assert(operandIndex >= 0 && operandIndex < getNumOperands());
87            return true;
88        }
89
90        bool isDstOperand(int operandIndex) override {
91            return false;
92        }
93
94        int getOperandSize(int operandIndex) override {
95            assert(operandIndex >= 0 && operandIndex < getNumOperands());
96            return target.opSize();
97        }
98
99        int getRegisterIndex(int operandIndex) override {
100            assert(operandIndex >= 0 && operandIndex < getNumOperands());
101            return target.regIndex();
102        }
103
104        int getNumOperands() override {
105            return 1;
106        }
107
108        void execute(GPUDynInstPtr gpuDynInst) override;
109    };
110
111    template<typename TargetType>
112    void
113    BrnInstBase<TargetType>::generateDisassembly()
114    {
115        std::string widthClause;
116
117        if (width != 1) {
118            widthClause = csprintf("_width(%d)", width);
119        }
120
121        disassembly = csprintf("%s%s %s", opcode, widthClause,
122                               target.disassemble());
123    }
124
125    template<typename TargetType>
126    void
127    BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
128    {
129        Wavefront *w = gpuDynInst->wavefront();
130
131        if (getTargetPc() == w->rpc()) {
132            w->popFromReconvergenceStack();
133        } else {
134            // Rpc and execution mask remain the same
135            w->pc(getTargetPc());
136        }
137        w->discardFetch();
138    }
139
140    class BrnDirectInst : public BrnInstBase<LabelOperand>
141    {
142      public:
143        BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
144            : BrnInstBase<LabelOperand>(ib, obj)
145        {
146        }
147        int numSrcRegOperands() { return 0; }
148        int numDstRegOperands() { return 0; }
149    };
150
151    class BrnIndirectInst : public BrnInstBase<SRegOperand>
152    {
153      public:
154        BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
155            : BrnInstBase<SRegOperand>(ib, obj)
156        {
157        }
158        int numSrcRegOperands() { return target.isVectorRegister(); }
159        int numDstRegOperands() { return 0; }
160    };
161
162    GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib,
163                             const BrigObject *obj);
164
165    template<typename TargetType>
166    class CbrInstBase : public HsailGPUStaticInst
167    {
168      public:
169        void generateDisassembly() override;
170
171        Brig::BrigWidth8_t width;
172        CRegOperand cond;
173        TargetType target;
174
175        CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
176           : HsailGPUStaticInst(obj, "cbr")
177        {
178            o_type = Enums::OT_BRANCH;
179            width = ((Brig::BrigInstBr *)ib)->width;
180            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
181            cond.init(op_offs, obj);
182            op_offs = obj->getOperandPtr(ib->operands, 1);
183            target.init(op_offs, obj);
184            o_type = Enums::OT_BRANCH;
185        }
186
187        uint32_t getTargetPc() override { return target.getTarget(0, 0); }
188
189        void execute(GPUDynInstPtr gpuDynInst) override;
190        // Assumption: Target is operand 0, Condition Register is operand 1
191        bool isVectorRegister(int operandIndex) override {
192            assert(operandIndex >= 0 && operandIndex < getNumOperands());
193            if (!operandIndex)
194                return target.isVectorRegister();
195            else
196                return false;
197        }
198        bool isCondRegister(int operandIndex) override {
199            assert(operandIndex >= 0 && operandIndex < getNumOperands());
200            if (!operandIndex)
201                return target.isCondRegister();
202            else
203                return true;
204        }
205        bool isScalarRegister(int operandIndex) override {
206            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
207            if (!operandIndex)
208                return target.isScalarRegister();
209            else
210                return false;
211        }
212        bool isSrcOperand(int operandIndex) override {
213            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
214            if (operandIndex == 0)
215                return true;
216            return false;
217        }
218        // both Condition Register and Target are source operands
219        bool isDstOperand(int operandIndex) override {
220            return false;
221        }
222        int getOperandSize(int operandIndex) override {
223            assert(operandIndex >= 0 && operandIndex < getNumOperands());
224            if (!operandIndex)
225                return target.opSize();
226            else
227                return 1;
228        }
229        int getRegisterIndex(int operandIndex) override {
230            assert(operandIndex >= 0 && operandIndex < getNumOperands());
231            if (!operandIndex)
232                return target.regIndex();
233            else
234                return -1;
235         }
236
237        // Operands = Target, Condition Register
238        int getNumOperands() override {
239            return 2;
240        }
241    };
242
243    template<typename TargetType>
244    void
245    CbrInstBase<TargetType>::generateDisassembly()
246    {
247        std::string widthClause;
248
249        if (width != 1) {
250            widthClause = csprintf("_width(%d)", width);
251        }
252
253        disassembly = csprintf("%s%s %s,%s", opcode, widthClause,
254                               cond.disassemble(), target.disassemble());
255    }
256
257    template<typename TargetType>
258    void
259    CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
260    {
261        Wavefront *w = gpuDynInst->wavefront();
262
263        const uint32_t curr_pc = w->pc();
264        const uint32_t curr_rpc = w->rpc();
265        const VectorMask curr_mask = w->execMask();
266
267        /**
268         * TODO: can we move this pop outside the instruction, and
269         * into the wavefront?
270         */
271        w->popFromReconvergenceStack();
272
273        // immediate post-dominator instruction
274        const uint32_t rpc = static_cast<uint32_t>(ipdInstNum());
275        if (curr_rpc != rpc) {
276            w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask);
277        }
278
279        // taken branch
280        const uint32_t true_pc = getTargetPc();
281        VectorMask true_mask;
282        for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
283            true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
284        }
285
286        // not taken branch
287        const uint32_t false_pc = curr_pc + 1;
288        assert(true_pc != false_pc);
289        if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
290            VectorMask false_mask = curr_mask & ~true_mask;
291            w->pushToReconvergenceStack(false_pc, rpc, false_mask);
292        }
293
294        if (true_pc != rpc && true_mask.count()) {
295            w->pushToReconvergenceStack(true_pc, rpc, true_mask);
296        }
297        assert(w->pc() != curr_pc);
298        w->discardFetch();
299    }
300
301
302    class CbrDirectInst : public CbrInstBase<LabelOperand>
303    {
304      public:
305        CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
306            : CbrInstBase<LabelOperand>(ib, obj)
307        {
308        }
309        // the source operand of a conditional branch is a Condition
310        // Register which is not stored in the VRF
311        // so we do not count it as a source-register operand
312        // even though, formally, it is one.
313        int numSrcRegOperands() { return 0; }
314        int numDstRegOperands() { return 0; }
315    };
316
317    class CbrIndirectInst : public CbrInstBase<SRegOperand>
318    {
319      public:
320        CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
321            : CbrInstBase<SRegOperand>(ib, obj)
322        {
323        }
324        // one source operand of the conditional indirect branch is a Condition
325        // register which is not stored in the VRF so we do not count it
326        // as a source-register operand even though, formally, it is one.
327        int numSrcRegOperands() { return target.isVectorRegister(); }
328        int numDstRegOperands() { return 0; }
329    };
330
331    GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib,
332                             const BrigObject *obj);
333
334    template<typename TargetType>
335    class BrInstBase : public HsailGPUStaticInst
336    {
337      public:
338        void generateDisassembly() override;
339
340        ImmOperand<uint32_t> width;
341        TargetType target;
342
343        BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
344           : HsailGPUStaticInst(obj, "br")
345        {
346            o_type = Enums::OT_BRANCH;
347            width.init(((Brig::BrigInstBr *)ib)->width, obj);
348            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
349            target.init(op_offs, obj);
350            o_type = Enums::OT_BRANCH;
351        }
352
353        uint32_t getTargetPc() override { return target.getTarget(0, 0); }
354
355        bool unconditionalJumpInstruction() override { return true; }
356
357        void execute(GPUDynInstPtr gpuDynInst) override;
358        bool isVectorRegister(int operandIndex) override {
359            assert(operandIndex >= 0 && operandIndex < getNumOperands());
360            return target.isVectorRegister();
361        }
362        bool isCondRegister(int operandIndex) override {
363            assert(operandIndex >= 0 && operandIndex < getNumOperands());
364            return target.isCondRegister();
365        }
366        bool isScalarRegister(int operandIndex) override {
367            assert(operandIndex >= 0 && operandIndex < getNumOperands());
368            return target.isScalarRegister();
369        }
370        bool isSrcOperand(int operandIndex) override {
371            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
372            return true;
373        }
374        bool isDstOperand(int operandIndex) override { return false; }
375        int getOperandSize(int operandIndex) override {
376            assert(operandIndex >= 0 && operandIndex < getNumOperands());
377            return target.opSize();
378        }
379        int getRegisterIndex(int operandIndex) override {
380            assert(operandIndex >= 0 && operandIndex < getNumOperands());
381            return target.regIndex();
382        }
383        int getNumOperands() override { return 1; }
384    };
385
386    template<typename TargetType>
387    void
388    BrInstBase<TargetType>::generateDisassembly()
389    {
390        std::string widthClause;
391
392        if (width.bits != 1) {
393            widthClause = csprintf("_width(%d)", width.bits);
394        }
395
396        disassembly = csprintf("%s%s %s", opcode, widthClause,
397                               target.disassemble());
398    }
399
400    template<typename TargetType>
401    void
402    BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
403    {
404        Wavefront *w = gpuDynInst->wavefront();
405
406        if (getTargetPc() == w->rpc()) {
407            w->popFromReconvergenceStack();
408        } else {
409            // Rpc and execution mask remain the same
410            w->pc(getTargetPc());
411        }
412        w->discardFetch();
413    }
414
415    class BrDirectInst : public BrInstBase<LabelOperand>
416    {
417      public:
418        BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
419            : BrInstBase<LabelOperand>(ib, obj)
420        {
421        }
422
423        int numSrcRegOperands() { return 0; }
424        int numDstRegOperands() { return 0; }
425    };
426
427    class BrIndirectInst : public BrInstBase<SRegOperand>
428    {
429      public:
430        BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
431            : BrInstBase<SRegOperand>(ib, obj)
432        {
433        }
434        int numSrcRegOperands() { return target.isVectorRegister(); }
435        int numDstRegOperands() { return 0; }
436    };
437
438    GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib,
439                            const BrigObject *obj);
440} // namespace HsailISA
441
442#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__
443