branch.hh revision 11692:e772fdcd3809
1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
37#define __ARCH_HSAIL_INSTS_BRANCH_HH__
38
39#include "arch/hsail/insts/gpu_static_inst.hh"
40#include "arch/hsail/operand.hh"
41#include "gpu-compute/gpu_dyn_inst.hh"
42#include "gpu-compute/wavefront.hh"
43
44namespace HsailISA
45{
46
47    // The main difference between a direct branch and an indirect branch
48    // is whether the target is a register or a label, so we can share a
49    // lot of code if we template the base implementation on that type.
50    template<typename TargetType>
51    class BrnInstBase : public HsailGPUStaticInst
52    {
53    public:
54        void generateDisassembly() override;
55
56        Brig::BrigWidth8_t width;
57        TargetType target;
58
59        BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
60           : HsailGPUStaticInst(obj, "brn")
61        {
62            setFlag(Branch);
63            setFlag(UnconditionalJump);
64            width = ((Brig::BrigInstBr*)ib)->width;
65            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
66            target.init(op_offs, obj);
67        }
68
69        uint32_t getTargetPc()  override { return target.getTarget(0, 0); }
70
71        bool isVectorRegister(int operandIndex) override {
72            assert(operandIndex >= 0 && operandIndex < getNumOperands());
73            return target.isVectorRegister();
74        }
75        bool isCondRegister(int operandIndex) override {
76            assert(operandIndex >= 0 && operandIndex < getNumOperands());
77            return target.isCondRegister();
78        }
79        bool isScalarRegister(int operandIndex) override {
80            assert(operandIndex >= 0 && operandIndex < getNumOperands());
81            return target.isScalarRegister();
82        }
83
84        bool isSrcOperand(int operandIndex) override {
85            assert(operandIndex >= 0 && operandIndex < getNumOperands());
86            return true;
87        }
88
89        bool isDstOperand(int operandIndex) override {
90            return false;
91        }
92
93        int getOperandSize(int operandIndex) override {
94            assert(operandIndex >= 0 && operandIndex < getNumOperands());
95            return target.opSize();
96        }
97
98        int getRegisterIndex(int operandIndex) override {
99            assert(operandIndex >= 0 && operandIndex < getNumOperands());
100            return target.regIndex();
101        }
102
103        int getNumOperands() override {
104            return 1;
105        }
106
107        void execute(GPUDynInstPtr gpuDynInst) override;
108    };
109
110    template<typename TargetType>
111    void
112    BrnInstBase<TargetType>::generateDisassembly()
113    {
114        std::string widthClause;
115
116        if (width != 1) {
117            widthClause = csprintf("_width(%d)", width);
118        }
119
120        disassembly = csprintf("%s%s %s", opcode, widthClause,
121                               target.disassemble());
122    }
123
124    template<typename TargetType>
125    void
126    BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
127    {
128        Wavefront *w = gpuDynInst->wavefront();
129
130        if (getTargetPc() == w->rpc()) {
131            w->popFromReconvergenceStack();
132        } else {
133            // Rpc and execution mask remain the same
134            w->pc(getTargetPc());
135        }
136        w->discardFetch();
137    }
138
139    class BrnDirectInst : public BrnInstBase<LabelOperand>
140    {
141      public:
142        BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
143            : BrnInstBase<LabelOperand>(ib, obj)
144        {
145        }
146        int numSrcRegOperands() { return 0; }
147        int numDstRegOperands() { return 0; }
148    };
149
150    class BrnIndirectInst : public BrnInstBase<SRegOperand>
151    {
152      public:
153        BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
154            : BrnInstBase<SRegOperand>(ib, obj)
155        {
156        }
157        int numSrcRegOperands() { return target.isVectorRegister(); }
158        int numDstRegOperands() { return 0; }
159    };
160
161    GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib,
162                             const BrigObject *obj);
163
164    template<typename TargetType>
165    class CbrInstBase : public HsailGPUStaticInst
166    {
167      public:
168        void generateDisassembly() override;
169
170        Brig::BrigWidth8_t width;
171        CRegOperand cond;
172        TargetType target;
173
174        CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
175           : HsailGPUStaticInst(obj, "cbr")
176        {
177            setFlag(Branch);
178            width = ((Brig::BrigInstBr *)ib)->width;
179            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
180            cond.init(op_offs, obj);
181            op_offs = obj->getOperandPtr(ib->operands, 1);
182            target.init(op_offs, obj);
183        }
184
185        uint32_t getTargetPc() override { return target.getTarget(0, 0); }
186
187        void execute(GPUDynInstPtr gpuDynInst) override;
188        // Assumption: Target is operand 0, Condition Register is operand 1
189        bool isVectorRegister(int operandIndex) override {
190            assert(operandIndex >= 0 && operandIndex < getNumOperands());
191            if (!operandIndex)
192                return target.isVectorRegister();
193            else
194                return false;
195        }
196        bool isCondRegister(int operandIndex) override {
197            assert(operandIndex >= 0 && operandIndex < getNumOperands());
198            if (!operandIndex)
199                return target.isCondRegister();
200            else
201                return true;
202        }
203        bool isScalarRegister(int operandIndex) override {
204            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
205            if (!operandIndex)
206                return target.isScalarRegister();
207            else
208                return false;
209        }
210        bool isSrcOperand(int operandIndex) override {
211            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
212            if (operandIndex == 0)
213                return true;
214            return false;
215        }
216        // both Condition Register and Target are source operands
217        bool isDstOperand(int operandIndex) override {
218            return false;
219        }
220        int getOperandSize(int operandIndex) override {
221            assert(operandIndex >= 0 && operandIndex < getNumOperands());
222            if (!operandIndex)
223                return target.opSize();
224            else
225                return 1;
226        }
227        int getRegisterIndex(int operandIndex) override {
228            assert(operandIndex >= 0 && operandIndex < getNumOperands());
229            if (!operandIndex)
230                return target.regIndex();
231            else
232                return -1;
233         }
234
235        // Operands = Target, Condition Register
236        int getNumOperands() override {
237            return 2;
238        }
239    };
240
241    template<typename TargetType>
242    void
243    CbrInstBase<TargetType>::generateDisassembly()
244    {
245        std::string widthClause;
246
247        if (width != 1) {
248            widthClause = csprintf("_width(%d)", width);
249        }
250
251        disassembly = csprintf("%s%s %s,%s", opcode, widthClause,
252                               cond.disassemble(), target.disassemble());
253    }
254
255    template<typename TargetType>
256    void
257    CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
258    {
259        Wavefront *w = gpuDynInst->wavefront();
260
261        const uint32_t curr_pc = w->pc();
262        const uint32_t curr_rpc = w->rpc();
263        const VectorMask curr_mask = w->execMask();
264
265        /**
266         * TODO: can we move this pop outside the instruction, and
267         * into the wavefront?
268         */
269        w->popFromReconvergenceStack();
270
271        // immediate post-dominator instruction
272        const uint32_t rpc = static_cast<uint32_t>(ipdInstNum());
273        if (curr_rpc != rpc) {
274            w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask);
275        }
276
277        // taken branch
278        const uint32_t true_pc = getTargetPc();
279        VectorMask true_mask;
280        for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
281            true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
282        }
283
284        // not taken branch
285        const uint32_t false_pc = curr_pc + 1;
286        assert(true_pc != false_pc);
287        if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
288            VectorMask false_mask = curr_mask & ~true_mask;
289            w->pushToReconvergenceStack(false_pc, rpc, false_mask);
290        }
291
292        if (true_pc != rpc && true_mask.count()) {
293            w->pushToReconvergenceStack(true_pc, rpc, true_mask);
294        }
295        assert(w->pc() != curr_pc);
296        w->discardFetch();
297    }
298
299
300    class CbrDirectInst : public CbrInstBase<LabelOperand>
301    {
302      public:
303        CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
304            : CbrInstBase<LabelOperand>(ib, obj)
305        {
306        }
307        // the source operand of a conditional branch is a Condition
308        // Register which is not stored in the VRF
309        // so we do not count it as a source-register operand
310        // even though, formally, it is one.
311        int numSrcRegOperands() { return 0; }
312        int numDstRegOperands() { return 0; }
313    };
314
315    class CbrIndirectInst : public CbrInstBase<SRegOperand>
316    {
317      public:
318        CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
319            : CbrInstBase<SRegOperand>(ib, obj)
320        {
321        }
322        // one source operand of the conditional indirect branch is a Condition
323        // register which is not stored in the VRF so we do not count it
324        // as a source-register operand even though, formally, it is one.
325        int numSrcRegOperands() { return target.isVectorRegister(); }
326        int numDstRegOperands() { return 0; }
327    };
328
329    GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib,
330                             const BrigObject *obj);
331
332    template<typename TargetType>
333    class BrInstBase : public HsailGPUStaticInst
334    {
335      public:
336        void generateDisassembly() override;
337
338        ImmOperand<uint32_t> width;
339        TargetType target;
340
341        BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
342           : HsailGPUStaticInst(obj, "br")
343        {
344            setFlag(Branch);
345            setFlag(UnconditionalJump);
346            width.init(((Brig::BrigInstBr *)ib)->width, obj);
347            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
348            target.init(op_offs, obj);
349        }
350
351        uint32_t getTargetPc() override { return target.getTarget(0, 0); }
352
353        void execute(GPUDynInstPtr gpuDynInst) override;
354        bool isVectorRegister(int operandIndex) override {
355            assert(operandIndex >= 0 && operandIndex < getNumOperands());
356            return target.isVectorRegister();
357        }
358        bool isCondRegister(int operandIndex) override {
359            assert(operandIndex >= 0 && operandIndex < getNumOperands());
360            return target.isCondRegister();
361        }
362        bool isScalarRegister(int operandIndex) override {
363            assert(operandIndex >= 0 && operandIndex < getNumOperands());
364            return target.isScalarRegister();
365        }
366        bool isSrcOperand(int operandIndex) override {
367            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
368            return true;
369        }
370        bool isDstOperand(int operandIndex) override { return false; }
371        int getOperandSize(int operandIndex) override {
372            assert(operandIndex >= 0 && operandIndex < getNumOperands());
373            return target.opSize();
374        }
375        int getRegisterIndex(int operandIndex) override {
376            assert(operandIndex >= 0 && operandIndex < getNumOperands());
377            return target.regIndex();
378        }
379        int getNumOperands() override { return 1; }
380    };
381
382    template<typename TargetType>
383    void
384    BrInstBase<TargetType>::generateDisassembly()
385    {
386        std::string widthClause;
387
388        if (width.bits != 1) {
389            widthClause = csprintf("_width(%d)", width.bits);
390        }
391
392        disassembly = csprintf("%s%s %s", opcode, widthClause,
393                               target.disassemble());
394    }
395
396    template<typename TargetType>
397    void
398    BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
399    {
400        Wavefront *w = gpuDynInst->wavefront();
401
402        if (getTargetPc() == w->rpc()) {
403            w->popFromReconvergenceStack();
404        } else {
405            // Rpc and execution mask remain the same
406            w->pc(getTargetPc());
407        }
408        w->discardFetch();
409    }
410
411    class BrDirectInst : public BrInstBase<LabelOperand>
412    {
413      public:
414        BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
415            : BrInstBase<LabelOperand>(ib, obj)
416        {
417        }
418
419        int numSrcRegOperands() { return 0; }
420        int numDstRegOperands() { return 0; }
421    };
422
423    class BrIndirectInst : public BrInstBase<SRegOperand>
424    {
425      public:
426        BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
427            : BrInstBase<SRegOperand>(ib, obj)
428        {
429        }
430        int numSrcRegOperands() { return target.isVectorRegister(); }
431        int numDstRegOperands() { return 0; }
432    };
433
434    GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib,
435                            const BrigObject *obj);
436} // namespace HsailISA
437
438#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__
439