branch.hh revision 11694:c3b4d57a15c5
1/*
2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
37#define __ARCH_HSAIL_INSTS_BRANCH_HH__
38
39#include "arch/hsail/insts/gpu_static_inst.hh"
40#include "arch/hsail/operand.hh"
41#include "gpu-compute/gpu_dyn_inst.hh"
42#include "gpu-compute/wavefront.hh"
43
44namespace HsailISA
45{
46
47    // The main difference between a direct branch and an indirect branch
48    // is whether the target is a register or a label, so we can share a
49    // lot of code if we template the base implementation on that type.
50    template<typename TargetType>
51    class BrnInstBase : public HsailGPUStaticInst
52    {
53    public:
54        void generateDisassembly() override;
55
56        Brig::BrigWidth8_t width;
57        TargetType target;
58
59        BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
60           : HsailGPUStaticInst(obj, "brn")
61        {
62            setFlag(Branch);
63            setFlag(UnconditionalJump);
64            width = ((Brig::BrigInstBr*)ib)->width;
65            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
66            target.init(op_offs, obj);
67        }
68
69        uint32_t getTargetPc()  override { return target.getTarget(0, 0); }
70
71        bool isVectorRegister(int operandIndex) override {
72            assert(operandIndex >= 0 && operandIndex < getNumOperands());
73            return target.isVectorRegister();
74        }
75        bool isCondRegister(int operandIndex) override {
76            assert(operandIndex >= 0 && operandIndex < getNumOperands());
77            return target.isCondRegister();
78        }
79        bool isScalarRegister(int operandIndex) override {
80            assert(operandIndex >= 0 && operandIndex < getNumOperands());
81            return target.isScalarRegister();
82        }
83
84        bool isSrcOperand(int operandIndex) override {
85            assert(operandIndex >= 0 && operandIndex < getNumOperands());
86            return true;
87        }
88
89        bool isDstOperand(int operandIndex) override {
90            return false;
91        }
92
93        int getOperandSize(int operandIndex) override {
94            assert(operandIndex >= 0 && operandIndex < getNumOperands());
95            return target.opSize();
96        }
97
98        int getRegisterIndex(int operandIndex) override {
99            assert(operandIndex >= 0 && operandIndex < getNumOperands());
100            return target.regIndex();
101        }
102
103        int getNumOperands() override {
104            return 1;
105        }
106
107        void execute(GPUDynInstPtr gpuDynInst) override;
108    };
109
110    template<typename TargetType>
111    void
112    BrnInstBase<TargetType>::generateDisassembly()
113    {
114        std::string widthClause;
115
116        if (width != 1) {
117            widthClause = csprintf("_width(%d)", width);
118        }
119
120        disassembly = csprintf("%s%s %s", opcode, widthClause,
121                               target.disassemble());
122    }
123
124    template<typename TargetType>
125    void
126    BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
127    {
128        Wavefront *w = gpuDynInst->wavefront();
129
130        if (getTargetPc() == w->rpc()) {
131            w->popFromReconvergenceStack();
132        } else {
133            // Rpc and execution mask remain the same
134            w->pc(getTargetPc());
135        }
136    }
137
138    class BrnDirectInst : public BrnInstBase<LabelOperand>
139    {
140      public:
141        BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
142            : BrnInstBase<LabelOperand>(ib, obj)
143        {
144        }
145        int numSrcRegOperands() { return 0; }
146        int numDstRegOperands() { return 0; }
147    };
148
149    class BrnIndirectInst : public BrnInstBase<SRegOperand>
150    {
151      public:
152        BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
153            : BrnInstBase<SRegOperand>(ib, obj)
154        {
155        }
156        int numSrcRegOperands() { return target.isVectorRegister(); }
157        int numDstRegOperands() { return 0; }
158    };
159
160    GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib,
161                             const BrigObject *obj);
162
163    template<typename TargetType>
164    class CbrInstBase : public HsailGPUStaticInst
165    {
166      public:
167        void generateDisassembly() override;
168
169        Brig::BrigWidth8_t width;
170        CRegOperand cond;
171        TargetType target;
172
173        CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
174           : HsailGPUStaticInst(obj, "cbr")
175        {
176            setFlag(Branch);
177            width = ((Brig::BrigInstBr *)ib)->width;
178            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
179            cond.init(op_offs, obj);
180            op_offs = obj->getOperandPtr(ib->operands, 1);
181            target.init(op_offs, obj);
182        }
183
184        uint32_t getTargetPc() override { return target.getTarget(0, 0); }
185
186        void execute(GPUDynInstPtr gpuDynInst) override;
187        // Assumption: Target is operand 0, Condition Register is operand 1
188        bool isVectorRegister(int operandIndex) override {
189            assert(operandIndex >= 0 && operandIndex < getNumOperands());
190            if (!operandIndex)
191                return target.isVectorRegister();
192            else
193                return false;
194        }
195        bool isCondRegister(int operandIndex) override {
196            assert(operandIndex >= 0 && operandIndex < getNumOperands());
197            if (!operandIndex)
198                return target.isCondRegister();
199            else
200                return true;
201        }
202        bool isScalarRegister(int operandIndex) override {
203            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
204            if (!operandIndex)
205                return target.isScalarRegister();
206            else
207                return false;
208        }
209        bool isSrcOperand(int operandIndex) override {
210            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
211            if (operandIndex == 0)
212                return true;
213            return false;
214        }
215        // both Condition Register and Target are source operands
216        bool isDstOperand(int operandIndex) override {
217            return false;
218        }
219        int getOperandSize(int operandIndex) override {
220            assert(operandIndex >= 0 && operandIndex < getNumOperands());
221            if (!operandIndex)
222                return target.opSize();
223            else
224                return 1;
225        }
226        int getRegisterIndex(int operandIndex) override {
227            assert(operandIndex >= 0 && operandIndex < getNumOperands());
228            if (!operandIndex)
229                return target.regIndex();
230            else
231                return -1;
232         }
233
234        // Operands = Target, Condition Register
235        int getNumOperands() override {
236            return 2;
237        }
238    };
239
240    template<typename TargetType>
241    void
242    CbrInstBase<TargetType>::generateDisassembly()
243    {
244        std::string widthClause;
245
246        if (width != 1) {
247            widthClause = csprintf("_width(%d)", width);
248        }
249
250        disassembly = csprintf("%s%s %s,%s", opcode, widthClause,
251                               cond.disassemble(), target.disassemble());
252    }
253
254    template<typename TargetType>
255    void
256    CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
257    {
258        Wavefront *w = gpuDynInst->wavefront();
259
260        const uint32_t curr_pc = w->pc();
261        const uint32_t curr_rpc = w->rpc();
262        const VectorMask curr_mask = w->execMask();
263
264        /**
265         * TODO: can we move this pop outside the instruction, and
266         * into the wavefront?
267         */
268        w->popFromReconvergenceStack();
269
270        // immediate post-dominator instruction
271        const uint32_t rpc = static_cast<uint32_t>(ipdInstNum());
272        if (curr_rpc != rpc) {
273            w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask);
274        }
275
276        // taken branch
277        const uint32_t true_pc = getTargetPc();
278        VectorMask true_mask;
279        for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
280            true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
281        }
282
283        // not taken branch
284        const uint32_t false_pc = curr_pc + 1;
285        assert(true_pc != false_pc);
286        if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
287            VectorMask false_mask = curr_mask & ~true_mask;
288            w->pushToReconvergenceStack(false_pc, rpc, false_mask);
289        }
290
291        if (true_pc != rpc && true_mask.count()) {
292            w->pushToReconvergenceStack(true_pc, rpc, true_mask);
293        }
294        assert(w->pc() != curr_pc);
295    }
296
297
298    class CbrDirectInst : public CbrInstBase<LabelOperand>
299    {
300      public:
301        CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
302            : CbrInstBase<LabelOperand>(ib, obj)
303        {
304        }
305        // the source operand of a conditional branch is a Condition
306        // Register which is not stored in the VRF
307        // so we do not count it as a source-register operand
308        // even though, formally, it is one.
309        int numSrcRegOperands() { return 0; }
310        int numDstRegOperands() { return 0; }
311    };
312
313    class CbrIndirectInst : public CbrInstBase<SRegOperand>
314    {
315      public:
316        CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
317            : CbrInstBase<SRegOperand>(ib, obj)
318        {
319        }
320        // one source operand of the conditional indirect branch is a Condition
321        // register which is not stored in the VRF so we do not count it
322        // as a source-register operand even though, formally, it is one.
323        int numSrcRegOperands() { return target.isVectorRegister(); }
324        int numDstRegOperands() { return 0; }
325    };
326
327    GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib,
328                             const BrigObject *obj);
329
330    template<typename TargetType>
331    class BrInstBase : public HsailGPUStaticInst
332    {
333      public:
334        void generateDisassembly() override;
335
336        ImmOperand<uint32_t> width;
337        TargetType target;
338
339        BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
340           : HsailGPUStaticInst(obj, "br")
341        {
342            setFlag(Branch);
343            setFlag(UnconditionalJump);
344            width.init(((Brig::BrigInstBr *)ib)->width, obj);
345            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
346            target.init(op_offs, obj);
347        }
348
349        uint32_t getTargetPc() override { return target.getTarget(0, 0); }
350
351        void execute(GPUDynInstPtr gpuDynInst) override;
352        bool isVectorRegister(int operandIndex) override {
353            assert(operandIndex >= 0 && operandIndex < getNumOperands());
354            return target.isVectorRegister();
355        }
356        bool isCondRegister(int operandIndex) override {
357            assert(operandIndex >= 0 && operandIndex < getNumOperands());
358            return target.isCondRegister();
359        }
360        bool isScalarRegister(int operandIndex) override {
361            assert(operandIndex >= 0 && operandIndex < getNumOperands());
362            return target.isScalarRegister();
363        }
364        bool isSrcOperand(int operandIndex) override {
365            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
366            return true;
367        }
368        bool isDstOperand(int operandIndex) override { return false; }
369        int getOperandSize(int operandIndex) override {
370            assert(operandIndex >= 0 && operandIndex < getNumOperands());
371            return target.opSize();
372        }
373        int getRegisterIndex(int operandIndex) override {
374            assert(operandIndex >= 0 && operandIndex < getNumOperands());
375            return target.regIndex();
376        }
377        int getNumOperands() override { return 1; }
378    };
379
380    template<typename TargetType>
381    void
382    BrInstBase<TargetType>::generateDisassembly()
383    {
384        std::string widthClause;
385
386        if (width.bits != 1) {
387            widthClause = csprintf("_width(%d)", width.bits);
388        }
389
390        disassembly = csprintf("%s%s %s", opcode, widthClause,
391                               target.disassemble());
392    }
393
394    template<typename TargetType>
395    void
396    BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
397    {
398        Wavefront *w = gpuDynInst->wavefront();
399
400        if (getTargetPc() == w->rpc()) {
401            w->popFromReconvergenceStack();
402        } else {
403            // Rpc and execution mask remain the same
404            w->pc(getTargetPc());
405        }
406    }
407
408    class BrDirectInst : public BrInstBase<LabelOperand>
409    {
410      public:
411        BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
412            : BrInstBase<LabelOperand>(ib, obj)
413        {
414        }
415
416        int numSrcRegOperands() { return 0; }
417        int numDstRegOperands() { return 0; }
418    };
419
420    class BrIndirectInst : public BrInstBase<SRegOperand>
421    {
422      public:
423        BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
424            : BrInstBase<SRegOperand>(ib, obj)
425        {
426        }
427        int numSrcRegOperands() { return target.isVectorRegister(); }
428        int numDstRegOperands() { return 0; }
429    };
430
431    GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib,
432                            const BrigObject *obj);
433} // namespace HsailISA
434
435#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__
436