// branch.hh revision 11697:c63431b7bbeb
1/* 2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 
32 * 33 * Author: Steve Reinhardt 34 */ 35 36#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__ 37#define __ARCH_HSAIL_INSTS_BRANCH_HH__ 38 39#include "arch/hsail/insts/gpu_static_inst.hh" 40#include "arch/hsail/operand.hh" 41#include "gpu-compute/gpu_dyn_inst.hh" 42#include "gpu-compute/wavefront.hh" 43 44namespace HsailISA 45{ 46 47 // The main difference between a direct branch and an indirect branch 48 // is whether the target is a register or a label, so we can share a 49 // lot of code if we template the base implementation on that type. 50 template<typename TargetType> 51 class BrnInstBase : public HsailGPUStaticInst 52 { 53 public: 54 void generateDisassembly() override; 55 56 Brig::BrigWidth8_t width; 57 TargetType target; 58 59 BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) 60 : HsailGPUStaticInst(obj, "brn") 61 { 62 setFlag(Branch); 63 setFlag(UnconditionalJump); 64 width = ((Brig::BrigInstBr*)ib)->width; 65 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 66 target.init(op_offs, obj); 67 } 68 69 uint32_t getTargetPc() override { return target.getTarget(0, 0); } 70 71 bool isVectorRegister(int operandIndex) override { 72 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 73 return target.isVectorRegister(); 74 } 75 bool isCondRegister(int operandIndex) override { 76 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 77 return target.isCondRegister(); 78 } 79 bool isScalarRegister(int operandIndex) override { 80 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 81 return target.isScalarRegister(); 82 } 83 84 bool isSrcOperand(int operandIndex) override { 85 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 86 return true; 87 } 88 89 bool isDstOperand(int operandIndex) override { 90 return false; 91 } 92 93 int getOperandSize(int operandIndex) override { 94 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 95 return target.opSize(); 96 } 97 98 int getRegisterIndex(int operandIndex) 
override { 99 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 100 return target.regIndex(); 101 } 102 103 int getNumOperands() override { 104 return 1; 105 } 106 107 void execute(GPUDynInstPtr gpuDynInst) override; 108 }; 109 110 template<typename TargetType> 111 void 112 BrnInstBase<TargetType>::generateDisassembly() 113 { 114 std::string widthClause; 115 116 if (width != 1) { 117 widthClause = csprintf("_width(%d)", width); 118 } 119 120 disassembly = csprintf("%s%s %s", opcode, widthClause, 121 target.disassemble()); 122 } 123 124 template<typename TargetType> 125 void 126 BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) 127 { 128 Wavefront *w = gpuDynInst->wavefront(); 129 130 if (getTargetPc() == w->rpc()) { 131 w->popFromReconvergenceStack(); 132 } else { 133 // Rpc and execution mask remain the same 134 w->pc(getTargetPc()); 135 } 136 } 137 138 class BrnDirectInst : public BrnInstBase<LabelOperand> 139 { 140 public: 141 BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 142 : BrnInstBase<LabelOperand>(ib, obj) 143 { 144 } 145 int numSrcRegOperands() { return 0; } 146 int numDstRegOperands() { return 0; } 147 }; 148 149 class BrnIndirectInst : public BrnInstBase<SRegOperand> 150 { 151 public: 152 BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 153 : BrnInstBase<SRegOperand>(ib, obj) 154 { 155 } 156 int numSrcRegOperands() { return target.isVectorRegister(); } 157 int numDstRegOperands() { return 0; } 158 }; 159 160 GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib, 161 const BrigObject *obj); 162 163 template<typename TargetType> 164 class CbrInstBase : public HsailGPUStaticInst 165 { 166 public: 167 void generateDisassembly() override; 168 169 Brig::BrigWidth8_t width; 170 CRegOperand cond; 171 TargetType target; 172 173 CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) 174 : HsailGPUStaticInst(obj, "cbr") 175 { 176 setFlag(Branch); 177 width = ((Brig::BrigInstBr *)ib)->width; 
178 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 179 cond.init(op_offs, obj); 180 op_offs = obj->getOperandPtr(ib->operands, 1); 181 target.init(op_offs, obj); 182 } 183 184 uint32_t getTargetPc() override { return target.getTarget(0, 0); } 185 186 void execute(GPUDynInstPtr gpuDynInst) override; 187 // Assumption: Target is operand 0, Condition Register is operand 1 188 bool isVectorRegister(int operandIndex) override { 189 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 190 if (!operandIndex) 191 return target.isVectorRegister(); 192 else 193 return false; 194 } 195 bool isCondRegister(int operandIndex) override { 196 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 197 if (!operandIndex) 198 return target.isCondRegister(); 199 else 200 return true; 201 } 202 bool isScalarRegister(int operandIndex) override { 203 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 204 if (!operandIndex) 205 return target.isScalarRegister(); 206 else 207 return false; 208 } 209 bool isSrcOperand(int operandIndex) override { 210 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 211 if (operandIndex == 0) 212 return true; 213 return false; 214 } 215 // both Condition Register and Target are source operands 216 bool isDstOperand(int operandIndex) override { 217 return false; 218 } 219 int getOperandSize(int operandIndex) override { 220 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 221 if (!operandIndex) 222 return target.opSize(); 223 else 224 return 1; 225 } 226 int getRegisterIndex(int operandIndex) override { 227 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 228 if (!operandIndex) 229 return target.regIndex(); 230 else 231 return -1; 232 } 233 234 // Operands = Target, Condition Register 235 int getNumOperands() override { 236 return 2; 237 } 238 }; 239 240 template<typename TargetType> 241 void 242 CbrInstBase<TargetType>::generateDisassembly() 243 { 244 std::string widthClause; 
245 246 if (width != 1) { 247 widthClause = csprintf("_width(%d)", width); 248 } 249 250 disassembly = csprintf("%s%s %s,%s", opcode, widthClause, 251 cond.disassemble(), target.disassemble()); 252 } 253 254 template<typename TargetType> 255 void 256 CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) 257 { 258 Wavefront *w = gpuDynInst->wavefront(); 259 260 const uint32_t curr_pc M5_VAR_USED = w->pc(); 261 const uint32_t curr_rpc = w->rpc(); 262 const VectorMask curr_mask = w->execMask(); 263 264 /** 265 * TODO: can we move this pop outside the instruction, and 266 * into the wavefront? 267 */ 268 w->popFromReconvergenceStack(); 269 270 // immediate post-dominator instruction 271 const uint32_t rpc = static_cast<uint32_t>(ipdInstNum()); 272 if (curr_rpc != rpc) { 273 w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask); 274 } 275 276 // taken branch 277 const uint32_t true_pc = getTargetPc(); 278 VectorMask true_mask; 279 for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 280 true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane]; 281 } 282 283 // not taken branch 284 const uint32_t false_pc = nextInstAddr(); 285 assert(true_pc != false_pc); 286 if (false_pc != rpc && true_mask.count() < curr_mask.count()) { 287 VectorMask false_mask = curr_mask & ~true_mask; 288 w->pushToReconvergenceStack(false_pc, rpc, false_mask); 289 } 290 291 if (true_pc != rpc && true_mask.count()) { 292 w->pushToReconvergenceStack(true_pc, rpc, true_mask); 293 } 294 assert(w->pc() != curr_pc); 295 } 296 297 298 class CbrDirectInst : public CbrInstBase<LabelOperand> 299 { 300 public: 301 CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 302 : CbrInstBase<LabelOperand>(ib, obj) 303 { 304 } 305 // the source operand of a conditional branch is a Condition 306 // Register which is not stored in the VRF 307 // so we do not count it as a source-register operand 308 // even though, formally, it is one. 
309 int numSrcRegOperands() { return 0; } 310 int numDstRegOperands() { return 0; } 311 }; 312 313 class CbrIndirectInst : public CbrInstBase<SRegOperand> 314 { 315 public: 316 CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 317 : CbrInstBase<SRegOperand>(ib, obj) 318 { 319 } 320 // one source operand of the conditional indirect branch is a Condition 321 // register which is not stored in the VRF so we do not count it 322 // as a source-register operand even though, formally, it is one. 323 int numSrcRegOperands() { return target.isVectorRegister(); } 324 int numDstRegOperands() { return 0; } 325 }; 326 327 GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib, 328 const BrigObject *obj); 329 330 template<typename TargetType> 331 class BrInstBase : public HsailGPUStaticInst 332 { 333 public: 334 void generateDisassembly() override; 335 336 ImmOperand<uint32_t> width; 337 TargetType target; 338 339 BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) 340 : HsailGPUStaticInst(obj, "br") 341 { 342 setFlag(Branch); 343 setFlag(UnconditionalJump); 344 width.init(((Brig::BrigInstBr *)ib)->width, obj); 345 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 346 target.init(op_offs, obj); 347 } 348 349 uint32_t getTargetPc() override { return target.getTarget(0, 0); } 350 351 void execute(GPUDynInstPtr gpuDynInst) override; 352 bool isVectorRegister(int operandIndex) override { 353 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 354 return target.isVectorRegister(); 355 } 356 bool isCondRegister(int operandIndex) override { 357 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 358 return target.isCondRegister(); 359 } 360 bool isScalarRegister(int operandIndex) override { 361 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 362 return target.isScalarRegister(); 363 } 364 bool isSrcOperand(int operandIndex) override { 365 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 366 return 
true; 367 } 368 bool isDstOperand(int operandIndex) override { return false; } 369 int getOperandSize(int operandIndex) override { 370 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 371 return target.opSize(); 372 } 373 int getRegisterIndex(int operandIndex) override { 374 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 375 return target.regIndex(); 376 } 377 int getNumOperands() override { return 1; } 378 }; 379 380 template<typename TargetType> 381 void 382 BrInstBase<TargetType>::generateDisassembly() 383 { 384 std::string widthClause; 385 386 if (width.bits != 1) { 387 widthClause = csprintf("_width(%d)", width.bits); 388 } 389 390 disassembly = csprintf("%s%s %s", opcode, widthClause, 391 target.disassemble()); 392 } 393 394 template<typename TargetType> 395 void 396 BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) 397 { 398 Wavefront *w = gpuDynInst->wavefront(); 399 400 if (getTargetPc() == w->rpc()) { 401 w->popFromReconvergenceStack(); 402 } else { 403 // Rpc and execution mask remain the same 404 w->pc(getTargetPc()); 405 } 406 } 407 408 class BrDirectInst : public BrInstBase<LabelOperand> 409 { 410 public: 411 BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 412 : BrInstBase<LabelOperand>(ib, obj) 413 { 414 } 415 416 int numSrcRegOperands() { return 0; } 417 int numDstRegOperands() { return 0; } 418 }; 419 420 class BrIndirectInst : public BrInstBase<SRegOperand> 421 { 422 public: 423 BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 424 : BrInstBase<SRegOperand>(ib, obj) 425 { 426 } 427 int numSrcRegOperands() { return target.isVectorRegister(); } 428 int numDstRegOperands() { return 0; } 429 }; 430 431 GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib, 432 const BrigObject *obj); 433} // namespace HsailISA 434 435#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__ 436