branch.hh revision 11692:e772fdcd3809
1/* 2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Author: Steve Reinhardt 34 */ 35 36#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__ 37#define __ARCH_HSAIL_INSTS_BRANCH_HH__ 38 39#include "arch/hsail/insts/gpu_static_inst.hh" 40#include "arch/hsail/operand.hh" 41#include "gpu-compute/gpu_dyn_inst.hh" 42#include "gpu-compute/wavefront.hh" 43 44namespace HsailISA 45{ 46 47 // The main difference between a direct branch and an indirect branch 48 // is whether the target is a register or a label, so we can share a 49 // lot of code if we template the base implementation on that type. 50 template<typename TargetType> 51 class BrnInstBase : public HsailGPUStaticInst 52 { 53 public: 54 void generateDisassembly() override; 55 56 Brig::BrigWidth8_t width; 57 TargetType target; 58 59 BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) 60 : HsailGPUStaticInst(obj, "brn") 61 { 62 setFlag(Branch); 63 setFlag(UnconditionalJump); 64 width = ((Brig::BrigInstBr*)ib)->width; 65 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 66 target.init(op_offs, obj); 67 } 68 69 uint32_t getTargetPc() override { return target.getTarget(0, 0); } 70 71 bool isVectorRegister(int operandIndex) override { 72 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 73 return target.isVectorRegister(); 74 } 75 bool isCondRegister(int operandIndex) override { 76 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 77 return target.isCondRegister(); 78 } 79 bool isScalarRegister(int operandIndex) override { 80 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 81 return target.isScalarRegister(); 82 } 83 84 bool isSrcOperand(int operandIndex) override { 85 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 86 return true; 87 } 88 89 bool isDstOperand(int operandIndex) override { 90 return false; 91 } 92 93 int getOperandSize(int operandIndex) override { 94 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 95 return target.opSize(); 96 } 97 98 int getRegisterIndex(int operandIndex) override { 99 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 100 return target.regIndex(); 101 } 102 103 int getNumOperands() override { 104 return 1; 105 } 106 107 void execute(GPUDynInstPtr gpuDynInst) override; 108 }; 109 110 template<typename TargetType> 111 void 112 BrnInstBase<TargetType>::generateDisassembly() 113 { 114 std::string widthClause; 115 116 if (width != 1) { 117 widthClause = csprintf("_width(%d)", width); 118 } 119 120 disassembly = csprintf("%s%s %s", opcode, widthClause, 121 target.disassemble()); 122 } 123 124 template<typename TargetType> 125 void 126 BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) 127 { 128 Wavefront *w = gpuDynInst->wavefront(); 129 130 if (getTargetPc() == w->rpc()) { 131 w->popFromReconvergenceStack(); 132 } else { 133 // Rpc and execution mask remain the same 134 w->pc(getTargetPc()); 135 } 136 w->discardFetch(); 137 } 138 139 class BrnDirectInst : public BrnInstBase<LabelOperand> 140 { 141 public: 142 BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 143 : BrnInstBase<LabelOperand>(ib, obj) 144 { 145 } 146 int numSrcRegOperands() { return 0; } 147 int numDstRegOperands() { return 0; } 148 }; 149 150 class BrnIndirectInst : public BrnInstBase<SRegOperand> 151 { 152 public: 153 BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 154 : BrnInstBase<SRegOperand>(ib, obj) 155 { 156 } 157 int numSrcRegOperands() { return target.isVectorRegister(); } 158 int numDstRegOperands() { return 0; } 159 }; 160 161 GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib, 162 const BrigObject *obj); 163 164 template<typename TargetType> 165 class CbrInstBase : public HsailGPUStaticInst 166 { 167 public: 168 void generateDisassembly() override; 169 170 Brig::BrigWidth8_t width; 171 CRegOperand cond; 172 TargetType target; 173 174 CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) 175 : HsailGPUStaticInst(obj, "cbr") 176 { 177 setFlag(Branch); 178 width = ((Brig::BrigInstBr *)ib)->width; 179 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 180 cond.init(op_offs, obj); 181 op_offs = obj->getOperandPtr(ib->operands, 1); 182 target.init(op_offs, obj); 183 } 184 185 uint32_t getTargetPc() override { return target.getTarget(0, 0); } 186 187 void execute(GPUDynInstPtr gpuDynInst) override; 188 // Assumption: Target is operand 0, Condition Register is operand 1 189 bool isVectorRegister(int operandIndex) override { 190 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 191 if (!operandIndex) 192 return target.isVectorRegister(); 193 else 194 return false; 195 } 196 bool isCondRegister(int operandIndex) override { 197 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 198 if (!operandIndex) 199 return target.isCondRegister(); 200 else 201 return true; 202 } 203 bool isScalarRegister(int operandIndex) override { 204 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 205 if (!operandIndex) 206 return target.isScalarRegister(); 207 else 208 return false; 209 } 210 bool isSrcOperand(int operandIndex) override { 211 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 212 if (operandIndex == 0) 213 return true; 214 return false; 215 } 216 // both Condition Register and Target are source operands 217 bool isDstOperand(int operandIndex) override { 218 return false; 219 } 220 int getOperandSize(int operandIndex) override { 221 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 222 if (!operandIndex) 223 return target.opSize(); 224 else 225 return 1; 226 } 227 int getRegisterIndex(int operandIndex) override { 228 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 229 if (!operandIndex) 230 return target.regIndex(); 231 else 232 return -1; 233 } 234 235 // Operands = Target, Condition Register 236 int getNumOperands() override { 237 return 2; 238 } 239 }; 240 241 template<typename TargetType> 242 void 243 CbrInstBase<TargetType>::generateDisassembly() 244 { 245 std::string widthClause; 246 247 if (width != 1) { 248 widthClause = csprintf("_width(%d)", width); 249 } 250 251 disassembly = csprintf("%s%s %s,%s", opcode, widthClause, 252 cond.disassemble(), target.disassemble()); 253 } 254 255 template<typename TargetType> 256 void 257 CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) 258 { 259 Wavefront *w = gpuDynInst->wavefront(); 260 261 const uint32_t curr_pc = w->pc(); 262 const uint32_t curr_rpc = w->rpc(); 263 const VectorMask curr_mask = w->execMask(); 264 265 /** 266 * TODO: can we move this pop outside the instruction, and 267 * into the wavefront? 268 */ 269 w->popFromReconvergenceStack(); 270 271 // immediate post-dominator instruction 272 const uint32_t rpc = static_cast<uint32_t>(ipdInstNum()); 273 if (curr_rpc != rpc) { 274 w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask); 275 } 276 277 // taken branch 278 const uint32_t true_pc = getTargetPc(); 279 VectorMask true_mask; 280 for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { 281 true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane]; 282 } 283 284 // not taken branch 285 const uint32_t false_pc = curr_pc + 1; 286 assert(true_pc != false_pc); 287 if (false_pc != rpc && true_mask.count() < curr_mask.count()) { 288 VectorMask false_mask = curr_mask & ~true_mask; 289 w->pushToReconvergenceStack(false_pc, rpc, false_mask); 290 } 291 292 if (true_pc != rpc && true_mask.count()) { 293 w->pushToReconvergenceStack(true_pc, rpc, true_mask); 294 } 295 assert(w->pc() != curr_pc); 296 w->discardFetch(); 297 } 298 299 300 class CbrDirectInst : public CbrInstBase<LabelOperand> 301 { 302 public: 303 CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 304 : CbrInstBase<LabelOperand>(ib, obj) 305 { 306 } 307 // the source operand of a conditional branch is a Condition 308 // Register which is not stored in the VRF 309 // so we do not count it as a source-register operand 310 // even though, formally, it is one. 311 int numSrcRegOperands() { return 0; } 312 int numDstRegOperands() { return 0; } 313 }; 314 315 class CbrIndirectInst : public CbrInstBase<SRegOperand> 316 { 317 public: 318 CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 319 : CbrInstBase<SRegOperand>(ib, obj) 320 { 321 } 322 // one source operand of the conditional indirect branch is a Condition 323 // register which is not stored in the VRF so we do not count it 324 // as a source-register operand even though, formally, it is one. 325 int numSrcRegOperands() { return target.isVectorRegister(); } 326 int numDstRegOperands() { return 0; } 327 }; 328 329 GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib, 330 const BrigObject *obj); 331 332 template<typename TargetType> 333 class BrInstBase : public HsailGPUStaticInst 334 { 335 public: 336 void generateDisassembly() override; 337 338 ImmOperand<uint32_t> width; 339 TargetType target; 340 341 BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) 342 : HsailGPUStaticInst(obj, "br") 343 { 344 setFlag(Branch); 345 setFlag(UnconditionalJump); 346 width.init(((Brig::BrigInstBr *)ib)->width, obj); 347 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 348 target.init(op_offs, obj); 349 } 350 351 uint32_t getTargetPc() override { return target.getTarget(0, 0); } 352 353 void execute(GPUDynInstPtr gpuDynInst) override; 354 bool isVectorRegister(int operandIndex) override { 355 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 356 return target.isVectorRegister(); 357 } 358 bool isCondRegister(int operandIndex) override { 359 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 360 return target.isCondRegister(); 361 } 362 bool isScalarRegister(int operandIndex) override { 363 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 364 return target.isScalarRegister(); 365 } 366 bool isSrcOperand(int operandIndex) override { 367 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 368 return true; 369 } 370 bool isDstOperand(int operandIndex) override { return false; } 371 int getOperandSize(int operandIndex) override { 372 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 373 return target.opSize(); 374 } 375 int getRegisterIndex(int operandIndex) override { 376 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 377 return target.regIndex(); 378 } 379 int getNumOperands() override { return 1; } 380 }; 381 382 template<typename TargetType> 383 void 384 BrInstBase<TargetType>::generateDisassembly() 385 { 386 std::string widthClause; 387 388 if (width.bits != 1) { 389 widthClause = csprintf("_width(%d)", width.bits); 390 } 391 392 disassembly = csprintf("%s%s %s", opcode, widthClause, 393 target.disassemble()); 394 } 395 396 template<typename TargetType> 397 void 398 BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) 399 { 400 Wavefront *w = gpuDynInst->wavefront(); 401 402 if (getTargetPc() == w->rpc()) { 403 w->popFromReconvergenceStack(); 404 } else { 405 // Rpc and execution mask remain the same 406 w->pc(getTargetPc()); 407 } 408 w->discardFetch(); 409 } 410 411 class BrDirectInst : public BrInstBase<LabelOperand> 412 { 413 public: 414 BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 415 : BrInstBase<LabelOperand>(ib, obj) 416 { 417 } 418 419 int numSrcRegOperands() { return 0; } 420 int numDstRegOperands() { return 0; } 421 }; 422 423 class BrIndirectInst : public BrInstBase<SRegOperand> 424 { 425 public: 426 BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 427 : BrInstBase<SRegOperand>(ib, obj) 428 { 429 } 430 int numSrcRegOperands() { return target.isVectorRegister(); } 431 int numDstRegOperands() { return 0; } 432 }; 433 434 GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib, 435 const BrigObject *obj); 436} // namespace HsailISA 437 438#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__ 439