/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
#define __ARCH_HSAIL_INSTS_BRANCH_HH__

#include "arch/hsail/insts/gpu_static_inst.hh"
#include "arch/hsail/operand.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/wavefront.hh"

namespace HsailISA
{

    // The main difference between a direct branch and an indirect branch
    // is whether the target is a register or a label, so we can share a
    // lot of code if we template the base implementation on that type.
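    //
    // Note (descriptive summary of the execute() implementations below):
    // all three branch flavors (brn, cbr, br) steer the wavefront through
    // its reconvergence stack. The unconditional forms pop the stack when
    // the branch target equals the wavefront's current reconvergence PC and
    // otherwise simply redirect the PC, while the conditional form
    // (CbrInstBase) splits the current execution mask into taken and
    // not-taken parts and pushes a stack entry for each path that still has
    // active lanes.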
    template<typename TargetType>
    class BrnInstBase : public HsailGPUStaticInst
    {
      public:
        void generateDisassembly() override;

        Brig::BrigWidth8_t width;
        TargetType target;

        BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
            : HsailGPUStaticInst(obj, "brn")
        {
            setFlag(Branch);
            setFlag(UnconditionalJump);
            width = ((Brig::BrigInstBr*)ib)->width;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            target.init(op_offs, obj);
        }

        uint32_t getTargetPc() override { return target.getTarget(0, 0); }

        bool isVectorRegister(int operandIndex) override {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return target.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) override {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return target.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) override {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return target.isScalarRegister();
        }

        bool isSrcOperand(int operandIndex) override {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return true;
        }

        bool isDstOperand(int operandIndex) override {
            return false;
        }

        int getOperandSize(int operandIndex) override {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return target.opSize();
        }

        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return target.regIndex();
        }

        int getNumOperands() override {
            return 1;
        }

        void execute(GPUDynInstPtr gpuDynInst) override;
    };

    template<typename TargetType>
    void
    BrnInstBase<TargetType>::generateDisassembly()
    {
        std::string widthClause;

        if (width != 1) {
            widthClause = csprintf("_width(%d)", width);
        }

        disassembly = csprintf("%s%s %s", opcode, widthClause,
                               target.disassemble());
    }

    template<typename TargetType>
    void
    BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        if (getTargetPc() == w->rpc()) {
            w->popFromReconvergenceStack();
        } else {
            // Rpc and execution mask remain the same
            w->pc(getTargetPc());
        }
    }

    class BrnDirectInst : public BrnInstBase<LabelOperand>
    {
      public:
        BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
            : BrnInstBase<LabelOperand>(ib, obj)
        {
        }
        int numSrcRegOperands() { return 0; }
        int numDstRegOperands() { return 0; }
    };

    class BrnIndirectInst : public BrnInstBase<SRegOperand>
    {
      public:
        BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
            : BrnInstBase<SRegOperand>(ib, obj)
        {
        }
        int numSrcRegOperands() { return target.isVectorRegister(); }
        int numDstRegOperands() { return 0; }
    };

    GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib,
                             const BrigObject *obj);

    template<typename TargetType>
    class CbrInstBase : public HsailGPUStaticInst
    {
      public:
        void generateDisassembly() override;

        Brig::BrigWidth8_t width;
        CRegOperand cond;
        TargetType target;

        CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
            : HsailGPUStaticInst(obj, "cbr")
        {
            setFlag(Branch);
            width = ((Brig::BrigInstBr *)ib)->width;
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            cond.init(op_offs, obj);
            op_offs = obj->getOperandPtr(ib->operands, 1);
            target.init(op_offs, obj);
        }

        uint32_t getTargetPc() override { return target.getTarget(0, 0); }

        void execute(GPUDynInstPtr gpuDynInst) override;
        // Assumption: Target is operand 0, Condition Register is operand 1
        bool isVectorRegister(int operandIndex) override {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            if (!operandIndex)
                return target.isVectorRegister();
            else
                return false;
        }
        bool isCondRegister(int operandIndex) override {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            if (!operandIndex)
                return target.isCondRegister();
            else
                return true;
        }
        bool isScalarRegister(int operandIndex) override {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (!operandIndex)
                return target.isScalarRegister();
            else
                return false;
        }
        bool isSrcOperand(int operandIndex) override {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            if (operandIndex == 0)
                return true;
            return false;
        }
        // both Condition Register and Target are source operands
        bool isDstOperand(int operandIndex) override {
            return false;
        }
        int getOperandSize(int operandIndex) override {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            if (!operandIndex)
                return target.opSize();
            else
                return 1;
        }
        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            if (!operandIndex)
                return target.regIndex();
            else
                return -1;
        }

        // Operands = Target, Condition Register
        int getNumOperands() override {
            return 2;
        }
    };

    template<typename TargetType>
    void
    CbrInstBase<TargetType>::generateDisassembly()
    {
        std::string widthClause;

        if (width != 1) {
            widthClause = csprintf("_width(%d)", width);
        }

        disassembly = csprintf("%s%s %s,%s", opcode, widthClause,
                               cond.disassemble(), target.disassemble());
    }

    template<typename TargetType>
    void
    CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        const uint32_t curr_pc M5_VAR_USED = w->pc();
        const uint32_t curr_rpc = w->rpc();
        const VectorMask curr_mask = w->execMask();

        /**
         * TODO: can we move this pop outside the instruction, and
         * into the wavefront?
         */
        w->popFromReconvergenceStack();

        // immediate post-dominator instruction
        const uint32_t rpc = static_cast<uint32_t>(ipdInstNum());
        if (curr_rpc != rpc) {
            w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask);
        }

        // taken branch
        const uint32_t true_pc = getTargetPc();
        VectorMask true_mask;
        for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
        }

        // not taken branch
        const uint32_t false_pc = nextInstAddr();
        assert(true_pc != false_pc);
        if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
            VectorMask false_mask = curr_mask & ~true_mask;
            w->pushToReconvergenceStack(false_pc, rpc, false_mask);
        }

        if (true_pc != rpc && true_mask.count()) {
            w->pushToReconvergenceStack(true_pc, rpc, true_mask);
        }
        assert(w->pc() != curr_pc);
    }


    class CbrDirectInst : public CbrInstBase<LabelOperand>
    {
      public:
        CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
            : CbrInstBase<LabelOperand>(ib, obj)
        {
        }
        // the source operand of a conditional branch is a Condition
        // Register which is not stored in the VRF
        // so we do not count it as a source-register operand
        // even though, formally, it is one.
        int numSrcRegOperands() { return 0; }
        int numDstRegOperands() { return 0; }
    };

    class CbrIndirectInst : public CbrInstBase<SRegOperand>
    {
      public:
        CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
            : CbrInstBase<SRegOperand>(ib, obj)
        {
        }
        // one source operand of the conditional indirect branch is a Condition
        // register which is not stored in the VRF so we do not count it
        // as a source-register operand even though, formally, it is one.
        int numSrcRegOperands() { return target.isVectorRegister(); }
        int numDstRegOperands() { return 0; }
    };

    GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib,
                             const BrigObject *obj);

    template<typename TargetType>
    class BrInstBase : public HsailGPUStaticInst
    {
      public:
        void generateDisassembly() override;

        ImmOperand<uint32_t> width;
        TargetType target;

        BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
            : HsailGPUStaticInst(obj, "br")
        {
            setFlag(Branch);
            setFlag(UnconditionalJump);
            width.init(((Brig::BrigInstBr *)ib)->width, obj);
            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
            target.init(op_offs, obj);
        }

        uint32_t getTargetPc() override { return target.getTarget(0, 0); }

        void execute(GPUDynInstPtr gpuDynInst) override;
        bool isVectorRegister(int operandIndex) override {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return target.isVectorRegister();
        }
        bool isCondRegister(int operandIndex) override {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return target.isCondRegister();
        }
        bool isScalarRegister(int operandIndex) override {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return target.isScalarRegister();
        }
        bool isSrcOperand(int operandIndex) override {
            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
            return true;
        }
        bool isDstOperand(int operandIndex) override { return false; }
        int getOperandSize(int operandIndex) override {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return target.opSize();
        }
        int
        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
        {
            assert(operandIndex >= 0 && operandIndex < getNumOperands());
            return target.regIndex();
        }
        int getNumOperands() override { return 1; }
    };

    template<typename TargetType>
    void
    BrInstBase<TargetType>::generateDisassembly()
    {
        std::string widthClause;

        if (width.bits != 1) {
            widthClause = csprintf("_width(%d)", width.bits);
        }

        disassembly = csprintf("%s%s %s", opcode, widthClause,
                               target.disassemble());
    }

    template<typename TargetType>
    void
    BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        if (getTargetPc() == w->rpc()) {
            w->popFromReconvergenceStack();
        } else {
            // Rpc and execution mask remain the same
            w->pc(getTargetPc());
        }
    }

    class BrDirectInst : public BrInstBase<LabelOperand>
    {
      public:
        BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
            : BrInstBase<LabelOperand>(ib, obj)
        {
        }

        int numSrcRegOperands() { return 0; }
        int numDstRegOperands() { return 0; }
    };

    class BrIndirectInst : public BrInstBase<SRegOperand>
    {
      public:
        BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
            : BrInstBase<SRegOperand>(ib, obj)
        {
        }
        int numSrcRegOperands() { return target.isVectorRegister(); }
        int numDstRegOperands() { return 0; }
    };

    GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib,
                            const BrigObject *obj);
} // namespace HsailISA

#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__