branch.hh revision 11308:7d8836fd043d
1/* 2 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Author: Steve Reinhardt 34 */ 35 36#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__ 37#define __ARCH_HSAIL_INSTS_BRANCH_HH__ 38 39#include "arch/hsail/insts/gpu_static_inst.hh" 40#include "arch/hsail/operand.hh" 41#include "gpu-compute/gpu_dyn_inst.hh" 42#include "gpu-compute/wavefront.hh" 43 44namespace HsailISA 45{ 46 47 // The main difference between a direct branch and an indirect branch 48 // is whether the target is a register or a label, so we can share a 49 // lot of code if we template the base implementation on that type. 50 template<typename TargetType> 51 class BrnInstBase : public HsailGPUStaticInst 52 { 53 public: 54 void generateDisassembly(); 55 56 Brig::BrigWidth8_t width; 57 TargetType target; 58 59 BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) 60 : HsailGPUStaticInst(obj, "brn") 61 { 62 o_type = Enums::OT_BRANCH; 63 width = ((Brig::BrigInstBr*)ib)->width; 64 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 65 target.init(op_offs, obj); 66 o_type = Enums::OT_BRANCH; 67 } 68 69 uint32_t getTargetPc() override { return target.getTarget(0, 0); } 70 71 bool unconditionalJumpInstruction() override { return true; } 72 bool isVectorRegister(int operandIndex) { 73 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 74 return target.isVectorRegister(); 75 } 76 bool isCondRegister(int operandIndex) { 77 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 78 return target.isCondRegister(); 79 } 80 bool isScalarRegister(int operandIndex) { 81 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 82 return target.isScalarRegister(); 83 } 84 85 bool isSrcOperand(int operandIndex) { 86 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 87 return true; 88 } 89 90 bool isDstOperand(int operandIndex) { 91 return false; 92 } 93 94 int getOperandSize(int operandIndex) { 95 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 96 return target.opSize(); 97 } 98 99 int getRegisterIndex(int operandIndex) { 100 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 101 return target.regIndex(); 102 } 103 104 int getNumOperands() { 105 return 1; 106 } 107 108 void execute(GPUDynInstPtr gpuDynInst); 109 }; 110 111 template<typename TargetType> 112 void 113 BrnInstBase<TargetType>::generateDisassembly() 114 { 115 std::string widthClause; 116 117 if (width != 1) { 118 widthClause = csprintf("_width(%d)", width); 119 } 120 121 disassembly = csprintf("%s%s %s", opcode, widthClause, 122 target.disassemble()); 123 } 124 125 template<typename TargetType> 126 void 127 BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) 128 { 129 Wavefront *w = gpuDynInst->wavefront(); 130 131 if (getTargetPc() == w->rpc()) { 132 w->popFromReconvergenceStack(); 133 } else { 134 // Rpc and execution mask remain the same 135 w->pc(getTargetPc()); 136 } 137 w->discardFetch(); 138 } 139 140 class BrnDirectInst : public BrnInstBase<LabelOperand> 141 { 142 public: 143 BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 144 : BrnInstBase<LabelOperand>(ib, obj) 145 { 146 } 147 int numSrcRegOperands() { return 0; } 148 int numDstRegOperands() { return 0; } 149 }; 150 151 class BrnIndirectInst : public BrnInstBase<SRegOperand> 152 { 153 public: 154 BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 155 : BrnInstBase<SRegOperand>(ib, obj) 156 { 157 } 158 int numSrcRegOperands() { return target.isVectorRegister(); } 159 int numDstRegOperands() { return 0; } 160 }; 161 162 GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib, 163 const BrigObject *obj); 164 165 template<typename TargetType> 166 class CbrInstBase : public HsailGPUStaticInst 167 { 168 public: 169 void generateDisassembly(); 170 171 Brig::BrigWidth8_t width; 172 CRegOperand cond; 173 TargetType target; 174 175 CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) 176 : HsailGPUStaticInst(obj, "cbr") 177 { 178 o_type = Enums::OT_BRANCH; 179 width = ((Brig::BrigInstBr *)ib)->width; 180 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 181 cond.init(op_offs, obj); 182 op_offs = obj->getOperandPtr(ib->operands, 1); 183 target.init(op_offs, obj); 184 o_type = Enums::OT_BRANCH; 185 } 186 187 uint32_t getTargetPc() override { return target.getTarget(0, 0); } 188 189 void execute(GPUDynInstPtr gpuDynInst); 190 // Assumption: Target is operand 0, Condition Register is operand 1 191 bool isVectorRegister(int operandIndex) { 192 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 193 if (!operandIndex) 194 return target.isVectorRegister(); 195 else 196 return false; 197 } 198 bool isCondRegister(int operandIndex) { 199 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 200 if (!operandIndex) 201 return target.isCondRegister(); 202 else 203 return true; 204 } 205 bool isScalarRegister(int operandIndex) { 206 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 207 if (!operandIndex) 208 return target.isScalarRegister(); 209 else 210 return false; 211 } 212 bool isSrcOperand(int operandIndex) { 213 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 214 if (operandIndex == 0) 215 return true; 216 return false; 217 } 218 // both Condition Register and Target are source operands 219 bool isDstOperand(int operandIndex) { 220 return false; 221 } 222 int getOperandSize(int operandIndex) { 223 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 224 if (!operandIndex) 225 return target.opSize(); 226 else 227 return 1; 228 } 229 int getRegisterIndex(int operandIndex) { 230 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 231 if (!operandIndex) 232 return target.regIndex(); 233 else 234 return -1; 235 } 236 237 // Operands = Target, Condition Register 238 int getNumOperands() { 239 return 2; 240 } 241 }; 242 243 template<typename TargetType> 244 void 245 CbrInstBase<TargetType>::generateDisassembly() 246 { 247 std::string widthClause; 248 249 if (width != 1) { 250 widthClause = csprintf("_width(%d)", width); 251 } 252 253 disassembly = csprintf("%s%s %s,%s", opcode, widthClause, 254 cond.disassemble(), target.disassemble()); 255 } 256 257 template<typename TargetType> 258 void 259 CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) 260 { 261 Wavefront *w = gpuDynInst->wavefront(); 262 263 const uint32_t curr_pc = w->pc(); 264 const uint32_t curr_rpc = w->rpc(); 265 const VectorMask curr_mask = w->execMask(); 266 267 /** 268 * TODO: can we move this pop outside the instruction, and 269 * into the wavefront? 270 */ 271 w->popFromReconvergenceStack(); 272 273 // immediate post-dominator instruction 274 const uint32_t rpc = static_cast<uint32_t>(ipdInstNum()); 275 if (curr_rpc != rpc) { 276 w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask); 277 } 278 279 // taken branch 280 const uint32_t true_pc = getTargetPc(); 281 VectorMask true_mask; 282 for (unsigned int lane = 0; lane < VSZ; ++lane) { 283 true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane]; 284 } 285 286 // not taken branch 287 const uint32_t false_pc = curr_pc + 1; 288 assert(true_pc != false_pc); 289 if (false_pc != rpc && true_mask.count() < curr_mask.count()) { 290 VectorMask false_mask = curr_mask & ~true_mask; 291 w->pushToReconvergenceStack(false_pc, rpc, false_mask); 292 } 293 294 if (true_pc != rpc && true_mask.count()) { 295 w->pushToReconvergenceStack(true_pc, rpc, true_mask); 296 } 297 assert(w->pc() != curr_pc); 298 w->discardFetch(); 299 } 300 301 302 class CbrDirectInst : public CbrInstBase<LabelOperand> 303 { 304 public: 305 CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 306 : CbrInstBase<LabelOperand>(ib, obj) 307 { 308 } 309 // the source operand of a conditional branch is a Condition 310 // Register which is not stored in the VRF 311 // so we do not count it as a source-register operand 312 // even though, formally, it is one. 313 int numSrcRegOperands() { return 0; } 314 int numDstRegOperands() { return 0; } 315 }; 316 317 class CbrIndirectInst : public CbrInstBase<SRegOperand> 318 { 319 public: 320 CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 321 : CbrInstBase<SRegOperand>(ib, obj) 322 { 323 } 324 // one source operand of the conditional indirect branch is a Condition 325 // register which is not stored in the VRF so we do not count it 326 // as a source-register operand even though, formally, it is one. 327 int numSrcRegOperands() { return target.isVectorRegister(); } 328 int numDstRegOperands() { return 0; } 329 }; 330 331 GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib, 332 const BrigObject *obj); 333 334 template<typename TargetType> 335 class BrInstBase : public HsailGPUStaticInst 336 { 337 public: 338 void generateDisassembly(); 339 340 ImmOperand<uint32_t> width; 341 TargetType target; 342 343 BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) 344 : HsailGPUStaticInst(obj, "br") 345 { 346 o_type = Enums::OT_BRANCH; 347 width.init(((Brig::BrigInstBr *)ib)->width, obj); 348 unsigned op_offs = obj->getOperandPtr(ib->operands, 0); 349 target.init(op_offs, obj); 350 o_type = Enums::OT_BRANCH; 351 } 352 353 uint32_t getTargetPc() override { return target.getTarget(0, 0); } 354 355 bool unconditionalJumpInstruction() override { return true; } 356 357 void execute(GPUDynInstPtr gpuDynInst); 358 bool isVectorRegister(int operandIndex) { 359 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 360 return target.isVectorRegister(); 361 } 362 bool isCondRegister(int operandIndex) { 363 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 364 return target.isCondRegister(); 365 } 366 bool isScalarRegister(int operandIndex) { 367 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 368 return target.isScalarRegister(); 369 } 370 bool isSrcOperand(int operandIndex) { 371 assert((operandIndex >= 0) && (operandIndex < getNumOperands())); 372 return true; 373 } 374 bool isDstOperand(int operandIndex) { return false; } 375 int getOperandSize(int operandIndex) { 376 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 377 return target.opSize(); 378 } 379 int getRegisterIndex(int operandIndex) { 380 assert(operandIndex >= 0 && operandIndex < getNumOperands()); 381 return target.regIndex(); 382 } 383 int getNumOperands() { return 1; } 384 }; 385 386 template<typename TargetType> 387 void 388 BrInstBase<TargetType>::generateDisassembly() 389 { 390 std::string widthClause; 391 392 if (width.bits != 1) { 393 widthClause = csprintf("_width(%d)", width.bits); 394 } 395 396 disassembly = csprintf("%s%s %s", opcode, widthClause, 397 target.disassemble()); 398 } 399 400 template<typename TargetType> 401 void 402 BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst) 403 { 404 Wavefront *w = gpuDynInst->wavefront(); 405 406 if (getTargetPc() == w->rpc()) { 407 w->popFromReconvergenceStack(); 408 } else { 409 // Rpc and execution mask remain the same 410 w->pc(getTargetPc()); 411 } 412 w->discardFetch(); 413 } 414 415 class BrDirectInst : public BrInstBase<LabelOperand> 416 { 417 public: 418 BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 419 : BrInstBase<LabelOperand>(ib, obj) 420 { 421 } 422 423 int numSrcRegOperands() { return 0; } 424 int numDstRegOperands() { return 0; } 425 }; 426 427 class BrIndirectInst : public BrInstBase<SRegOperand> 428 { 429 public: 430 BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) 431 : BrInstBase<SRegOperand>(ib, obj) 432 { 433 } 434 int numSrcRegOperands() { return target.isVectorRegister(); } 435 int numDstRegOperands() { return 0; } 436 }; 437 438 GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib, 439 const BrigObject *obj); 440} // namespace HsailISA 441 442#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__ 443