Cross Reference: /gem5/src/cpu/minor/fetch2.cc

Deleted Added

sdiff udiff text old ( 11783:f94c14fd6561 ) new ( 11793:ef606668d247 )

full compact

fetch2.cc (11783:f94c14fd6561)	fetch2.cc (11793:ef606668d247)
1/* 2 * Copyright (c) 2013-2014 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Andrew Bardsley 38 */ 39	1/* 2 * Copyright (c) 2013-2014 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Andrew Bardsley 38 */ 39
	40#include "cpu/minor/fetch2.hh" 41
40#include <string> 41 42#include "arch/decoder.hh" 43#include "arch/utility.hh"	42#include <string> 43 44#include "arch/decoder.hh" 45#include "arch/utility.hh"
44#include "cpu/minor/fetch2.hh"
45#include "cpu/minor/pipeline.hh" 46#include "cpu/pred/bpred_unit.hh" 47#include "debug/Branch.hh" 48#include "debug/Fetch.hh" 49#include "debug/MinorTrace.hh" 50 51namespace Minor 52{ 53 54Fetch2::Fetch2(const std::string &name, 55 MinorCPU &cpu_, 56 MinorCPUParams &params, 57 Latch<ForwardLineData>::Output inp_, 58 Latch<BranchData>::Output branchInp_, 59 Latch<BranchData>::Input predictionOut_, 60 Latch<ForwardInstData>::Input out_, 61 std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer) : 62 Named(name), 63 cpu(cpu_), 64 inp(inp_), 65 branchInp(branchInp_), 66 predictionOut(predictionOut_), 67 out(out_), 68 nextStageReserve(next_stage_input_buffer), 69 outputWidth(params.decodeInputWidth), 70 processMoreThanOneInput(params.fetch2CycleInput), 71 branchPredictor(params.branchPred), 72 fetchInfo(params.numThreads), 73 threadPriority(0) 74{ 75 if (outputWidth < 1) 76 fatal("%s: decodeInputWidth must be >= 1 (%d)\n", name, outputWidth); 77 78 if (params.fetch2InputBufferSize < 1) { 79 fatal("%s: fetch2InputBufferSize must be >= 1 (%d)\n", name, 80 params.fetch2InputBufferSize); 81 } 82 83 / Per-thread input buffers / 84 for (ThreadID tid = 0; tid < params.numThreads; tid++) { 85 inputBuffer.push_back( 86 InputBuffer<ForwardLineData>( 87 name + ".inputBuffer" + std::to_string(tid), "lines", 88 params.fetch2InputBufferSize)); 89 } 90} 91 92const ForwardLineData 93Fetch2::getInput(ThreadID tid) 94{ 95 /* Get a line from the inputBuffer to work with / 96 if (!inputBuffer[tid].empty()) { 97 return &(inputBuffer[tid].front()); 98 } else { 99 return NULL; 100* } 101} 102 103void 104Fetch2::popInput(ThreadID tid) 105{ 106 if (!inputBuffer[tid].empty()) { 107 inputBuffer[tid].front().freeLine(); 108 inputBuffer[tid].pop(); 109 } 110 111 fetchInfo[tid].inputIndex = 0; 112} 113 114void 115Fetch2::dumpAllInput(ThreadID tid) 116{ 117 DPRINTF(Fetch, "Dumping whole input buffer\n"); 118 while (!inputBuffer[tid].empty()) 119 popInput(tid); 120 121 fetchInfo[tid].inputIndex = 0; 122} 123 124void 125Fetch2::updateBranchPrediction(const BranchData &branch) 126{ 127 MinorDynInstPtr inst = branch.inst; 128 129 /* Don't even consider instructions we didn't try to predict or faults / 130* if (inst->isFault() \|\| !inst->triedToPredict) 131 return; 132 133 switch (branch.reason) { 134 case BranchData::NoBranch: 135 /* No data to update / 136* break; 137 case BranchData::Interrupt: 138 /* Never try to predict interrupts / 139* break; 140 case BranchData::SuspendThread: 141 /* Don't need to act on suspends / 142* break; 143 case BranchData::HaltFetch: 144 /* Don't need to act on fetch wakeup / 145* break; 146 case BranchData::BranchPrediction: 147 /* Shouldn't happen. Fetch2 is the only source of 148 * BranchPredictions / 149* break; 150 case BranchData::UnpredictedBranch: 151 /* Unpredicted branch or barrier / 152* DPRINTF(Branch, "Unpredicted branch seen inst: %s\n", inst); 153* branchPredictor.squash(inst->id.fetchSeqNum, 154 branch.target, true, inst->id.threadId); 155 // Update after squashing to accomodate O3CPU 156 // using the branch prediction code. 157 branchPredictor.update(inst->id.fetchSeqNum, 158 inst->id.threadId); 159 break; 160 case BranchData::CorrectlyPredictedBranch: 161 /* Predicted taken, was taken / 162* DPRINTF(Branch, "Branch predicted correctly inst: %s\n", inst); 163* branchPredictor.update(inst->id.fetchSeqNum, 164 inst->id.threadId); 165 break; 166 case BranchData::BadlyPredictedBranch: 167 /* Predicted taken, not taken / 168* DPRINTF(Branch, "Branch mis-predicted inst: %s\n", inst); 169* branchPredictor.squash(inst->id.fetchSeqNum, 170 branch.target /* Not used /, false, inst->id.threadId); 171* // Update after squashing to accomodate O3CPU 172 // using the branch prediction code. 173 branchPredictor.update(inst->id.fetchSeqNum, 174 inst->id.threadId); 175 break; 176 case BranchData::BadlyPredictedBranchTarget: 177 /* Predicted taken, was taken but to a different target / 178* DPRINTF(Branch, "Branch mis-predicted target inst: %s target: %s\n", 179 inst, branch.target); 180* branchPredictor.squash(inst->id.fetchSeqNum, 181 branch.target, true, inst->id.threadId); 182 break; 183 } 184} 185 186void 187Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch) 188{ 189 Fetch2ThreadInfo &thread = fetchInfo[inst->id.threadId]; 190 TheISA::PCState inst_pc = inst->pc; 191 192 assert(!inst->predictedTaken); 193 194 /* Skip non-control/sys call instructions / 195* if (inst->staticInst->isControl() \|\| 196 inst->staticInst->isSyscall()) 197 { 198 /* Tried to predict / 199* inst->triedToPredict = true; 200 201 DPRINTF(Branch, "Trying to predict for inst: %s\n", inst); 202* 203 if (branchPredictor.predict(inst->staticInst, 204 inst->id.fetchSeqNum, inst_pc, 205 inst->id.threadId)) 206 { 207 inst->predictedTaken = true; 208 inst->predictedTarget = inst_pc; 209 branch.target = inst_pc; 210 } 211 } else { 212 DPRINTF(Branch, "Not attempting prediction for inst: %s\n", inst); 213* } 214 215 /* If we predict taken, set branch and update sequence numbers / 216* if (inst->predictedTaken) { 217 /* Update the predictionSeqNum and remember the streamSeqNum that it 218 * was associated with / 219* thread.expectedStreamSeqNum = inst->id.streamSeqNum; 220 221 BranchData new_branch = BranchData(BranchData::BranchPrediction, 222 inst->id.threadId, 223 inst->id.streamSeqNum, thread.predictionSeqNum + 1, 224 inst->predictedTarget, inst); 225 226 /* Mark with a new prediction number by the stream number of the 227 * instruction causing the prediction / 228* thread.predictionSeqNum++; 229 branch = new_branch; 230 231 DPRINTF(Branch, "Branch predicted taken inst: %s target: %s" 232 " new predictionSeqNum: %d\n", 233 inst, inst->predictedTarget, thread.predictionSeqNum); 234* } 235} 236 237void 238Fetch2::evaluate() 239{ 240 /* Push input onto appropriate input buffer / 241* if (!inp.outputWire->isBubble()) 242 inputBuffer[inp.outputWire->id.threadId].setTail(inp.outputWire); 243* 244 ForwardInstData &insts_out = out.inputWire; 245* BranchData prediction; 246 BranchData &branch_inp = branchInp.outputWire; 247* 248 assert(insts_out.isBubble()); 249 250 /* React to branches from Execute to update local branch prediction 251 * structures / 252* updateBranchPrediction(branch_inp); 253 254 /* If a branch arrives, don't try and do anything about it. Only 255 * react to your own predictions / 256* if (branch_inp.isStreamChange()) { 257 DPRINTF(Fetch, "Dumping all input as a stream changing branch" 258 " has arrived\n"); 259 dumpAllInput(branch_inp.threadId); 260 fetchInfo[branch_inp.threadId].havePC = false; 261 } 262 263 assert(insts_out.isBubble()); 264 /* Even when blocked, clear out input lines with the wrong 265 * prediction sequence number / 266* for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { 267 Fetch2ThreadInfo &thread = fetchInfo[tid]; 268 269 thread.blocked = !nextStageReserve[tid].canReserve(); 270 271 const ForwardLineData line_in = getInput(tid); 272* 273 while (line_in && 274 thread.expectedStreamSeqNum == line_in->id.streamSeqNum && 275 thread.predictionSeqNum != line_in->id.predictionSeqNum) 276 { 277 DPRINTF(Fetch, "Discarding line %s" 278 " due to predictionSeqNum mismatch (expected: %d)\n", 279 line_in->id, thread.predictionSeqNum); 280 281 popInput(tid); 282 fetchInfo[tid].havePC = false; 283 284 if (processMoreThanOneInput) { 285 DPRINTF(Fetch, "Wrapping\n"); 286 line_in = getInput(tid); 287 } else { 288 line_in = NULL; 289 } 290 } 291 } 292 293 ThreadID tid = getScheduledThread(); 294 DPRINTF(Fetch, "Scheduled Thread: %d\n", tid); 295 296 assert(insts_out.isBubble()); 297 if (tid != InvalidThreadID) { 298 Fetch2ThreadInfo &fetch_info = fetchInfo[tid]; 299 300 const ForwardLineData line_in = getInput(tid); 301* 302 unsigned int output_index = 0; 303 304 /* Pack instructions into the output while we can. This may involve 305 * using more than one input line. Note that lineWidth will be 0 306 * for faulting lines / 307* while (line_in && 308 (line_in->isFault() \|\| 309 fetch_info.inputIndex < line_in->lineWidth) && /* More input / 310* output_index < outputWidth && /* More output to fill / 311* prediction.isBubble() /* No predicted branch /) 312* { 313 ThreadContext thread = cpu.getContext(line_in->id.threadId); 314* TheISA::Decoder decoder = thread->getDecoderPtr(); 315* 316 /* Discard line due to prediction sequence number being wrong but 317 * without the streamSeqNum number having changed / 318* bool discard_line = 319 fetch_info.expectedStreamSeqNum == line_in->id.streamSeqNum && 320 fetch_info.predictionSeqNum != line_in->id.predictionSeqNum; 321 322 /* Set the PC if the stream changes. Setting havePC to false in 323 * a previous cycle handles all other change of flow of control 324 * issues / 325* bool set_pc = fetch_info.lastStreamSeqNum != line_in->id.streamSeqNum; 326 327 if (!discard_line && (!fetch_info.havePC \|\| set_pc)) { 328 /* Set the inputIndex to be the MachInst-aligned offset 329 * from lineBaseAddr of the new PC value / 330* fetch_info.inputIndex = 331 (line_in->pc.instAddr() & BaseCPU::PCMask) - 332 line_in->lineBaseAddr; 333 DPRINTF(Fetch, "Setting new PC value: %s inputIndex: 0x%x" 334 " lineBaseAddr: 0x%x lineWidth: 0x%x\n", 335 line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr, 336 line_in->lineWidth); 337 fetch_info.pc = line_in->pc; 338 fetch_info.havePC = true; 339 decoder->reset(); 340 } 341 342 /* The generated instruction. Leave as NULL if no instruction 343 * is to be packed into the output / 344* MinorDynInstPtr dyn_inst = NULL; 345 346 if (discard_line) { 347 /* Rest of line was from an older prediction in the same 348 * stream / 349* DPRINTF(Fetch, "Discarding line %s (from inputIndex: %d)" 350 " due to predictionSeqNum mismatch (expected: %d)\n", 351 line_in->id, fetch_info.inputIndex, 352 fetch_info.predictionSeqNum); 353 } else if (line_in->isFault()) { 354 /* Pack a fault as a MinorDynInst with ->fault set / 355* 356 /* Make a new instruction and pick up the line, stream, 357 * prediction, thread ids from the incoming line / 358* dyn_inst = new MinorDynInst(line_in->id); 359 360 /* Fetch and prediction sequence numbers originate here / 361* dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum; 362 dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum; 363 /* To complete the set, test that exec sequence number has 364 * not been set / 365* assert(dyn_inst->id.execSeqNum == 0); 366 367 dyn_inst->pc = fetch_info.pc; 368 369 /* Pack a faulting instruction but allow other 370 * instructions to be generated. (Fetch2 makes no 371 * immediate judgement about streamSeqNum) / 372* dyn_inst->fault = line_in->fault; 373 DPRINTF(Fetch, "Fault being passed output_index: " 374 "%d: %s\n", output_index, dyn_inst->fault->name()); 375 } else { 376 uint8_t line = line_in->line; 377* 378 TheISA::MachInst inst_word; 379 /* The instruction is wholly in the line, can just 380 * assign / 381* inst_word = TheISA::gtoh( 382 (reinterpret_cast<TheISA::MachInst > 383 (line + fetch_info.inputIndex))); 384 385 if (!decoder->instReady()) { 386 decoder->moreBytes(fetch_info.pc, 387 line_in->lineBaseAddr + fetch_info.inputIndex, 388 inst_word); 389 DPRINTF(Fetch, "Offering MachInst to decoder addr: 0x%x\n", 390 line_in->lineBaseAddr + fetch_info.inputIndex); 391 } 392 393 /* Maybe make the above a loop to accomodate ISAs with 394 * instructions longer than sizeof(MachInst) / 395* 396 if (decoder->instReady()) { 397 /* Make a new instruction and pick up the line, stream, 398 * prediction, thread ids from the incoming line / 399* dyn_inst = new MinorDynInst(line_in->id); 400 401 /* Fetch and prediction sequence numbers originate here / 402* dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum; 403 dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum; 404 /* To complete the set, test that exec sequence number 405 * has not been set / 406* assert(dyn_inst->id.execSeqNum == 0); 407 408 /* Note that the decoder can update the given PC. 409 * Remember not to assign it until after calling 410 * decode / 411* StaticInstPtr decoded_inst = decoder->decode(fetch_info.pc); 412 dyn_inst->staticInst = decoded_inst; 413 414 dyn_inst->pc = fetch_info.pc; 415 DPRINTF(Fetch, "decoder inst %s\n", dyn_inst); 416* 417 418 DPRINTF(Fetch, "Instruction extracted from line %s" 419 " lineWidth: %d output_index: %d inputIndex: %d" 420 " pc: %s inst: %s\n", 421 line_in->id, 422 line_in->lineWidth, output_index, fetch_info.inputIndex, 423 fetch_info.pc, dyn_inst); 424* 425#if THE_ISA == X86_ISA \|\| THE_ISA == ARM_ISA 426 /* In SE mode, it's possible to branch to a microop when 427 * replaying faults such as page faults (or simply 428 * intra-microcode branches in X86). Unfortunately, 429 * as Minor has micro-op decomposition in a separate 430 * pipeline stage from instruction decomposition, the 431 * following advancePC (which may follow a branch with 432 * microPC() != 0) must see a fresh macroop. This 433 * kludge should be improved with an addition to PCState 434 * but I offer it in this form for the moment 435 * 436 * X86 can branch within microops so we need to deal with 437 * the case that, after a branch, the first un-advanced PC 438 * may be pointing to a microop other than 0. Once 439 * advanced, however, the microop number must be 0 / 440* fetch_info.pc.upc(0); 441 fetch_info.pc.nupc(1); 442#endif 443 444 /* Advance PC for the next instruction / 445* TheISA::advancePC(fetch_info.pc, decoded_inst); 446 447 /* Predict any branches and issue a branch if 448 * necessary / 449* predictBranch(dyn_inst, prediction); 450 } else { 451 DPRINTF(Fetch, "Inst not ready yet\n"); 452 } 453 454 /* Step on the pointer into the line if there's no 455 * complete instruction waiting / 456* if (decoder->needMoreBytes()) { 457 fetch_info.inputIndex += sizeof(TheISA::MachInst); 458 459 DPRINTF(Fetch, "Updated inputIndex value PC: %s" 460 " inputIndex: 0x%x lineBaseAddr: 0x%x lineWidth: 0x%x\n", 461 line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr, 462 line_in->lineWidth); 463 } 464 } 465 466 if (dyn_inst) { 467 /* Step to next sequence number / 468* fetch_info.fetchSeqNum++; 469 470 /* Correctly size the output before writing / 471* if (output_index == 0) { 472 insts_out.resize(outputWidth); 473 } 474 /* Pack the generated dynamic instruction into the output / 475* insts_out.insts[output_index] = dyn_inst; 476 output_index++; 477 478 /* Output MinorTrace instruction info for 479 * pre-microop decomposition macroops / 480* if (DTRACE(MinorTrace) && !dyn_inst->isFault() && 481 dyn_inst->staticInst->isMacroop()) 482 { 483 dyn_inst->minorTraceInst(this); 484* } 485 } 486 487 /* Remember the streamSeqNum of this line so we can tell when 488 * we change stream / 489* fetch_info.lastStreamSeqNum = line_in->id.streamSeqNum; 490 491 /* Asked to discard line or there was a branch or fault / 492* if (!prediction.isBubble() \|\| /* The remains of a 493 line with a prediction in it / 494* line_in->isFault() /* A line which is just a fault /) 495* { 496 DPRINTF(Fetch, "Discarding all input on branch/fault\n"); 497 dumpAllInput(tid); 498 fetch_info.havePC = false; 499 line_in = NULL; 500 } else if (discard_line) { 501 /* Just discard one line, one's behind it may have new 502 * stream sequence numbers. There's a DPRINTF above 503 * for this event / 504* popInput(tid); 505 fetch_info.havePC = false; 506 line_in = NULL; 507 } else if (fetch_info.inputIndex == line_in->lineWidth) { 508 /* Got to end of a line, pop the line but keep PC 509 * in case this is a line-wrapping inst. / 510* popInput(tid); 511 line_in = NULL; 512 } 513 514 if (!line_in && processMoreThanOneInput) { 515 DPRINTF(Fetch, "Wrapping\n"); 516 line_in = getInput(tid); 517 } 518 } 519 520 /* The rest of the output (if any) should already have been packed 521 * with bubble instructions by insts_out's initialisation / 522* } 523 if (tid == InvalidThreadID) { 524 assert(insts_out.isBubble()); 525 } 526 /** Reserve a slot in the next stage and output data / 527* predictionOut.inputWire = prediction; 528* 529 /* If we generated output, reserve space for the result in the next stage 530 * and mark the stage as being active this cycle / 531* if (!insts_out.isBubble()) { 532 /* Note activity of following buffer / 533* cpu.activityRecorder->activity(); 534 insts_out.threadId = tid; 535 nextStageReserve[tid].reserve(); 536 } 537 538 /* If we still have input to process and somewhere to put it, 539 * mark stage as active / 540* for (ThreadID i = 0; i < cpu.numThreads; i++) 541 { 542 if (getInput(i) && nextStageReserve[i].canReserve()) { 543 cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId); 544 break; 545 } 546 } 547 548 /* Make sure the input (if any left) is pushed / 549* if (!inp.outputWire->isBubble()) 550 inputBuffer[inp.outputWire->id.threadId].pushTail(); 551} 552 553inline ThreadID 554Fetch2::getScheduledThread() 555{ 556 /* Select thread via policy. / 557* std::vector<ThreadID> priority_list; 558 559 switch (cpu.threadPolicy) { 560 case Enums::SingleThreaded: 561 priority_list.push_back(0); 562 break; 563 case Enums::RoundRobin: 564 priority_list = cpu.roundRobinPriority(threadPriority); 565 break; 566 case Enums::Random: 567 priority_list = cpu.randomPriority(); 568 break; 569 default: 570 panic("Unknown fetch policy"); 571 } 572 573 for (auto tid : priority_list) { 574 if (getInput(tid) && !fetchInfo[tid].blocked) { 575 threadPriority = tid; 576 return tid; 577 } 578 } 579 580 return InvalidThreadID; 581} 582 583bool 584Fetch2::isDrained() 585{ 586 for (const auto &buffer : inputBuffer) { 587 if (!buffer.empty()) 588 return false; 589 } 590 591 return (inp.outputWire).isBubble() && 592* (predictionOut.inputWire).isBubble(); 593} 594* 595void 596Fetch2::minorTrace() const 597{ 598 std::ostringstream data; 599 600 if (fetchInfo[0].blocked) 601 data << 'B'; 602 else 603 (out.inputWire).reportData(data); 604* 605 MINORTRACE("inputIndex=%d havePC=%d predictionSeqNum=%d insts=%s\n", 606 fetchInfo[0].inputIndex, fetchInfo[0].havePC, fetchInfo[0].predictionSeqNum, data.str()); 607 inputBuffer[0].minorTrace(); 608} 609 610}	46#include "cpu/minor/pipeline.hh" 47#include "cpu/pred/bpred_unit.hh" 48#include "debug/Branch.hh" 49#include "debug/Fetch.hh" 50#include "debug/MinorTrace.hh" 51 52namespace Minor 53{ 54 55Fetch2::Fetch2(const std::string &name, 56 MinorCPU &cpu_, 57 MinorCPUParams &params, 58 Latch<ForwardLineData>::Output inp_, 59 Latch<BranchData>::Output branchInp_, 60 Latch<BranchData>::Input predictionOut_, 61 Latch<ForwardInstData>::Input out_, 62 std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer) : 63 Named(name), 64 cpu(cpu_), 65 inp(inp_), 66 branchInp(branchInp_), 67 predictionOut(predictionOut_), 68 out(out_), 69 nextStageReserve(next_stage_input_buffer), 70 outputWidth(params.decodeInputWidth), 71 processMoreThanOneInput(params.fetch2CycleInput), 72 branchPredictor(params.branchPred), 73 fetchInfo(params.numThreads), 74 threadPriority(0) 75{ 76 if (outputWidth < 1) 77 fatal("%s: decodeInputWidth must be >= 1 (%d)\n", name, outputWidth); 78 79 if (params.fetch2InputBufferSize < 1) { 80 fatal("%s: fetch2InputBufferSize must be >= 1 (%d)\n", name, 81 params.fetch2InputBufferSize); 82 } 83 84 / Per-thread input buffers / 85 for (ThreadID tid = 0; tid < params.numThreads; tid++) { 86 inputBuffer.push_back( 87 InputBuffer<ForwardLineData>( 88 name + ".inputBuffer" + std::to_string(tid), "lines", 89 params.fetch2InputBufferSize)); 90 } 91} 92 93const ForwardLineData 94Fetch2::getInput(ThreadID tid) 95{ 96 /* Get a line from the inputBuffer to work with / 97 if (!inputBuffer[tid].empty()) { 98 return &(inputBuffer[tid].front()); 99 } else { 100* return NULL; 101 } 102} 103 104void 105Fetch2::popInput(ThreadID tid) 106{ 107 if (!inputBuffer[tid].empty()) { 108 inputBuffer[tid].front().freeLine(); 109 inputBuffer[tid].pop(); 110 } 111 112 fetchInfo[tid].inputIndex = 0; 113} 114 115void 116Fetch2::dumpAllInput(ThreadID tid) 117{ 118 DPRINTF(Fetch, "Dumping whole input buffer\n"); 119 while (!inputBuffer[tid].empty()) 120 popInput(tid); 121 122 fetchInfo[tid].inputIndex = 0; 123} 124 125void 126Fetch2::updateBranchPrediction(const BranchData &branch) 127{ 128 MinorDynInstPtr inst = branch.inst; 129 130 /* Don't even consider instructions we didn't try to predict or faults / 131* if (inst->isFault() \|\| !inst->triedToPredict) 132 return; 133 134 switch (branch.reason) { 135 case BranchData::NoBranch: 136 /* No data to update / 137* break; 138 case BranchData::Interrupt: 139 /* Never try to predict interrupts / 140* break; 141 case BranchData::SuspendThread: 142 /* Don't need to act on suspends / 143* break; 144 case BranchData::HaltFetch: 145 /* Don't need to act on fetch wakeup / 146* break; 147 case BranchData::BranchPrediction: 148 /* Shouldn't happen. Fetch2 is the only source of 149 * BranchPredictions / 150* break; 151 case BranchData::UnpredictedBranch: 152 /* Unpredicted branch or barrier / 153* DPRINTF(Branch, "Unpredicted branch seen inst: %s\n", inst); 154* branchPredictor.squash(inst->id.fetchSeqNum, 155 branch.target, true, inst->id.threadId); 156 // Update after squashing to accomodate O3CPU 157 // using the branch prediction code. 158 branchPredictor.update(inst->id.fetchSeqNum, 159 inst->id.threadId); 160 break; 161 case BranchData::CorrectlyPredictedBranch: 162 /* Predicted taken, was taken / 163* DPRINTF(Branch, "Branch predicted correctly inst: %s\n", inst); 164* branchPredictor.update(inst->id.fetchSeqNum, 165 inst->id.threadId); 166 break; 167 case BranchData::BadlyPredictedBranch: 168 /* Predicted taken, not taken / 169* DPRINTF(Branch, "Branch mis-predicted inst: %s\n", inst); 170* branchPredictor.squash(inst->id.fetchSeqNum, 171 branch.target /* Not used /, false, inst->id.threadId); 172* // Update after squashing to accomodate O3CPU 173 // using the branch prediction code. 174 branchPredictor.update(inst->id.fetchSeqNum, 175 inst->id.threadId); 176 break; 177 case BranchData::BadlyPredictedBranchTarget: 178 /* Predicted taken, was taken but to a different target / 179* DPRINTF(Branch, "Branch mis-predicted target inst: %s target: %s\n", 180 inst, branch.target); 181* branchPredictor.squash(inst->id.fetchSeqNum, 182 branch.target, true, inst->id.threadId); 183 break; 184 } 185} 186 187void 188Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch) 189{ 190 Fetch2ThreadInfo &thread = fetchInfo[inst->id.threadId]; 191 TheISA::PCState inst_pc = inst->pc; 192 193 assert(!inst->predictedTaken); 194 195 /* Skip non-control/sys call instructions / 196* if (inst->staticInst->isControl() \|\| 197 inst->staticInst->isSyscall()) 198 { 199 /* Tried to predict / 200* inst->triedToPredict = true; 201 202 DPRINTF(Branch, "Trying to predict for inst: %s\n", inst); 203* 204 if (branchPredictor.predict(inst->staticInst, 205 inst->id.fetchSeqNum, inst_pc, 206 inst->id.threadId)) 207 { 208 inst->predictedTaken = true; 209 inst->predictedTarget = inst_pc; 210 branch.target = inst_pc; 211 } 212 } else { 213 DPRINTF(Branch, "Not attempting prediction for inst: %s\n", inst); 214* } 215 216 /* If we predict taken, set branch and update sequence numbers / 217* if (inst->predictedTaken) { 218 /* Update the predictionSeqNum and remember the streamSeqNum that it 219 * was associated with / 220* thread.expectedStreamSeqNum = inst->id.streamSeqNum; 221 222 BranchData new_branch = BranchData(BranchData::BranchPrediction, 223 inst->id.threadId, 224 inst->id.streamSeqNum, thread.predictionSeqNum + 1, 225 inst->predictedTarget, inst); 226 227 /* Mark with a new prediction number by the stream number of the 228 * instruction causing the prediction / 229* thread.predictionSeqNum++; 230 branch = new_branch; 231 232 DPRINTF(Branch, "Branch predicted taken inst: %s target: %s" 233 " new predictionSeqNum: %d\n", 234 inst, inst->predictedTarget, thread.predictionSeqNum); 235* } 236} 237 238void 239Fetch2::evaluate() 240{ 241 /* Push input onto appropriate input buffer / 242* if (!inp.outputWire->isBubble()) 243 inputBuffer[inp.outputWire->id.threadId].setTail(inp.outputWire); 244* 245 ForwardInstData &insts_out = out.inputWire; 246* BranchData prediction; 247 BranchData &branch_inp = branchInp.outputWire; 248* 249 assert(insts_out.isBubble()); 250 251 /* React to branches from Execute to update local branch prediction 252 * structures / 253* updateBranchPrediction(branch_inp); 254 255 /* If a branch arrives, don't try and do anything about it. Only 256 * react to your own predictions / 257* if (branch_inp.isStreamChange()) { 258 DPRINTF(Fetch, "Dumping all input as a stream changing branch" 259 " has arrived\n"); 260 dumpAllInput(branch_inp.threadId); 261 fetchInfo[branch_inp.threadId].havePC = false; 262 } 263 264 assert(insts_out.isBubble()); 265 /* Even when blocked, clear out input lines with the wrong 266 * prediction sequence number / 267* for (ThreadID tid = 0; tid < cpu.numThreads; tid++) { 268 Fetch2ThreadInfo &thread = fetchInfo[tid]; 269 270 thread.blocked = !nextStageReserve[tid].canReserve(); 271 272 const ForwardLineData line_in = getInput(tid); 273* 274 while (line_in && 275 thread.expectedStreamSeqNum == line_in->id.streamSeqNum && 276 thread.predictionSeqNum != line_in->id.predictionSeqNum) 277 { 278 DPRINTF(Fetch, "Discarding line %s" 279 " due to predictionSeqNum mismatch (expected: %d)\n", 280 line_in->id, thread.predictionSeqNum); 281 282 popInput(tid); 283 fetchInfo[tid].havePC = false; 284 285 if (processMoreThanOneInput) { 286 DPRINTF(Fetch, "Wrapping\n"); 287 line_in = getInput(tid); 288 } else { 289 line_in = NULL; 290 } 291 } 292 } 293 294 ThreadID tid = getScheduledThread(); 295 DPRINTF(Fetch, "Scheduled Thread: %d\n", tid); 296 297 assert(insts_out.isBubble()); 298 if (tid != InvalidThreadID) { 299 Fetch2ThreadInfo &fetch_info = fetchInfo[tid]; 300 301 const ForwardLineData line_in = getInput(tid); 302* 303 unsigned int output_index = 0; 304 305 /* Pack instructions into the output while we can. This may involve 306 * using more than one input line. Note that lineWidth will be 0 307 * for faulting lines / 308* while (line_in && 309 (line_in->isFault() \|\| 310 fetch_info.inputIndex < line_in->lineWidth) && /* More input / 311* output_index < outputWidth && /* More output to fill / 312* prediction.isBubble() /* No predicted branch /) 313* { 314 ThreadContext thread = cpu.getContext(line_in->id.threadId); 315* TheISA::Decoder decoder = thread->getDecoderPtr(); 316* 317 /* Discard line due to prediction sequence number being wrong but 318 * without the streamSeqNum number having changed / 319* bool discard_line = 320 fetch_info.expectedStreamSeqNum == line_in->id.streamSeqNum && 321 fetch_info.predictionSeqNum != line_in->id.predictionSeqNum; 322 323 /* Set the PC if the stream changes. Setting havePC to false in 324 * a previous cycle handles all other change of flow of control 325 * issues / 326* bool set_pc = fetch_info.lastStreamSeqNum != line_in->id.streamSeqNum; 327 328 if (!discard_line && (!fetch_info.havePC \|\| set_pc)) { 329 /* Set the inputIndex to be the MachInst-aligned offset 330 * from lineBaseAddr of the new PC value / 331* fetch_info.inputIndex = 332 (line_in->pc.instAddr() & BaseCPU::PCMask) - 333 line_in->lineBaseAddr; 334 DPRINTF(Fetch, "Setting new PC value: %s inputIndex: 0x%x" 335 " lineBaseAddr: 0x%x lineWidth: 0x%x\n", 336 line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr, 337 line_in->lineWidth); 338 fetch_info.pc = line_in->pc; 339 fetch_info.havePC = true; 340 decoder->reset(); 341 } 342 343 /* The generated instruction. Leave as NULL if no instruction 344 * is to be packed into the output / 345* MinorDynInstPtr dyn_inst = NULL; 346 347 if (discard_line) { 348 /* Rest of line was from an older prediction in the same 349 * stream / 350* DPRINTF(Fetch, "Discarding line %s (from inputIndex: %d)" 351 " due to predictionSeqNum mismatch (expected: %d)\n", 352 line_in->id, fetch_info.inputIndex, 353 fetch_info.predictionSeqNum); 354 } else if (line_in->isFault()) { 355 /* Pack a fault as a MinorDynInst with ->fault set / 356* 357 /* Make a new instruction and pick up the line, stream, 358 * prediction, thread ids from the incoming line / 359* dyn_inst = new MinorDynInst(line_in->id); 360 361 /* Fetch and prediction sequence numbers originate here / 362* dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum; 363 dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum; 364 /* To complete the set, test that exec sequence number has 365 * not been set / 366* assert(dyn_inst->id.execSeqNum == 0); 367 368 dyn_inst->pc = fetch_info.pc; 369 370 /* Pack a faulting instruction but allow other 371 * instructions to be generated. (Fetch2 makes no 372 * immediate judgement about streamSeqNum) / 373* dyn_inst->fault = line_in->fault; 374 DPRINTF(Fetch, "Fault being passed output_index: " 375 "%d: %s\n", output_index, dyn_inst->fault->name()); 376 } else { 377 uint8_t line = line_in->line; 378* 379 TheISA::MachInst inst_word; 380 /* The instruction is wholly in the line, can just 381 * assign / 382* inst_word = TheISA::gtoh( 383 (reinterpret_cast<TheISA::MachInst > 384 (line + fetch_info.inputIndex))); 385 386 if (!decoder->instReady()) { 387 decoder->moreBytes(fetch_info.pc, 388 line_in->lineBaseAddr + fetch_info.inputIndex, 389 inst_word); 390 DPRINTF(Fetch, "Offering MachInst to decoder addr: 0x%x\n", 391 line_in->lineBaseAddr + fetch_info.inputIndex); 392 } 393 394 /* Maybe make the above a loop to accomodate ISAs with 395 * instructions longer than sizeof(MachInst) / 396* 397 if (decoder->instReady()) { 398 /* Make a new instruction and pick up the line, stream, 399 * prediction, thread ids from the incoming line / 400* dyn_inst = new MinorDynInst(line_in->id); 401 402 /* Fetch and prediction sequence numbers originate here / 403* dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum; 404 dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum; 405 /* To complete the set, test that exec sequence number 406 * has not been set / 407* assert(dyn_inst->id.execSeqNum == 0); 408 409 /* Note that the decoder can update the given PC. 410 * Remember not to assign it until after calling 411 * decode / 412* StaticInstPtr decoded_inst = decoder->decode(fetch_info.pc); 413 dyn_inst->staticInst = decoded_inst; 414 415 dyn_inst->pc = fetch_info.pc; 416 DPRINTF(Fetch, "decoder inst %s\n", dyn_inst); 417* 418 419 DPRINTF(Fetch, "Instruction extracted from line %s" 420 " lineWidth: %d output_index: %d inputIndex: %d" 421 " pc: %s inst: %s\n", 422 line_in->id, 423 line_in->lineWidth, output_index, fetch_info.inputIndex, 424 fetch_info.pc, dyn_inst); 425* 426#if THE_ISA == X86_ISA \|\| THE_ISA == ARM_ISA 427 /* In SE mode, it's possible to branch to a microop when 428 * replaying faults such as page faults (or simply 429 * intra-microcode branches in X86). Unfortunately, 430 * as Minor has micro-op decomposition in a separate 431 * pipeline stage from instruction decomposition, the 432 * following advancePC (which may follow a branch with 433 * microPC() != 0) must see a fresh macroop. This 434 * kludge should be improved with an addition to PCState 435 * but I offer it in this form for the moment 436 * 437 * X86 can branch within microops so we need to deal with 438 * the case that, after a branch, the first un-advanced PC 439 * may be pointing to a microop other than 0. Once 440 * advanced, however, the microop number must be 0 / 441* fetch_info.pc.upc(0); 442 fetch_info.pc.nupc(1); 443#endif 444 445 /* Advance PC for the next instruction / 446* TheISA::advancePC(fetch_info.pc, decoded_inst); 447 448 /* Predict any branches and issue a branch if 449 * necessary / 450* predictBranch(dyn_inst, prediction); 451 } else { 452 DPRINTF(Fetch, "Inst not ready yet\n"); 453 } 454 455 /* Step on the pointer into the line if there's no 456 * complete instruction waiting / 457* if (decoder->needMoreBytes()) { 458 fetch_info.inputIndex += sizeof(TheISA::MachInst); 459 460 DPRINTF(Fetch, "Updated inputIndex value PC: %s" 461 " inputIndex: 0x%x lineBaseAddr: 0x%x lineWidth: 0x%x\n", 462 line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr, 463 line_in->lineWidth); 464 } 465 } 466 467 if (dyn_inst) { 468 /* Step to next sequence number / 469* fetch_info.fetchSeqNum++; 470 471 /* Correctly size the output before writing / 472* if (output_index == 0) { 473 insts_out.resize(outputWidth); 474 } 475 /* Pack the generated dynamic instruction into the output / 476* insts_out.insts[output_index] = dyn_inst; 477 output_index++; 478 479 /* Output MinorTrace instruction info for 480 * pre-microop decomposition macroops / 481* if (DTRACE(MinorTrace) && !dyn_inst->isFault() && 482 dyn_inst->staticInst->isMacroop()) 483 { 484 dyn_inst->minorTraceInst(this); 485* } 486 } 487 488 /* Remember the streamSeqNum of this line so we can tell when 489 * we change stream / 490* fetch_info.lastStreamSeqNum = line_in->id.streamSeqNum; 491 492 /* Asked to discard line or there was a branch or fault / 493* if (!prediction.isBubble() \|\| /* The remains of a 494 line with a prediction in it / 495* line_in->isFault() /* A line which is just a fault /) 496* { 497 DPRINTF(Fetch, "Discarding all input on branch/fault\n"); 498 dumpAllInput(tid); 499 fetch_info.havePC = false; 500 line_in = NULL; 501 } else if (discard_line) { 502 /* Just discard one line, one's behind it may have new 503 * stream sequence numbers. There's a DPRINTF above 504 * for this event / 505* popInput(tid); 506 fetch_info.havePC = false; 507 line_in = NULL; 508 } else if (fetch_info.inputIndex == line_in->lineWidth) { 509 /* Got to end of a line, pop the line but keep PC 510 * in case this is a line-wrapping inst. / 511* popInput(tid); 512 line_in = NULL; 513 } 514 515 if (!line_in && processMoreThanOneInput) { 516 DPRINTF(Fetch, "Wrapping\n"); 517 line_in = getInput(tid); 518 } 519 } 520 521 /* The rest of the output (if any) should already have been packed 522 * with bubble instructions by insts_out's initialisation / 523* } 524 if (tid == InvalidThreadID) { 525 assert(insts_out.isBubble()); 526 } 527 /** Reserve a slot in the next stage and output data / 528* predictionOut.inputWire = prediction; 529* 530 /* If we generated output, reserve space for the result in the next stage 531 * and mark the stage as being active this cycle / 532* if (!insts_out.isBubble()) { 533 /* Note activity of following buffer / 534* cpu.activityRecorder->activity(); 535 insts_out.threadId = tid; 536 nextStageReserve[tid].reserve(); 537 } 538 539 /* If we still have input to process and somewhere to put it, 540 * mark stage as active / 541* for (ThreadID i = 0; i < cpu.numThreads; i++) 542 { 543 if (getInput(i) && nextStageReserve[i].canReserve()) { 544 cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId); 545 break; 546 } 547 } 548 549 /* Make sure the input (if any left) is pushed / 550* if (!inp.outputWire->isBubble()) 551 inputBuffer[inp.outputWire->id.threadId].pushTail(); 552} 553 554inline ThreadID 555Fetch2::getScheduledThread() 556{ 557 /* Select thread via policy. / 558* std::vector<ThreadID> priority_list; 559 560 switch (cpu.threadPolicy) { 561 case Enums::SingleThreaded: 562 priority_list.push_back(0); 563 break; 564 case Enums::RoundRobin: 565 priority_list = cpu.roundRobinPriority(threadPriority); 566 break; 567 case Enums::Random: 568 priority_list = cpu.randomPriority(); 569 break; 570 default: 571 panic("Unknown fetch policy"); 572 } 573 574 for (auto tid : priority_list) { 575 if (getInput(tid) && !fetchInfo[tid].blocked) { 576 threadPriority = tid; 577 return tid; 578 } 579 } 580 581 return InvalidThreadID; 582} 583 584bool 585Fetch2::isDrained() 586{ 587 for (const auto &buffer : inputBuffer) { 588 if (!buffer.empty()) 589 return false; 590 } 591 592 return (inp.outputWire).isBubble() && 593* (predictionOut.inputWire).isBubble(); 594} 595* 596void 597Fetch2::minorTrace() const 598{ 599 std::ostringstream data; 600 601 if (fetchInfo[0].blocked) 602 data << 'B'; 603 else 604 (out.inputWire).reportData(data); 605* 606 MINORTRACE("inputIndex=%d havePC=%d predictionSeqNum=%d insts=%s\n", 607 fetchInfo[0].inputIndex, fetchInfo[0].havePC, fetchInfo[0].predictionSeqNum, data.str()); 608 inputBuffer[0].minorTrace(); 609} 610 611}