fetch2.cc (11783:f94c14fd6561) fetch2.cc (11793:ef606668d247)
1/*
2 * Copyright (c) 2013-2014 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Andrew Bardsley
38 */
39
1/*
2 * Copyright (c) 2013-2014 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Andrew Bardsley
38 */
39
40#include "cpu/minor/fetch2.hh"
41
40#include <string>
41
42#include "arch/decoder.hh"
43#include "arch/utility.hh"
42#include <string>
43
44#include "arch/decoder.hh"
45#include "arch/utility.hh"
44#include "cpu/minor/fetch2.hh"
45#include "cpu/minor/pipeline.hh"
46#include "cpu/pred/bpred_unit.hh"
47#include "debug/Branch.hh"
48#include "debug/Fetch.hh"
49#include "debug/MinorTrace.hh"
50
51namespace Minor
52{
53
54Fetch2::Fetch2(const std::string &name,
55 MinorCPU &cpu_,
56 MinorCPUParams &params,
57 Latch<ForwardLineData>::Output inp_,
58 Latch<BranchData>::Output branchInp_,
59 Latch<BranchData>::Input predictionOut_,
60 Latch<ForwardInstData>::Input out_,
61 std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer) :
62 Named(name),
63 cpu(cpu_),
64 inp(inp_),
65 branchInp(branchInp_),
66 predictionOut(predictionOut_),
67 out(out_),
68 nextStageReserve(next_stage_input_buffer),
69 outputWidth(params.decodeInputWidth),
70 processMoreThanOneInput(params.fetch2CycleInput),
71 branchPredictor(*params.branchPred),
72 fetchInfo(params.numThreads),
73 threadPriority(0)
74{
75 if (outputWidth < 1)
76 fatal("%s: decodeInputWidth must be >= 1 (%d)\n", name, outputWidth);
77
78 if (params.fetch2InputBufferSize < 1) {
79 fatal("%s: fetch2InputBufferSize must be >= 1 (%d)\n", name,
80 params.fetch2InputBufferSize);
81 }
82
83 /* Per-thread input buffers */
84 for (ThreadID tid = 0; tid < params.numThreads; tid++) {
85 inputBuffer.push_back(
86 InputBuffer<ForwardLineData>(
87 name + ".inputBuffer" + std::to_string(tid), "lines",
88 params.fetch2InputBufferSize));
89 }
90}
91
92const ForwardLineData *
93Fetch2::getInput(ThreadID tid)
94{
95 /* Get a line from the inputBuffer to work with */
96 if (!inputBuffer[tid].empty()) {
97 return &(inputBuffer[tid].front());
98 } else {
99 return NULL;
100 }
101}
102
103void
104Fetch2::popInput(ThreadID tid)
105{
106 if (!inputBuffer[tid].empty()) {
107 inputBuffer[tid].front().freeLine();
108 inputBuffer[tid].pop();
109 }
110
111 fetchInfo[tid].inputIndex = 0;
112}
113
114void
115Fetch2::dumpAllInput(ThreadID tid)
116{
117 DPRINTF(Fetch, "Dumping whole input buffer\n");
118 while (!inputBuffer[tid].empty())
119 popInput(tid);
120
121 fetchInfo[tid].inputIndex = 0;
122}
123
124void
125Fetch2::updateBranchPrediction(const BranchData &branch)
126{
127 MinorDynInstPtr inst = branch.inst;
128
129 /* Don't even consider instructions we didn't try to predict or faults */
130 if (inst->isFault() || !inst->triedToPredict)
131 return;
132
133 switch (branch.reason) {
134 case BranchData::NoBranch:
135 /* No data to update */
136 break;
137 case BranchData::Interrupt:
138 /* Never try to predict interrupts */
139 break;
140 case BranchData::SuspendThread:
141 /* Don't need to act on suspends */
142 break;
143 case BranchData::HaltFetch:
144 /* Don't need to act on fetch wakeup */
145 break;
146 case BranchData::BranchPrediction:
147 /* Shouldn't happen. Fetch2 is the only source of
148 * BranchPredictions */
149 break;
150 case BranchData::UnpredictedBranch:
151 /* Unpredicted branch or barrier */
152 DPRINTF(Branch, "Unpredicted branch seen inst: %s\n", *inst);
153 branchPredictor.squash(inst->id.fetchSeqNum,
154 branch.target, true, inst->id.threadId);
155 // Update after squashing to accomodate O3CPU
156 // using the branch prediction code.
157 branchPredictor.update(inst->id.fetchSeqNum,
158 inst->id.threadId);
159 break;
160 case BranchData::CorrectlyPredictedBranch:
161 /* Predicted taken, was taken */
162 DPRINTF(Branch, "Branch predicted correctly inst: %s\n", *inst);
163 branchPredictor.update(inst->id.fetchSeqNum,
164 inst->id.threadId);
165 break;
166 case BranchData::BadlyPredictedBranch:
167 /* Predicted taken, not taken */
168 DPRINTF(Branch, "Branch mis-predicted inst: %s\n", *inst);
169 branchPredictor.squash(inst->id.fetchSeqNum,
170 branch.target /* Not used */, false, inst->id.threadId);
171 // Update after squashing to accomodate O3CPU
172 // using the branch prediction code.
173 branchPredictor.update(inst->id.fetchSeqNum,
174 inst->id.threadId);
175 break;
176 case BranchData::BadlyPredictedBranchTarget:
177 /* Predicted taken, was taken but to a different target */
178 DPRINTF(Branch, "Branch mis-predicted target inst: %s target: %s\n",
179 *inst, branch.target);
180 branchPredictor.squash(inst->id.fetchSeqNum,
181 branch.target, true, inst->id.threadId);
182 break;
183 }
184}
185
186void
187Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch)
188{
189 Fetch2ThreadInfo &thread = fetchInfo[inst->id.threadId];
190 TheISA::PCState inst_pc = inst->pc;
191
192 assert(!inst->predictedTaken);
193
194 /* Skip non-control/sys call instructions */
195 if (inst->staticInst->isControl() ||
196 inst->staticInst->isSyscall())
197 {
198 /* Tried to predict */
199 inst->triedToPredict = true;
200
201 DPRINTF(Branch, "Trying to predict for inst: %s\n", *inst);
202
203 if (branchPredictor.predict(inst->staticInst,
204 inst->id.fetchSeqNum, inst_pc,
205 inst->id.threadId))
206 {
207 inst->predictedTaken = true;
208 inst->predictedTarget = inst_pc;
209 branch.target = inst_pc;
210 }
211 } else {
212 DPRINTF(Branch, "Not attempting prediction for inst: %s\n", *inst);
213 }
214
215 /* If we predict taken, set branch and update sequence numbers */
216 if (inst->predictedTaken) {
217 /* Update the predictionSeqNum and remember the streamSeqNum that it
218 * was associated with */
219 thread.expectedStreamSeqNum = inst->id.streamSeqNum;
220
221 BranchData new_branch = BranchData(BranchData::BranchPrediction,
222 inst->id.threadId,
223 inst->id.streamSeqNum, thread.predictionSeqNum + 1,
224 inst->predictedTarget, inst);
225
226 /* Mark with a new prediction number by the stream number of the
227 * instruction causing the prediction */
228 thread.predictionSeqNum++;
229 branch = new_branch;
230
231 DPRINTF(Branch, "Branch predicted taken inst: %s target: %s"
232 " new predictionSeqNum: %d\n",
233 *inst, inst->predictedTarget, thread.predictionSeqNum);
234 }
235}
236
/** Evaluate the Fetch2 stage once.
 *
 *  In order, this:
 *   1. stages any non-bubble incoming line at the tail of its thread's
 *      input buffer (setTail now, pushTail at the very end),
 *   2. passes the incoming branch to updateBranchPrediction and, if it is
 *      a stream change, dumps that thread's buffered input,
 *   3. for every thread, records whether the next stage can accept
 *      output (blocked) and discards front lines whose predictionSeqNum
 *      no longer matches within the expected stream,
 *   4. for the thread picked by getScheduledThread, feeds line data to
 *      the decoder and packs up to outputWidth instructions (or a fault)
 *      into the output, stopping as soon as a branch is predicted,
 *   5. writes the prediction to predictionOut, reserves space in the next
 *      stage if any output was produced and marks the stage active if
 *      there is still input that could be processed. */
237void
238Fetch2::evaluate()
239{
240 /* Push input onto appropriate input buffer */
241 if (!inp.outputWire->isBubble())
242 inputBuffer[inp.outputWire->id.threadId].setTail(*inp.outputWire);
243
244 ForwardInstData &insts_out = *out.inputWire;
245 BranchData prediction;
246 BranchData &branch_inp = *branchInp.outputWire;
247
248 assert(insts_out.isBubble());
249
250 /* React to branches from Execute to update local branch prediction
251 * structures */
252 updateBranchPrediction(branch_inp);
253
254 /* If a branch arrives, don't try and do anything about it. Only
255 * react to your own predictions */
256 if (branch_inp.isStreamChange()) {
257 DPRINTF(Fetch, "Dumping all input as a stream changing branch"
258 " has arrived\n");
259 dumpAllInput(branch_inp.threadId);
260 fetchInfo[branch_inp.threadId].havePC = false;
261 }
262
263 assert(insts_out.isBubble());
264 /* Even when blocked, clear out input lines with the wrong
265 * prediction sequence number */
266 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
267 Fetch2ThreadInfo &thread = fetchInfo[tid];
268
/* A thread is blocked when the next stage's buffer for it cannot
 * take a reservation; getScheduledThread skips blocked threads */
269 thread.blocked = !nextStageReserve[tid].canReserve();
270
271 const ForwardLineData *line_in = getInput(tid);
272
273 while (line_in &&
274 thread.expectedStreamSeqNum == line_in->id.streamSeqNum &&
275 thread.predictionSeqNum != line_in->id.predictionSeqNum)
276 {
277 DPRINTF(Fetch, "Discarding line %s"
278 " due to predictionSeqNum mismatch (expected: %d)\n",
279 line_in->id, thread.predictionSeqNum);
280
281 popInput(tid);
282 fetchInfo[tid].havePC = false;
283
/* Only keep discarding further lines in this call when the stage is
 * configured to process more than one input line */
284 if (processMoreThanOneInput) {
285 DPRINTF(Fetch, "Wrapping\n");
286 line_in = getInput(tid);
287 } else {
288 line_in = NULL;
289 }
290 }
291 }
292
/* Choose the thread to work on; InvalidThreadID is returned when no
 * thread has both input and an unblocked output */
293 ThreadID tid = getScheduledThread();
294 DPRINTF(Fetch, "Scheduled Thread: %d\n", tid);
295
296 assert(insts_out.isBubble());
297 if (tid != InvalidThreadID) {
298 Fetch2ThreadInfo &fetch_info = fetchInfo[tid];
299
300 const ForwardLineData *line_in = getInput(tid);
301
302 unsigned int output_index = 0;
303
304 /* Pack instructions into the output while we can. This may involve
305 * using more than one input line. Note that lineWidth will be 0
306 * for faulting lines */
307 while (line_in &&
308 (line_in->isFault() ||
309 fetch_info.inputIndex < line_in->lineWidth) && /* More input */
310 output_index < outputWidth && /* More output to fill */
311 prediction.isBubble() /* No predicted branch */)
312 {
313 ThreadContext *thread = cpu.getContext(line_in->id.threadId);
314 TheISA::Decoder *decoder = thread->getDecoderPtr();
315
316 /* Discard line due to prediction sequence number being wrong but
317 * without the streamSeqNum number having changed */
318 bool discard_line =
319 fetch_info.expectedStreamSeqNum == line_in->id.streamSeqNum &&
320 fetch_info.predictionSeqNum != line_in->id.predictionSeqNum;
321
322 /* Set the PC if the stream changes. Setting havePC to false in
323 * a previous cycle handles all other change of flow of control
324 * issues */
325 bool set_pc = fetch_info.lastStreamSeqNum != line_in->id.streamSeqNum;
326
327 if (!discard_line && (!fetch_info.havePC || set_pc)) {
328 /* Set the inputIndex to be the MachInst-aligned offset
329 * from lineBaseAddr of the new PC value */
330 fetch_info.inputIndex =
331 (line_in->pc.instAddr() & BaseCPU::PCMask) -
332 line_in->lineBaseAddr;
333 DPRINTF(Fetch, "Setting new PC value: %s inputIndex: 0x%x"
334 " lineBaseAddr: 0x%x lineWidth: 0x%x\n",
335 line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr,
336 line_in->lineWidth);
337 fetch_info.pc = line_in->pc;
338 fetch_info.havePC = true;
/* Restart decoding from the new PC */
339 decoder->reset();
340 }
341
342 /* The generated instruction. Leave as NULL if no instruction
343 * is to be packed into the output */
344 MinorDynInstPtr dyn_inst = NULL;
345
346 if (discard_line) {
347 /* Rest of line was from an older prediction in the same
348 * stream */
349 DPRINTF(Fetch, "Discarding line %s (from inputIndex: %d)"
350 " due to predictionSeqNum mismatch (expected: %d)\n",
351 line_in->id, fetch_info.inputIndex,
352 fetch_info.predictionSeqNum);
353 } else if (line_in->isFault()) {
354 /* Pack a fault as a MinorDynInst with ->fault set */
355
356 /* Make a new instruction and pick up the line, stream,
357 * prediction, thread ids from the incoming line */
358 dyn_inst = new MinorDynInst(line_in->id);
359
360 /* Fetch and prediction sequence numbers originate here */
361 dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum;
362 dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum;
363 /* To complete the set, test that exec sequence number has
364 * not been set */
365 assert(dyn_inst->id.execSeqNum == 0);
366
367 dyn_inst->pc = fetch_info.pc;
368
369 /* Pack a faulting instruction but allow other
370 * instructions to be generated. (Fetch2 makes no
371 * immediate judgement about streamSeqNum) */
372 dyn_inst->fault = line_in->fault;
373 DPRINTF(Fetch, "Fault being passed output_index: "
374 "%d: %s\n", output_index, dyn_inst->fault->name());
375 } else {
376 uint8_t *line = line_in->line;
377
378 TheISA::MachInst inst_word;
379 /* The instruction is wholly in the line, can just
380 * assign */
/* NOTE(review): this cast-and-dereference presumes line + inputIndex
 * is suitably aligned for MachInst and lies inside the line data --
 * confirm against how inputIndex is derived above */
381 inst_word = TheISA::gtoh(
382 *(reinterpret_cast<TheISA::MachInst *>
383 (line + fetch_info.inputIndex)));
384
/* Only offer a new word when the decoder does not already hold a
 * complete instruction */
385 if (!decoder->instReady()) {
386 decoder->moreBytes(fetch_info.pc,
387 line_in->lineBaseAddr + fetch_info.inputIndex,
388 inst_word);
389 DPRINTF(Fetch, "Offering MachInst to decoder addr: 0x%x\n",
390 line_in->lineBaseAddr + fetch_info.inputIndex);
391 }
392
393 /* Maybe make the above a loop to accommodate ISAs with
394 * instructions longer than sizeof(MachInst) */
395
396 if (decoder->instReady()) {
397 /* Make a new instruction and pick up the line, stream,
398 * prediction, thread ids from the incoming line */
399 dyn_inst = new MinorDynInst(line_in->id);
400
401 /* Fetch and prediction sequence numbers originate here */
402 dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum;
403 dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum;
404 /* To complete the set, test that exec sequence number
405 * has not been set */
406 assert(dyn_inst->id.execSeqNum == 0);
407
408 /* Note that the decoder can update the given PC.
409 * Remember not to assign it until *after* calling
410 * decode */
411 StaticInstPtr decoded_inst = decoder->decode(fetch_info.pc);
412 dyn_inst->staticInst = decoded_inst;
413
414 dyn_inst->pc = fetch_info.pc;
415 DPRINTF(Fetch, "decoder inst %s\n", *dyn_inst);
416
417
418 DPRINTF(Fetch, "Instruction extracted from line %s"
419 " lineWidth: %d output_index: %d inputIndex: %d"
420 " pc: %s inst: %s\n",
421 line_in->id,
422 line_in->lineWidth, output_index, fetch_info.inputIndex,
423 fetch_info.pc, *dyn_inst);
424
425#if THE_ISA == X86_ISA || THE_ISA == ARM_ISA
426 /* In SE mode, it's possible to branch to a microop when
427 * replaying faults such as page faults (or simply
428 * intra-microcode branches in X86). Unfortunately,
429 * as Minor has micro-op decomposition in a separate
430 * pipeline stage from instruction decomposition, the
431 * following advancePC (which may follow a branch with
432 * microPC() != 0) *must* see a fresh macroop. This
433 * kludge should be improved with an addition to PCState
434 * but I offer it in this form for the moment
435 *
436 * X86 can branch within microops so we need to deal with
437 * the case that, after a branch, the first un-advanced PC
438 * may be pointing to a microop other than 0. Once
439 * advanced, however, the microop number *must* be 0 */
440 fetch_info.pc.upc(0);
441 fetch_info.pc.nupc(1);
442#endif
443
444 /* Advance PC for the next instruction */
445 TheISA::advancePC(fetch_info.pc, decoded_inst);
446
447 /* Predict any branches and issue a branch if
448 * necessary */
449 predictBranch(dyn_inst, prediction);
450 } else {
451 DPRINTF(Fetch, "Inst not ready yet\n");
452 }
453
454 /* Step on the pointer into the line if there's no
455 * complete instruction waiting */
456 if (decoder->needMoreBytes()) {
457 fetch_info.inputIndex += sizeof(TheISA::MachInst);
458
459 DPRINTF(Fetch, "Updated inputIndex value PC: %s"
460 " inputIndex: 0x%x lineBaseAddr: 0x%x lineWidth: 0x%x\n",
461 line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr,
462 line_in->lineWidth);
463 }
464 }
465
466 if (dyn_inst) {
467 /* Step to next sequence number */
468 fetch_info.fetchSeqNum++;
469
470 /* Correctly size the output before writing */
471 if (output_index == 0) {
472 insts_out.resize(outputWidth);
473 }
474 /* Pack the generated dynamic instruction into the output */
475 insts_out.insts[output_index] = dyn_inst;
476 output_index++;
477
478 /* Output MinorTrace instruction info for
479 * pre-microop decomposition macroops */
480 if (DTRACE(MinorTrace) && !dyn_inst->isFault() &&
481 dyn_inst->staticInst->isMacroop())
482 {
483 dyn_inst->minorTraceInst(*this);
484 }
485 }
486
487 /* Remember the streamSeqNum of this line so we can tell when
488 * we change stream */
489 fetch_info.lastStreamSeqNum = line_in->id.streamSeqNum;
490
491 /* Asked to discard line or there was a branch or fault */
492 if (!prediction.isBubble() || /* The remains of a
493 line with a prediction in it */
494 line_in->isFault() /* A line which is just a fault */)
495 {
496 DPRINTF(Fetch, "Discarding all input on branch/fault\n");
497 dumpAllInput(tid);
498 fetch_info.havePC = false;
499 line_in = NULL;
500 } else if (discard_line) {
501 /* Just discard one line, ones behind it may have new
502 * stream sequence numbers. There's a DPRINTF above
503 * for this event */
504 popInput(tid);
505 fetch_info.havePC = false;
506 line_in = NULL;
507 } else if (fetch_info.inputIndex == line_in->lineWidth) {
508 /* Got to end of a line, pop the line but keep PC
509 * in case this is a line-wrapping inst. */
510 popInput(tid);
511 line_in = NULL;
512 }
513
/* With line_in cleared above, move on to the next buffered line only
 * if multi-line processing is enabled; otherwise the loop ends */
514 if (!line_in && processMoreThanOneInput) {
515 DPRINTF(Fetch, "Wrapping\n");
516 line_in = getInput(tid);
517 }
518 }
519
520 /* The rest of the output (if any) should already have been packed
521 * with bubble instructions by insts_out's initialisation */
522 }
523 if (tid == InvalidThreadID) {
524 assert(insts_out.isBubble());
525 }
526 /** Write this call's prediction to the prediction output. It is
 * whatever predictBranch left in it, possibly still the
 * default-constructed value */
527 *predictionOut.inputWire = prediction;
528
529 /* If we generated output, reserve space for the result in the next stage
530 * and mark the stage as being active this cycle */
531 if (!insts_out.isBubble()) {
532 /* Note activity of following buffer */
533 cpu.activityRecorder->activity();
534 insts_out.threadId = tid;
535 nextStageReserve[tid].reserve();
536 }
537
538 /* If we still have input to process and somewhere to put it,
539 * mark stage as active */
540 for (ThreadID i = 0; i < cpu.numThreads; i++)
541 {
542 if (getInput(i) && nextStageReserve[i].canReserve()) {
543 cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId);
544 break;
545 }
546 }
547
548 /* Make sure the input (if any left) is pushed */
549 if (!inp.outputWire->isBubble())
550 inputBuffer[inp.outputWire->id.threadId].pushTail();
551}
552
553inline ThreadID
554Fetch2::getScheduledThread()
555{
556 /* Select thread via policy. */
557 std::vector<ThreadID> priority_list;
558
559 switch (cpu.threadPolicy) {
560 case Enums::SingleThreaded:
561 priority_list.push_back(0);
562 break;
563 case Enums::RoundRobin:
564 priority_list = cpu.roundRobinPriority(threadPriority);
565 break;
566 case Enums::Random:
567 priority_list = cpu.randomPriority();
568 break;
569 default:
570 panic("Unknown fetch policy");
571 }
572
573 for (auto tid : priority_list) {
574 if (getInput(tid) && !fetchInfo[tid].blocked) {
575 threadPriority = tid;
576 return tid;
577 }
578 }
579
580 return InvalidThreadID;
581}
582
583bool
584Fetch2::isDrained()
585{
586 for (const auto &buffer : inputBuffer) {
587 if (!buffer.empty())
588 return false;
589 }
590
591 return (*inp.outputWire).isBubble() &&
592 (*predictionOut.inputWire).isBubble();
593}
594
595void
596Fetch2::minorTrace() const
597{
598 std::ostringstream data;
599
600 if (fetchInfo[0].blocked)
601 data << 'B';
602 else
603 (*out.inputWire).reportData(data);
604
605 MINORTRACE("inputIndex=%d havePC=%d predictionSeqNum=%d insts=%s\n",
606 fetchInfo[0].inputIndex, fetchInfo[0].havePC, fetchInfo[0].predictionSeqNum, data.str());
607 inputBuffer[0].minorTrace();
608}
609
610}
46#include "cpu/minor/pipeline.hh"
47#include "cpu/pred/bpred_unit.hh"
48#include "debug/Branch.hh"
49#include "debug/Fetch.hh"
50#include "debug/MinorTrace.hh"
51
52namespace Minor
53{
54
55Fetch2::Fetch2(const std::string &name,
56 MinorCPU &cpu_,
57 MinorCPUParams &params,
58 Latch<ForwardLineData>::Output inp_,
59 Latch<BranchData>::Output branchInp_,
60 Latch<BranchData>::Input predictionOut_,
61 Latch<ForwardInstData>::Input out_,
62 std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer) :
63 Named(name),
64 cpu(cpu_),
65 inp(inp_),
66 branchInp(branchInp_),
67 predictionOut(predictionOut_),
68 out(out_),
69 nextStageReserve(next_stage_input_buffer),
70 outputWidth(params.decodeInputWidth),
71 processMoreThanOneInput(params.fetch2CycleInput),
72 branchPredictor(*params.branchPred),
73 fetchInfo(params.numThreads),
74 threadPriority(0)
75{
76 if (outputWidth < 1)
77 fatal("%s: decodeInputWidth must be >= 1 (%d)\n", name, outputWidth);
78
79 if (params.fetch2InputBufferSize < 1) {
80 fatal("%s: fetch2InputBufferSize must be >= 1 (%d)\n", name,
81 params.fetch2InputBufferSize);
82 }
83
84 /* Per-thread input buffers */
85 for (ThreadID tid = 0; tid < params.numThreads; tid++) {
86 inputBuffer.push_back(
87 InputBuffer<ForwardLineData>(
88 name + ".inputBuffer" + std::to_string(tid), "lines",
89 params.fetch2InputBufferSize));
90 }
91}
92
93const ForwardLineData *
94Fetch2::getInput(ThreadID tid)
95{
96 /* Get a line from the inputBuffer to work with */
97 if (!inputBuffer[tid].empty()) {
98 return &(inputBuffer[tid].front());
99 } else {
100 return NULL;
101 }
102}
103
104void
105Fetch2::popInput(ThreadID tid)
106{
107 if (!inputBuffer[tid].empty()) {
108 inputBuffer[tid].front().freeLine();
109 inputBuffer[tid].pop();
110 }
111
112 fetchInfo[tid].inputIndex = 0;
113}
114
115void
116Fetch2::dumpAllInput(ThreadID tid)
117{
118 DPRINTF(Fetch, "Dumping whole input buffer\n");
119 while (!inputBuffer[tid].empty())
120 popInput(tid);
121
122 fetchInfo[tid].inputIndex = 0;
123}
124
125void
126Fetch2::updateBranchPrediction(const BranchData &branch)
127{
128 MinorDynInstPtr inst = branch.inst;
129
130 /* Don't even consider instructions we didn't try to predict or faults */
131 if (inst->isFault() || !inst->triedToPredict)
132 return;
133
134 switch (branch.reason) {
135 case BranchData::NoBranch:
136 /* No data to update */
137 break;
138 case BranchData::Interrupt:
139 /* Never try to predict interrupts */
140 break;
141 case BranchData::SuspendThread:
142 /* Don't need to act on suspends */
143 break;
144 case BranchData::HaltFetch:
145 /* Don't need to act on fetch wakeup */
146 break;
147 case BranchData::BranchPrediction:
148 /* Shouldn't happen. Fetch2 is the only source of
149 * BranchPredictions */
150 break;
151 case BranchData::UnpredictedBranch:
152 /* Unpredicted branch or barrier */
153 DPRINTF(Branch, "Unpredicted branch seen inst: %s\n", *inst);
154 branchPredictor.squash(inst->id.fetchSeqNum,
155 branch.target, true, inst->id.threadId);
156 // Update after squashing to accomodate O3CPU
157 // using the branch prediction code.
158 branchPredictor.update(inst->id.fetchSeqNum,
159 inst->id.threadId);
160 break;
161 case BranchData::CorrectlyPredictedBranch:
162 /* Predicted taken, was taken */
163 DPRINTF(Branch, "Branch predicted correctly inst: %s\n", *inst);
164 branchPredictor.update(inst->id.fetchSeqNum,
165 inst->id.threadId);
166 break;
167 case BranchData::BadlyPredictedBranch:
168 /* Predicted taken, not taken */
169 DPRINTF(Branch, "Branch mis-predicted inst: %s\n", *inst);
170 branchPredictor.squash(inst->id.fetchSeqNum,
171 branch.target /* Not used */, false, inst->id.threadId);
172 // Update after squashing to accomodate O3CPU
173 // using the branch prediction code.
174 branchPredictor.update(inst->id.fetchSeqNum,
175 inst->id.threadId);
176 break;
177 case BranchData::BadlyPredictedBranchTarget:
178 /* Predicted taken, was taken but to a different target */
179 DPRINTF(Branch, "Branch mis-predicted target inst: %s target: %s\n",
180 *inst, branch.target);
181 branchPredictor.squash(inst->id.fetchSeqNum,
182 branch.target, true, inst->id.threadId);
183 break;
184 }
185}
186
187void
188Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch)
189{
190 Fetch2ThreadInfo &thread = fetchInfo[inst->id.threadId];
191 TheISA::PCState inst_pc = inst->pc;
192
193 assert(!inst->predictedTaken);
194
195 /* Skip non-control/sys call instructions */
196 if (inst->staticInst->isControl() ||
197 inst->staticInst->isSyscall())
198 {
199 /* Tried to predict */
200 inst->triedToPredict = true;
201
202 DPRINTF(Branch, "Trying to predict for inst: %s\n", *inst);
203
204 if (branchPredictor.predict(inst->staticInst,
205 inst->id.fetchSeqNum, inst_pc,
206 inst->id.threadId))
207 {
208 inst->predictedTaken = true;
209 inst->predictedTarget = inst_pc;
210 branch.target = inst_pc;
211 }
212 } else {
213 DPRINTF(Branch, "Not attempting prediction for inst: %s\n", *inst);
214 }
215
216 /* If we predict taken, set branch and update sequence numbers */
217 if (inst->predictedTaken) {
218 /* Update the predictionSeqNum and remember the streamSeqNum that it
219 * was associated with */
220 thread.expectedStreamSeqNum = inst->id.streamSeqNum;
221
222 BranchData new_branch = BranchData(BranchData::BranchPrediction,
223 inst->id.threadId,
224 inst->id.streamSeqNum, thread.predictionSeqNum + 1,
225 inst->predictedTarget, inst);
226
227 /* Mark with a new prediction number by the stream number of the
228 * instruction causing the prediction */
229 thread.predictionSeqNum++;
230 branch = new_branch;
231
232 DPRINTF(Branch, "Branch predicted taken inst: %s target: %s"
233 " new predictionSeqNum: %d\n",
234 *inst, inst->predictedTarget, thread.predictionSeqNum);
235 }
236}
237
238void
239Fetch2::evaluate()
240{
241 /* Push input onto appropriate input buffer */
242 if (!inp.outputWire->isBubble())
243 inputBuffer[inp.outputWire->id.threadId].setTail(*inp.outputWire);
244
245 ForwardInstData &insts_out = *out.inputWire;
246 BranchData prediction;
247 BranchData &branch_inp = *branchInp.outputWire;
248
249 assert(insts_out.isBubble());
250
251 /* React to branches from Execute to update local branch prediction
252 * structures */
253 updateBranchPrediction(branch_inp);
254
255 /* If a branch arrives, don't try and do anything about it. Only
256 * react to your own predictions */
257 if (branch_inp.isStreamChange()) {
258 DPRINTF(Fetch, "Dumping all input as a stream changing branch"
259 " has arrived\n");
260 dumpAllInput(branch_inp.threadId);
261 fetchInfo[branch_inp.threadId].havePC = false;
262 }
263
264 assert(insts_out.isBubble());
265 /* Even when blocked, clear out input lines with the wrong
266 * prediction sequence number */
267 for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
268 Fetch2ThreadInfo &thread = fetchInfo[tid];
269
270 thread.blocked = !nextStageReserve[tid].canReserve();
271
272 const ForwardLineData *line_in = getInput(tid);
273
274 while (line_in &&
275 thread.expectedStreamSeqNum == line_in->id.streamSeqNum &&
276 thread.predictionSeqNum != line_in->id.predictionSeqNum)
277 {
278 DPRINTF(Fetch, "Discarding line %s"
279 " due to predictionSeqNum mismatch (expected: %d)\n",
280 line_in->id, thread.predictionSeqNum);
281
282 popInput(tid);
283 fetchInfo[tid].havePC = false;
284
285 if (processMoreThanOneInput) {
286 DPRINTF(Fetch, "Wrapping\n");
287 line_in = getInput(tid);
288 } else {
289 line_in = NULL;
290 }
291 }
292 }
293
294 ThreadID tid = getScheduledThread();
295 DPRINTF(Fetch, "Scheduled Thread: %d\n", tid);
296
297 assert(insts_out.isBubble());
298 if (tid != InvalidThreadID) {
299 Fetch2ThreadInfo &fetch_info = fetchInfo[tid];
300
301 const ForwardLineData *line_in = getInput(tid);
302
303 unsigned int output_index = 0;
304
305 /* Pack instructions into the output while we can. This may involve
306 * using more than one input line. Note that lineWidth will be 0
307 * for faulting lines */
308 while (line_in &&
309 (line_in->isFault() ||
310 fetch_info.inputIndex < line_in->lineWidth) && /* More input */
311 output_index < outputWidth && /* More output to fill */
312 prediction.isBubble() /* No predicted branch */)
313 {
314 ThreadContext *thread = cpu.getContext(line_in->id.threadId);
315 TheISA::Decoder *decoder = thread->getDecoderPtr();
316
317 /* Discard line due to prediction sequence number being wrong but
318 * without the streamSeqNum number having changed */
319 bool discard_line =
320 fetch_info.expectedStreamSeqNum == line_in->id.streamSeqNum &&
321 fetch_info.predictionSeqNum != line_in->id.predictionSeqNum;
322
323 /* Set the PC if the stream changes. Setting havePC to false in
324 * a previous cycle handles all other change of flow of control
325 * issues */
326 bool set_pc = fetch_info.lastStreamSeqNum != line_in->id.streamSeqNum;
327
328 if (!discard_line && (!fetch_info.havePC || set_pc)) {
329 /* Set the inputIndex to be the MachInst-aligned offset
330 * from lineBaseAddr of the new PC value */
331 fetch_info.inputIndex =
332 (line_in->pc.instAddr() & BaseCPU::PCMask) -
333 line_in->lineBaseAddr;
334 DPRINTF(Fetch, "Setting new PC value: %s inputIndex: 0x%x"
335 " lineBaseAddr: 0x%x lineWidth: 0x%x\n",
336 line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr,
337 line_in->lineWidth);
338 fetch_info.pc = line_in->pc;
339 fetch_info.havePC = true;
340 decoder->reset();
341 }
342
343 /* The generated instruction. Leave as NULL if no instruction
344 * is to be packed into the output */
345 MinorDynInstPtr dyn_inst = NULL;
346
347 if (discard_line) {
348 /* Rest of line was from an older prediction in the same
349 * stream */
350 DPRINTF(Fetch, "Discarding line %s (from inputIndex: %d)"
351 " due to predictionSeqNum mismatch (expected: %d)\n",
352 line_in->id, fetch_info.inputIndex,
353 fetch_info.predictionSeqNum);
354 } else if (line_in->isFault()) {
355 /* Pack a fault as a MinorDynInst with ->fault set */
356
357 /* Make a new instruction and pick up the line, stream,
358 * prediction, thread ids from the incoming line */
359 dyn_inst = new MinorDynInst(line_in->id);
360
361 /* Fetch and prediction sequence numbers originate here */
362 dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum;
363 dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum;
364 /* To complete the set, test that exec sequence number has
365 * not been set */
366 assert(dyn_inst->id.execSeqNum == 0);
367
368 dyn_inst->pc = fetch_info.pc;
369
370 /* Pack a faulting instruction but allow other
371 * instructions to be generated. (Fetch2 makes no
372 * immediate judgement about streamSeqNum) */
373 dyn_inst->fault = line_in->fault;
374 DPRINTF(Fetch, "Fault being passed output_index: "
375 "%d: %s\n", output_index, dyn_inst->fault->name());
376 } else {
377 uint8_t *line = line_in->line;
378
379 TheISA::MachInst inst_word;
380 /* The instruction is wholly in the line, can just
381 * assign */
382 inst_word = TheISA::gtoh(
383 *(reinterpret_cast<TheISA::MachInst *>
384 (line + fetch_info.inputIndex)));
385
386 if (!decoder->instReady()) {
387 decoder->moreBytes(fetch_info.pc,
388 line_in->lineBaseAddr + fetch_info.inputIndex,
389 inst_word);
390 DPRINTF(Fetch, "Offering MachInst to decoder addr: 0x%x\n",
391 line_in->lineBaseAddr + fetch_info.inputIndex);
392 }
393
394 /* Maybe make the above a loop to accommodate ISAs with
395 * instructions longer than sizeof(MachInst) */
396
397 if (decoder->instReady()) {
398 /* Make a new instruction and pick up the line, stream,
399 * prediction, thread ids from the incoming line */
400 dyn_inst = new MinorDynInst(line_in->id);
401
402 /* Fetch and prediction sequence numbers originate here */
403 dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum;
404 dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum;
405 /* To complete the set, test that exec sequence number
406 * has not been set */
407 assert(dyn_inst->id.execSeqNum == 0);
408
409 /* Note that the decoder can update the given PC.
410 * Remember not to assign it until *after* calling
411 * decode */
412 StaticInstPtr decoded_inst = decoder->decode(fetch_info.pc);
413 dyn_inst->staticInst = decoded_inst;
414
415 dyn_inst->pc = fetch_info.pc;
416 DPRINTF(Fetch, "decoder inst %s\n", *dyn_inst);
417
418
419 DPRINTF(Fetch, "Instruction extracted from line %s"
420 " lineWidth: %d output_index: %d inputIndex: %d"
421 " pc: %s inst: %s\n",
422 line_in->id,
423 line_in->lineWidth, output_index, fetch_info.inputIndex,
424 fetch_info.pc, *dyn_inst);
425
426#if THE_ISA == X86_ISA || THE_ISA == ARM_ISA
427 /* In SE mode, it's possible to branch to a microop when
428 * replaying faults such as page faults (or simply
429 * intra-microcode branches in X86). Unfortunately,
430 * as Minor has micro-op decomposition in a separate
431 * pipeline stage from instruction decomposition, the
432 * following advancePC (which may follow a branch with
433 * microPC() != 0) *must* see a fresh macroop. This
434 * kludge should be improved with an addition to PCState
435 * but I offer it in this form for the moment
436 *
437 * X86 can branch within microops so we need to deal with
438 * the case that, after a branch, the first un-advanced PC
439 * may be pointing to a microop other than 0. Once
440 * advanced, however, the microop number *must* be 0 */
441 fetch_info.pc.upc(0);
442 fetch_info.pc.nupc(1);
443#endif
444
445 /* Advance PC for the next instruction */
446 TheISA::advancePC(fetch_info.pc, decoded_inst);
447
448 /* Predict any branches and issue a branch if
449 * necessary */
450 predictBranch(dyn_inst, prediction);
451 } else {
452 DPRINTF(Fetch, "Inst not ready yet\n");
453 }
454
455 /* Step on the pointer into the line if there's no
456 * complete instruction waiting */
457 if (decoder->needMoreBytes()) {
458 fetch_info.inputIndex += sizeof(TheISA::MachInst);
459
460 DPRINTF(Fetch, "Updated inputIndex value PC: %s"
461 " inputIndex: 0x%x lineBaseAddr: 0x%x lineWidth: 0x%x\n",
462 line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr,
463 line_in->lineWidth);
464 }
465 }
466
467 if (dyn_inst) {
468 /* Step to next sequence number */
469 fetch_info.fetchSeqNum++;
470
471 /* Correctly size the output before writing */
472 if (output_index == 0) {
473 insts_out.resize(outputWidth);
474 }
475 /* Pack the generated dynamic instruction into the output */
476 insts_out.insts[output_index] = dyn_inst;
477 output_index++;
478
479 /* Output MinorTrace instruction info for
480 * pre-microop decomposition macroops */
481 if (DTRACE(MinorTrace) && !dyn_inst->isFault() &&
482 dyn_inst->staticInst->isMacroop())
483 {
484 dyn_inst->minorTraceInst(*this);
485 }
486 }
487
488 /* Remember the streamSeqNum of this line so we can tell when
489 * we change stream */
490 fetch_info.lastStreamSeqNum = line_in->id.streamSeqNum;
491
492 /* Asked to discard line or there was a branch or fault */
493 if (!prediction.isBubble() || /* The remains of a
494 line with a prediction in it */
495 line_in->isFault() /* A line which is just a fault */)
496 {
497 DPRINTF(Fetch, "Discarding all input on branch/fault\n");
498 dumpAllInput(tid);
499 fetch_info.havePC = false;
500 line_in = NULL;
501 } else if (discard_line) {
502 /* Just discard one line, ones behind it may have new
503 * stream sequence numbers. There's a DPRINTF above
504 * for this event */
505 popInput(tid);
506 fetch_info.havePC = false;
507 line_in = NULL;
508 } else if (fetch_info.inputIndex == line_in->lineWidth) {
509 /* Got to end of a line, pop the line but keep PC
510 * in case this is a line-wrapping inst. */
511 popInput(tid);
512 line_in = NULL;
513 }
514
515 if (!line_in && processMoreThanOneInput) {
516 DPRINTF(Fetch, "Wrapping\n");
517 line_in = getInput(tid);
518 }
519 }
520
521 /* The rest of the output (if any) should already have been packed
522 * with bubble instructions by insts_out's initialisation */
523 }
524 if (tid == InvalidThreadID) {
525 assert(insts_out.isBubble());
526 }
527 /** Reserve a slot in the next stage and output data */
528 *predictionOut.inputWire = prediction;
529
530 /* If we generated output, reserve space for the result in the next stage
531 * and mark the stage as being active this cycle */
532 if (!insts_out.isBubble()) {
533 /* Note activity of following buffer */
534 cpu.activityRecorder->activity();
535 insts_out.threadId = tid;
536 nextStageReserve[tid].reserve();
537 }
538
539 /* If we still have input to process and somewhere to put it,
540 * mark stage as active */
541 for (ThreadID i = 0; i < cpu.numThreads; i++)
542 {
543 if (getInput(i) && nextStageReserve[i].canReserve()) {
544 cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId);
545 break;
546 }
547 }
548
549 /* Make sure the input (if any left) is pushed */
550 if (!inp.outputWire->isBubble())
551 inputBuffer[inp.outputWire->id.threadId].pushTail();
552}
553
554inline ThreadID
555Fetch2::getScheduledThread()
556{
557 /* Select thread via policy. */
558 std::vector<ThreadID> priority_list;
559
560 switch (cpu.threadPolicy) {
561 case Enums::SingleThreaded:
562 priority_list.push_back(0);
563 break;
564 case Enums::RoundRobin:
565 priority_list = cpu.roundRobinPriority(threadPriority);
566 break;
567 case Enums::Random:
568 priority_list = cpu.randomPriority();
569 break;
570 default:
571 panic("Unknown fetch policy");
572 }
573
574 for (auto tid : priority_list) {
575 if (getInput(tid) && !fetchInfo[tid].blocked) {
576 threadPriority = tid;
577 return tid;
578 }
579 }
580
581 return InvalidThreadID;
582}
583
584bool
585Fetch2::isDrained()
586{
587 for (const auto &buffer : inputBuffer) {
588 if (!buffer.empty())
589 return false;
590 }
591
592 return (*inp.outputWire).isBubble() &&
593 (*predictionOut.inputWire).isBubble();
594}
595
596void
597Fetch2::minorTrace() const
598{
599 std::ostringstream data;
600
601 if (fetchInfo[0].blocked)
602 data << 'B';
603 else
604 (*out.inputWire).reportData(data);
605
606 MINORTRACE("inputIndex=%d havePC=%d predictionSeqNum=%d insts=%s\n",
607 fetchInfo[0].inputIndex, fetchInfo[0].havePC, fetchInfo[0].predictionSeqNum, data.str());
608 inputBuffer[0].minorTrace();
609}
610
611}