fetch2.cc revision 11567:560d7fbbddd1
1/*
2 * Copyright (c) 2013-2014 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Andrew Bardsley
38 */
39
40#include <string>
41
42#include "arch/decoder.hh"
43#include "arch/utility.hh"
44#include "cpu/minor/fetch2.hh"
45#include "cpu/minor/pipeline.hh"
46#include "cpu/pred/bpred_unit.hh"
47#include "debug/Branch.hh"
48#include "debug/Fetch.hh"
49#include "debug/MinorTrace.hh"
50
51namespace Minor
52{
53
54Fetch2::Fetch2(const std::string &name,
55    MinorCPU &cpu_,
56    MinorCPUParams &params,
57    Latch<ForwardLineData>::Output inp_,
58    Latch<BranchData>::Output branchInp_,
59    Latch<BranchData>::Input predictionOut_,
60    Latch<ForwardInstData>::Input out_,
61    std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer) :
62    Named(name),
63    cpu(cpu_),
64    inp(inp_),
65    branchInp(branchInp_),
66    predictionOut(predictionOut_),
67    out(out_),
68    nextStageReserve(next_stage_input_buffer),
69    outputWidth(params.decodeInputWidth),
70    processMoreThanOneInput(params.fetch2CycleInput),
71    branchPredictor(*params.branchPred),
72    fetchInfo(params.numThreads),
73    threadPriority(0)
74{
75    if (outputWidth < 1)
76        fatal("%s: decodeInputWidth must be >= 1 (%d)\n", name, outputWidth);
77
78    if (params.fetch2InputBufferSize < 1) {
79        fatal("%s: fetch2InputBufferSize must be >= 1 (%d)\n", name,
80        params.fetch2InputBufferSize);
81    }
82
83    /* Per-thread input buffers */
84    for (ThreadID tid = 0; tid < params.numThreads; tid++) {
85        inputBuffer.push_back(
86            InputBuffer<ForwardLineData>(
87                name + ".inputBuffer" + std::to_string(tid), "lines",
88                params.fetch2InputBufferSize));
89    }
90}
91
92const ForwardLineData *
93Fetch2::getInput(ThreadID tid)
94{
95    /* Get a line from the inputBuffer to work with */
96    if (!inputBuffer[tid].empty()) {
97        return &(inputBuffer[tid].front());
98    } else {
99        return NULL;
100    }
101}
102
103void
104Fetch2::popInput(ThreadID tid)
105{
106    if (!inputBuffer[tid].empty()) {
107        inputBuffer[tid].front().freeLine();
108        inputBuffer[tid].pop();
109    }
110
111    fetchInfo[tid].inputIndex = 0;
112}
113
114void
115Fetch2::dumpAllInput(ThreadID tid)
116{
117    DPRINTF(Fetch, "Dumping whole input buffer\n");
118    while (!inputBuffer[tid].empty())
119        popInput(tid);
120
121    fetchInfo[tid].inputIndex = 0;
122}
123
124void
125Fetch2::updateBranchPrediction(const BranchData &branch)
126{
127    MinorDynInstPtr inst = branch.inst;
128
129    /* Don't even consider instructions we didn't try to predict or faults */
130    if (inst->isFault() || !inst->triedToPredict)
131        return;
132
133    switch (branch.reason) {
134      case BranchData::NoBranch:
135        /* No data to update */
136        break;
137      case BranchData::Interrupt:
138        /* Never try to predict interrupts */
139        break;
140      case BranchData::SuspendThread:
141        /* Don't need to act on suspends */
142        break;
143      case BranchData::HaltFetch:
144        /* Don't need to act on fetch wakeup */
145        break;
146      case BranchData::BranchPrediction:
147        /* Shouldn't happen.  Fetch2 is the only source of
148         *  BranchPredictions */
149        break;
150      case BranchData::UnpredictedBranch:
151        /* Unpredicted branch or barrier */
152        DPRINTF(Branch, "Unpredicted branch seen inst: %s\n", *inst);
153        branchPredictor.squash(inst->id.fetchSeqNum,
154            branch.target, true, inst->id.threadId);
155        break;
156      case BranchData::CorrectlyPredictedBranch:
157        /* Predicted taken, was taken */
158        DPRINTF(Branch, "Branch predicted correctly inst: %s\n", *inst);
159        branchPredictor.update(inst->id.fetchSeqNum,
160            inst->id.threadId);
161        break;
162      case BranchData::BadlyPredictedBranch:
163        /* Predicted taken, not taken */
164        DPRINTF(Branch, "Branch mis-predicted inst: %s\n", *inst);
165        branchPredictor.squash(inst->id.fetchSeqNum,
166            branch.target /* Not used */, false, inst->id.threadId);
167        break;
168      case BranchData::BadlyPredictedBranchTarget:
169        /* Predicted taken, was taken but to a different target */
170        DPRINTF(Branch, "Branch mis-predicted target inst: %s target: %s\n",
171            *inst, branch.target);
172        branchPredictor.squash(inst->id.fetchSeqNum,
173            branch.target, true, inst->id.threadId);
174        break;
175    }
176}
177
178void
179Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch)
180{
181    Fetch2ThreadInfo &thread = fetchInfo[inst->id.threadId];
182    TheISA::PCState inst_pc = inst->pc;
183
184    assert(!inst->predictedTaken);
185
186    /* Skip non-control/sys call instructions */
187    if (inst->staticInst->isControl() ||
188        inst->staticInst->isSyscall())
189    {
190        /* Tried to predict */
191        inst->triedToPredict = true;
192
193        DPRINTF(Branch, "Trying to predict for inst: %s\n", *inst);
194
195        if (branchPredictor.predict(inst->staticInst,
196            inst->id.fetchSeqNum, inst_pc,
197            inst->id.threadId))
198        {
199            inst->predictedTaken = true;
200            inst->predictedTarget = inst_pc;
201            branch.target = inst_pc;
202        }
203    } else {
204        DPRINTF(Branch, "Not attempting prediction for inst: %s\n", *inst);
205    }
206
207    /* If we predict taken, set branch and update sequence numbers */
208    if (inst->predictedTaken) {
209        /* Update the predictionSeqNum and remember the streamSeqNum that it
210         *  was associated with */
211        thread.expectedStreamSeqNum = inst->id.streamSeqNum;
212
213        BranchData new_branch = BranchData(BranchData::BranchPrediction,
214            inst->id.threadId,
215            inst->id.streamSeqNum, thread.predictionSeqNum + 1,
216            inst->predictedTarget, inst);
217
218        /* Mark with a new prediction number by the stream number of the
219         *  instruction causing the prediction */
220        thread.predictionSeqNum++;
221        branch = new_branch;
222
223        DPRINTF(Branch, "Branch predicted taken inst: %s target: %s"
224            " new predictionSeqNum: %d\n",
225            *inst, inst->predictedTarget, thread.predictionSeqNum);
226    }
227}
228
229void
230Fetch2::evaluate()
231{
232    /* Push input onto appropriate input buffer */
233    if (!inp.outputWire->isBubble())
234        inputBuffer[inp.outputWire->id.threadId].setTail(*inp.outputWire);
235
236    ForwardInstData &insts_out = *out.inputWire;
237    BranchData prediction;
238    BranchData &branch_inp = *branchInp.outputWire;
239
240    assert(insts_out.isBubble());
241
242    /* React to branches from Execute to update local branch prediction
243     *  structures */
244    updateBranchPrediction(branch_inp);
245
246    /* If a branch arrives, don't try and do anything about it.  Only
247     *  react to your own predictions */
248    if (branch_inp.isStreamChange()) {
249        DPRINTF(Fetch, "Dumping all input as a stream changing branch"
250            " has arrived\n");
251        dumpAllInput(branch_inp.threadId);
252        fetchInfo[branch_inp.threadId].havePC = false;
253    }
254
255    assert(insts_out.isBubble());
256    /* Even when blocked, clear out input lines with the wrong
257     *  prediction sequence number */
258    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
259        Fetch2ThreadInfo &thread = fetchInfo[tid];
260
261        thread.blocked = !nextStageReserve[tid].canReserve();
262
263        const ForwardLineData *line_in = getInput(tid);
264
265        while (line_in &&
266            thread.expectedStreamSeqNum == line_in->id.streamSeqNum &&
267            thread.predictionSeqNum != line_in->id.predictionSeqNum)
268        {
269            DPRINTF(Fetch, "Discarding line %s"
270                " due to predictionSeqNum mismatch (expected: %d)\n",
271                line_in->id, thread.predictionSeqNum);
272
273            popInput(tid);
274            fetchInfo[tid].havePC = false;
275
276            if (processMoreThanOneInput) {
277                DPRINTF(Fetch, "Wrapping\n");
278                line_in = getInput(tid);
279            } else {
280                line_in = NULL;
281            }
282        }
283    }
284
285    ThreadID tid = getScheduledThread();
286    DPRINTF(Fetch, "Scheduled Thread: %d\n", tid);
287
288    assert(insts_out.isBubble());
289    if (tid != InvalidThreadID) {
290        Fetch2ThreadInfo &fetch_info = fetchInfo[tid];
291
292        const ForwardLineData *line_in = getInput(tid);
293
294        unsigned int output_index = 0;
295
296        /* Pack instructions into the output while we can.  This may involve
297         * using more than one input line.  Note that lineWidth will be 0
298         * for faulting lines */
299        while (line_in &&
300            (line_in->isFault() ||
301                fetch_info.inputIndex < line_in->lineWidth) && /* More input */
302            output_index < outputWidth && /* More output to fill */
303            prediction.isBubble() /* No predicted branch */)
304        {
305            ThreadContext *thread = cpu.getContext(line_in->id.threadId);
306            TheISA::Decoder *decoder = thread->getDecoderPtr();
307
308            /* Discard line due to prediction sequence number being wrong but
309             * without the streamSeqNum number having changed */
310            bool discard_line =
311                fetch_info.expectedStreamSeqNum == line_in->id.streamSeqNum &&
312                fetch_info.predictionSeqNum != line_in->id.predictionSeqNum;
313
314            /* Set the PC if the stream changes.  Setting havePC to false in
315             *  a previous cycle handles all other change of flow of control
316             *  issues */
317            bool set_pc = fetch_info.lastStreamSeqNum != line_in->id.streamSeqNum;
318
319            if (!discard_line && (!fetch_info.havePC || set_pc)) {
320                /* Set the inputIndex to be the MachInst-aligned offset
321                 *  from lineBaseAddr of the new PC value */
322                fetch_info.inputIndex =
323                    (line_in->pc.instAddr() & BaseCPU::PCMask) -
324                    line_in->lineBaseAddr;
325                DPRINTF(Fetch, "Setting new PC value: %s inputIndex: 0x%x"
326                    " lineBaseAddr: 0x%x lineWidth: 0x%x\n",
327                    line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr,
328                    line_in->lineWidth);
329                fetch_info.pc = line_in->pc;
330                fetch_info.havePC = true;
331                decoder->reset();
332            }
333
334            /* The generated instruction.  Leave as NULL if no instruction
335             *  is to be packed into the output */
336            MinorDynInstPtr dyn_inst = NULL;
337
338            if (discard_line) {
339                /* Rest of line was from an older prediction in the same
340                 *  stream */
341                DPRINTF(Fetch, "Discarding line %s (from inputIndex: %d)"
342                    " due to predictionSeqNum mismatch (expected: %d)\n",
343                    line_in->id, fetch_info.inputIndex,
344                    fetch_info.predictionSeqNum);
345            } else if (line_in->isFault()) {
346                /* Pack a fault as a MinorDynInst with ->fault set */
347
348                /* Make a new instruction and pick up the line, stream,
349                 *  prediction, thread ids from the incoming line */
350                dyn_inst = new MinorDynInst(line_in->id);
351
352                /* Fetch and prediction sequence numbers originate here */
353                dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum;
354                dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum;
355                /* To complete the set, test that exec sequence number has
356                 *  not been set */
357                assert(dyn_inst->id.execSeqNum == 0);
358
359                dyn_inst->pc = fetch_info.pc;
360
361                /* Pack a faulting instruction but allow other
362                 *  instructions to be generated. (Fetch2 makes no
363                 *  immediate judgement about streamSeqNum) */
364                dyn_inst->fault = line_in->fault;
365                DPRINTF(Fetch, "Fault being passed output_index: "
366                    "%d: %s\n", output_index, dyn_inst->fault->name());
367            } else {
368                uint8_t *line = line_in->line;
369
370                TheISA::MachInst inst_word;
371                /* The instruction is wholly in the line, can just
372                 *  assign */
373                inst_word = TheISA::gtoh(
374                    *(reinterpret_cast<TheISA::MachInst *>
375                    (line + fetch_info.inputIndex)));
376
377                if (!decoder->instReady()) {
378                    decoder->moreBytes(fetch_info.pc,
379                        line_in->lineBaseAddr + fetch_info.inputIndex,
380                        inst_word);
381                    DPRINTF(Fetch, "Offering MachInst to decoder addr: 0x%x\n",
382                            line_in->lineBaseAddr + fetch_info.inputIndex);
383                }
384
385                /* Maybe make the above a loop to accomodate ISAs with
386                 *  instructions longer than sizeof(MachInst) */
387
388                if (decoder->instReady()) {
389                    /* Make a new instruction and pick up the line, stream,
390                     *  prediction, thread ids from the incoming line */
391                    dyn_inst = new MinorDynInst(line_in->id);
392
393                    /* Fetch and prediction sequence numbers originate here */
394                    dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum;
395                    dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum;
396                    /* To complete the set, test that exec sequence number
397                     *  has not been set */
398                    assert(dyn_inst->id.execSeqNum == 0);
399
400                    /* Note that the decoder can update the given PC.
401                     *  Remember not to assign it until *after* calling
402                     *  decode */
403                    StaticInstPtr decoded_inst = decoder->decode(fetch_info.pc);
404                    dyn_inst->staticInst = decoded_inst;
405
406                    dyn_inst->pc = fetch_info.pc;
407                    DPRINTF(Fetch, "decoder inst %s\n", *dyn_inst);
408
409
410                    DPRINTF(Fetch, "Instruction extracted from line %s"
411                        " lineWidth: %d output_index: %d inputIndex: %d"
412                        " pc: %s inst: %s\n",
413                        line_in->id,
414                        line_in->lineWidth, output_index, fetch_info.inputIndex,
415                        fetch_info.pc, *dyn_inst);
416
417#if THE_ISA == X86_ISA || THE_ISA == ARM_ISA
418                    /* In SE mode, it's possible to branch to a microop when
419                     *  replaying faults such as page faults (or simply
420                     *  intra-microcode branches in X86).  Unfortunately,
421                     *  as Minor has micro-op decomposition in a separate
422                     *  pipeline stage from instruction decomposition, the
423                     *  following advancePC (which may follow a branch with
424                     *  microPC() != 0) *must* see a fresh macroop.  This
425                     *  kludge should be improved with an addition to PCState
426                     *  but I offer it in this form for the moment
427                     *
428                     * X86 can branch within microops so we need to deal with
429                     * the case that, after a branch, the first un-advanced PC
430                     * may be pointing to a microop other than 0.  Once
431                     * advanced, however, the microop number *must* be 0 */
432                    fetch_info.pc.upc(0);
433                    fetch_info.pc.nupc(1);
434#endif
435
436                    /* Advance PC for the next instruction */
437                    TheISA::advancePC(fetch_info.pc, decoded_inst);
438
439                    /* Predict any branches and issue a branch if
440                     *  necessary */
441                    predictBranch(dyn_inst, prediction);
442                } else {
443                    DPRINTF(Fetch, "Inst not ready yet\n");
444                }
445
446                /* Step on the pointer into the line if there's no
447                 *  complete instruction waiting */
448                if (decoder->needMoreBytes()) {
449                    fetch_info.inputIndex += sizeof(TheISA::MachInst);
450
451                DPRINTF(Fetch, "Updated inputIndex value PC: %s"
452                    " inputIndex: 0x%x lineBaseAddr: 0x%x lineWidth: 0x%x\n",
453                    line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr,
454                    line_in->lineWidth);
455                }
456            }
457
458            if (dyn_inst) {
459                /* Step to next sequence number */
460                fetch_info.fetchSeqNum++;
461
462                /* Correctly size the output before writing */
463                if (output_index == 0) {
464                    insts_out.resize(outputWidth);
465                }
466                /* Pack the generated dynamic instruction into the output */
467                insts_out.insts[output_index] = dyn_inst;
468                output_index++;
469
470                /* Output MinorTrace instruction info for
471                 *  pre-microop decomposition macroops */
472                if (DTRACE(MinorTrace) && !dyn_inst->isFault() &&
473                    dyn_inst->staticInst->isMacroop())
474                {
475                    dyn_inst->minorTraceInst(*this);
476                }
477            }
478
479            /* Remember the streamSeqNum of this line so we can tell when
480             *  we change stream */
481            fetch_info.lastStreamSeqNum = line_in->id.streamSeqNum;
482
483            /* Asked to discard line or there was a branch or fault */
484            if (!prediction.isBubble() || /* The remains of a
485                    line with a prediction in it */
486                line_in->isFault() /* A line which is just a fault */)
487            {
488                DPRINTF(Fetch, "Discarding all input on branch/fault\n");
489                dumpAllInput(tid);
490                fetch_info.havePC = false;
491                line_in = NULL;
492            } else if (discard_line) {
493                /* Just discard one line, one's behind it may have new
494                 *  stream sequence numbers.  There's a DPRINTF above
495                 *  for this event */
496                popInput(tid);
497                fetch_info.havePC = false;
498                line_in = NULL;
499            } else if (fetch_info.inputIndex == line_in->lineWidth) {
500                /* Got to end of a line, pop the line but keep PC
501                 *  in case this is a line-wrapping inst. */
502                popInput(tid);
503                line_in = NULL;
504            }
505
506            if (!line_in && processMoreThanOneInput) {
507                DPRINTF(Fetch, "Wrapping\n");
508                line_in = getInput(tid);
509            }
510        }
511
512        /* The rest of the output (if any) should already have been packed
513         *  with bubble instructions by insts_out's initialisation */
514    }
515    if (tid == InvalidThreadID) {
516        assert(insts_out.isBubble());
517    }
518    /** Reserve a slot in the next stage and output data */
519    *predictionOut.inputWire = prediction;
520
521    /* If we generated output, reserve space for the result in the next stage
522     *  and mark the stage as being active this cycle */
523    if (!insts_out.isBubble()) {
524        /* Note activity of following buffer */
525        cpu.activityRecorder->activity();
526        insts_out.threadId = tid;
527        nextStageReserve[tid].reserve();
528    }
529
530    /* If we still have input to process and somewhere to put it,
531     *  mark stage as active */
532    for (ThreadID i = 0; i < cpu.numThreads; i++)
533    {
534        if (getInput(i) && nextStageReserve[i].canReserve()) {
535            cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId);
536            break;
537        }
538    }
539
540    /* Make sure the input (if any left) is pushed */
541    if (!inp.outputWire->isBubble())
542        inputBuffer[inp.outputWire->id.threadId].pushTail();
543}
544
545inline ThreadID
546Fetch2::getScheduledThread()
547{
548    /* Select thread via policy. */
549    std::vector<ThreadID> priority_list;
550
551    switch (cpu.threadPolicy) {
552      case Enums::SingleThreaded:
553        priority_list.push_back(0);
554        break;
555      case Enums::RoundRobin:
556        priority_list = cpu.roundRobinPriority(threadPriority);
557        break;
558      case Enums::Random:
559        priority_list = cpu.randomPriority();
560        break;
561      default:
562        panic("Unknown fetch policy");
563    }
564
565    for (auto tid : priority_list) {
566        if (cpu.getContext(tid)->status() == ThreadContext::Active &&
567            getInput(tid) && !fetchInfo[tid].blocked) {
568            threadPriority = tid;
569            return tid;
570        }
571    }
572
573   return InvalidThreadID;
574}
575
576bool
577Fetch2::isDrained()
578{
579    for (const auto &buffer : inputBuffer) {
580        if (!buffer.empty())
581            return false;
582    }
583
584    return (*inp.outputWire).isBubble() &&
585           (*predictionOut.inputWire).isBubble();
586}
587
588void
589Fetch2::minorTrace() const
590{
591    std::ostringstream data;
592
593    if (fetchInfo[0].blocked)
594        data << 'B';
595    else
596        (*out.inputWire).reportData(data);
597
598    MINORTRACE("inputIndex=%d havePC=%d predictionSeqNum=%d insts=%s\n",
599        fetchInfo[0].inputIndex, fetchInfo[0].havePC, fetchInfo[0].predictionSeqNum, data.str());
600    inputBuffer[0].minorTrace();
601}
602
603}
604