fetch_impl.hh revision 2665:a124942bacb8
1/*
2 * Copyright (c) 2004-2005 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Kevin Lim
29 */
30
// Remove this later; used only for debugging.
// Extracts the six bits starting at bit 26 of X.  Both the argument and
// the whole expansion are parenthesized so the macro keeps its meaning
// when X is a compound expression (e.g. a | b) or when the result is used
// inside a larger expression (e.g. OPCODE(x) + 1, OPCODE(x) == y).
#define OPCODE(X)                       (((X) >> 26) & 0x3f)
33
34#include "arch/isa_traits.hh"
35#include "sim/byteswap.hh"
36#include "cpu/exetrace.hh"
37#include "mem/base_mem.hh"
38#include "mem/mem_interface.hh"
39#include "mem/mem_req.hh"
40#include "cpu/o3/fetch.hh"
41
42#include "sim/root.hh"
43
44template<class Impl>
45SimpleFetch<Impl>::CacheCompletionEvent
46::CacheCompletionEvent(SimpleFetch *_fetch)
47    : Event(&mainEventQueue),
48      fetch(_fetch)
49{
50}
51
52template<class Impl>
53void
54SimpleFetch<Impl>::CacheCompletionEvent::process()
55{
56    fetch->processCacheCompletion();
57}
58
59template<class Impl>
60const char *
61SimpleFetch<Impl>::CacheCompletionEvent::description()
62{
63    return "SimpleFetch cache completion event";
64}
65
66template<class Impl>
67SimpleFetch<Impl>::SimpleFetch(Params &params)
68    : icacheInterface(params.icacheInterface),
69      branchPred(params),
70      decodeToFetchDelay(params.decodeToFetchDelay),
71      renameToFetchDelay(params.renameToFetchDelay),
72      iewToFetchDelay(params.iewToFetchDelay),
73      commitToFetchDelay(params.commitToFetchDelay),
74      fetchWidth(params.fetchWidth)
75{
76    DPRINTF(Fetch, "Fetch: Fetch constructor called\n");
77
78    // Set status to idle.
79    _status = Idle;
80
81    // Create a new memory request.
82    memReq = new MemReq();
83    // Not sure of this parameter.  I think it should be based on the
84    // thread number.
85#if !FULL_SYSTEM
86    memReq->asid = 0;
87#else
88    memReq->asid = 0;
89#endif // FULL_SYSTEM
90    memReq->data = new uint8_t[64];
91
92    // Size of cache block.
93    cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;
94
95    // Create mask to get rid of offset bits.
96    cacheBlkMask = (cacheBlkSize - 1);
97
98    // Get the size of an instruction.
99    instSize = sizeof(MachInst);
100
101    // Create space to store a cache line.
102    cacheData = new uint8_t[cacheBlkSize];
103}
104
105template <class Impl>
106void
107SimpleFetch<Impl>::regStats()
108{
109    icacheStallCycles
110        .name(name() + ".icacheStallCycles")
111        .desc("Number of cycles fetch is stalled on an Icache miss")
112        .prereq(icacheStallCycles);
113
114    fetchedInsts
115        .name(name() + ".fetchedInsts")
116        .desc("Number of instructions fetch has processed")
117        .prereq(fetchedInsts);
118    predictedBranches
119        .name(name() + ".predictedBranches")
120        .desc("Number of branches that fetch has predicted taken")
121        .prereq(predictedBranches);
122    fetchCycles
123        .name(name() + ".fetchCycles")
124        .desc("Number of cycles fetch has run and was not squashing or"
125              " blocked")
126        .prereq(fetchCycles);
127    fetchSquashCycles
128        .name(name() + ".fetchSquashCycles")
129        .desc("Number of cycles fetch has spent squashing")
130        .prereq(fetchSquashCycles);
131    fetchBlockedCycles
132        .name(name() + ".fetchBlockedCycles")
133        .desc("Number of cycles fetch has spent blocked")
134        .prereq(fetchBlockedCycles);
135    fetchedCacheLines
136        .name(name() + ".fetchedCacheLines")
137        .desc("Number of cache lines fetched")
138        .prereq(fetchedCacheLines);
139
140    fetch_nisn_dist
141        .init(/* base value */ 0,
142              /* last value */ fetchWidth,
143              /* bucket size */ 1)
144        .name(name() + ".FETCH:rate_dist")
145        .desc("Number of instructions fetched each cycle (Total)")
146        .flags(Stats::pdf)
147        ;
148
149    branchPred.regStats();
150}
151
152template<class Impl>
153void
154SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr)
155{
156    DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n");
157    cpu = cpu_ptr;
158    // This line will be removed eventually.
159    memReq->xc = cpu->xcBase();
160}
161
162template<class Impl>
163void
164SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
165{
166    DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n");
167    timeBuffer = time_buffer;
168
169    // Create wires to get information from proper places in time buffer.
170    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
171    fromRename = timeBuffer->getWire(-renameToFetchDelay);
172    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
173    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
174}
175
176template<class Impl>
177void
178SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
179{
180    DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n");
181    fetchQueue = fq_ptr;
182
183    // Create wire to write information to proper place in fetch queue.
184    toDecode = fetchQueue->getWire(0);
185}
186
187template<class Impl>
188void
189SimpleFetch<Impl>::processCacheCompletion()
190{
191    DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n");
192
193    // Only change the status if it's still waiting on the icache access
194    // to return.
195    // Can keep track of how many cache accesses go unused due to
196    // misspeculation here.
197    if (_status == IcacheMissStall)
198        _status = IcacheMissComplete;
199}
200
201template <class Impl>
202bool
203SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
204{
205    // Do branch prediction check here.
206    // A bit of a misnomer...next_PC is actually the current PC until
207    // this function updates it.
208    bool predict_taken;
209
210    if (!inst->isControl()) {
211        next_PC = next_PC + instSize;
212        inst->setPredTarg(next_PC);
213        return false;
214    }
215
216    predict_taken = branchPred.predict(inst, next_PC);
217
218    if (predict_taken) {
219        ++predictedBranches;
220    }
221
222    return predict_taken;
223}
224
225template <class Impl>
226Fault
227SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC)
228{
229    // Check if the instruction exists within the cache.
230    // If it does, then proceed on to read the instruction and the rest
231    // of the instructions in the cache line until either the end of the
232    // cache line or a predicted taken branch is encountered.
233
234#if FULL_SYSTEM
235    // Flag to say whether or not address is physical addr.
236    unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
237#else
238    unsigned flags = 0;
239#endif // FULL_SYSTEM
240
241    Fault fault = NoFault;
242
243    // Align the fetch PC so it's at the start of a cache block.
244    fetch_PC = icacheBlockAlignPC(fetch_PC);
245
246    // Setup the memReq to do a read of the first isntruction's address.
247    // Set the appropriate read size and flags as well.
248    memReq->cmd = Read;
249    memReq->reset(fetch_PC, cacheBlkSize, flags);
250
251    // Translate the instruction request.
252    // Should this function be
253    // in the CPU class ?  Probably...ITB/DTB should exist within the
254    // CPU.
255
256    fault = cpu->translateInstReq(memReq);
257
258    // In the case of faults, the fetch stage may need to stall and wait
259    // on what caused the fetch (ITB or Icache miss).
260
261    // If translation was successful, attempt to read the first
262    // instruction.
263    if (fault == NoFault) {
264        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
265        fault = cpu->mem->read(memReq, cacheData);
266        // This read may change when the mem interface changes.
267
268        fetchedCacheLines++;
269    }
270
271    // Now do the timing access to see whether or not the instruction
272    // exists within the cache.
273    if (icacheInterface && fault == NoFault) {
274        DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");
275        memReq->completionEvent = NULL;
276
277        memReq->time = curTick;
278
279        MemAccessResult result = icacheInterface->access(memReq);
280
281        // If the cache missed (in this model functional and timing
282        // memories are different), then schedule an event to wake
283        // up this stage once the cache miss completes.
284        if (result != MA_HIT && icacheInterface->doEvents()) {
285            memReq->completionEvent = new CacheCompletionEvent(this);
286
287            // How does current model work as far as individual
288            // stages scheduling/unscheduling?
289            // Perhaps have only the main CPU scheduled/unscheduled,
290            // and have it choose what stages to run appropriately.
291
292            DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");
293            _status = IcacheMissStall;
294        }
295    }
296
297    return fault;
298}
299
300template <class Impl>
301inline void
302SimpleFetch<Impl>::doSquash(const Addr &new_PC)
303{
304    DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC);
305
306    cpu->setNextPC(new_PC + instSize);
307    cpu->setPC(new_PC);
308
309    // Clear the icache miss if it's outstanding.
310    if (_status == IcacheMissStall && icacheInterface) {
311        DPRINTF(Fetch, "Fetch: Squashing outstanding Icache miss.\n");
312        // @todo: Use an actual thread number here.
313        icacheInterface->squash(0);
314    }
315
316    _status = Squashing;
317
318    ++fetchSquashCycles;
319}
320
321template<class Impl>
322void
323SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC,
324                                    const InstSeqNum &seq_num)
325{
326    DPRINTF(Fetch, "Fetch: Squashing from decode.\n");
327
328    doSquash(new_PC);
329
330    // Tell the CPU to remove any instructions that are in flight between
331    // fetch and decode.
332    cpu->removeInstsUntil(seq_num);
333}
334
335template <class Impl>
336void
337SimpleFetch<Impl>::squash(const Addr &new_PC)
338{
339    DPRINTF(Fetch, "Fetch: Squash from commit.\n");
340
341    doSquash(new_PC);
342
343    // Tell the CPU to remove any instructions that are not in the ROB.
344    cpu->removeInstsNotInROB();
345}
346
347template<class Impl>
348void
349SimpleFetch<Impl>::tick()
350{
351    // Check squash signals from commit.
352    if (fromCommit->commitInfo.squash) {
353        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
354                "from commit.\n");
355
356        // In any case, squash.
357        squash(fromCommit->commitInfo.nextPC);
358
359        // Also check if there's a mispredict that happened.
360        if (fromCommit->commitInfo.branchMispredict) {
361            branchPred.squash(fromCommit->commitInfo.doneSeqNum,
362                              fromCommit->commitInfo.nextPC,
363                              fromCommit->commitInfo.branchTaken);
364        } else {
365            branchPred.squash(fromCommit->commitInfo.doneSeqNum);
366        }
367
368        return;
369    } else if (fromCommit->commitInfo.doneSeqNum) {
370        // Update the branch predictor if it wasn't a squashed instruction
371        // that was braodcasted.
372        branchPred.update(fromCommit->commitInfo.doneSeqNum);
373    }
374
375    // Check ROB squash signals from commit.
376    if (fromCommit->commitInfo.robSquashing) {
377        DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");
378
379        // Continue to squash.
380        _status = Squashing;
381
382        ++fetchSquashCycles;
383        return;
384    }
385
386    // Check squash signals from decode.
387    if (fromDecode->decodeInfo.squash) {
388        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
389                "from decode.\n");
390
391        // Update the branch predictor.
392        if (fromDecode->decodeInfo.branchMispredict) {
393            branchPred.squash(fromDecode->decodeInfo.doneSeqNum,
394                              fromDecode->decodeInfo.nextPC,
395                              fromDecode->decodeInfo.branchTaken);
396        } else {
397            branchPred.squash(fromDecode->decodeInfo.doneSeqNum);
398        }
399
400        if (_status != Squashing) {
401            // Squash unless we're already squashing?
402            squashFromDecode(fromDecode->decodeInfo.nextPC,
403                             fromDecode->decodeInfo.doneSeqNum);
404            return;
405        }
406    }
407
408    // Check if any of the stall signals are high.
409    if (fromDecode->decodeInfo.stall ||
410        fromRename->renameInfo.stall ||
411        fromIEW->iewInfo.stall ||
412        fromCommit->commitInfo.stall)
413    {
414        // Block stage, regardless of current status.
415
416        DPRINTF(Fetch, "Fetch: Stalling stage.\n");
417        DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
418                "Commit: %i\n",
419                fromDecode->decodeInfo.stall,
420                fromRename->renameInfo.stall,
421                fromIEW->iewInfo.stall,
422                fromCommit->commitInfo.stall);
423
424        _status = Blocked;
425
426        ++fetchBlockedCycles;
427        return;
428    } else if (_status == Blocked) {
429        // Unblock stage if status is currently blocked and none of the
430        // stall signals are being held high.
431        _status = Running;
432
433        ++fetchBlockedCycles;
434        return;
435    }
436
437    // If fetch has reached this point, then there are no squash signals
438    // still being held high.  Check if fetch is in the squashing state;
439    // if so, fetch can switch to running.
440    // Similarly, there are no blocked signals still being held high.
441    // Check if fetch is in the blocked state; if so, fetch can switch to
442    // running.
443    if (_status == Squashing) {
444        DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n");
445
446        // Switch status to running
447        _status = Running;
448
449        ++fetchCycles;
450
451        fetch();
452    } else if (_status != IcacheMissStall) {
453        DPRINTF(Fetch, "Fetch: Running stage.\n");
454
455        ++fetchCycles;
456
457        fetch();
458    }
459}
460
461template<class Impl>
462void
463SimpleFetch<Impl>::fetch()
464{
465    //////////////////////////////////////////
466    // Start actual fetch
467    //////////////////////////////////////////
468
469    // The current PC.
470    Addr fetch_PC = cpu->readPC();
471
472    // Fault code for memory access.
473    Fault fault = NoFault;
474
475    // If returning from the delay of a cache miss, then update the status
476    // to running, otherwise do the cache access.  Possibly move this up
477    // to tick() function.
478    if (_status == IcacheMissComplete) {
479        DPRINTF(Fetch, "Fetch: Icache miss is complete.\n");
480
481        // Reset the completion event to NULL.
482        memReq->completionEvent = NULL;
483
484        _status = Running;
485    } else {
486        DPRINTF(Fetch, "Fetch: Attempting to translate and read "
487                       "instruction, starting at PC %08p.\n",
488                fetch_PC);
489
490        fault = fetchCacheLine(fetch_PC);
491    }
492
493    // If we had a stall due to an icache miss, then return.  It'd
494    // be nicer if this were handled through the kind of fault that
495    // is returned by the function.
496    if (_status == IcacheMissStall) {
497        return;
498    }
499
500    // As far as timing goes, the CPU will need to send an event through
501    // the MemReq in order to be woken up once the memory access completes.
502    // Probably have a status on a per thread basis so each thread can
503    // block independently and be woken up independently.
504
505    Addr next_PC = fetch_PC;
506    InstSeqNum inst_seq;
507    MachInst inst;
508    unsigned offset = fetch_PC & cacheBlkMask;
509    unsigned fetched;
510
511    if (fault == NoFault) {
512        // If the read of the first instruction was successful, then grab the
513        // instructions from the rest of the cache line and put them into the
514        // queue heading to decode.
515
516        DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n");
517
518        //////////////////////////
519        // Fetch first instruction
520        //////////////////////////
521
522        // Need to keep track of whether or not a predicted branch
523        // ended this fetch block.
524        bool predicted_branch = false;
525
526        for (fetched = 0;
527             offset < cacheBlkSize &&
528                 fetched < fetchWidth &&
529                 !predicted_branch;
530             ++fetched)
531        {
532
533            // Get a sequence number.
534            inst_seq = cpu->getAndIncrementInstSeq();
535
536            // Make sure this is a valid index.
537            assert(offset <= cacheBlkSize - instSize);
538
539            // Get the instruction from the array of the cache line.
540            inst = gtoh(*reinterpret_cast<MachInst *>
541                        (&cacheData[offset]));
542
543            // Create a new DynInst from the instruction fetched.
544            DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC,
545                                                 inst_seq, cpu);
546
547            DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
548                    inst_seq, instruction->readPC());
549
550            DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
551                    OPCODE(inst));
552
553            instruction->traceData =
554                Trace::getInstRecord(curTick, cpu->xcBase(), cpu,
555                                     instruction->staticInst,
556                                     instruction->readPC(), 0);
557
558            predicted_branch = lookupAndUpdateNextPC(instruction, next_PC);
559
560            // Add instruction to the CPU's list of instructions.
561            cpu->addInst(instruction);
562
563            // Write the instruction to the first slot in the queue
564            // that heads to decode.
565            toDecode->insts[fetched] = instruction;
566
567            toDecode->size++;
568
569            // Increment stat of fetched instructions.
570            ++fetchedInsts;
571
572            // Move to the next instruction, unless we have a branch.
573            fetch_PC = next_PC;
574
575            offset+= instSize;
576        }
577
578        fetch_nisn_dist.sample(fetched);
579    }
580
581    // Now that fetching is completed, update the PC to signify what the next
582    // cycle will be.  Might want to move this to the beginning of this
583    // function so that the PC updates at the beginning of everything.
584    // Or might want to leave setting the PC to the main CPU, with fetch
585    // only changing the nextPC (will require correct determination of
586    // next PC).
587    if (fault == NoFault) {
588        DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC);
589        cpu->setPC(next_PC);
590        cpu->setNextPC(next_PC + instSize);
591    } else {
592        // If the issue was an icache miss, then we can just return and
593        // wait until it is handled.
594        if (_status == IcacheMissStall) {
595            return;
596        }
597
598        // Handle the fault.
599        // This stage will not be able to continue until all the ROB
600        // slots are empty, at which point the fault can be handled.
601        // The only other way it can wake up is if a squash comes along
602        // and changes the PC.  Not sure how to handle that case...perhaps
603        // have it handled by the upper level CPU class which peeks into the
604        // time buffer and sees if a squash comes along, in which case it
605        // changes the status.
606
607        DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n");
608
609        _status = Blocked;
610#if FULL_SYSTEM
611//        cpu->trap(fault);
612        // Send a signal to the ROB indicating that there's a trap from the
613        // fetch stage that needs to be handled.  Need to indicate that
614        // there's a fault, and the fault type.
615#else // !FULL_SYSTEM
616        fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC());
617#endif // FULL_SYSTEM
618    }
619}
620