fetch_impl.hh revision 2632:1bb2f91485ea
/*
 * Copyright (c) 2004-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// Remove this later; used only for debugging.
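// (Presumably this isolates bits 31:26 of the instruction word, i.e. the
// 6-bit primary opcode field of an Alpha instruction.)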
#define OPCODE(X)                       (((X) >> 26) & 0x3f)

#include "arch/isa_traits.hh"
#include "sim/byteswap.hh"
#include "cpu/exetrace.hh"
#include "mem/base_mem.hh"
#include "mem/mem_interface.hh"
#include "mem/mem_req.hh"
#include "cpu/o3/fetch.hh"

#include "sim/root.hh"

template<class Impl>
SimpleFetch<Impl>::CacheCompletionEvent
::CacheCompletionEvent(SimpleFetch *_fetch)
    : Event(&mainEventQueue),
      fetch(_fetch)
{
}

template<class Impl>
void
SimpleFetch<Impl>::CacheCompletionEvent::process()
{
    fetch->processCacheCompletion();
}

template<class Impl>
const char *
SimpleFetch<Impl>::CacheCompletionEvent::description()
{
    return "SimpleFetch cache completion event";
}

template<class Impl>
SimpleFetch<Impl>::SimpleFetch(Params &params)
    : icacheInterface(params.icacheInterface),
      branchPred(params),
      decodeToFetchDelay(params.decodeToFetchDelay),
      renameToFetchDelay(params.renameToFetchDelay),
      iewToFetchDelay(params.iewToFetchDelay),
      commitToFetchDelay(params.commitToFetchDelay),
      fetchWidth(params.fetchWidth)
{
    DPRINTF(Fetch, "Fetch: Fetch constructor called\n");

    // Set status to idle.
    _status = Idle;

    // Create a new memory request.
    memReq = new MemReq();
    // Not sure of this parameter.  I think it should be based on the
    // thread number.
#if !FULL_SYSTEM
    memReq->asid = 0;
#else
    memReq->asid = 0;
#endif // FULL_SYSTEM
    memReq->data = new uint8_t[64];

    // Size of cache block.
    cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;

    // Create mask to get rid of offset bits.
    cacheBlkMask = (cacheBlkSize - 1);
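    // (Note: this mask only isolates the offset bits when cacheBlkSize is a
    // power of two; the default 64-byte block gives a mask of 0x3f.)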

    // Get the size of an instruction.
    instSize = sizeof(MachInst);
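    // (MachInst is the raw instruction word type from arch/isa_traits.hh;
    // presumably a fixed 32-bit word for the Alpha ISA targeted here.)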

    // Create space to store a cache line.
    cacheData = new uint8_t[cacheBlkSize];
}

template <class Impl>
void
SimpleFetch<Impl>::regStats()
{
    icacheStallCycles
        .name(name() + ".icacheStallCycles")
        .desc("Number of cycles fetch is stalled on an Icache miss")
        .prereq(icacheStallCycles);

    fetchedInsts
        .name(name() + ".fetchedInsts")
        .desc("Number of instructions fetch has processed")
        .prereq(fetchedInsts);
    predictedBranches
        .name(name() + ".predictedBranches")
        .desc("Number of branches that fetch has predicted taken")
        .prereq(predictedBranches);
    fetchCycles
        .name(name() + ".fetchCycles")
        .desc("Number of cycles fetch has run and was not squashing or"
              " blocked")
        .prereq(fetchCycles);
    fetchSquashCycles
        .name(name() + ".fetchSquashCycles")
        .desc("Number of cycles fetch has spent squashing")
        .prereq(fetchSquashCycles);
    fetchBlockedCycles
        .name(name() + ".fetchBlockedCycles")
        .desc("Number of cycles fetch has spent blocked")
        .prereq(fetchBlockedCycles);
    fetchedCacheLines
        .name(name() + ".fetchedCacheLines")
        .desc("Number of cache lines fetched")
        .prereq(fetchedCacheLines);

    fetch_nisn_dist
        .init(/* base value */ 0,
              /* last value */ fetchWidth,
              /* bucket size */ 1)
        .name(name() + ".FETCH:rate_dist")
        .desc("Number of instructions fetched each cycle (Total)")
        .flags(Stats::pdf)
        ;

    branchPred.regStats();
}

template<class Impl>
void
SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr)
{
    DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n");
    cpu = cpu_ptr;
    // This line will be removed eventually.
    memReq->xc = cpu->xcBase();
}

template<class Impl>
void
SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
{
    DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n");
    timeBuffer = time_buffer;

    // Create wires to get information from proper places in time buffer.
    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
    fromRename = timeBuffer->getWire(-renameToFetchDelay);
    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
}

template<class Impl>
void
SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
    DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n");
    fetchQueue = fq_ptr;

    // Create wire to write information to proper place in fetch queue.
    toDecode = fetchQueue->getWire(0);
}

template<class Impl>
void
SimpleFetch<Impl>::processCacheCompletion()
{
    DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n");

    // Only change the status if it's still waiting on the icache access
    // to return.
    // Can keep track of how many cache accesses go unused due to
    // misspeculation here.
    if (_status == IcacheMissStall)
        _status = IcacheMissComplete;
}

template <class Impl>
bool
SimpleFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
{
    // Do branch prediction check here.
    // A bit of a misnomer...next_PC is actually the current PC until
    // this function updates it.
    bool predict_taken;

    if (!inst->isControl()) {
        next_PC = next_PC + instSize;
        inst->setPredTarg(next_PC);
        return false;
    }

    predict_taken = branchPred.predict(inst, next_PC);

    if (predict_taken) {
        ++predictedBranches;
    }

    return predict_taken;
}

template <class Impl>
Fault
SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC)
{
    // Check if the instruction exists within the cache.
    // If it does, then proceed on to read the instruction and the rest
    // of the instructions in the cache line until either the end of the
    // cache line or a predicted taken branch is encountered.

#if FULL_SYSTEM
    // Flag to say whether or not address is physical addr.
    unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
#else
    unsigned flags = 0;
#endif // FULL_SYSTEM

    Fault fault = NoFault;

    // Align the fetch PC so it's at the start of a cache block.
    fetch_PC = icacheBlockAlignPC(fetch_PC);
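    // (icacheBlockAlignPC() is declared in cpu/o3/fetch.hh; presumably it
    // just clears the block offset bits, i.e. fetch_PC & ~cacheBlkMask.)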

    // Set up the memReq to do a read of the first instruction's address.
    // Set the appropriate read size and flags as well.
    memReq->cmd = Read;
    memReq->reset(fetch_PC, cacheBlkSize, flags);

    // Translate the instruction request.
    // Should this function be in the CPU class?  Probably...the ITB/DTB
    // should exist within the CPU.

    fault = cpu->translateInstReq(memReq);

    // In the case of faults, the fetch stage may need to stall and wait
    // on whatever caused the fault (ITB or Icache miss).

    // If translation was successful, attempt to read the first
    // instruction.
    if (fault == NoFault) {
        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
        fault = cpu->mem->read(memReq, cacheData);
        // This read may change when the mem interface changes.

        fetchedCacheLines++;
    }

    // Now do the timing access to see whether or not the instruction
    // exists within the cache.
    if (icacheInterface && fault == NoFault) {
        DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");
        memReq->completionEvent = NULL;

        memReq->time = curTick;

        MemAccessResult result = icacheInterface->access(memReq);

        // If the cache missed (in this model functional and timing
        // memories are different), then schedule an event to wake
        // up this stage once the cache miss completes.
        if (result != MA_HIT && icacheInterface->doEvents()) {
            memReq->completionEvent = new CacheCompletionEvent(this);

            // How does current model work as far as individual
            // stages scheduling/unscheduling?
            // Perhaps have only the main CPU scheduled/unscheduled,
            // and have it choose what stages to run appropriately.

            DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");
            _status = IcacheMissStall;
        }
    }

    return fault;
}

template <class Impl>
inline void
SimpleFetch<Impl>::doSquash(const Addr &new_PC)
{
    DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC);

    cpu->setNextPC(new_PC + instSize);
    cpu->setPC(new_PC);

    // Clear the icache miss if it's outstanding.
    if (_status == IcacheMissStall && icacheInterface) {
        DPRINTF(Fetch, "Fetch: Squashing outstanding Icache miss.\n");
        // @todo: Use an actual thread number here.
        icacheInterface->squash(0);
    }

    _status = Squashing;

    ++fetchSquashCycles;
}

template<class Impl>
void
SimpleFetch<Impl>::squashFromDecode(const Addr &new_PC,
                                    const InstSeqNum &seq_num)
{
    DPRINTF(Fetch, "Fetch: Squashing from decode.\n");

    doSquash(new_PC);

    // Tell the CPU to remove any instructions that are in flight between
    // fetch and decode.
    cpu->removeInstsUntil(seq_num);
}

template <class Impl>
void
SimpleFetch<Impl>::squash(const Addr &new_PC)
{
    DPRINTF(Fetch, "Fetch: Squash from commit.\n");

    doSquash(new_PC);

    // Tell the CPU to remove any instructions that are not in the ROB.
    cpu->removeInstsNotInROB();
}

template<class Impl>
void
SimpleFetch<Impl>::tick()
{
    // Check squash signals from commit.
    if (fromCommit->commitInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from commit.\n");

        // In any case, squash.
        squash(fromCommit->commitInfo.nextPC);

        // Also check if there's a mispredict that happened.
        if (fromCommit->commitInfo.branchMispredict) {
            branchPred.squash(fromCommit->commitInfo.doneSeqNum,
                              fromCommit->commitInfo.nextPC,
                              fromCommit->commitInfo.branchTaken);
        } else {
            branchPred.squash(fromCommit->commitInfo.doneSeqNum);
        }

        return;
    } else if (fromCommit->commitInfo.doneSeqNum) {
        // Update the branch predictor if it wasn't a squashed instruction
        // that was broadcast.
        branchPred.update(fromCommit->commitInfo.doneSeqNum);
    }

    // Check ROB squash signals from commit.
    if (fromCommit->commitInfo.robSquashing) {
        DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");

        // Continue to squash.
        _status = Squashing;

        ++fetchSquashCycles;
        return;
    }

    // Check squash signals from decode.
    if (fromDecode->decodeInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from decode.\n");

        // Update the branch predictor.
        if (fromDecode->decodeInfo.branchMispredict) {
            branchPred.squash(fromDecode->decodeInfo.doneSeqNum,
                              fromDecode->decodeInfo.nextPC,
                              fromDecode->decodeInfo.branchTaken);
        } else {
            branchPred.squash(fromDecode->decodeInfo.doneSeqNum);
        }

        if (_status != Squashing) {
            // Squash unless we're already squashing?
            squashFromDecode(fromDecode->decodeInfo.nextPC,
                             fromDecode->decodeInfo.doneSeqNum);
            return;
        }
    }

    // Check if any of the stall signals are high.
    if (fromDecode->decodeInfo.stall ||
        fromRename->renameInfo.stall ||
        fromIEW->iewInfo.stall ||
        fromCommit->commitInfo.stall)
    {
        // Block stage, regardless of current status.

        DPRINTF(Fetch, "Fetch: Stalling stage.\n");
        DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
                "Commit: %i\n",
                fromDecode->decodeInfo.stall,
                fromRename->renameInfo.stall,
                fromIEW->iewInfo.stall,
                fromCommit->commitInfo.stall);

        _status = Blocked;

        ++fetchBlockedCycles;
        return;
    } else if (_status == Blocked) {
        // Unblock stage if status is currently blocked and none of the
        // stall signals are being held high.
        _status = Running;

        ++fetchBlockedCycles;
        return;
    }

    // If fetch has reached this point, then there are no squash signals
    // still being held high.  Check if fetch is in the squashing state;
    // if so, fetch can switch to running.
    // Similarly, there are no blocked signals still being held high.
    // Check if fetch is in the blocked state; if so, fetch can switch to
    // running.
    if (_status == Squashing) {
        DPRINTF(Fetch, "Fetch: Done squashing, switching to running.\n");

        // Switch status to running.
        _status = Running;

        ++fetchCycles;

        fetch();
    } else if (_status != IcacheMissStall) {
        DPRINTF(Fetch, "Fetch: Running stage.\n");

        ++fetchCycles;

        fetch();
    }
}

template<class Impl>
void
SimpleFetch<Impl>::fetch()
{
    //////////////////////////////////////////
    // Start actual fetch
    //////////////////////////////////////////

    // The current PC.
    Addr fetch_PC = cpu->readPC();

    // Fault code for memory access.
    Fault fault = NoFault;

    // If returning from the delay of a cache miss, then update the status
    // to running, otherwise do the cache access.  Possibly move this up
    // to the tick() function.
    if (_status == IcacheMissComplete) {
        DPRINTF(Fetch, "Fetch: Icache miss is complete.\n");

        // Reset the completion event to NULL.
        memReq->completionEvent = NULL;

        _status = Running;
    } else {
        DPRINTF(Fetch, "Fetch: Attempting to translate and read "
                       "instruction, starting at PC %08p.\n",
                fetch_PC);

        fault = fetchCacheLine(fetch_PC);
    }

    // If we had a stall due to an icache miss, then return.  It'd
    // be nicer if this were handled through the kind of fault that
    // is returned by the function.
    if (_status == IcacheMissStall) {
        return;
    }

    // As far as timing goes, the CPU will need to send an event through
    // the MemReq in order to be woken up once the memory access completes.
    // Probably have a status on a per-thread basis so each thread can
    // block independently and be woken up independently.

    Addr next_PC = fetch_PC;
    InstSeqNum inst_seq;
    MachInst inst;
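    // offset is the byte position of fetch_PC within the block fetched above;
    // fetched counts how many instructions are sent to decode this cycle.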
    unsigned offset = fetch_PC & cacheBlkMask;
    unsigned fetched;

    if (fault == NoFault) {
        // If the read of the first instruction was successful, then grab the
        // instructions from the rest of the cache line and put them into the
        // queue heading to decode.

        DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n");

        //////////////////////////
        // Fetch first instruction
        //////////////////////////

        // Need to keep track of whether or not a predicted branch
        // ended this fetch block.
        bool predicted_branch = false;

        for (fetched = 0;
             offset < cacheBlkSize &&
                 fetched < fetchWidth &&
                 !predicted_branch;
             ++fetched)
        {
            // Get a sequence number.
            inst_seq = cpu->getAndIncrementInstSeq();

            // Make sure this is a valid index.
            assert(offset <= cacheBlkSize - instSize);

            // Get the instruction from the array of the cache line.
            inst = gtoh(*reinterpret_cast<MachInst *>
                        (&cacheData[offset]));
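            // (gtoh() comes from sim/byteswap.hh; it swaps the raw bytes from
            // guest byte order to host byte order when the two differ.)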

            // Create a new DynInst from the instruction fetched.
            DynInstPtr instruction = new DynInst(inst, fetch_PC, next_PC,
                                                 inst_seq, cpu);

            DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
                    inst_seq, instruction->readPC());

            DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
                    OPCODE(inst));

            instruction->traceData =
                Trace::getInstRecord(curTick, cpu->xcBase(), cpu,
                                     instruction->staticInst,
                                     instruction->readPC(), 0);

            predicted_branch = lookupAndUpdateNextPC(instruction, next_PC);

            // Add instruction to the CPU's list of instructions.
            cpu->addInst(instruction);

            // Write the instruction to the next available slot in the queue
            // that heads to decode.
            toDecode->insts[fetched] = instruction;

            toDecode->size++;

            // Increment stat of fetched instructions.
            ++fetchedInsts;

            // Move to the next instruction, unless we have a branch.
            fetch_PC = next_PC;

            offset += instSize;
        }

        fetch_nisn_dist.sample(fetched);
    }

    // Now that fetching is completed, update the PC to signify what the next
    // cycle will fetch.  Might want to move this to the beginning of this
    // function so that the PC updates at the beginning of everything.
    // Or might want to leave setting the PC to the main CPU, with fetch
    // only changing the nextPC (will require correct determination of
    // next PC).
    if (fault == NoFault) {
        DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC);
        cpu->setPC(next_PC);
        cpu->setNextPC(next_PC + instSize);
    } else {
        // If the issue was an icache miss, then we can just return and
        // wait until it is handled.
        if (_status == IcacheMissStall) {
            return;
        }

        // Handle the fault.
        // This stage will not be able to continue until all the ROB
        // slots are empty, at which point the fault can be handled.
        // The only other way it can wake up is if a squash comes along
        // and changes the PC.  Not sure how to handle that case...perhaps
        // have it handled by the upper level CPU class which peeks into the
        // time buffer and sees if a squash comes along, in which case it
        // changes the status.

        DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n");

        _status = Blocked;
#if FULL_SYSTEM
//        cpu->trap(fault);
        // Send a signal to the ROB indicating that there's a trap from the
        // fetch stage that needs to be handled.  Need to indicate that
        // there's a fault, and the fault type.
#else // !FULL_SYSTEM
        fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC());
#endif // FULL_SYSTEM
    }
}