inst_queue_impl.hh revision 13453:4a7a060ea26e
/*
 * Copyright (c) 2011-2014 ARM Limited
 * Copyright (c) 2013 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2004-2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Kevin Lim
 *          Korey Sewell
 */
442139SN/A
452152SN/A#ifndef __CPU_O3_INST_QUEUE_IMPL_HH__
462152SN/A#define __CPU_O3_INST_QUEUE_IMPL_HH__
472139SN/A
482139SN/A#include <limits>
492139SN/A#include <vector>
502439SN/A
512439SN/A#include "base/logging.hh"
522439SN/A#include "cpu/o3/fu_pool.hh"
532139SN/A#include "cpu/o3/inst_queue.hh"
542439SN/A#include "debug/IQ.hh"
552460SN/A#include "enums/OpClass.hh"
562439SN/A#include "params/DerivO3CPU.hh"
572171SN/A#include "sim/core.hh"
582439SN/A
592439SN/A// clang complains about std::set being overloaded with Packet::set if
602170SN/A// we open up the entire namespace std
612139SN/Ausing std::list;
622139SN/A
632139SN/Atemplate <class Impl>
642139SN/AInstructionQueue<Impl>::FUCompletion::FUCompletion(const DynInstPtr &_inst,
652139SN/A    int fu_idx, InstructionQueue<Impl> *iq_ptr)
662139SN/A    : Event(Stat_Event_Pri, AutoDelete),
672139SN/A      inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
682139SN/A{
692139SN/A}
702139SN/A
712139SN/Atemplate <class Impl>
722139SN/Avoid
732139SN/AInstructionQueue<Impl>::FUCompletion::process()
742139SN/A{
752139SN/A    iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1);
762139SN/A    inst = NULL;
772139SN/A}
782139SN/A
792139SN/A
802139SN/Atemplate <class Impl>
812139SN/Aconst char *
822139SN/AInstructionQueue<Impl>::FUCompletion::description() const
832139SN/A{
842139SN/A    return "Functional unit completion";
852178SN/A}
862139SN/A
872139SN/Atemplate <class Impl>
882139SN/AInstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
892139SN/A                                         DerivO3CPUParams *params)
902139SN/A    : cpu(cpu_ptr),
912139SN/A      iewStage(iew_ptr),
922139SN/A      fuPool(params->fuPool),
932152SN/A      numEntries(params->numIQEntries),
942152SN/A      totalWidth(params->issueWidth),
952152SN/A      commitToIEWDelay(params->commitToIEWDelay)
962152SN/A{
972152SN/A    assert(fuPool);
982152SN/A
992152SN/A    numThreads = params->numThreads;
1002152SN/A
1012152SN/A    // Set the number of total physical registers
1022152SN/A    // As the vector registers have two addressing modes, they are added twice
1032152SN/A    numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs +
1042152SN/A                    params->numPhysVecRegs +
1052504SN/A                    params->numPhysVecRegs * TheISA::NumVecElemPerVecReg +
1062504SN/A                    params->numPhysCCRegs;
1072504SN/A
1082504SN/A    //Create an entry for each physical register within the
1092152SN/A    //dependency graph.
1102504SN/A    dependGraph.resize(numPhysRegs);
1112152SN/A
1122152SN/A    // Resize the register scoreboard.
1132152SN/A    regScoreboard.resize(numPhysRegs);
1142152SN/A
1152152SN/A    //Initialize Mem Dependence Units
1162152SN/A    for (ThreadID tid = 0; tid < Impl::MaxThreads; tid++) {
1172152SN/A        memDepUnit[tid].init(params, tid);
1182152SN/A        memDepUnit[tid].setIQ(this);
1192632Sstever@eecs.umich.edu    }
1202155SN/A
1212155SN/A    resetState();
1222155SN/A
1232155SN/A    std::string policy = params->smtIQPolicy;
1242155SN/A
1252155SN/A    //Convert string to lowercase
1262155SN/A    std::transform(policy.begin(), policy.end(), policy.begin(),
1272155SN/A                   (int(*)(int)) tolower);
1282155SN/A
1292155SN/A    //Figure out resource sharing policy
1302152SN/A    if (policy == "dynamic") {
1312152SN/A        iqPolicy = Dynamic;
1322152SN/A
1332152SN/A        //Set Max Entries to Total ROB Capacity
1342155SN/A        for (ThreadID tid = 0; tid < numThreads; tid++) {
1352152SN/A            maxEntries[tid] = numEntries;
1362152SN/A        }
1372637Sstever@eecs.umich.edu
1382152SN/A    } else if (policy == "partitioned") {
1392152SN/A        iqPolicy = Partitioned;
1402152SN/A
1412152SN/A        //@todo:make work if part_amt doesnt divide evenly.
1422152SN/A        int part_amt = numEntries / numThreads;
1432152SN/A
1442152SN/A        //Divide ROB up evenly
1452152SN/A        for (ThreadID tid = 0; tid < numThreads; tid++) {
1462152SN/A            maxEntries[tid] = part_amt;
1472152SN/A        }
1482152SN/A
1492667Sstever@eecs.umich.edu        DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
1502152SN/A                "%i entries per thread.\n",part_amt);
1512152SN/A    } else if (policy == "threshold") {
152        iqPolicy = Threshold;
153
154        double threshold =  (double)params->smtIQThreshold / 100;
155
156        int thresholdIQ = (int)((double)threshold * numEntries);
157
158        //Divide up by threshold amount
159        for (ThreadID tid = 0; tid < numThreads; tid++) {
160            maxEntries[tid] = thresholdIQ;
161        }
162
163        DPRINTF(IQ, "IQ sharing policy set to Threshold:"
164                "%i entries per thread.\n",thresholdIQ);
165   } else {
166       panic("Invalid IQ sharing policy. Options are: Dynamic, "
167              "Partitioned, Threshold");
168   }
169    for (ThreadID tid = numThreads; tid < Impl::MaxThreads; tid++) {
170        maxEntries[tid] = 0;
171    }
172}
173
174template <class Impl>
175InstructionQueue<Impl>::~InstructionQueue()
176{
177    dependGraph.reset();
178#ifdef DEBUG
179    cprintf("Nodes traversed: %i, removed: %i\n",
180            dependGraph.nodesTraversed, dependGraph.nodesRemoved);
181#endif
182}
183
184template <class Impl>
185std::string
186InstructionQueue<Impl>::name() const
187{
188    return cpu->name() + ".iq";
189}
190
191template <class Impl>
192void
193InstructionQueue<Impl>::regStats()
194{
195    using namespace Stats;
196    iqInstsAdded
197        .name(name() + ".iqInstsAdded")
198        .desc("Number of instructions added to the IQ (excludes non-spec)")
199        .prereq(iqInstsAdded);
200
201    iqNonSpecInstsAdded
202        .name(name() + ".iqNonSpecInstsAdded")
203        .desc("Number of non-speculative instructions added to the IQ")
204        .prereq(iqNonSpecInstsAdded);
205
206    iqInstsIssued
207        .name(name() + ".iqInstsIssued")
208        .desc("Number of instructions issued")
209        .prereq(iqInstsIssued);
210
211    iqIntInstsIssued
212        .name(name() + ".iqIntInstsIssued")
213        .desc("Number of integer instructions issued")
214        .prereq(iqIntInstsIssued);
215
216    iqFloatInstsIssued
217        .name(name() + ".iqFloatInstsIssued")
218        .desc("Number of float instructions issued")
219        .prereq(iqFloatInstsIssued);
220
221    iqBranchInstsIssued
222        .name(name() + ".iqBranchInstsIssued")
223        .desc("Number of branch instructions issued")
224        .prereq(iqBranchInstsIssued);
225
226    iqMemInstsIssued
227        .name(name() + ".iqMemInstsIssued")
228        .desc("Number of memory instructions issued")
229        .prereq(iqMemInstsIssued);
230
231    iqMiscInstsIssued
232        .name(name() + ".iqMiscInstsIssued")
233        .desc("Number of miscellaneous instructions issued")
234        .prereq(iqMiscInstsIssued);
235
236    iqSquashedInstsIssued
237        .name(name() + ".iqSquashedInstsIssued")
238        .desc("Number of squashed instructions issued")
239        .prereq(iqSquashedInstsIssued);
240
241    iqSquashedInstsExamined
242        .name(name() + ".iqSquashedInstsExamined")
243        .desc("Number of squashed instructions iterated over during squash;"
244              " mainly for profiling")
245        .prereq(iqSquashedInstsExamined);
246
247    iqSquashedOperandsExamined
248        .name(name() + ".iqSquashedOperandsExamined")
249        .desc("Number of squashed operands that are examined and possibly "
250              "removed from graph")
251        .prereq(iqSquashedOperandsExamined);
252
253    iqSquashedNonSpecRemoved
254        .name(name() + ".iqSquashedNonSpecRemoved")
255        .desc("Number of squashed non-spec instructions that were removed")
256        .prereq(iqSquashedNonSpecRemoved);
257/*
258    queueResDist
259        .init(Num_OpClasses, 0, 99, 2)
260        .name(name() + ".IQ:residence:")
261        .desc("cycles from dispatch to issue")
262        .flags(total | pdf | cdf )
263        ;
264    for (int i = 0; i < Num_OpClasses; ++i) {
265        queueResDist.subname(i, opClassStrings[i]);
266    }
267*/
268    numIssuedDist
269        .init(0,totalWidth,1)
270        .name(name() + ".issued_per_cycle")
271        .desc("Number of insts issued each cycle")
272        .flags(pdf)
273        ;
274/*
275    dist_unissued
276        .init(Num_OpClasses+2)
277        .name(name() + ".unissued_cause")
278        .desc("Reason ready instruction not issued")
279        .flags(pdf | dist)
280        ;
281    for (int i=0; i < (Num_OpClasses + 2); ++i) {
282        dist_unissued.subname(i, unissued_names[i]);
283    }
284*/
285    statIssuedInstType
286        .init(numThreads,Enums::Num_OpClass)
287        .name(name() + ".FU_type")
288        .desc("Type of FU issued")
289        .flags(total | pdf | dist)
290        ;
291    statIssuedInstType.ysubnames(Enums::OpClassStrings);
292
293    //
294    //  How long did instructions for a particular FU type wait prior to issue
295    //
296/*
297    issueDelayDist
298        .init(Num_OpClasses,0,99,2)
299        .name(name() + ".")
300        .desc("cycles from operands ready to issue")
301        .flags(pdf | cdf)
302        ;
303
304    for (int i=0; i<Num_OpClasses; ++i) {
305        std::stringstream subname;
306        subname << opClassStrings[i] << "_delay";
307        issueDelayDist.subname(i, subname.str());
308    }
309*/
310    issueRate
311        .name(name() + ".rate")
312        .desc("Inst issue rate")
313        .flags(total)
314        ;
315    issueRate = iqInstsIssued / cpu->numCycles;
316
317    statFuBusy
318        .init(Num_OpClasses)
319        .name(name() + ".fu_full")
320        .desc("attempts to use FU when none available")
321        .flags(pdf | dist)
322        ;
323    for (int i=0; i < Num_OpClasses; ++i) {
324        statFuBusy.subname(i, Enums::OpClassStrings[i]);
325    }
326
327    fuBusy
328        .init(numThreads)
329        .name(name() + ".fu_busy_cnt")
330        .desc("FU busy when requested")
331        .flags(total)
332        ;
333
334    fuBusyRate
335        .name(name() + ".fu_busy_rate")
336        .desc("FU busy rate (busy events/executed inst)")
337        .flags(total)
338        ;
339    fuBusyRate = fuBusy / iqInstsIssued;
340
341    for (ThreadID tid = 0; tid < numThreads; tid++) {
342        // Tell mem dependence unit to reg stats as well.
343        memDepUnit[tid].regStats();
344    }
345
346    intInstQueueReads
347        .name(name() + ".int_inst_queue_reads")
348        .desc("Number of integer instruction queue reads")
349        .flags(total);
350
351    intInstQueueWrites
352        .name(name() + ".int_inst_queue_writes")
353        .desc("Number of integer instruction queue writes")
354        .flags(total);
355
356    intInstQueueWakeupAccesses
357        .name(name() + ".int_inst_queue_wakeup_accesses")
358        .desc("Number of integer instruction queue wakeup accesses")
359        .flags(total);
360
361    fpInstQueueReads
362        .name(name() + ".fp_inst_queue_reads")
363        .desc("Number of floating instruction queue reads")
364        .flags(total);
365
366    fpInstQueueWrites
367        .name(name() + ".fp_inst_queue_writes")
368        .desc("Number of floating instruction queue writes")
369        .flags(total);
370
371    fpInstQueueWakeupAccesses
372        .name(name() + ".fp_inst_queue_wakeup_accesses")
373        .desc("Number of floating instruction queue wakeup accesses")
374        .flags(total);
375
376    vecInstQueueReads
377        .name(name() + ".vec_inst_queue_reads")
378        .desc("Number of vector instruction queue reads")
379        .flags(total);
380
381    vecInstQueueWrites
382        .name(name() + ".vec_inst_queue_writes")
383        .desc("Number of vector instruction queue writes")
384        .flags(total);
385
386    vecInstQueueWakeupAccesses
387        .name(name() + ".vec_inst_queue_wakeup_accesses")
388        .desc("Number of vector instruction queue wakeup accesses")
389        .flags(total);
390
391    intAluAccesses
392        .name(name() + ".int_alu_accesses")
393        .desc("Number of integer alu accesses")
394        .flags(total);
395
396    fpAluAccesses
397        .name(name() + ".fp_alu_accesses")
398        .desc("Number of floating point alu accesses")
399        .flags(total);
400
401    vecAluAccesses
402        .name(name() + ".vec_alu_accesses")
403        .desc("Number of vector alu accesses")
404        .flags(total);
405
406}
407
408template <class Impl>
409void
410InstructionQueue<Impl>::resetState()
411{
412    //Initialize thread IQ counts
413    for (ThreadID tid = 0; tid < Impl::MaxThreads; tid++) {
414        count[tid] = 0;
415        instList[tid].clear();
416    }
417
418    // Initialize the number of free IQ entries.
419    freeEntries = numEntries;
420
421    // Note that in actuality, the registers corresponding to the logical
422    // registers start off as ready.  However this doesn't matter for the
423    // IQ as the instruction should have been correctly told if those
424    // registers are ready in rename.  Thus it can all be initialized as
425    // unready.
426    for (int i = 0; i < numPhysRegs; ++i) {
427        regScoreboard[i] = false;
428    }
429
430    for (ThreadID tid = 0; tid < Impl::MaxThreads; ++tid) {
431        squashedSeqNum[tid] = 0;
432    }
433
434    for (int i = 0; i < Num_OpClasses; ++i) {
435        while (!readyInsts[i].empty())
436            readyInsts[i].pop();
437        queueOnList[i] = false;
438        readyIt[i] = listOrder.end();
439    }
440    nonSpecInsts.clear();
441    listOrder.clear();
442    deferredMemInsts.clear();
443    blockedMemInsts.clear();
444    retryMemInsts.clear();
445    wbOutstanding = 0;
446}
447
448template <class Impl>
449void
450InstructionQueue<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
451{
452    activeThreads = at_ptr;
453}
454
455template <class Impl>
456void
457InstructionQueue<Impl>::setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2e_ptr)
458{
459      issueToExecuteQueue = i2e_ptr;
460}
461
462template <class Impl>
463void
464InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
465{
466    timeBuffer = tb_ptr;
467
468    fromCommit = timeBuffer->getWire(-commitToIEWDelay);
469}
470
471template <class Impl>
472bool
473InstructionQueue<Impl>::isDrained() const
474{
475    bool drained = dependGraph.empty() &&
476                   instsToExecute.empty() &&
477                   wbOutstanding == 0;
478    for (ThreadID tid = 0; tid < numThreads; ++tid)
479        drained = drained && memDepUnit[tid].isDrained();
480
481    return drained;
482}
483
484template <class Impl>
485void
486InstructionQueue<Impl>::drainSanityCheck() const
487{
488    assert(dependGraph.empty());
489    assert(instsToExecute.empty());
490    for (ThreadID tid = 0; tid < numThreads; ++tid)
491        memDepUnit[tid].drainSanityCheck();
492}
493
494template <class Impl>
495void
496InstructionQueue<Impl>::takeOverFrom()
497{
498    resetState();
499}
500
501template <class Impl>
502int
503InstructionQueue<Impl>::entryAmount(ThreadID num_threads)
504{
505    if (iqPolicy == Partitioned) {
506        return numEntries / num_threads;
507    } else {
508        return 0;
509    }
510}
511
512
513template <class Impl>
514void
515InstructionQueue<Impl>::resetEntries()
516{
517    if (iqPolicy != Dynamic || numThreads > 1) {
518        int active_threads = activeThreads->size();
519
520        list<ThreadID>::iterator threads = activeThreads->begin();
521        list<ThreadID>::iterator end = activeThreads->end();
522
523        while (threads != end) {
524            ThreadID tid = *threads++;
525
526            if (iqPolicy == Partitioned) {
527                maxEntries[tid] = numEntries / active_threads;
528            } else if (iqPolicy == Threshold && active_threads == 1) {
529                maxEntries[tid] = numEntries;
530            }
531        }
532    }
533}
534
535template <class Impl>
536unsigned
537InstructionQueue<Impl>::numFreeEntries()
538{
539    return freeEntries;
540}
541
542template <class Impl>
543unsigned
544InstructionQueue<Impl>::numFreeEntries(ThreadID tid)
545{
546    return maxEntries[tid] - count[tid];
547}
548
549// Might want to do something more complex if it knows how many instructions
550// will be issued this cycle.
551template <class Impl>
552bool
553InstructionQueue<Impl>::isFull()
554{
555    if (freeEntries == 0) {
556        return(true);
557    } else {
558        return(false);
559    }
560}
561
562template <class Impl>
563bool
564InstructionQueue<Impl>::isFull(ThreadID tid)
565{
566    if (numFreeEntries(tid) == 0) {
567        return(true);
568    } else {
569        return(false);
570    }
571}
572
573template <class Impl>
574bool
575InstructionQueue<Impl>::hasReadyInsts()
576{
577    if (!listOrder.empty()) {
578        return true;
579    }
580
581    for (int i = 0; i < Num_OpClasses; ++i) {
582        if (!readyInsts[i].empty()) {
583            return true;
584        }
585    }
586
587    return false;
588}
589
590template <class Impl>
591void
592InstructionQueue<Impl>::insert(const DynInstPtr &new_inst)
593{
594    if (new_inst->isFloating()) {
595        fpInstQueueWrites++;
596    } else if (new_inst->isVector()) {
597        vecInstQueueWrites++;
598    } else {
599        intInstQueueWrites++;
600    }
601    // Make sure the instruction is valid
602    assert(new_inst);
603
604    DPRINTF(IQ, "Adding instruction [sn:%lli] PC %s to the IQ.\n",
605            new_inst->seqNum, new_inst->pcState());
606
607    assert(freeEntries != 0);
608
609    instList[new_inst->threadNumber].push_back(new_inst);
610
611    --freeEntries;
612
613    new_inst->setInIQ();
614
615    // Look through its source registers (physical regs), and mark any
616    // dependencies.
617    addToDependents(new_inst);
618
619    // Have this instruction set itself as the producer of its destination
620    // register(s).
621    addToProducers(new_inst);
622
623    if (new_inst->isMemRef()) {
624        memDepUnit[new_inst->threadNumber].insert(new_inst);
625    } else {
626        addIfReady(new_inst);
627    }
628
629    ++iqInstsAdded;
630
631    count[new_inst->threadNumber]++;
632
633    assert(freeEntries == (numEntries - countInsts()));
634}
635
636template <class Impl>
637void
638InstructionQueue<Impl>::insertNonSpec(const DynInstPtr &new_inst)
639{
640    // @todo: Clean up this code; can do it by setting inst as unable
641    // to issue, then calling normal insert on the inst.
642    if (new_inst->isFloating()) {
643        fpInstQueueWrites++;
644    } else if (new_inst->isVector()) {
645        vecInstQueueWrites++;
646    } else {
647        intInstQueueWrites++;
648    }
649
650    assert(new_inst);
651
652    nonSpecInsts[new_inst->seqNum] = new_inst;
653
654    DPRINTF(IQ, "Adding non-speculative instruction [sn:%lli] PC %s "
655            "to the IQ.\n",
656            new_inst->seqNum, new_inst->pcState());
657
658    assert(freeEntries != 0);
659
660    instList[new_inst->threadNumber].push_back(new_inst);
661
662    --freeEntries;
663
664    new_inst->setInIQ();
665
666    // Have this instruction set itself as the producer of its destination
667    // register(s).
668    addToProducers(new_inst);
669
670    // If it's a memory instruction, add it to the memory dependency
671    // unit.
672    if (new_inst->isMemRef()) {
673        memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
674    }
675
676    ++iqNonSpecInstsAdded;
677
678    count[new_inst->threadNumber]++;
679
680    assert(freeEntries == (numEntries - countInsts()));
681}
682
683template <class Impl>
684void
685InstructionQueue<Impl>::insertBarrier(const DynInstPtr &barr_inst)
686{
687    memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
688
689    insertNonSpec(barr_inst);
690}
691
692template <class Impl>
693typename Impl::DynInstPtr
694InstructionQueue<Impl>::getInstToExecute()
695{
696    assert(!instsToExecute.empty());
697    DynInstPtr inst = std::move(instsToExecute.front());
698    instsToExecute.pop_front();
699    if (inst->isFloating()) {
700        fpInstQueueReads++;
701    } else if (inst->isVector()) {
702        vecInstQueueReads++;
703    } else {
704        intInstQueueReads++;
705    }
706    return inst;
707}
708
709template <class Impl>
710void
711InstructionQueue<Impl>::addToOrderList(OpClass op_class)
712{
713    assert(!readyInsts[op_class].empty());
714
715    ListOrderEntry queue_entry;
716
717    queue_entry.queueType = op_class;
718
719    queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
720
721    ListOrderIt list_it = listOrder.begin();
722    ListOrderIt list_end_it = listOrder.end();
723
724    while (list_it != list_end_it) {
725        if ((*list_it).oldestInst > queue_entry.oldestInst) {
726            break;
727        }
728
729        list_it++;
730    }
731
732    readyIt[op_class] = listOrder.insert(list_it, queue_entry);
733    queueOnList[op_class] = true;
734}
735
736template <class Impl>
737void
738InstructionQueue<Impl>::moveToYoungerInst(ListOrderIt list_order_it)
739{
740    // Get iterator of next item on the list
741    // Delete the original iterator
742    // Determine if the next item is either the end of the list or younger
743    // than the new instruction.  If so, then add in a new iterator right here.
744    // If not, then move along.
745    ListOrderEntry queue_entry;
746    OpClass op_class = (*list_order_it).queueType;
747    ListOrderIt next_it = list_order_it;
748
749    ++next_it;
750
751    queue_entry.queueType = op_class;
752    queue_entry.oldestInst = readyInsts[op_class].top()->seqNum;
753
754    while (next_it != listOrder.end() &&
755           (*next_it).oldestInst < queue_entry.oldestInst) {
756        ++next_it;
757    }
758
759    readyIt[op_class] = listOrder.insert(next_it, queue_entry);
760}
761
762template <class Impl>
763void
764InstructionQueue<Impl>::processFUCompletion(const DynInstPtr &inst, int fu_idx)
765{
766    DPRINTF(IQ, "Processing FU completion [sn:%lli]\n", inst->seqNum);
767    assert(!cpu->switchedOut());
768    // The CPU could have been sleeping until this op completed (*extremely*
769    // long latency op).  Wake it if it was.  This may be overkill.
770   --wbOutstanding;
771    iewStage->wakeCPU();
772
773    if (fu_idx > -1)
774        fuPool->freeUnitNextCycle(fu_idx);
775
776    // @todo: Ensure that these FU Completions happen at the beginning
777    // of a cycle, otherwise they could add too many instructions to
778    // the queue.
779    issueToExecuteQueue->access(-1)->size++;
780    instsToExecute.push_back(inst);
781}
782
783// @todo: Figure out a better way to remove the squashed items from the
784// lists.  Checking the top item of each list to see if it's squashed
785// wastes time and forces jumps.
786template <class Impl>
787void
788InstructionQueue<Impl>::scheduleReadyInsts()
789{
790    DPRINTF(IQ, "Attempting to schedule ready instructions from "
791            "the IQ.\n");
792
793    IssueStruct *i2e_info = issueToExecuteQueue->access(0);
794
795    DynInstPtr mem_inst;
796    while (mem_inst = std::move(getDeferredMemInstToExecute())) {
797        addReadyMemInst(mem_inst);
798    }
799
800    // See if any cache blocked instructions are able to be executed
801    while (mem_inst = std::move(getBlockedMemInstToExecute())) {
802        addReadyMemInst(mem_inst);
803    }
804
805    // Have iterator to head of the list
806    // While I haven't exceeded bandwidth or reached the end of the list,
807    // Try to get a FU that can do what this op needs.
808    // If successful, change the oldestInst to the new top of the list, put
809    // the queue in the proper place in the list.
810    // Increment the iterator.
811    // This will avoid trying to schedule a certain op class if there are no
812    // FUs that handle it.
813    int total_issued = 0;
814    ListOrderIt order_it = listOrder.begin();
815    ListOrderIt order_end_it = listOrder.end();
816
817    while (total_issued < totalWidth && order_it != order_end_it) {
818        OpClass op_class = (*order_it).queueType;
819
820        assert(!readyInsts[op_class].empty());
821
822        DynInstPtr issuing_inst = readyInsts[op_class].top();
823
824        if (issuing_inst->isFloating()) {
825            fpInstQueueReads++;
826        } else if (issuing_inst->isVector()) {
827            vecInstQueueReads++;
828        } else {
829            intInstQueueReads++;
830        }
831
832        assert(issuing_inst->seqNum == (*order_it).oldestInst);
833
834        if (issuing_inst->isSquashed()) {
835            readyInsts[op_class].pop();
836
837            if (!readyInsts[op_class].empty()) {
838                moveToYoungerInst(order_it);
839            } else {
840                readyIt[op_class] = listOrder.end();
841                queueOnList[op_class] = false;
842            }
843
844            listOrder.erase(order_it++);
845
846            ++iqSquashedInstsIssued;
847
848            continue;
849        }
850
851        int idx = FUPool::NoCapableFU;
852        Cycles op_latency = Cycles(1);
853        ThreadID tid = issuing_inst->threadNumber;
854
855        if (op_class != No_OpClass) {
856            idx = fuPool->getUnit(op_class);
857            if (issuing_inst->isFloating()) {
858                fpAluAccesses++;
859            } else if (issuing_inst->isVector()) {
860                vecAluAccesses++;
861            } else {
862                intAluAccesses++;
863            }
864            if (idx > FUPool::NoFreeFU) {
865                op_latency = fuPool->getOpLatency(op_class);
866            }
867        }
868
869        // If we have an instruction that doesn't require a FU, or a
870        // valid FU, then schedule for execution.
871        if (idx != FUPool::NoFreeFU) {
872            if (op_latency == Cycles(1)) {
873                i2e_info->size++;
874                instsToExecute.push_back(issuing_inst);
875
876                // Add the FU onto the list of FU's to be freed next
877                // cycle if we used one.
878                if (idx >= 0)
879                    fuPool->freeUnitNextCycle(idx);
880            } else {
881                bool pipelined = fuPool->isPipelined(op_class);
882                // Generate completion event for the FU
883                ++wbOutstanding;
884                FUCompletion *execution = new FUCompletion(issuing_inst,
885                                                           idx, this);
886
887                cpu->schedule(execution,
888                              cpu->clockEdge(Cycles(op_latency - 1)));
889
890                if (!pipelined) {
891                    // If FU isn't pipelined, then it must be freed
892                    // upon the execution completing.
893                    execution->setFreeFU();
894                } else {
895                    // Add the FU onto the list of FU's to be freed next cycle.
896                    fuPool->freeUnitNextCycle(idx);
897                }
898            }
899
900            DPRINTF(IQ, "Thread %i: Issuing instruction PC %s "
901                    "[sn:%lli]\n",
902                    tid, issuing_inst->pcState(),
903                    issuing_inst->seqNum);
904
905            readyInsts[op_class].pop();
906
907            if (!readyInsts[op_class].empty()) {
908                moveToYoungerInst(order_it);
909            } else {
910                readyIt[op_class] = listOrder.end();
911                queueOnList[op_class] = false;
912            }
913
914            issuing_inst->setIssued();
915            ++total_issued;
916
917#if TRACING_ON
918            issuing_inst->issueTick = curTick() - issuing_inst->fetchTick;
919#endif
920
921            if (!issuing_inst->isMemRef()) {
922                // Memory instructions can not be freed from the IQ until they
923                // complete.
924                ++freeEntries;
925                count[tid]--;
926                issuing_inst->clearInIQ();
927            } else {
928                memDepUnit[tid].issue(issuing_inst);
929            }
930
931            listOrder.erase(order_it++);
932            statIssuedInstType[tid][op_class]++;
933        } else {
934            statFuBusy[op_class]++;
935            fuBusy[tid]++;
936            ++order_it;
937        }
938    }
939
940    numIssuedDist.sample(total_issued);
941    iqInstsIssued+= total_issued;
942
943    // If we issued any instructions, tell the CPU we had activity.
944    // @todo If the way deferred memory instructions are handeled due to
945    // translation changes then the deferredMemInsts condition should be removed
946    // from the code below.
947    if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty()) {
948        cpu->activityThisCycle();
949    } else {
950        DPRINTF(IQ, "Not able to schedule any instructions.\n");
951    }
952}
953
954template <class Impl>
955void
956InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
957{
958    DPRINTF(IQ, "Marking nonspeculative instruction [sn:%lli] as ready "
959            "to execute.\n", inst);
960
961    NonSpecMapIt inst_it = nonSpecInsts.find(inst);
962
963    assert(inst_it != nonSpecInsts.end());
964
965    ThreadID tid = (*inst_it).second->threadNumber;
966
967    (*inst_it).second->setAtCommit();
968
969    (*inst_it).second->setCanIssue();
970
971    if (!(*inst_it).second->isMemRef()) {
972        addIfReady((*inst_it).second);
973    } else {
974        memDepUnit[tid].nonSpecInstReady((*inst_it).second);
975    }
976
977    (*inst_it).second = NULL;
978
979    nonSpecInsts.erase(inst_it);
980}
981
982template <class Impl>
983void
984InstructionQueue<Impl>::commit(const InstSeqNum &inst, ThreadID tid)
985{
986    DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n",
987            tid,inst);
988
989    ListIt iq_it = instList[tid].begin();
990
991    while (iq_it != instList[tid].end() &&
992           (*iq_it)->seqNum <= inst) {
993        ++iq_it;
994        instList[tid].pop_front();
995    }
996
997    assert(freeEntries == (numEntries - countInsts()));
998}
999
1000template <class Impl>
1001int
1002InstructionQueue<Impl>::wakeDependents(const DynInstPtr &completed_inst)
1003{
1004    int dependents = 0;
1005
1006    // The instruction queue here takes care of both floating and int ops
1007    if (completed_inst->isFloating()) {
1008        fpInstQueueWakeupAccesses++;
1009    } else if (completed_inst->isVector()) {
1010        vecInstQueueWakeupAccesses++;
1011    } else {
1012        intInstQueueWakeupAccesses++;
1013    }
1014
1015    DPRINTF(IQ, "Waking dependents of completed instruction.\n");
1016
1017    assert(!completed_inst->isSquashed());
1018
1019    // Tell the memory dependence unit to wake any dependents on this
1020    // instruction if it is a memory instruction.  Also complete the memory
1021    // instruction at this point since we know it executed without issues.
1022    // @todo: Might want to rename "completeMemInst" to something that
1023    // indicates that it won't need to be replayed, and call this
1024    // earlier.  Might not be a big deal.
1025    if (completed_inst->isMemRef()) {
1026        memDepUnit[completed_inst->threadNumber].wakeDependents(completed_inst);
1027        completeMemInst(completed_inst);
1028    } else if (completed_inst->isMemBarrier() ||
1029               completed_inst->isWriteBarrier()) {
1030        memDepUnit[completed_inst->threadNumber].completeBarrier(completed_inst);
1031    }
1032
1033    for (int dest_reg_idx = 0;
1034         dest_reg_idx < completed_inst->numDestRegs();
1035         dest_reg_idx++)
1036    {
1037        PhysRegIdPtr dest_reg =
1038            completed_inst->renamedDestRegIdx(dest_reg_idx);
1039
1040        // Special case of uniq or control registers.  They are not
1041        // handled by the IQ and thus have no dependency graph entry.
1042        if (dest_reg->isFixedMapping()) {
1043            DPRINTF(IQ, "Reg %d [%s] is part of a fix mapping, skipping\n",
1044                    dest_reg->index(), dest_reg->className());
1045            continue;
1046        }
1047
1048        DPRINTF(IQ, "Waking any dependents on register %i (%s).\n",
1049                dest_reg->index(),
1050                dest_reg->className());
1051
1052        //Go through the dependency chain, marking the registers as
1053        //ready within the waiting instructions.
1054        DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
1055
1056        while (dep_inst) {
1057            DPRINTF(IQ, "Waking up a dependent instruction, [sn:%lli] "
1058                    "PC %s.\n", dep_inst->seqNum, dep_inst->pcState());
1059
1060            // Might want to give more information to the instruction
1061            // so that it knows which of its source registers is
1062            // ready.  However that would mean that the dependency
1063            // graph entries would need to hold the src_reg_idx.
1064            dep_inst->markSrcRegReady();
1065
1066            addIfReady(dep_inst);
1067
1068            dep_inst = dependGraph.pop(dest_reg->flatIndex());
1069
1070            ++dependents;
1071        }
1072
1073        // Reset the head node now that all of its dependents have
1074        // been woken up.
1075        assert(dependGraph.empty(dest_reg->flatIndex()));
1076        dependGraph.clearInst(dest_reg->flatIndex());
1077
1078        // Mark the scoreboard as having that register ready.
1079        regScoreboard[dest_reg->flatIndex()] = true;
1080    }
1081    return dependents;
1082}
1083
1084template <class Impl>
1085void
1086InstructionQueue<Impl>::addReadyMemInst(const DynInstPtr &ready_inst)
1087{
1088    OpClass op_class = ready_inst->opClass();
1089
1090    readyInsts[op_class].push(ready_inst);
1091
1092    // Will need to reorder the list if either a queue is not on the list,
1093    // or it has an older instruction than last time.
1094    if (!queueOnList[op_class]) {
1095        addToOrderList(op_class);
1096    } else if (readyInsts[op_class].top()->seqNum  <
1097               (*readyIt[op_class]).oldestInst) {
1098        listOrder.erase(readyIt[op_class]);
1099        addToOrderList(op_class);
1100    }
1101
1102    DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1103            "the ready list, PC %s opclass:%i [sn:%lli].\n",
1104            ready_inst->pcState(), op_class, ready_inst->seqNum);
1105}
1106
1107template <class Impl>
1108void
1109InstructionQueue<Impl>::rescheduleMemInst(const DynInstPtr &resched_inst)
1110{
1111    DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum);
1112
1113    // Reset DTB translation state
1114    resched_inst->translationStarted(false);
1115    resched_inst->translationCompleted(false);
1116
1117    resched_inst->clearCanIssue();
1118    memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
1119}
1120
1121template <class Impl>
1122void
1123InstructionQueue<Impl>::replayMemInst(const DynInstPtr &replay_inst)
1124{
1125    memDepUnit[replay_inst->threadNumber].replay();
1126}
1127
1128template <class Impl>
1129void
1130InstructionQueue<Impl>::completeMemInst(const DynInstPtr &completed_inst)
1131{
1132    ThreadID tid = completed_inst->threadNumber;
1133
1134    DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%lli]\n",
1135            completed_inst->pcState(), completed_inst->seqNum);
1136
1137    ++freeEntries;
1138
1139    completed_inst->memOpDone(true);
1140
1141    memDepUnit[tid].completed(completed_inst);
1142    count[tid]--;
1143}
1144
1145template <class Impl>
1146void
1147InstructionQueue<Impl>::deferMemInst(const DynInstPtr &deferred_inst)
1148{
1149    deferredMemInsts.push_back(deferred_inst);
1150}
1151
1152template <class Impl>
1153void
1154InstructionQueue<Impl>::blockMemInst(const DynInstPtr &blocked_inst)
1155{
1156    blocked_inst->translationStarted(false);
1157    blocked_inst->translationCompleted(false);
1158
1159    blocked_inst->clearIssued();
1160    blocked_inst->clearCanIssue();
1161    blockedMemInsts.push_back(blocked_inst);
1162}
1163
1164template <class Impl>
1165void
1166InstructionQueue<Impl>::cacheUnblocked()
1167{
1168    retryMemInsts.splice(retryMemInsts.end(), blockedMemInsts);
1169    // Get the CPU ticking again
1170    cpu->wakeCPU();
1171}
1172
1173template <class Impl>
1174typename Impl::DynInstPtr
1175InstructionQueue<Impl>::getDeferredMemInstToExecute()
1176{
1177    for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
1178         ++it) {
1179        if ((*it)->translationCompleted() || (*it)->isSquashed()) {
1180            DynInstPtr mem_inst = std::move(*it);
1181            deferredMemInsts.erase(it);
1182            return mem_inst;
1183        }
1184    }
1185    return nullptr;
1186}
1187
1188template <class Impl>
1189typename Impl::DynInstPtr
1190InstructionQueue<Impl>::getBlockedMemInstToExecute()
1191{
1192    if (retryMemInsts.empty()) {
1193        return nullptr;
1194    } else {
1195        DynInstPtr mem_inst = std::move(retryMemInsts.front());
1196        retryMemInsts.pop_front();
1197        return mem_inst;
1198    }
1199}
1200
1201template <class Impl>
1202void
1203InstructionQueue<Impl>::violation(const DynInstPtr &store,
1204                                  const DynInstPtr &faulting_load)
1205{
1206    intInstQueueWrites++;
1207    memDepUnit[store->threadNumber].violation(store, faulting_load);
1208}
1209
1210template <class Impl>
1211void
1212InstructionQueue<Impl>::squash(ThreadID tid)
1213{
1214    DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in "
1215            "the IQ.\n", tid);
1216
1217    // Read instruction sequence number of last instruction out of the
1218    // time buffer.
1219    squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum;
1220
1221    doSquash(tid);
1222
1223    // Also tell the memory dependence unit to squash.
1224    memDepUnit[tid].squash(squashedSeqNum[tid], tid);
1225}
1226
1227template <class Impl>
1228void
1229InstructionQueue<Impl>::doSquash(ThreadID tid)
1230{
1231    // Start at the tail.
1232    ListIt squash_it = instList[tid].end();
1233    --squash_it;
1234
1235    DPRINTF(IQ, "[tid:%i]: Squashing until sequence number %i!\n",
1236            tid, squashedSeqNum[tid]);
1237
1238    // Squash any instructions younger than the squashed sequence number
1239    // given.
1240    while (squash_it != instList[tid].end() &&
1241           (*squash_it)->seqNum > squashedSeqNum[tid]) {
1242
1243        DynInstPtr squashed_inst = (*squash_it);
1244        if (squashed_inst->isFloating()) {
1245            fpInstQueueWrites++;
1246        } else if (squashed_inst->isVector()) {
1247            vecInstQueueWrites++;
1248        } else {
1249            intInstQueueWrites++;
1250        }
1251
1252        // Only handle the instruction if it actually is in the IQ and
1253        // hasn't already been squashed in the IQ.
1254        if (squashed_inst->threadNumber != tid ||
1255            squashed_inst->isSquashedInIQ()) {
1256            --squash_it;
1257            continue;
1258        }
1259
1260        if (!squashed_inst->isIssued() ||
1261            (squashed_inst->isMemRef() &&
1262             !squashed_inst->memOpDone())) {
1263
1264            DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %s squashed.\n",
1265                    tid, squashed_inst->seqNum, squashed_inst->pcState());
1266
1267            bool is_acq_rel = squashed_inst->isMemBarrier() &&
1268                         (squashed_inst->isLoad() ||
1269                           (squashed_inst->isStore() &&
1270                             !squashed_inst->isStoreConditional()));
1271
1272            // Remove the instruction from the dependency list.
1273            if (is_acq_rel ||
1274                (!squashed_inst->isNonSpeculative() &&
1275                 !squashed_inst->isStoreConditional() &&
1276                 !squashed_inst->isMemBarrier() &&
1277                 !squashed_inst->isWriteBarrier())) {
1278
1279                for (int src_reg_idx = 0;
1280                     src_reg_idx < squashed_inst->numSrcRegs();
1281                     src_reg_idx++)
1282                {
1283                    PhysRegIdPtr src_reg =
1284                        squashed_inst->renamedSrcRegIdx(src_reg_idx);
1285
1286                    // Only remove it from the dependency graph if it
1287                    // was placed there in the first place.
1288
1289                    // Instead of doing a linked list traversal, we
1290                    // can just remove these squashed instructions
1291                    // either at issue time, or when the register is
1292                    // overwritten.  The only downside to this is it
1293                    // leaves more room for error.
1294
1295                    if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) &&
1296                        !src_reg->isFixedMapping()) {
1297                        dependGraph.remove(src_reg->flatIndex(),
1298                                           squashed_inst);
1299                    }
1300
1301
1302                    ++iqSquashedOperandsExamined;
1303                }
1304            } else if (!squashed_inst->isStoreConditional() ||
1305                       !squashed_inst->isCompleted()) {
1306                NonSpecMapIt ns_inst_it =
1307                    nonSpecInsts.find(squashed_inst->seqNum);
1308
1309                // we remove non-speculative instructions from
1310                // nonSpecInsts already when they are ready, and so we
1311                // cannot always expect to find them
1312                if (ns_inst_it == nonSpecInsts.end()) {
1313                    // loads that became ready but stalled on a
1314                    // blocked cache are alreayd removed from
1315                    // nonSpecInsts, and have not faulted
1316                    assert(squashed_inst->getFault() != NoFault ||
1317                           squashed_inst->isMemRef());
1318                } else {
1319
1320                    (*ns_inst_it).second = NULL;
1321
1322                    nonSpecInsts.erase(ns_inst_it);
1323
1324                    ++iqSquashedNonSpecRemoved;
1325                }
1326            }
1327
1328            // Might want to also clear out the head of the dependency graph.
1329
1330            // Mark it as squashed within the IQ.
1331            squashed_inst->setSquashedInIQ();
1332
1333            // @todo: Remove this hack where several statuses are set so the
1334            // inst will flow through the rest of the pipeline.
1335            squashed_inst->setIssued();
1336            squashed_inst->setCanCommit();
1337            squashed_inst->clearInIQ();
1338
1339            //Update Thread IQ Count
1340            count[squashed_inst->threadNumber]--;
1341
1342            ++freeEntries;
1343        }
1344
1345        // IQ clears out the heads of the dependency graph only when
1346        // instructions reach writeback stage. If an instruction is squashed
1347        // before writeback stage, its head of dependency graph would not be
1348        // cleared out; it holds the instruction's DynInstPtr. This prevents
1349        // freeing the squashed instruction's DynInst.
1350        // Thus, we need to manually clear out the squashed instructions' heads
1351        // of dependency graph.
1352        for (int dest_reg_idx = 0;
1353             dest_reg_idx < squashed_inst->numDestRegs();
1354             dest_reg_idx++)
1355        {
1356            PhysRegIdPtr dest_reg =
1357                squashed_inst->renamedDestRegIdx(dest_reg_idx);
1358            if (dest_reg->isFixedMapping()){
1359                continue;
1360            }
1361            assert(dependGraph.empty(dest_reg->flatIndex()));
1362            dependGraph.clearInst(dest_reg->flatIndex());
1363        }
1364        instList[tid].erase(squash_it--);
1365        ++iqSquashedInstsExamined;
1366    }
1367}
1368
1369template <class Impl>
1370bool
1371InstructionQueue<Impl>::addToDependents(const DynInstPtr &new_inst)
1372{
1373    // Loop through the instruction's source registers, adding
1374    // them to the dependency list if they are not ready.
1375    int8_t total_src_regs = new_inst->numSrcRegs();
1376    bool return_val = false;
1377
1378    for (int src_reg_idx = 0;
1379         src_reg_idx < total_src_regs;
1380         src_reg_idx++)
1381    {
1382        // Only add it to the dependency graph if it's not ready.
1383        if (!new_inst->isReadySrcRegIdx(src_reg_idx)) {
1384            PhysRegIdPtr src_reg = new_inst->renamedSrcRegIdx(src_reg_idx);
1385
1386            // Check the IQ's scoreboard to make sure the register
1387            // hasn't become ready while the instruction was in flight
1388            // between stages.  Only if it really isn't ready should
1389            // it be added to the dependency graph.
1390            if (src_reg->isFixedMapping()) {
1391                continue;
1392            } else if (!regScoreboard[src_reg->flatIndex()]) {
1393                DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1394                        "is being added to the dependency chain.\n",
1395                        new_inst->pcState(), src_reg->index(),
1396                        src_reg->className());
1397
1398                dependGraph.insert(src_reg->flatIndex(), new_inst);
1399
1400                // Change the return value to indicate that something
1401                // was added to the dependency graph.
1402                return_val = true;
1403            } else {
1404                DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that "
1405                        "became ready before it reached the IQ.\n",
1406                        new_inst->pcState(), src_reg->index(),
1407                        src_reg->className());
1408                // Mark a register ready within the instruction.
1409                new_inst->markSrcRegReady(src_reg_idx);
1410            }
1411        }
1412    }
1413
1414    return return_val;
1415}
1416
1417template <class Impl>
1418void
1419InstructionQueue<Impl>::addToProducers(const DynInstPtr &new_inst)
1420{
1421    // Nothing really needs to be marked when an instruction becomes
1422    // the producer of a register's value, but for convenience a ptr
1423    // to the producing instruction will be placed in the head node of
1424    // the dependency links.
1425    int8_t total_dest_regs = new_inst->numDestRegs();
1426
1427    for (int dest_reg_idx = 0;
1428         dest_reg_idx < total_dest_regs;
1429         dest_reg_idx++)
1430    {
1431        PhysRegIdPtr dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx);
1432
1433        // Some registers have fixed mapping, and there is no need to track
1434        // dependencies as these instructions must be executed at commit.
1435        if (dest_reg->isFixedMapping()) {
1436            continue;
1437        }
1438
1439        if (!dependGraph.empty(dest_reg->flatIndex())) {
1440            dependGraph.dump();
1441            panic("Dependency graph %i (%s) (flat: %i) not empty!",
1442                  dest_reg->index(), dest_reg->className(),
1443                  dest_reg->flatIndex());
1444        }
1445
1446        dependGraph.setInst(dest_reg->flatIndex(), new_inst);
1447
1448        // Mark the scoreboard to say it's not yet ready.
1449        regScoreboard[dest_reg->flatIndex()] = false;
1450    }
1451}
1452
1453template <class Impl>
1454void
1455InstructionQueue<Impl>::addIfReady(const DynInstPtr &inst)
1456{
1457    // If the instruction now has all of its source registers
1458    // available, then add it to the list of ready instructions.
1459    if (inst->readyToIssue()) {
1460
1461        //Add the instruction to the proper ready list.
1462        if (inst->isMemRef()) {
1463
1464            DPRINTF(IQ, "Checking if memory instruction can issue.\n");
1465
1466            // Message to the mem dependence unit that this instruction has
1467            // its registers ready.
1468            memDepUnit[inst->threadNumber].regsReady(inst);
1469
1470            return;
1471        }
1472
1473        OpClass op_class = inst->opClass();
1474
1475        DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
1476                "the ready list, PC %s opclass:%i [sn:%lli].\n",
1477                inst->pcState(), op_class, inst->seqNum);
1478
1479        readyInsts[op_class].push(inst);
1480
1481        // Will need to reorder the list if either a queue is not on the list,
1482        // or it has an older instruction than last time.
1483        if (!queueOnList[op_class]) {
1484            addToOrderList(op_class);
1485        } else if (readyInsts[op_class].top()->seqNum  <
1486                   (*readyIt[op_class]).oldestInst) {
1487            listOrder.erase(readyIt[op_class]);
1488            addToOrderList(op_class);
1489        }
1490    }
1491}
1492
1493template <class Impl>
1494int
1495InstructionQueue<Impl>::countInsts()
1496{
1497#if 0
1498    //ksewell:This works but definitely could use a cleaner write
1499    //with a more intuitive way of counting. Right now it's
1500    //just brute force ....
1501    // Change the #if if you want to use this method.
1502    int total_insts = 0;
1503
1504    for (ThreadID tid = 0; tid < numThreads; ++tid) {
1505        ListIt count_it = instList[tid].begin();
1506
1507        while (count_it != instList[tid].end()) {
1508            if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) {
1509                if (!(*count_it)->isIssued()) {
1510                    ++total_insts;
1511                } else if ((*count_it)->isMemRef() &&
1512                           !(*count_it)->memOpDone) {
1513                    // Loads that have not been marked as executed still count
1514                    // towards the total instructions.
1515                    ++total_insts;
1516                }
1517            }
1518
1519            ++count_it;
1520        }
1521    }
1522
1523    return total_insts;
1524#else
1525    return numEntries - freeEntries;
1526#endif
1527}
1528
1529template <class Impl>
1530void
1531InstructionQueue<Impl>::dumpLists()
1532{
1533    for (int i = 0; i < Num_OpClasses; ++i) {
1534        cprintf("Ready list %i size: %i\n", i, readyInsts[i].size());
1535
1536        cprintf("\n");
1537    }
1538
1539    cprintf("Non speculative list size: %i\n", nonSpecInsts.size());
1540
1541    NonSpecMapIt non_spec_it = nonSpecInsts.begin();
1542    NonSpecMapIt non_spec_end_it = nonSpecInsts.end();
1543
1544    cprintf("Non speculative list: ");
1545
1546    while (non_spec_it != non_spec_end_it) {
1547        cprintf("%s [sn:%lli]", (*non_spec_it).second->pcState(),
1548                (*non_spec_it).second->seqNum);
1549        ++non_spec_it;
1550    }
1551
1552    cprintf("\n");
1553
1554    ListOrderIt list_order_it = listOrder.begin();
1555    ListOrderIt list_order_end_it = listOrder.end();
1556    int i = 1;
1557
1558    cprintf("List order: ");
1559
1560    while (list_order_it != list_order_end_it) {
1561        cprintf("%i OpClass:%i [sn:%lli] ", i, (*list_order_it).queueType,
1562                (*list_order_it).oldestInst);
1563
1564        ++list_order_it;
1565        ++i;
1566    }
1567
1568    cprintf("\n");
1569}
1570
1571
1572template <class Impl>
1573void
1574InstructionQueue<Impl>::dumpInsts()
1575{
1576    for (ThreadID tid = 0; tid < numThreads; ++tid) {
1577        int num = 0;
1578        int valid_num = 0;
1579        ListIt inst_list_it = instList[tid].begin();
1580
1581        while (inst_list_it != instList[tid].end()) {
1582            cprintf("Instruction:%i\n", num);
1583            if (!(*inst_list_it)->isSquashed()) {
1584                if (!(*inst_list_it)->isIssued()) {
1585                    ++valid_num;
1586                    cprintf("Count:%i\n", valid_num);
1587                } else if ((*inst_list_it)->isMemRef() &&
1588                           !(*inst_list_it)->memOpDone()) {
1589                    // Loads that have not been marked as executed
1590                    // still count towards the total instructions.
1591                    ++valid_num;
1592                    cprintf("Count:%i\n", valid_num);
1593                }
1594            }
1595
1596            cprintf("PC: %s\n[sn:%lli]\n[tid:%i]\n"
1597                    "Issued:%i\nSquashed:%i\n",
1598                    (*inst_list_it)->pcState(),
1599                    (*inst_list_it)->seqNum,
1600                    (*inst_list_it)->threadNumber,
1601                    (*inst_list_it)->isIssued(),
1602                    (*inst_list_it)->isSquashed());
1603
1604            if ((*inst_list_it)->isMemRef()) {
1605                cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1606            }
1607
1608            cprintf("\n");
1609
1610            inst_list_it++;
1611            ++num;
1612        }
1613    }
1614
1615    cprintf("Insts to Execute list:\n");
1616
1617    int num = 0;
1618    int valid_num = 0;
1619    ListIt inst_list_it = instsToExecute.begin();
1620
1621    while (inst_list_it != instsToExecute.end())
1622    {
1623        cprintf("Instruction:%i\n",
1624                num);
1625        if (!(*inst_list_it)->isSquashed()) {
1626            if (!(*inst_list_it)->isIssued()) {
1627                ++valid_num;
1628                cprintf("Count:%i\n", valid_num);
1629            } else if ((*inst_list_it)->isMemRef() &&
1630                       !(*inst_list_it)->memOpDone()) {
1631                // Loads that have not been marked as executed
1632                // still count towards the total instructions.
1633                ++valid_num;
1634                cprintf("Count:%i\n", valid_num);
1635            }
1636        }
1637
1638        cprintf("PC: %s\n[sn:%lli]\n[tid:%i]\n"
1639                "Issued:%i\nSquashed:%i\n",
1640                (*inst_list_it)->pcState(),
1641                (*inst_list_it)->seqNum,
1642                (*inst_list_it)->threadNumber,
1643                (*inst_list_it)->isIssued(),
1644                (*inst_list_it)->isSquashed());
1645
1646        if ((*inst_list_it)->isMemRef()) {
1647            cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
1648        }
1649
1650        cprintf("\n");
1651
1652        inst_list_it++;
1653        ++num;
1654    }
1655}
1656
1657#endif//__CPU_O3_INST_QUEUE_IMPL_HH__
1658