lsq_unit_impl.hh revision 13492:3679580cd1e7
1
2/*
3 * Copyright (c) 2010-2014, 2017 ARM Limited
4 * Copyright (c) 2013 Advanced Micro Devices, Inc.
5 * All rights reserved
6 *
7 * The license below extends only to copyright in the software and shall
8 * not be construed as granting a license to any other intellectual
9 * property including but not limited to intellectual property relating
10 * to a hardware implementation of the functionality of the software
11 * licensed hereunder.  You may use the software subject to the license
12 * terms below provided that you ensure that this notice is replicated
13 * unmodified and in its entirety in all distributions of the software,
14 * modified or unmodified, in source code or in binary form.
15 *
16 * Copyright (c) 2004-2005 The Regents of The University of Michigan
17 * All rights reserved.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are
21 * met: redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer;
23 * redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution;
26 * neither the name of the copyright holders nor the names of its
27 * contributors may be used to endorse or promote products derived from
28 * this software without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
34 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 *
42 * Authors: Kevin Lim
43 *          Korey Sewell
44 */
45
46#ifndef __CPU_O3_LSQ_UNIT_IMPL_HH__
47#define __CPU_O3_LSQ_UNIT_IMPL_HH__
48
49#include "arch/generic/debugfaults.hh"
50#include "arch/locked_mem.hh"
51#include "base/str.hh"
52#include "config/the_isa.hh"
53#include "cpu/checker/cpu.hh"
54#include "cpu/o3/lsq.hh"
55#include "cpu/o3/lsq_unit.hh"
56#include "debug/Activity.hh"
57#include "debug/IEW.hh"
58#include "debug/LSQUnit.hh"
59#include "debug/O3PipeView.hh"
60#include "mem/packet.hh"
61#include "mem/request.hh"
62
63template<class Impl>
64LSQUnit<Impl>::WritebackEvent::WritebackEvent(const DynInstPtr &_inst,
65        PacketPtr _pkt, LSQUnit *lsq_ptr)
66    : Event(Default_Pri, AutoDelete),
67      inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
68{
69}
70
71template<class Impl>
72void
73LSQUnit<Impl>::WritebackEvent::process()
74{
75    assert(!lsqPtr->cpu->switchedOut());
76
77    lsqPtr->writeback(inst, pkt);
78
79    if (pkt->senderState)
80        delete pkt->senderState;
81
82    delete pkt;
83}
84
85template<class Impl>
86const char *
87LSQUnit<Impl>::WritebackEvent::description() const
88{
89    return "Store writeback";
90}
91
92template<class Impl>
93void
94LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
95{
96    LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
97    DynInstPtr inst = state->inst;
98    DPRINTF(IEW, "Writeback event [sn:%lli].\n", inst->seqNum);
99    DPRINTF(Activity, "Activity: Writeback event [sn:%lli].\n", inst->seqNum);
100
101    if (state->cacheBlocked) {
102        // This is the first half of a previous split load,
103        // where the 2nd half blocked, ignore this response
104        DPRINTF(IEW, "[sn:%lli]: Response from first half of earlier "
105                "blocked split load recieved. Ignoring.\n", inst->seqNum);
106        delete state;
107        return;
108    }
109
110    // If this is a split access, wait until all packets are received.
111    if (TheISA::HasUnalignedMemAcc && !state->complete()) {
112        return;
113    }
114
115    assert(!cpu->switchedOut());
116    if (!inst->isSquashed()) {
117        if (!state->noWB) {
118            // Only loads and store conditionals perform the writeback
119            // after receving the response from the memory
120            assert(inst->isLoad() || inst->isStoreConditional());
121            if (!TheISA::HasUnalignedMemAcc || !state->isSplit ||
122                !state->isLoad) {
123                writeback(inst, pkt);
124            } else {
125                writeback(inst, state->mainPkt);
126            }
127        }
128
129        if (inst->isStore()) {
130            completeStore(state->idx);
131        }
132    }
133
134    if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) {
135        delete state->mainPkt;
136    }
137
138    pkt->req->setAccessLatency();
139    cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
140
141    delete state;
142}
143
144template <class Impl>
145LSQUnit<Impl>::LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
146    : lsqID(-1), storeQueue(sqEntries+1), loadQueue(lqEntries+1),
147      LQEntries(lqEntries+1), SQEntries(sqEntries+1),
148      loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
149      isStoreBlocked(false), storeInFlight(false), hasPendingPkt(false),
150      pendingPkt(nullptr)
151{
152}
153
154template<class Impl>
155void
156LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
157        LSQ *lsq_ptr, unsigned id)
158{
159    lsqID = id;
160
161    cpu = cpu_ptr;
162    iewStage = iew_ptr;
163
164    lsq = lsq_ptr;
165
166    DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",lsqID);
167
168    depCheckShift = params->LSQDepCheckShift;
169    checkLoads = params->LSQCheckLoads;
170    cacheStorePorts = params->cacheStorePorts;
171    needsTSO = params->needsTSO;
172
173    resetState();
174}
175
176
177template<class Impl>
178void
179LSQUnit<Impl>::resetState()
180{
181    loads = stores = storesToWB = 0;
182
183    loadHead = loadTail = 0;
184
185    storeHead = storeWBIdx = storeTail = 0;
186
187    usedStorePorts = 0;
188
189    retryPkt = NULL;
190    memDepViolator = NULL;
191
192    stalled = false;
193
194    cacheBlockMask = ~(cpu->cacheLineSize() - 1);
195}
196
197template<class Impl>
198std::string
199LSQUnit<Impl>::name() const
200{
201    if (Impl::MaxThreads == 1) {
202        return iewStage->name() + ".lsq";
203    } else {
204        return iewStage->name() + ".lsq.thread" + std::to_string(lsqID);
205    }
206}
207
208template<class Impl>
209void
210LSQUnit<Impl>::regStats()
211{
212    lsqForwLoads
213        .name(name() + ".forwLoads")
214        .desc("Number of loads that had data forwarded from stores");
215
216    invAddrLoads
217        .name(name() + ".invAddrLoads")
218        .desc("Number of loads ignored due to an invalid address");
219
220    lsqSquashedLoads
221        .name(name() + ".squashedLoads")
222        .desc("Number of loads squashed");
223
224    lsqIgnoredResponses
225        .name(name() + ".ignoredResponses")
226        .desc("Number of memory responses ignored because the instruction is squashed");
227
228    lsqMemOrderViolation
229        .name(name() + ".memOrderViolation")
230        .desc("Number of memory ordering violations");
231
232    lsqSquashedStores
233        .name(name() + ".squashedStores")
234        .desc("Number of stores squashed");
235
236    invAddrSwpfs
237        .name(name() + ".invAddrSwpfs")
238        .desc("Number of software prefetches ignored due to an invalid address");
239
240    lsqBlockedLoads
241        .name(name() + ".blockedLoads")
242        .desc("Number of blocked loads due to partial load-store forwarding");
243
244    lsqRescheduledLoads
245        .name(name() + ".rescheduledLoads")
246        .desc("Number of loads that were rescheduled");
247
248    lsqCacheBlocked
249        .name(name() + ".cacheBlocked")
250        .desc("Number of times an access to memory failed due to the cache being blocked");
251}
252
253template<class Impl>
254void
255LSQUnit<Impl>::setDcachePort(MasterPort *dcache_port)
256{
257    dcachePort = dcache_port;
258}
259
260template<class Impl>
261void
262LSQUnit<Impl>::clearLQ()
263{
264    loadQueue.clear();
265}
266
267template<class Impl>
268void
269LSQUnit<Impl>::clearSQ()
270{
271    storeQueue.clear();
272}
273
274template<class Impl>
275void
276LSQUnit<Impl>::drainSanityCheck() const
277{
278    for (int i = 0; i < loadQueue.size(); ++i)
279        assert(!loadQueue[i]);
280
281    assert(storesToWB == 0);
282    assert(!retryPkt);
283}
284
285template<class Impl>
286void
287LSQUnit<Impl>::takeOverFrom()
288{
289    resetState();
290}
291
292template<class Impl>
293void
294LSQUnit<Impl>::resizeLQ(unsigned size)
295{
296    unsigned size_plus_sentinel = size + 1;
297    assert(size_plus_sentinel >= LQEntries);
298
299    if (size_plus_sentinel > LQEntries) {
300        while (size_plus_sentinel > loadQueue.size()) {
301            DynInstPtr dummy;
302            loadQueue.push_back(dummy);
303            LQEntries++;
304        }
305    } else {
306        LQEntries = size_plus_sentinel;
307    }
308
309    assert(LQEntries <= 256);
310}
311
312template<class Impl>
313void
314LSQUnit<Impl>::resizeSQ(unsigned size)
315{
316    unsigned size_plus_sentinel = size + 1;
317    if (size_plus_sentinel > SQEntries) {
318        while (size_plus_sentinel > storeQueue.size()) {
319            SQEntry dummy;
320            storeQueue.push_back(dummy);
321            SQEntries++;
322        }
323    } else {
324        SQEntries = size_plus_sentinel;
325    }
326
327    assert(SQEntries <= 256);
328}
329
330template <class Impl>
331void
332LSQUnit<Impl>::insert(const DynInstPtr &inst)
333{
334    assert(inst->isMemRef());
335
336    assert(inst->isLoad() || inst->isStore());
337
338    if (inst->isLoad()) {
339        insertLoad(inst);
340    } else {
341        insertStore(inst);
342    }
343
344    inst->setInLSQ();
345}
346
347template <class Impl>
348void
349LSQUnit<Impl>::insertLoad(const DynInstPtr &load_inst)
350{
351    assert((loadTail + 1) % LQEntries != loadHead);
352    assert(loads < LQEntries);
353
354    DPRINTF(LSQUnit, "Inserting load PC %s, idx:%i [sn:%lli]\n",
355            load_inst->pcState(), loadTail, load_inst->seqNum);
356
357    load_inst->lqIdx = loadTail;
358
359    if (stores == 0) {
360        load_inst->sqIdx = -1;
361    } else {
362        load_inst->sqIdx = storeTail;
363    }
364
365    loadQueue[loadTail] = load_inst;
366
367    incrLdIdx(loadTail);
368
369    ++loads;
370}
371
372template <class Impl>
373void
374LSQUnit<Impl>::insertStore(const DynInstPtr &store_inst)
375{
376    // Make sure it is not full before inserting an instruction.
377    assert((storeTail + 1) % SQEntries != storeHead);
378    assert(stores < SQEntries);
379
380    DPRINTF(LSQUnit, "Inserting store PC %s, idx:%i [sn:%lli]\n",
381            store_inst->pcState(), storeTail, store_inst->seqNum);
382
383    store_inst->sqIdx = storeTail;
384    store_inst->lqIdx = loadTail;
385
386    storeQueue[storeTail] = SQEntry(store_inst);
387
388    incrStIdx(storeTail);
389
390    ++stores;
391}
392
393template <class Impl>
394typename Impl::DynInstPtr
395LSQUnit<Impl>::getMemDepViolator()
396{
397    DynInstPtr temp = memDepViolator;
398
399    memDepViolator = NULL;
400
401    return temp;
402}
403
404template <class Impl>
405unsigned
406LSQUnit<Impl>::numFreeLoadEntries()
407{
408        //LQ has an extra dummy entry to differentiate
409        //empty/full conditions. Subtract 1 from the free entries.
410        DPRINTF(LSQUnit, "LQ size: %d, #loads occupied: %d\n", LQEntries, loads);
411        return LQEntries - loads - 1;
412}
413
414template <class Impl>
415unsigned
416LSQUnit<Impl>::numFreeStoreEntries()
417{
418        //SQ has an extra dummy entry to differentiate
419        //empty/full conditions. Subtract 1 from the free entries.
420        DPRINTF(LSQUnit, "SQ size: %d, #stores occupied: %d\n", SQEntries, stores);
421        return SQEntries - stores - 1;
422
423 }
424
425template <class Impl>
426void
427LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
428{
429    // Should only ever get invalidations in here
430    assert(pkt->isInvalidate());
431
432    int load_idx = loadHead;
433    DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
434
435    // Only Invalidate packet calls checkSnoop
436    assert(pkt->isInvalidate());
437    for (int x = 0; x < cpu->numContexts(); x++) {
438        ThreadContext *tc = cpu->getContext(x);
439        bool no_squash = cpu->thread[x]->noSquashFromTC;
440        cpu->thread[x]->noSquashFromTC = true;
441        TheISA::handleLockedSnoop(tc, pkt, cacheBlockMask);
442        cpu->thread[x]->noSquashFromTC = no_squash;
443    }
444
445    Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
446
447    DynInstPtr ld_inst = loadQueue[load_idx];
448    if (ld_inst) {
449        Addr load_addr_low = ld_inst->physEffAddrLow & cacheBlockMask;
450        Addr load_addr_high = ld_inst->physEffAddrHigh & cacheBlockMask;
451
452        // Check that this snoop didn't just invalidate our lock flag
453        if (ld_inst->effAddrValid() && (load_addr_low == invalidate_addr
454                                        || load_addr_high == invalidate_addr)
455            && ld_inst->memReqFlags & Request::LLSC)
456            TheISA::handleLockedSnoopHit(ld_inst.get());
457    }
458
459    // If this is the only load in the LSQ we don't care
460    if (load_idx == loadTail)
461        return;
462
463    incrLdIdx(load_idx);
464
465    bool force_squash = false;
466
467    while (load_idx != loadTail) {
468        DynInstPtr ld_inst = loadQueue[load_idx];
469
470        if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
471            incrLdIdx(load_idx);
472            continue;
473        }
474
475        Addr load_addr_low = ld_inst->physEffAddrLow & cacheBlockMask;
476        Addr load_addr_high = ld_inst->physEffAddrHigh & cacheBlockMask;
477
478        DPRINTF(LSQUnit, "-- inst [sn:%lli] load_addr: %#x to pktAddr:%#x\n",
479                    ld_inst->seqNum, load_addr_low, invalidate_addr);
480
481        if ((load_addr_low == invalidate_addr
482             || load_addr_high == invalidate_addr) || force_squash) {
483            if (needsTSO) {
484                // If we have a TSO system, as all loads must be ordered with
485                // all other loads, this load as well as *all* subsequent loads
486                // need to be squashed to prevent possible load reordering.
487                force_squash = true;
488            }
489            if (ld_inst->possibleLoadViolation() || force_squash) {
490                DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
491                        pkt->getAddr(), ld_inst->seqNum);
492
493                // Mark the load for re-execution
494                ld_inst->fault = std::make_shared<ReExec>();
495            } else {
496                DPRINTF(LSQUnit, "HitExternal Snoop for addr %#x [sn:%lli]\n",
497                        pkt->getAddr(), ld_inst->seqNum);
498
499                // Make sure that we don't lose a snoop hitting a LOCKED
500                // address since the LOCK* flags don't get updated until
501                // commit.
502                if (ld_inst->memReqFlags & Request::LLSC)
503                    TheISA::handleLockedSnoopHit(ld_inst.get());
504
505                // If a older load checks this and it's true
506                // then we might have missed the snoop
507                // in which case we need to invalidate to be sure
508                ld_inst->hitExternalSnoop(true);
509            }
510        }
511        incrLdIdx(load_idx);
512    }
513    return;
514}
515
516template <class Impl>
517Fault
518LSQUnit<Impl>::checkViolations(int load_idx, const DynInstPtr &inst)
519{
520    Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
521    Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
522
523    /** @todo in theory you only need to check an instruction that has executed
524     * however, there isn't a good way in the pipeline at the moment to check
525     * all instructions that will execute before the store writes back. Thus,
526     * like the implementation that came before it, we're overly conservative.
527     */
528    while (load_idx != loadTail) {
529        DynInstPtr ld_inst = loadQueue[load_idx];
530        if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
531            incrLdIdx(load_idx);
532            continue;
533        }
534
535        Addr ld_eff_addr1 = ld_inst->effAddr >> depCheckShift;
536        Addr ld_eff_addr2 =
537            (ld_inst->effAddr + ld_inst->effSize - 1) >> depCheckShift;
538
539        if (inst_eff_addr2 >= ld_eff_addr1 && inst_eff_addr1 <= ld_eff_addr2) {
540            if (inst->isLoad()) {
541                // If this load is to the same block as an external snoop
542                // invalidate that we've observed then the load needs to be
543                // squashed as it could have newer data
544                if (ld_inst->hitExternalSnoop()) {
545                    if (!memDepViolator ||
546                            ld_inst->seqNum < memDepViolator->seqNum) {
547                        DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] "
548                                "and [sn:%lli] at address %#x\n",
549                                inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
550                        memDepViolator = ld_inst;
551
552                        ++lsqMemOrderViolation;
553
554                        return std::make_shared<GenericISA::M5PanicFault>(
555                            "Detected fault with inst [sn:%lli] and "
556                            "[sn:%lli] at address %#x\n",
557                            inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
558                    }
559                }
560
561                // Otherwise, mark the load has a possible load violation
562                // and if we see a snoop before it's commited, we need to squash
563                ld_inst->possibleLoadViolation(true);
564                DPRINTF(LSQUnit, "Found possible load violation at addr: %#x"
565                        " between instructions [sn:%lli] and [sn:%lli]\n",
566                        inst_eff_addr1, inst->seqNum, ld_inst->seqNum);
567            } else {
568                // A load/store incorrectly passed this store.
569                // Check if we already have a violator, or if it's newer
570                // squash and refetch.
571                if (memDepViolator && ld_inst->seqNum > memDepViolator->seqNum)
572                    break;
573
574                DPRINTF(LSQUnit, "Detected fault with inst [sn:%lli] and "
575                        "[sn:%lli] at address %#x\n",
576                        inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
577                memDepViolator = ld_inst;
578
579                ++lsqMemOrderViolation;
580
581                return std::make_shared<GenericISA::M5PanicFault>(
582                    "Detected fault with "
583                    "inst [sn:%lli] and [sn:%lli] at address %#x\n",
584                    inst->seqNum, ld_inst->seqNum, ld_eff_addr1);
585            }
586        }
587
588        incrLdIdx(load_idx);
589    }
590    return NoFault;
591}
592
593
594
595
596template <class Impl>
597Fault
598LSQUnit<Impl>::executeLoad(const DynInstPtr &inst)
599{
600    using namespace TheISA;
601    // Execute a specific load.
602    Fault load_fault = NoFault;
603
604    DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n",
605            inst->pcState(), inst->seqNum);
606
607    assert(!inst->isSquashed());
608
609    load_fault = inst->initiateAcc();
610
611    if (inst->isTranslationDelayed() &&
612        load_fault == NoFault)
613        return load_fault;
614
615    // If the instruction faulted or predicated false, then we need to send it
616    // along to commit without the instruction completing.
617    if (load_fault != NoFault || !inst->readPredicate()) {
618        // Send this instruction to commit, also make sure iew stage
619        // realizes there is activity.  Mark it as executed unless it
620        // is a strictly ordered load that needs to hit the head of
621        // commit.
622        if (!inst->readPredicate())
623            inst->forwardOldRegs();
624        DPRINTF(LSQUnit, "Load [sn:%lli] not executed from %s\n",
625                inst->seqNum,
626                (load_fault != NoFault ? "fault" : "predication"));
627        if (!(inst->hasRequest() && inst->strictlyOrdered()) ||
628            inst->isAtCommit()) {
629            inst->setExecuted();
630        }
631        iewStage->instToCommit(inst);
632        iewStage->activityThisCycle();
633    } else {
634        assert(inst->effAddrValid());
635        int load_idx = inst->lqIdx;
636        incrLdIdx(load_idx);
637
638        if (checkLoads)
639            return checkViolations(load_idx, inst);
640    }
641
642    return load_fault;
643}
644
645template <class Impl>
646Fault
647LSQUnit<Impl>::executeStore(const DynInstPtr &store_inst)
648{
649    using namespace TheISA;
650    // Make sure that a store exists.
651    assert(stores != 0);
652
653    int store_idx = store_inst->sqIdx;
654
655    DPRINTF(LSQUnit, "Executing store PC %s [sn:%lli]\n",
656            store_inst->pcState(), store_inst->seqNum);
657
658    assert(!store_inst->isSquashed());
659
660    // Check the recently completed loads to see if any match this store's
661    // address.  If so, then we have a memory ordering violation.
662    int load_idx = store_inst->lqIdx;
663
664    Fault store_fault = store_inst->initiateAcc();
665
666    if (store_inst->isTranslationDelayed() &&
667        store_fault == NoFault)
668        return store_fault;
669
670    if (!store_inst->readPredicate()) {
671        DPRINTF(LSQUnit, "Store [sn:%lli] not executed from predication\n",
672                store_inst->seqNum);
673        store_inst->forwardOldRegs();
674        return store_fault;
675    }
676
677    if (storeQueue[store_idx].size == 0) {
678        DPRINTF(LSQUnit,"Fault on Store PC %s, [sn:%lli], Size = 0\n",
679                store_inst->pcState(), store_inst->seqNum);
680
681        return store_fault;
682    }
683
684    assert(store_fault == NoFault);
685
686    if (store_inst->isStoreConditional()) {
687        // Store conditionals need to set themselves as able to
688        // writeback if we haven't had a fault by here.
689        storeQueue[store_idx].canWB = true;
690
691        ++storesToWB;
692    }
693
694    return checkViolations(load_idx, store_inst);
695
696}
697
698template <class Impl>
699void
700LSQUnit<Impl>::commitLoad()
701{
702    assert(loadQueue[loadHead]);
703
704    DPRINTF(LSQUnit, "Committing head load instruction, PC %s\n",
705            loadQueue[loadHead]->pcState());
706
707    loadQueue[loadHead] = NULL;
708
709    incrLdIdx(loadHead);
710
711    --loads;
712}
713
714template <class Impl>
715void
716LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
717{
718    assert(loads == 0 || loadQueue[loadHead]);
719
720    while (loads != 0 && loadQueue[loadHead]->seqNum <= youngest_inst) {
721        commitLoad();
722    }
723}
724
725template <class Impl>
726void
727LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
728{
729    assert(stores == 0 || storeQueue[storeHead].inst);
730
731    int store_idx = storeHead;
732
733    while (store_idx != storeTail) {
734        assert(storeQueue[store_idx].inst);
735        // Mark any stores that are now committed and have not yet
736        // been marked as able to write back.
737        if (!storeQueue[store_idx].canWB) {
738            if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
739                break;
740            }
741            DPRINTF(LSQUnit, "Marking store as able to write back, PC "
742                    "%s [sn:%lli]\n",
743                    storeQueue[store_idx].inst->pcState(),
744                    storeQueue[store_idx].inst->seqNum);
745
746            storeQueue[store_idx].canWB = true;
747
748            ++storesToWB;
749        }
750
751        incrStIdx(store_idx);
752    }
753}
754
755template <class Impl>
756void
757LSQUnit<Impl>::writebackPendingStore()
758{
759    if (hasPendingPkt) {
760        assert(pendingPkt != NULL);
761
762        // If the cache is blocked, this will store the packet for retry.
763        if (sendStore(pendingPkt)) {
764            storePostSend(pendingPkt);
765        }
766        pendingPkt = NULL;
767        hasPendingPkt = false;
768    }
769}
770
771template <class Impl>
772void
773LSQUnit<Impl>::writebackStores()
774{
775    // First writeback the second packet from any split store that didn't
776    // complete last cycle because there weren't enough cache ports available.
777    if (TheISA::HasUnalignedMemAcc) {
778        writebackPendingStore();
779    }
780
781    while (storesToWB > 0 &&
782           storeWBIdx != storeTail &&
783           storeQueue[storeWBIdx].inst &&
784           storeQueue[storeWBIdx].canWB &&
785           ((!needsTSO) || (!storeInFlight)) &&
786           usedStorePorts < cacheStorePorts) {
787
788        if (isStoreBlocked) {
789            DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
790                    " is blocked!\n");
791            break;
792        }
793
794        // Store didn't write any data so no need to write it back to
795        // memory.
796        if (storeQueue[storeWBIdx].size == 0) {
797            completeStore(storeWBIdx);
798
799            incrStIdx(storeWBIdx);
800
801            continue;
802        }
803
804        ++usedStorePorts;
805
806        if (storeQueue[storeWBIdx].inst->isDataPrefetch()) {
807            incrStIdx(storeWBIdx);
808
809            continue;
810        }
811
812        assert(storeQueue[storeWBIdx].req);
813        assert(!storeQueue[storeWBIdx].committed);
814
815        if (TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit) {
816            assert(storeQueue[storeWBIdx].sreqLow);
817            assert(storeQueue[storeWBIdx].sreqHigh);
818        }
819
820        DynInstPtr inst = storeQueue[storeWBIdx].inst;
821
822        RequestPtr &req = storeQueue[storeWBIdx].req;
823        const RequestPtr &sreqLow = storeQueue[storeWBIdx].sreqLow;
824        const RequestPtr &sreqHigh = storeQueue[storeWBIdx].sreqHigh;
825
826        storeQueue[storeWBIdx].committed = true;
827
828        assert(!inst->memData);
829        inst->memData = new uint8_t[req->getSize()];
830
831        if (storeQueue[storeWBIdx].isAllZeros)
832            memset(inst->memData, 0, req->getSize());
833        else
834            memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize());
835
836        PacketPtr data_pkt;
837        PacketPtr snd_data_pkt = NULL;
838
839        LSQSenderState *state = new LSQSenderState;
840        state->isLoad = false;
841        state->idx = storeWBIdx;
842        state->inst = inst;
843
844        if (!TheISA::HasUnalignedMemAcc || !storeQueue[storeWBIdx].isSplit) {
845
846            // Build a single data packet if the store isn't split.
847            data_pkt = Packet::createWrite(req);
848            data_pkt->dataStatic(inst->memData);
849            data_pkt->senderState = state;
850        } else {
851            // Create two packets if the store is split in two.
852            data_pkt = Packet::createWrite(sreqLow);
853            snd_data_pkt = Packet::createWrite(sreqHigh);
854
855            data_pkt->dataStatic(inst->memData);
856            snd_data_pkt->dataStatic(inst->memData + sreqLow->getSize());
857
858            data_pkt->senderState = state;
859            snd_data_pkt->senderState = state;
860
861            state->isSplit = true;
862            state->outstanding = 2;
863
864            // Can delete the main request now.
865            req = sreqLow;
866        }
867
868        DPRINTF(LSQUnit, "D-Cache: Writing back store idx:%i PC:%s "
869                "to Addr:%#x, data:%#x [sn:%lli]\n",
870                storeWBIdx, inst->pcState(),
871                req->getPaddr(), (int)*(inst->memData),
872                inst->seqNum);
873
874        // @todo: Remove this SC hack once the memory system handles it.
875        if (inst->isStoreConditional()) {
876            assert(!storeQueue[storeWBIdx].isSplit);
877            // Disable recording the result temporarily.  Writing to
878            // misc regs normally updates the result, but this is not
879            // the desired behavior when handling store conditionals.
880            inst->recordResult(false);
881            bool success = TheISA::handleLockedWrite(inst.get(), req, cacheBlockMask);
882            inst->recordResult(true);
883
884            if (!success) {
885                // Instantly complete this store.
886                DPRINTF(LSQUnit, "Store conditional [sn:%lli] failed.  "
887                        "Instantly completing it.\n",
888                        inst->seqNum);
889                WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this);
890                cpu->schedule(wb, curTick() + 1);
891                completeStore(storeWBIdx);
892                incrStIdx(storeWBIdx);
893                continue;
894            }
895        } else {
896            // Non-store conditionals do not need a writeback.
897            state->noWB = true;
898        }
899
900        bool split =
901            TheISA::HasUnalignedMemAcc && storeQueue[storeWBIdx].isSplit;
902
903        ThreadContext *thread = cpu->tcBase(lsqID);
904
905        if (req->isMmappedIpr()) {
906            assert(!inst->isStoreConditional());
907            TheISA::handleIprWrite(thread, data_pkt);
908            delete data_pkt;
909            if (split) {
910                assert(snd_data_pkt->req->isMmappedIpr());
911                TheISA::handleIprWrite(thread, snd_data_pkt);
912                delete snd_data_pkt;
913            }
914            delete state;
915            completeStore(storeWBIdx);
916            incrStIdx(storeWBIdx);
917        } else if (!sendStore(data_pkt)) {
918            DPRINTF(IEW, "D-Cache became blocked when writing [sn:%lli], will"
919                    "retry later\n",
920                    inst->seqNum);
921
922            // Need to store the second packet, if split.
923            if (split) {
924                state->pktToSend = true;
925                state->pendingPacket = snd_data_pkt;
926            }
927        } else {
928
929            // If split, try to send the second packet too
930            if (split) {
931                assert(snd_data_pkt);
932
933                // Ensure there are enough ports to use.
934                if (usedStorePorts < cacheStorePorts) {
935                    ++usedStorePorts;
936                    if (sendStore(snd_data_pkt)) {
937                        storePostSend(snd_data_pkt);
938                    } else {
939                        DPRINTF(IEW, "D-Cache became blocked when writing"
940                                " [sn:%lli] second packet, will retry later\n",
941                                inst->seqNum);
942                    }
943                } else {
944
945                    // Store the packet for when there's free ports.
946                    assert(pendingPkt == NULL);
947                    pendingPkt = snd_data_pkt;
948                    hasPendingPkt = true;
949                }
950            } else {
951
952                // Not a split store.
953                storePostSend(data_pkt);
954            }
955        }
956    }
957
958    // Not sure this should set it to 0.
959    usedStorePorts = 0;
960
961    assert(stores >= 0 && storesToWB >= 0);
962}
963
964/*template <class Impl>
965void
966LSQUnit<Impl>::removeMSHR(InstSeqNum seqNum)
967{
968    list<InstSeqNum>::iterator mshr_it = find(mshrSeqNums.begin(),
969                                              mshrSeqNums.end(),
970                                              seqNum);
971
972    if (mshr_it != mshrSeqNums.end()) {
973        mshrSeqNums.erase(mshr_it);
974        DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size());
975    }
976}*/
977
978template <class Impl>
979void
980LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
981{
982    DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
983            "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
984
985    int load_idx = loadTail;
986    decrLdIdx(load_idx);
987
988    while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
989        DPRINTF(LSQUnit,"Load Instruction PC %s squashed, "
990                "[sn:%lli]\n",
991                loadQueue[load_idx]->pcState(),
992                loadQueue[load_idx]->seqNum);
993
994        if (isStalled() && load_idx == stallingLoadIdx) {
995            stalled = false;
996            stallingStoreIsn = 0;
997            stallingLoadIdx = 0;
998        }
999
1000        // Clear the smart pointer to make sure it is decremented.
1001        loadQueue[load_idx]->setSquashed();
1002        loadQueue[load_idx] = NULL;
1003        --loads;
1004
1005        // Inefficient!
1006        loadTail = load_idx;
1007
1008        decrLdIdx(load_idx);
1009        ++lsqSquashedLoads;
1010    }
1011
1012    if (memDepViolator && squashed_num < memDepViolator->seqNum) {
1013        memDepViolator = NULL;
1014    }
1015
1016    int store_idx = storeTail;
1017    decrStIdx(store_idx);
1018
1019    while (stores != 0 &&
1020           storeQueue[store_idx].inst->seqNum > squashed_num) {
1021        // Instructions marked as can WB are already committed.
1022        if (storeQueue[store_idx].canWB) {
1023            break;
1024        }
1025
1026        DPRINTF(LSQUnit,"Store Instruction PC %s squashed, "
1027                "idx:%i [sn:%lli]\n",
1028                storeQueue[store_idx].inst->pcState(),
1029                store_idx, storeQueue[store_idx].inst->seqNum);
1030
1031        // I don't think this can happen.  It should have been cleared
1032        // by the stalling load.
1033        if (isStalled() &&
1034            storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
1035            panic("Is stalled should have been cleared by stalling load!\n");
1036            stalled = false;
1037            stallingStoreIsn = 0;
1038        }
1039
1040        // Clear the smart pointer to make sure it is decremented.
1041        storeQueue[store_idx].inst->setSquashed();
1042        storeQueue[store_idx].inst = NULL;
1043        storeQueue[store_idx].canWB = 0;
1044
1045        // Must delete request now that it wasn't handed off to
1046        // memory.  This is quite ugly.  @todo: Figure out the proper
1047        // place to really handle request deletes.
1048        storeQueue[store_idx].req.reset();
1049        if (TheISA::HasUnalignedMemAcc && storeQueue[store_idx].isSplit) {
1050            storeQueue[store_idx].sreqLow.reset();
1051            storeQueue[store_idx].sreqHigh.reset();
1052        }
1053
1054        --stores;
1055
1056        // Inefficient!
1057        storeTail = store_idx;
1058
1059        decrStIdx(store_idx);
1060        ++lsqSquashedStores;
1061    }
1062}
1063
1064template <class Impl>
1065void
1066LSQUnit<Impl>::storePostSend(PacketPtr pkt)
1067{
1068    if (isStalled() &&
1069        storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) {
1070        DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
1071                "load idx:%i\n",
1072                stallingStoreIsn, stallingLoadIdx);
1073        stalled = false;
1074        stallingStoreIsn = 0;
1075        iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
1076    }
1077
1078    if (!storeQueue[storeWBIdx].inst->isStoreConditional()) {
1079        // The store is basically completed at this time. This
1080        // only works so long as the checker doesn't try to
1081        // verify the value in memory for stores.
1082        storeQueue[storeWBIdx].inst->setCompleted();
1083
1084        if (cpu->checker) {
1085            cpu->checker->verify(storeQueue[storeWBIdx].inst);
1086        }
1087    }
1088
1089    if (needsTSO) {
1090        storeInFlight = true;
1091    }
1092
1093    incrStIdx(storeWBIdx);
1094}
1095
1096template <class Impl>
1097void
1098LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt)
1099{
1100    iewStage->wakeCPU();
1101
1102    // Squashed instructions do not need to complete their access.
1103    if (inst->isSquashed()) {
1104        assert(!inst->isStore());
1105        ++lsqIgnoredResponses;
1106        return;
1107    }
1108
1109    if (!inst->isExecuted()) {
1110        inst->setExecuted();
1111
1112        if (inst->fault == NoFault) {
1113            // Complete access to copy data to proper place.
1114            inst->completeAcc(pkt);
1115        } else {
1116            // If the instruction has an outstanding fault, we cannot complete
1117            // the access as this discards the current fault.
1118
1119            // If we have an outstanding fault, the fault should only be of
1120            // type ReExec.
1121            assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr);
1122
1123            DPRINTF(LSQUnit, "Not completing instruction [sn:%lli] access "
1124                    "due to pending fault.\n", inst->seqNum);
1125        }
1126    }
1127
1128    // Need to insert instruction into queue to commit
1129    iewStage->instToCommit(inst);
1130
1131    iewStage->activityThisCycle();
1132
1133    // see if this load changed the PC
1134    iewStage->checkMisprediction(inst);
1135}
1136
1137template <class Impl>
1138void
1139LSQUnit<Impl>::completeStore(int store_idx)
1140{
1141    assert(storeQueue[store_idx].inst);
1142    storeQueue[store_idx].completed = true;
1143    --storesToWB;
1144    // A bit conservative because a store completion may not free up entries,
1145    // but hopefully avoids two store completions in one cycle from making
1146    // the CPU tick twice.
1147    cpu->wakeCPU();
1148    cpu->activityThisCycle();
1149
1150    if (store_idx == storeHead) {
1151        do {
1152            incrStIdx(storeHead);
1153
1154            --stores;
1155        } while (storeQueue[storeHead].completed &&
1156                 storeHead != storeTail);
1157
1158        iewStage->updateLSQNextCycle = true;
1159    }
1160
1161    DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
1162            "idx:%i\n",
1163            storeQueue[store_idx].inst->seqNum, store_idx, storeHead);
1164
1165#if TRACING_ON
1166    if (DTRACE(O3PipeView)) {
1167        storeQueue[store_idx].inst->storeTick =
1168            curTick() - storeQueue[store_idx].inst->fetchTick;
1169    }
1170#endif
1171
1172    if (isStalled() &&
1173        storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
1174        DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] "
1175                "load idx:%i\n",
1176                stallingStoreIsn, stallingLoadIdx);
1177        stalled = false;
1178        stallingStoreIsn = 0;
1179        iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
1180    }
1181
1182    storeQueue[store_idx].inst->setCompleted();
1183
1184    if (needsTSO) {
1185        storeInFlight = false;
1186    }
1187
1188    // Tell the checker we've completed this instruction.  Some stores
1189    // may get reported twice to the checker, but the checker can
1190    // handle that case.
1191
1192    // Store conditionals cannot be sent to the checker yet, they have
1193    // to update the misc registers first which should take place
1194    // when they commit
1195    if (cpu->checker && !storeQueue[store_idx].inst->isStoreConditional()) {
1196        cpu->checker->verify(storeQueue[store_idx].inst);
1197    }
1198}
1199
1200template <class Impl>
1201bool
1202LSQUnit<Impl>::sendStore(PacketPtr data_pkt)
1203{
1204    if (!dcachePort->sendTimingReq(data_pkt)) {
1205        // Need to handle becoming blocked on a store.
1206        isStoreBlocked = true;
1207        ++lsqCacheBlocked;
1208        assert(retryPkt == NULL);
1209        retryPkt = data_pkt;
1210        return false;
1211    }
1212    return true;
1213}
1214
1215template <class Impl>
1216void
1217LSQUnit<Impl>::recvRetry()
1218{
1219    if (isStoreBlocked) {
1220        DPRINTF(LSQUnit, "Receiving retry: store blocked\n");
1221        assert(retryPkt != NULL);
1222
1223        LSQSenderState *state =
1224            dynamic_cast<LSQSenderState *>(retryPkt->senderState);
1225
1226        if (dcachePort->sendTimingReq(retryPkt)) {
1227            // Don't finish the store unless this is the last packet.
1228            if (!TheISA::HasUnalignedMemAcc || !state->pktToSend ||
1229                    state->pendingPacket == retryPkt) {
1230                state->pktToSend = false;
1231                storePostSend(retryPkt);
1232            }
1233            retryPkt = NULL;
1234            isStoreBlocked = false;
1235
1236            // Send any outstanding packet.
1237            if (TheISA::HasUnalignedMemAcc && state->pktToSend) {
1238                assert(state->pendingPacket);
1239                if (sendStore(state->pendingPacket)) {
1240                    storePostSend(state->pendingPacket);
1241                }
1242            }
1243        } else {
1244            // Still blocked!
1245            ++lsqCacheBlocked;
1246        }
1247    }
1248}
1249
1250template <class Impl>
1251inline void
1252LSQUnit<Impl>::incrStIdx(int &store_idx) const
1253{
1254    if (++store_idx >= SQEntries)
1255        store_idx = 0;
1256}
1257
1258template <class Impl>
1259inline void
1260LSQUnit<Impl>::decrStIdx(int &store_idx) const
1261{
1262    if (--store_idx < 0)
1263        store_idx += SQEntries;
1264}
1265
1266template <class Impl>
1267inline void
1268LSQUnit<Impl>::incrLdIdx(int &load_idx) const
1269{
1270    if (++load_idx >= LQEntries)
1271        load_idx = 0;
1272}
1273
1274template <class Impl>
1275inline void
1276LSQUnit<Impl>::decrLdIdx(int &load_idx) const
1277{
1278    if (--load_idx < 0)
1279        load_idx += LQEntries;
1280}
1281
1282template <class Impl>
1283void
1284LSQUnit<Impl>::dumpInsts() const
1285{
1286    cprintf("Load store queue: Dumping instructions.\n");
1287    cprintf("Load queue size: %i\n", loads);
1288    cprintf("Load queue: ");
1289
1290    int load_idx = loadHead;
1291
1292    while (load_idx != loadTail && loadQueue[load_idx]) {
1293        const DynInstPtr &inst(loadQueue[load_idx]);
1294        cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum);
1295
1296        incrLdIdx(load_idx);
1297    }
1298    cprintf("\n");
1299
1300    cprintf("Store queue size: %i\n", stores);
1301    cprintf("Store queue: ");
1302
1303    int store_idx = storeHead;
1304
1305    while (store_idx != storeTail && storeQueue[store_idx].inst) {
1306        const DynInstPtr &inst(storeQueue[store_idx].inst);
1307        cprintf("%s.[sn:%i] ", inst->pcState(), inst->seqNum);
1308
1309        incrStIdx(store_idx);
1310    }
1311
1312    cprintf("\n");
1313}
1314
1315#endif//__CPU_O3_LSQ_UNIT_IMPL_HH__
1316