/* lsq.cc revision 14297 */
/*
 * Copyright (c) 2013-2014,2017-2018 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Andrew Bardsley
 */
39
40#include "cpu/minor/lsq.hh"
41
42#include <iomanip>
43#include <sstream>
44
45#include "arch/locked_mem.hh"
46#include "arch/mmapped_ipr.hh"
47#include "base/logging.hh"
48#include "cpu/minor/cpu.hh"
49#include "cpu/minor/exec_context.hh"
50#include "cpu/minor/execute.hh"
51#include "cpu/minor/pipeline.hh"
52#include "cpu/utils.hh"
53#include "debug/Activity.hh"
54#include "debug/MinorMem.hh"
55
56namespace Minor
57{
58
59LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
60    PacketDataPtr data_, uint64_t *res_) :
61    SenderState(),
62    port(port_),
63    inst(inst_),
64    isLoad(isLoad_),
65    data(data_),
66    packet(NULL),
67    request(),
68    res(res_),
69    skipped(false),
70    issuedToMemory(false),
71    isTranslationDelayed(false),
72    state(NotIssued)
73{
74    request = std::make_shared<Request>();
75}
76
/** Re-run the instruction's initiateAcc to give it the opportunity to
 *  suppress a previously recorded translation fault.  The thread's PC
 *  is saved and restored around the re-execution so no architectural
 *  state change leaks out.  On exit, inst->translationFault is either
 *  NoFault (suppressed) or unchanged. */
void
LSQ::LSQRequest::tryToSuppressFault()
{
    SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
    TheISA::PCState old_pc = thread.pcState();
    ExecContext context(port.cpu, thread, port.execute, inst);
    /* Keep the original fault so we can assert that initiateAcc either
     *  cleared it or reproduced exactly the same fault */
    Fault M5_VAR_USED fault = inst->translationFault;

    // Give the instruction a chance to suppress a translation fault
    inst->translationFault = inst->staticInst->initiateAcc(&context, nullptr);
    if (inst->translationFault == NoFault) {
        DPRINTFS(MinorMem, (&port),
                 "Translation fault suppressed for inst:%s\n", *inst);
    } else {
        assert(inst->translationFault == fault);
    }
    /* Undo any PC advance made during the re-execution */
    thread.pcState(old_pc);
}
95
96void
97LSQ::LSQRequest::completeDisabledMemAccess()
98{
99    DPRINTFS(MinorMem, (&port), "Complete disabled mem access for inst:%s\n",
100             *inst);
101
102    SimpleThread &thread = *port.cpu.threads[inst->id.threadId];
103    TheISA::PCState old_pc = thread.pcState();
104
105    ExecContext context(port.cpu, thread, port.execute, inst);
106
107    context.setMemAccPredicate(false);
108    inst->staticInst->completeAcc(nullptr, &context, inst->traceData);
109
110    thread.pcState(old_pc);
111}
112
/** Clear the memory-access predicate on this request's thread so the
 *  access is treated as not-performed */
void
LSQ::LSQRequest::disableMemAccess()
{
    port.cpu.threads[inst->id.threadId]->setMemAccPredicate(false);
    DPRINTFS(MinorMem, (&port), "Disable mem access for inst:%s\n", *inst);
}
119
120LSQ::AddrRangeCoverage
121LSQ::LSQRequest::containsAddrRangeOf(
122    Addr req1_addr, unsigned int req1_size,
123    Addr req2_addr, unsigned int req2_size)
124{
125    /* 'end' here means the address of the byte just past the request
126     *  blocks */
127    Addr req2_end_addr = req2_addr + req2_size;
128    Addr req1_end_addr = req1_addr + req1_size;
129
130    AddrRangeCoverage ret;
131
132    if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr)
133        ret = NoAddrRangeCoverage;
134    else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr)
135        ret = FullAddrRangeCoverage;
136    else
137        ret = PartialAddrRangeCoverage;
138
139    return ret;
140}
141
142LSQ::AddrRangeCoverage
143LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request)
144{
145    return containsAddrRangeOf(request->getPaddr(), request->getSize(),
146        other_request->request->getPaddr(), other_request->request->getSize());
147}
148
/** True if this request wraps a real instruction (not a fault) whose
 *  static instruction is a memory barrier */
bool
LSQ::LSQRequest::isBarrier()
{
    return inst->isInst() && inst->staticInst->isMemBarrier();
}
154
/** True while the store has been marked for transfer to the store
 *  buffer but has not yet been inserted into it */
bool
LSQ::LSQRequest::needsToBeSentToStoreBuffer()
{
    return state == StoreToStoreBuffer;
}
160
/** Move the request to new_state, tracing the transition.  All state
 *  changes go through here so transitions appear in MinorMem traces */
void
LSQ::LSQRequest::setState(LSQRequestState new_state)
{
    DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:"
        " %s\n", state, new_state, *inst);
    state = new_state;
}
168
/** True once the request has reached its single terminal state */
bool
LSQ::LSQRequest::isComplete() const
{
    /* @todo, There is currently only one 'completed' state.  This
     *  may not be a good choice */
    return state == Complete;
}
176
177void
178LSQ::LSQRequest::reportData(std::ostream &os) const
179{
180    os << (isLoad ? 'R' : 'W') << ';';
181    inst->reportData(os);
182    os << ';' << state;
183}
184
185std::ostream &
186operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage)
187{
188    switch (coverage) {
189      case LSQ::PartialAddrRangeCoverage:
190        os << "PartialAddrRangeCoverage";
191        break;
192      case LSQ::FullAddrRangeCoverage:
193        os << "FullAddrRangeCoverage";
194        break;
195      case LSQ::NoAddrRangeCoverage:
196        os << "NoAddrRangeCoverage";
197        break;
198      default:
199        os << "AddrRangeCoverage-" << static_cast<int>(coverage);
200        break;
201    }
202    return os;
203}
204
205std::ostream &
206operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state)
207{
208    switch (state) {
209      case LSQ::LSQRequest::NotIssued:
210        os << "NotIssued";
211        break;
212      case LSQ::LSQRequest::InTranslation:
213        os << "InTranslation";
214        break;
215      case LSQ::LSQRequest::Translated:
216        os << "Translated";
217        break;
218      case LSQ::LSQRequest::Failed:
219        os << "Failed";
220        break;
221      case LSQ::LSQRequest::RequestIssuing:
222        os << "RequestIssuing";
223        break;
224      case LSQ::LSQRequest::StoreToStoreBuffer:
225        os << "StoreToStoreBuffer";
226        break;
227      case LSQ::LSQRequest::StoreInStoreBuffer:
228        os << "StoreInStoreBuffer";
229        break;
230      case LSQ::LSQRequest::StoreBufferIssuing:
231        os << "StoreBufferIssuing";
232        break;
233      case LSQ::LSQRequest::RequestNeedsRetry:
234        os << "RequestNeedsRetry";
235        break;
236      case LSQ::LSQRequest::StoreBufferNeedsRetry:
237        os << "StoreBufferNeedsRetry";
238        break;
239      case LSQ::LSQRequest::Complete:
240        os << "Complete";
241        break;
242      default:
243        os << "LSQRequestState-" << static_cast<int>(state);
244        break;
245    }
246    return os;
247}
248
/** Retire barrier tracking for inst's thread.  lastMemBarrier holds the
 *  exec. seq. number of the most recently issued barrier; it is only
 *  zeroed when this instruction is at or past that barrier */
void
LSQ::clearMemBarrier(MinorDynInstPtr inst)
{
    bool is_last_barrier =
        inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId];

    DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n",
        (is_last_barrier ? "last" : "a"), *inst);

    if (is_last_barrier)
        lastMemBarrier[inst->id.threadId] = 0;
}
261
/** TLB callback: the translation for this single-fragment request has
 *  finished, possibly with a fault.  Moves the request onwards to the
 *  transfers queue and wakes the Execute stage */
void
LSQ::SingleDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
                               ThreadContext *tc, BaseTLB::Mode mode)
{
    port.numAccessesInDTLB--;

    DPRINTFS(MinorMem, (&port), "Received translation response for"
             " request: %s delayed:%d %s\n", *inst, isTranslationDelayed,
             fault_ != NoFault ? fault_->name() : "");

    if (fault_ != NoFault) {
        inst->translationFault = fault_;
        if (isTranslationDelayed) {
            /* A delayed translation's fault may be suppressible (e.g. a
             *  predicated-off access) */
            tryToSuppressFault();
            if (inst->translationFault == NoFault) {
                completeDisabledMemAccess();
                setState(Complete);
            }
        }
        /* NOTE(review): this unconditionally overwrites any Complete
         *  state set just above with Translated — confirm that is
         *  intended and that tryToSendToTransfers keys off the fault
         *  rather than the state in that case */
        setState(Translated);
    } else {
        /* Successful translation: build the memory packet */
        setState(Translated);
        makePacket();
    }
    port.tryToSendToTransfers(this);

    /* Let's try and wake up the processor for the next cycle */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}
291
/** Kick off DTLB translation for this request, unless every byte of
 *  the access is disabled by the byte-enable mask, in which case the
 *  access is completed immediately as a disabled access */
void
LSQ::SingleDataRequest::startAddrTranslation()
{
    ThreadContext *thread = port.cpu.getContext(
        inst->id.threadId);

    /* An empty byte-enable vector is treated here as all-bytes-enabled */
    const auto &byteEnable = request->getByteEnable();
    if (byteEnable.size() == 0 ||
        isAnyActiveElement(byteEnable.cbegin(), byteEnable.cend())) {
        port.numAccessesInDTLB++;

        setState(LSQ::LSQRequest::InTranslation);

        DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n");
        /* Submit the translation request.  The response will come through
         *  finish/markDelayed on the LSQRequest as it bears the Translation
         *  interface */
        thread->getDTBPtr()->translateTiming(
            request, thread, this, (isLoad ? BaseTLB::Read : BaseTLB::Write));
    } else {
        /* No active bytes: skip translation and the memory system */
        disableMemAccess();
        setState(LSQ::LSQRequest::Complete);
    }
}
316
317void
318LSQ::SingleDataRequest::retireResponse(PacketPtr packet_)
319{
320    DPRINTFS(MinorMem, (&port), "Retiring packet\n");
321    packet = packet_;
322    packetInFlight = false;
323    setState(Complete);
324}
325
/** TLB callback for one fragment of a split (line-crossing) access.
 *  Fragments are translated strictly in order, one at a time; the next
 *  translation is chained via translationEvent rather than being
 *  started from inside this callback */
void
LSQ::SplitDataRequest::finish(const Fault &fault_, const RequestPtr &request_,
                              ThreadContext *tc, BaseTLB::Mode mode)
{
    port.numAccessesInDTLB--;

    unsigned int M5_VAR_USED expected_fragment_index =
        numTranslatedFragments;

    numInTranslationFragments--;
    numTranslatedFragments++;

    DPRINTFS(MinorMem, (&port), "Received translation response for fragment"
             " %d of request: %s delayed:%d %s\n", expected_fragment_index,
             *inst, isTranslationDelayed,
             fault_ != NoFault ? fault_->name() : "");

    assert(request_ == fragmentRequests[expected_fragment_index]);

    /* Wake up next cycle to get things going again in case the
     *  tryToSendToTransfers does take */
    port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    if (fault_ != NoFault) {
        /* tryToSendToTransfers will handle the fault */
        inst->translationFault = fault_;

        DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:"
            " %d of request: %s\n",
            expected_fragment_index, *inst);

        /* Faults on non-first fragments or delayed translations may be
         *  suppressible (e.g. a predicated-off access) */
        if (expected_fragment_index > 0 || isTranslationDelayed)
            tryToSuppressFault();
        if (expected_fragment_index == 0) {
            if (isTranslationDelayed && inst->translationFault == NoFault) {
                /* Suppressed fault on the first fragment: the whole
                 *  access completes as a disabled access */
                completeDisabledMemAccess();
                setState(Complete);
            } else {
                setState(Translated);
            }
        } else if (inst->translationFault == NoFault) {
            /* Suppressed fault on a later fragment: drop this fragment
             *  and build packets for those translated so far */
            setState(Translated);
            numTranslatedFragments--;
            makeFragmentPackets();
        } else {
            setState(Translated);
        }
        port.tryToSendToTransfers(this);
    } else if (numTranslatedFragments == numFragments) {
        /* All fragments translated successfully */
        makeFragmentPackets();
        setState(Translated);
        port.tryToSendToTransfers(this);
    } else {
        /* Avoid calling translateTiming from within ::finish */
        assert(!translationEvent.scheduled());
        port.cpu.schedule(translationEvent, curTick());
    }
}
384
/** Build a split request with no fragments yet; makeFragmentRequests
 *  sizes it once the parent request has been populated */
LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
    bool isLoad_, PacketDataPtr data_, uint64_t *res_) :
    LSQRequest(port_, inst_, isLoad_, data_, res_),
    /* Event used to chain fragment translations without calling
     *  translateTiming from inside a translation callback */
    translationEvent([this]{ sendNextFragmentToTranslation(); },
                     "translationEvent"),
    numFragments(0),
    numInTranslationFragments(0),
    numTranslatedFragments(0),
    numIssuedFragments(0),
    numRetiredFragments(0),
    fragmentRequests(),
    fragmentPackets()
{
    /* Don't know how many elements are needed until the request is
     *  populated by the caller. */
}
401
402LSQ::SplitDataRequest::~SplitDataRequest()
403{
404    for (auto i = fragmentPackets.begin();
405         i != fragmentPackets.end(); i++)
406    {
407        delete *i;
408    }
409}
410
/** Slice the parent request into line-aligned fragment Requests:
 *  a (possibly short) first fragment up to the first line boundary,
 *  whole-line middle fragments, and a trailing partial fragment if the
 *  end address is not line-aligned.  Fragments whose byte-enable mask
 *  is entirely inactive are dropped and numFragments reduced to match.
 *  NOTE(review): the arithmetic assumes the access crosses at least one
 *  line boundary (otherwise middle_fragments_total_size would wrap) —
 *  confirm callers only build SplitDataRequests for crossing accesses */
void
LSQ::SplitDataRequest::makeFragmentRequests()
{
    Addr base_addr = request->getVaddr();
    unsigned int whole_size = request->getSize();
    unsigned int line_width = port.lineWidth;

    unsigned int fragment_size;
    Addr fragment_addr;

    std::vector<bool> fragment_write_byte_en;

    /* Assume that this transfer is across potentially many block snap
     * boundaries:
     *
     * |      _|________|________|________|___     |
     * |     |0| 1      | 2      | 3      | 4 |    |
     * |     |_|________|________|________|___|    |
     * |       |        |        |        |        |
     *
     *  The first transfer (0) can be up to lineWidth in size.
     *  All the middle transfers (1-3) are lineWidth in size
     *  The last transfer (4) can be from zero to lineWidth - 1 in size
     */
    unsigned int first_fragment_offset =
        addrBlockOffset(base_addr, line_width);
    /* Offset of the end address within a line == size of the trailing
     *  partial fragment (0 when the end is line-aligned) */
    unsigned int last_fragment_size =
        addrBlockOffset(base_addr + whole_size, line_width);
    unsigned int first_fragment_size =
        line_width - first_fragment_offset;

    unsigned int middle_fragments_total_size =
        whole_size - (first_fragment_size + last_fragment_size);

    assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0);

    unsigned int middle_fragment_count =
        middle_fragments_total_size / line_width;

    numFragments = 1 /* first */ + middle_fragment_count +
        (last_fragment_size == 0 ? 0 : 1);

    DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests."
        " First fragment size: %d Last fragment size: %d\n",
        numFragments, first_fragment_size,
        (last_fragment_size == 0 ? line_width : last_fragment_size));

    assert(((middle_fragment_count * line_width) +
        first_fragment_size + last_fragment_size) == whole_size);

    fragment_addr = base_addr;
    fragment_size = first_fragment_size;

    /* Just past the last address in the request */
    Addr end_addr = base_addr + whole_size;

    auto& byte_enable = request->getByteEnable();
    unsigned int num_disabled_fragments = 0;

    for (unsigned int fragment_index = 0; fragment_index < numFragments;
         fragment_index++)
    {
        bool M5_VAR_USED is_last_fragment = false;

        if (fragment_addr == base_addr) {
            /* First fragment */
            fragment_size = first_fragment_size;
        } else {
            if ((fragment_addr + line_width) > end_addr) {
                /* Adjust size of last fragment */
                fragment_size = end_addr - fragment_addr;
                is_last_fragment = true;
            } else {
                /* Middle fragments */
                fragment_size = line_width;
            }
        }

        RequestPtr fragment = std::make_shared<Request>();
        bool disabled_fragment = false;

        /* Fragments inherit context, flags, master id and PC from the
         *  parent request */
        fragment->setContext(request->contextId());
        if (byte_enable.empty()) {
            fragment->setVirt(0 /* asid */,
                fragment_addr, fragment_size, request->getFlags(),
                request->masterId(),
                request->getPC());
        } else {
            // Set up byte-enable mask for the current fragment
            auto it_start = byte_enable.begin() +
                (fragment_addr - base_addr);
            auto it_end = byte_enable.begin() +
                (fragment_addr - base_addr) + fragment_size;
            if (isAnyActiveElement(it_start, it_end)) {
                fragment->setVirt(0 /* asid */,
                    fragment_addr, fragment_size, request->getFlags(),
                    request->masterId(),
                    request->getPC());
                fragment->setByteEnable(std::vector<bool>(it_start, it_end));
            } else {
                /* No active byte in this fragment: drop it entirely */
                disabled_fragment = true;
            }
        }

        if (!disabled_fragment) {
            DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x"
                " size: %d (whole request addr: 0x%x size: %d) %s\n",
                fragment_addr, fragment_size, base_addr, whole_size,
                (is_last_fragment ? "last fragment" : ""));

            fragmentRequests.push_back(fragment);
        } else {
            num_disabled_fragments++;
        }

        fragment_addr += fragment_size;
    }
    /* Only the enabled fragments count from here on */
    assert(numFragments >= num_disabled_fragments);
    numFragments -= num_disabled_fragments;
}
531
/** Build a Packet for each translated fragment (copying out the slice
 *  of store data for writes), then give the parent request the first
 *  fragment's physical address and build the overall response packet */
void
LSQ::SplitDataRequest::makeFragmentPackets()
{
    assert(numTranslatedFragments > 0);
    Addr base_addr = request->getVaddr();

    DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst);

    for (unsigned int fragment_index = 0;
         fragment_index < numTranslatedFragments;
         fragment_index++)
    {
        RequestPtr fragment = fragmentRequests[fragment_index];

        /* NOTE(review): the first "%d" below is paired with a string
         *  argument — gem5's cprintf formats by actual argument type so
         *  this still prints, but "%s" was presumably intended; confirm */
        DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s"
            " (%d, 0x%x)\n",
            fragment_index, *inst,
            (fragment->hasPaddr() ? "has paddr" : "no paddr"),
            (fragment->hasPaddr() ? fragment->getPaddr() : 0));

        Addr fragment_addr = fragment->getVaddr();
        unsigned int fragment_size = fragment->getSize();

        uint8_t *request_data = NULL;

        if (!isLoad) {
            /* Split data for Packets.  Will become the property of the
             *  outgoing Packets */
            request_data = new uint8_t[fragment_size];
            std::memcpy(request_data, data + (fragment_addr - base_addr),
                fragment_size);
        }

        assert(fragment->hasPaddr());

        PacketPtr fragment_packet =
            makePacketForRequest(fragment, isLoad, this, request_data);

        fragmentPackets.push_back(fragment_packet);
        /* Accumulate flags in parent request */
        request->setFlags(fragment->getFlags());
    }

    /* Might as well make the overall/response packet here */
    /* Get the physical address for the whole request/packet from the first
     *  fragment */
    request->setPaddr(fragmentRequests[0]->getPaddr());
    makePacket();
}
581
582void
583LSQ::SplitDataRequest::startAddrTranslation()
584{
585    makeFragmentRequests();
586
587    if (numFragments > 0) {
588        setState(LSQ::LSQRequest::InTranslation);
589        numInTranslationFragments = 0;
590        numTranslatedFragments = 0;
591
592        /* @todo, just do these in sequence for now with
593         * a loop of:
594         * do {
595         *  sendNextFragmentToTranslation ; translateTiming ; finish
596         * } while (numTranslatedFragments != numFragments);
597         */
598
599        /* Do first translation */
600        sendNextFragmentToTranslation();
601    } else {
602        disableMemAccess();
603        setState(LSQ::LSQRequest::Complete);
604    }
605}
606
/** The next fragment packet to send to memory (in fragment order) */
PacketPtr
LSQ::SplitDataRequest::getHeadPacket()
{
    assert(numIssuedFragments < numTranslatedFragments);

    return fragmentPackets[numIssuedFragments];
}
614
/** Advance getHeadPacket past the packet just issued to memory */
void
LSQ::SplitDataRequest::stepToNextPacket()
{
    assert(numIssuedFragments < numTranslatedFragments);

    numIssuedFragments++;
}
622
/** Accept the response for one fragment.  Fragment responses may come
 *  back out of order; load data is assembled into the overall 'data'
 *  buffer by virtual-address offset and, once the last fragment
 *  arrives, copied into the overall response packet */
void
LSQ::SplitDataRequest::retireResponse(PacketPtr response)
{
    assert(inst->translationFault == NoFault);
    assert(numRetiredFragments < numTranslatedFragments);

    DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d"
        " offset: 0x%x (retired fragment num: %d)\n",
        response->req->getVaddr(), response->req->getSize(),
        request->getVaddr() - response->req->getVaddr(),
        numRetiredFragments);

    numRetiredFragments++;

    if (skipped) {
        /* Skip because we already knew the request had faulted or been
         *  skipped */
        DPRINTFS(MinorMem, (&port), "Skipping this fragment\n");
    } else if (response->isError()) {
        /* Mark up the error and leave to execute to handle it */
        DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n");
        setSkipped();
        packet->copyError(response);
    } else {
        if (isLoad) {
            if (!data) {
                /* For a split transfer, a Packet must be constructed
                 *  to contain all returning data.  This is that packet's
                 *  data */
                data = new uint8_t[request->getSize()];
            }

            /* Populate the portion of the overall response data represented
             *  by the response fragment */
            std::memcpy(
                data + (response->req->getVaddr() - request->getVaddr()),
                response->getConstPtr<uint8_t>(),
                response->req->getSize());
        }
    }

    /* Complete early if we're skipping and there are no more in-flight
     *  accesses */
    if (skipped && !hasPacketsInMemSystem()) {
        DPRINTFS(MinorMem, (&port), "Completed skipped burst\n");
        setState(Complete);
        if (packet->needsResponse())
            packet->makeResponse();
    }

    if (numRetiredFragments == numTranslatedFragments)
        setState(Complete);

    if (!skipped && isComplete()) {
        DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL);

        DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d"
             " needsResponse: %d packetSize: %s requestSize: %s responseSize:"
             " %s\n", packet->isRead(), packet->isWrite(),
             packet->needsResponse(), packet->getSize(), request->getSize(),
             response->getSize());

        /* A request can become complete by several paths, this is a sanity
         *  check to make sure the packet's data is created */
        if (!data) {
            data = new uint8_t[request->getSize()];
        }

        if (isLoad) {
            DPRINTFS(MinorMem, (&port), "Copying read data\n");
            std::memcpy(packet->getPtr<uint8_t>(), data, request->getSize());
        }
        packet->makeResponse();
    }

    /* Packets are all deallocated together in ~SplitLSQRequest */
}
699
700void
701LSQ::SplitDataRequest::sendNextFragmentToTranslation()
702{
703    unsigned int fragment_index = numTranslatedFragments;
704
705    ThreadContext *thread = port.cpu.getContext(
706        inst->id.threadId);
707
708    DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n",
709        fragment_index);
710
711    port.numAccessesInDTLB++;
712    numInTranslationFragments++;
713
714    thread->getDTBPtr()->translateTiming(
715        fragmentRequests[fragment_index], thread, this, (isLoad ?
716        BaseTLB::Read : BaseTLB::Write));
717}
718
/** True while the buffer has a free slot */
bool
LSQ::StoreBuffer::canInsert() const
{
    /* @todo, support store amalgamation */
    return slots.size() < numSlots;
}
725
726void
727LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request)
728{
729    auto found = std::find(slots.begin(), slots.end(), request);
730
731    if (found != slots.end()) {
732        DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n",
733            request, *found, *(request->inst));
734        slots.erase(found);
735
736        delete request;
737    }
738}
739
/** Append a store (or barrier) request to the buffer.  Note that the
 *  request is still inserted when the buffer is over-full; the overflow
 *  is only warned about */
void
LSQ::StoreBuffer::insert(LSQRequestPtr request)
{
    if (!canInsert()) {
        warn("%s: store buffer insertion without space to insert from"
            " inst: %s\n", name(), *(request->inst));
    }

    DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request);

    numUnissuedAccesses++;

    /* Already-Complete requests (e.g. barriers) keep their state */
    if (request->state != LSQRequest::Complete)
        request->setState(LSQRequest::StoreInStoreBuffer);

    slots.push_back(request);

    /* Let's try and wake up the processor for the next cycle to step
     *  the store buffer */
    lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
}
761
/** Find the youngest same-thread store whose address range overlaps the
 *  load 'request'.  Returns the coverage found (None/Partial/Full); on
 *  an overlap, found_slot is set to the matching slot's index */
LSQ::AddrRangeCoverage
LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
    unsigned int &found_slot)
{
    unsigned int slot_index = slots.size() - 1;
    auto i = slots.rbegin();
    AddrRangeCoverage ret = NoAddrRangeCoverage;

    /* Traverse the store buffer in reverse order (most to least recent)
     *  and try to find a slot whose address range overlaps this request */
    while (ret == NoAddrRangeCoverage && i != slots.rend()) {
        LSQRequestPtr slot = *i;

        /* Cache maintenance instructions go down via the store path but
         * they carry no data and they shouldn't be considered
         * for forwarding */
        if (slot->packet &&
            slot->inst->id.threadId == request->inst->id.threadId &&
            !slot->packet->req->isCacheMaintenance()) {
            AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);

            if (coverage != NoAddrRangeCoverage) {
                DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:"
                    " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
                    slot_index, coverage,
                    request->request->getPaddr(), request->request->getSize(),
                    slot->request->getPaddr(), slot->request->getSize());

                found_slot = slot_index;
                ret = coverage;
            }
        }

        i++;
        slot_index--;
    }

    return ret;
}
801
/** Fill the given load packet with appropriate data from slot
 *  slot_number.  The store in that slot must fully cover the load's
 *  physical address range (checked by assertion) */
void
LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load,
    unsigned int slot_number)
{
    assert(slot_number < slots.size());
    assert(load->packet);
    assert(load->isLoad);

    LSQRequestPtr store = slots[slot_number];

    assert(store->packet);
    assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage);

    /* Offset of the load's data within the store's data */
    Addr load_addr = load->request->getPaddr();
    Addr store_addr = store->request->getPaddr();
    Addr addr_offset = load_addr - store_addr;

    unsigned int load_size = load->request->getSize();

    DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer"
        " slot: %d addr: 0x%x addressOffset: 0x%x\n",
        load_size, load_addr, slot_number,
        store_addr, addr_offset);

    void *load_packet_data = load->packet->getPtr<void>();
    void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset;

    std::memcpy(load_packet_data, store_packet_data, load_size);
}
832
/** Account for a store issued to memory from the buffer */
void
LSQ::StoreBuffer::countIssuedStore(LSQRequestPtr request)
{
    /* Barriers are accounted for as they are cleared from
     *  the queue, not after their transfers are complete */
    if (!request->isBarrier())
        numUnissuedAccesses--;
}
841
/** Per-cycle store buffer work: clear completed barriers from the head
 *  of the queue, then issue stores to memory strictly in queue order
 *  (up to storeLimitPerCycle per cycle), stopping at any completed
 *  barrier or at a store that cannot be sent */
void
LSQ::StoreBuffer::step()
{
    DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n",
        numUnissuedAccesses);

    if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) {
        /* Clear all the leading barriers */
        while (!slots.empty() &&
            slots.front()->isComplete() && slots.front()->isBarrier())
        {
            LSQRequestPtr barrier = slots.front();

            DPRINTF(MinorMem, "Clearing barrier for inst: %s\n",
                *(barrier->inst));

            /* Barriers count against numUnissuedAccesses here, not at
             *  issue time (see countIssuedStore) */
            numUnissuedAccesses--;
            lsq.clearMemBarrier(barrier->inst);
            slots.pop_front();

            delete barrier;
        }

        auto i = slots.begin();
        bool issued = true;
        unsigned int issue_count = 0;

        /* Skip trying if the memory system is busy */
        if (lsq.state == LSQ::MemoryNeedsRetry)
            issued = false;

        /* Try to issue all stores in order starting from the head
         *  of the queue.  Responses are allowed to be retired
         *  out of order */
        while (issued &&
            issue_count < storeLimitPerCycle &&
            lsq.canSendToMemorySystem() &&
            i != slots.end())
        {
            LSQRequestPtr request = *i;

            DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d"
                " state: %s\n",
                *(request->inst), request->sentAllPackets(),
                request->state);

            if (request->isBarrier() && request->isComplete()) {
                /* Give up at barriers */
                issued = false;
            } else if (!(request->state == LSQRequest::StoreBufferIssuing &&
                request->sentAllPackets()))
            {
                DPRINTF(MinorMem, "Trying to send request: %s to memory"
                    " system\n", *(request->inst));

                if (lsq.tryToSend(request)) {
                    countIssuedStore(request);
                    issue_count++;
                } else {
                    /* Don't step on to the next store buffer entry if this
                     *  one hasn't issued all its packets as the store
                     *  buffer must still enforce ordering */
                    issued = false;
                }
            }
            i++;
        }
    }
}
911
912void
913LSQ::completeMemBarrierInst(MinorDynInstPtr inst,
914    bool committed)
915{
916    if (committed) {
917        /* Not already sent to the store buffer as a store request? */
918        if (!inst->inStoreBuffer) {
919            /* Insert an entry into the store buffer to tick off barriers
920             *  until there are none in flight */
921            storeBuffer.insert(new BarrierDataRequest(*this, inst));
922        }
923    } else {
924        /* Clear the barrier anyway if it wasn't actually committed */
925        clearMemBarrier(inst);
926    }
927}
928
929void
930LSQ::StoreBuffer::minorTrace() const
931{
932    unsigned int size = slots.size();
933    unsigned int i = 0;
934    std::ostringstream os;
935
936    while (i < size) {
937        LSQRequestPtr request = slots[i];
938
939        request->reportData(os);
940
941        i++;
942        if (i < numSlots)
943            os << ',';
944    }
945
946    while (i < numSlots) {
947        os << '-';
948
949        i++;
950        if (i < numSlots)
951            os << ',';
952    }
953
954    MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(),
955        numUnissuedAccesses);
956}
957
/* Try to advance the request at the head of the requests queue into the
 *  transfers queue, issuing a memory access on the way if one is needed.
 *  Each early return below leaves the request where it is so a later
 *  call can retry. */
void
LSQ::tryToSendToTransfers(LSQRequestPtr request)
{
    if (state == MemoryNeedsRetry) {
        DPRINTF(MinorMem, "Request needs retry, not issuing to"
            " memory until retry arrives\n");
        return;
    }

    if (request->state == LSQRequest::InTranslation) {
        DPRINTF(MinorMem, "Request still in translation, not issuing to"
            " memory\n");
        return;
    }

    assert(request->state == LSQRequest::Translated ||
        request->state == LSQRequest::RequestIssuing ||
        request->state == LSQRequest::Failed ||
        request->state == LSQRequest::Complete);

    /* Requests must leave the requests queue strictly in order */
    if (requests.empty() || requests.front() != request) {
        DPRINTF(MinorMem, "Request not at front of requests queue, can't"
            " issue to memory\n");
        return;
    }

    if (transfers.unreservedRemainingSpace() == 0) {
        DPRINTF(MinorMem, "No space to insert request into transfers"
            " queue\n");
        return;
    }

    /* Already finished (or failed): just move it across, marking it as
     *  having skipped the memory system */
    if (request->isComplete() || request->state == LSQRequest::Failed) {
        DPRINTF(MinorMem, "Passing a %s transfer on to transfers"
            " queue\n", (request->isComplete() ? "completed" : "failed"));
        request->setState(LSQRequest::Complete);
        request->setSkipped();
        moveFromRequestsToTransfers(request);
        return;
    }

    if (!execute.instIsRightStream(request->inst)) {
        /* Wrong stream, try to abort the transfer but only do so if
         *  there are no packets in flight */
        if (request->hasPacketsInMemSystem()) {
            DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
                " waiting for responses before aborting request\n");
        } else {
            DPRINTF(MinorMem, "Request's inst. is from the wrong stream,"
                " aborting request\n");
            request->setState(LSQRequest::Complete);
            request->setSkipped();
            moveFromRequestsToTransfers(request);
        }
        return;
    }

    /* A translation fault also skips memory; note that faulting
     *  prefetches fall through to the same move, just without the
     *  fault-signalling debug message */
    if (request->inst->translationFault != NoFault) {
        if (request->inst->staticInst->isPrefetch()) {
            DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n");
        }
        DPRINTF(MinorMem, "Moving faulting request into the transfers"
            " queue\n");
        request->setState(LSQRequest::Complete);
        request->setSkipped();
        moveFromRequestsToTransfers(request);
        return;
    }

    bool is_load = request->isLoad;
    bool is_llsc = request->request->isLLSC();
    bool is_swap = request->request->isSwap();
    bool is_atomic = request->request->isAtomic();
    /* Bufferable == can be handled by the store buffer rather than
     *  issued directly from here */
    bool bufferable = !(request->request->isStrictlyOrdered() ||
                        is_llsc || is_swap || is_atomic);

    if (is_load) {
        /* Loads must wait until in-flight stores have reached the store
         *  buffer so forwarding results are correct */
        if (numStoresInTransfers != 0) {
            DPRINTF(MinorMem, "Load request with stores still in transfers"
                " queue, stalling\n");
            return;
        }
    } else {
        /* Store.  Can it be sent to the store buffer? */
        if (bufferable && !request->request->isMmappedIpr()) {
            request->setState(LSQRequest::StoreToStoreBuffer);
            moveFromRequestsToTransfers(request);
            DPRINTF(MinorMem, "Moving store into transfers queue\n");
            return;
        }
    }

    /* Check if this is the head instruction (and so must be executable as
     *  its stream sequence number was checked above) for loads which must
     *  not be speculatively issued and stores which must be issued here */
    if (!bufferable) {
        if (!execute.instIsHeadInst(request->inst)) {
            DPRINTF(MinorMem, "Memory access not the head inst., can't be"
                " sure it can be performed, not issuing\n");
            return;
        }

        unsigned int forwarding_slot = 0;

        if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) !=
            NoAddrRangeCoverage)
        {
            // There's at least another request that targets the same
            // address and is staying in the storeBuffer. Since our
            // request is non-bufferable (e.g., strictly ordered or atomic),
            // we must wait for the other request in the storeBuffer to
            // complete before we can issue this non-bufferable request.
            // This is to make sure that the order they access the cache is
            // correct.
            DPRINTF(MinorMem, "Memory access can receive forwarded data"
                " from the store buffer, but need to wait for store buffer"
                " to drain\n");
            return;
        }
    }

    /* True: submit this packet to the transfers queue to be sent to the
     * memory system.
     * False: skip the memory and push a packet for this request onto
     * requests */
    bool do_access = true;

    if (!is_llsc) {
        /* Check for match in the store buffer */
        if (is_load) {
            unsigned int forwarding_slot = 0;
            AddrRangeCoverage forwarding_result =
                storeBuffer.canForwardDataToLoad(request,
                forwarding_slot);

            switch (forwarding_result) {
              case FullAddrRangeCoverage:
                /* Forward data from the store buffer into this request and
                 *  repurpose this request's packet into a response packet */
                storeBuffer.forwardStoreData(request, forwarding_slot);
                request->packet->makeResponse();

                /* Just move between queues, no access */
                do_access = false;
                break;
              case PartialAddrRangeCoverage:
                DPRINTF(MinorMem, "Load partly satisfied by store buffer"
                    " data. Must wait for the store to complete\n");
                return;
                break;
              case NoAddrRangeCoverage:
                DPRINTF(MinorMem, "No forwardable data from store buffer\n");
                /* Fall through to try access */
                break;
            }
        }
    } else {
        /* LLSC accesses need the locked-access bookkeeping performed in
         *  an execution context before they can go to memory */
        if (!canSendToMemorySystem()) {
            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
            return;
        }

        SimpleThread &thread = *cpu.threads[request->inst->id.threadId];

        /* Preserve the thread's PC around the temporary ExecContext */
        TheISA::PCState old_pc = thread.pcState();
        ExecContext context(cpu, thread, execute, request->inst);

        /* Handle LLSC requests and tests */
        if (is_load) {
            TheISA::handleLockedRead(&context, request->request);
        } else {
            /* A failed store conditional performs no memory access */
            do_access = TheISA::handleLockedWrite(&context,
                request->request, cacheBlockMask);

            if (!do_access) {
                DPRINTF(MinorMem, "Not perfoming a memory "
                    "access for store conditional\n");
            }
        }
        thread.pcState(old_pc);
    }

    /* See the do_access comment above */
    if (do_access) {
        if (!canSendToMemorySystem()) {
            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
            return;
        }

        /* Remember if this is an access which can't be idly
         *  discarded by an interrupt */
        if (!bufferable && !request->issuedToMemory) {
            numAccessesIssuedToMemory++;
            request->issuedToMemory = true;
        }

        if (tryToSend(request)) {
            moveFromRequestsToTransfers(request);
        }
    } else {
        request->setState(LSQRequest::Complete);
        moveFromRequestsToTransfers(request);
    }
}
1162
/* Try to send the head packet of the given request to memory (or handle
 *  it directly for memory-mapped IPR accesses).  Returns true when the
 *  request has had all of its packets sent. */
bool
LSQ::tryToSend(LSQRequestPtr request)
{
    bool ret = false;

    if (!canSendToMemorySystem()) {
        DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n",
            *(request->inst));
    } else {
        PacketPtr packet = request->getHeadPacket();

        DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n",
            *(request->inst), packet->req->getVaddr());

        /* The sender state of the packet *must* be an LSQRequest
         *  so the response can be correctly handled */
        assert(packet->findNextSenderState<LSQRequest>());

        if (request->request->isMmappedIpr()) {
            /* Memory-mapped IPR accesses are handled here directly
             *  rather than being sent through the data port */
            ThreadContext *thread =
                cpu.getContext(cpu.contextToThread(
                                request->request->contextId()));

            if (request->isLoad) {
                DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst));
                TheISA::handleIprRead(thread, packet);
            } else {
                DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst));
                TheISA::handleIprWrite(thread, packet);
            }

            request->stepToNextPacket();
            ret = request->sentAllPackets();

            if (!ret) {
                DPRINTF(MinorMem, "IPR access has another packet: %s\n",
                    *(request->inst));
            }

            if (ret)
                request->setState(LSQRequest::Complete);
            else
                request->setState(LSQRequest::RequestIssuing);
        } else if (dcachePort.sendTimingReq(packet)) {
            DPRINTF(MinorMem, "Sent data memory request\n");

            numAccessesInMemorySystem++;

            request->stepToNextPacket();

            ret = request->sentAllPackets();

            /* Advance the request's state to its queue's issuing state */
            switch (request->state) {
              case LSQRequest::Translated:
              case LSQRequest::RequestIssuing:
                /* Fully or partially issued a request in the transfers
                 *  queue */
                request->setState(LSQRequest::RequestIssuing);
                break;
              case LSQRequest::StoreInStoreBuffer:
              case LSQRequest::StoreBufferIssuing:
                /* Fully or partially issued a request in the store
                 *  buffer */
                request->setState(LSQRequest::StoreBufferIssuing);
                break;
              default:
                panic("Unrecognized LSQ request state %d.", request->state);
            }

            state = MemoryRunning;
        } else {
            DPRINTF(MinorMem,
                "Sending data memory request - needs retry\n");

            /* Needs to be resent, wait for that */
            state = MemoryNeedsRetry;
            retryRequest = request;

            /* Move the request to its queue's needs-retry state */
            switch (request->state) {
              case LSQRequest::Translated:
              case LSQRequest::RequestIssuing:
                request->setState(LSQRequest::RequestNeedsRetry);
                break;
              case LSQRequest::StoreInStoreBuffer:
              case LSQRequest::StoreBufferIssuing:
                request->setState(LSQRequest::StoreBufferNeedsRetry);
                break;
              default:
                panic("Unrecognized LSQ request state %d.", request->state);
            }
        }
    }

    /* Fully-sent requests are snooped by the other threads */
    if (ret)
        threadSnoop(request);

    return ret;
}
1261
1262void
1263LSQ::moveFromRequestsToTransfers(LSQRequestPtr request)
1264{
1265    assert(!requests.empty() && requests.front() == request);
1266    assert(transfers.unreservedRemainingSpace() != 0);
1267
1268    /* Need to count the number of stores in the transfers
1269     *  queue so that loads know when their store buffer forwarding
1270     *  results will be correct (only when all those stores
1271     *  have reached the store buffer) */
1272    if (!request->isLoad)
1273        numStoresInTransfers++;
1274
1275    requests.pop();
1276    transfers.push(request);
1277}
1278
1279bool
1280LSQ::canSendToMemorySystem()
1281{
1282    return state == MemoryRunning &&
1283        numAccessesInMemorySystem < inMemorySystemLimit;
1284}
1285
/* Handle a response packet from the memory system, routing it back to
 *  the LSQRequest it belongs to.  Always returns true as the LSQ never
 *  refuses responses. */
bool
LSQ::recvTimingResp(PacketPtr response)
{
    /* The matching request was pushed as sender state when the packet
     *  was sent (see tryToSend) */
    LSQRequestPtr request =
        safe_cast<LSQRequestPtr>(response->popSenderState());

    DPRINTF(MinorMem, "Received response packet inst: %s"
        " addr: 0x%x cmd: %s\n",
        *(request->inst), response->getAddr(),
        response->cmd.toString());

    numAccessesInMemorySystem--;

    if (response->isError()) {
        DPRINTF(MinorMem, "Received error response packet: %s\n",
            *request->inst);
    }

    switch (request->state) {
      case LSQRequest::RequestIssuing:
      case LSQRequest::RequestNeedsRetry:
        /* Response to a request from the transfers queue */
        request->retireResponse(response);

        DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n",
            request->hasPacketsInMemSystem(), request->isComplete());

        break;
      case LSQRequest::StoreBufferIssuing:
      case LSQRequest::StoreBufferNeedsRetry:
        /* Response to a request from the store buffer */
        request->retireResponse(response);

        /* Remove completed requests unless they are barriers (which will
         *  need to be removed in order) */
        if (request->isComplete()) {
            if (!request->isBarrier()) {
                storeBuffer.deleteRequest(request);
            } else {
                DPRINTF(MinorMem, "Completed transfer for barrier: %s"
                    " leaving the request as it is also a barrier\n",
                    *(request->inst));
            }
        }
        break;
      default:
        panic("Shouldn't be allowed to receive a response from another state");
    }

    /* We go to idle even if there are more things in the requests queue
     * as it's the job of step to actually step us on to the next
     * transaction */

    /* Let's try and wake up the processor for the next cycle */
    cpu.wakeupOnEvent(Pipeline::ExecuteStageId);

    /* Never busy */
    return true;
}
1345
/* The memory system is ready to accept the previously-refused request;
 *  rewind retryRequest's state and try to send it again */
void
LSQ::recvReqRetry()
{
    DPRINTF(MinorMem, "Received retry request\n");

    assert(state == MemoryNeedsRetry);

    /* Step the retried request back to the state it was in before the
     *  failed send */
    switch (retryRequest->state) {
      case LSQRequest::RequestNeedsRetry:
        /* Retry in the requests queue */
        retryRequest->setState(LSQRequest::Translated);
        break;
      case LSQRequest::StoreBufferNeedsRetry:
        /* Retry in the store buffer */
        retryRequest->setState(LSQRequest::StoreInStoreBuffer);
        break;
      default:
        panic("Unrecognized retry request state %d.", retryRequest->state);
    }

    /* Set state back to MemoryRunning so that the following
     *  tryToSend can actually send.  Note that this won't
     *  allow another transfer in as tryToSend should
     *  issue a memory request and either succeed for this
     *  request or return the LSQ back to MemoryNeedsRetry */
    state = MemoryRunning;

    /* Try to resend the request */
    if (tryToSend(retryRequest)) {
        /* Successfully sent, need to move the request */
        switch (retryRequest->state) {
          case LSQRequest::RequestIssuing:
            /* In the requests queue */
            moveFromRequestsToTransfers(retryRequest);
            break;
          case LSQRequest::StoreBufferIssuing:
            /* In the store buffer */
            storeBuffer.countIssuedStore(retryRequest);
            break;
          default:
            panic("Unrecognized retry request state %d.", retryRequest->state);
        }

        retryRequest = NULL;
    }
}
1392
/* Construct the LSQ, validating all of the queue/limit parameters.
 *  A line_width of 0 selects the CPU's cache line size */
LSQ::LSQ(std::string name_, std::string dcache_port_name_,
    MinorCPU &cpu_, Execute &execute_,
    unsigned int in_memory_system_limit, unsigned int line_width,
    unsigned int requests_queue_size, unsigned int transfers_queue_size,
    unsigned int store_buffer_size,
    unsigned int store_buffer_cycle_store_limit) :
    Named(name_),
    cpu(cpu_),
    execute(execute_),
    dcachePort(dcache_port_name_, *this, cpu_),
    lastMemBarrier(cpu.numThreads, 0),
    state(MemoryRunning),
    inMemorySystemLimit(in_memory_system_limit),
    lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)),
    requests(name_ + ".requests", "addr", requests_queue_size),
    transfers(name_ + ".transfers", "addr", transfers_queue_size),
    storeBuffer(name_ + ".storeBuffer",
        *this, store_buffer_size, store_buffer_cycle_store_limit),
    numAccessesInMemorySystem(0),
    numAccessesInDTLB(0),
    numStoresInTransfers(0),
    numAccessesIssuedToMemory(0),
    retryRequest(NULL),
    cacheBlockMask(~(cpu_.cacheLineSize() - 1))
{
    /* All limits and queue sizes must be at least 1 for the LSQ to be
     *  able to make progress */
    if (in_memory_system_limit < 1) {
        fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_,
            in_memory_system_limit);
    }

    if (store_buffer_cycle_store_limit < 1) {
        fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be"
            " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit);
    }

    if (requests_queue_size < 1) {
        fatal("%s: executeLSQRequestsQueueSize must be"
            " >= 1 (%d)\n", name_, requests_queue_size);
    }

    if (transfers_queue_size < 1) {
        fatal("%s: executeLSQTransfersQueueSize must be"
            " >= 1 (%d)\n", name_, transfers_queue_size);
    }

    if (store_buffer_size < 1) {
        fatal("%s: executeLSQStoreBufferSize must be"
            " >= 1 (%d)\n", name_, store_buffer_size);
    }

    /* lineWidth is used as a mask elsewhere so must be a power of 2 */
    if ((lineWidth & (lineWidth - 1)) != 0) {
        fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth);
    }
}
1447
/* Nothing to clean up explicitly; member destructors do all the work */
LSQ::~LSQ()
{ }
1450
1451LSQ::LSQRequest::~LSQRequest()
1452{
1453    if (packet)
1454        delete packet;
1455    if (data)
1456        delete [] data;
1457}
1458
1459/**
1460 *  Step the memory access mechanism on to its next state.  In reality, most
1461 *  of the stepping is done by the callbacks on the LSQ but this
1462 *  function is responsible for issuing memory requests lodged in the
1463 *  requests queue.
1464 */
1465void
1466LSQ::step()
1467{
1468    /* Try to move address-translated requests between queues and issue
1469     *  them */
1470    if (!requests.empty())
1471        tryToSendToTransfers(requests.front());
1472
1473    storeBuffer.step();
1474}
1475
1476LSQ::LSQRequestPtr
1477LSQ::findResponse(MinorDynInstPtr inst)
1478{
1479    LSQ::LSQRequestPtr ret = NULL;
1480
1481    if (!transfers.empty()) {
1482        LSQRequestPtr request = transfers.front();
1483
1484        /* Same instruction and complete access or a store that's
1485         *  capable of being moved to the store buffer */
1486        if (request->inst->id == inst->id) {
1487            bool complete = request->isComplete();
1488            bool can_store = storeBuffer.canInsert();
1489            bool to_store_buffer = request->state ==
1490                LSQRequest::StoreToStoreBuffer;
1491
1492            if ((complete && !(request->isBarrier() && !can_store)) ||
1493                (to_store_buffer && can_store))
1494            {
1495                ret = request;
1496            }
1497        }
1498    }
1499
1500    if (ret) {
1501        DPRINTF(MinorMem, "Found matching memory response for inst: %s\n",
1502            *inst);
1503    } else {
1504        DPRINTF(MinorMem, "No matching memory response for inst: %s\n",
1505            *inst);
1506    }
1507
1508    return ret;
1509}
1510
1511void
1512LSQ::popResponse(LSQ::LSQRequestPtr response)
1513{
1514    assert(!transfers.empty() && transfers.front() == response);
1515
1516    transfers.pop();
1517
1518    if (!response->isLoad)
1519        numStoresInTransfers--;
1520
1521    if (response->issuedToMemory)
1522        numAccessesIssuedToMemory--;
1523
1524    if (response->state != LSQRequest::StoreInStoreBuffer) {
1525        DPRINTF(MinorMem, "Deleting %s request: %s\n",
1526            (response->isLoad ? "load" : "store"),
1527            *(response->inst));
1528
1529        delete response;
1530    }
1531}
1532
1533void
1534LSQ::sendStoreToStoreBuffer(LSQRequestPtr request)
1535{
1536    assert(request->state == LSQRequest::StoreToStoreBuffer);
1537
1538    DPRINTF(MinorMem, "Sending store: %s to store buffer\n",
1539        *(request->inst));
1540
1541    request->inst->inStoreBuffer = true;
1542
1543    storeBuffer.insert(request);
1544}
1545
1546bool
1547LSQ::isDrained()
1548{
1549    return requests.empty() && transfers.empty() &&
1550        storeBuffer.isDrained();
1551}
1552
1553bool
1554LSQ::needsToTick()
1555{
1556    bool ret = false;
1557
1558    if (canSendToMemorySystem()) {
1559        bool have_translated_requests = !requests.empty() &&
1560            requests.front()->state != LSQRequest::InTranslation &&
1561            transfers.unreservedRemainingSpace() != 0;
1562
1563        ret = have_translated_requests ||
1564            storeBuffer.numUnissuedStores() != 0;
1565    }
1566
1567    if (ret)
1568        DPRINTF(Activity, "Need to tick\n");
1569
1570    return ret;
1571}
1572
/* Build an LSQRequest for the given access, queue it and start its
 *  address translation.  Returns the instruction's translation fault
 *  (NoFault if translation hasn't faulted, or the fault from a previous
 *  push if the instruction is already in the LSQ) */
Fault
LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
                 unsigned int size, Addr addr, Request::Flags flags,
                 uint64_t *res, AtomicOpFunctorPtr amo_op,
                 const std::vector<bool>& byteEnable)
{
    /* An instruction can only re-enter here after a translation fault */
    assert(inst->translationFault == NoFault || inst->inLSQ);

    if (inst->inLSQ) {
        return inst->translationFault;
    }

    bool needs_burst = transferNeedsBurst(addr, size, lineWidth);

    if (needs_burst && inst->staticInst->isAtomic()) {
        // AMO requests that access across a cache line boundary are not
        // allowed since the cache does not guarantee AMO ops to be executed
        // atomically in two cache lines
        // For ISAs such as x86 that requires AMO operations to work on
        // accesses that cross cache-line boundaries, the cache needs to be
        // modified to support locking both cache lines to guarantee the
        // atomicity.
        panic("Do not expect cross-cache-line atomic memory request\n");
    }

    LSQRequestPtr request;

    /* Copy given data into the request.  The request will pass this to the
     *  packet and then it will own the data */
    uint8_t *request_data = NULL;

    DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:"
        " 0x%x%s lineWidth : 0x%x\n",
        (isLoad ? "load" : "store/atomic"), addr, size, flags,
            (needs_burst ? " (needs burst)" : ""), lineWidth);

    if (!isLoad) {
        /* Request_data becomes the property of a ...DataRequest (see below)
         *  and destroyed by its destructor */
        request_data = new uint8_t[size];
        if (inst->staticInst->isAtomic() ||
            (flags & Request::STORE_NO_DATA)) {
            /* For atomic or store-no-data, just use zeroed data */
            std::memset(request_data, 0, size);
        } else {
            std::memcpy(request_data, data, size);
        }
    }

    /* Cross-line accesses need a split request; everything else fits in
     *  a single request */
    if (needs_burst) {
        request = new SplitDataRequest(
            *this, inst, isLoad, request_data, res);
    } else {
        request = new SingleDataRequest(
            *this, inst, isLoad, request_data, res);
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
    request->request->setContext(cid);
    request->request->setVirt(0 /* asid */,
        addr, size, flags, cpu.dataMasterId(),
        /* I've no idea why we need the PC, but give it */
        inst->pc.instAddr(), std::move(amo_op));
    request->request->setByteEnable(byteEnable);

    /* Queue the request and kick off translation; the fault (if any)
     *  will be filled in by the translation machinery */
    requests.push(request);
    inst->inLSQ = true;
    request->startAddrTranslation();

    return inst->translationFault;
}
1647
1648void
1649LSQ::pushFailedRequest(MinorDynInstPtr inst)
1650{
1651    LSQRequestPtr request = new FailedDataRequest(*this, inst);
1652    requests.push(request);
1653}
1654
/* Report the LSQ's state followed by each of its queues in turn.  Note
 *  that only thread 0's lastMemBarrier appears in the summary line */
void
LSQ::minorTrace() const
{
    MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
        " lastMemBarrier=%d\n",
        state, numAccessesInDTLB, numAccessesInMemorySystem,
        numStoresInTransfers, lastMemBarrier[0]);
    requests.minorTrace();
    transfers.minorTrace();
    storeBuffer.minorTrace();
}
1666
/* Construct an empty store buffer with the given capacity and per-cycle
 *  store issue limit */
LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_,
    unsigned int store_buffer_size,
    unsigned int store_limit_per_cycle) :
    Named(name_), lsq(lsq_),
    numSlots(store_buffer_size),
    storeLimitPerCycle(store_limit_per_cycle),
    slots(),
    numUnissuedAccesses(0)
{
}
1677
1678PacketPtr
1679makePacketForRequest(const RequestPtr &request, bool isLoad,
1680    Packet::SenderState *sender_state, PacketDataPtr data)
1681{
1682    PacketPtr ret = isLoad ? Packet::createRead(request)
1683                           : Packet::createWrite(request);
1684
1685    if (sender_state)
1686        ret->pushSenderState(sender_state);
1687
1688    if (isLoad) {
1689        ret->allocate();
1690    } else if (!request->isCacheMaintenance()) {
1691        // CMOs are treated as stores but they don't have data. All
1692        // stores otherwise need to allocate for data.
1693        ret->dataDynamic(data);
1694    }
1695
1696    return ret;
1697}
1698
1699void
1700LSQ::issuedMemBarrierInst(MinorDynInstPtr inst)
1701{
1702    assert(inst->isInst() && inst->staticInst->isMemBarrier());
1703    assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]);
1704
1705    /* Remember the barrier.  We only have a notion of one
1706     *  barrier so this may result in some mem refs being
1707     *  delayed if they are between barriers */
1708    lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum;
1709}
1710
/* Make the packet for this request from its request and data, handing
 *  ownership of the data over to the packet */
void
LSQ::LSQRequest::makePacket()
{
    /* Only successfully-translated requests can have packets made */
    assert(inst->translationFault == NoFault);

    /* Make the function idempotent */
    if (packet)
        return;

    packet = makePacketForRequest(request, isLoad, this, data);
    /* Null the data pointer so we know not to deallocate it when this
     * request is destroyed.  The data now belongs to the packet and
     * the packet is responsible for its destruction */
    data = NULL;
}
1726
1727std::ostream &
1728operator <<(std::ostream &os, LSQ::MemoryState state)
1729{
1730    switch (state) {
1731      case LSQ::MemoryRunning:
1732        os << "MemoryRunning";
1733        break;
1734      case LSQ::MemoryNeedsRetry:
1735        os << "MemoryNeedsRetry";
1736        break;
1737      default:
1738        os << "MemoryState-" << static_cast<int>(state);
1739        break;
1740    }
1741    return os;
1742}
1743
1744void
1745LSQ::recvTimingSnoopReq(PacketPtr pkt)
1746{
1747    /* LLSC operations in Minor can't be speculative and are executed from
1748     * the head of the requests queue.  We shouldn't need to do more than
1749     * this action on snoops. */
1750    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1751        if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1752            cpu.wakeup(tid);
1753        }
1754    }
1755
1756    if (pkt->isInvalidate() || pkt->isWrite()) {
1757        for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1758            TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
1759                                      cacheBlockMask);
1760        }
1761    }
1762}
1763
1764void
1765LSQ::threadSnoop(LSQRequestPtr request)
1766{
1767    /* LLSC operations in Minor can't be speculative and are executed from
1768     * the head of the requests queue.  We shouldn't need to do more than
1769     * this action on snoops. */
1770    ThreadID req_tid = request->inst->id.threadId;
1771    PacketPtr pkt = request->packet;
1772
1773    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
1774        if (tid != req_tid) {
1775            if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
1776                cpu.wakeup(tid);
1777            }
1778
1779            if (pkt->isInvalidate() || pkt->isWrite()) {
1780                TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
1781                                          cacheBlockMask);
1782            }
1783        }
1784    }
1785}
1786
1787}
1788