1/* 2 * Copyright (c) 2010-2018 ARM Limited 3 * All rights reserved. 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software --- 629 unchanged lines hidden (view full) --- 638 if (doFastWrites && (pkt->cmd == MemCmd::WriteReq) && 639 (pkt->getSize() == blkSize) && (pkt->getOffset(blkSize) == 0)) { 640 pkt->cmd = MemCmd::WriteLineReq; 641 DPRINTF(Cache, "packet promoted from Write to WriteLineReq\n"); 642 } 643} 644 645void |
Cache::handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick request_time)
{
    // Complete a timing-mode access that was satisfied by the cache:
    // either schedule a response on the CPU-side port at request_time,
    // or, for packets that need no response, dispose of the packet.

    // should never be satisfying an uncacheable access as we
    // flush and invalidate any existing block as part of the
    // lookup
    assert(!pkt->req->isUncacheable());

    if (pkt->needsResponse()) {
        pkt->makeTimingResponse();
        // @todo: Make someone pay for this
        pkt->headerDelay = pkt->payloadDelay = 0;

        // In this case we are considering request_time that takes
        // into account the delay of the xbar, if any, and just
        // lat, neglecting responseLatency, modelling hit latency
        // just as lookupLatency or the value of lat overridden
        // by access(), that calls accessBlock() function.
        cpuSidePort->schedTimingResp(pkt, request_time, true);
    } else {
        DPRINTF(Cache, "%s satisfied %s, no response needed\n", __func__,
                pkt->print());

        // queue the packet for deletion, as the sending cache is
        // still relying on it; if the block is found in access(),
        // CleanEvict and Writeback messages will be deleted
        // here as well
        pendingDelete.reset(pkt);
    }
}

// Handle a timing-mode access that missed in the cache: coalesce it
// into a matching outstanding MSHR when possible, otherwise allocate a
// new MSHR or write-buffer entry. Software prefetches are answered
// immediately with a dummy response while the miss proceeds
// asynchronously on a cloned Packet/Request pair.
void
Cache::handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk, Tick forward_time,
                           Tick request_time)
{
    Addr blk_addr = pkt->getBlockAddr(blkSize);

    // ignore any existing MSHR if we are dealing with an
    // uncacheable request
    MSHR *mshr = pkt->req->isUncacheable() ? nullptr :
        mshrQueue.findMatch(blk_addr, pkt->isSecure());

    // Software prefetch handling:
    // To keep the core from waiting on data it won't look at
    // anyway, send back a response with dummy data. Miss handling
    // will continue asynchronously. Unfortunately, the core will
    // insist upon freeing original Packet/Request, so we have to
    // create a new pair with a different lifecycle. Note that this
    // processing happens before any MSHR munging on behalf of
    // this request because this new Request will be the one stored
    // into the MSHRs, not the original.
    if (pkt->cmd.isSWPrefetch()) {
        assert(pkt->needsResponse());
        assert(pkt->req->hasPaddr());
        assert(!pkt->req->isUncacheable());

        // There's no reason to add a prefetch as an additional target
        // to an existing MSHR. If an outstanding request is already
        // in progress, there is nothing for the prefetch to do.
        // If this is the case, we don't even create a request at all.
        PacketPtr pf = nullptr;

        if (!mshr) {
            // copy the request and create a new SoftPFReq packet
            RequestPtr req = new Request(pkt->req->getPaddr(),
                                         pkt->req->getSize(),
                                         pkt->req->getFlags(),
                                         pkt->req->masterId());
            pf = new Packet(req, pkt->cmd);
            pf->allocate();
            assert(pf->getAddr() == pkt->getAddr());
            assert(pf->getSize() == pkt->getSize());
        }

        pkt->makeTimingResponse();

        // request_time is used here, taking into account lat and the delay
        // charged if the packet comes from the xbar.
        cpuSidePort->schedTimingResp(pkt, request_time, true);

        // If an outstanding request is in progress (we found an
        // MSHR) this is set to null
        pkt = pf;
    }

    if (mshr) {
        /// MSHR hit
        /// @note writebacks will be checked in getNextMSHR()
        /// for any conflicting requests to the same block

        //@todo remove hw_pf here

        // Coalesce unless it was a software prefetch (see above).
        if (pkt) {
            assert(!pkt->isWriteback());
            // CleanEvicts corresponding to blocks which have
            // outstanding requests in MSHRs are simply sunk here
            if (pkt->cmd == MemCmd::CleanEvict) {
                pendingDelete.reset(pkt);
            } else if (pkt->cmd == MemCmd::WriteClean) {
                // A WriteClean should never coalesce with any
                // outstanding cache maintenance requests.

                // We use forward_time here because there is an
                // uncached memory write, forwarded to WriteBuffer.
                allocateWriteBuffer(pkt, forward_time);
            } else {
                DPRINTF(Cache, "%s coalescing MSHR for %s\n", __func__,
                        pkt->print());

                assert(pkt->req->masterId() < system->maxMasters());
                mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;

                // uncacheable accesses always allocate a new
                // MSHR, and cacheable accesses ignore any
                // uncacheable MSHRs, thus we should never have
                // targets added if originally allocated
                // uncacheable
                assert(!mshr->isUncacheable());

                // We use forward_time here because it is the same
                // considering new targets. We have multiple
                // requests for the same address here. It
                // specifies the latency to allocate an internal
                // buffer and to schedule an event to the queued
                // port and also takes into account the additional
                // delay of the xbar.
                mshr->allocateTarget(pkt, forward_time, order++,
                                     allocOnFill(pkt->cmd));
                if (mshr->getNumTargets() == numTarget) {
                    noTargetMSHR = mshr;
                    setBlocked(Blocked_NoTargets);
                    // need to be careful with this... if this mshr isn't
                    // ready yet (i.e. time > curTick()), we don't want to
                    // move it ahead of mshrs that are ready
                    // mshrQueue.moveToFront(mshr);
                }
            }
        }
    } else {
        // no MSHR
        assert(pkt->req->masterId() < system->maxMasters());
        if (pkt->req->isUncacheable()) {
            mshr_uncacheable[pkt->cmdToIndex()][pkt->req->masterId()]++;
        } else {
            mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
        }

        // Evictions, WriteCleans and uncacheable writes bypass the
        // miss path entirely and go straight to the write buffer.
        if (pkt->isEviction() || pkt->cmd == MemCmd::WriteClean ||
            (pkt->req->isUncacheable() && pkt->isWrite())) {
            // We use forward_time here because there is an
            // uncached memory write, forwarded to WriteBuffer.
            allocateWriteBuffer(pkt, forward_time);
        } else {
            if (blk && blk->isValid()) {
                // should have flushed and have no valid block
                assert(!pkt->req->isUncacheable());

                // If we have a write miss to a valid block, we
                // need to mark the block non-readable. Otherwise
                // if we allow reads while there's an outstanding
                // write miss, the read could return stale data
                // out of the cache block... a more aggressive
                // system could detect the overlap (if any) and
                // forward data out of the MSHRs, but we don't do
                // that yet. Note that we do need to leave the
                // block valid so that it stays in the cache, in
                // case we get an upgrade response (and hence no
                // new data) when the write miss completes.
                // As long as CPUs do proper store/load forwarding
                // internally, and have a sufficiently weak memory
                // model, this is probably unnecessary, but at some
                // point it must have seemed like we needed it...
                assert((pkt->needsWritable() && !blk->isWritable()) ||
                       pkt->req->isCacheMaintenance());
                blk->status &= ~BlkReadable;
            }
            // Here we are using forward_time, modelling the latency of
            // a miss (outbound) just as forwardLatency, neglecting the
            // lookupLatency component.
            allocateMissBuffer(pkt, forward_time);
        }
    }
}

void
831Cache::recvTimingReq(PacketPtr pkt) 832{ 833 DPRINTF(CacheTags, "%s tags:\n%s\n", __func__, tags->print()); 834 835 assert(pkt->isRequest()); 836 837 // Just forward the packet if caches are disabled. 838 if (system->bypassCaches()) { --- 101 unchanged lines hidden (view full) --- 940 // In case of a miss we are neglecting forward latency. 941 Tick request_time = clockEdge(lat) + pkt->headerDelay; 942 // Here we reset the timing of the packet. 943 pkt->headerDelay = pkt->payloadDelay = 0; 944 945 // track time of availability of next prefetch, if any 946 Tick next_pf_time = MaxTick; 947 |
948 if (satisfied) { |
949 // if we need to notify the prefetcher we must do it before 950 // anything else, as handleTimingReqHit might turn the packet 951 // into a response 952 if (prefetcher && 953 (prefetchOnAccess || (blk && blk->wasPrefetched()))) { |
954 if (blk) 955 blk->status &= ~BlkHWPrefetched; 956 957 // Don't notify on SWPrefetch 958 if (!pkt->cmd.isSWPrefetch()) { 959 assert(!pkt->req->isCacheMaintenance()); 960 next_pf_time = prefetcher->notify(pkt); 961 } 962 } 963 |
964 handleTimingReqHit(pkt, blk, request_time); |
965 } else { |
966 handleTimingReqMiss(pkt, blk, forward_time, request_time); |
967 |
968 // We should call the prefetcher regardless of whether the request 969 // is satisfied or not, and regardless of whether the request is in 970 // the MSHR or not. The request could be a ReadReq hit, but still not 971 // satisfied (potentially because of a prior write to the same 972 // cache line). So, even when not satisfied, if there is an MSHR 973 // already allocated for this request, we need to let the prefetcher 974 // know about it 975 if (prefetcher && pkt && 976 !pkt->cmd.isSWPrefetch() && 977 !pkt->req->isCacheMaintenance()) { 978 next_pf_time = prefetcher->notify(pkt); |
979 } |
980 } 981 982 if (next_pf_time != MaxTick) 983 schedMemSideSendEvent(next_pf_time); 984} 985 986PacketPtr 987Cache::createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk, --- 1892 unchanged lines hidden --- |