tlb_coalescer.hh revision 13892
13142SN/A/*
23142SN/A * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
33142SN/A * All rights reserved.
49988SN/A *
58835SN/A * For use for simulation and test purposes only
69988SN/A *
77935SN/A * Redistribution and use in source and binary forms, with or without
87935SN/A * modification, are permitted provided that the following conditions are met:
97935SN/A *
103142SN/A * 1. Redistributions of source code must retain the above copyright notice,
113142SN/A * this list of conditions and the following disclaimer.
123142SN/A *
1310315SN/A * 2. Redistributions in binary form must reproduce the above copyright notice,
148835SN/A * this list of conditions and the following disclaimer in the documentation
159885SN/A * and/or other materials provided with the distribution.
169885SN/A *
179988SN/A * 3. Neither the name of the copyright holder nor the names of its
1811312Santhony.gutierrez@amd.com * contributors may be used to endorse or promote products derived from this
198835SN/A * software without specific prior written permission.
208835SN/A *
2110315SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
228835SN/A * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2310167SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
249481SN/A * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
259481SN/A * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
268464SN/A * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2710736SN/A * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2811219Snilay@cs.wisc.edu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
298721SN/A * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
308835SN/A * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
318835SN/A * POSSIBILITY OF SUCH DAMAGE.
3211440SCurtis.Dunham@arm.com *
3311440SCurtis.Dunham@arm.com * Authors: Lisa Hsu
347935SN/A */
357935SN/A
367935SN/A#ifndef __TLB_COALESCER_HH__
377935SN/A#define __TLB_COALESCER_HH__
387935SN/A
#include <cstdint>
#include <deque>
#include <list>
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>

#include "arch/generic/tlb.hh"
#include "arch/isa.hh"
#include "arch/isa_traits.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/regs/segment.hh"
#include "base/logging.hh"
#include "base/statistics.hh"
#include "gpu-compute/gpu_tlb.hh"
#include "mem/port.hh"
#include "mem/request.hh"
#include "params/TLBCoalescer.hh"
#include "sim/clocked_object.hh"
568241SN/A
// Forward declarations: let this header name these types without pulling
// in their full definitions.
// NOTE(review): BaseTLB and ThreadContext are not referenced in the visible
// part of this header -- presumably kept for the implementation file; confirm
// before removing.
class BaseTLB;
class Packet;
class ThreadContext;
603142SN/A
613142SN/A/**
629481SN/A * The TLBCoalescer is a ClockedObject sitting on the front side (CPUSide) of
634657SN/A * each TLB. It receives packets and issues coalesced requests to the
645876SN/A * TLB below it. It controls how requests are coalesced (the rules)
659885SN/A * and the permitted number of TLB probes per cycle (i.e., how many
663142SN/A * coalesced requests it feeds the TLB per cycle).
673142SN/A */
683142SN/Aclass TLBCoalescer : public ClockedObject
693142SN/A{
703142SN/A   protected:
714289SN/A    // TLB clock: will inherit clock from shader's clock period in terms
723142SN/A    // of nuber of ticks of curTime (aka global simulation clock)
733142SN/A    // The assignment of TLB clock from shader clock is done in the
743142SN/A    // python config files.
753142SN/A    int clock;
765876SN/A
778835SN/A  public:
785876SN/A    typedef TLBCoalescerParams Params;
795000SN/A    TLBCoalescer(const Params *p);
809988SN/A    ~TLBCoalescer() { }
819988SN/A
8210451SN/A    // Number of TLB probes per cycle. Parameterizable - default 2.
833142SN/A    int TLBProbesPerCycle;
843142SN/A
853142SN/A    // Consider coalescing across that many ticks.
863142SN/A    // Paraemterizable - default 1.
873142SN/A    int coalescingWindow;
883142SN/A
893142SN/A    // Each coalesced request consists of multiple packets
903142SN/A    // that all fall within the same virtual page
913142SN/A    typedef std::vector<PacketPtr> coalescedReq;
923142SN/A
933142SN/A    // disables coalescing when true
9411219Snilay@cs.wisc.edu    bool disableCoalescing;
959481SN/A
963142SN/A    /*
973142SN/A     * This is a hash map with <tick_index> as a key.
985000SN/A     * It contains a vector of coalescedReqs per <tick_index>.
993142SN/A     * Requests are buffered here until they can be issued to
1003142SN/A     * the TLB, at which point they are copied to the
1013142SN/A     * issuedTranslationsTable hash map.
1023142SN/A     *
1038835SN/A     * In terms of coalescing, we coalesce requests in a given
1043142SN/A     * window of x cycles by using tick_index = issueTime/x as a
1059924SN/A     * key, where x = coalescingWindow. issueTime is the issueTime
1063142SN/A     * of the pkt from the ComputeUnit's perspective, but another
1073142SN/A     * option is to change it to curTick(), so we coalesce based
1083142SN/A     * on the receive time.
1093142SN/A     */
1106980SN/A    typedef std::unordered_map<int64_t, std::vector<coalescedReq>> CoalescingFIFO;
1118835SN/A
1123142SN/A    CoalescingFIFO coalescerFIFO;
1133142SN/A
1143142SN/A    /*
1153142SN/A     * issuedTranslationsTabler: a hash_map indexed by virtual page
1163142SN/A     * address. Each hash_map entry has a vector of PacketPtr associated
1173142SN/A     * with it denoting the different packets that share an outstanding
1189885SN/A     * coalesced translation request for the same virtual page.
1194657SN/A     *
1204657SN/A     * The rules that determine which requests we can coalesce are
1214657SN/A     * specified in the canCoalesce() method.
1224657SN/A     */
1234657SN/A    typedef std::unordered_map<Addr, coalescedReq> CoalescingTable;
1244657SN/A
1254657SN/A    CoalescingTable issuedTranslationsTable;
1264657SN/A
1274657SN/A    // number of packets the coalescer receives
12810242SN/A    Stats::Scalar uncoalescedAccesses;
1293142SN/A    // number packets the coalescer send to the TLB
1308546SN/A    Stats::Scalar coalescedAccesses;
1319481SN/A
1323142SN/A    // Number of cycles the coalesced requests spend waiting in
1334938SN/A    // coalescerFIFO. For each packet the coalescer receives we take into
1343142SN/A    // account the number of all uncoalesced requests this pkt "represents"
1353142SN/A    Stats::Scalar queuingCycles;
1363142SN/A
1373147SN/A    // On average how much time a request from the
1383147SN/A    // uncoalescedAccesses that reaches the TLB
1393147SN/A    // spends waiting?
1409481SN/A    Stats::Scalar localqueuingCycles;
14110798SN/A    // localqueuingCycles/uncoalescedAccesses
1429481SN/A    Stats::Formula localLatency;
1439481SN/A
1449481SN/A    bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2);
1459481SN/A    void updatePhysAddresses(PacketPtr pkt);
1469481SN/A    void regStats();
1479988SN/A
1489481SN/A    // Clock related functions. Maps to-and-from
1499481SN/A    // Simulation ticks and object clocks.
15011440SCurtis.Dunham@arm.com    Tick frequency() const { return SimClock::Frequency / clock; }
15111440SCurtis.Dunham@arm.com    Tick ticks(int numCycles) const { return (Tick)clock * numCycles; }
15211440SCurtis.Dunham@arm.com    Tick curCycle() const { return curTick() / clock; }
15311440SCurtis.Dunham@arm.com    Tick tickToCycles(Tick val) const { return val / clock;}
15411440SCurtis.Dunham@arm.com
15511440SCurtis.Dunham@arm.com    class CpuSidePort : public SlavePort
1569481SN/A    {
1579481SN/A      public:
1589481SN/A        CpuSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
1599481SN/A                    PortID _index)
1609481SN/A            : SlavePort(_name, tlb_coalescer), coalescer(tlb_coalescer),
16111440SCurtis.Dunham@arm.com              index(_index) { }
1629481SN/A
1633147SN/A      protected:
16411066SN/A        TLBCoalescer *coalescer;
1659885SN/A        int index;
1668983SN/A
1673147SN/A        virtual bool recvTimingReq(PacketPtr pkt);
1689885SN/A        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
16911219Snilay@cs.wisc.edu        virtual void recvFunctional(PacketPtr pkt);
17010736SN/A        virtual void recvRangeChange() { }
1719988SN/A        virtual void recvReqRetry();
1729348SN/A
17310900SN/A        virtual void
1743147SN/A        recvRespRetry()
1759348SN/A        {
1765876SN/A            fatal("recvRespRetry() is not implemented in the TLB coalescer.\n");
1778835SN/A        }
1789348SN/A
17910036SN/A        virtual AddrRangeList getAddrRanges() const;
1803147SN/A    };
1818835SN/A
1829885SN/A    class MemSidePort : public MasterPort
1834316SN/A    {
1843147SN/A      public:
18511219Snilay@cs.wisc.edu        MemSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
1863147SN/A                    PortID _index)
1878983SN/A            : MasterPort(_name, tlb_coalescer), coalescer(tlb_coalescer),
1883142SN/A              index(_index) { }
1899885SN/A
1909885SN/A        std::deque<PacketPtr> retries;
1919885SN/A
1929885SN/A      protected:
1939885SN/A        TLBCoalescer *coalescer;
1949988SN/A        int index;
1959885SN/A
19610036SN/A        virtual bool recvTimingResp(PacketPtr pkt);
1979885SN/A        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
1989885SN/A        virtual void recvFunctional(PacketPtr pkt);
1995000SN/A        virtual void recvRangeChange() { }
2006024SN/A        virtual void recvReqRetry();
2019988SN/A
2025000SN/A        virtual void
2035000SN/A        recvRespRetry()
2043142SN/A        {
2053142SN/A            fatal("recvRespRetry() not implemented in TLB coalescer");
2067761SN/A        }
2077761SN/A    };
2089988SN/A
2093142SN/A    // Coalescer slave ports on the cpu Side
2103142SN/A    std::vector<CpuSidePort*> cpuSidePort;
2113142SN/A    // Coalescer master ports on the memory side
2124938SN/A    std::vector<MemSidePort*> memSidePort;
2133142SN/A
2149988SN/A    Port &getPort(const std::string &if_name,
2154938SN/A                  PortID idx=InvalidPortID) override;
2163142SN/A
2174938SN/A    void processProbeTLBEvent();
2183142SN/A    /// This event issues the TLB probes
2199988SN/A    EventFunctionWrapper probeTLBEvent;
2203142SN/A
2213142SN/A    void processCleanupEvent();
22210900SN/A    /// The cleanupEvent is scheduled after a TLBEvent triggers
2233142SN/A    /// in order to free memory and do the required clean-up
2243142SN/A    EventFunctionWrapper cleanupEvent;
2253142SN/A
2263142SN/A    // this FIFO queue keeps track of the virt. page
2273142SN/A    // addresses that are pending cleanup
2289988SN/A    std::queue<Addr> cleanupQueue;
2293142SN/A};
2303142SN/A
2313142SN/A#endif // __TLB_COALESCER_HH__
2323142SN/A