1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Lisa Hsu
34 */
35
36#ifndef __TLB_COALESCER_HH__
37#define __TLB_COALESCER_HH__
38
39#include <list>
40#include <queue>
41#include <string>
42#include <vector>
43
44#include "arch/generic/tlb.hh"
45#include "arch/isa.hh"
46#include "arch/isa_traits.hh"
47#include "arch/x86/pagetable.hh"
48#include "arch/x86/regs/segment.hh"
49#include "base/logging.hh"
50#include "base/statistics.hh"
51#include "gpu-compute/gpu_tlb.hh"
52#include "mem/port.hh"
53#include "mem/request.hh"
54#include "params/TLBCoalescer.hh"
55#include "sim/clocked_object.hh"
56
57class BaseTLB;
58class Packet;
59class ThreadContext;
60
61/**
62 * The TLBCoalescer is a ClockedObject sitting on the front side (CPUSide) of
63 * each TLB. It receives packets and issues coalesced requests to the
64 * TLB below it. It controls how requests are coalesced (the rules)
65 * and the permitted number of TLB probes per cycle (i.e., how many
66 * coalesced requests it feeds the TLB per cycle).
67 */
68class TLBCoalescer : public ClockedObject
69{
70   protected:
71    // TLB clock: will inherit clock from shader's clock period in terms
72    // of nuber of ticks of curTime (aka global simulation clock)
73    // The assignment of TLB clock from shader clock is done in the
74    // python config files.
75    int clock;
76
77  public:
78    typedef TLBCoalescerParams Params;
79    TLBCoalescer(const Params *p);
80    ~TLBCoalescer() { }
81
82    // Number of TLB probes per cycle. Parameterizable - default 2.
83    int TLBProbesPerCycle;
84
85    // Consider coalescing across that many ticks.
86    // Paraemterizable - default 1.
87    int coalescingWindow;
88
89    // Each coalesced request consists of multiple packets
90    // that all fall within the same virtual page
91    typedef std::vector<PacketPtr> coalescedReq;
92
93    // disables coalescing when true
94    bool disableCoalescing;
95
96    /*
97     * This is a hash map with <tick_index> as a key.
98     * It contains a vector of coalescedReqs per <tick_index>.
99     * Requests are buffered here until they can be issued to
100     * the TLB, at which point they are copied to the
101     * issuedTranslationsTable hash map.
102     *
103     * In terms of coalescing, we coalesce requests in a given
104     * window of x cycles by using tick_index = issueTime/x as a
105     * key, where x = coalescingWindow. issueTime is the issueTime
106     * of the pkt from the ComputeUnit's perspective, but another
107     * option is to change it to curTick(), so we coalesce based
108     * on the receive time.
109     */
110    typedef std::unordered_map<int64_t, std::vector<coalescedReq>> CoalescingFIFO;
111
112    CoalescingFIFO coalescerFIFO;
113
114    /*
115     * issuedTranslationsTabler: a hash_map indexed by virtual page
116     * address. Each hash_map entry has a vector of PacketPtr associated
117     * with it denoting the different packets that share an outstanding
118     * coalesced translation request for the same virtual page.
119     *
120     * The rules that determine which requests we can coalesce are
121     * specified in the canCoalesce() method.
122     */
123    typedef std::unordered_map<Addr, coalescedReq> CoalescingTable;
124
125    CoalescingTable issuedTranslationsTable;
126
127    // number of packets the coalescer receives
128    Stats::Scalar uncoalescedAccesses;
129    // number packets the coalescer send to the TLB
130    Stats::Scalar coalescedAccesses;
131
132    // Number of cycles the coalesced requests spend waiting in
133    // coalescerFIFO. For each packet the coalescer receives we take into
134    // account the number of all uncoalesced requests this pkt "represents"
135    Stats::Scalar queuingCycles;
136
137    // On average how much time a request from the
138    // uncoalescedAccesses that reaches the TLB
139    // spends waiting?
140    Stats::Scalar localqueuingCycles;
141    // localqueuingCycles/uncoalescedAccesses
142    Stats::Formula localLatency;
143
144    bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2);
145    void updatePhysAddresses(PacketPtr pkt);
146    void regStats() override;
147
148    // Clock related functions. Maps to-and-from
149    // Simulation ticks and object clocks.
150    Tick frequency() const { return SimClock::Frequency / clock; }
151    Tick ticks(int numCycles) const { return (Tick)clock * numCycles; }
152    Tick curCycle() const { return curTick() / clock; }
153    Tick tickToCycles(Tick val) const { return val / clock;}
154
155    class CpuSidePort : public SlavePort
156    {
157      public:
158        CpuSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
159                    PortID _index)
160            : SlavePort(_name, tlb_coalescer), coalescer(tlb_coalescer),
161              index(_index) { }
162
163      protected:
164        TLBCoalescer *coalescer;
165        int index;
166
167        virtual bool recvTimingReq(PacketPtr pkt);
168        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
169        virtual void recvFunctional(PacketPtr pkt);
170        virtual void recvRangeChange() { }
171        virtual void recvReqRetry();
172
173        virtual void
174        recvRespRetry()
175        {
176            fatal("recvRespRetry() is not implemented in the TLB coalescer.\n");
177        }
178
179        virtual AddrRangeList getAddrRanges() const;
180    };
181
182    class MemSidePort : public MasterPort
183    {
184      public:
185        MemSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
186                    PortID _index)
187            : MasterPort(_name, tlb_coalescer), coalescer(tlb_coalescer),
188              index(_index) { }
189
190        std::deque<PacketPtr> retries;
191
192      protected:
193        TLBCoalescer *coalescer;
194        int index;
195
196        virtual bool recvTimingResp(PacketPtr pkt);
197        virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
198        virtual void recvFunctional(PacketPtr pkt);
199        virtual void recvRangeChange() { }
200        virtual void recvReqRetry();
201
202        virtual void
203        recvRespRetry()
204        {
205            fatal("recvRespRetry() not implemented in TLB coalescer");
206        }
207    };
208
209    // Coalescer slave ports on the cpu Side
210    std::vector<CpuSidePort*> cpuSidePort;
211    // Coalescer master ports on the memory side
212    std::vector<MemSidePort*> memSidePort;
213
214    Port &getPort(const std::string &if_name,
215                  PortID idx=InvalidPortID) override;
216
217    void processProbeTLBEvent();
218    /// This event issues the TLB probes
219    EventFunctionWrapper probeTLBEvent;
220
221    void processCleanupEvent();
222    /// The cleanupEvent is scheduled after a TLBEvent triggers
223    /// in order to free memory and do the required clean-up
224    EventFunctionWrapper cleanupEvent;
225
226    // this FIFO queue keeps track of the virt. page
227    // addresses that are pending cleanup
228    std::queue<Addr> cleanupQueue;
229};
230
231#endif // __TLB_COALESCER_HH__
232