gpu_tlb.hh (12085:de78ea63e0ca → 12334:e0ab29a34764)
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Lisa Hsu
 */
35
36#ifndef __GPU_TLB_HH__
37#define __GPU_TLB_HH__
38
39#include <fstream>
40#include <list>
41#include <queue>
42#include <string>
43#include <vector>
44
45#include "arch/generic/tlb.hh"
46#include "arch/x86/pagetable.hh"
47#include "arch/x86/pagetable_walker.hh"
48#include "arch/x86/regs/segment.hh"
49#include "base/callback.hh"
50#include "base/logging.hh"
51#include "base/statistics.hh"
52#include "gpu-compute/compute_unit.hh"
53#include "mem/mem_object.hh"
54#include "mem/port.hh"
55#include "mem/request.hh"
56#include "params/X86GPUTLB.hh"
57#include "sim/sim_object.hh"
58
59class BaseTLB;
60class Packet;
61class ThreadContext;
62
63namespace X86ISA
64{
65 class GpuTlbEntry : public TlbEntry
66 {
67 public:
68 GpuTlbEntry(Addr asn, Addr _vaddr, Addr _paddr, bool _valid)
69 : TlbEntry(asn, _vaddr, _paddr, false, false), valid(_valid) { }
70
71 GpuTlbEntry() : TlbEntry(), valid(false) { }
72
73 bool valid;
74 };
75
76 class GpuTLB : public MemObject
77 {
78 protected:
79 friend class Walker;
80
81 typedef std::list<GpuTlbEntry*> EntryList;
82
83 uint32_t configAddress;
84
        // TLB clock: inherits the shader's clock period, expressed as a
        // number of ticks of the global simulation clock. The assignment
        // of the TLB clock from the shader clock is done in the python
        // config files.
        int clock;

      public:
        // Clock-related functions; map to and from simulation ticks and
        // object cycles.
        Tick frequency() const { return SimClock::Frequency / clock; }

        Tick
        ticks(int numCycles) const
        {
            return (Tick)clock * numCycles;
        }

        Tick curCycle() const { return curTick() / clock; }
        Tick tickToCycles(Tick val) const { return val / clock; }
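
        /**
         * A quick illustration of the conversions above (a sketch, not
         * part of the class interface; the 1 GHz figure is hypothetical
         * and assumes the default 1 THz simulation clock):
         *
         * @code
         * GpuTLB *tlb = ...;          // suppose clock == 1000 ticks (1 GHz)
         * Tick t = tlb->ticks(5);     // 5 cycles -> 5000 ticks
         * assert(tlb->tickToCycles(t) == 5);
         * @endcode
         */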

        typedef X86GPUTLBParams Params;
        GpuTLB(const Params *p);
        ~GpuTLB();

        typedef enum BaseTLB::Mode Mode;

        class Translation
        {
          public:
            virtual ~Translation() { }

            /**
             * Signal that the translation has been delayed due to a hw page
             * table walk.
             */
            virtual void markDelayed() = 0;

            /**
             * The memory for this object may be dynamically allocated, and it
             * may be responsible for cleaning itself up, which will happen in
             * this function. Once it's called, the object is no longer valid.
             */
            virtual void finish(Fault fault, RequestPtr req, ThreadContext *tc,
                                Mode mode) = 0;
        };

        void dumpAll();
        GpuTlbEntry *lookup(Addr va, bool update_lru=true);
        void setConfigAddress(uint32_t addr);

      protected:
        EntryList::iterator lookupIt(Addr va, bool update_lru=true);
        Walker *walker;

      public:
        Walker *getWalker();
        void invalidateAll();
        void invalidateNonGlobal();
        void demapPage(Addr va, uint64_t asn);

      protected:
        int size;
        int assoc;
        int numSets;

        /**
         * true if this is a fully-associative TLB
         */
        bool FA;
        Addr setMask;
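
        /**
         * How these geometry fields are expected to relate (assumed for
         * illustration; the authoritative computation lives in
         * gpu_tlb.cc):
         *
         * @code
         * numSets = size / assoc;   // number of sets; a power of two
         * setMask = numSets - 1;
         * // set index of a virtual address:
         * Addr set = (vaddr >> PageShift) & setMask;
         * @endcode
         */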

        /**
         * Allocation Policy: true if we always allocate on a hit, false
         * otherwise. Default is true.
         */
        bool allocationPolicy;

        /**
         * if true, then this is not the last level TLB
         */
        bool hasMemSidePort;

        /**
         * Print out accessDistance stats. One stat file
         * per TLB.
         */
        bool accessDistance;

        std::vector<GpuTlbEntry> tlb;

        /*
         * There is one free list per set. As long as a set has not
         * reached its full capacity, grab an entry from its freeList.
         */
        std::vector<EntryList> freeList;

        /**
         * An entryList per set is the equivalent of an LRU stack;
         * it's used to guide replacement decisions. The head of the list
         * contains the MRU TLB entry of the given set. If the freeList
         * for this set is empty, the last element of the list
         * is evicted (i.e., dropped on the floor).
         */
        std::vector<EntryList> entryList;
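
        /**
         * The replacement mechanics implied by the two lists above
         * (a sketch, not the literal implementation in gpu_tlb.cc):
         *
         * @code
         * // on a hit with update_lru == true, move the entry to the head
         * entryList[set].splice(entryList[set].begin(),
         *                       entryList[set], hit_it);
         *
         * // on a fill, prefer a free entry; otherwise evict the LRU tail
         * GpuTlbEntry *victim;
         * if (!freeList[set].empty()) {
         *     victim = freeList[set].front();
         *     freeList[set].pop_front();
         * } else {
         *     victim = entryList[set].back();
         *     entryList[set].pop_back();
         * }
         * entryList[set].push_front(victim);
         * @endcode
         */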

        Fault translateInt(RequestPtr req, ThreadContext *tc);

        Fault translate(RequestPtr req, ThreadContext *tc,
                Translation *translation, Mode mode, bool &delayedResponse,
                bool timing, int &latency);

      public:
        // latencies for a TLB hit, miss and page fault
        int hitLatency;
        int missLatency1;
        int missLatency2;

        // local_stats are as seen from the TLB
        // without taking into account coalescing
        Stats::Scalar localNumTLBAccesses;
        Stats::Scalar localNumTLBHits;
        Stats::Scalar localNumTLBMisses;
        Stats::Formula localTLBMissRate;

        // global_stats are as seen from the
        // CU's perspective taking into account
        // all coalesced requests.
        Stats::Scalar globalNumTLBAccesses;
        Stats::Scalar globalNumTLBHits;
        Stats::Scalar globalNumTLBMisses;
        Stats::Formula globalTLBMissRate;

        // from the CU perspective (global)
        Stats::Scalar accessCycles;
        // from the CU perspective (global)
        Stats::Scalar pageTableCycles;
        Stats::Scalar numUniquePages;
        // from the perspective of this TLB
        Stats::Scalar localCycles;
        // from the perspective of this TLB
        Stats::Formula localLatency;
        // average reuse distance per page,
        // then averaged over all pages
        Stats::Scalar avgReuseDistance;

        void regStats();
        void updatePageFootprint(Addr virt_page_addr);
        void printAccessPattern();

        Fault translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
                              int &latency);

        void translateTiming(RequestPtr req, ThreadContext *tc,
                             Translation *translation, Mode mode,
                             int &latency);

        Tick doMmuRegRead(ThreadContext *tc, Packet *pkt);
        Tick doMmuRegWrite(ThreadContext *tc, Packet *pkt);

        GpuTlbEntry *insert(Addr vpn, GpuTlbEntry &entry);

        // Checkpointing
        virtual void serialize(CheckpointOut& cp) const;
        virtual void unserialize(CheckpointIn& cp);
        void issueTranslation();
        enum tlbOutcome {TLB_HIT, TLB_MISS, PAGE_WALK, MISS_RETURN};
        bool tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats);

        void handleTranslationReturn(Addr addr, tlbOutcome outcome,
                                     PacketPtr pkt);

        void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome);

        void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
                                    GpuTlbEntry *tlb_entry, Mode mode);

        void updatePhysAddresses(Addr virt_page_addr, GpuTlbEntry *tlb_entry,
                                 Addr phys_page_addr);

        void issueTLBLookup(PacketPtr pkt);

        // CpuSidePort is the TLB Port closer to the CPU/CU side
        class CpuSidePort : public SlavePort
        {
          public:
            CpuSidePort(const std::string &_name, GpuTLB * gpu_TLB,
                        PortID _index)
                : SlavePort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }

          protected:
            GpuTLB *tlb;
            int index;

            virtual bool recvTimingReq(PacketPtr pkt);
            virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
            virtual void recvFunctional(PacketPtr pkt);
            virtual void recvRangeChange() { }
            virtual void recvReqRetry();
            virtual void recvRespRetry() { assert(false); }
            virtual AddrRangeList getAddrRanges() const;
        };

        /**
         * MemSidePort is the TLB Port closer to the memory side.
         * If this is a last level TLB, then this port will not be connected.
         *
         * Future action item: if we ever do real page walks, then this port
         * should be connected to a RubyPort.
         */
        class MemSidePort : public MasterPort
        {
          public:
            MemSidePort(const std::string &_name, GpuTLB * gpu_TLB,
                        PortID _index)
                : MasterPort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }

            std::deque<PacketPtr> retries;

          protected:
            GpuTLB *tlb;
            int index;

            virtual bool recvTimingResp(PacketPtr pkt);
            virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
            virtual void recvFunctional(PacketPtr pkt) { }
            virtual void recvRangeChange() { }
            virtual void recvReqRetry();
        };

        // TLB ports on the cpu side
        std::vector<CpuSidePort*> cpuSidePort;
        // TLB ports on the memory side
        std::vector<MemSidePort*> memSidePort;

        BaseMasterPort &getMasterPort(const std::string &if_name,
                                      PortID idx=InvalidPortID);

        BaseSlavePort &getSlavePort(const std::string &if_name,
                                    PortID idx=InvalidPortID);

        /**
         * TLB TranslationState: this is currently somewhat of a
         * bastardization of the usage of SenderState. The receiver of a
         * packet is not usually supposed to look at the contents of the
         * senderState; you're really only supposed to look at what you
         * pushed on, pop it off, and send it back.
         *
         * However, since there is state that we want to pass to the TLBs
         * using the send/recv Timing/Functional/etc. APIs, which don't allow
         * for new arguments, we need a common TLB senderState to pass
         * between TLBs, both "forwards" and "backwards."
         *
         * So, basically, the rule is that any packet received by a TLB port
         * (cpuside OR memside) must be safely castable to a
         * TranslationState.
         */

        struct TranslationState : public Packet::SenderState
        {
            // TLB mode, read or write
            Mode tlbMode;
            // Thread context associated with this req
            ThreadContext *tc;

            /*
             * TLB entry to be populated and passed back; it is used to
             * fill previous (higher-level) TLBs. Equivalent to the data
             * cache concept of "data return."
             */
            GpuTlbEntry *tlbEntry;
            // Is this a TLB prefetch request?
            bool prefetch;
            // When was the req for this translation issued
            uint64_t issueTime;
            // Remember where this came from
            std::vector<SlavePort*> ports;

            // keep track of #uncoalesced reqs per packet per TLB level;
            // reqCnt at a level >= reqCnt at a higher level
            std::vector<int> reqCnt;
            // TLB level this packet hit in; 0 if it hit in the page table
            int hitLevel;
            Packet::SenderState *saved;

            TranslationState(Mode tlb_mode, ThreadContext *_tc,
                             bool _prefetch=false,
                             Packet::SenderState *_saved=nullptr)
                : tlbMode(tlb_mode), tc(_tc), tlbEntry(nullptr),
                  prefetch(_prefetch), issueTime(0),
                  hitLevel(0), saved(_saved) { }
        };
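
        /**
         * The push/pop convention in practice (a hedged sketch; tc and
         * pkt are illustrative locals, not members of this header):
         *
         * @code
         * // sender side: chain the state in front of any existing one
         * pkt->senderState =
         *     new TranslationState(BaseTLB::Read, tc, false,
         *                          pkt->senderState);
         *
         * // receiver side: any TLB port may safely downcast
         * TranslationState *state =
         *     safe_cast<TranslationState*>(pkt->senderState);
         * @endcode
         */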

        // maximum number of permitted coalesced requests per cycle
        int maxCoalescedReqs;

        // Current number of outstanding coalesced requests.
        // Should be <= maxCoalescedReqs
        int outstandingReqs;

        /**
         * A TLBEvent is scheduled after the TLB lookup and helps us take
         * the appropriate action, e.g., update the TLB on a hit, send the
         * request to the lower-level TLB on a miss, or start a page walk
         * if this was the last-level TLB.
         */
        void translationReturn(Addr virtPageAddr, tlbOutcome outcome,
                               PacketPtr pkt);

        class TLBEvent : public Event
        {
          private:
            GpuTLB *tlb;
            Addr virtPageAddr;
            /**
             * outcome can be TLB_HIT, TLB_MISS, or PAGE_WALK
             */
            tlbOutcome outcome;
            PacketPtr pkt;

          public:
            TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome,
                     PacketPtr _pkt);

            void process();
            const char *description() const;

            // updateOutcome updates the tlbOutcome of a TLBEvent
            void updateOutcome(tlbOutcome _outcome);
            Addr getTLBEventVaddr();
        };
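
        /**
         * How the lookup path is expected to use TLBEvent (a sketch under
         * the assumption that pending events are keyed by virtual page
         * address in translationReturnEvent below; see gpu_tlb.cc for the
         * real flow):
         *
         * @code
         * bool hit = tlbLookup(pkt->req, tc, update_stats);
         * TLBEvent *event = new TLBEvent(this, virt_page_addr,
         *                                hit ? TLB_HIT : TLB_MISS, pkt);
         * translationReturnEvent[virt_page_addr] = event;
         * schedule(event, curTick() + ticks(hitLatency));
         * @endcode
         */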

        std::unordered_map<Addr, TLBEvent*> translationReturnEvent;

        // this FIFO queue keeps track of the virt. page addresses
        // that are pending cleanup
        std::queue<Addr> cleanupQueue;

        // the cleanupEvent is scheduled after a TLBEvent triggers in order
        // to free memory and do the required clean-up
        void cleanup();

        EventFunctionWrapper cleanupEvent;

        /**
         * This hash map uses the virtual page address as a key
         * and keeps track of the total number of accesses per page.
         */

        struct AccessInfo
        {
            unsigned int lastTimeAccessed; // last access to this page
            unsigned int accessesPerPage;
            // need to divide it by accessesPerPage at the end
            unsigned int totalReuseDistance;

            /**
             * The field below helps us compute the access distance,
             * that is, the number of (coalesced) TLB accesses that
             * happened in between each access to this page.
             *
             * localTLBAccesses[x] is the value of localNumTLBAccesses
             * when the page <Addr> was accessed for the <x>th time.
             */
            std::vector<unsigned int> localTLBAccesses;
            unsigned int sumDistance;
            unsigned int meanDistance;
        };
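
        /**
         * A sketch of the access-distance computation implied above
         * (illustrative only; info stands for a hypothetical AccessInfo
         * with at least two recorded accesses):
         *
         * @code
         * unsigned int sum = 0;
         * for (size_t x = 1; x < info.localTLBAccesses.size(); ++x)
         *     sum += info.localTLBAccesses[x] - info.localTLBAccesses[x - 1];
         * info.sumDistance = sum;
         * info.meanDistance = sum / (info.localTLBAccesses.size() - 1);
         * @endcode
         */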

        typedef std::unordered_map<Addr, AccessInfo> AccessPatternTable;
        AccessPatternTable TLBFootprint;

        // Called at the end of simulation to dump page access stats.
        void exitCallback();

        EventFunctionWrapper exitEvent;
    };
}

#endif // __GPU_TLB_HH__