// gpu_tlb.hh revision 13449
18706Sandreas.hansson@arm.com/* 28706Sandreas.hansson@arm.com * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 38706Sandreas.hansson@arm.com * All rights reserved. 48706Sandreas.hansson@arm.com * 58706Sandreas.hansson@arm.com * For use for simulation and test purposes only 68706Sandreas.hansson@arm.com * 78706Sandreas.hansson@arm.com * Redistribution and use in source and binary forms, with or without 88706Sandreas.hansson@arm.com * modification, are permitted provided that the following conditions are met: 98706Sandreas.hansson@arm.com * 108706Sandreas.hansson@arm.com * 1. Redistributions of source code must retain the above copyright notice, 118706Sandreas.hansson@arm.com * this list of conditions and the following disclaimer. 128706Sandreas.hansson@arm.com * 135369Ssaidi@eecs.umich.edu * 2. Redistributions in binary form must reproduce the above copyright notice, 143005Sstever@eecs.umich.edu * this list of conditions and the following disclaimer in the documentation 153005Sstever@eecs.umich.edu * and/or other materials provided with the distribution. 163005Sstever@eecs.umich.edu * 173005Sstever@eecs.umich.edu * 3. Neither the name of the copyright holder nor the names of its 183005Sstever@eecs.umich.edu * contributors may be used to endorse or promote products derived from this 193005Sstever@eecs.umich.edu * software without specific prior written permission. 203005Sstever@eecs.umich.edu * 213005Sstever@eecs.umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 223005Sstever@eecs.umich.edu * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 233005Sstever@eecs.umich.edu * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 243005Sstever@eecs.umich.edu * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 253005Sstever@eecs.umich.edu * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 263005Sstever@eecs.umich.edu * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 273005Sstever@eecs.umich.edu * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 283005Sstever@eecs.umich.edu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 293005Sstever@eecs.umich.edu * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 303005Sstever@eecs.umich.edu * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 313005Sstever@eecs.umich.edu * POSSIBILITY OF SUCH DAMAGE. 323005Sstever@eecs.umich.edu * 333005Sstever@eecs.umich.edu * Authors: Lisa Hsu 343005Sstever@eecs.umich.edu */ 353005Sstever@eecs.umich.edu 363005Sstever@eecs.umich.edu#ifndef __GPU_TLB_HH__ 373005Sstever@eecs.umich.edu#define __GPU_TLB_HH__ 383005Sstever@eecs.umich.edu 393005Sstever@eecs.umich.edu#include <fstream> 403005Sstever@eecs.umich.edu#include <list> 412710SN/A#include <queue> 422710SN/A#include <string> 433005Sstever@eecs.umich.edu#include <vector> 442889SN/A 456654Snate@binkert.org#include "arch/generic/tlb.hh" 466654Snate@binkert.org#include "arch/x86/pagetable.hh" 476654Snate@binkert.org#include "arch/x86/pagetable_walker.hh" 482667SN/A#include "arch/x86/regs/segment.hh" 496654Snate@binkert.org#include "base/callback.hh" 506654Snate@binkert.org#include "base/logging.hh" 516654Snate@binkert.org#include "base/statistics.hh" 525457Ssaidi@eecs.umich.edu#include "gpu-compute/compute_unit.hh" 536654Snate@binkert.org#include "mem/mem_object.hh" 548169SLisa.Hsu@amd.com#include "mem/port.hh" 559100SBrad.Beckmann@amd.com#include "mem/request.hh" 568169SLisa.Hsu@amd.com#include "params/X86GPUTLB.hh" 578920Snilay@cs.wisc.edu#include "sim/sim_object.hh" 588169SLisa.Hsu@amd.com 593395Shsul@eecs.umich.educlass BaseTLB; 606981SLisa.Hsu@amd.comclass 
Packet; 613448Shsul@eecs.umich.educlass ThreadContext; 625369Ssaidi@eecs.umich.edu 633394Shsul@eecs.umich.edunamespace X86ISA 649197Snilay@cs.wisc.edu{ 659197Snilay@cs.wisc.edu class GpuTLB : public MemObject 669197Snilay@cs.wisc.edu { 679197Snilay@cs.wisc.edu protected: 689197Snilay@cs.wisc.edu friend class Walker; 699197Snilay@cs.wisc.edu 709197Snilay@cs.wisc.edu typedef std::list<TlbEntry*> EntryList; 719197Snilay@cs.wisc.edu 729197Snilay@cs.wisc.edu uint32_t configAddress; 739197Snilay@cs.wisc.edu 749197Snilay@cs.wisc.edu // TLB clock: will inherit clock from shader's clock period in terms 759197Snilay@cs.wisc.edu // of nuber of ticks of curTime (aka global simulation clock) 769197Snilay@cs.wisc.edu // The assignment of TLB clock from shader clock is done in the python 779197Snilay@cs.wisc.edu // config files. 789197Snilay@cs.wisc.edu int clock; 799197Snilay@cs.wisc.edu 809197Snilay@cs.wisc.edu public: 819197Snilay@cs.wisc.edu // clock related functions ; maps to-and-from Simulation ticks and 829197Snilay@cs.wisc.edu // object clocks. 
839197Snilay@cs.wisc.edu Tick frequency() const { return SimClock::Frequency / clock; } 849197Snilay@cs.wisc.edu 859197Snilay@cs.wisc.edu Tick 869197Snilay@cs.wisc.edu ticks(int numCycles) const 879197Snilay@cs.wisc.edu { 889197Snilay@cs.wisc.edu return (Tick)clock * numCycles; 899217Snilay@cs.wisc.edu } 909197Snilay@cs.wisc.edu 919197Snilay@cs.wisc.edu Tick curCycle() const { return curTick() / clock; } 929197Snilay@cs.wisc.edu Tick tickToCycles(Tick val) const { return val / clock;} 939197Snilay@cs.wisc.edu 949197Snilay@cs.wisc.edu typedef X86GPUTLBParams Params; 959197Snilay@cs.wisc.edu GpuTLB(const Params *p); 969197Snilay@cs.wisc.edu ~GpuTLB(); 979197Snilay@cs.wisc.edu 989197Snilay@cs.wisc.edu typedef enum BaseTLB::Mode Mode; 999197Snilay@cs.wisc.edu 1009197Snilay@cs.wisc.edu class Translation 1019197Snilay@cs.wisc.edu { 1029197Snilay@cs.wisc.edu public: 1039197Snilay@cs.wisc.edu virtual ~Translation() { } 1049197Snilay@cs.wisc.edu 1059197Snilay@cs.wisc.edu /** 1069197Snilay@cs.wisc.edu * Signal that the translation has been delayed due to a hw page 1079197Snilay@cs.wisc.edu * table walk. 1089197Snilay@cs.wisc.edu */ 1099197Snilay@cs.wisc.edu virtual void markDelayed() = 0; 1102957SN/A 1118920Snilay@cs.wisc.edu /** 1128920Snilay@cs.wisc.edu * The memory for this object may be dynamically allocated, and it 1132957SN/A * may be responsible for cleaning itslef up which will happen in 1148862Snilay@cs.wisc.edu * this function. Once it's called the object is no longer valid. 
1158862Snilay@cs.wisc.edu */ 1168467Snilay@cs.wisc.edu virtual void finish(Fault fault, const RequestPtr &req, 1172957SN/A ThreadContext *tc, Mode mode) = 0; 1182957SN/A }; 1192957SN/A 1202957SN/A void dumpAll(); 1212957SN/A TlbEntry *lookup(Addr va, bool update_lru=true); 1222957SN/A void setConfigAddress(uint32_t addr); 1238167SLisa.Hsu@amd.com 1249197Snilay@cs.wisc.edu protected: 1258167SLisa.Hsu@amd.com EntryList::iterator lookupIt(Addr va, bool update_lru=true); 1265369Ssaidi@eecs.umich.edu Walker *walker; 1278167SLisa.Hsu@amd.com 1288167SLisa.Hsu@amd.com public: 1298167SLisa.Hsu@amd.com Walker *getWalker(); 1308167SLisa.Hsu@amd.com void invalidateAll(); 1318167SLisa.Hsu@amd.com void invalidateNonGlobal(); 1328167SLisa.Hsu@amd.com void demapPage(Addr va, uint64_t asn); 1338167SLisa.Hsu@amd.com 1348168SLisa.Hsu@amd.com protected: 1358168SLisa.Hsu@amd.com int size; 1368168SLisa.Hsu@amd.com int assoc; 1378168SLisa.Hsu@amd.com int numSets; 1388167SLisa.Hsu@amd.com 1398167SLisa.Hsu@amd.com /** 1408168SLisa.Hsu@amd.com * true if this is a fully-associative TLB 1415369Ssaidi@eecs.umich.edu */ 1428920Snilay@cs.wisc.edu bool FA; 1439197Snilay@cs.wisc.edu Addr setMask; 1448920Snilay@cs.wisc.edu 1458920Snilay@cs.wisc.edu /** 1468920Snilay@cs.wisc.edu * Allocation Policy: true if we always allocate on a hit, false 1475369Ssaidi@eecs.umich.edu * otherwise. Default is true. 1485369Ssaidi@eecs.umich.edu */ 1498718Snilay@cs.wisc.edu bool allocationPolicy; 1509129Sandreas.hansson@arm.com 1519197Snilay@cs.wisc.edu /** 1529197Snilay@cs.wisc.edu * if true, then this is not the last level TLB 1539197Snilay@cs.wisc.edu */ 1549197Snilay@cs.wisc.edu bool hasMemSidePort; 1559197Snilay@cs.wisc.edu 1563005Sstever@eecs.umich.edu /** 1573395Shsul@eecs.umich.edu * Print out accessDistance stats. One stat file 1583395Shsul@eecs.umich.edu * per TLB. 
1598931Sandreas.hansson@arm.com */ 1609036Sandreas.hansson@arm.com bool accessDistance; 1613395Shsul@eecs.umich.edu 1628926Sandreas.hansson@arm.com std::vector<TlbEntry> tlb; 1638926Sandreas.hansson@arm.com 1648926Sandreas.hansson@arm.com /* 1658926Sandreas.hansson@arm.com * It's a per-set list. As long as we have not reached 1663395Shsul@eecs.umich.edu * the full capacity of the given set, grab an entry from 1679197Snilay@cs.wisc.edu * the freeList. 1689197Snilay@cs.wisc.edu */ 1699197Snilay@cs.wisc.edu std::vector<EntryList> freeList; 1708957Sjayneel@cs.wisc.edu 1718957Sjayneel@cs.wisc.edu /** 1728957Sjayneel@cs.wisc.edu * An entryList per set is the equivalent of an LRU stack; 1733005Sstever@eecs.umich.edu * it's used to guide replacement decisions. The head of the list 1744968Sacolyte@umich.edu * contains the MRU TLB entry of the given set. If the freeList 1759006Sandreas.hansson@arm.com * for this set is empty, the last element of the list 1764968Sacolyte@umich.edu * is evicted (i.e., dropped on the floor). 
1778887Sgeoffrey.blake@arm.com */ 1788887Sgeoffrey.blake@arm.com std::vector<EntryList> entryList; 1798887Sgeoffrey.blake@arm.com 1808887Sgeoffrey.blake@arm.com Fault translateInt(const RequestPtr &req, ThreadContext *tc); 1818896Snilay@cs.wisc.edu 1828896Snilay@cs.wisc.edu Fault translate(const RequestPtr &req, ThreadContext *tc, 1838896Snilay@cs.wisc.edu Translation *translation, Mode mode, bool &delayedResponse, 1848896Snilay@cs.wisc.edu bool timing, int &latency); 1858887Sgeoffrey.blake@arm.com 1868887Sgeoffrey.blake@arm.com public: 1878887Sgeoffrey.blake@arm.com // latencies for a TLB hit, miss and page fault 1888896Snilay@cs.wisc.edu int hitLatency; 1898896Snilay@cs.wisc.edu int missLatency1; 1908896Snilay@cs.wisc.edu int missLatency2; 1918896Snilay@cs.wisc.edu 1928896Snilay@cs.wisc.edu // local_stats are as seen from the TLB 1939268Smalek.musleh@gmail.com // without taking into account coalescing 1949268Smalek.musleh@gmail.com Stats::Scalar localNumTLBAccesses; 1958896Snilay@cs.wisc.edu Stats::Scalar localNumTLBHits; 1968896Snilay@cs.wisc.edu Stats::Scalar localNumTLBMisses; 1978896Snilay@cs.wisc.edu Stats::Formula localTLBMissRate; 1988896Snilay@cs.wisc.edu 1998896Snilay@cs.wisc.edu // global_stats are as seen from the 2009222Shestness@cs.wisc.edu // CU's perspective taking into account 2019268Smalek.musleh@gmail.com // all coalesced requests. 
2029268Smalek.musleh@gmail.com Stats::Scalar globalNumTLBAccesses; 2039268Smalek.musleh@gmail.com Stats::Scalar globalNumTLBHits; 2049222Shestness@cs.wisc.edu Stats::Scalar globalNumTLBMisses; 2059222Shestness@cs.wisc.edu Stats::Formula globalTLBMissRate; 2068887Sgeoffrey.blake@arm.com 2078887Sgeoffrey.blake@arm.com // from the CU perspective (global) 2088887Sgeoffrey.blake@arm.com Stats::Scalar accessCycles; 2098887Sgeoffrey.blake@arm.com // from the CU perspective (global) 2108887Sgeoffrey.blake@arm.com Stats::Scalar pageTableCycles; 2118801Sgblack@eecs.umich.edu Stats::Scalar numUniquePages; 2123481Shsul@eecs.umich.edu // from the perspective of this TLB 213 Stats::Scalar localCycles; 214 // from the perspective of this TLB 215 Stats::Formula localLatency; 216 // I take the avg. per page and then 217 // the avg. over all pages. 218 Stats::Scalar avgReuseDistance; 219 220 void regStats(); 221 void updatePageFootprint(Addr virt_page_addr); 222 void printAccessPattern(); 223 224 225 Fault translateAtomic(const RequestPtr &req, ThreadContext *tc, 226 Mode mode, int &latency); 227 228 void translateTiming(const RequestPtr &req, ThreadContext *tc, 229 Translation *translation, Mode mode, 230 int &latency); 231 232 Tick doMmuRegRead(ThreadContext *tc, Packet *pkt); 233 Tick doMmuRegWrite(ThreadContext *tc, Packet *pkt); 234 235 TlbEntry *insert(Addr vpn, TlbEntry &entry); 236 237 // Checkpointing 238 virtual void serialize(CheckpointOut& cp) const; 239 virtual void unserialize(CheckpointIn& cp); 240 void issueTranslation(); 241 enum tlbOutcome {TLB_HIT, TLB_MISS, PAGE_WALK, MISS_RETURN}; 242 bool tlbLookup(const RequestPtr &req, 243 ThreadContext *tc, bool update_stats); 244 245 void handleTranslationReturn(Addr addr, tlbOutcome outcome, 246 PacketPtr pkt); 247 248 void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome); 249 250 void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, 251 TlbEntry *tlb_entry, Mode mode); 252 253 void 
updatePhysAddresses(Addr virt_page_addr, TlbEntry *tlb_entry, 254 Addr phys_page_addr); 255 256 void issueTLBLookup(PacketPtr pkt); 257 258 // CpuSidePort is the TLB Port closer to the CPU/CU side 259 class CpuSidePort : public SlavePort 260 { 261 public: 262 CpuSidePort(const std::string &_name, GpuTLB * gpu_TLB, 263 PortID _index) 264 : SlavePort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { } 265 266 protected: 267 GpuTLB *tlb; 268 int index; 269 270 virtual bool recvTimingReq(PacketPtr pkt); 271 virtual Tick recvAtomic(PacketPtr pkt) { return 0; } 272 virtual void recvFunctional(PacketPtr pkt); 273 virtual void recvRangeChange() { } 274 virtual void recvReqRetry(); 275 virtual void recvRespRetry() { panic("recvRespRetry called"); } 276 virtual AddrRangeList getAddrRanges() const; 277 }; 278 279 /** 280 * MemSidePort is the TLB Port closer to the memory side 281 * If this is a last level TLB then this port will not be connected. 282 * 283 * Future action item: if we ever do real page walks, then this port 284 * should be connected to a RubyPort. 
285 */ 286 class MemSidePort : public MasterPort 287 { 288 public: 289 MemSidePort(const std::string &_name, GpuTLB * gpu_TLB, 290 PortID _index) 291 : MasterPort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { } 292 293 std::deque<PacketPtr> retries; 294 295 protected: 296 GpuTLB *tlb; 297 int index; 298 299 virtual bool recvTimingResp(PacketPtr pkt); 300 virtual Tick recvAtomic(PacketPtr pkt) { return 0; } 301 virtual void recvFunctional(PacketPtr pkt) { } 302 virtual void recvRangeChange() { } 303 virtual void recvReqRetry(); 304 }; 305 306 // TLB ports on the cpu Side 307 std::vector<CpuSidePort*> cpuSidePort; 308 // TLB ports on the memory side 309 std::vector<MemSidePort*> memSidePort; 310 311 BaseMasterPort &getMasterPort(const std::string &if_name, 312 PortID idx=InvalidPortID); 313 314 BaseSlavePort &getSlavePort(const std::string &if_name, 315 PortID idx=InvalidPortID); 316 317 /** 318 * TLB TranslationState: this currently is a somewhat bastardization of 319 * the usage of SenderState, whereby the receiver of a packet is not 320 * usually supposed to need to look at the contents of the senderState, 321 * you're really only supposed to look at what you pushed on, pop it 322 * off, and send it back. 323 * 324 * However, since there is state that we want to pass to the TLBs using 325 * the send/recv Timing/Functional/etc. APIs, which don't allow for new 326 * arguments, we need a common TLB senderState to pass between TLBs, 327 * both "forwards" and "backwards." 328 * 329 * So, basically, the rule is that any packet received by a TLB port 330 * (cpuside OR memside) must be safely castable to a TranslationState. 331 */ 332 333 struct TranslationState : public Packet::SenderState 334 { 335 // TLB mode, read or write 336 Mode tlbMode; 337 // Thread context associated with this req 338 ThreadContext *tc; 339 340 /* 341 * TLB entry to be populated and passed back and filled in 342 * previous TLBs. Equivalent to the data cache concept of 343 * "data return." 
344 */ 345 TlbEntry *tlbEntry; 346 // Is this a TLB prefetch request? 347 bool prefetch; 348 // When was the req for this translation issued 349 uint64_t issueTime; 350 // Remember where this came from 351 std::vector<SlavePort*>ports; 352 353 // keep track of #uncoalesced reqs per packet per TLB level; 354 // reqCnt per level >= reqCnt higher level 355 std::vector<int> reqCnt; 356 // TLB level this packet hit in; 0 if it hit in the page table 357 int hitLevel; 358 Packet::SenderState *saved; 359 360 TranslationState(Mode tlb_mode, ThreadContext *_tc, 361 bool _prefetch=false, 362 Packet::SenderState *_saved=nullptr) 363 : tlbMode(tlb_mode), tc(_tc), tlbEntry(nullptr), 364 prefetch(_prefetch), issueTime(0), 365 hitLevel(0),saved(_saved) { } 366 }; 367 368 // maximum number of permitted coalesced requests per cycle 369 int maxCoalescedReqs; 370 371 // Current number of outstandings coalesced requests. 372 // Should be <= maxCoalescedReqs 373 int outstandingReqs; 374 375 /** 376 * A TLBEvent is scheduled after the TLB lookup and helps us take the 377 * appropriate actions: 378 * (e.g., update TLB on a hit, 379 * send request to lower level TLB on a miss, 380 * or start a page walk if this was the last-level TLB). 381 */ 382 void translationReturn(Addr virtPageAddr, tlbOutcome outcome, 383 PacketPtr pkt); 384 385 class TLBEvent : public Event 386 { 387 private: 388 GpuTLB *tlb; 389 Addr virtPageAddr; 390 /** 391 * outcome can be TLB_HIT, TLB_MISS, or PAGE_WALK 392 */ 393 tlbOutcome outcome; 394 PacketPtr pkt; 395 396 public: 397 TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, 398 PacketPtr _pkt); 399 400 void process(); 401 const char *description() const; 402 403 // updateOutcome updates the tlbOutcome of a TLBEvent 404 void updateOutcome(tlbOutcome _outcome); 405 Addr getTLBEventVaddr(); 406 }; 407 408 std::unordered_map<Addr, TLBEvent*> translationReturnEvent; 409 410 // this FIFO queue keeps track of the virt. 
page addresses 411 // that are pending cleanup 412 std::queue<Addr> cleanupQueue; 413 414 // the cleanupEvent is scheduled after a TLBEvent triggers in order to 415 // free memory and do the required clean-up 416 void cleanup(); 417 418 EventFunctionWrapper cleanupEvent; 419 420 /** 421 * This hash map will use the virtual page address as a key 422 * and will keep track of total number of accesses per page 423 */ 424 425 struct AccessInfo 426 { 427 unsigned int lastTimeAccessed; // last access to this page 428 unsigned int accessesPerPage; 429 // need to divide it by accessesPerPage at the end 430 unsigned int totalReuseDistance; 431 432 /** 433 * The field below will help us compute the access distance, 434 * that is the number of (coalesced) TLB accesses that 435 * happened in between each access to this page 436 * 437 * localTLBAccesses[x] is the value of localTLBNumAccesses 438 * when the page <Addr> was accessed for the <x>th time 439 */ 440 std::vector<unsigned int> localTLBAccesses; 441 unsigned int sumDistance; 442 unsigned int meanDistance; 443 }; 444 445 typedef std::unordered_map<Addr, AccessInfo> AccessPatternTable; 446 AccessPatternTable TLBFootprint; 447 448 // Called at the end of simulation to dump page access stats. 449 void exitCallback(); 450 451 EventFunctionWrapper exitEvent; 452 }; 453} 454 455#endif // __GPU_TLB_HH__ 456