gpu_tlb.hh revision 13449
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Lisa Hsu
 */
353005Sstever@eecs.umich.edu
363005Sstever@eecs.umich.edu#ifndef __GPU_TLB_HH__
373005Sstever@eecs.umich.edu#define __GPU_TLB_HH__
383005Sstever@eecs.umich.edu
#include <deque>
#include <fstream>
#include <list>
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>

#include "arch/generic/tlb.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/regs/segment.hh"
#include "base/callback.hh"
#include "base/logging.hh"
#include "base/statistics.hh"
#include "gpu-compute/compute_unit.hh"
#include "mem/mem_object.hh"
#include "mem/port.hh"
#include "mem/request.hh"
#include "params/X86GPUTLB.hh"
#include "sim/sim_object.hh"
588169SLisa.Hsu@amd.com
// Forward declarations of types that this header refers to by pointer
// or by qualified name.
class BaseTLB;
class Packet;
class ThreadContext;
625369Ssaidi@eecs.umich.edu
633394Shsul@eecs.umich.edunamespace X86ISA
649197Snilay@cs.wisc.edu{
659197Snilay@cs.wisc.edu    class GpuTLB : public MemObject
669197Snilay@cs.wisc.edu    {
679197Snilay@cs.wisc.edu      protected:
689197Snilay@cs.wisc.edu        friend class Walker;
699197Snilay@cs.wisc.edu
709197Snilay@cs.wisc.edu        typedef std::list<TlbEntry*> EntryList;
719197Snilay@cs.wisc.edu
729197Snilay@cs.wisc.edu        uint32_t configAddress;
739197Snilay@cs.wisc.edu
749197Snilay@cs.wisc.edu        // TLB clock: will inherit clock from shader's clock period in terms
759197Snilay@cs.wisc.edu        // of nuber of ticks of curTime (aka global simulation clock)
769197Snilay@cs.wisc.edu        // The assignment of TLB clock from shader clock is done in the python
779197Snilay@cs.wisc.edu        // config files.
789197Snilay@cs.wisc.edu        int clock;
799197Snilay@cs.wisc.edu
809197Snilay@cs.wisc.edu      public:
819197Snilay@cs.wisc.edu        // clock related functions ; maps to-and-from Simulation ticks and
829197Snilay@cs.wisc.edu        // object clocks.
839197Snilay@cs.wisc.edu        Tick frequency() const { return SimClock::Frequency / clock; }
849197Snilay@cs.wisc.edu
859197Snilay@cs.wisc.edu        Tick
869197Snilay@cs.wisc.edu        ticks(int numCycles) const
879197Snilay@cs.wisc.edu        {
889197Snilay@cs.wisc.edu            return (Tick)clock * numCycles;
899217Snilay@cs.wisc.edu        }
909197Snilay@cs.wisc.edu
919197Snilay@cs.wisc.edu        Tick curCycle() const { return curTick() / clock; }
929197Snilay@cs.wisc.edu        Tick tickToCycles(Tick val) const { return val / clock;}
939197Snilay@cs.wisc.edu
949197Snilay@cs.wisc.edu        typedef X86GPUTLBParams Params;
959197Snilay@cs.wisc.edu        GpuTLB(const Params *p);
969197Snilay@cs.wisc.edu        ~GpuTLB();
979197Snilay@cs.wisc.edu
989197Snilay@cs.wisc.edu        typedef enum BaseTLB::Mode Mode;
999197Snilay@cs.wisc.edu
1009197Snilay@cs.wisc.edu        class Translation
1019197Snilay@cs.wisc.edu        {
1029197Snilay@cs.wisc.edu          public:
1039197Snilay@cs.wisc.edu            virtual ~Translation() { }
1049197Snilay@cs.wisc.edu
1059197Snilay@cs.wisc.edu            /**
1069197Snilay@cs.wisc.edu             * Signal that the translation has been delayed due to a hw page
1079197Snilay@cs.wisc.edu             * table walk.
1089197Snilay@cs.wisc.edu             */
1099197Snilay@cs.wisc.edu            virtual void markDelayed() = 0;
1102957SN/A
1118920Snilay@cs.wisc.edu            /**
1128920Snilay@cs.wisc.edu             * The memory for this object may be dynamically allocated, and it
1132957SN/A             * may be responsible for cleaning itslef up which will happen in
1148862Snilay@cs.wisc.edu             * this function. Once it's called the object is no longer valid.
1158862Snilay@cs.wisc.edu             */
1168467Snilay@cs.wisc.edu            virtual void finish(Fault fault, const RequestPtr &req,
1172957SN/A                                ThreadContext *tc, Mode mode) = 0;
1182957SN/A        };
1192957SN/A
1202957SN/A        void dumpAll();
1212957SN/A        TlbEntry *lookup(Addr va, bool update_lru=true);
1222957SN/A        void setConfigAddress(uint32_t addr);
1238167SLisa.Hsu@amd.com
1249197Snilay@cs.wisc.edu      protected:
1258167SLisa.Hsu@amd.com        EntryList::iterator lookupIt(Addr va, bool update_lru=true);
1265369Ssaidi@eecs.umich.edu        Walker *walker;
1278167SLisa.Hsu@amd.com
1288167SLisa.Hsu@amd.com      public:
1298167SLisa.Hsu@amd.com        Walker *getWalker();
1308167SLisa.Hsu@amd.com        void invalidateAll();
1318167SLisa.Hsu@amd.com        void invalidateNonGlobal();
1328167SLisa.Hsu@amd.com        void demapPage(Addr va, uint64_t asn);
1338167SLisa.Hsu@amd.com
1348168SLisa.Hsu@amd.com      protected:
1358168SLisa.Hsu@amd.com        int size;
1368168SLisa.Hsu@amd.com        int assoc;
1378168SLisa.Hsu@amd.com        int numSets;
1388167SLisa.Hsu@amd.com
1398167SLisa.Hsu@amd.com        /**
1408168SLisa.Hsu@amd.com         *  true if this is a fully-associative TLB
1415369Ssaidi@eecs.umich.edu         */
1428920Snilay@cs.wisc.edu        bool FA;
1439197Snilay@cs.wisc.edu        Addr setMask;
1448920Snilay@cs.wisc.edu
1458920Snilay@cs.wisc.edu        /**
1468920Snilay@cs.wisc.edu         * Allocation Policy: true if we always allocate on a hit, false
1475369Ssaidi@eecs.umich.edu         * otherwise. Default is true.
1485369Ssaidi@eecs.umich.edu         */
1498718Snilay@cs.wisc.edu        bool allocationPolicy;
1509129Sandreas.hansson@arm.com
1519197Snilay@cs.wisc.edu        /**
1529197Snilay@cs.wisc.edu         * if true, then this is not the last level TLB
1539197Snilay@cs.wisc.edu         */
1549197Snilay@cs.wisc.edu        bool hasMemSidePort;
1559197Snilay@cs.wisc.edu
1563005Sstever@eecs.umich.edu        /**
1573395Shsul@eecs.umich.edu         * Print out accessDistance stats. One stat file
1583395Shsul@eecs.umich.edu         * per TLB.
1598931Sandreas.hansson@arm.com         */
1609036Sandreas.hansson@arm.com        bool accessDistance;
1613395Shsul@eecs.umich.edu
1628926Sandreas.hansson@arm.com        std::vector<TlbEntry> tlb;
1638926Sandreas.hansson@arm.com
1648926Sandreas.hansson@arm.com        /*
1658926Sandreas.hansson@arm.com         * It's a per-set list. As long as we have not reached
1663395Shsul@eecs.umich.edu         * the full capacity of the given set, grab an entry from
1679197Snilay@cs.wisc.edu         * the freeList.
1689197Snilay@cs.wisc.edu         */
1699197Snilay@cs.wisc.edu        std::vector<EntryList> freeList;
1708957Sjayneel@cs.wisc.edu
1718957Sjayneel@cs.wisc.edu        /**
1728957Sjayneel@cs.wisc.edu         * An entryList per set is the equivalent of an LRU stack;
1733005Sstever@eecs.umich.edu         * it's used to guide replacement decisions. The head of the list
1744968Sacolyte@umich.edu         * contains the MRU TLB entry of the given set. If the freeList
1759006Sandreas.hansson@arm.com         * for this set is empty, the last element of the list
1764968Sacolyte@umich.edu         * is evicted (i.e., dropped on the floor).
1778887Sgeoffrey.blake@arm.com         */
1788887Sgeoffrey.blake@arm.com        std::vector<EntryList> entryList;
1798887Sgeoffrey.blake@arm.com
1808887Sgeoffrey.blake@arm.com        Fault translateInt(const RequestPtr &req, ThreadContext *tc);
1818896Snilay@cs.wisc.edu
1828896Snilay@cs.wisc.edu        Fault translate(const RequestPtr &req, ThreadContext *tc,
1838896Snilay@cs.wisc.edu                Translation *translation, Mode mode, bool &delayedResponse,
1848896Snilay@cs.wisc.edu                bool timing, int &latency);
1858887Sgeoffrey.blake@arm.com
1868887Sgeoffrey.blake@arm.com      public:
1878887Sgeoffrey.blake@arm.com        // latencies for a TLB hit, miss and page fault
1888896Snilay@cs.wisc.edu        int hitLatency;
1898896Snilay@cs.wisc.edu        int missLatency1;
1908896Snilay@cs.wisc.edu        int missLatency2;
1918896Snilay@cs.wisc.edu
1928896Snilay@cs.wisc.edu        // local_stats are as seen from the TLB
1939268Smalek.musleh@gmail.com        // without taking into account coalescing
1949268Smalek.musleh@gmail.com        Stats::Scalar localNumTLBAccesses;
1958896Snilay@cs.wisc.edu        Stats::Scalar localNumTLBHits;
1968896Snilay@cs.wisc.edu        Stats::Scalar localNumTLBMisses;
1978896Snilay@cs.wisc.edu        Stats::Formula localTLBMissRate;
1988896Snilay@cs.wisc.edu
1998896Snilay@cs.wisc.edu        // global_stats are as seen from the
2009222Shestness@cs.wisc.edu        // CU's perspective taking into account
2019268Smalek.musleh@gmail.com        // all coalesced requests.
2029268Smalek.musleh@gmail.com        Stats::Scalar globalNumTLBAccesses;
2039268Smalek.musleh@gmail.com        Stats::Scalar globalNumTLBHits;
2049222Shestness@cs.wisc.edu        Stats::Scalar globalNumTLBMisses;
2059222Shestness@cs.wisc.edu        Stats::Formula globalTLBMissRate;
2068887Sgeoffrey.blake@arm.com
2078887Sgeoffrey.blake@arm.com        // from the CU perspective (global)
2088887Sgeoffrey.blake@arm.com        Stats::Scalar accessCycles;
2098887Sgeoffrey.blake@arm.com        // from the CU perspective (global)
2108887Sgeoffrey.blake@arm.com        Stats::Scalar pageTableCycles;
2118801Sgblack@eecs.umich.edu        Stats::Scalar numUniquePages;
2123481Shsul@eecs.umich.edu        // from the perspective of this TLB
213        Stats::Scalar localCycles;
214        // from the perspective of this TLB
215        Stats::Formula localLatency;
216        // I take the avg. per page and then
217        // the avg. over all pages.
218        Stats::Scalar avgReuseDistance;
219
220        void regStats();
221        void updatePageFootprint(Addr virt_page_addr);
222        void printAccessPattern();
223
224
225        Fault translateAtomic(const RequestPtr &req, ThreadContext *tc,
226                              Mode mode, int &latency);
227
228        void translateTiming(const RequestPtr &req, ThreadContext *tc,
229                             Translation *translation, Mode mode,
230                             int &latency);
231
232        Tick doMmuRegRead(ThreadContext *tc, Packet *pkt);
233        Tick doMmuRegWrite(ThreadContext *tc, Packet *pkt);
234
235        TlbEntry *insert(Addr vpn, TlbEntry &entry);
236
237        // Checkpointing
238        virtual void serialize(CheckpointOut& cp) const;
239        virtual void unserialize(CheckpointIn& cp);
240        void issueTranslation();
241        enum tlbOutcome {TLB_HIT, TLB_MISS, PAGE_WALK, MISS_RETURN};
242        bool tlbLookup(const RequestPtr &req,
243                       ThreadContext *tc, bool update_stats);
244
245        void handleTranslationReturn(Addr addr, tlbOutcome outcome,
246                                     PacketPtr pkt);
247
248        void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome);
249
250        void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
251                                    TlbEntry *tlb_entry, Mode mode);
252
253        void updatePhysAddresses(Addr virt_page_addr, TlbEntry *tlb_entry,
254                                 Addr phys_page_addr);
255
256        void issueTLBLookup(PacketPtr pkt);
257
258        // CpuSidePort is the TLB Port closer to the CPU/CU side
259        class CpuSidePort : public SlavePort
260        {
261          public:
262            CpuSidePort(const std::string &_name, GpuTLB * gpu_TLB,
263                        PortID _index)
264                : SlavePort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }
265
266          protected:
267            GpuTLB *tlb;
268            int index;
269
270            virtual bool recvTimingReq(PacketPtr pkt);
271            virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
272            virtual void recvFunctional(PacketPtr pkt);
273            virtual void recvRangeChange() { }
274            virtual void recvReqRetry();
275            virtual void recvRespRetry() { panic("recvRespRetry called"); }
276            virtual AddrRangeList getAddrRanges() const;
277        };
278
279        /**
280         * MemSidePort is the TLB Port closer to the memory side
281         * If this is a last level TLB then this port will not be connected.
282         *
283         * Future action item: if we ever do real page walks, then this port
284         * should be connected to a RubyPort.
285         */
286        class MemSidePort : public MasterPort
287        {
288          public:
289            MemSidePort(const std::string &_name, GpuTLB * gpu_TLB,
290                        PortID _index)
291                : MasterPort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }
292
293            std::deque<PacketPtr> retries;
294
295          protected:
296            GpuTLB *tlb;
297            int index;
298
299            virtual bool recvTimingResp(PacketPtr pkt);
300            virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
301            virtual void recvFunctional(PacketPtr pkt) { }
302            virtual void recvRangeChange() { }
303            virtual void recvReqRetry();
304        };
305
306        // TLB ports on the cpu Side
307        std::vector<CpuSidePort*> cpuSidePort;
308        // TLB ports on the memory side
309        std::vector<MemSidePort*> memSidePort;
310
311        BaseMasterPort &getMasterPort(const std::string &if_name,
312                                      PortID idx=InvalidPortID);
313
314        BaseSlavePort &getSlavePort(const std::string &if_name,
315                                    PortID idx=InvalidPortID);
316
317        /**
318         * TLB TranslationState: this currently is a somewhat bastardization of
319         * the usage of SenderState, whereby the receiver of a packet is not
320         * usually supposed to need to look at the contents of the senderState,
321         * you're really only supposed to look at what you pushed on, pop it
322         * off, and send it back.
323         *
324         * However, since there is state that we want to pass to the TLBs using
325         * the send/recv Timing/Functional/etc. APIs, which don't allow for new
326         * arguments, we need a common TLB senderState to pass between TLBs,
327         * both "forwards" and "backwards."
328         *
329         * So, basically, the rule is that any packet received by a TLB port
330         * (cpuside OR memside) must be safely castable to a TranslationState.
331         */
332
333        struct TranslationState : public Packet::SenderState
334        {
335            // TLB mode, read or write
336            Mode tlbMode;
337            // Thread context associated with this req
338            ThreadContext *tc;
339
340            /*
341            * TLB entry to be populated and passed back and filled in
342            * previous TLBs.  Equivalent to the data cache concept of
343            * "data return."
344            */
345            TlbEntry *tlbEntry;
346            // Is this a TLB prefetch request?
347            bool prefetch;
348            // When was the req for this translation issued
349            uint64_t issueTime;
350            // Remember where this came from
351            std::vector<SlavePort*>ports;
352
353            // keep track of #uncoalesced reqs per packet per TLB level;
354            // reqCnt per level >= reqCnt higher level
355            std::vector<int> reqCnt;
356            // TLB level this packet hit in; 0 if it hit in the page table
357            int hitLevel;
358            Packet::SenderState *saved;
359
360            TranslationState(Mode tlb_mode, ThreadContext *_tc,
361                             bool _prefetch=false,
362                             Packet::SenderState *_saved=nullptr)
363                : tlbMode(tlb_mode), tc(_tc), tlbEntry(nullptr),
364                  prefetch(_prefetch), issueTime(0),
365                  hitLevel(0),saved(_saved) { }
366        };
367
368        // maximum number of permitted coalesced requests per cycle
369        int maxCoalescedReqs;
370
371        // Current number of outstandings coalesced requests.
372        // Should be <= maxCoalescedReqs
373        int outstandingReqs;
374
375        /**
376         * A TLBEvent is scheduled after the TLB lookup and helps us take the
377         * appropriate actions:
378         *  (e.g., update TLB on a hit,
379         *  send request to lower level TLB on a miss,
380         *  or start a page walk if this was the last-level TLB).
381         */
382        void translationReturn(Addr virtPageAddr, tlbOutcome outcome,
383                               PacketPtr pkt);
384
385        class TLBEvent : public Event
386        {
387            private:
388                GpuTLB *tlb;
389                Addr virtPageAddr;
390                /**
391                 * outcome can be TLB_HIT, TLB_MISS, or PAGE_WALK
392                 */
393                tlbOutcome outcome;
394                PacketPtr pkt;
395
396            public:
397                TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome,
398                        PacketPtr _pkt);
399
400                void process();
401                const char *description() const;
402
403                // updateOutcome updates the tlbOutcome of a TLBEvent
404                void updateOutcome(tlbOutcome _outcome);
405                Addr getTLBEventVaddr();
406        };
407
408        std::unordered_map<Addr, TLBEvent*> translationReturnEvent;
409
410        // this FIFO queue keeps track of the virt. page addresses
411        // that are pending cleanup
412        std::queue<Addr> cleanupQueue;
413
414        // the cleanupEvent is scheduled after a TLBEvent triggers in order to
415        // free memory and do the required clean-up
416        void cleanup();
417
418        EventFunctionWrapper cleanupEvent;
419
420        /**
421         * This hash map will use the virtual page address as a key
422         * and will keep track of total number of accesses per page
423         */
424
425        struct AccessInfo
426        {
427            unsigned int lastTimeAccessed; // last access to this page
428            unsigned int accessesPerPage;
429            // need to divide it by accessesPerPage at the end
430            unsigned int totalReuseDistance;
431
432            /**
433             * The field below will help us compute the access distance,
434             * that is the number of (coalesced) TLB accesses that
435             * happened in between each access to this page
436             *
437             * localTLBAccesses[x] is the value of localTLBNumAccesses
438             * when the page <Addr> was accessed for the <x>th time
439             */
440            std::vector<unsigned int> localTLBAccesses;
441            unsigned int sumDistance;
442            unsigned int meanDistance;
443        };
444
445        typedef std::unordered_map<Addr, AccessInfo> AccessPatternTable;
446        AccessPatternTable TLBFootprint;
447
448        // Called at the end of simulation to dump page access stats.
449        void exitCallback();
450
451        EventFunctionWrapper exitEvent;
452    };
453}
454
455#endif // __GPU_TLB_HH__
456