gpu_tlb.cc revision 13892
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Lisa Hsu
34 */
35
36#include "gpu-compute/gpu_tlb.hh"
37
38#include <cmath>
39#include <cstring>
40
41#include "arch/x86/faults.hh"
42#include "arch/x86/insts/microldstop.hh"
43#include "arch/x86/pagetable.hh"
44#include "arch/x86/pagetable_walker.hh"
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/x86_traits.hh"
47#include "base/bitfield.hh"
48#include "base/logging.hh"
49#include "base/output.hh"
50#include "base/trace.hh"
51#include "cpu/base.hh"
52#include "cpu/thread_context.hh"
53#include "debug/GPUPrefetch.hh"
54#include "debug/GPUTLB.hh"
55#include "mem/packet_access.hh"
56#include "mem/page_table.hh"
57#include "mem/request.hh"
58#include "sim/process.hh"
59
60namespace X86ISA
61{
62
63    GpuTLB::GpuTLB(const Params *p)
64        : ClockedObject(p), configAddress(0), size(p->size),
65          cleanupEvent([this]{ cleanup(); }, name(), false,
66                       Event::Maximum_Pri),
67          exitEvent([this]{ exitCallback(); }, name())
68    {
69        assoc = p->assoc;
70        assert(assoc <= size);
71        numSets = size/assoc;
72        allocationPolicy = p->allocationPolicy;
73        hasMemSidePort = false;
74        accessDistance = p->accessDistance;
75        clock = p->clk_domain->clockPeriod();
76
77        tlb.assign(size, TlbEntry());
78
79        freeList.resize(numSets);
80        entryList.resize(numSets);
81
82        for (int set = 0; set < numSets; ++set) {
83            for (int way = 0; way < assoc; ++way) {
84                int x = set * assoc + way;
85                freeList[set].push_back(&tlb.at(x));
86            }
87        }
88
89        FA = (size == assoc);
90
91        /**
92         * @warning: the set-associative version assumes you have a
93         * fixed page size of 4KB.
94         * If the page size is greather than 4KB (as defined in the
95         * TheISA::PageBytes), then there are various issues w/ the current
96         * implementation (you'd have the same 8KB page being replicated in
97         * different sets etc)
98         */
99        setMask = numSets - 1;
100
101    #if 0
102        // GpuTLB doesn't yet support full system
103        walker = p->walker;
104        walker->setTLB(this);
105    #endif
106
107        maxCoalescedReqs = p->maxOutstandingReqs;
108
109        // Do not allow maxCoalescedReqs to be more than the TLB associativity
110        if (maxCoalescedReqs > assoc) {
111            maxCoalescedReqs = assoc;
112            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
113        }
114
115        outstandingReqs = 0;
116        hitLatency = p->hitLatency;
117        missLatency1 = p->missLatency1;
118        missLatency2 = p->missLatency2;
119
120        // create the slave ports based on the number of connected ports
121        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
122            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
123                                  name(), i), this, i));
124        }
125
126        // create the master ports based on the number of connected ports
127        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
128            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
129                                  name(), i), this, i));
130        }
131    }
132
    // fixme: this is never called?
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        // (i.e., no translation-return events are still outstanding
        // when the TLB is torn down)
        assert(translationReturnEvent.empty());
    }
139
140    Port &
141    GpuTLB::getPort(const std::string &if_name, PortID idx)
142    {
143        if (if_name == "slave") {
144            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
145                panic("TLBCoalescer::getPort: unknown index %d\n", idx);
146            }
147
148            return *cpuSidePort[idx];
149        } else if (if_name == "master") {
150            if (idx >= static_cast<PortID>(memSidePort.size())) {
151                panic("TLBCoalescer::getPort: unknown index %d\n", idx);
152            }
153
154            hasMemSidePort = true;
155
156            return *memSidePort[idx];
157        } else {
158            panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
159        }
160    }
161
162    TlbEntry*
163    GpuTLB::insert(Addr vpn, TlbEntry &entry)
164    {
165        TlbEntry *newEntry = nullptr;
166
167        /**
168         * vpn holds the virtual page address
169         * The least significant bits are simply masked
170         */
171        int set = (vpn >> TheISA::PageShift) & setMask;
172
173        if (!freeList[set].empty()) {
174            newEntry = freeList[set].front();
175            freeList[set].pop_front();
176        } else {
177            newEntry = entryList[set].back();
178            entryList[set].pop_back();
179        }
180
181        *newEntry = entry;
182        newEntry->vaddr = vpn;
183        entryList[set].push_front(newEntry);
184
185        return newEntry;
186    }
187
    // Find the entry covering virtual address va in its set, returning an
    // iterator into entryList[set] (end() on a miss). When update_lru is
    // set, a hit also moves the entry to the MRU (front) position.
    GpuTLB::EntryList::iterator
    GpuTLB::lookupIt(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        if (FA) {
            // Fully associative: everything lives in set 0.
            assert(!set);
        }

        auto entry = entryList[set].begin();
        for (; entry != entryList[set].end(); ++entry) {
            int page_size = (*entry)->size();

            // Range check rather than exact match so larger pages are
            // matched by any address they cover.
            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                        "with size %#x.\n", va, (*entry)->vaddr, page_size);

                if (update_lru) {
                    // Re-insert the pointer at the front, drop the old
                    // node, then point the iterator at the new MRU slot.
                    entryList[set].push_front(*entry);
                    entryList[set].erase(entry);
                    entry = entryList[set].begin();
                }

                break;
            }
        }

        return entry;
    }
217
218    TlbEntry*
219    GpuTLB::lookup(Addr va, bool update_lru)
220    {
221        int set = (va >> TheISA::PageShift) & setMask;
222
223        auto entry = lookupIt(va, update_lru);
224
225        if (entry == entryList[set].end())
226            return nullptr;
227        else
228            return *entry;
229    }
230
231    void
232    GpuTLB::invalidateAll()
233    {
234        DPRINTF(GPUTLB, "Invalidating all entries.\n");
235
236        for (int i = 0; i < numSets; ++i) {
237            while (!entryList[i].empty()) {
238                TlbEntry *entry = entryList[i].front();
239                entryList[i].pop_front();
240                freeList[i].push_back(entry);
241            }
242        }
243    }
244
    // Latch a configuration-space address for this TLB.
    // NOTE(review): no visible reader of configAddress in this chunk —
    // presumably consumed elsewhere; verify against callers.
    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }
250
251    void
252    GpuTLB::invalidateNonGlobal()
253    {
254        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
255
256        for (int i = 0; i < numSets; ++i) {
257            for (auto entryIt = entryList[i].begin();
258                 entryIt != entryList[i].end();) {
259                if (!(*entryIt)->global) {
260                    freeList[i].push_back(*entryIt);
261                    entryList[i].erase(entryIt++);
262                } else {
263                    ++entryIt;
264                }
265            }
266        }
267    }
268
269    void
270    GpuTLB::demapPage(Addr va, uint64_t asn)
271    {
272
273        int set = (va >> TheISA::PageShift) & setMask;
274        auto entry = lookupIt(va, false);
275
276        if (entry != entryList[set].end()) {
277            freeList[set].push_back(*entry);
278            entryList[set].erase(entry);
279        }
280    }
281
    // Handle accesses to non-memory (internal) address spaces: CPUID
    // (unimplemented), MSRs, and legacy I/O ports. For MSRs the 64-bit
    // MSR number is decoded to a MiscReg index and the request is marked
    // memory-mapped IPR; unknown MSRs raise #GP(0). For I/O, PCI config
    // accesses via 0xCF8/0xCFC are special-cased; everything else maps
    // into the physical I/O address prefix.
    Fault
    GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Addresses references internal memory.\n");
        Addr vaddr = req->getVaddr();
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;

            // Decode the MSR number into the corresponding MiscReg index.
            switch (vaddr & ~IntAddrPrefixMask) {
              case 0x10:
                regNum = MISCREG_TSC;
                break;
              case 0x1B:
                regNum = MISCREG_APIC_BASE;
                break;
              case 0xFE:
                regNum = MISCREG_MTRRCAP;
                break;
              case 0x174:
                regNum = MISCREG_SYSENTER_CS;
                break;
              case 0x175:
                regNum = MISCREG_SYSENTER_ESP;
                break;
              case 0x176:
                regNum = MISCREG_SYSENTER_EIP;
                break;
              case 0x179:
                regNum = MISCREG_MCG_CAP;
                break;
              case 0x17A:
                regNum = MISCREG_MCG_STATUS;
                break;
              case 0x17B:
                regNum = MISCREG_MCG_CTL;
                break;
              case 0x1D9:
                regNum = MISCREG_DEBUG_CTL_MSR;
                break;
              case 0x1DB:
                regNum = MISCREG_LAST_BRANCH_FROM_IP;
                break;
              case 0x1DC:
                regNum = MISCREG_LAST_BRANCH_TO_IP;
                break;
              case 0x1DD:
                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
                break;
              case 0x1DE:
                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
                break;
              // Variable-range MTRR base/mask pairs 0-7.
              case 0x200:
                regNum = MISCREG_MTRR_PHYS_BASE_0;
                break;
              case 0x201:
                regNum = MISCREG_MTRR_PHYS_MASK_0;
                break;
              case 0x202:
                regNum = MISCREG_MTRR_PHYS_BASE_1;
                break;
              case 0x203:
                regNum = MISCREG_MTRR_PHYS_MASK_1;
                break;
              case 0x204:
                regNum = MISCREG_MTRR_PHYS_BASE_2;
                break;
              case 0x205:
                regNum = MISCREG_MTRR_PHYS_MASK_2;
                break;
              case 0x206:
                regNum = MISCREG_MTRR_PHYS_BASE_3;
                break;
              case 0x207:
                regNum = MISCREG_MTRR_PHYS_MASK_3;
                break;
              case 0x208:
                regNum = MISCREG_MTRR_PHYS_BASE_4;
                break;
              case 0x209:
                regNum = MISCREG_MTRR_PHYS_MASK_4;
                break;
              case 0x20A:
                regNum = MISCREG_MTRR_PHYS_BASE_5;
                break;
              case 0x20B:
                regNum = MISCREG_MTRR_PHYS_MASK_5;
                break;
              case 0x20C:
                regNum = MISCREG_MTRR_PHYS_BASE_6;
                break;
              case 0x20D:
                regNum = MISCREG_MTRR_PHYS_MASK_6;
                break;
              case 0x20E:
                regNum = MISCREG_MTRR_PHYS_BASE_7;
                break;
              case 0x20F:
                regNum = MISCREG_MTRR_PHYS_MASK_7;
                break;
              // Fixed-range MTRRs.
              case 0x250:
                regNum = MISCREG_MTRR_FIX_64K_00000;
                break;
              case 0x258:
                regNum = MISCREG_MTRR_FIX_16K_80000;
                break;
              case 0x259:
                regNum = MISCREG_MTRR_FIX_16K_A0000;
                break;
              case 0x268:
                regNum = MISCREG_MTRR_FIX_4K_C0000;
                break;
              case 0x269:
                regNum = MISCREG_MTRR_FIX_4K_C8000;
                break;
              case 0x26A:
                regNum = MISCREG_MTRR_FIX_4K_D0000;
                break;
              case 0x26B:
                regNum = MISCREG_MTRR_FIX_4K_D8000;
                break;
              case 0x26C:
                regNum = MISCREG_MTRR_FIX_4K_E0000;
                break;
              case 0x26D:
                regNum = MISCREG_MTRR_FIX_4K_E8000;
                break;
              case 0x26E:
                regNum = MISCREG_MTRR_FIX_4K_F0000;
                break;
              case 0x26F:
                regNum = MISCREG_MTRR_FIX_4K_F8000;
                break;
              case 0x277:
                regNum = MISCREG_PAT;
                break;
              case 0x2FF:
                regNum = MISCREG_DEF_TYPE;
                break;
              // Machine-check banks 0-7: CTL, STATUS, ADDR, MISC.
              case 0x400:
                regNum = MISCREG_MC0_CTL;
                break;
              case 0x404:
                regNum = MISCREG_MC1_CTL;
                break;
              case 0x408:
                regNum = MISCREG_MC2_CTL;
                break;
              case 0x40C:
                regNum = MISCREG_MC3_CTL;
                break;
              case 0x410:
                regNum = MISCREG_MC4_CTL;
                break;
              case 0x414:
                regNum = MISCREG_MC5_CTL;
                break;
              case 0x418:
                regNum = MISCREG_MC6_CTL;
                break;
              case 0x41C:
                regNum = MISCREG_MC7_CTL;
                break;
              case 0x401:
                regNum = MISCREG_MC0_STATUS;
                break;
              case 0x405:
                regNum = MISCREG_MC1_STATUS;
                break;
              case 0x409:
                regNum = MISCREG_MC2_STATUS;
                break;
              case 0x40D:
                regNum = MISCREG_MC3_STATUS;
                break;
              case 0x411:
                regNum = MISCREG_MC4_STATUS;
                break;
              case 0x415:
                regNum = MISCREG_MC5_STATUS;
                break;
              case 0x419:
                regNum = MISCREG_MC6_STATUS;
                break;
              case 0x41D:
                regNum = MISCREG_MC7_STATUS;
                break;
              case 0x402:
                regNum = MISCREG_MC0_ADDR;
                break;
              case 0x406:
                regNum = MISCREG_MC1_ADDR;
                break;
              case 0x40A:
                regNum = MISCREG_MC2_ADDR;
                break;
              case 0x40E:
                regNum = MISCREG_MC3_ADDR;
                break;
              case 0x412:
                regNum = MISCREG_MC4_ADDR;
                break;
              case 0x416:
                regNum = MISCREG_MC5_ADDR;
                break;
              case 0x41A:
                regNum = MISCREG_MC6_ADDR;
                break;
              case 0x41E:
                regNum = MISCREG_MC7_ADDR;
                break;
              case 0x403:
                regNum = MISCREG_MC0_MISC;
                break;
              case 0x407:
                regNum = MISCREG_MC1_MISC;
                break;
              case 0x40B:
                regNum = MISCREG_MC2_MISC;
                break;
              case 0x40F:
                regNum = MISCREG_MC3_MISC;
                break;
              case 0x413:
                regNum = MISCREG_MC4_MISC;
                break;
              case 0x417:
                regNum = MISCREG_MC5_MISC;
                break;
              case 0x41B:
                regNum = MISCREG_MC6_MISC;
                break;
              case 0x41F:
                regNum = MISCREG_MC7_MISC;
                break;
              // AMD64 extended-feature MSRs (0xC0000080-...).
              case 0xC0000080:
                regNum = MISCREG_EFER;
                break;
              case 0xC0000081:
                regNum = MISCREG_STAR;
                break;
              case 0xC0000082:
                regNum = MISCREG_LSTAR;
                break;
              case 0xC0000083:
                regNum = MISCREG_CSTAR;
                break;
              case 0xC0000084:
                regNum = MISCREG_SF_MASK;
                break;
              case 0xC0000100:
                regNum = MISCREG_FS_BASE;
                break;
              case 0xC0000101:
                regNum = MISCREG_GS_BASE;
                break;
              case 0xC0000102:
                regNum = MISCREG_KERNEL_GS_BASE;
                break;
              case 0xC0000103:
                regNum = MISCREG_TSC_AUX;
                break;
              case 0xC0010000:
                regNum = MISCREG_PERF_EVT_SEL0;
                break;
              case 0xC0010001:
                regNum = MISCREG_PERF_EVT_SEL1;
                break;
              case 0xC0010002:
                regNum = MISCREG_PERF_EVT_SEL2;
                break;
              case 0xC0010003:
                regNum = MISCREG_PERF_EVT_SEL3;
                break;
              case 0xC0010004:
                regNum = MISCREG_PERF_EVT_CTR0;
                break;
              case 0xC0010005:
                regNum = MISCREG_PERF_EVT_CTR1;
                break;
              case 0xC0010006:
                regNum = MISCREG_PERF_EVT_CTR2;
                break;
              case 0xC0010007:
                regNum = MISCREG_PERF_EVT_CTR3;
                break;
              case 0xC0010010:
                regNum = MISCREG_SYSCFG;
                break;
              case 0xC0010016:
                regNum = MISCREG_IORR_BASE0;
                break;
              case 0xC0010017:
                regNum = MISCREG_IORR_BASE1;
                break;
              case 0xC0010018:
                regNum = MISCREG_IORR_MASK0;
                break;
              case 0xC0010019:
                regNum = MISCREG_IORR_MASK1;
                break;
              case 0xC001001A:
                regNum = MISCREG_TOP_MEM;
                break;
              case 0xC001001D:
                regNum = MISCREG_TOP_MEM2;
                break;
              case 0xC0010114:
                regNum = MISCREG_VM_CR;
                break;
              case 0xC0010115:
                regNum = MISCREG_IGNNE;
                break;
              case 0xC0010116:
                regNum = MISCREG_SMM_CTL;
                break;
              case 0xC0010117:
                regNum = MISCREG_VM_HSAVE_PA;
                break;
              default:
                // Unrecognized MSR -> general protection fault.
                return std::make_shared<GeneralProtection>(0);
            }
            //The index is multiplied by the size of a MiscReg so that
            //any memory dependence calculations will not see these as
            //overlapping.
            req->setPaddr(regNum * sizeof(RegVal));
            return NoFault;
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
            // bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));

            // 0xCF8 (dword) is the PCI config-address register.
            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(RegVal));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                // 0xCFC-0xCFF is the PCI config-data window.
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                // Bit 31 enables config-space access; otherwise treat as
                // a plain I/O port.
                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
            return NoFault;
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }
    }
649
650    /**
651     * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
652     * and false on a TLB miss.
653     * Many of the checks about different modes have been converted to
654     * assertions, since these parts of the code are not really used.
655     * On a hit it will update the LRU stack.
656     */
657    bool
658    GpuTLB::tlbLookup(const RequestPtr &req,
659                      ThreadContext *tc, bool update_stats)
660    {
661        bool tlb_hit = false;
662    #ifndef NDEBUG
663        uint32_t flags = req->getFlags();
664        int seg = flags & SegmentFlagMask;
665    #endif
666
667        assert(seg != SEGMENT_REG_MS);
668        Addr vaddr = req->getVaddr();
669        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
670        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
671
672        if (m5Reg.prot) {
673            DPRINTF(GPUTLB, "In protected mode.\n");
674            // make sure we are in 64-bit mode
675            assert(m5Reg.mode == LongMode);
676
677            // If paging is enabled, do the translation.
678            if (m5Reg.paging) {
679                DPRINTF(GPUTLB, "Paging enabled.\n");
680                //update LRU stack on a hit
681                TlbEntry *entry = lookup(vaddr, true);
682
683                if (entry)
684                    tlb_hit = true;
685
686                if (!update_stats) {
687                    // functional tlb access for memory initialization
688                    // i.e., memory seeding or instr. seeding -> don't update
689                    // TLB and stats
690                    return tlb_hit;
691                }
692
693                localNumTLBAccesses++;
694
695                if (!entry) {
696                    localNumTLBMisses++;
697                } else {
698                    localNumTLBHits++;
699                }
700            }
701        }
702
703        return tlb_hit;
704    }
705
    // Full translation for a request: dispatches internal (MSR/IO)
    // accesses, applies segmentation checks outside long mode, walks the
    // (functional) page table on a TLB miss, performs paging protection
    // checks, and finally redirects local-APIC accesses in full system.
    // latency is an out-parameter used only when timing is true;
    // delayedResponse is always set to false on this path.
    Fault
    GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
                      Translation *translation, Mode mode,
                      bool &delayedResponse, bool timing, int &latency)
    {
        uint32_t flags = req->getFlags();
        int seg = flags & SegmentFlagMask;
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // If this is true, we're dealing with a request
        // to a non-memory address space.
        if (seg == SEGMENT_REG_MS) {
            return translateInt(req, tc);
        }

        delayedResponse = false;
        Addr vaddr = req->getVaddr();
        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

        // If protected mode has been enabled...
        if (m5Reg.prot) {
            DPRINTF(GPUTLB, "In protected mode.\n");
            // If we're not in 64-bit mode, do protection/limit checks
            if (m5Reg.mode != LongMode) {
                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                        "protection.\n");

                // Check for a null segment selector.
                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                    return std::make_shared<GeneralProtection>(0);
                }

                bool expandDown = false;
                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

                // Read/write permission checks apply only to data segments.
                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                    if (!attr.writable && (mode == BaseTLB::Write ||
                        storeCheck))
                        return std::make_shared<GeneralProtection>(0);

                    if (!attr.readable && mode == BaseTLB::Read)
                        return std::make_shared<GeneralProtection>(0);

                    expandDown = attr.expandDown;

                }

                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
                // This assumes we're not in 64 bit mode. If we were, the
                // default address size is 64 bits, overridable to 32.
                int size = 32;
                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

                // The CS default-size bit and the address-size override
                // prefix together select a 16-bit effective address size.
                if ((csAttr.defaultSize && sizeOverride) ||
                    (!csAttr.defaultSize && !sizeOverride)) {
                    size = 16;
                }

                Addr offset = bits(vaddr - base, size - 1, 0);
                Addr endOffset = offset + req->getSize() - 1;

                if (expandDown) {
                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                    warn_once("Expand down segments are untested.\n");

                    // Expand-down: valid offsets lie strictly above limit.
                    if (offset <= limit || endOffset <= limit)
                        return std::make_shared<GeneralProtection>(0);
                } else {
                    if (offset > limit || endOffset > limit)
                        return std::make_shared<GeneralProtection>(0);
                }
            }

            // If paging is enabled, do the translation.
            if (m5Reg.paging) {
                DPRINTF(GPUTLB, "Paging enabled.\n");
                // The vaddr already has the segment base applied.
                TlbEntry *entry = lookup(vaddr);
                localNumTLBAccesses++;

                if (!entry) {
                    localNumTLBMisses++;
                    if (timing) {
                        latency = missLatency1;
                    }

                    if (FullSystem) {
                        fatal("GpuTLB doesn't support full-system mode\n");
                    } else {
                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                                "at pc %#x.\n", vaddr, tc->instAddr());

                        // SE mode: consult the process page table directly.
                        Process *p = tc->getProcessPtr();
                        const EmulationPageTable::Entry *pte =
                            p->pTable->lookup(vaddr);

                        if (!pte && mode != BaseTLB::Execute) {
                            // penalize a "page fault" more
                            if (timing)
                                latency += missLatency2;

                            // The access may simply be below the stack;
                            // try growing it before declaring a fault.
                            if (p->fixupStackFault(vaddr))
                                pte = p->pTable->lookup(vaddr);
                        }

                        if (!pte) {
                            return std::make_shared<PageFault>(vaddr, true,
                                                               mode, true,
                                                               false);
                        } else {
                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                    alignedVaddr, pte->paddr);

                            // Install the translation so subsequent
                            // accesses hit.
                            TlbEntry gpuEntry(p->pid(), alignedVaddr,
                                              pte->paddr, false, false);
                            entry = insert(alignedVaddr, gpuEntry);
                        }

                        DPRINTF(GPUTLB, "Miss was serviced.\n");
                    }
                } else {
                    localNumTLBHits++;

                    if (timing) {
                        latency = hitLatency;
                    }
                }

                // Do paging protection checks.
                bool inUser = (m5Reg.cpl == 3 &&
                               !(flags & (CPL0FlagBit << FlagShift)));

                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
                bool badWrite = (!entry->writable && (inUser || cr0.wp));

                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                     badWrite)) {
                    // The page must have been present to get into the TLB in
                    // the first place. We'll assume the reserved bits are
                    // fine even though we're not checking them.
                    return std::make_shared<PageFault>(vaddr, true, mode,
                                                       inUser, false);
                }

                if (storeCheck && badWrite) {
                    // This would fault if this were a write, so return a page
                    // fault that reflects that happening.
                    return std::make_shared<PageFault>(vaddr, true,
                                                       BaseTLB::Write,
                                                       inUser, false);
                }


                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                        "checks.\n", entry->paddr);

                // Combine the frame address with the page offset.
                int page_size = entry->size();
                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
                req->setPaddr(paddr);

                if (entry->uncacheable)
                    req->setFlags(Request::UNCACHEABLE);
            } else {
                //Use the address which already has segmentation applied.
                DPRINTF(GPUTLB, "Paging disabled.\n");
                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
                req->setPaddr(vaddr);
            }
        } else {
            // Real mode
            DPRINTF(GPUTLB, "In real mode.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }

        // Check for an access to the local APIC
        if (FullSystem) {
            LocalApicBase localApicBase =
                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

            Addr baseAddr = localApicBase.base * PageBytes;
            Addr paddr = req->getPaddr();

            // Accesses to the APIC page are uncacheable and redirected
            // to the per-context APIC address range.
            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
                // Force the access to be uncacheable.
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                                  paddr - baseAddr));
            }
        }

        return NoFault;
    };
908
909    Fault
910    GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
911                            Mode mode, int &latency)
912    {
913        bool delayedResponse;
914
915        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
916                                 latency);
917    }
918
919    void
920    GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
921            Translation *translation, Mode mode, int &latency)
922    {
923        bool delayedResponse;
924        assert(translation);
925
926        Fault fault = GpuTLB::translate(req, tc, translation, mode,
927                                        delayedResponse, true, latency);
928
929        if (!delayedResponse)
930            translation->finish(fault, req, tc, mode);
931    }
932
    Walker*
    GpuTLB::getWalker()
    {
        // Accessor for the page table walker associated with this TLB so
        // external components can reference it directly.
        return walker;
    }
938
939
    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
        // Intentionally empty: this TLB checkpoints no state.
    }
944
    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
        // Intentionally empty: serialize() saves nothing, so there is
        // nothing to restore.
    }
949
    // Register all statistics for this TLB with the stats framework and
    // define the derived (formula) stats. "local" stats count coalesced
    // requests serviced by this TLB instance; "global" stats are weighted
    // by the per-level request counts (see issueTLBLookup()).
    void
    GpuTLB::regStats()
    {
        ClockedObject::regStats();

        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

        // Formula stat: local miss rate as a percentage.
        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        // Formula stat: average latency per coalesced request at this TLB.
        localLatency = localCycles / localNumTLBAccesses;

        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        // Formula stat: global miss rate as a percentage.
        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;

    }
1032
1033    /**
1034     * Do the TLB lookup for this coalesced request and schedule
1035     * another event <TLB access latency> cycles later.
1036     */
1037
    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt);
        assert(pkt->senderState);

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

        // Prefetch requests are excluded from all statistics.
        bool update_stats = !sender_state->prefetch;
        ThreadContext * tmp_tc = sender_state->tc;

        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        // reqCnt has one entry per TLB level; the back is the number of
        // coalesced requests represented by this packet at this level.
        int req_cnt = sender_state->reqCnt.back();

        if (update_stats) {
            // Negative-accumulation trick: subtract the issue tick now;
            // translationReturn() adds the completion tick later, so the
            // net contribution to the stat is the elapsed time.
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        const RequestPtr &tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState
            TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            // Copy the entry into senderState so the response can carry it
            // up the hierarchy.
            // NOTE(review): heap-allocated here; presumably freed by the
            // coalescer that owns senderState -- confirm.
            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as well
         * as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to lower
         * level TLB on a miss, or start a page walk if this was the last-level
         * TLB)
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        // Only one in-flight translation per virtual page is allowed; the
        // coalescer is expected to merge duplicates before they get here.
        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;
        assert(tlb_event);

        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

        // The lookup result becomes visible after the TLB's hit latency.
        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }
1118
    // Bind a translation-return event to its TLB, the virtual page it is
    // translating, the current lookup outcome, and the coalesced packet.
    // Scheduled at CPU_Tick_Pri priority.
    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
                               PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
        outcome(tlb_outcome), pkt(_pkt)
    {
    }
1125
1126    /**
1127     * Do Paging protection checks. If we encounter a page fault, then
1128     * an assertion is fired.
1129     */
1130    void
1131    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
1132            TlbEntry * tlb_entry, Mode mode)
1133    {
1134        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1135        uint32_t flags = pkt->req->getFlags();
1136        bool storeCheck = flags & (StoreCheck << FlagShift);
1137
1138        // Do paging protection checks.
1139        bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1140        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1141
1142        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1143
1144        if ((inUser && !tlb_entry->user) ||
1145            (mode == BaseTLB::Write && badWrite)) {
1146            // The page must have been present to get into the TLB in
1147            // the first place. We'll assume the reserved bits are
1148            // fine even though we're not checking them.
1149            panic("Page fault detected");
1150        }
1151
1152        if (storeCheck && badWrite) {
1153            // This would fault if this were a write, so return a page
1154            // fault that reflects that happening.
1155            panic("Page fault detected");
1156        }
1157    }
1158
1159    /**
1160     * handleTranslationReturn is called on a TLB hit,
1161     * when a TLB miss returns or when a page fault returns.
1162     * The latter calls handelHit with TLB miss as tlbOutcome.
1163     */
    void
    GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
            PacketPtr pkt)
    {

        assert(pkt);
        Addr vaddr = pkt->req->getVaddr();

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;

        TlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
            // On a hit the entry was stashed in senderState by
            // issueTLBLookup().
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                    vaddr);

            // We are returning either from a page walk or from a hit at a lower
            // TLB level. The senderState should be "carrying" a pointer to the
            // correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            // Cache the entry at this level when the allocation policy says
            // to. insert() copies *new_entry into the TLB.
            // NOTE(review): the heap-allocated new_entry is not freed here;
            // presumably senderState's owner frees tlbEntry -- confirm.
            if (allocationPolicy) {
                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        /**
         * At this point the packet carries an up-to-date tlbEntry pointer
         * in its senderState.
         * Next step is to do the paging protection checks.
         */
        DPRINTF(GPUTLB, "Entry found with vaddr %#x,  doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        // Panics on a protection violation (there is no fault-return path
        // in this timing flow).
        pagingProtectionChecks(tc, pkt, local_entry, mode);
        // Splice the page offset onto the translated frame address.
        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        // Since this packet will be sent through the cpu side slave port,
        // it must be converted to a response pkt if it is not one already
        if (pkt->isRequest()) {
            pkt->makeTimingResponse();
        }

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable) {
             pkt->req->setFlags(Request::UNCACHEABLE);
        }

        //send packet back to coalescer
        cpuSidePort[0]->sendTimingResp(pkt);
        //schedule cleanup event
        cleanupQueue.push(virt_page_addr);

        // schedule this only once per cycle.
        // The check is required because we might have multiple translations
        // returning the same cycle
        // this is a maximum priority event and must be on the same cycle
        // as the cleanup event in TLBCoalescer to avoid a race with
        // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
        if (!cleanupEvent.scheduled())
            schedule(cleanupEvent, curTick());
    }
1244
1245    /**
1246     * Here we take the appropriate actions based on the result of the
1247     * TLB lookup.
1248     */
1249    void
1250    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
1251                              PacketPtr pkt)
1252    {
1253        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1254
1255        assert(translationReturnEvent[virtPageAddr]);
1256        assert(pkt);
1257
1258        TranslationState *tmp_sender_state =
1259            safe_cast<TranslationState*>(pkt->senderState);
1260
1261        int req_cnt = tmp_sender_state->reqCnt.back();
1262        bool update_stats = !tmp_sender_state->prefetch;
1263
1264
1265        if (outcome == TLB_HIT) {
1266            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1267
1268            if (update_stats) {
1269                accessCycles += (req_cnt * curTick());
1270                localCycles += curTick();
1271            }
1272
1273        } else if (outcome == TLB_MISS) {
1274
1275            DPRINTF(GPUTLB, "This is a TLB miss\n");
1276            if (update_stats) {
1277                accessCycles += (req_cnt*curTick());
1278                localCycles += curTick();
1279            }
1280
1281            if (hasMemSidePort) {
1282                // the one cyle added here represent the delay from when we get
1283                // the reply back till when we propagate it to the coalescer
1284                // above.
1285                if (update_stats) {
1286                    accessCycles += (req_cnt * 1);
1287                    localCycles += 1;
1288                }
1289
1290                /**
1291                 * There is a TLB below. Send the coalesced request.
1292                 * We actually send the very first packet of all the
1293                 * pending packets for this virtual page address.
1294                 */
1295                if (!memSidePort[0]->sendTimingReq(pkt)) {
1296                    DPRINTF(GPUTLB, "Failed sending translation request to "
1297                            "lower level TLB for addr %#x\n", virtPageAddr);
1298
1299                    memSidePort[0]->retries.push_back(pkt);
1300                } else {
1301                    DPRINTF(GPUTLB, "Sent translation request to lower level "
1302                            "TLB for addr %#x\n", virtPageAddr);
1303                }
1304            } else {
1305                //this is the last level TLB. Start a page walk
1306                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1307                        "addr %#x\n", virtPageAddr);
1308
1309                if (update_stats)
1310                    pageTableCycles -= (req_cnt*curTick());
1311
1312                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1313                assert(tlb_event);
1314                tlb_event->updateOutcome(PAGE_WALK);
1315                schedule(tlb_event, curTick() + ticks(missLatency2));
1316            }
1317        } else if (outcome == PAGE_WALK) {
1318            if (update_stats)
1319                pageTableCycles += (req_cnt*curTick());
1320
1321            // Need to access the page table and update the TLB
1322            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1323                    virtPageAddr);
1324
1325            TranslationState *sender_state =
1326                safe_cast<TranslationState*>(pkt->senderState);
1327
1328            Process *p = sender_state->tc->getProcessPtr();
1329            Addr vaddr = pkt->req->getVaddr();
1330    #ifndef NDEBUG
1331            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1332            assert(alignedVaddr == virtPageAddr);
1333    #endif
1334            const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1335            if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1336                    p->fixupStackFault(vaddr)) {
1337                pte = p->pTable->lookup(vaddr);
1338            }
1339
1340            if (pte) {
1341                DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1342                        pte->paddr);
1343
1344                sender_state->tlbEntry =
1345                    new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
1346                                 false);
1347            } else {
1348                sender_state->tlbEntry = nullptr;
1349            }
1350
1351            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1352        } else if (outcome == MISS_RETURN) {
1353            /** we add an extra cycle in the return path of the translation
1354             * requests in between the various TLB levels.
1355             */
1356            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1357        } else {
1358            panic("Unexpected TLB outcome %d", outcome);
1359        }
1360    }
1361
    void
    GpuTLB::TLBEvent::process()
    {
        // Delegate to the TLB; outcome may have been re-purposed (e.g., to
        // PAGE_WALK or MISS_RETURN) since the event was created.
        tlb->translationReturn(virtPageAddr, outcome, pkt);
    }
1367
    const char*
    GpuTLB::TLBEvent::description() const
    {
        // Human-readable label used by event tracing.
        return "trigger translationDoneEvent";
    }
1373
    void
    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
    {
        // Re-purpose this event for its next scheduling (e.g., a miss that
        // escalates to a page walk, or a response from a lower level).
        outcome = _outcome;
    }
1379
    Addr
    GpuTLB::TLBEvent::getTLBEventVaddr()
    {
        // Virtual page address this event was created for; used to sanity
        // check responses coming back from a lower TLB level.
        return virtPageAddr;
    }
1385
1386    /*
1387     * recvTiming receives a coalesced timing request from a TLBCoalescer
1388     * and it calls issueTLBLookup()
1389     * It only rejects the packet if we have exceeded the max
1390     * outstanding number of requests for the TLB
1391     */
1392    bool
1393    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1394    {
1395        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1396            tlb->issueTLBLookup(pkt);
1397            // update number of outstanding translation requests
1398            tlb->outstandingReqs++;
1399            return true;
1400         } else {
1401            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1402                    tlb->outstandingReqs);
1403            return false;
1404         }
1405    }
1406
1407    /**
1408     * handleFuncTranslationReturn is called on a TLB hit,
1409     * when a TLB miss returns or when a page fault returns.
1410     * It updates LRU, inserts the TLB entry on a miss
1411     * depending on the allocation policy and does the required
1412     * protection checks. It does NOT create a new packet to
1413     * update the packet's addr; this is done in hsail-gpu code.
1414     */
    void
    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;
        Addr vaddr = pkt->req->getVaddr();

        TlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                    "%#x\n", vaddr);

            // On a hit the entry was stashed in senderState by the caller.
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                    "%#x\n", vaddr);

            // We are returning either from a page walk or from a hit at a lower
            // TLB level. The senderState should be "carrying" a pointer to the
            // correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            // Cache the entry at this level when the allocation policy says
            // to; insert() copies *new_entry into the TLB.
            if (allocationPolicy) {
                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);

                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        /**
         * Do paging checks if it's a normal functional access.  If it's for a
         * prefetch, then sometimes you can try to prefetch something that
         * won't pass protection. We don't actually want to fault because there
         * is no demand access to deem this a violation.  Just put it in the
         * TLB and it will fault if indeed a future demand access touches it in
         * violation.
         *
         * This feature could be used to explore security issues around
         * speculative memory accesses.
         */
        if (!sender_state->prefetch && sender_state->tlbEntry)
            pagingProtectionChecks(tc, pkt, local_entry, mode);

        // Splice the page offset onto the translated frame address.
        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable)
             pkt->req->setFlags(Request::UNCACHEABLE);
    }
1482
1483    // This is used for atomic translations. Need to
1484    // make it all happen during the same cycle.
1485    void
1486    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
1487    {
1488        TranslationState *sender_state =
1489            safe_cast<TranslationState*>(pkt->senderState);
1490
1491        ThreadContext *tc = sender_state->tc;
1492        bool update_stats = !sender_state->prefetch;
1493
1494        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1495                                        TheISA::PageBytes);
1496
1497        if (update_stats)
1498            tlb->updatePageFootprint(virt_page_addr);
1499
1500        // do the TLB lookup without updating the stats
1501        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1502        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1503
1504        // functional mode means no coalescing
1505        // global metrics are the same as the local metrics
1506        if (update_stats) {
1507            tlb->globalNumTLBAccesses++;
1508
1509            if (success) {
1510                sender_state->hitLevel = sender_state->reqCnt.size();
1511                tlb->globalNumTLBHits++;
1512            }
1513        }
1514
1515        if (!success) {
1516            if (update_stats)
1517                tlb->globalNumTLBMisses++;
1518            if (tlb->hasMemSidePort) {
1519                // there is a TLB below -> propagate down the TLB hierarchy
1520                tlb->memSidePort[0]->sendFunctional(pkt);
1521                // If no valid translation from a prefetch, then just return
1522                if (sender_state->prefetch && !pkt->req->hasPaddr())
1523                    return;
1524            } else {
1525                // Need to access the page table and update the TLB
1526                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1527                        virt_page_addr);
1528
1529                Process *p = tc->getProcessPtr();
1530
1531                Addr vaddr = pkt->req->getVaddr();
1532    #ifndef NDEBUG
1533                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1534                assert(alignedVaddr == virt_page_addr);
1535    #endif
1536
1537                const EmulationPageTable::Entry *pte =
1538                        p->pTable->lookup(vaddr);
1539                if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1540                        p->fixupStackFault(vaddr)) {
1541                    pte = p->pTable->lookup(vaddr);
1542                }
1543
1544                if (!sender_state->prefetch) {
1545                    // no PageFaults are permitted after
1546                    // the second page table lookup
1547                    assert(pte);
1548
1549                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1550                            pte->paddr);
1551
1552                    sender_state->tlbEntry =
1553                        new TlbEntry(p->pid(), virt_page_addr,
1554                                     pte->paddr, false, false);
1555                } else {
1556                    // If this was a prefetch, then do the normal thing if it
1557                    // was a successful translation.  Otherwise, send an empty
1558                    // TLB entry back so that it can be figured out as empty and
1559                    // handled accordingly.
1560                    if (pte) {
1561                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1562                                pte->paddr);
1563
1564                        sender_state->tlbEntry =
1565                            new TlbEntry(p->pid(), virt_page_addr,
1566                                         pte->paddr, false, false);
1567                    } else {
1568                        DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1569                                alignedVaddr);
1570
1571                        sender_state->tlbEntry = nullptr;
1572
1573                        return;
1574                    }
1575                }
1576            }
1577        } else {
1578            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1579                    tlb->lookup(pkt->req->getVaddr()));
1580
1581            TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1582                                             update_stats);
1583
1584            assert(entry);
1585
1586            auto p = sender_state->tc->getProcessPtr();
1587            sender_state->tlbEntry =
1588                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1589                             false, false);
1590        }
1591        // This is the function that would populate pkt->req with the paddr of
1592        // the translation. But if no translation happens (i.e Prefetch fails)
1593        // then the early returns in the above code wiill keep this function
1594        // from executing.
1595        tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1596    }
1597
    void
    GpuTLB::CpuSidePort::recvReqRetry()
    {
        // The CPUSidePort never sends anything but replies. No retries
        // expected. Reaching this indicates a protocol bug in the
        // connected coalescer.
        panic("recvReqRetry called");
    }
1605
1606    AddrRangeList
1607    GpuTLB::CpuSidePort::getAddrRanges() const
1608    {
1609        // currently not checked by the master
1610        AddrRangeList ranges;
1611
1612        return ranges;
1613    }
1614
1615    /**
1616     * MemSidePort receives the packet back.
1617     * We need to call the handleTranslationReturn
1618     * and propagate up the hierarchy.
1619     */
    bool
    GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
    {
        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
                virt_page_addr);

        // The TLBEvent registered when the request was issued is still in
        // the map; reuse it to propagate the response up the hierarchy.
        TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
        assert(tlb_event);
        assert(virt_page_addr == tlb_event->getTLBEventVaddr());

        // One extra cycle models the return-path delay between TLB levels.
        tlb_event->updateOutcome(MISS_RETURN);
        tlb->schedule(tlb_event, curTick()+tlb->ticks(1));

        return true;
    }
1638
    void
    GpuTLB::MemSidePort::recvReqRetry()
    {
        // No retries should reach the TLB. The retries
        // should only reach the TLBCoalescer.
        panic("recvReqRetry called");
    }
1646
1647    void
1648    GpuTLB::cleanup()
1649    {
1650        while (!cleanupQueue.empty()) {
1651            Addr cleanup_addr = cleanupQueue.front();
1652            cleanupQueue.pop();
1653
1654            // delete TLBEvent
1655            TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1656            delete old_tlb_event;
1657            translationReturnEvent.erase(cleanup_addr);
1658
1659            // update number of outstanding requests
1660            outstandingReqs--;
1661        }
1662
1663        /** the higher level coalescer should retry if it has
1664         * any pending requests.
1665         */
1666        for (int i = 0; i < cpuSidePort.size(); ++i) {
1667            cpuSidePort[i]->sendRetryReq();
1668        }
1669    }
1670
1671    void
1672    GpuTLB::updatePageFootprint(Addr virt_page_addr)
1673    {
1674
1675        std::pair<AccessPatternTable::iterator, bool> ret;
1676
1677        AccessInfo tmp_access_info;
1678        tmp_access_info.lastTimeAccessed = 0;
1679        tmp_access_info.accessesPerPage = 0;
1680        tmp_access_info.totalReuseDistance = 0;
1681        tmp_access_info.sumDistance = 0;
1682        tmp_access_info.meanDistance = 0;
1683
1684        ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1685                                  tmp_access_info));
1686
1687        bool first_page_access = ret.second;
1688
1689        if (first_page_access) {
1690            numUniquePages++;
1691        } else  {
1692            int accessed_before;
1693            accessed_before  = curTick() - ret.first->second.lastTimeAccessed;
1694            ret.first->second.totalReuseDistance += accessed_before;
1695        }
1696
1697        ret.first->second.accessesPerPage++;
1698        ret.first->second.lastTimeAccessed = curTick();
1699
1700        if (accessDistance) {
1701            ret.first->second.localTLBAccesses
1702                .push_back(localNumTLBAccesses.value());
1703        }
1704    }
1705
1706    void
1707    GpuTLB::exitCallback()
1708    {
1709        std::ostream *page_stat_file = nullptr;
1710
1711        if (accessDistance) {
1712
1713            // print per page statistics to a separate file (.csv format)
1714            // simout is the gem5 output directory (default is m5out or the one
1715            // specified with -d
1716            page_stat_file = simout.create(name().c_str())->stream();
1717
1718            // print header
1719            *page_stat_file << "page,max_access_distance,mean_access_distance, "
1720                            << "stddev_distance" << std::endl;
1721        }
1722
1723        // update avg. reuse distance footprint
1724        AccessPatternTable::iterator iter, iter_begin, iter_end;
1725        unsigned int sum_avg_reuse_distance_per_page = 0;
1726
1727        // iterate through all pages seen by this TLB
1728        for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1729            sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1730                                               iter->second.accessesPerPage;
1731
1732            if (accessDistance) {
1733                unsigned int tmp = iter->second.localTLBAccesses[0];
1734                unsigned int prev = tmp;
1735
1736                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1737                    if (i) {
1738                        tmp = prev + 1;
1739                    }
1740
1741                    prev = iter->second.localTLBAccesses[i];
1742                    // update the localTLBAccesses value
1743                    // with the actual differece
1744                    iter->second.localTLBAccesses[i] -= tmp;
1745                    // compute the sum of AccessDistance per page
1746                    // used later for mean
1747                    iter->second.sumDistance +=
1748                        iter->second.localTLBAccesses[i];
1749                }
1750
1751                iter->second.meanDistance =
1752                    iter->second.sumDistance / iter->second.accessesPerPage;
1753
1754                // compute std_dev and max  (we need a second round because we
1755                // need to know the mean value
1756                unsigned int max_distance = 0;
1757                unsigned int stddev_distance = 0;
1758
1759                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1760                    unsigned int tmp_access_distance =
1761                        iter->second.localTLBAccesses[i];
1762
1763                    if (tmp_access_distance > max_distance) {
1764                        max_distance = tmp_access_distance;
1765                    }
1766
1767                    unsigned int diff =
1768                        tmp_access_distance - iter->second.meanDistance;
1769                    stddev_distance += pow(diff, 2);
1770
1771                }
1772
1773                stddev_distance =
1774                    sqrt(stddev_distance/iter->second.accessesPerPage);
1775
1776                if (page_stat_file) {
1777                    *page_stat_file << std::hex << iter->first << ",";
1778                    *page_stat_file << std::dec << max_distance << ",";
1779                    *page_stat_file << std::dec << iter->second.meanDistance
1780                                    << ",";
1781                    *page_stat_file << std::dec << stddev_distance;
1782                    *page_stat_file << std::endl;
1783                }
1784
1785                // erase the localTLBAccesses array
1786                iter->second.localTLBAccesses.clear();
1787            }
1788        }
1789
1790        if (!TLBFootprint.empty()) {
1791            avgReuseDistance =
1792                sum_avg_reuse_distance_per_page / TLBFootprint.size();
1793        }
1794
1795        //clear the TLBFootprint map
1796        TLBFootprint.clear();
1797    }
1798} // namespace X86ISA
1799
1800X86ISA::GpuTLB*
1801X86GPUTLBParams::create()
1802{
1803    return new X86ISA::GpuTLB(this);
1804}
1805
1806