1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Lisa Hsu
34 */
35
36#include "gpu-compute/gpu_tlb.hh"
37
38#include <cmath>
39#include <cstring>
40
41#include "arch/x86/faults.hh"
42#include "arch/x86/insts/microldstop.hh"
43#include "arch/x86/pagetable.hh"
44#include "arch/x86/pagetable_walker.hh"
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/x86_traits.hh"
47#include "base/bitfield.hh"
48#include "base/logging.hh"
49#include "base/output.hh"
50#include "base/trace.hh"
51#include "cpu/base.hh"
52#include "cpu/thread_context.hh"
53#include "debug/GPUPrefetch.hh"
54#include "debug/GPUTLB.hh"
55#include "mem/packet_access.hh"
56#include "mem/page_table.hh"
57#include "mem/request.hh"
58#include "sim/process.hh"
59
60namespace X86ISA
61{
62
63    GpuTLB::GpuTLB(const Params *p)
64        : ClockedObject(p), configAddress(0), size(p->size),
65          cleanupEvent([this]{ cleanup(); }, name(), false,
66                       Event::Maximum_Pri),
67          exitEvent([this]{ exitCallback(); }, name())
68    {
69        assoc = p->assoc;
70        assert(assoc <= size);
71        numSets = size/assoc;
72        allocationPolicy = p->allocationPolicy;
73        hasMemSidePort = false;
74        accessDistance = p->accessDistance;
75        clock = p->clk_domain->clockPeriod();
76
77        tlb.assign(size, TlbEntry());
78
79        freeList.resize(numSets);
80        entryList.resize(numSets);
81
82        for (int set = 0; set < numSets; ++set) {
83            for (int way = 0; way < assoc; ++way) {
84                int x = set * assoc + way;
85                freeList[set].push_back(&tlb.at(x));
86            }
87        }
88
89        FA = (size == assoc);
90
91        /**
92         * @warning: the set-associative version assumes you have a
93         * fixed page size of 4KB.
94         * If the page size is greather than 4KB (as defined in the
95         * TheISA::PageBytes), then there are various issues w/ the current
96         * implementation (you'd have the same 8KB page being replicated in
97         * different sets etc)
98         */
99        setMask = numSets - 1;
100
101        maxCoalescedReqs = p->maxOutstandingReqs;
102
103        // Do not allow maxCoalescedReqs to be more than the TLB associativity
104        if (maxCoalescedReqs > assoc) {
105            maxCoalescedReqs = assoc;
106            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
107        }
108
109        outstandingReqs = 0;
110        hitLatency = p->hitLatency;
111        missLatency1 = p->missLatency1;
112        missLatency2 = p->missLatency2;
113
114        // create the slave ports based on the number of connected ports
115        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
116            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
117                                  name(), i), this, i));
118        }
119
120        // create the master ports based on the number of connected ports
121        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
122            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
123                                  name(), i), this, i));
124        }
125    }
126
    // fixme: this is never called?
    /**
     * Destructor: sanity-check that no coalesced translation-return
     * events are still outstanding when the TLB is destroyed.
     */
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        assert(translationReturnEvent.empty());
    }
133
134    Port &
135    GpuTLB::getPort(const std::string &if_name, PortID idx)
136    {
137        if (if_name == "slave") {
138            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
139                panic("TLBCoalescer::getPort: unknown index %d\n", idx);
140            }
141
142            return *cpuSidePort[idx];
143        } else if (if_name == "master") {
144            if (idx >= static_cast<PortID>(memSidePort.size())) {
145                panic("TLBCoalescer::getPort: unknown index %d\n", idx);
146            }
147
148            hasMemSidePort = true;
149
150            return *memSidePort[idx];
151        } else {
152            panic("TLBCoalescer::getPort: unknown port %s\n", if_name);
153        }
154    }
155
156    TlbEntry*
157    GpuTLB::insert(Addr vpn, TlbEntry &entry)
158    {
159        TlbEntry *newEntry = nullptr;
160
161        /**
162         * vpn holds the virtual page address
163         * The least significant bits are simply masked
164         */
165        int set = (vpn >> TheISA::PageShift) & setMask;
166
167        if (!freeList[set].empty()) {
168            newEntry = freeList[set].front();
169            freeList[set].pop_front();
170        } else {
171            newEntry = entryList[set].back();
172            entryList[set].pop_back();
173        }
174
175        *newEntry = entry;
176        newEntry->vaddr = vpn;
177        entryList[set].push_front(newEntry);
178
179        return newEntry;
180    }
181
    /**
     * Find the entry covering virtual address va within its set and
     * return an iterator to it; returns the set's end() iterator on a
     * miss. When update_lru is set, a hit moves the entry to the front
     * (MRU position) of the set's LRU stack.
     */
    GpuTLB::EntryList::iterator
    GpuTLB::lookupIt(Addr va, bool update_lru)
    {
        // Set selection from the page-number bits of va.
        int set = (va >> TheISA::PageShift) & setMask;

        if (FA) {
            assert(!set);
        }

        auto entry = entryList[set].begin();
        for (; entry != entryList[set].end(); ++entry) {
            int page_size = (*entry)->size();

            // Hit if va falls inside [vaddr, vaddr + page_size).
            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                        "with size %#x.\n", va, (*entry)->vaddr, page_size);

                if (update_lru) {
                    // Re-insert a copy at the front, drop the old node,
                    // then re-point the iterator at the new MRU node so
                    // the returned iterator stays valid.
                    entryList[set].push_front(*entry);
                    entryList[set].erase(entry);
                    entry = entryList[set].begin();
                }

                break;
            }
        }

        return entry;
    }
211
212    TlbEntry*
213    GpuTLB::lookup(Addr va, bool update_lru)
214    {
215        int set = (va >> TheISA::PageShift) & setMask;
216
217        auto entry = lookupIt(va, update_lru);
218
219        if (entry == entryList[set].end())
220            return nullptr;
221        else
222            return *entry;
223    }
224
225    void
226    GpuTLB::invalidateAll()
227    {
228        DPRINTF(GPUTLB, "Invalidating all entries.\n");
229
230        for (int i = 0; i < numSets; ++i) {
231            while (!entryList[i].empty()) {
232                TlbEntry *entry = entryList[i].front();
233                entryList[i].pop_front();
234                freeList[i].push_back(entry);
235            }
236        }
237    }
238
    /**
     * Record the PCI config-space address latched by the guest
     * (see the 0xCF8 handling in translateInt).
     */
    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }
244
245    void
246    GpuTLB::invalidateNonGlobal()
247    {
248        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
249
250        for (int i = 0; i < numSets; ++i) {
251            for (auto entryIt = entryList[i].begin();
252                 entryIt != entryList[i].end();) {
253                if (!(*entryIt)->global) {
254                    freeList[i].push_back(*entryIt);
255                    entryList[i].erase(entryIt++);
256                } else {
257                    ++entryIt;
258                }
259            }
260        }
261    }
262
263    void
264    GpuTLB::demapPage(Addr va, uint64_t asn)
265    {
266
267        int set = (va >> TheISA::PageShift) & setMask;
268        auto entry = lookupIt(va, false);
269
270        if (entry != entryList[set].end()) {
271            freeList[set].push_back(*entry);
272            entryList[set].erase(entry);
273        }
274    }
275
    /**
     * Handle a request that targets one of x86's internal (non-memory)
     * address spaces, selected by the prefix bits of the virtual address:
     * MSR accesses are mapped to fake physical addresses derived from the
     * MISCREG index, and I/O-port accesses are mapped into the physical
     * I/O / PCI-config regions. Unknown MSR numbers raise a
     * GeneralProtection fault; unknown prefixes are a fatal error.
     */
    Fault
    GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Addresses references internal memory.\n");
        Addr vaddr = req->getVaddr();
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;

            // Dispatch the MSR number (low bits of vaddr) to the matching
            // MISCREG index.
            switch (vaddr & ~IntAddrPrefixMask) {
              case 0x10:
                regNum = MISCREG_TSC;
                break;
              case 0x1B:
                regNum = MISCREG_APIC_BASE;
                break;
              case 0xFE:
                regNum = MISCREG_MTRRCAP;
                break;
              case 0x174:
                regNum = MISCREG_SYSENTER_CS;
                break;
              case 0x175:
                regNum = MISCREG_SYSENTER_ESP;
                break;
              case 0x176:
                regNum = MISCREG_SYSENTER_EIP;
                break;
              case 0x179:
                regNum = MISCREG_MCG_CAP;
                break;
              case 0x17A:
                regNum = MISCREG_MCG_STATUS;
                break;
              case 0x17B:
                regNum = MISCREG_MCG_CTL;
                break;
              case 0x1D9:
                regNum = MISCREG_DEBUG_CTL_MSR;
                break;
              case 0x1DB:
                regNum = MISCREG_LAST_BRANCH_FROM_IP;
                break;
              case 0x1DC:
                regNum = MISCREG_LAST_BRANCH_TO_IP;
                break;
              case 0x1DD:
                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
                break;
              case 0x1DE:
                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
                break;
              case 0x200:
                regNum = MISCREG_MTRR_PHYS_BASE_0;
                break;
              case 0x201:
                regNum = MISCREG_MTRR_PHYS_MASK_0;
                break;
              case 0x202:
                regNum = MISCREG_MTRR_PHYS_BASE_1;
                break;
              case 0x203:
                regNum = MISCREG_MTRR_PHYS_MASK_1;
                break;
              case 0x204:
                regNum = MISCREG_MTRR_PHYS_BASE_2;
                break;
              case 0x205:
                regNum = MISCREG_MTRR_PHYS_MASK_2;
                break;
              case 0x206:
                regNum = MISCREG_MTRR_PHYS_BASE_3;
                break;
              case 0x207:
                regNum = MISCREG_MTRR_PHYS_MASK_3;
                break;
              case 0x208:
                regNum = MISCREG_MTRR_PHYS_BASE_4;
                break;
              case 0x209:
                regNum = MISCREG_MTRR_PHYS_MASK_4;
                break;
              case 0x20A:
                regNum = MISCREG_MTRR_PHYS_BASE_5;
                break;
              case 0x20B:
                regNum = MISCREG_MTRR_PHYS_MASK_5;
                break;
              case 0x20C:
                regNum = MISCREG_MTRR_PHYS_BASE_6;
                break;
              case 0x20D:
                regNum = MISCREG_MTRR_PHYS_MASK_6;
                break;
              case 0x20E:
                regNum = MISCREG_MTRR_PHYS_BASE_7;
                break;
              case 0x20F:
                regNum = MISCREG_MTRR_PHYS_MASK_7;
                break;
              case 0x250:
                regNum = MISCREG_MTRR_FIX_64K_00000;
                break;
              case 0x258:
                regNum = MISCREG_MTRR_FIX_16K_80000;
                break;
              case 0x259:
                regNum = MISCREG_MTRR_FIX_16K_A0000;
                break;
              case 0x268:
                regNum = MISCREG_MTRR_FIX_4K_C0000;
                break;
              case 0x269:
                regNum = MISCREG_MTRR_FIX_4K_C8000;
                break;
              case 0x26A:
                regNum = MISCREG_MTRR_FIX_4K_D0000;
                break;
              case 0x26B:
                regNum = MISCREG_MTRR_FIX_4K_D8000;
                break;
              case 0x26C:
                regNum = MISCREG_MTRR_FIX_4K_E0000;
                break;
              case 0x26D:
                regNum = MISCREG_MTRR_FIX_4K_E8000;
                break;
              case 0x26E:
                regNum = MISCREG_MTRR_FIX_4K_F0000;
                break;
              case 0x26F:
                regNum = MISCREG_MTRR_FIX_4K_F8000;
                break;
              case 0x277:
                regNum = MISCREG_PAT;
                break;
              case 0x2FF:
                regNum = MISCREG_DEF_TYPE;
                break;
              case 0x400:
                regNum = MISCREG_MC0_CTL;
                break;
              case 0x404:
                regNum = MISCREG_MC1_CTL;
                break;
              case 0x408:
                regNum = MISCREG_MC2_CTL;
                break;
              case 0x40C:
                regNum = MISCREG_MC3_CTL;
                break;
              case 0x410:
                regNum = MISCREG_MC4_CTL;
                break;
              case 0x414:
                regNum = MISCREG_MC5_CTL;
                break;
              case 0x418:
                regNum = MISCREG_MC6_CTL;
                break;
              case 0x41C:
                regNum = MISCREG_MC7_CTL;
                break;
              case 0x401:
                regNum = MISCREG_MC0_STATUS;
                break;
              case 0x405:
                regNum = MISCREG_MC1_STATUS;
                break;
              case 0x409:
                regNum = MISCREG_MC2_STATUS;
                break;
              case 0x40D:
                regNum = MISCREG_MC3_STATUS;
                break;
              case 0x411:
                regNum = MISCREG_MC4_STATUS;
                break;
              case 0x415:
                regNum = MISCREG_MC5_STATUS;
                break;
              case 0x419:
                regNum = MISCREG_MC6_STATUS;
                break;
              case 0x41D:
                regNum = MISCREG_MC7_STATUS;
                break;
              case 0x402:
                regNum = MISCREG_MC0_ADDR;
                break;
              case 0x406:
                regNum = MISCREG_MC1_ADDR;
                break;
              case 0x40A:
                regNum = MISCREG_MC2_ADDR;
                break;
              case 0x40E:
                regNum = MISCREG_MC3_ADDR;
                break;
              case 0x412:
                regNum = MISCREG_MC4_ADDR;
                break;
              case 0x416:
                regNum = MISCREG_MC5_ADDR;
                break;
              case 0x41A:
                regNum = MISCREG_MC6_ADDR;
                break;
              case 0x41E:
                regNum = MISCREG_MC7_ADDR;
                break;
              case 0x403:
                regNum = MISCREG_MC0_MISC;
                break;
              case 0x407:
                regNum = MISCREG_MC1_MISC;
                break;
              case 0x40B:
                regNum = MISCREG_MC2_MISC;
                break;
              case 0x40F:
                regNum = MISCREG_MC3_MISC;
                break;
              case 0x413:
                regNum = MISCREG_MC4_MISC;
                break;
              case 0x417:
                regNum = MISCREG_MC5_MISC;
                break;
              case 0x41B:
                regNum = MISCREG_MC6_MISC;
                break;
              case 0x41F:
                regNum = MISCREG_MC7_MISC;
                break;
              case 0xC0000080:
                regNum = MISCREG_EFER;
                break;
              case 0xC0000081:
                regNum = MISCREG_STAR;
                break;
              case 0xC0000082:
                regNum = MISCREG_LSTAR;
                break;
              case 0xC0000083:
                regNum = MISCREG_CSTAR;
                break;
              case 0xC0000084:
                regNum = MISCREG_SF_MASK;
                break;
              case 0xC0000100:
                regNum = MISCREG_FS_BASE;
                break;
              case 0xC0000101:
                regNum = MISCREG_GS_BASE;
                break;
              case 0xC0000102:
                regNum = MISCREG_KERNEL_GS_BASE;
                break;
              case 0xC0000103:
                regNum = MISCREG_TSC_AUX;
                break;
              case 0xC0010000:
                regNum = MISCREG_PERF_EVT_SEL0;
                break;
              case 0xC0010001:
                regNum = MISCREG_PERF_EVT_SEL1;
                break;
              case 0xC0010002:
                regNum = MISCREG_PERF_EVT_SEL2;
                break;
              case 0xC0010003:
                regNum = MISCREG_PERF_EVT_SEL3;
                break;
              case 0xC0010004:
                regNum = MISCREG_PERF_EVT_CTR0;
                break;
              case 0xC0010005:
                regNum = MISCREG_PERF_EVT_CTR1;
                break;
              case 0xC0010006:
                regNum = MISCREG_PERF_EVT_CTR2;
                break;
              case 0xC0010007:
                regNum = MISCREG_PERF_EVT_CTR3;
                break;
              case 0xC0010010:
                regNum = MISCREG_SYSCFG;
                break;
              case 0xC0010016:
                regNum = MISCREG_IORR_BASE0;
                break;
              case 0xC0010017:
                regNum = MISCREG_IORR_BASE1;
                break;
              case 0xC0010018:
                regNum = MISCREG_IORR_MASK0;
                break;
              case 0xC0010019:
                regNum = MISCREG_IORR_MASK1;
                break;
              case 0xC001001A:
                regNum = MISCREG_TOP_MEM;
                break;
              case 0xC001001D:
                regNum = MISCREG_TOP_MEM2;
                break;
              case 0xC0010114:
                regNum = MISCREG_VM_CR;
                break;
              case 0xC0010115:
                regNum = MISCREG_IGNNE;
                break;
              case 0xC0010116:
                regNum = MISCREG_SMM_CTL;
                break;
              case 0xC0010117:
                regNum = MISCREG_VM_HSAVE_PA;
                break;
              default:
                // Unrecognized MSR: deliver #GP(0) as real hardware would.
                return std::make_shared<GeneralProtection>(0);
            }
            //The index is multiplied by the size of a MiscReg so that
            //any memory dependence calculations will not see these as
            //overlapping.
            req->setPaddr(regNum * sizeof(RegVal));
            return NoFault;
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
            // bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));

            // 0xCF8 (dword) is the PCI config-address port; it is handled
            // as a memory-mapped internal register.
            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(RegVal));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                // 0xCFC-0xCFF: PCI config-data window. Route to PCI config
                // space when the enable bit (31) of the latched config
                // address is set, otherwise to plain I/O space.
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                // Any other port: plain uncacheable I/O access.
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
            return NoFault;
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }
    }
643
644    /**
645     * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
646     * and false on a TLB miss.
647     * Many of the checks about different modes have been converted to
648     * assertions, since these parts of the code are not really used.
649     * On a hit it will update the LRU stack.
650     */
651    bool
652    GpuTLB::tlbLookup(const RequestPtr &req,
653                      ThreadContext *tc, bool update_stats)
654    {
655        bool tlb_hit = false;
656    #ifndef NDEBUG
657        uint32_t flags = req->getFlags();
658        int seg = flags & SegmentFlagMask;
659    #endif
660
661        assert(seg != SEGMENT_REG_MS);
662        Addr vaddr = req->getVaddr();
663        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
664        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
665
666        if (m5Reg.prot) {
667            DPRINTF(GPUTLB, "In protected mode.\n");
668            // make sure we are in 64-bit mode
669            assert(m5Reg.mode == LongMode);
670
671            // If paging is enabled, do the translation.
672            if (m5Reg.paging) {
673                DPRINTF(GPUTLB, "Paging enabled.\n");
674                //update LRU stack on a hit
675                TlbEntry *entry = lookup(vaddr, true);
676
677                if (entry)
678                    tlb_hit = true;
679
680                if (!update_stats) {
681                    // functional tlb access for memory initialization
682                    // i.e., memory seeding or instr. seeding -> don't update
683                    // TLB and stats
684                    return tlb_hit;
685                }
686
687                localNumTLBAccesses++;
688
689                if (!entry) {
690                    localNumTLBMisses++;
691                } else {
692                    localNumTLBHits++;
693                }
694            }
695        }
696
697        return tlb_hit;
698    }
699
700    Fault
701    GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
702                      Translation *translation, Mode mode,
703                      bool &delayedResponse, bool timing, int &latency)
704    {
705        uint32_t flags = req->getFlags();
706        int seg = flags & SegmentFlagMask;
707        bool storeCheck = flags & (StoreCheck << FlagShift);
708
709        // If this is true, we're dealing with a request
710        // to a non-memory address space.
711        if (seg == SEGMENT_REG_MS) {
712            return translateInt(req, tc);
713        }
714
715        delayedResponse = false;
716        Addr vaddr = req->getVaddr();
717        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
718
719        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
720
721        // If protected mode has been enabled...
722        if (m5Reg.prot) {
723            DPRINTF(GPUTLB, "In protected mode.\n");
724            // If we're not in 64-bit mode, do protection/limit checks
725            if (m5Reg.mode != LongMode) {
726                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
727                        "protection.\n");
728
729                // Check for a null segment selector.
730                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
731                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
732                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
733                    return std::make_shared<GeneralProtection>(0);
734                }
735
736                bool expandDown = false;
737                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
738
739                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
740                    if (!attr.writable && (mode == BaseTLB::Write ||
741                        storeCheck))
742                        return std::make_shared<GeneralProtection>(0);
743
744                    if (!attr.readable && mode == BaseTLB::Read)
745                        return std::make_shared<GeneralProtection>(0);
746
747                    expandDown = attr.expandDown;
748
749                }
750
751                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
752                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
753                // This assumes we're not in 64 bit mode. If we were, the
754                // default address size is 64 bits, overridable to 32.
755                int size = 32;
756                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
757                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
758
759                if ((csAttr.defaultSize && sizeOverride) ||
760                    (!csAttr.defaultSize && !sizeOverride)) {
761                    size = 16;
762                }
763
764                Addr offset = bits(vaddr - base, size - 1, 0);
765                Addr endOffset = offset + req->getSize() - 1;
766
767                if (expandDown) {
768                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
769                    warn_once("Expand down segments are untested.\n");
770
771                    if (offset <= limit || endOffset <= limit)
772                        return std::make_shared<GeneralProtection>(0);
773                } else {
774                    if (offset > limit || endOffset > limit)
775                        return std::make_shared<GeneralProtection>(0);
776                }
777            }
778
779            // If paging is enabled, do the translation.
780            if (m5Reg.paging) {
781                DPRINTF(GPUTLB, "Paging enabled.\n");
782                // The vaddr already has the segment base applied.
783                TlbEntry *entry = lookup(vaddr);
784                localNumTLBAccesses++;
785
786                if (!entry) {
787                    localNumTLBMisses++;
788                    if (timing) {
789                        latency = missLatency1;
790                    }
791
792                    if (FullSystem) {
793                        fatal("GpuTLB doesn't support full-system mode\n");
794                    } else {
795                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
796                                "at pc %#x.\n", vaddr, tc->instAddr());
797
798                        Process *p = tc->getProcessPtr();
799                        const EmulationPageTable::Entry *pte =
800                            p->pTable->lookup(vaddr);
801
802                        if (!pte && mode != BaseTLB::Execute) {
803                            // penalize a "page fault" more
804                            if (timing)
805                                latency += missLatency2;
806
807                            if (p->fixupStackFault(vaddr))
808                                pte = p->pTable->lookup(vaddr);
809                        }
810
811                        if (!pte) {
812                            return std::make_shared<PageFault>(vaddr, true,
813                                                               mode, true,
814                                                               false);
815                        } else {
816                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
817
818                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
819                                    alignedVaddr, pte->paddr);
820
821                            TlbEntry gpuEntry(p->pid(), alignedVaddr,
822                                              pte->paddr, false, false);
823                            entry = insert(alignedVaddr, gpuEntry);
824                        }
825
826                        DPRINTF(GPUTLB, "Miss was serviced.\n");
827                    }
828                } else {
829                    localNumTLBHits++;
830
831                    if (timing) {
832                        latency = hitLatency;
833                    }
834                }
835
836                // Do paging protection checks.
837                bool inUser = (m5Reg.cpl == 3 &&
838                               !(flags & (CPL0FlagBit << FlagShift)));
839
840                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
841                bool badWrite = (!entry->writable && (inUser || cr0.wp));
842
843                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
844                     badWrite)) {
845                    // The page must have been present to get into the TLB in
846                    // the first place. We'll assume the reserved bits are
847                    // fine even though we're not checking them.
848                    return std::make_shared<PageFault>(vaddr, true, mode,
849                                                       inUser, false);
850                }
851
852                if (storeCheck && badWrite) {
853                    // This would fault if this were a write, so return a page
854                    // fault that reflects that happening.
855                    return std::make_shared<PageFault>(vaddr, true,
856                                                       BaseTLB::Write,
857                                                       inUser, false);
858                }
859
860
861                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
862                        "checks.\n", entry->paddr);
863
864                int page_size = entry->size();
865                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
866                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
867                req->setPaddr(paddr);
868
869                if (entry->uncacheable)
870                    req->setFlags(Request::UNCACHEABLE);
871            } else {
872                //Use the address which already has segmentation applied.
873                DPRINTF(GPUTLB, "Paging disabled.\n");
874                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
875                req->setPaddr(vaddr);
876            }
877        } else {
878            // Real mode
879            DPRINTF(GPUTLB, "In real mode.\n");
880            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
881            req->setPaddr(vaddr);
882        }
883
884        // Check for an access to the local APIC
885        if (FullSystem) {
886            LocalApicBase localApicBase =
887                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
888
889            Addr baseAddr = localApicBase.base * PageBytes;
890            Addr paddr = req->getPaddr();
891
892            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
893                // Force the access to be uncacheable.
894                req->setFlags(Request::UNCACHEABLE);
895                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
896                                                  paddr - baseAddr));
897            }
898        }
899
900        return NoFault;
901    };
902
903    Fault
904    GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
905                            Mode mode, int &latency)
906    {
907        bool delayedResponse;
908
909        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
910                                 latency);
911    }
912
913    void
914    GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
915            Translation *translation, Mode mode, int &latency)
916    {
917        bool delayedResponse;
918        assert(translation);
919
920        Fault fault = GpuTLB::translate(req, tc, translation, mode,
921                                        delayedResponse, true, latency);
922
923        if (!delayedResponse)
924            translation->finish(fault, req, tc, mode);
925    }
926
    // Accessor for the page table walker owned by this TLB.
    Walker*
    GpuTLB::getWalker()
    {
        return walker;
    }
932
933
    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
        // Checkpointing of GPU TLB state is not implemented;
        // intentionally a no-op.
    }
938
    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
        // Checkpoint restore of GPU TLB state is not implemented;
        // intentionally a no-op (the TLB starts cold after a restore).
    }
943
    // Register all statistics for this TLB level. Names/descriptions must
    // stay stable since downstream tooling keys off the stat names.
    void
    GpuTLB::regStats()
    {
        ClockedObject::regStats();

        // "local" stats count activity at this TLB instance only.
        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

        // Formula stat: evaluated lazily whenever stats are dumped.
        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        // Formula stat: average queueing latency per coalesced access.
        localLatency = localCycles / localNumTLBAccesses;

        // "global" stats count each request inside a coalesced packet
        // (see the req_cnt weighting in issueTLBLookup()).
        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        // Formula stat: evaluated lazily whenever stats are dumped.
        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;

    }
1026
1027    /**
1028     * Do the TLB lookup for this coalesced request and schedule
1029     * another event <TLB access latency> cycles later.
1030     */
1031
1032    void
1033    GpuTLB::issueTLBLookup(PacketPtr pkt)
1034    {
1035        assert(pkt);
1036        assert(pkt->senderState);
1037
1038        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1039                                        TheISA::PageBytes);
1040
1041        TranslationState *sender_state =
1042                safe_cast<TranslationState*>(pkt->senderState);
1043
1044        bool update_stats = !sender_state->prefetch;
1045        ThreadContext * tmp_tc = sender_state->tc;
1046
1047        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
1048                virt_page_addr);
1049
1050        int req_cnt = sender_state->reqCnt.back();
1051
1052        if (update_stats) {
1053            accessCycles -= (curTick() * req_cnt);
1054            localCycles -= curTick();
1055            updatePageFootprint(virt_page_addr);
1056            globalNumTLBAccesses += req_cnt;
1057        }
1058
1059        tlbOutcome lookup_outcome = TLB_MISS;
1060        const RequestPtr &tmp_req = pkt->req;
1061
1062        // Access the TLB and figure out if it's a hit or a miss.
1063        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
1064
1065        if (success) {
1066            lookup_outcome = TLB_HIT;
1067            // Put the entry in SenderState
1068            TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
1069            assert(entry);
1070
1071            auto p = sender_state->tc->getProcessPtr();
1072            sender_state->tlbEntry =
1073                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1074                             false, false);
1075
1076            if (update_stats) {
1077                // the reqCnt has an entry per level, so its size tells us
1078                // which level we are in
1079                sender_state->hitLevel = sender_state->reqCnt.size();
1080                globalNumTLBHits += req_cnt;
1081            }
1082        } else {
1083            if (update_stats)
1084                globalNumTLBMisses += req_cnt;
1085        }
1086
1087        /*
1088         * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1089         * as the TLB access latency.
1090         *
1091         * We create and schedule a new TLBEvent which will help us take the
1092         * appropriate actions (e.g., update TLB on a hit, send request to lower
1093         * level TLB on a miss, or start a page walk if this was the last-level
1094         * TLB)
1095         */
1096        TLBEvent *tlb_event =
1097            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
1098
1099        if (translationReturnEvent.count(virt_page_addr)) {
1100            panic("Virtual Page Address %#x already has a return event\n",
1101                  virt_page_addr);
1102        }
1103
1104        translationReturnEvent[virt_page_addr] = tlb_event;
1105        assert(tlb_event);
1106
1107        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
1108                curTick() + this->ticks(hitLatency));
1109
1110        schedule(tlb_event, curTick() + this->ticks(hitLatency));
1111    }
1112
    // TLBEvent: scheduled at CPU-tick priority to process the outcome of a
    // lookup (hit/miss/page walk/miss return) for one virtual page address.
    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
                               PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
        outcome(tlb_outcome), pkt(_pkt)
    {
    }
1119
1120    /**
1121     * Do Paging protection checks. If we encounter a page fault, then
1122     * an assertion is fired.
1123     */
1124    void
1125    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
1126            TlbEntry * tlb_entry, Mode mode)
1127    {
1128        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1129        uint32_t flags = pkt->req->getFlags();
1130        bool storeCheck = flags & (StoreCheck << FlagShift);
1131
1132        // Do paging protection checks.
1133        bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1134        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1135
1136        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1137
1138        if ((inUser && !tlb_entry->user) ||
1139            (mode == BaseTLB::Write && badWrite)) {
1140            // The page must have been present to get into the TLB in
1141            // the first place. We'll assume the reserved bits are
1142            // fine even though we're not checking them.
1143            panic("Page fault detected");
1144        }
1145
1146        if (storeCheck && badWrite) {
1147            // This would fault if this were a write, so return a page
1148            // fault that reflects that happening.
1149            panic("Page fault detected");
1150        }
1151    }
1152
1153    /**
1154     * handleTranslationReturn is called on a TLB hit,
1155     * when a TLB miss returns or when a page fault returns.
1156     * The latter calls handelHit with TLB miss as tlbOutcome.
1157     */
1158    void
1159    GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
1160            PacketPtr pkt)
1161    {
1162
1163        assert(pkt);
1164        Addr vaddr = pkt->req->getVaddr();
1165
1166        TranslationState *sender_state =
1167            safe_cast<TranslationState*>(pkt->senderState);
1168
1169        ThreadContext *tc = sender_state->tc;
1170        Mode mode = sender_state->tlbMode;
1171
1172        TlbEntry *local_entry, *new_entry;
1173
1174        if (tlb_outcome == TLB_HIT) {
1175            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
1176            local_entry = sender_state->tlbEntry;
1177        } else {
1178            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
1179                    vaddr);
1180
1181            // We are returning either from a page walk or from a hit at a lower
1182            // TLB level. The senderState should be "carrying" a pointer to the
1183            // correct TLBEntry.
1184            new_entry = sender_state->tlbEntry;
1185            assert(new_entry);
1186            local_entry = new_entry;
1187
1188            if (allocationPolicy) {
1189                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1190                        virt_page_addr);
1191
1192                local_entry = insert(virt_page_addr, *new_entry);
1193            }
1194
1195            assert(local_entry);
1196        }
1197
1198        /**
1199         * At this point the packet carries an up-to-date tlbEntry pointer
1200         * in its senderState.
1201         * Next step is to do the paging protection checks.
1202         */
1203        DPRINTF(GPUTLB, "Entry found with vaddr %#x,  doing protection checks "
1204                "while paddr was %#x.\n", local_entry->vaddr,
1205                local_entry->paddr);
1206
1207        pagingProtectionChecks(tc, pkt, local_entry, mode);
1208        int page_size = local_entry->size();
1209        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1210        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1211
1212        // Since this packet will be sent through the cpu side slave port,
1213        // it must be converted to a response pkt if it is not one already
1214        if (pkt->isRequest()) {
1215            pkt->makeTimingResponse();
1216        }
1217
1218        pkt->req->setPaddr(paddr);
1219
1220        if (local_entry->uncacheable) {
1221             pkt->req->setFlags(Request::UNCACHEABLE);
1222        }
1223
1224        //send packet back to coalescer
1225        cpuSidePort[0]->sendTimingResp(pkt);
1226        //schedule cleanup event
1227        cleanupQueue.push(virt_page_addr);
1228
1229        // schedule this only once per cycle.
1230        // The check is required because we might have multiple translations
1231        // returning the same cycle
1232        // this is a maximum priority event and must be on the same cycle
1233        // as the cleanup event in TLBCoalescer to avoid a race with
1234        // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
1235        if (!cleanupEvent.scheduled())
1236            schedule(cleanupEvent, curTick());
1237    }
1238
1239    /**
1240     * Here we take the appropriate actions based on the result of the
1241     * TLB lookup.
1242     */
1243    void
1244    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
1245                              PacketPtr pkt)
1246    {
1247        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1248
1249        assert(translationReturnEvent[virtPageAddr]);
1250        assert(pkt);
1251
1252        TranslationState *tmp_sender_state =
1253            safe_cast<TranslationState*>(pkt->senderState);
1254
1255        int req_cnt = tmp_sender_state->reqCnt.back();
1256        bool update_stats = !tmp_sender_state->prefetch;
1257
1258
1259        if (outcome == TLB_HIT) {
1260            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1261
1262            if (update_stats) {
1263                accessCycles += (req_cnt * curTick());
1264                localCycles += curTick();
1265            }
1266
1267        } else if (outcome == TLB_MISS) {
1268
1269            DPRINTF(GPUTLB, "This is a TLB miss\n");
1270            if (update_stats) {
1271                accessCycles += (req_cnt*curTick());
1272                localCycles += curTick();
1273            }
1274
1275            if (hasMemSidePort) {
1276                // the one cyle added here represent the delay from when we get
1277                // the reply back till when we propagate it to the coalescer
1278                // above.
1279                if (update_stats) {
1280                    accessCycles += (req_cnt * 1);
1281                    localCycles += 1;
1282                }
1283
1284                /**
1285                 * There is a TLB below. Send the coalesced request.
1286                 * We actually send the very first packet of all the
1287                 * pending packets for this virtual page address.
1288                 */
1289                if (!memSidePort[0]->sendTimingReq(pkt)) {
1290                    DPRINTF(GPUTLB, "Failed sending translation request to "
1291                            "lower level TLB for addr %#x\n", virtPageAddr);
1292
1293                    memSidePort[0]->retries.push_back(pkt);
1294                } else {
1295                    DPRINTF(GPUTLB, "Sent translation request to lower level "
1296                            "TLB for addr %#x\n", virtPageAddr);
1297                }
1298            } else {
1299                //this is the last level TLB. Start a page walk
1300                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1301                        "addr %#x\n", virtPageAddr);
1302
1303                if (update_stats)
1304                    pageTableCycles -= (req_cnt*curTick());
1305
1306                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1307                assert(tlb_event);
1308                tlb_event->updateOutcome(PAGE_WALK);
1309                schedule(tlb_event, curTick() + ticks(missLatency2));
1310            }
1311        } else if (outcome == PAGE_WALK) {
1312            if (update_stats)
1313                pageTableCycles += (req_cnt*curTick());
1314
1315            // Need to access the page table and update the TLB
1316            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1317                    virtPageAddr);
1318
1319            TranslationState *sender_state =
1320                safe_cast<TranslationState*>(pkt->senderState);
1321
1322            Process *p = sender_state->tc->getProcessPtr();
1323            Addr vaddr = pkt->req->getVaddr();
1324    #ifndef NDEBUG
1325            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1326            assert(alignedVaddr == virtPageAddr);
1327    #endif
1328            const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1329            if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1330                    p->fixupStackFault(vaddr)) {
1331                pte = p->pTable->lookup(vaddr);
1332            }
1333
1334            if (pte) {
1335                DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1336                        pte->paddr);
1337
1338                sender_state->tlbEntry =
1339                    new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
1340                                 false);
1341            } else {
1342                sender_state->tlbEntry = nullptr;
1343            }
1344
1345            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1346        } else if (outcome == MISS_RETURN) {
1347            /** we add an extra cycle in the return path of the translation
1348             * requests in between the various TLB levels.
1349             */
1350            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1351        } else {
1352            panic("Unexpected TLB outcome %d", outcome);
1353        }
1354    }
1355
    void
    GpuTLB::TLBEvent::process()
    {
        // Delegate to the owning TLB's outcome handler.
        tlb->translationReturn(virtPageAddr, outcome, pkt);
    }
1361
    // Human-readable event name used for event-queue tracing.
    const char*
    GpuTLB::TLBEvent::description() const
    {
        return "trigger translationDoneEvent";
    }
1367
    // Re-purpose this pending event for its next stage, e.g. a miss at the
    // last level becomes PAGE_WALK, a response from below becomes
    // MISS_RETURN.
    void
    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
    {
        outcome = _outcome;
    }
1373
    // Virtual page address this event is servicing.
    Addr
    GpuTLB::TLBEvent::getTLBEventVaddr()
    {
        return virtPageAddr;
    }
1379
1380    /*
1381     * recvTiming receives a coalesced timing request from a TLBCoalescer
1382     * and it calls issueTLBLookup()
1383     * It only rejects the packet if we have exceeded the max
1384     * outstanding number of requests for the TLB
1385     */
1386    bool
1387    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1388    {
1389        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1390            tlb->issueTLBLookup(pkt);
1391            // update number of outstanding translation requests
1392            tlb->outstandingReqs++;
1393            return true;
1394         } else {
1395            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1396                    tlb->outstandingReqs);
1397            return false;
1398         }
1399    }
1400
1401    /**
1402     * handleFuncTranslationReturn is called on a TLB hit,
1403     * when a TLB miss returns or when a page fault returns.
1404     * It updates LRU, inserts the TLB entry on a miss
1405     * depending on the allocation policy and does the required
1406     * protection checks. It does NOT create a new packet to
1407     * update the packet's addr; this is done in hsail-gpu code.
1408     */
1409    void
1410    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
1411    {
1412        TranslationState *sender_state =
1413            safe_cast<TranslationState*>(pkt->senderState);
1414
1415        ThreadContext *tc = sender_state->tc;
1416        Mode mode = sender_state->tlbMode;
1417        Addr vaddr = pkt->req->getVaddr();
1418
1419        TlbEntry *local_entry, *new_entry;
1420
1421        if (tlb_outcome == TLB_HIT) {
1422            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1423                    "%#x\n", vaddr);
1424
1425            local_entry = sender_state->tlbEntry;
1426        } else {
1427            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1428                    "%#x\n", vaddr);
1429
1430            // We are returning either from a page walk or from a hit at a lower
1431            // TLB level. The senderState should be "carrying" a pointer to the
1432            // correct TLBEntry.
1433            new_entry = sender_state->tlbEntry;
1434            assert(new_entry);
1435            local_entry = new_entry;
1436
1437            if (allocationPolicy) {
1438                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1439
1440                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1441                        virt_page_addr);
1442
1443                local_entry = insert(virt_page_addr, *new_entry);
1444            }
1445
1446            assert(local_entry);
1447        }
1448
1449        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1450                "while paddr was %#x.\n", local_entry->vaddr,
1451                local_entry->paddr);
1452
1453        /**
1454         * Do paging checks if it's a normal functional access.  If it's for a
1455         * prefetch, then sometimes you can try to prefetch something that
1456         * won't pass protection. We don't actually want to fault becuase there
1457         * is no demand access to deem this a violation.  Just put it in the
1458         * TLB and it will fault if indeed a future demand access touches it in
1459         * violation.
1460         *
1461         * This feature could be used to explore security issues around
1462         * speculative memory accesses.
1463         */
1464        if (!sender_state->prefetch && sender_state->tlbEntry)
1465            pagingProtectionChecks(tc, pkt, local_entry, mode);
1466
1467        int page_size = local_entry->size();
1468        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1469        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1470
1471        pkt->req->setPaddr(paddr);
1472
1473        if (local_entry->uncacheable)
1474             pkt->req->setFlags(Request::UNCACHEABLE);
1475    }
1476
    // This is used for atomic translations. Need to
    // make it all happen during the same cycle.
    void
    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        bool update_stats = !sender_state->prefetch;

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        if (update_stats)
            tlb->updatePageFootprint(virt_page_addr);

        // do the TLB lookup without updating the stats
        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

        // functional mode means no coalescing
        // global metrics are the same as the local metrics
        if (update_stats) {
            tlb->globalNumTLBAccesses++;

            if (success) {
                // reqCnt size identifies the TLB level of the hit
                sender_state->hitLevel = sender_state->reqCnt.size();
                tlb->globalNumTLBHits++;
            }
        }

        if (!success) {
            if (update_stats)
                tlb->globalNumTLBMisses++;
            if (tlb->hasMemSidePort) {
                // there is a TLB below -> propagate down the TLB hierarchy
                tlb->memSidePort[0]->sendFunctional(pkt);
                // If no valid translation from a prefetch, then just return
                if (sender_state->prefetch && !pkt->req->hasPaddr())
                    return;
            } else {
                // Need to access the page table and update the TLB
                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                        virt_page_addr);

                Process *p = tc->getProcessPtr();

                Addr vaddr = pkt->req->getVaddr();
    #ifndef NDEBUG
                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
                assert(alignedVaddr == virt_page_addr);
    #endif

                const EmulationPageTable::Entry *pte =
                        p->pTable->lookup(vaddr);
                // Retry once after a possible stack auto-grow for data
                // accesses.
                if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
                        p->fixupStackFault(vaddr)) {
                    pte = p->pTable->lookup(vaddr);
                }

                if (!sender_state->prefetch) {
                    // no PageFaults are permitted after
                    // the second page table lookup
                    assert(pte);

                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                            pte->paddr);

                    sender_state->tlbEntry =
                        new TlbEntry(p->pid(), virt_page_addr,
                                     pte->paddr, false, false);
                } else {
                    // If this was a prefetch, then do the normal thing if it
                    // was a successful translation.  Otherwise, send an empty
                    // TLB entry back so that it can be figured out as empty and
                    // handled accordingly.
                    if (pte) {
                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                                pte->paddr);

                        sender_state->tlbEntry =
                            new TlbEntry(p->pid(), virt_page_addr,
                                         pte->paddr, false, false);
                    } else {
                        DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
                                alignedVaddr);

                        sender_state->tlbEntry = nullptr;

                        return;
                    }
                }
            }
        } else {
            // NOTE(review): this DPRINTF passes the TlbEntry pointer to
            // %#x, not the vaddr — looks like a leftover; confirm intent.
            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                    tlb->lookup(pkt->req->getVaddr()));

            TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
                                             update_stats);

            assert(entry);

            // Copy the hit entry into the senderState for the caller.
            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);
        }
        // This is the function that would populate pkt->req with the paddr of
        // the translation. But if no translation happens (i.e Prefetch fails)
        // then the early returns in the above code will keep this function
        // from executing.
        tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
    }
1591
    void
    GpuTLB::CpuSidePort::recvReqRetry()
    {
        // The CPUSidePort never sends anything but replies. No retries
        // expected, so reaching this is a protocol violation.
        panic("recvReqRetry called");
    }
1599
1600    AddrRangeList
1601    GpuTLB::CpuSidePort::getAddrRanges() const
1602    {
1603        // currently not checked by the master
1604        AddrRangeList ranges;
1605
1606        return ranges;
1607    }
1608
1609    /**
1610     * MemSidePort receives the packet back.
1611     * We need to call the handleTranslationReturn
1612     * and propagate up the hierarchy.
1613     */
1614    bool
1615    GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
1616    {
1617        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1618                                        TheISA::PageBytes);
1619
1620        DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1621                virt_page_addr);
1622
1623        TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1624        assert(tlb_event);
1625        assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1626
1627        tlb_event->updateOutcome(MISS_RETURN);
1628        tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1629
1630        return true;
1631    }
1632
    void
    GpuTLB::MemSidePort::recvReqRetry()
    {
        // No retries should reach the TLB. The retries
        // should only reach the TLBCoalescer.
        panic("recvReqRetry called");
    }
1640
1641    void
1642    GpuTLB::cleanup()
1643    {
1644        while (!cleanupQueue.empty()) {
1645            Addr cleanup_addr = cleanupQueue.front();
1646            cleanupQueue.pop();
1647
1648            // delete TLBEvent
1649            TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1650            delete old_tlb_event;
1651            translationReturnEvent.erase(cleanup_addr);
1652
1653            // update number of outstanding requests
1654            outstandingReqs--;
1655        }
1656
1657        /** the higher level coalescer should retry if it has
1658         * any pending requests.
1659         */
1660        for (int i = 0; i < cpuSidePort.size(); ++i) {
1661            cpuSidePort[i]->sendRetryReq();
1662        }
1663    }
1664
1665    void
1666    GpuTLB::updatePageFootprint(Addr virt_page_addr)
1667    {
1668
1669        std::pair<AccessPatternTable::iterator, bool> ret;
1670
1671        AccessInfo tmp_access_info;
1672        tmp_access_info.lastTimeAccessed = 0;
1673        tmp_access_info.accessesPerPage = 0;
1674        tmp_access_info.totalReuseDistance = 0;
1675        tmp_access_info.sumDistance = 0;
1676        tmp_access_info.meanDistance = 0;
1677
1678        ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1679                                  tmp_access_info));
1680
1681        bool first_page_access = ret.second;
1682
1683        if (first_page_access) {
1684            numUniquePages++;
1685        } else  {
1686            int accessed_before;
1687            accessed_before  = curTick() - ret.first->second.lastTimeAccessed;
1688            ret.first->second.totalReuseDistance += accessed_before;
1689        }
1690
1691        ret.first->second.accessesPerPage++;
1692        ret.first->second.lastTimeAccessed = curTick();
1693
1694        if (accessDistance) {
1695            ret.first->second.localTLBAccesses
1696                .push_back(localNumTLBAccesses.value());
1697        }
1698    }
1699
1700    void
1701    GpuTLB::exitCallback()
1702    {
1703        std::ostream *page_stat_file = nullptr;
1704
1705        if (accessDistance) {
1706
1707            // print per page statistics to a separate file (.csv format)
1708            // simout is the gem5 output directory (default is m5out or the one
1709            // specified with -d
1710            page_stat_file = simout.create(name().c_str())->stream();
1711
1712            // print header
1713            *page_stat_file << "page,max_access_distance,mean_access_distance, "
1714                            << "stddev_distance" << std::endl;
1715        }
1716
1717        // update avg. reuse distance footprint
1718        AccessPatternTable::iterator iter, iter_begin, iter_end;
1719        unsigned int sum_avg_reuse_distance_per_page = 0;
1720
1721        // iterate through all pages seen by this TLB
1722        for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1723            sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1724                                               iter->second.accessesPerPage;
1725
1726            if (accessDistance) {
1727                unsigned int tmp = iter->second.localTLBAccesses[0];
1728                unsigned int prev = tmp;
1729
1730                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1731                    if (i) {
1732                        tmp = prev + 1;
1733                    }
1734
1735                    prev = iter->second.localTLBAccesses[i];
1736                    // update the localTLBAccesses value
1737                    // with the actual differece
1738                    iter->second.localTLBAccesses[i] -= tmp;
1739                    // compute the sum of AccessDistance per page
1740                    // used later for mean
1741                    iter->second.sumDistance +=
1742                        iter->second.localTLBAccesses[i];
1743                }
1744
1745                iter->second.meanDistance =
1746                    iter->second.sumDistance / iter->second.accessesPerPage;
1747
1748                // compute std_dev and max  (we need a second round because we
1749                // need to know the mean value
1750                unsigned int max_distance = 0;
1751                unsigned int stddev_distance = 0;
1752
1753                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1754                    unsigned int tmp_access_distance =
1755                        iter->second.localTLBAccesses[i];
1756
1757                    if (tmp_access_distance > max_distance) {
1758                        max_distance = tmp_access_distance;
1759                    }
1760
1761                    unsigned int diff =
1762                        tmp_access_distance - iter->second.meanDistance;
1763                    stddev_distance += pow(diff, 2);
1764
1765                }
1766
1767                stddev_distance =
1768                    sqrt(stddev_distance/iter->second.accessesPerPage);
1769
1770                if (page_stat_file) {
1771                    *page_stat_file << std::hex << iter->first << ",";
1772                    *page_stat_file << std::dec << max_distance << ",";
1773                    *page_stat_file << std::dec << iter->second.meanDistance
1774                                    << ",";
1775                    *page_stat_file << std::dec << stddev_distance;
1776                    *page_stat_file << std::endl;
1777                }
1778
1779                // erase the localTLBAccesses array
1780                iter->second.localTLBAccesses.clear();
1781            }
1782        }
1783
1784        if (!TLBFootprint.empty()) {
1785            avgReuseDistance =
1786                sum_avg_reuse_distance_per_page / TLBFootprint.size();
1787        }
1788
1789        //clear the TLBFootprint map
1790        TLBFootprint.clear();
1791    }
1792} // namespace X86ISA
1793
1794X86ISA::GpuTLB*
1795X86GPUTLBParams::create()
1796{
1797    return new X86ISA::GpuTLB(this);
1798}
1799
1800