gpu_tlb.cc revision 11704
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Lisa Hsu
34 */
35
36#include "gpu-compute/gpu_tlb.hh"
37
38#include <cmath>
39#include <cstring>
40
41#include "arch/x86/faults.hh"
42#include "arch/x86/insts/microldstop.hh"
43#include "arch/x86/pagetable.hh"
44#include "arch/x86/pagetable_walker.hh"
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/x86_traits.hh"
47#include "base/bitfield.hh"
48#include "base/output.hh"
49#include "base/trace.hh"
50#include "cpu/base.hh"
51#include "cpu/thread_context.hh"
52#include "debug/GPUPrefetch.hh"
53#include "debug/GPUTLB.hh"
54#include "mem/packet_access.hh"
55#include "mem/page_table.hh"
56#include "mem/request.hh"
57#include "sim/process.hh"
58
59namespace X86ISA
60{
61
62    GpuTLB::GpuTLB(const Params *p)
63        : MemObject(p), configAddress(0), size(p->size),
64          cleanupEvent(this, false, Event::Maximum_Pri), exitEvent(this)
65    {
66        assoc = p->assoc;
67        assert(assoc <= size);
68        numSets = size/assoc;
69        allocationPolicy = p->allocationPolicy;
70        hasMemSidePort = false;
71        accessDistance = p->accessDistance;
72        clock = p->clk_domain->clockPeriod();
73
74        tlb.assign(size, GpuTlbEntry());
75
76        freeList.resize(numSets);
77        entryList.resize(numSets);
78
79        for (int set = 0; set < numSets; ++set) {
80            for (int way = 0; way < assoc; ++way) {
81                int x = set * assoc + way;
82                freeList[set].push_back(&tlb.at(x));
83            }
84        }
85
86        FA = (size == assoc);
87
88        /**
89         * @warning: the set-associative version assumes you have a
90         * fixed page size of 4KB.
91         * If the page size is greather than 4KB (as defined in the
92         * TheISA::PageBytes), then there are various issues w/ the current
93         * implementation (you'd have the same 8KB page being replicated in
94         * different sets etc)
95         */
96        setMask = numSets - 1;
97
98    #if 0
99        // GpuTLB doesn't yet support full system
100        walker = p->walker;
101        walker->setTLB(this);
102    #endif
103
104        maxCoalescedReqs = p->maxOutstandingReqs;
105
106        // Do not allow maxCoalescedReqs to be more than the TLB associativity
107        if (maxCoalescedReqs > assoc) {
108            maxCoalescedReqs = assoc;
109            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
110        }
111
112        outstandingReqs = 0;
113        hitLatency = p->hitLatency;
114        missLatency1 = p->missLatency1;
115        missLatency2 = p->missLatency2;
116
117        // create the slave ports based on the number of connected ports
118        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
119            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
120                                  name(), i), this, i));
121        }
122
123        // create the master ports based on the number of connected ports
124        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
125            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
126                                  name(), i), this, i));
127        }
128    }
129
130    // fixme: this is never called?
131    GpuTLB::~GpuTLB()
132    {
133        // make sure all the hash-maps are empty
134        assert(translationReturnEvent.empty());
135    }
136
137    BaseSlavePort&
138    GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
139    {
140        if (if_name == "slave") {
141            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
142                panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
143            }
144
145            return *cpuSidePort[idx];
146        } else {
147            panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
148        }
149    }
150
151    BaseMasterPort&
152    GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
153    {
154        if (if_name == "master") {
155            if (idx >= static_cast<PortID>(memSidePort.size())) {
156                panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
157            }
158
159            hasMemSidePort = true;
160
161            return *memSidePort[idx];
162        } else {
163            panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
164        }
165    }
166
167    GpuTlbEntry*
168    GpuTLB::insert(Addr vpn, GpuTlbEntry &entry)
169    {
170        GpuTlbEntry *newEntry = nullptr;
171
172        /**
173         * vpn holds the virtual page address
174         * The least significant bits are simply masked
175         */
176        int set = (vpn >> TheISA::PageShift) & setMask;
177
178        if (!freeList[set].empty()) {
179            newEntry = freeList[set].front();
180            freeList[set].pop_front();
181        } else {
182            newEntry = entryList[set].back();
183            entryList[set].pop_back();
184        }
185
186        *newEntry = entry;
187        newEntry->vaddr = vpn;
188        entryList[set].push_front(newEntry);
189
190        return newEntry;
191    }
192
193    GpuTLB::EntryList::iterator
194    GpuTLB::lookupIt(Addr va, bool update_lru)
195    {
196        int set = (va >> TheISA::PageShift) & setMask;
197
198        if (FA) {
199            assert(!set);
200        }
201
202        auto entry = entryList[set].begin();
203        for (; entry != entryList[set].end(); ++entry) {
204            int page_size = (*entry)->size();
205
206            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
207                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
208                        "with size %#x.\n", va, (*entry)->vaddr, page_size);
209
210                if (update_lru) {
211                    entryList[set].push_front(*entry);
212                    entryList[set].erase(entry);
213                    entry = entryList[set].begin();
214                }
215
216                break;
217            }
218        }
219
220        return entry;
221    }
222
223    GpuTlbEntry*
224    GpuTLB::lookup(Addr va, bool update_lru)
225    {
226        int set = (va >> TheISA::PageShift) & setMask;
227
228        auto entry = lookupIt(va, update_lru);
229
230        if (entry == entryList[set].end())
231            return nullptr;
232        else
233            return *entry;
234    }
235
236    void
237    GpuTLB::invalidateAll()
238    {
239        DPRINTF(GPUTLB, "Invalidating all entries.\n");
240
241        for (int i = 0; i < numSets; ++i) {
242            while (!entryList[i].empty()) {
243                GpuTlbEntry *entry = entryList[i].front();
244                entryList[i].pop_front();
245                freeList[i].push_back(entry);
246            }
247        }
248    }
249
250    void
251    GpuTLB::setConfigAddress(uint32_t addr)
252    {
253        configAddress = addr;
254    }
255
256    void
257    GpuTLB::invalidateNonGlobal()
258    {
259        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
260
261        for (int i = 0; i < numSets; ++i) {
262            for (auto entryIt = entryList[i].begin();
263                 entryIt != entryList[i].end();) {
264                if (!(*entryIt)->global) {
265                    freeList[i].push_back(*entryIt);
266                    entryList[i].erase(entryIt++);
267                } else {
268                    ++entryIt;
269                }
270            }
271        }
272    }
273
274    void
275    GpuTLB::demapPage(Addr va, uint64_t asn)
276    {
277
278        int set = (va >> TheISA::PageShift) & setMask;
279        auto entry = lookupIt(va, false);
280
281        if (entry != entryList[set].end()) {
282            freeList[set].push_back(*entry);
283            entryList[set].erase(entry);
284        }
285    }
286
287    Fault
288    GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
289    {
290        DPRINTF(GPUTLB, "Addresses references internal memory.\n");
291        Addr vaddr = req->getVaddr();
292        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
293
294        if (prefix == IntAddrPrefixCPUID) {
295            panic("CPUID memory space not yet implemented!\n");
296        } else if (prefix == IntAddrPrefixMSR) {
297            vaddr = vaddr >> 3;
298            req->setFlags(Request::MMAPPED_IPR);
299            Addr regNum = 0;
300
301            switch (vaddr & ~IntAddrPrefixMask) {
302              case 0x10:
303                regNum = MISCREG_TSC;
304                break;
305              case 0x1B:
306                regNum = MISCREG_APIC_BASE;
307                break;
308              case 0xFE:
309                regNum = MISCREG_MTRRCAP;
310                break;
311              case 0x174:
312                regNum = MISCREG_SYSENTER_CS;
313                break;
314              case 0x175:
315                regNum = MISCREG_SYSENTER_ESP;
316                break;
317              case 0x176:
318                regNum = MISCREG_SYSENTER_EIP;
319                break;
320              case 0x179:
321                regNum = MISCREG_MCG_CAP;
322                break;
323              case 0x17A:
324                regNum = MISCREG_MCG_STATUS;
325                break;
326              case 0x17B:
327                regNum = MISCREG_MCG_CTL;
328                break;
329              case 0x1D9:
330                regNum = MISCREG_DEBUG_CTL_MSR;
331                break;
332              case 0x1DB:
333                regNum = MISCREG_LAST_BRANCH_FROM_IP;
334                break;
335              case 0x1DC:
336                regNum = MISCREG_LAST_BRANCH_TO_IP;
337                break;
338              case 0x1DD:
339                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
340                break;
341              case 0x1DE:
342                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
343                break;
344              case 0x200:
345                regNum = MISCREG_MTRR_PHYS_BASE_0;
346                break;
347              case 0x201:
348                regNum = MISCREG_MTRR_PHYS_MASK_0;
349                break;
350              case 0x202:
351                regNum = MISCREG_MTRR_PHYS_BASE_1;
352                break;
353              case 0x203:
354                regNum = MISCREG_MTRR_PHYS_MASK_1;
355                break;
356              case 0x204:
357                regNum = MISCREG_MTRR_PHYS_BASE_2;
358                break;
359              case 0x205:
360                regNum = MISCREG_MTRR_PHYS_MASK_2;
361                break;
362              case 0x206:
363                regNum = MISCREG_MTRR_PHYS_BASE_3;
364                break;
365              case 0x207:
366                regNum = MISCREG_MTRR_PHYS_MASK_3;
367                break;
368              case 0x208:
369                regNum = MISCREG_MTRR_PHYS_BASE_4;
370                break;
371              case 0x209:
372                regNum = MISCREG_MTRR_PHYS_MASK_4;
373                break;
374              case 0x20A:
375                regNum = MISCREG_MTRR_PHYS_BASE_5;
376                break;
377              case 0x20B:
378                regNum = MISCREG_MTRR_PHYS_MASK_5;
379                break;
380              case 0x20C:
381                regNum = MISCREG_MTRR_PHYS_BASE_6;
382                break;
383              case 0x20D:
384                regNum = MISCREG_MTRR_PHYS_MASK_6;
385                break;
386              case 0x20E:
387                regNum = MISCREG_MTRR_PHYS_BASE_7;
388                break;
389              case 0x20F:
390                regNum = MISCREG_MTRR_PHYS_MASK_7;
391                break;
392              case 0x250:
393                regNum = MISCREG_MTRR_FIX_64K_00000;
394                break;
395              case 0x258:
396                regNum = MISCREG_MTRR_FIX_16K_80000;
397                break;
398              case 0x259:
399                regNum = MISCREG_MTRR_FIX_16K_A0000;
400                break;
401              case 0x268:
402                regNum = MISCREG_MTRR_FIX_4K_C0000;
403                break;
404              case 0x269:
405                regNum = MISCREG_MTRR_FIX_4K_C8000;
406                break;
407              case 0x26A:
408                regNum = MISCREG_MTRR_FIX_4K_D0000;
409                break;
410              case 0x26B:
411                regNum = MISCREG_MTRR_FIX_4K_D8000;
412                break;
413              case 0x26C:
414                regNum = MISCREG_MTRR_FIX_4K_E0000;
415                break;
416              case 0x26D:
417                regNum = MISCREG_MTRR_FIX_4K_E8000;
418                break;
419              case 0x26E:
420                regNum = MISCREG_MTRR_FIX_4K_F0000;
421                break;
422              case 0x26F:
423                regNum = MISCREG_MTRR_FIX_4K_F8000;
424                break;
425              case 0x277:
426                regNum = MISCREG_PAT;
427                break;
428              case 0x2FF:
429                regNum = MISCREG_DEF_TYPE;
430                break;
431              case 0x400:
432                regNum = MISCREG_MC0_CTL;
433                break;
434              case 0x404:
435                regNum = MISCREG_MC1_CTL;
436                break;
437              case 0x408:
438                regNum = MISCREG_MC2_CTL;
439                break;
440              case 0x40C:
441                regNum = MISCREG_MC3_CTL;
442                break;
443              case 0x410:
444                regNum = MISCREG_MC4_CTL;
445                break;
446              case 0x414:
447                regNum = MISCREG_MC5_CTL;
448                break;
449              case 0x418:
450                regNum = MISCREG_MC6_CTL;
451                break;
452              case 0x41C:
453                regNum = MISCREG_MC7_CTL;
454                break;
455              case 0x401:
456                regNum = MISCREG_MC0_STATUS;
457                break;
458              case 0x405:
459                regNum = MISCREG_MC1_STATUS;
460                break;
461              case 0x409:
462                regNum = MISCREG_MC2_STATUS;
463                break;
464              case 0x40D:
465                regNum = MISCREG_MC3_STATUS;
466                break;
467              case 0x411:
468                regNum = MISCREG_MC4_STATUS;
469                break;
470              case 0x415:
471                regNum = MISCREG_MC5_STATUS;
472                break;
473              case 0x419:
474                regNum = MISCREG_MC6_STATUS;
475                break;
476              case 0x41D:
477                regNum = MISCREG_MC7_STATUS;
478                break;
479              case 0x402:
480                regNum = MISCREG_MC0_ADDR;
481                break;
482              case 0x406:
483                regNum = MISCREG_MC1_ADDR;
484                break;
485              case 0x40A:
486                regNum = MISCREG_MC2_ADDR;
487                break;
488              case 0x40E:
489                regNum = MISCREG_MC3_ADDR;
490                break;
491              case 0x412:
492                regNum = MISCREG_MC4_ADDR;
493                break;
494              case 0x416:
495                regNum = MISCREG_MC5_ADDR;
496                break;
497              case 0x41A:
498                regNum = MISCREG_MC6_ADDR;
499                break;
500              case 0x41E:
501                regNum = MISCREG_MC7_ADDR;
502                break;
503              case 0x403:
504                regNum = MISCREG_MC0_MISC;
505                break;
506              case 0x407:
507                regNum = MISCREG_MC1_MISC;
508                break;
509              case 0x40B:
510                regNum = MISCREG_MC2_MISC;
511                break;
512              case 0x40F:
513                regNum = MISCREG_MC3_MISC;
514                break;
515              case 0x413:
516                regNum = MISCREG_MC4_MISC;
517                break;
518              case 0x417:
519                regNum = MISCREG_MC5_MISC;
520                break;
521              case 0x41B:
522                regNum = MISCREG_MC6_MISC;
523                break;
524              case 0x41F:
525                regNum = MISCREG_MC7_MISC;
526                break;
527              case 0xC0000080:
528                regNum = MISCREG_EFER;
529                break;
530              case 0xC0000081:
531                regNum = MISCREG_STAR;
532                break;
533              case 0xC0000082:
534                regNum = MISCREG_LSTAR;
535                break;
536              case 0xC0000083:
537                regNum = MISCREG_CSTAR;
538                break;
539              case 0xC0000084:
540                regNum = MISCREG_SF_MASK;
541                break;
542              case 0xC0000100:
543                regNum = MISCREG_FS_BASE;
544                break;
545              case 0xC0000101:
546                regNum = MISCREG_GS_BASE;
547                break;
548              case 0xC0000102:
549                regNum = MISCREG_KERNEL_GS_BASE;
550                break;
551              case 0xC0000103:
552                regNum = MISCREG_TSC_AUX;
553                break;
554              case 0xC0010000:
555                regNum = MISCREG_PERF_EVT_SEL0;
556                break;
557              case 0xC0010001:
558                regNum = MISCREG_PERF_EVT_SEL1;
559                break;
560              case 0xC0010002:
561                regNum = MISCREG_PERF_EVT_SEL2;
562                break;
563              case 0xC0010003:
564                regNum = MISCREG_PERF_EVT_SEL3;
565                break;
566              case 0xC0010004:
567                regNum = MISCREG_PERF_EVT_CTR0;
568                break;
569              case 0xC0010005:
570                regNum = MISCREG_PERF_EVT_CTR1;
571                break;
572              case 0xC0010006:
573                regNum = MISCREG_PERF_EVT_CTR2;
574                break;
575              case 0xC0010007:
576                regNum = MISCREG_PERF_EVT_CTR3;
577                break;
578              case 0xC0010010:
579                regNum = MISCREG_SYSCFG;
580                break;
581              case 0xC0010016:
582                regNum = MISCREG_IORR_BASE0;
583                break;
584              case 0xC0010017:
585                regNum = MISCREG_IORR_BASE1;
586                break;
587              case 0xC0010018:
588                regNum = MISCREG_IORR_MASK0;
589                break;
590              case 0xC0010019:
591                regNum = MISCREG_IORR_MASK1;
592                break;
593              case 0xC001001A:
594                regNum = MISCREG_TOP_MEM;
595                break;
596              case 0xC001001D:
597                regNum = MISCREG_TOP_MEM2;
598                break;
599              case 0xC0010114:
600                regNum = MISCREG_VM_CR;
601                break;
602              case 0xC0010115:
603                regNum = MISCREG_IGNNE;
604                break;
605              case 0xC0010116:
606                regNum = MISCREG_SMM_CTL;
607                break;
608              case 0xC0010117:
609                regNum = MISCREG_VM_HSAVE_PA;
610                break;
611              default:
612                return std::make_shared<GeneralProtection>(0);
613            }
614            //The index is multiplied by the size of a MiscReg so that
615            //any memory dependence calculations will not see these as
616            //overlapping.
617            req->setPaddr(regNum * sizeof(MiscReg));
618            return NoFault;
619        } else if (prefix == IntAddrPrefixIO) {
620            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
621            // bitmap in the TSS.
622
623            Addr IOPort = vaddr & ~IntAddrPrefixMask;
624            // Make sure the address fits in the expected 16 bit IO address
625            // space.
626            assert(!(IOPort & ~0xFFFF));
627
628            if (IOPort == 0xCF8 && req->getSize() == 4) {
629                req->setFlags(Request::MMAPPED_IPR);
630                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
631            } else if ((IOPort & ~mask(2)) == 0xCFC) {
632                req->setFlags(Request::UNCACHEABLE);
633
634                Addr configAddress =
635                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
636
637                if (bits(configAddress, 31, 31)) {
638                    req->setPaddr(PhysAddrPrefixPciConfig |
639                                  mbits(configAddress, 30, 2) |
640                                  (IOPort & mask(2)));
641                } else {
642                    req->setPaddr(PhysAddrPrefixIO | IOPort);
643                }
644            } else {
645                req->setFlags(Request::UNCACHEABLE);
646                req->setPaddr(PhysAddrPrefixIO | IOPort);
647            }
648            return NoFault;
649        } else {
650            panic("Access to unrecognized internal address space %#x.\n",
651                  prefix);
652        }
653    }
654
655    /**
656     * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
657     * and false on a TLB miss.
658     * Many of the checks about different modes have been converted to
659     * assertions, since these parts of the code are not really used.
660     * On a hit it will update the LRU stack.
661     */
662    bool
663    GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
664    {
665        bool tlb_hit = false;
666    #ifndef NDEBUG
667        uint32_t flags = req->getFlags();
668        int seg = flags & SegmentFlagMask;
669    #endif
670
671        assert(seg != SEGMENT_REG_MS);
672        Addr vaddr = req->getVaddr();
673        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
674        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
675
676        if (m5Reg.prot) {
677            DPRINTF(GPUTLB, "In protected mode.\n");
678            // make sure we are in 64-bit mode
679            assert(m5Reg.mode == LongMode);
680
681            // If paging is enabled, do the translation.
682            if (m5Reg.paging) {
683                DPRINTF(GPUTLB, "Paging enabled.\n");
684                //update LRU stack on a hit
685                GpuTlbEntry *entry = lookup(vaddr, true);
686
687                if (entry)
688                    tlb_hit = true;
689
690                if (!update_stats) {
691                    // functional tlb access for memory initialization
692                    // i.e., memory seeding or instr. seeding -> don't update
693                    // TLB and stats
694                    return tlb_hit;
695                }
696
697                localNumTLBAccesses++;
698
699                if (!entry) {
700                    localNumTLBMisses++;
701                } else {
702                    localNumTLBHits++;
703                }
704            }
705        }
706
707        return tlb_hit;
708    }
709
710    Fault
711    GpuTLB::translate(RequestPtr req, ThreadContext *tc,
712                      Translation *translation, Mode mode,
713                      bool &delayedResponse, bool timing, int &latency)
714    {
715        uint32_t flags = req->getFlags();
716        int seg = flags & SegmentFlagMask;
717        bool storeCheck = flags & (StoreCheck << FlagShift);
718
719        // If this is true, we're dealing with a request
720        // to a non-memory address space.
721        if (seg == SEGMENT_REG_MS) {
722            return translateInt(req, tc);
723        }
724
725        delayedResponse = false;
726        Addr vaddr = req->getVaddr();
727        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
728
729        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
730
731        // If protected mode has been enabled...
732        if (m5Reg.prot) {
733            DPRINTF(GPUTLB, "In protected mode.\n");
734            // If we're not in 64-bit mode, do protection/limit checks
735            if (m5Reg.mode != LongMode) {
736                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
737                        "protection.\n");
738
739                // Check for a null segment selector.
740                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
741                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
742                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
743                    return std::make_shared<GeneralProtection>(0);
744                }
745
746                bool expandDown = false;
747                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
748
749                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
750                    if (!attr.writable && (mode == BaseTLB::Write ||
751                        storeCheck))
752                        return std::make_shared<GeneralProtection>(0);
753
754                    if (!attr.readable && mode == BaseTLB::Read)
755                        return std::make_shared<GeneralProtection>(0);
756
757                    expandDown = attr.expandDown;
758
759                }
760
761                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
762                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
763                // This assumes we're not in 64 bit mode. If we were, the
764                // default address size is 64 bits, overridable to 32.
765                int size = 32;
766                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
767                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
768
769                if ((csAttr.defaultSize && sizeOverride) ||
770                    (!csAttr.defaultSize && !sizeOverride)) {
771                    size = 16;
772                }
773
774                Addr offset = bits(vaddr - base, size - 1, 0);
775                Addr endOffset = offset + req->getSize() - 1;
776
777                if (expandDown) {
778                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
779                    warn_once("Expand down segments are untested.\n");
780
781                    if (offset <= limit || endOffset <= limit)
782                        return std::make_shared<GeneralProtection>(0);
783                } else {
784                    if (offset > limit || endOffset > limit)
785                        return std::make_shared<GeneralProtection>(0);
786                }
787            }
788
789            // If paging is enabled, do the translation.
790            if (m5Reg.paging) {
791                DPRINTF(GPUTLB, "Paging enabled.\n");
792                // The vaddr already has the segment base applied.
793                GpuTlbEntry *entry = lookup(vaddr);
794                localNumTLBAccesses++;
795
796                if (!entry) {
797                    localNumTLBMisses++;
798                    if (timing) {
799                        latency = missLatency1;
800                    }
801
802                    if (FullSystem) {
803                        fatal("GpuTLB doesn't support full-system mode\n");
804                    } else {
805                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
806                                "at pc %#x.\n", vaddr, tc->instAddr());
807
808                        Process *p = tc->getProcessPtr();
809                        GpuTlbEntry newEntry;
810                        bool success = p->pTable->lookup(vaddr, newEntry);
811
812                        if (!success && mode != BaseTLB::Execute) {
813                            // penalize a "page fault" more
814                            if (timing) {
815                                latency += missLatency2;
816                            }
817
818                            if (p->fixupStackFault(vaddr))
819                                success = p->pTable->lookup(vaddr, newEntry);
820                        }
821
822                        if (!success) {
823                            return std::make_shared<PageFault>(vaddr, true,
824                                                               mode, true,
825                                                               false);
826                        } else {
827                            newEntry.valid = success;
828                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
829
830                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
831                                    alignedVaddr, newEntry.pageStart());
832
833                            entry = insert(alignedVaddr, newEntry);
834                        }
835
836                        DPRINTF(GPUTLB, "Miss was serviced.\n");
837                    }
838                } else {
839                    localNumTLBHits++;
840
841                    if (timing) {
842                        latency = hitLatency;
843                    }
844                }
845
846                // Do paging protection checks.
847                bool inUser = (m5Reg.cpl == 3 &&
848                               !(flags & (CPL0FlagBit << FlagShift)));
849
850                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
851                bool badWrite = (!entry->writable && (inUser || cr0.wp));
852
853                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
854                     badWrite)) {
855                    // The page must have been present to get into the TLB in
856                    // the first place. We'll assume the reserved bits are
857                    // fine even though we're not checking them.
858                    return std::make_shared<PageFault>(vaddr, true, mode,
859                                                       inUser, false);
860                }
861
862                if (storeCheck && badWrite) {
863                    // This would fault if this were a write, so return a page
864                    // fault that reflects that happening.
865                    return std::make_shared<PageFault>(vaddr, true,
866                                                       BaseTLB::Write,
867                                                       inUser, false);
868                }
869
870
871                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
872                        "checks.\n", entry->paddr);
873
874                int page_size = entry->size();
875                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
876                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
877                req->setPaddr(paddr);
878
879                if (entry->uncacheable)
880                    req->setFlags(Request::UNCACHEABLE);
881            } else {
882                //Use the address which already has segmentation applied.
883                DPRINTF(GPUTLB, "Paging disabled.\n");
884                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
885                req->setPaddr(vaddr);
886            }
887        } else {
888            // Real mode
889            DPRINTF(GPUTLB, "In real mode.\n");
890            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
891            req->setPaddr(vaddr);
892        }
893
894        // Check for an access to the local APIC
895        if (FullSystem) {
896            LocalApicBase localApicBase =
897                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
898
899            Addr baseAddr = localApicBase.base * PageBytes;
900            Addr paddr = req->getPaddr();
901
902            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
903                // Force the access to be uncacheable.
904                req->setFlags(Request::UNCACHEABLE);
905                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
906                                                  paddr - baseAddr));
907            }
908        }
909
910        return NoFault;
911    };
912
913    Fault
914    GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
915                            int &latency)
916    {
917        bool delayedResponse;
918
919        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
920                                 latency);
921    }
922
923    void
924    GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
925            Translation *translation, Mode mode, int &latency)
926    {
927        bool delayedResponse;
928        assert(translation);
929
930        Fault fault = GpuTLB::translate(req, tc, translation, mode,
931                                        delayedResponse, true, latency);
932
933        if (!delayedResponse)
934            translation->finish(fault, req, tc, mode);
935    }
936
937    Walker*
938    GpuTLB::getWalker()
939    {
940        return walker;
941    }
942
943
944    void
945    GpuTLB::serialize(CheckpointOut &cp) const
946    {
947    }
948
949    void
950    GpuTLB::unserialize(CheckpointIn &cp)
951    {
952    }
953
954    void
955    GpuTLB::regStats()
956    {
957        MemObject::regStats();
958
959        localNumTLBAccesses
960            .name(name() + ".local_TLB_accesses")
961            .desc("Number of TLB accesses")
962            ;
963
964        localNumTLBHits
965            .name(name() + ".local_TLB_hits")
966            .desc("Number of TLB hits")
967            ;
968
969        localNumTLBMisses
970            .name(name() + ".local_TLB_misses")
971            .desc("Number of TLB misses")
972            ;
973
974        localTLBMissRate
975            .name(name() + ".local_TLB_miss_rate")
976            .desc("TLB miss rate")
977            ;
978
979        accessCycles
980            .name(name() + ".access_cycles")
981            .desc("Cycles spent accessing this TLB level")
982            ;
983
984        pageTableCycles
985            .name(name() + ".page_table_cycles")
986            .desc("Cycles spent accessing the page table")
987            ;
988
989        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
990
991        numUniquePages
992            .name(name() + ".unique_pages")
993            .desc("Number of unique pages touched")
994            ;
995
996        localCycles
997            .name(name() + ".local_cycles")
998            .desc("Number of cycles spent in queue for all incoming reqs")
999            ;
1000
1001        localLatency
1002            .name(name() + ".local_latency")
1003            .desc("Avg. latency over incoming coalesced reqs")
1004            ;
1005
1006        localLatency = localCycles / localNumTLBAccesses;
1007
1008        globalNumTLBAccesses
1009            .name(name() + ".global_TLB_accesses")
1010            .desc("Number of TLB accesses")
1011            ;
1012
1013        globalNumTLBHits
1014            .name(name() + ".global_TLB_hits")
1015            .desc("Number of TLB hits")
1016            ;
1017
1018        globalNumTLBMisses
1019            .name(name() + ".global_TLB_misses")
1020            .desc("Number of TLB misses")
1021            ;
1022
1023        globalTLBMissRate
1024            .name(name() + ".global_TLB_miss_rate")
1025            .desc("TLB miss rate")
1026            ;
1027
1028        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
1029
1030        avgReuseDistance
1031            .name(name() + ".avg_reuse_distance")
1032            .desc("avg. reuse distance over all pages (in ticks)")
1033            ;
1034
1035    }
1036
1037    /**
1038     * Do the TLB lookup for this coalesced request and schedule
1039     * another event <TLB access latency> cycles later.
1040     */
1041
1042    void
1043    GpuTLB::issueTLBLookup(PacketPtr pkt)
1044    {
1045        assert(pkt);
1046        assert(pkt->senderState);
1047
1048        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1049                                        TheISA::PageBytes);
1050
1051        TranslationState *sender_state =
1052                safe_cast<TranslationState*>(pkt->senderState);
1053
1054        bool update_stats = !sender_state->prefetch;
1055        ThreadContext * tmp_tc = sender_state->tc;
1056
1057        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
1058                virt_page_addr);
1059
1060        int req_cnt = sender_state->reqCnt.back();
1061
1062        if (update_stats) {
1063            accessCycles -= (curTick() * req_cnt);
1064            localCycles -= curTick();
1065            updatePageFootprint(virt_page_addr);
1066            globalNumTLBAccesses += req_cnt;
1067        }
1068
1069        tlbOutcome lookup_outcome = TLB_MISS;
1070        RequestPtr tmp_req = pkt->req;
1071
1072        // Access the TLB and figure out if it's a hit or a miss.
1073        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
1074
1075        if (success) {
1076            lookup_outcome = TLB_HIT;
1077            // Put the entry in SenderState
1078            GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false);
1079            assert(entry);
1080
1081            sender_state->tlbEntry =
1082                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
1083
1084            if (update_stats) {
1085                // the reqCnt has an entry per level, so its size tells us
1086                // which level we are in
1087                sender_state->hitLevel = sender_state->reqCnt.size();
1088                globalNumTLBHits += req_cnt;
1089            }
1090        } else {
1091            if (update_stats)
1092                globalNumTLBMisses += req_cnt;
1093        }
1094
1095        /*
1096         * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1097         * as the TLB access latency.
1098         *
1099         * We create and schedule a new TLBEvent which will help us take the
1100         * appropriate actions (e.g., update TLB on a hit, send request to lower
1101         * level TLB on a miss, or start a page walk if this was the last-level
1102         * TLB)
1103         */
1104        TLBEvent *tlb_event =
1105            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
1106
1107        if (translationReturnEvent.count(virt_page_addr)) {
1108            panic("Virtual Page Address %#x already has a return event\n",
1109                  virt_page_addr);
1110        }
1111
1112        translationReturnEvent[virt_page_addr] = tlb_event;
1113        assert(tlb_event);
1114
1115        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
1116                curTick() + this->ticks(hitLatency));
1117
1118        schedule(tlb_event, curTick() + this->ticks(hitLatency));
1119    }
1120
1121    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
1122                               PacketPtr _pkt)
1123        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
1124        outcome(tlb_outcome), pkt(_pkt)
1125    {
1126    }
1127
1128    /**
1129     * Do Paging protection checks. If we encounter a page fault, then
1130     * an assertion is fired.
1131     */
1132    void
1133    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
1134            GpuTlbEntry * tlb_entry, Mode mode)
1135    {
1136        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1137        uint32_t flags = pkt->req->getFlags();
1138        bool storeCheck = flags & (StoreCheck << FlagShift);
1139
1140        // Do paging protection checks.
1141        bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1142        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1143
1144        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1145
1146        if ((inUser && !tlb_entry->user) ||
1147            (mode == BaseTLB::Write && badWrite)) {
1148           // The page must have been present to get into the TLB in
1149           // the first place. We'll assume the reserved bits are
1150           // fine even though we're not checking them.
1151           assert(false);
1152        }
1153
1154        if (storeCheck && badWrite) {
1155           // This would fault if this were a write, so return a page
1156           // fault that reflects that happening.
1157           assert(false);
1158        }
1159    }
1160
1161    /**
1162     * handleTranslationReturn is called on a TLB hit,
1163     * when a TLB miss returns or when a page fault returns.
1164     * The latter calls handelHit with TLB miss as tlbOutcome.
1165     */
1166    void
1167    GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
1168            PacketPtr pkt)
1169    {
1170
1171        assert(pkt);
1172        Addr vaddr = pkt->req->getVaddr();
1173
1174        TranslationState *sender_state =
1175            safe_cast<TranslationState*>(pkt->senderState);
1176
1177        ThreadContext *tc = sender_state->tc;
1178        Mode mode = sender_state->tlbMode;
1179
1180        GpuTlbEntry *local_entry, *new_entry;
1181
1182        if (tlb_outcome == TLB_HIT) {
1183            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
1184            local_entry = sender_state->tlbEntry;
1185        } else {
1186            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
1187                    vaddr);
1188
1189            // We are returning either from a page walk or from a hit at a lower
1190            // TLB level. The senderState should be "carrying" a pointer to the
1191            // correct TLBEntry.
1192            new_entry = sender_state->tlbEntry;
1193            assert(new_entry);
1194            local_entry = new_entry;
1195
1196            if (allocationPolicy) {
1197                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1198                        virt_page_addr);
1199
1200                local_entry = insert(virt_page_addr, *new_entry);
1201            }
1202
1203            assert(local_entry);
1204        }
1205
1206        /**
1207         * At this point the packet carries an up-to-date tlbEntry pointer
1208         * in its senderState.
1209         * Next step is to do the paging protection checks.
1210         */
1211        DPRINTF(GPUTLB, "Entry found with vaddr %#x,  doing protection checks "
1212                "while paddr was %#x.\n", local_entry->vaddr,
1213                local_entry->paddr);
1214
1215        pagingProtectionChecks(tc, pkt, local_entry, mode);
1216        int page_size = local_entry->size();
1217        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1218        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1219
1220        // Since this packet will be sent through the cpu side slave port,
1221        // it must be converted to a response pkt if it is not one already
1222        if (pkt->isRequest()) {
1223            pkt->makeTimingResponse();
1224        }
1225
1226        pkt->req->setPaddr(paddr);
1227
1228        if (local_entry->uncacheable) {
1229             pkt->req->setFlags(Request::UNCACHEABLE);
1230        }
1231
1232        //send packet back to coalescer
1233        cpuSidePort[0]->sendTimingResp(pkt);
1234        //schedule cleanup event
1235        cleanupQueue.push(virt_page_addr);
1236
1237        // schedule this only once per cycle.
1238        // The check is required because we might have multiple translations
1239        // returning the same cycle
1240        // this is a maximum priority event and must be on the same cycle
1241        // as the cleanup event in TLBCoalescer to avoid a race with
1242        // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
1243        if (!cleanupEvent.scheduled())
1244            schedule(cleanupEvent, curTick());
1245    }
1246
1247    /**
1248     * Here we take the appropriate actions based on the result of the
1249     * TLB lookup.
1250     */
1251    void
1252    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
1253                              PacketPtr pkt)
1254    {
1255        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1256
1257        assert(translationReturnEvent[virtPageAddr]);
1258        assert(pkt);
1259
1260        TranslationState *tmp_sender_state =
1261            safe_cast<TranslationState*>(pkt->senderState);
1262
1263        int req_cnt = tmp_sender_state->reqCnt.back();
1264        bool update_stats = !tmp_sender_state->prefetch;
1265
1266
1267        if (outcome == TLB_HIT) {
1268            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1269
1270            if (update_stats) {
1271                accessCycles += (req_cnt * curTick());
1272                localCycles += curTick();
1273            }
1274
1275        } else if (outcome == TLB_MISS) {
1276
1277            DPRINTF(GPUTLB, "This is a TLB miss\n");
1278            if (update_stats) {
1279                accessCycles += (req_cnt*curTick());
1280                localCycles += curTick();
1281            }
1282
1283            if (hasMemSidePort) {
1284                // the one cyle added here represent the delay from when we get
1285                // the reply back till when we propagate it to the coalescer
1286                // above.
1287                if (update_stats) {
1288                    accessCycles += (req_cnt * 1);
1289                    localCycles += 1;
1290                }
1291
1292                /**
1293                 * There is a TLB below. Send the coalesced request.
1294                 * We actually send the very first packet of all the
1295                 * pending packets for this virtual page address.
1296                 */
1297                if (!memSidePort[0]->sendTimingReq(pkt)) {
1298                    DPRINTF(GPUTLB, "Failed sending translation request to "
1299                            "lower level TLB for addr %#x\n", virtPageAddr);
1300
1301                    memSidePort[0]->retries.push_back(pkt);
1302                } else {
1303                    DPRINTF(GPUTLB, "Sent translation request to lower level "
1304                            "TLB for addr %#x\n", virtPageAddr);
1305                }
1306            } else {
1307                //this is the last level TLB. Start a page walk
1308                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1309                        "addr %#x\n", virtPageAddr);
1310
1311                if (update_stats)
1312                    pageTableCycles -= (req_cnt*curTick());
1313
1314                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1315                assert(tlb_event);
1316                tlb_event->updateOutcome(PAGE_WALK);
1317                schedule(tlb_event, curTick() + ticks(missLatency2));
1318            }
1319        } else if (outcome == PAGE_WALK) {
1320            if (update_stats)
1321                pageTableCycles += (req_cnt*curTick());
1322
1323            // Need to access the page table and update the TLB
1324            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1325                    virtPageAddr);
1326
1327            TranslationState *sender_state =
1328                safe_cast<TranslationState*>(pkt->senderState);
1329
1330            Process *p = sender_state->tc->getProcessPtr();
1331            TlbEntry newEntry;
1332            Addr vaddr = pkt->req->getVaddr();
1333    #ifndef NDEBUG
1334            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1335            assert(alignedVaddr == virtPageAddr);
1336    #endif
1337            bool success;
1338            success = p->pTable->lookup(vaddr, newEntry);
1339            if (!success && sender_state->tlbMode != BaseTLB::Execute) {
1340                if (p->fixupStackFault(vaddr)) {
1341                    success = p->pTable->lookup(vaddr, newEntry);
1342                }
1343            }
1344
1345            DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1346                    newEntry.pageStart());
1347
1348            sender_state->tlbEntry =
1349                new GpuTlbEntry(0, newEntry.vaddr, newEntry.paddr, success);
1350
1351            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1352        } else if (outcome == MISS_RETURN) {
1353            /** we add an extra cycle in the return path of the translation
1354             * requests in between the various TLB levels.
1355             */
1356            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1357        } else {
1358            assert(false);
1359        }
1360    }
1361
1362    void
1363    GpuTLB::TLBEvent::process()
1364    {
1365        tlb->translationReturn(virtPageAddr, outcome, pkt);
1366    }
1367
1368    const char*
1369    GpuTLB::TLBEvent::description() const
1370    {
1371        return "trigger translationDoneEvent";
1372    }
1373
1374    void
1375    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
1376    {
1377        outcome = _outcome;
1378    }
1379
1380    Addr
1381    GpuTLB::TLBEvent::getTLBEventVaddr()
1382    {
1383        return virtPageAddr;
1384    }
1385
1386    /*
1387     * recvTiming receives a coalesced timing request from a TLBCoalescer
1388     * and it calls issueTLBLookup()
1389     * It only rejects the packet if we have exceeded the max
1390     * outstanding number of requests for the TLB
1391     */
1392    bool
1393    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1394    {
1395        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1396            tlb->issueTLBLookup(pkt);
1397            // update number of outstanding translation requests
1398            tlb->outstandingReqs++;
1399            return true;
1400         } else {
1401            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1402                    tlb->outstandingReqs);
1403            return false;
1404         }
1405    }
1406
1407    /**
1408     * handleFuncTranslationReturn is called on a TLB hit,
1409     * when a TLB miss returns or when a page fault returns.
1410     * It updates LRU, inserts the TLB entry on a miss
1411     * depending on the allocation policy and does the required
1412     * protection checks. It does NOT create a new packet to
1413     * update the packet's addr; this is done in hsail-gpu code.
1414     */
1415    void
1416    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
1417    {
1418        TranslationState *sender_state =
1419            safe_cast<TranslationState*>(pkt->senderState);
1420
1421        ThreadContext *tc = sender_state->tc;
1422        Mode mode = sender_state->tlbMode;
1423        Addr vaddr = pkt->req->getVaddr();
1424
1425        GpuTlbEntry *local_entry, *new_entry;
1426
1427        if (tlb_outcome == TLB_HIT) {
1428            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1429                    "%#x\n", vaddr);
1430
1431            local_entry = sender_state->tlbEntry;
1432        } else {
1433            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1434                    "%#x\n", vaddr);
1435
1436            // We are returning either from a page walk or from a hit at a lower
1437            // TLB level. The senderState should be "carrying" a pointer to the
1438            // correct TLBEntry.
1439            new_entry = sender_state->tlbEntry;
1440            assert(new_entry);
1441            local_entry = new_entry;
1442
1443            if (allocationPolicy) {
1444                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1445
1446                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1447                        virt_page_addr);
1448
1449                local_entry = insert(virt_page_addr, *new_entry);
1450            }
1451
1452            assert(local_entry);
1453        }
1454
1455        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1456                "while paddr was %#x.\n", local_entry->vaddr,
1457                local_entry->paddr);
1458
1459        // Do paging checks if it's a normal functional access.  If it's for a
1460        // prefetch, then sometimes you can try to prefetch something that won't
1461        // pass protection. We don't actually want to fault becuase there is no
1462        // demand access to deem this a violation.  Just put it in the TLB and
1463        // it will fault if indeed a future demand access touches it in
1464        // violation.
1465        if (!sender_state->prefetch && sender_state->tlbEntry->valid)
1466            pagingProtectionChecks(tc, pkt, local_entry, mode);
1467
1468        int page_size = local_entry->size();
1469        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1470        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1471
1472        pkt->req->setPaddr(paddr);
1473
1474        if (local_entry->uncacheable)
1475             pkt->req->setFlags(Request::UNCACHEABLE);
1476    }
1477
1478    // This is used for atomic translations. Need to
1479    // make it all happen during the same cycle.
1480    void
1481    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
1482    {
1483        TranslationState *sender_state =
1484            safe_cast<TranslationState*>(pkt->senderState);
1485
1486        ThreadContext *tc = sender_state->tc;
1487        bool update_stats = !sender_state->prefetch;
1488
1489        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1490                                        TheISA::PageBytes);
1491
1492        if (update_stats)
1493            tlb->updatePageFootprint(virt_page_addr);
1494
1495        // do the TLB lookup without updating the stats
1496        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1497        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1498
1499        // functional mode means no coalescing
1500        // global metrics are the same as the local metrics
1501        if (update_stats) {
1502            tlb->globalNumTLBAccesses++;
1503
1504            if (success) {
1505                sender_state->hitLevel = sender_state->reqCnt.size();
1506                tlb->globalNumTLBHits++;
1507            }
1508        }
1509
1510        if (!success) {
1511            if (update_stats)
1512                tlb->globalNumTLBMisses++;
1513            if (tlb->hasMemSidePort) {
1514                // there is a TLB below -> propagate down the TLB hierarchy
1515                tlb->memSidePort[0]->sendFunctional(pkt);
1516                // If no valid translation from a prefetch, then just return
1517                if (sender_state->prefetch && !pkt->req->hasPaddr())
1518                    return;
1519            } else {
1520                // Need to access the page table and update the TLB
1521                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1522                        virt_page_addr);
1523
1524                Process *p = tc->getProcessPtr();
1525                TlbEntry newEntry;
1526
1527                Addr vaddr = pkt->req->getVaddr();
1528    #ifndef NDEBUG
1529                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1530                assert(alignedVaddr == virt_page_addr);
1531    #endif
1532
1533                bool success = p->pTable->lookup(vaddr, newEntry);
1534                if (!success && sender_state->tlbMode != BaseTLB::Execute) {
1535                    if (p->fixupStackFault(vaddr))
1536                        success = p->pTable->lookup(vaddr, newEntry);
1537                }
1538
1539                if (!sender_state->prefetch) {
1540                    // no PageFaults are permitted after
1541                    // the second page table lookup
1542                    assert(success);
1543
1544                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1545                           newEntry.pageStart());
1546
1547                    sender_state->tlbEntry = new GpuTlbEntry(0, newEntry.vaddr,
1548                                                             newEntry.paddr,
1549                                                             success);
1550                } else {
1551                    // If this was a prefetch, then do the normal thing if it
1552                    // was a successful translation.  Otherwise, send an empty
1553                    // TLB entry back so that it can be figured out as empty and
1554                    // handled accordingly.
1555                    if (success) {
1556                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1557                               newEntry.pageStart());
1558
1559                        sender_state->tlbEntry = new GpuTlbEntry(0,
1560                                                                 newEntry.vaddr,
1561                                                                 newEntry.paddr,
1562                                                                 success);
1563                    } else {
1564                        DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1565                                alignedVaddr);
1566
1567                        sender_state->tlbEntry = new GpuTlbEntry();
1568
1569                        return;
1570                    }
1571                }
1572            }
1573        } else {
1574            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1575                    tlb->lookup(pkt->req->getVaddr()));
1576
1577            GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1578                                             update_stats);
1579
1580            assert(entry);
1581
1582            sender_state->tlbEntry =
1583                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
1584        }
1585        // This is the function that would populate pkt->req with the paddr of
1586        // the translation. But if no translation happens (i.e Prefetch fails)
1587        // then the early returns in the above code wiill keep this function
1588        // from executing.
1589        tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1590    }
1591
1592    void
1593    GpuTLB::CpuSidePort::recvReqRetry()
1594    {
1595        // The CPUSidePort never sends anything but replies. No retries
1596        // expected.
1597        assert(false);
1598    }
1599
1600    AddrRangeList
1601    GpuTLB::CpuSidePort::getAddrRanges() const
1602    {
1603        // currently not checked by the master
1604        AddrRangeList ranges;
1605
1606        return ranges;
1607    }
1608
1609    /**
1610     * MemSidePort receives the packet back.
1611     * We need to call the handleTranslationReturn
1612     * and propagate up the hierarchy.
1613     */
1614    bool
1615    GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
1616    {
1617        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1618                                        TheISA::PageBytes);
1619
1620        DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1621                virt_page_addr);
1622
1623        TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1624        assert(tlb_event);
1625        assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1626
1627        tlb_event->updateOutcome(MISS_RETURN);
1628        tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1629
1630        return true;
1631    }
1632
1633    void
1634    GpuTLB::MemSidePort::recvReqRetry()
1635    {
1636        // No retries should reach the TLB. The retries
1637        // should only reach the TLBCoalescer.
1638        assert(false);
1639    }
1640
1641    void
1642    GpuTLB::cleanup()
1643    {
1644        while (!cleanupQueue.empty()) {
1645            Addr cleanup_addr = cleanupQueue.front();
1646            cleanupQueue.pop();
1647
1648            // delete TLBEvent
1649            TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1650            delete old_tlb_event;
1651            translationReturnEvent.erase(cleanup_addr);
1652
1653            // update number of outstanding requests
1654            outstandingReqs--;
1655        }
1656
1657        /** the higher level coalescer should retry if it has
1658         * any pending requests.
1659         */
1660        for (int i = 0; i < cpuSidePort.size(); ++i) {
1661            cpuSidePort[i]->sendRetryReq();
1662        }
1663    }
1664
1665    void
1666    GpuTLB::updatePageFootprint(Addr virt_page_addr)
1667    {
1668
1669        std::pair<AccessPatternTable::iterator, bool> ret;
1670
1671        AccessInfo tmp_access_info;
1672        tmp_access_info.lastTimeAccessed = 0;
1673        tmp_access_info.accessesPerPage = 0;
1674        tmp_access_info.totalReuseDistance = 0;
1675        tmp_access_info.sumDistance = 0;
1676        tmp_access_info.meanDistance = 0;
1677
1678        ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1679                                  tmp_access_info));
1680
1681        bool first_page_access = ret.second;
1682
1683        if (first_page_access) {
1684            numUniquePages++;
1685        } else  {
1686            int accessed_before;
1687            accessed_before  = curTick() - ret.first->second.lastTimeAccessed;
1688            ret.first->second.totalReuseDistance += accessed_before;
1689        }
1690
1691        ret.first->second.accessesPerPage++;
1692        ret.first->second.lastTimeAccessed = curTick();
1693
1694        if (accessDistance) {
1695            ret.first->second.localTLBAccesses
1696                .push_back(localNumTLBAccesses.value());
1697        }
1698    }
1699
1700    void
1701    GpuTLB::exitCallback()
1702    {
1703        std::ostream *page_stat_file = nullptr;
1704
1705        if (accessDistance) {
1706
1707            // print per page statistics to a separate file (.csv format)
1708            // simout is the gem5 output directory (default is m5out or the one
1709            // specified with -d
1710            page_stat_file = simout.create(name().c_str())->stream();
1711
1712            // print header
1713            *page_stat_file << "page,max_access_distance,mean_access_distance, "
1714                            << "stddev_distance" << std::endl;
1715        }
1716
1717        // update avg. reuse distance footprint
1718        AccessPatternTable::iterator iter, iter_begin, iter_end;
1719        unsigned int sum_avg_reuse_distance_per_page = 0;
1720
1721        // iterate through all pages seen by this TLB
1722        for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1723            sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1724                                               iter->second.accessesPerPage;
1725
1726            if (accessDistance) {
1727                unsigned int tmp = iter->second.localTLBAccesses[0];
1728                unsigned int prev = tmp;
1729
1730                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1731                    if (i) {
1732                        tmp = prev + 1;
1733                    }
1734
1735                    prev = iter->second.localTLBAccesses[i];
1736                    // update the localTLBAccesses value
1737                    // with the actual differece
1738                    iter->second.localTLBAccesses[i] -= tmp;
1739                    // compute the sum of AccessDistance per page
1740                    // used later for mean
1741                    iter->second.sumDistance +=
1742                        iter->second.localTLBAccesses[i];
1743                }
1744
1745                iter->second.meanDistance =
1746                    iter->second.sumDistance / iter->second.accessesPerPage;
1747
1748                // compute std_dev and max  (we need a second round because we
1749                // need to know the mean value
1750                unsigned int max_distance = 0;
1751                unsigned int stddev_distance = 0;
1752
1753                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1754                    unsigned int tmp_access_distance =
1755                        iter->second.localTLBAccesses[i];
1756
1757                    if (tmp_access_distance > max_distance) {
1758                        max_distance = tmp_access_distance;
1759                    }
1760
1761                    unsigned int diff =
1762                        tmp_access_distance - iter->second.meanDistance;
1763                    stddev_distance += pow(diff, 2);
1764
1765                }
1766
1767                stddev_distance =
1768                    sqrt(stddev_distance/iter->second.accessesPerPage);
1769
1770                if (page_stat_file) {
1771                    *page_stat_file << std::hex << iter->first << ",";
1772                    *page_stat_file << std::dec << max_distance << ",";
1773                    *page_stat_file << std::dec << iter->second.meanDistance
1774                                    << ",";
1775                    *page_stat_file << std::dec << stddev_distance;
1776                    *page_stat_file << std::endl;
1777                }
1778
1779                // erase the localTLBAccesses array
1780                iter->second.localTLBAccesses.clear();
1781            }
1782        }
1783
1784        if (!TLBFootprint.empty()) {
1785            avgReuseDistance =
1786                sum_avg_reuse_distance_per_page / TLBFootprint.size();
1787        }
1788
1789        //clear the TLBFootprint map
1790        TLBFootprint.clear();
1791    }
1792} // namespace X86ISA
1793
1794X86ISA::GpuTLB*
1795X86GPUTLBParams::create()
1796{
1797    return new X86ISA::GpuTLB(this);
1798}
1799
1800