// gpu_tlb.cc revision 12749
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Lisa Hsu
34 */
35
36#include "gpu-compute/gpu_tlb.hh"
37
38#include <cmath>
39#include <cstring>
40
41#include "arch/x86/faults.hh"
42#include "arch/x86/insts/microldstop.hh"
43#include "arch/x86/pagetable.hh"
44#include "arch/x86/pagetable_walker.hh"
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/x86_traits.hh"
47#include "base/bitfield.hh"
48#include "base/output.hh"
49#include "base/trace.hh"
50#include "cpu/base.hh"
51#include "cpu/thread_context.hh"
52#include "debug/GPUPrefetch.hh"
53#include "debug/GPUTLB.hh"
54#include "mem/packet_access.hh"
55#include "mem/page_table.hh"
56#include "mem/request.hh"
57#include "sim/process.hh"
58
59namespace X86ISA
60{
61
    GpuTLB::GpuTLB(const Params *p)
        : MemObject(p), configAddress(0), size(p->size),
          cleanupEvent([this]{ cleanup(); }, name(), false,
                       Event::Maximum_Pri),
          exitEvent([this]{ exitCallback(); }, name())
    {
        assoc = p->assoc;
        // The associativity cannot exceed the total number of entries.
        assert(assoc <= size);
        numSets = size/assoc;
        allocationPolicy = p->allocationPolicy;
        hasMemSidePort = false;
        accessDistance = p->accessDistance;
        clock = p->clk_domain->clockPeriod();

        // Backing storage for all TLB entries; the per-set free/entry
        // lists below hold pointers into this vector.
        tlb.assign(size, TlbEntry());

        freeList.resize(numSets);
        entryList.resize(numSets);

        // Initially every way of every set is free.
        for (int set = 0; set < numSets; ++set) {
            for (int way = 0; way < assoc; ++way) {
                int x = set * assoc + way;
                freeList[set].push_back(&tlb.at(x));
            }
        }

        // Fully associative iff there is a single set.
        FA = (size == assoc);

        /**
         * @warning: the set-associative version assumes you have a
         * fixed page size of 4KB.
         * If the page size is greather than 4KB (as defined in the
         * TheISA::PageBytes), then there are various issues w/ the current
         * implementation (you'd have the same 8KB page being replicated in
         * different sets etc)
         */
        setMask = numSets - 1;

    #if 0
        // GpuTLB doesn't yet support full system
        walker = p->walker;
        walker->setTLB(this);
    #endif

        maxCoalescedReqs = p->maxOutstandingReqs;

        // Do not allow maxCoalescedReqs to be more than the TLB associativity
        if (maxCoalescedReqs > assoc) {
            maxCoalescedReqs = assoc;
            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
        }

        outstandingReqs = 0;
        hitLatency = p->hitLatency;
        missLatency1 = p->missLatency1;
        missLatency2 = p->missLatency2;

        // create the slave ports based on the number of connected ports
        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }

        // create the master ports based on the number of connected ports
        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }
    }
131
    // FIXME: this destructor is apparently never invoked (see original
    // note) — the assert below documents the expectation that no
    // outstanding translation-return events remain at teardown.
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        assert(translationReturnEvent.empty());
    }
138
139    BaseSlavePort&
140    GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
141    {
142        if (if_name == "slave") {
143            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
144                panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
145            }
146
147            return *cpuSidePort[idx];
148        } else {
149            panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
150        }
151    }
152
153    BaseMasterPort&
154    GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
155    {
156        if (if_name == "master") {
157            if (idx >= static_cast<PortID>(memSidePort.size())) {
158                panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
159            }
160
161            hasMemSidePort = true;
162
163            return *memSidePort[idx];
164        } else {
165            panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
166        }
167    }
168
169    TlbEntry*
170    GpuTLB::insert(Addr vpn, TlbEntry &entry)
171    {
172        TlbEntry *newEntry = nullptr;
173
174        /**
175         * vpn holds the virtual page address
176         * The least significant bits are simply masked
177         */
178        int set = (vpn >> TheISA::PageShift) & setMask;
179
180        if (!freeList[set].empty()) {
181            newEntry = freeList[set].front();
182            freeList[set].pop_front();
183        } else {
184            newEntry = entryList[set].back();
185            entryList[set].pop_back();
186        }
187
188        *newEntry = entry;
189        newEntry->vaddr = vpn;
190        entryList[set].push_front(newEntry);
191
192        return newEntry;
193    }
194
195    GpuTLB::EntryList::iterator
196    GpuTLB::lookupIt(Addr va, bool update_lru)
197    {
198        int set = (va >> TheISA::PageShift) & setMask;
199
200        if (FA) {
201            assert(!set);
202        }
203
204        auto entry = entryList[set].begin();
205        for (; entry != entryList[set].end(); ++entry) {
206            int page_size = (*entry)->size();
207
208            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
209                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
210                        "with size %#x.\n", va, (*entry)->vaddr, page_size);
211
212                if (update_lru) {
213                    entryList[set].push_front(*entry);
214                    entryList[set].erase(entry);
215                    entry = entryList[set].begin();
216                }
217
218                break;
219            }
220        }
221
222        return entry;
223    }
224
225    TlbEntry*
226    GpuTLB::lookup(Addr va, bool update_lru)
227    {
228        int set = (va >> TheISA::PageShift) & setMask;
229
230        auto entry = lookupIt(va, update_lru);
231
232        if (entry == entryList[set].end())
233            return nullptr;
234        else
235            return *entry;
236    }
237
238    void
239    GpuTLB::invalidateAll()
240    {
241        DPRINTF(GPUTLB, "Invalidating all entries.\n");
242
243        for (int i = 0; i < numSets; ++i) {
244            while (!entryList[i].empty()) {
245                TlbEntry *entry = entryList[i].front();
246                entryList[i].pop_front();
247                freeList[i].push_back(entry);
248            }
249        }
250    }
251
    // Latch the given configuration address into the TLB's configAddress
    // member.  NOTE(review): presumably consumed by config-space accesses;
    // confirm against callers.
    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }
257
258    void
259    GpuTLB::invalidateNonGlobal()
260    {
261        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
262
263        for (int i = 0; i < numSets; ++i) {
264            for (auto entryIt = entryList[i].begin();
265                 entryIt != entryList[i].end();) {
266                if (!(*entryIt)->global) {
267                    freeList[i].push_back(*entryIt);
268                    entryList[i].erase(entryIt++);
269                } else {
270                    ++entryIt;
271                }
272            }
273        }
274    }
275
276    void
277    GpuTLB::demapPage(Addr va, uint64_t asn)
278    {
279
280        int set = (va >> TheISA::PageShift) & setMask;
281        auto entry = lookupIt(va, false);
282
283        if (entry != entryList[set].end()) {
284            freeList[set].push_back(*entry);
285            entryList[set].erase(entry);
286        }
287    }
288
    /**
     * Translate a request that targets a non-memory (internal) address
     * space: CPUID, MSRs, or x86 legacy I/O ports.  The physical address
     * is set to an encoding the memory system decodes directly, so no
     * page-table/TLB lookup is involved.
     *
     * @param req request whose vaddr encodes the internal space and index
     * @param tc  thread context, used to read the PCI config address MSR
     * @return NoFault on success; GeneralProtection for an unknown MSR
     */
    Fault
    GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Addresses references internal memory.\n");
        Addr vaddr = req->getVaddr();
        // The address-space selector lives above the low 3 bits.
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;

            // Map the architectural MSR number onto gem5's MiscReg index.
            switch (vaddr & ~IntAddrPrefixMask) {
              case 0x10:
                regNum = MISCREG_TSC;
                break;
              case 0x1B:
                regNum = MISCREG_APIC_BASE;
                break;
              case 0xFE:
                regNum = MISCREG_MTRRCAP;
                break;
              case 0x174:
                regNum = MISCREG_SYSENTER_CS;
                break;
              case 0x175:
                regNum = MISCREG_SYSENTER_ESP;
                break;
              case 0x176:
                regNum = MISCREG_SYSENTER_EIP;
                break;
              case 0x179:
                regNum = MISCREG_MCG_CAP;
                break;
              case 0x17A:
                regNum = MISCREG_MCG_STATUS;
                break;
              case 0x17B:
                regNum = MISCREG_MCG_CTL;
                break;
              case 0x1D9:
                regNum = MISCREG_DEBUG_CTL_MSR;
                break;
              case 0x1DB:
                regNum = MISCREG_LAST_BRANCH_FROM_IP;
                break;
              case 0x1DC:
                regNum = MISCREG_LAST_BRANCH_TO_IP;
                break;
              case 0x1DD:
                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
                break;
              case 0x1DE:
                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
                break;
              // Variable-range MTRR base/mask pairs 0-7.
              case 0x200:
                regNum = MISCREG_MTRR_PHYS_BASE_0;
                break;
              case 0x201:
                regNum = MISCREG_MTRR_PHYS_MASK_0;
                break;
              case 0x202:
                regNum = MISCREG_MTRR_PHYS_BASE_1;
                break;
              case 0x203:
                regNum = MISCREG_MTRR_PHYS_MASK_1;
                break;
              case 0x204:
                regNum = MISCREG_MTRR_PHYS_BASE_2;
                break;
              case 0x205:
                regNum = MISCREG_MTRR_PHYS_MASK_2;
                break;
              case 0x206:
                regNum = MISCREG_MTRR_PHYS_BASE_3;
                break;
              case 0x207:
                regNum = MISCREG_MTRR_PHYS_MASK_3;
                break;
              case 0x208:
                regNum = MISCREG_MTRR_PHYS_BASE_4;
                break;
              case 0x209:
                regNum = MISCREG_MTRR_PHYS_MASK_4;
                break;
              case 0x20A:
                regNum = MISCREG_MTRR_PHYS_BASE_5;
                break;
              case 0x20B:
                regNum = MISCREG_MTRR_PHYS_MASK_5;
                break;
              case 0x20C:
                regNum = MISCREG_MTRR_PHYS_BASE_6;
                break;
              case 0x20D:
                regNum = MISCREG_MTRR_PHYS_MASK_6;
                break;
              case 0x20E:
                regNum = MISCREG_MTRR_PHYS_BASE_7;
                break;
              case 0x20F:
                regNum = MISCREG_MTRR_PHYS_MASK_7;
                break;
              // Fixed-range MTRRs.
              case 0x250:
                regNum = MISCREG_MTRR_FIX_64K_00000;
                break;
              case 0x258:
                regNum = MISCREG_MTRR_FIX_16K_80000;
                break;
              case 0x259:
                regNum = MISCREG_MTRR_FIX_16K_A0000;
                break;
              case 0x268:
                regNum = MISCREG_MTRR_FIX_4K_C0000;
                break;
              case 0x269:
                regNum = MISCREG_MTRR_FIX_4K_C8000;
                break;
              case 0x26A:
                regNum = MISCREG_MTRR_FIX_4K_D0000;
                break;
              case 0x26B:
                regNum = MISCREG_MTRR_FIX_4K_D8000;
                break;
              case 0x26C:
                regNum = MISCREG_MTRR_FIX_4K_E0000;
                break;
              case 0x26D:
                regNum = MISCREG_MTRR_FIX_4K_E8000;
                break;
              case 0x26E:
                regNum = MISCREG_MTRR_FIX_4K_F0000;
                break;
              case 0x26F:
                regNum = MISCREG_MTRR_FIX_4K_F8000;
                break;
              case 0x277:
                regNum = MISCREG_PAT;
                break;
              case 0x2FF:
                regNum = MISCREG_DEF_TYPE;
                break;
              // Machine-check banks 0-7: CTL/STATUS/ADDR/MISC.
              case 0x400:
                regNum = MISCREG_MC0_CTL;
                break;
              case 0x404:
                regNum = MISCREG_MC1_CTL;
                break;
              case 0x408:
                regNum = MISCREG_MC2_CTL;
                break;
              case 0x40C:
                regNum = MISCREG_MC3_CTL;
                break;
              case 0x410:
                regNum = MISCREG_MC4_CTL;
                break;
              case 0x414:
                regNum = MISCREG_MC5_CTL;
                break;
              case 0x418:
                regNum = MISCREG_MC6_CTL;
                break;
              case 0x41C:
                regNum = MISCREG_MC7_CTL;
                break;
              case 0x401:
                regNum = MISCREG_MC0_STATUS;
                break;
              case 0x405:
                regNum = MISCREG_MC1_STATUS;
                break;
              case 0x409:
                regNum = MISCREG_MC2_STATUS;
                break;
              case 0x40D:
                regNum = MISCREG_MC3_STATUS;
                break;
              case 0x411:
                regNum = MISCREG_MC4_STATUS;
                break;
              case 0x415:
                regNum = MISCREG_MC5_STATUS;
                break;
              case 0x419:
                regNum = MISCREG_MC6_STATUS;
                break;
              case 0x41D:
                regNum = MISCREG_MC7_STATUS;
                break;
              case 0x402:
                regNum = MISCREG_MC0_ADDR;
                break;
              case 0x406:
                regNum = MISCREG_MC1_ADDR;
                break;
              case 0x40A:
                regNum = MISCREG_MC2_ADDR;
                break;
              case 0x40E:
                regNum = MISCREG_MC3_ADDR;
                break;
              case 0x412:
                regNum = MISCREG_MC4_ADDR;
                break;
              case 0x416:
                regNum = MISCREG_MC5_ADDR;
                break;
              case 0x41A:
                regNum = MISCREG_MC6_ADDR;
                break;
              case 0x41E:
                regNum = MISCREG_MC7_ADDR;
                break;
              case 0x403:
                regNum = MISCREG_MC0_MISC;
                break;
              case 0x407:
                regNum = MISCREG_MC1_MISC;
                break;
              case 0x40B:
                regNum = MISCREG_MC2_MISC;
                break;
              case 0x40F:
                regNum = MISCREG_MC3_MISC;
                break;
              case 0x413:
                regNum = MISCREG_MC4_MISC;
                break;
              case 0x417:
                regNum = MISCREG_MC5_MISC;
                break;
              case 0x41B:
                regNum = MISCREG_MC6_MISC;
                break;
              case 0x41F:
                regNum = MISCREG_MC7_MISC;
                break;
              // AMD64 extended-feature MSRs.
              case 0xC0000080:
                regNum = MISCREG_EFER;
                break;
              case 0xC0000081:
                regNum = MISCREG_STAR;
                break;
              case 0xC0000082:
                regNum = MISCREG_LSTAR;
                break;
              case 0xC0000083:
                regNum = MISCREG_CSTAR;
                break;
              case 0xC0000084:
                regNum = MISCREG_SF_MASK;
                break;
              case 0xC0000100:
                regNum = MISCREG_FS_BASE;
                break;
              case 0xC0000101:
                regNum = MISCREG_GS_BASE;
                break;
              case 0xC0000102:
                regNum = MISCREG_KERNEL_GS_BASE;
                break;
              case 0xC0000103:
                regNum = MISCREG_TSC_AUX;
                break;
              case 0xC0010000:
                regNum = MISCREG_PERF_EVT_SEL0;
                break;
              case 0xC0010001:
                regNum = MISCREG_PERF_EVT_SEL1;
                break;
              case 0xC0010002:
                regNum = MISCREG_PERF_EVT_SEL2;
                break;
              case 0xC0010003:
                regNum = MISCREG_PERF_EVT_SEL3;
                break;
              case 0xC0010004:
                regNum = MISCREG_PERF_EVT_CTR0;
                break;
              case 0xC0010005:
                regNum = MISCREG_PERF_EVT_CTR1;
                break;
              case 0xC0010006:
                regNum = MISCREG_PERF_EVT_CTR2;
                break;
              case 0xC0010007:
                regNum = MISCREG_PERF_EVT_CTR3;
                break;
              case 0xC0010010:
                regNum = MISCREG_SYSCFG;
                break;
              case 0xC0010016:
                regNum = MISCREG_IORR_BASE0;
                break;
              case 0xC0010017:
                regNum = MISCREG_IORR_BASE1;
                break;
              case 0xC0010018:
                regNum = MISCREG_IORR_MASK0;
                break;
              case 0xC0010019:
                regNum = MISCREG_IORR_MASK1;
                break;
              case 0xC001001A:
                regNum = MISCREG_TOP_MEM;
                break;
              case 0xC001001D:
                regNum = MISCREG_TOP_MEM2;
                break;
              case 0xC0010114:
                regNum = MISCREG_VM_CR;
                break;
              case 0xC0010115:
                regNum = MISCREG_IGNNE;
                break;
              case 0xC0010116:
                regNum = MISCREG_SMM_CTL;
                break;
              case 0xC0010117:
                regNum = MISCREG_VM_HSAVE_PA;
                break;
              default:
                // Unrecognized MSR -> general-protection fault.
                return std::make_shared<GeneralProtection>(0);
            }
            //The index is multiplied by the size of a MiscReg so that
            //any memory dependence calculations will not see these as
            //overlapping.
            req->setPaddr(regNum * sizeof(MiscReg));
            return NoFault;
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
            // bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));

            // Port 0xCF8 (dword) is the PCI config-address register.
            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                // Ports 0xCFC-0xCFF: PCI config data window.
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                // Bit 31 of the config address enables config-space access.
                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                // Any other port: plain uncacheable I/O access.
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
            return NoFault;
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }
    }
656
657    /**
658     * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
659     * and false on a TLB miss.
660     * Many of the checks about different modes have been converted to
661     * assertions, since these parts of the code are not really used.
662     * On a hit it will update the LRU stack.
663     */
664    bool
665    GpuTLB::tlbLookup(const RequestPtr &req,
666                      ThreadContext *tc, bool update_stats)
667    {
668        bool tlb_hit = false;
669    #ifndef NDEBUG
670        uint32_t flags = req->getFlags();
671        int seg = flags & SegmentFlagMask;
672    #endif
673
674        assert(seg != SEGMENT_REG_MS);
675        Addr vaddr = req->getVaddr();
676        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
677        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
678
679        if (m5Reg.prot) {
680            DPRINTF(GPUTLB, "In protected mode.\n");
681            // make sure we are in 64-bit mode
682            assert(m5Reg.mode == LongMode);
683
684            // If paging is enabled, do the translation.
685            if (m5Reg.paging) {
686                DPRINTF(GPUTLB, "Paging enabled.\n");
687                //update LRU stack on a hit
688                TlbEntry *entry = lookup(vaddr, true);
689
690                if (entry)
691                    tlb_hit = true;
692
693                if (!update_stats) {
694                    // functional tlb access for memory initialization
695                    // i.e., memory seeding or instr. seeding -> don't update
696                    // TLB and stats
697                    return tlb_hit;
698                }
699
700                localNumTLBAccesses++;
701
702                if (!entry) {
703                    localNumTLBMisses++;
704                } else {
705                    localNumTLBHits++;
706                }
707            }
708        }
709
710        return tlb_hit;
711    }
712
713    Fault
714    GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
715                      Translation *translation, Mode mode,
716                      bool &delayedResponse, bool timing, int &latency)
717    {
718        uint32_t flags = req->getFlags();
719        int seg = flags & SegmentFlagMask;
720        bool storeCheck = flags & (StoreCheck << FlagShift);
721
722        // If this is true, we're dealing with a request
723        // to a non-memory address space.
724        if (seg == SEGMENT_REG_MS) {
725            return translateInt(req, tc);
726        }
727
728        delayedResponse = false;
729        Addr vaddr = req->getVaddr();
730        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
731
732        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
733
734        // If protected mode has been enabled...
735        if (m5Reg.prot) {
736            DPRINTF(GPUTLB, "In protected mode.\n");
737            // If we're not in 64-bit mode, do protection/limit checks
738            if (m5Reg.mode != LongMode) {
739                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
740                        "protection.\n");
741
742                // Check for a null segment selector.
743                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
744                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
745                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
746                    return std::make_shared<GeneralProtection>(0);
747                }
748
749                bool expandDown = false;
750                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
751
752                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
753                    if (!attr.writable && (mode == BaseTLB::Write ||
754                        storeCheck))
755                        return std::make_shared<GeneralProtection>(0);
756
757                    if (!attr.readable && mode == BaseTLB::Read)
758                        return std::make_shared<GeneralProtection>(0);
759
760                    expandDown = attr.expandDown;
761
762                }
763
764                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
765                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
766                // This assumes we're not in 64 bit mode. If we were, the
767                // default address size is 64 bits, overridable to 32.
768                int size = 32;
769                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
770                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
771
772                if ((csAttr.defaultSize && sizeOverride) ||
773                    (!csAttr.defaultSize && !sizeOverride)) {
774                    size = 16;
775                }
776
777                Addr offset = bits(vaddr - base, size - 1, 0);
778                Addr endOffset = offset + req->getSize() - 1;
779
780                if (expandDown) {
781                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
782                    warn_once("Expand down segments are untested.\n");
783
784                    if (offset <= limit || endOffset <= limit)
785                        return std::make_shared<GeneralProtection>(0);
786                } else {
787                    if (offset > limit || endOffset > limit)
788                        return std::make_shared<GeneralProtection>(0);
789                }
790            }
791
792            // If paging is enabled, do the translation.
793            if (m5Reg.paging) {
794                DPRINTF(GPUTLB, "Paging enabled.\n");
795                // The vaddr already has the segment base applied.
796                TlbEntry *entry = lookup(vaddr);
797                localNumTLBAccesses++;
798
799                if (!entry) {
800                    localNumTLBMisses++;
801                    if (timing) {
802                        latency = missLatency1;
803                    }
804
805                    if (FullSystem) {
806                        fatal("GpuTLB doesn't support full-system mode\n");
807                    } else {
808                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
809                                "at pc %#x.\n", vaddr, tc->instAddr());
810
811                        Process *p = tc->getProcessPtr();
812                        const EmulationPageTable::Entry *pte =
813                            p->pTable->lookup(vaddr);
814
815                        if (!pte && mode != BaseTLB::Execute) {
816                            // penalize a "page fault" more
817                            if (timing)
818                                latency += missLatency2;
819
820                            if (p->fixupStackFault(vaddr))
821                                pte = p->pTable->lookup(vaddr);
822                        }
823
824                        if (!pte) {
825                            return std::make_shared<PageFault>(vaddr, true,
826                                                               mode, true,
827                                                               false);
828                        } else {
829                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
830
831                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
832                                    alignedVaddr, pte->paddr);
833
834                            TlbEntry gpuEntry(p->pid(), alignedVaddr,
835                                              pte->paddr, false, false);
836                            entry = insert(alignedVaddr, gpuEntry);
837                        }
838
839                        DPRINTF(GPUTLB, "Miss was serviced.\n");
840                    }
841                } else {
842                    localNumTLBHits++;
843
844                    if (timing) {
845                        latency = hitLatency;
846                    }
847                }
848
849                // Do paging protection checks.
850                bool inUser = (m5Reg.cpl == 3 &&
851                               !(flags & (CPL0FlagBit << FlagShift)));
852
853                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
854                bool badWrite = (!entry->writable && (inUser || cr0.wp));
855
856                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
857                     badWrite)) {
858                    // The page must have been present to get into the TLB in
859                    // the first place. We'll assume the reserved bits are
860                    // fine even though we're not checking them.
861                    return std::make_shared<PageFault>(vaddr, true, mode,
862                                                       inUser, false);
863                }
864
865                if (storeCheck && badWrite) {
866                    // This would fault if this were a write, so return a page
867                    // fault that reflects that happening.
868                    return std::make_shared<PageFault>(vaddr, true,
869                                                       BaseTLB::Write,
870                                                       inUser, false);
871                }
872
873
874                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
875                        "checks.\n", entry->paddr);
876
877                int page_size = entry->size();
878                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
879                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
880                req->setPaddr(paddr);
881
882                if (entry->uncacheable)
883                    req->setFlags(Request::UNCACHEABLE);
884            } else {
885                //Use the address which already has segmentation applied.
886                DPRINTF(GPUTLB, "Paging disabled.\n");
887                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
888                req->setPaddr(vaddr);
889            }
890        } else {
891            // Real mode
892            DPRINTF(GPUTLB, "In real mode.\n");
893            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
894            req->setPaddr(vaddr);
895        }
896
897        // Check for an access to the local APIC
898        if (FullSystem) {
899            LocalApicBase localApicBase =
900                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
901
902            Addr baseAddr = localApicBase.base * PageBytes;
903            Addr paddr = req->getPaddr();
904
905            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
906                // Force the access to be uncacheable.
907                req->setFlags(Request::UNCACHEABLE);
908                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
909                                                  paddr - baseAddr));
910            }
911        }
912
913        return NoFault;
914    };
915
916    Fault
917    GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
918                            Mode mode, int &latency)
919    {
920        bool delayedResponse;
921
922        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
923                                 latency);
924    }
925
926    void
927    GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
928            Translation *translation, Mode mode, int &latency)
929    {
930        bool delayedResponse;
931        assert(translation);
932
933        Fault fault = GpuTLB::translate(req, tc, translation, mode,
934                                        delayedResponse, true, latency);
935
936        if (!delayedResponse)
937            translation->finish(fault, req, tc, mode);
938    }
939
    /**
     * Accessor for the page table walker owned by this TLB.
     */
    Walker*
    GpuTLB::getWalker()
    {
        return walker;
    }
945
946
    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
        // Intentionally empty: the GPU TLB checkpoints no state.
    }
951
    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
        // Intentionally empty: nothing is serialized, so there is nothing
        // to restore; the TLB starts cold after a checkpoint restore.
    }
956
    void
    GpuTLB::regStats()
    {
        MemObject::regStats();

        // Stats local to this TLB level (counted per TLB lookup).
        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

        // Formula stat: local miss rate as a percentage of local accesses.
        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        // Formula stat: average queueing latency per local access.
        localLatency = localCycles / localNumTLBAccesses;

        // "Global" stats are weighted by the number of original requests
        // folded into each coalesced packet (see issueTLBLookup's req_cnt).
        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        // Formula stat: global miss rate over coalesced requests.
        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;

    }
1039
1040    /**
1041     * Do the TLB lookup for this coalesced request and schedule
1042     * another event <TLB access latency> cycles later.
1043     */
1044
    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt);
        assert(pkt->senderState);

        // Virtual page the (coalesced) request needs translated.
        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

        // Prefetch translations do not contribute to the statistics.
        bool update_stats = !sender_state->prefetch;
        ThreadContext * tmp_tc = sender_state->tc;

        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        // Number of original requests coalesced into this packet at this
        // level (reqCnt grows by one entry per TLB level traversed).
        int req_cnt = sender_state->reqCnt.back();

        if (update_stats) {
            // Cycle stats are kept as deltas: subtract the start tick now and
            // add the completion tick in translationReturn(), leaving the
            // elapsed time accumulated in the stat.
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        const RequestPtr &tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState
            TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            // Hand a private copy of the entry to the sender state.
            // NOTE(review): this TlbEntry is heap-allocated and appears to be
            // owned by the senderState from here on -- confirm the requester
            // frees it.
            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as well
         * as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to lower
         * level TLB on a miss, or start a page walk if this was the last-level
         * TLB)
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        // Only one in-flight translation is allowed per virtual page.
        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;
        assert(tlb_event);

        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

        // Hits and misses alike pay the TLB access latency before the
        // outcome is acted on.
        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }
1125
    // A TLBEvent records the lookup outcome for one virtual page; when it
    // fires it hands that outcome (and the packet) to translationReturn().
    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
                               PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
        outcome(tlb_outcome), pkt(_pkt)
    {
    }
1132
1133    /**
1134     * Do Paging protection checks. If we encounter a page fault, then
1135     * an assertion is fired.
1136     */
    void
    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
            TlbEntry * tlb_entry, Mode mode)
    {
        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
        uint32_t flags = pkt->req->getFlags();
        bool storeCheck = flags & (StoreCheck << FlagShift);

        // Do paging protection checks.
        // A CPL3 access is user-mode unless the request carries the CPL0
        // override flag.
        bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

        // A write is bad if the page is read-only and either we are in user
        // mode or CR0.WP (supervisor write-protect) is set.
        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

        if ((inUser && !tlb_entry->user) ||
            (mode == BaseTLB::Write && badWrite)) {
           // The page must have been present to get into the TLB in
           // the first place. We'll assume the reserved bits are
           // fine even though we're not checking them.
           // NOTE(review): assert(false) compiles away under NDEBUG, so in a
           // fast build a protection violation is silently ignored -- confirm
           // whether this should be a panic() instead.
           assert(false);
        }

        if (storeCheck && badWrite) {
           // This would fault if this were a write, so return a page
           // fault that reflects that happening.
           assert(false);
        }
    }
1165
1166    /**
1167     * handleTranslationReturn is called on a TLB hit,
1168     * when a TLB miss returns or when a page fault returns.
1169     * The latter calls handelHit with TLB miss as tlbOutcome.
1170     */
    void
    GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
            PacketPtr pkt)
    {

        assert(pkt);
        Addr vaddr = pkt->req->getVaddr();

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;

        TlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
            // On a hit issueTLBLookup() already stashed a copy of the entry.
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                    vaddr);

            // We are returning either from a page walk or from a hit at a lower
            // TLB level. The senderState should be "carrying" a pointer to the
            // correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            // Install the entry in this level only when the allocation
            // policy asks for allocate-on-miss.
            if (allocationPolicy) {
                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        /**
         * At this point the packet carries an up-to-date tlbEntry pointer
         * in its senderState.
         * Next step is to do the paging protection checks.
         */
        DPRINTF(GPUTLB, "Entry found with vaddr %#x,  doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        pagingProtectionChecks(tc, pkt, local_entry, mode);
        // Physical address = page frame base OR'ed with the in-page offset.
        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        // Since this packet will be sent through the cpu side slave port,
        // it must be converted to a response pkt if it is not one already
        if (pkt->isRequest()) {
            pkt->makeTimingResponse();
        }

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable) {
             pkt->req->setFlags(Request::UNCACHEABLE);
        }

        //send packet back to coalescer
        cpuSidePort[0]->sendTimingResp(pkt);
        //schedule cleanup event
        cleanupQueue.push(virt_page_addr);

        // schedule this only once per cycle.
        // The check is required because we might have multiple translations
        // returning the same cycle
        // this is a maximum priority event and must be on the same cycle
        // as the cleanup event in TLBCoalescer to avoid a race with
        // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
        if (!cleanupEvent.scheduled())
            schedule(cleanupEvent, curTick());
    }
1251
1252    /**
1253     * Here we take the appropriate actions based on the result of the
1254     * TLB lookup.
1255     */
1256    void
1257    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
1258                              PacketPtr pkt)
1259    {
1260        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1261
1262        assert(translationReturnEvent[virtPageAddr]);
1263        assert(pkt);
1264
1265        TranslationState *tmp_sender_state =
1266            safe_cast<TranslationState*>(pkt->senderState);
1267
1268        int req_cnt = tmp_sender_state->reqCnt.back();
1269        bool update_stats = !tmp_sender_state->prefetch;
1270
1271
1272        if (outcome == TLB_HIT) {
1273            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1274
1275            if (update_stats) {
1276                accessCycles += (req_cnt * curTick());
1277                localCycles += curTick();
1278            }
1279
1280        } else if (outcome == TLB_MISS) {
1281
1282            DPRINTF(GPUTLB, "This is a TLB miss\n");
1283            if (update_stats) {
1284                accessCycles += (req_cnt*curTick());
1285                localCycles += curTick();
1286            }
1287
1288            if (hasMemSidePort) {
1289                // the one cyle added here represent the delay from when we get
1290                // the reply back till when we propagate it to the coalescer
1291                // above.
1292                if (update_stats) {
1293                    accessCycles += (req_cnt * 1);
1294                    localCycles += 1;
1295                }
1296
1297                /**
1298                 * There is a TLB below. Send the coalesced request.
1299                 * We actually send the very first packet of all the
1300                 * pending packets for this virtual page address.
1301                 */
1302                if (!memSidePort[0]->sendTimingReq(pkt)) {
1303                    DPRINTF(GPUTLB, "Failed sending translation request to "
1304                            "lower level TLB for addr %#x\n", virtPageAddr);
1305
1306                    memSidePort[0]->retries.push_back(pkt);
1307                } else {
1308                    DPRINTF(GPUTLB, "Sent translation request to lower level "
1309                            "TLB for addr %#x\n", virtPageAddr);
1310                }
1311            } else {
1312                //this is the last level TLB. Start a page walk
1313                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1314                        "addr %#x\n", virtPageAddr);
1315
1316                if (update_stats)
1317                    pageTableCycles -= (req_cnt*curTick());
1318
1319                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1320                assert(tlb_event);
1321                tlb_event->updateOutcome(PAGE_WALK);
1322                schedule(tlb_event, curTick() + ticks(missLatency2));
1323            }
1324        } else if (outcome == PAGE_WALK) {
1325            if (update_stats)
1326                pageTableCycles += (req_cnt*curTick());
1327
1328            // Need to access the page table and update the TLB
1329            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1330                    virtPageAddr);
1331
1332            TranslationState *sender_state =
1333                safe_cast<TranslationState*>(pkt->senderState);
1334
1335            Process *p = sender_state->tc->getProcessPtr();
1336            Addr vaddr = pkt->req->getVaddr();
1337    #ifndef NDEBUG
1338            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1339            assert(alignedVaddr == virtPageAddr);
1340    #endif
1341            const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1342            if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1343                    p->fixupStackFault(vaddr)) {
1344                pte = p->pTable->lookup(vaddr);
1345            }
1346
1347            if (pte) {
1348                DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1349                        pte->paddr);
1350
1351                sender_state->tlbEntry =
1352                    new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
1353                                 false);
1354            } else {
1355                sender_state->tlbEntry = nullptr;
1356            }
1357
1358            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1359        } else if (outcome == MISS_RETURN) {
1360            /** we add an extra cycle in the return path of the translation
1361             * requests in between the various TLB levels.
1362             */
1363            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1364        } else {
1365            assert(false);
1366        }
1367    }
1368
    void
    GpuTLB::TLBEvent::process()
    {
        // Act on the outcome recorded when this event was (re)scheduled.
        tlb->translationReturn(virtPageAddr, outcome, pkt);
    }
1374
    // Human-readable label used by event-queue debug/trace output.
    const char*
    GpuTLB::TLBEvent::description() const
    {
        return "trigger translationDoneEvent";
    }
1380
    // Change the action this event takes when processed (e.g., a miss that
    // must now start a PAGE_WALK, or a response flipped to MISS_RETURN).
    void
    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
    {
        outcome = _outcome;
    }
1386
    // Virtual page address this event was created for.
    Addr
    GpuTLB::TLBEvent::getTLBEventVaddr()
    {
        return virtPageAddr;
    }
1392
1393    /*
1394     * recvTiming receives a coalesced timing request from a TLBCoalescer
1395     * and it calls issueTLBLookup()
1396     * It only rejects the packet if we have exceeded the max
1397     * outstanding number of requests for the TLB
1398     */
1399    bool
1400    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1401    {
1402        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1403            tlb->issueTLBLookup(pkt);
1404            // update number of outstanding translation requests
1405            tlb->outstandingReqs++;
1406            return true;
1407         } else {
1408            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1409                    tlb->outstandingReqs);
1410            return false;
1411         }
1412    }
1413
1414    /**
1415     * handleFuncTranslationReturn is called on a TLB hit,
1416     * when a TLB miss returns or when a page fault returns.
1417     * It updates LRU, inserts the TLB entry on a miss
1418     * depending on the allocation policy and does the required
1419     * protection checks. It does NOT create a new packet to
1420     * update the packet's addr; this is done in hsail-gpu code.
1421     */
    void
    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;
        Addr vaddr = pkt->req->getVaddr();

        TlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                    "%#x\n", vaddr);

            // On a hit, recvFunctional() already stashed a copy of the entry.
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                    "%#x\n", vaddr);

            // We are returning either from a page walk or from a hit at a lower
            // TLB level. The senderState should be "carrying" a pointer to the
            // correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            // Install in this level only under an allocate-on-miss policy.
            if (allocationPolicy) {
                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);

                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        /**
         * Do paging checks if it's a normal functional access.  If it's for a
         * prefetch, then sometimes you can try to prefetch something that
         * won't pass protection. We don't actually want to fault because there
         * is no demand access to deem this a violation.  Just put it in the
         * TLB and it will fault if indeed a future demand access touches it in
         * violation.
         *
         * This feature could be used to explore security issues around
         * speculative memory accesses.
         */
        if (!sender_state->prefetch && sender_state->tlbEntry)
            pagingProtectionChecks(tc, pkt, local_entry, mode);

        // Physical address = page frame base OR'ed with the in-page offset.
        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable)
             pkt->req->setFlags(Request::UNCACHEABLE);
    }
1489
1490    // This is used for atomic translations. Need to
1491    // make it all happen during the same cycle.
1492    void
1493    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
1494    {
1495        TranslationState *sender_state =
1496            safe_cast<TranslationState*>(pkt->senderState);
1497
1498        ThreadContext *tc = sender_state->tc;
1499        bool update_stats = !sender_state->prefetch;
1500
1501        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1502                                        TheISA::PageBytes);
1503
1504        if (update_stats)
1505            tlb->updatePageFootprint(virt_page_addr);
1506
1507        // do the TLB lookup without updating the stats
1508        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1509        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1510
1511        // functional mode means no coalescing
1512        // global metrics are the same as the local metrics
1513        if (update_stats) {
1514            tlb->globalNumTLBAccesses++;
1515
1516            if (success) {
1517                sender_state->hitLevel = sender_state->reqCnt.size();
1518                tlb->globalNumTLBHits++;
1519            }
1520        }
1521
1522        if (!success) {
1523            if (update_stats)
1524                tlb->globalNumTLBMisses++;
1525            if (tlb->hasMemSidePort) {
1526                // there is a TLB below -> propagate down the TLB hierarchy
1527                tlb->memSidePort[0]->sendFunctional(pkt);
1528                // If no valid translation from a prefetch, then just return
1529                if (sender_state->prefetch && !pkt->req->hasPaddr())
1530                    return;
1531            } else {
1532                // Need to access the page table and update the TLB
1533                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1534                        virt_page_addr);
1535
1536                Process *p = tc->getProcessPtr();
1537
1538                Addr vaddr = pkt->req->getVaddr();
1539    #ifndef NDEBUG
1540                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1541                assert(alignedVaddr == virt_page_addr);
1542    #endif
1543
1544                const EmulationPageTable::Entry *pte =
1545                        p->pTable->lookup(vaddr);
1546                if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1547                        p->fixupStackFault(vaddr)) {
1548                    pte = p->pTable->lookup(vaddr);
1549                }
1550
1551                if (!sender_state->prefetch) {
1552                    // no PageFaults are permitted after
1553                    // the second page table lookup
1554                    assert(pte);
1555
1556                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1557                            pte->paddr);
1558
1559                    sender_state->tlbEntry =
1560                        new TlbEntry(p->pid(), virt_page_addr,
1561                                     pte->paddr, false, false);
1562                } else {
1563                    // If this was a prefetch, then do the normal thing if it
1564                    // was a successful translation.  Otherwise, send an empty
1565                    // TLB entry back so that it can be figured out as empty and
1566                    // handled accordingly.
1567                    if (pte) {
1568                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1569                                pte->paddr);
1570
1571                        sender_state->tlbEntry =
1572                            new TlbEntry(p->pid(), virt_page_addr,
1573                                         pte->paddr, false, false);
1574                    } else {
1575                        DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1576                                alignedVaddr);
1577
1578                        sender_state->tlbEntry = nullptr;
1579
1580                        return;
1581                    }
1582                }
1583            }
1584        } else {
1585            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1586                    tlb->lookup(pkt->req->getVaddr()));
1587
1588            TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1589                                             update_stats);
1590
1591            assert(entry);
1592
1593            auto p = sender_state->tc->getProcessPtr();
1594            sender_state->tlbEntry =
1595                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
1596                             false, false);
1597        }
1598        // This is the function that would populate pkt->req with the paddr of
1599        // the translation. But if no translation happens (i.e Prefetch fails)
1600        // then the early returns in the above code wiill keep this function
1601        // from executing.
1602        tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1603    }
1604
    // Retry hook required by the port interface; unreachable by design.
    void
    GpuTLB::CpuSidePort::recvReqRetry()
    {
        // The CPUSidePort never sends anything but replies. No retries
        // expected.
        assert(false);
    }
1612
1613    AddrRangeList
1614    GpuTLB::CpuSidePort::getAddrRanges() const
1615    {
1616        // currently not checked by the master
1617        AddrRangeList ranges;
1618
1619        return ranges;
1620    }
1621
1622    /**
1623     * MemSidePort receives the packet back.
1624     * We need to call the handleTranslationReturn
1625     * and propagate up the hierarchy.
1626     */
1627    bool
1628    GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
1629    {
1630        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1631                                        TheISA::PageBytes);
1632
1633        DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1634                virt_page_addr);
1635
1636        TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1637        assert(tlb_event);
1638        assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1639
1640        tlb_event->updateOutcome(MISS_RETURN);
1641        tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1642
1643        return true;
1644    }
1645
    // Retry hook required by the port interface; unreachable by design.
    void
    GpuTLB::MemSidePort::recvReqRetry()
    {
        // No retries should reach the TLB. The retries
        // should only reach the TLBCoalescer.
        assert(false);
    }
1653
1654    void
1655    GpuTLB::cleanup()
1656    {
1657        while (!cleanupQueue.empty()) {
1658            Addr cleanup_addr = cleanupQueue.front();
1659            cleanupQueue.pop();
1660
1661            // delete TLBEvent
1662            TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1663            delete old_tlb_event;
1664            translationReturnEvent.erase(cleanup_addr);
1665
1666            // update number of outstanding requests
1667            outstandingReqs--;
1668        }
1669
1670        /** the higher level coalescer should retry if it has
1671         * any pending requests.
1672         */
1673        for (int i = 0; i < cpuSidePort.size(); ++i) {
1674            cpuSidePort[i]->sendRetryReq();
1675        }
1676    }
1677
1678    void
1679    GpuTLB::updatePageFootprint(Addr virt_page_addr)
1680    {
1681
1682        std::pair<AccessPatternTable::iterator, bool> ret;
1683
1684        AccessInfo tmp_access_info;
1685        tmp_access_info.lastTimeAccessed = 0;
1686        tmp_access_info.accessesPerPage = 0;
1687        tmp_access_info.totalReuseDistance = 0;
1688        tmp_access_info.sumDistance = 0;
1689        tmp_access_info.meanDistance = 0;
1690
1691        ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1692                                  tmp_access_info));
1693
1694        bool first_page_access = ret.second;
1695
1696        if (first_page_access) {
1697            numUniquePages++;
1698        } else  {
1699            int accessed_before;
1700            accessed_before  = curTick() - ret.first->second.lastTimeAccessed;
1701            ret.first->second.totalReuseDistance += accessed_before;
1702        }
1703
1704        ret.first->second.accessesPerPage++;
1705        ret.first->second.lastTimeAccessed = curTick();
1706
1707        if (accessDistance) {
1708            ret.first->second.localTLBAccesses
1709                .push_back(localNumTLBAccesses.value());
1710        }
1711    }
1712
1713    void
1714    GpuTLB::exitCallback()
1715    {
1716        std::ostream *page_stat_file = nullptr;
1717
1718        if (accessDistance) {
1719
1720            // print per page statistics to a separate file (.csv format)
1721            // simout is the gem5 output directory (default is m5out or the one
1722            // specified with -d
1723            page_stat_file = simout.create(name().c_str())->stream();
1724
1725            // print header
1726            *page_stat_file << "page,max_access_distance,mean_access_distance, "
1727                            << "stddev_distance" << std::endl;
1728        }
1729
1730        // update avg. reuse distance footprint
1731        AccessPatternTable::iterator iter, iter_begin, iter_end;
1732        unsigned int sum_avg_reuse_distance_per_page = 0;
1733
1734        // iterate through all pages seen by this TLB
1735        for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1736            sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1737                                               iter->second.accessesPerPage;
1738
1739            if (accessDistance) {
1740                unsigned int tmp = iter->second.localTLBAccesses[0];
1741                unsigned int prev = tmp;
1742
1743                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1744                    if (i) {
1745                        tmp = prev + 1;
1746                    }
1747
1748                    prev = iter->second.localTLBAccesses[i];
1749                    // update the localTLBAccesses value
1750                    // with the actual differece
1751                    iter->second.localTLBAccesses[i] -= tmp;
1752                    // compute the sum of AccessDistance per page
1753                    // used later for mean
1754                    iter->second.sumDistance +=
1755                        iter->second.localTLBAccesses[i];
1756                }
1757
1758                iter->second.meanDistance =
1759                    iter->second.sumDistance / iter->second.accessesPerPage;
1760
1761                // compute std_dev and max  (we need a second round because we
1762                // need to know the mean value
1763                unsigned int max_distance = 0;
1764                unsigned int stddev_distance = 0;
1765
1766                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1767                    unsigned int tmp_access_distance =
1768                        iter->second.localTLBAccesses[i];
1769
1770                    if (tmp_access_distance > max_distance) {
1771                        max_distance = tmp_access_distance;
1772                    }
1773
1774                    unsigned int diff =
1775                        tmp_access_distance - iter->second.meanDistance;
1776                    stddev_distance += pow(diff, 2);
1777
1778                }
1779
1780                stddev_distance =
1781                    sqrt(stddev_distance/iter->second.accessesPerPage);
1782
1783                if (page_stat_file) {
1784                    *page_stat_file << std::hex << iter->first << ",";
1785                    *page_stat_file << std::dec << max_distance << ",";
1786                    *page_stat_file << std::dec << iter->second.meanDistance
1787                                    << ",";
1788                    *page_stat_file << std::dec << stddev_distance;
1789                    *page_stat_file << std::endl;
1790                }
1791
1792                // erase the localTLBAccesses array
1793                iter->second.localTLBAccesses.clear();
1794            }
1795        }
1796
1797        if (!TLBFootprint.empty()) {
1798            avgReuseDistance =
1799                sum_avg_reuse_distance_per_page / TLBFootprint.size();
1800        }
1801
1802        //clear the TLBFootprint map
1803        TLBFootprint.clear();
1804    }
1805} // namespace X86ISA
1806
1807X86ISA::GpuTLB*
1808X86GPUTLBParams::create()
1809{
1810    return new X86ISA::GpuTLB(this);
1811}
1812
1813