gpu_tlb.cc revision 13557:fc33e6048b25
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Lisa Hsu
34 */
35
36#include "gpu-compute/gpu_tlb.hh"
37
38#include <cmath>
39#include <cstring>
40
41#include "arch/x86/faults.hh"
42#include "arch/x86/insts/microldstop.hh"
43#include "arch/x86/pagetable.hh"
44#include "arch/x86/pagetable_walker.hh"
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/x86_traits.hh"
47#include "base/bitfield.hh"
48#include "base/logging.hh"
49#include "base/output.hh"
50#include "base/trace.hh"
51#include "cpu/base.hh"
52#include "cpu/thread_context.hh"
53#include "debug/GPUPrefetch.hh"
54#include "debug/GPUTLB.hh"
55#include "mem/packet_access.hh"
56#include "mem/page_table.hh"
57#include "mem/request.hh"
58#include "sim/process.hh"
59
60namespace X86ISA
61{
62
    /**
     * Construct a set-associative GPU TLB from the Python-side params.
     * Geometry: 'size' total entries split into size/assoc sets; the
     * backing storage is a flat vector and every entry starts on the
     * per-set free list.
     */
    GpuTLB::GpuTLB(const Params *p)
        : MemObject(p), configAddress(0), size(p->size),
          cleanupEvent([this]{ cleanup(); }, name(), false,
                       Event::Maximum_Pri),
          exitEvent([this]{ exitCallback(); }, name())
    {
        assoc = p->assoc;
        // associativity cannot exceed the number of entries
        assert(assoc <= size);
        // NOTE(review): assumes size is a multiple of assoc — confirm the
        // Python config enforces this, otherwise entries are silently lost
        numSets = size/assoc;
        allocationPolicy = p->allocationPolicy;
        hasMemSidePort = false;
        accessDistance = p->accessDistance;
        clock = p->clk_domain->clockPeriod();

        // flat backing store for all entries; sets are views into it
        tlb.assign(size, TlbEntry());

        freeList.resize(numSets);
        entryList.resize(numSets);

        // initially every way of every set is free
        for (int set = 0; set < numSets; ++set) {
            for (int way = 0; way < assoc; ++way) {
                int x = set * assoc + way;
                freeList[set].push_back(&tlb.at(x));
            }
        }

        // fully associative iff there is a single set
        FA = (size == assoc);

        /**
         * @warning: the set-associative version assumes you have a
         * fixed page size of 4KB.
         * If the page size is greather than 4KB (as defined in the
         * TheISA::PageBytes), then there are various issues w/ the current
         * implementation (you'd have the same 8KB page being replicated in
         * different sets etc)
         */
        // NOTE(review): mask-based set selection assumes numSets is a
        // power of two — confirm the config enforces this
        setMask = numSets - 1;

    #if 0
        // GpuTLB doesn't yet support full system
        walker = p->walker;
        walker->setTLB(this);
    #endif

        maxCoalescedReqs = p->maxOutstandingReqs;

        // Do not allow maxCoalescedReqs to be more than the TLB associativity
        if (maxCoalescedReqs > assoc) {
            maxCoalescedReqs = assoc;
            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
        }

        outstandingReqs = 0;
        hitLatency = p->hitLatency;
        missLatency1 = p->missLatency1;
        missLatency2 = p->missLatency2;

        // create the slave ports based on the number of connected ports
        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }

        // create the master ports based on the number of connected ports
        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                  name(), i), this, i));
        }
    }
132
    // fixme: this is never called?
    /**
     * Destructor. By tear-down time every outstanding coalesced
     * translation must have been returned to its requestor.
     */
    GpuTLB::~GpuTLB()
    {
        // make sure all the hash-maps are empty
        assert(translationReturnEvent.empty());
    }
139
140    BaseSlavePort&
141    GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
142    {
143        if (if_name == "slave") {
144            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
145                panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
146            }
147
148            return *cpuSidePort[idx];
149        } else {
150            panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
151        }
152    }
153
154    BaseMasterPort&
155    GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
156    {
157        if (if_name == "master") {
158            if (idx >= static_cast<PortID>(memSidePort.size())) {
159                panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
160            }
161
162            hasMemSidePort = true;
163
164            return *memSidePort[idx];
165        } else {
166            panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
167        }
168    }
169
170    TlbEntry*
171    GpuTLB::insert(Addr vpn, TlbEntry &entry)
172    {
173        TlbEntry *newEntry = nullptr;
174
175        /**
176         * vpn holds the virtual page address
177         * The least significant bits are simply masked
178         */
179        int set = (vpn >> TheISA::PageShift) & setMask;
180
181        if (!freeList[set].empty()) {
182            newEntry = freeList[set].front();
183            freeList[set].pop_front();
184        } else {
185            newEntry = entryList[set].back();
186            entryList[set].pop_back();
187        }
188
189        *newEntry = entry;
190        newEntry->vaddr = vpn;
191        entryList[set].push_front(newEntry);
192
193        return newEntry;
194    }
195
    /**
     * Find the entry list iterator for the entry covering virtual
     * address 'va', or end() of the set's list if there is no match.
     * When update_lru is set, a hit promotes the entry to MRU (front of
     * the list) and the returned iterator points at the new front.
     */
    GpuTLB::EntryList::iterator
    GpuTLB::lookupIt(Addr va, bool update_lru)
    {
        int set = (va >> TheISA::PageShift) & setMask;

        if (FA) {
            // fully associative: everything lives in set 0
            assert(!set);
        }

        auto entry = entryList[set].begin();
        for (; entry != entryList[set].end(); ++entry) {
            int page_size = (*entry)->size();

            // range check so entries with large page sizes still match
            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                        "with size %#x.\n", va, (*entry)->vaddr, page_size);

                if (update_lru) {
                    // MRU promotion: push a copy of the pointer to the
                    // front, drop the old node, and rebind the iterator
                    // to the new front (std::list keeps other iterators
                    // valid across push_front)
                    entryList[set].push_front(*entry);
                    entryList[set].erase(entry);
                    entry = entryList[set].begin();
                }

                break;
            }
        }

        return entry;
    }
225
226    TlbEntry*
227    GpuTLB::lookup(Addr va, bool update_lru)
228    {
229        int set = (va >> TheISA::PageShift) & setMask;
230
231        auto entry = lookupIt(va, update_lru);
232
233        if (entry == entryList[set].end())
234            return nullptr;
235        else
236            return *entry;
237    }
238
239    void
240    GpuTLB::invalidateAll()
241    {
242        DPRINTF(GPUTLB, "Invalidating all entries.\n");
243
244        for (int i = 0; i < numSets; ++i) {
245            while (!entryList[i].empty()) {
246                TlbEntry *entry = entryList[i].front();
247                entryList[i].pop_front();
248                freeList[i].push_back(entry);
249            }
250        }
251    }
252
    // Latch a config-space address.
    // NOTE(review): configAddress is only written here in this part of the
    // file — confirm where (or whether) it is consumed before relying on it.
    void
    GpuTLB::setConfigAddress(uint32_t addr)
    {
        configAddress = addr;
    }
258
259    void
260    GpuTLB::invalidateNonGlobal()
261    {
262        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
263
264        for (int i = 0; i < numSets; ++i) {
265            for (auto entryIt = entryList[i].begin();
266                 entryIt != entryList[i].end();) {
267                if (!(*entryIt)->global) {
268                    freeList[i].push_back(*entryIt);
269                    entryList[i].erase(entryIt++);
270                } else {
271                    ++entryIt;
272                }
273            }
274        }
275    }
276
277    void
278    GpuTLB::demapPage(Addr va, uint64_t asn)
279    {
280
281        int set = (va >> TheISA::PageShift) & setMask;
282        auto entry = lookupIt(va, false);
283
284        if (entry != entryList[set].end()) {
285            freeList[set].push_back(*entry);
286            entryList[set].erase(entry);
287        }
288    }
289
    /**
     * Translate an access to one of x86's internal (non-memory) address
     * spaces: CPUID (unimplemented), MSRs, and legacy I/O ports. For MSR
     * accesses the MSR number is mapped onto a MiscReg index and the
     * request is redirected to memory-mapped IPR space; for I/O ports the
     * request is pointed at the PCI-config or fixed-I/O physical ranges.
     * Returns NoFault on success, GeneralProtection for unknown MSRs,
     * and panics on unrecognized prefixes.
     */
    Fault
    GpuTLB::translateInt(const RequestPtr &req, ThreadContext *tc)
    {
        DPRINTF(GPUTLB, "Addresses references internal memory.\n");
        Addr vaddr = req->getVaddr();
        // the prefix (bits above the low 3) selects the internal space
        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

        if (prefix == IntAddrPrefixCPUID) {
            panic("CPUID memory space not yet implemented!\n");
        } else if (prefix == IntAddrPrefixMSR) {
            // vaddr >> 3 yields the architectural MSR number
            vaddr = vaddr >> 3;
            req->setFlags(Request::MMAPPED_IPR);
            Addr regNum = 0;

            // map the MSR number onto the corresponding MiscReg index
            switch (vaddr & ~IntAddrPrefixMask) {
              case 0x10:
                regNum = MISCREG_TSC;
                break;
              case 0x1B:
                regNum = MISCREG_APIC_BASE;
                break;
              case 0xFE:
                regNum = MISCREG_MTRRCAP;
                break;
              case 0x174:
                regNum = MISCREG_SYSENTER_CS;
                break;
              case 0x175:
                regNum = MISCREG_SYSENTER_ESP;
                break;
              case 0x176:
                regNum = MISCREG_SYSENTER_EIP;
                break;
              case 0x179:
                regNum = MISCREG_MCG_CAP;
                break;
              case 0x17A:
                regNum = MISCREG_MCG_STATUS;
                break;
              case 0x17B:
                regNum = MISCREG_MCG_CTL;
                break;
              case 0x1D9:
                regNum = MISCREG_DEBUG_CTL_MSR;
                break;
              case 0x1DB:
                regNum = MISCREG_LAST_BRANCH_FROM_IP;
                break;
              case 0x1DC:
                regNum = MISCREG_LAST_BRANCH_TO_IP;
                break;
              case 0x1DD:
                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
                break;
              case 0x1DE:
                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
                break;
              case 0x200:
                regNum = MISCREG_MTRR_PHYS_BASE_0;
                break;
              case 0x201:
                regNum = MISCREG_MTRR_PHYS_MASK_0;
                break;
              case 0x202:
                regNum = MISCREG_MTRR_PHYS_BASE_1;
                break;
              case 0x203:
                regNum = MISCREG_MTRR_PHYS_MASK_1;
                break;
              case 0x204:
                regNum = MISCREG_MTRR_PHYS_BASE_2;
                break;
              case 0x205:
                regNum = MISCREG_MTRR_PHYS_MASK_2;
                break;
              case 0x206:
                regNum = MISCREG_MTRR_PHYS_BASE_3;
                break;
              case 0x207:
                regNum = MISCREG_MTRR_PHYS_MASK_3;
                break;
              case 0x208:
                regNum = MISCREG_MTRR_PHYS_BASE_4;
                break;
              case 0x209:
                regNum = MISCREG_MTRR_PHYS_MASK_4;
                break;
              case 0x20A:
                regNum = MISCREG_MTRR_PHYS_BASE_5;
                break;
              case 0x20B:
                regNum = MISCREG_MTRR_PHYS_MASK_5;
                break;
              case 0x20C:
                regNum = MISCREG_MTRR_PHYS_BASE_6;
                break;
              case 0x20D:
                regNum = MISCREG_MTRR_PHYS_MASK_6;
                break;
              case 0x20E:
                regNum = MISCREG_MTRR_PHYS_BASE_7;
                break;
              case 0x20F:
                regNum = MISCREG_MTRR_PHYS_MASK_7;
                break;
              case 0x250:
                regNum = MISCREG_MTRR_FIX_64K_00000;
                break;
              case 0x258:
                regNum = MISCREG_MTRR_FIX_16K_80000;
                break;
              case 0x259:
                regNum = MISCREG_MTRR_FIX_16K_A0000;
                break;
              case 0x268:
                regNum = MISCREG_MTRR_FIX_4K_C0000;
                break;
              case 0x269:
                regNum = MISCREG_MTRR_FIX_4K_C8000;
                break;
              case 0x26A:
                regNum = MISCREG_MTRR_FIX_4K_D0000;
                break;
              case 0x26B:
                regNum = MISCREG_MTRR_FIX_4K_D8000;
                break;
              case 0x26C:
                regNum = MISCREG_MTRR_FIX_4K_E0000;
                break;
              case 0x26D:
                regNum = MISCREG_MTRR_FIX_4K_E8000;
                break;
              case 0x26E:
                regNum = MISCREG_MTRR_FIX_4K_F0000;
                break;
              case 0x26F:
                regNum = MISCREG_MTRR_FIX_4K_F8000;
                break;
              case 0x277:
                regNum = MISCREG_PAT;
                break;
              case 0x2FF:
                regNum = MISCREG_DEF_TYPE;
                break;
              case 0x400:
                regNum = MISCREG_MC0_CTL;
                break;
              case 0x404:
                regNum = MISCREG_MC1_CTL;
                break;
              case 0x408:
                regNum = MISCREG_MC2_CTL;
                break;
              case 0x40C:
                regNum = MISCREG_MC3_CTL;
                break;
              case 0x410:
                regNum = MISCREG_MC4_CTL;
                break;
              case 0x414:
                regNum = MISCREG_MC5_CTL;
                break;
              case 0x418:
                regNum = MISCREG_MC6_CTL;
                break;
              case 0x41C:
                regNum = MISCREG_MC7_CTL;
                break;
              case 0x401:
                regNum = MISCREG_MC0_STATUS;
                break;
              case 0x405:
                regNum = MISCREG_MC1_STATUS;
                break;
              case 0x409:
                regNum = MISCREG_MC2_STATUS;
                break;
              case 0x40D:
                regNum = MISCREG_MC3_STATUS;
                break;
              case 0x411:
                regNum = MISCREG_MC4_STATUS;
                break;
              case 0x415:
                regNum = MISCREG_MC5_STATUS;
                break;
              case 0x419:
                regNum = MISCREG_MC6_STATUS;
                break;
              case 0x41D:
                regNum = MISCREG_MC7_STATUS;
                break;
              case 0x402:
                regNum = MISCREG_MC0_ADDR;
                break;
              case 0x406:
                regNum = MISCREG_MC1_ADDR;
                break;
              case 0x40A:
                regNum = MISCREG_MC2_ADDR;
                break;
              case 0x40E:
                regNum = MISCREG_MC3_ADDR;
                break;
              case 0x412:
                regNum = MISCREG_MC4_ADDR;
                break;
              case 0x416:
                regNum = MISCREG_MC5_ADDR;
                break;
              case 0x41A:
                regNum = MISCREG_MC6_ADDR;
                break;
              case 0x41E:
                regNum = MISCREG_MC7_ADDR;
                break;
              case 0x403:
                regNum = MISCREG_MC0_MISC;
                break;
              case 0x407:
                regNum = MISCREG_MC1_MISC;
                break;
              case 0x40B:
                regNum = MISCREG_MC2_MISC;
                break;
              case 0x40F:
                regNum = MISCREG_MC3_MISC;
                break;
              case 0x413:
                regNum = MISCREG_MC4_MISC;
                break;
              case 0x417:
                regNum = MISCREG_MC5_MISC;
                break;
              case 0x41B:
                regNum = MISCREG_MC6_MISC;
                break;
              case 0x41F:
                regNum = MISCREG_MC7_MISC;
                break;
              case 0xC0000080:
                regNum = MISCREG_EFER;
                break;
              case 0xC0000081:
                regNum = MISCREG_STAR;
                break;
              case 0xC0000082:
                regNum = MISCREG_LSTAR;
                break;
              case 0xC0000083:
                regNum = MISCREG_CSTAR;
                break;
              case 0xC0000084:
                regNum = MISCREG_SF_MASK;
                break;
              case 0xC0000100:
                regNum = MISCREG_FS_BASE;
                break;
              case 0xC0000101:
                regNum = MISCREG_GS_BASE;
                break;
              case 0xC0000102:
                regNum = MISCREG_KERNEL_GS_BASE;
                break;
              case 0xC0000103:
                regNum = MISCREG_TSC_AUX;
                break;
              case 0xC0010000:
                regNum = MISCREG_PERF_EVT_SEL0;
                break;
              case 0xC0010001:
                regNum = MISCREG_PERF_EVT_SEL1;
                break;
              case 0xC0010002:
                regNum = MISCREG_PERF_EVT_SEL2;
                break;
              case 0xC0010003:
                regNum = MISCREG_PERF_EVT_SEL3;
                break;
              case 0xC0010004:
                regNum = MISCREG_PERF_EVT_CTR0;
                break;
              case 0xC0010005:
                regNum = MISCREG_PERF_EVT_CTR1;
                break;
              case 0xC0010006:
                regNum = MISCREG_PERF_EVT_CTR2;
                break;
              case 0xC0010007:
                regNum = MISCREG_PERF_EVT_CTR3;
                break;
              case 0xC0010010:
                regNum = MISCREG_SYSCFG;
                break;
              case 0xC0010016:
                regNum = MISCREG_IORR_BASE0;
                break;
              case 0xC0010017:
                regNum = MISCREG_IORR_BASE1;
                break;
              case 0xC0010018:
                regNum = MISCREG_IORR_MASK0;
                break;
              case 0xC0010019:
                regNum = MISCREG_IORR_MASK1;
                break;
              case 0xC001001A:
                regNum = MISCREG_TOP_MEM;
                break;
              case 0xC001001D:
                regNum = MISCREG_TOP_MEM2;
                break;
              case 0xC0010114:
                regNum = MISCREG_VM_CR;
                break;
              case 0xC0010115:
                regNum = MISCREG_IGNNE;
                break;
              case 0xC0010116:
                regNum = MISCREG_SMM_CTL;
                break;
              case 0xC0010117:
                regNum = MISCREG_VM_HSAVE_PA;
                break;
              default:
                // access to an unimplemented/unknown MSR faults
                return std::make_shared<GeneralProtection>(0);
            }
            //The index is multiplied by the size of a MiscReg so that
            //any memory dependence calculations will not see these as
            //overlapping.
            req->setPaddr(regNum * sizeof(RegVal));
            return NoFault;
        } else if (prefix == IntAddrPrefixIO) {
            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
            // bitmap in the TSS.

            Addr IOPort = vaddr & ~IntAddrPrefixMask;
            // Make sure the address fits in the expected 16 bit IO address
            // space.
            assert(!(IOPort & ~0xFFFF));

            // port 0xCF8 (4-byte access) is the PCI config-address register
            if (IOPort == 0xCF8 && req->getSize() == 4) {
                req->setFlags(Request::MMAPPED_IPR);
                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(RegVal));
            } else if ((IOPort & ~mask(2)) == 0xCFC) {
                // ports 0xCFC-0xCFF: PCI config-data window
                req->setFlags(Request::UNCACHEABLE);

                Addr configAddress =
                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                // bit 31 of the config-address register is the enable bit
                if (bits(configAddress, 31, 31)) {
                    req->setPaddr(PhysAddrPrefixPciConfig |
                                  mbits(configAddress, 30, 2) |
                                  (IOPort & mask(2)));
                } else {
                    req->setPaddr(PhysAddrPrefixIO | IOPort);
                }
            } else {
                // any other port: plain uncacheable I/O access
                req->setFlags(Request::UNCACHEABLE);
                req->setPaddr(PhysAddrPrefixIO | IOPort);
            }
            return NoFault;
        } else {
            panic("Access to unrecognized internal address space %#x.\n",
                  prefix);
        }
    }
657
658    /**
659     * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
660     * and false on a TLB miss.
661     * Many of the checks about different modes have been converted to
662     * assertions, since these parts of the code are not really used.
663     * On a hit it will update the LRU stack.
664     */
665    bool
666    GpuTLB::tlbLookup(const RequestPtr &req,
667                      ThreadContext *tc, bool update_stats)
668    {
669        bool tlb_hit = false;
670    #ifndef NDEBUG
671        uint32_t flags = req->getFlags();
672        int seg = flags & SegmentFlagMask;
673    #endif
674
675        assert(seg != SEGMENT_REG_MS);
676        Addr vaddr = req->getVaddr();
677        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
678        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
679
680        if (m5Reg.prot) {
681            DPRINTF(GPUTLB, "In protected mode.\n");
682            // make sure we are in 64-bit mode
683            assert(m5Reg.mode == LongMode);
684
685            // If paging is enabled, do the translation.
686            if (m5Reg.paging) {
687                DPRINTF(GPUTLB, "Paging enabled.\n");
688                //update LRU stack on a hit
689                TlbEntry *entry = lookup(vaddr, true);
690
691                if (entry)
692                    tlb_hit = true;
693
694                if (!update_stats) {
695                    // functional tlb access for memory initialization
696                    // i.e., memory seeding or instr. seeding -> don't update
697                    // TLB and stats
698                    return tlb_hit;
699                }
700
701                localNumTLBAccesses++;
702
703                if (!entry) {
704                    localNumTLBMisses++;
705                } else {
706                    localNumTLBHits++;
707                }
708            }
709        }
710
711        return tlb_hit;
712    }
713
714    Fault
715    GpuTLB::translate(const RequestPtr &req, ThreadContext *tc,
716                      Translation *translation, Mode mode,
717                      bool &delayedResponse, bool timing, int &latency)
718    {
719        uint32_t flags = req->getFlags();
720        int seg = flags & SegmentFlagMask;
721        bool storeCheck = flags & (StoreCheck << FlagShift);
722
723        // If this is true, we're dealing with a request
724        // to a non-memory address space.
725        if (seg == SEGMENT_REG_MS) {
726            return translateInt(req, tc);
727        }
728
729        delayedResponse = false;
730        Addr vaddr = req->getVaddr();
731        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
732
733        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
734
735        // If protected mode has been enabled...
736        if (m5Reg.prot) {
737            DPRINTF(GPUTLB, "In protected mode.\n");
738            // If we're not in 64-bit mode, do protection/limit checks
739            if (m5Reg.mode != LongMode) {
740                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
741                        "protection.\n");
742
743                // Check for a null segment selector.
744                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
745                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
746                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
747                    return std::make_shared<GeneralProtection>(0);
748                }
749
750                bool expandDown = false;
751                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
752
753                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
754                    if (!attr.writable && (mode == BaseTLB::Write ||
755                        storeCheck))
756                        return std::make_shared<GeneralProtection>(0);
757
758                    if (!attr.readable && mode == BaseTLB::Read)
759                        return std::make_shared<GeneralProtection>(0);
760
761                    expandDown = attr.expandDown;
762
763                }
764
765                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
766                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
767                // This assumes we're not in 64 bit mode. If we were, the
768                // default address size is 64 bits, overridable to 32.
769                int size = 32;
770                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
771                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
772
773                if ((csAttr.defaultSize && sizeOverride) ||
774                    (!csAttr.defaultSize && !sizeOverride)) {
775                    size = 16;
776                }
777
778                Addr offset = bits(vaddr - base, size - 1, 0);
779                Addr endOffset = offset + req->getSize() - 1;
780
781                if (expandDown) {
782                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
783                    warn_once("Expand down segments are untested.\n");
784
785                    if (offset <= limit || endOffset <= limit)
786                        return std::make_shared<GeneralProtection>(0);
787                } else {
788                    if (offset > limit || endOffset > limit)
789                        return std::make_shared<GeneralProtection>(0);
790                }
791            }
792
793            // If paging is enabled, do the translation.
794            if (m5Reg.paging) {
795                DPRINTF(GPUTLB, "Paging enabled.\n");
796                // The vaddr already has the segment base applied.
797                TlbEntry *entry = lookup(vaddr);
798                localNumTLBAccesses++;
799
800                if (!entry) {
801                    localNumTLBMisses++;
802                    if (timing) {
803                        latency = missLatency1;
804                    }
805
806                    if (FullSystem) {
807                        fatal("GpuTLB doesn't support full-system mode\n");
808                    } else {
809                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
810                                "at pc %#x.\n", vaddr, tc->instAddr());
811
812                        Process *p = tc->getProcessPtr();
813                        const EmulationPageTable::Entry *pte =
814                            p->pTable->lookup(vaddr);
815
816                        if (!pte && mode != BaseTLB::Execute) {
817                            // penalize a "page fault" more
818                            if (timing)
819                                latency += missLatency2;
820
821                            if (p->fixupStackFault(vaddr))
822                                pte = p->pTable->lookup(vaddr);
823                        }
824
825                        if (!pte) {
826                            return std::make_shared<PageFault>(vaddr, true,
827                                                               mode, true,
828                                                               false);
829                        } else {
830                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
831
832                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
833                                    alignedVaddr, pte->paddr);
834
835                            TlbEntry gpuEntry(p->pid(), alignedVaddr,
836                                              pte->paddr, false, false);
837                            entry = insert(alignedVaddr, gpuEntry);
838                        }
839
840                        DPRINTF(GPUTLB, "Miss was serviced.\n");
841                    }
842                } else {
843                    localNumTLBHits++;
844
845                    if (timing) {
846                        latency = hitLatency;
847                    }
848                }
849
850                // Do paging protection checks.
851                bool inUser = (m5Reg.cpl == 3 &&
852                               !(flags & (CPL0FlagBit << FlagShift)));
853
854                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
855                bool badWrite = (!entry->writable && (inUser || cr0.wp));
856
857                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
858                     badWrite)) {
859                    // The page must have been present to get into the TLB in
860                    // the first place. We'll assume the reserved bits are
861                    // fine even though we're not checking them.
862                    return std::make_shared<PageFault>(vaddr, true, mode,
863                                                       inUser, false);
864                }
865
866                if (storeCheck && badWrite) {
867                    // This would fault if this were a write, so return a page
868                    // fault that reflects that happening.
869                    return std::make_shared<PageFault>(vaddr, true,
870                                                       BaseTLB::Write,
871                                                       inUser, false);
872                }
873
874
875                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
876                        "checks.\n", entry->paddr);
877
878                int page_size = entry->size();
879                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
880                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
881                req->setPaddr(paddr);
882
883                if (entry->uncacheable)
884                    req->setFlags(Request::UNCACHEABLE);
885            } else {
886                //Use the address which already has segmentation applied.
887                DPRINTF(GPUTLB, "Paging disabled.\n");
888                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
889                req->setPaddr(vaddr);
890            }
891        } else {
892            // Real mode
893            DPRINTF(GPUTLB, "In real mode.\n");
894            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
895            req->setPaddr(vaddr);
896        }
897
898        // Check for an access to the local APIC
899        if (FullSystem) {
900            LocalApicBase localApicBase =
901                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
902
903            Addr baseAddr = localApicBase.base * PageBytes;
904            Addr paddr = req->getPaddr();
905
906            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
907                // Force the access to be uncacheable.
908                req->setFlags(Request::UNCACHEABLE);
909                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
910                                                  paddr - baseAddr));
911            }
912        }
913
914        return NoFault;
915    };
916
917    Fault
918    GpuTLB::translateAtomic(const RequestPtr &req, ThreadContext *tc,
919                            Mode mode, int &latency)
920    {
921        bool delayedResponse;
922
923        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
924                                 latency);
925    }
926
927    void
928    GpuTLB::translateTiming(const RequestPtr &req, ThreadContext *tc,
929            Translation *translation, Mode mode, int &latency)
930    {
931        bool delayedResponse;
932        assert(translation);
933
934        Fault fault = GpuTLB::translate(req, tc, translation, mode,
935                                        delayedResponse, true, latency);
936
937        if (!delayedResponse)
938            translation->finish(fault, req, tc, mode);
939    }
940
    // Accessor for this TLB's page table walker.
    Walker*
    GpuTLB::getWalker()
    {
        return walker;
    }
946
947
    // No TLB state is written to checkpoints; translations are simply
    // re-acquired after a restore.
    void
    GpuTLB::serialize(CheckpointOut &cp) const
    {
    }
952
    // No TLB state is restored from checkpoints (see serialize()).
    void
    GpuTLB::unserialize(CheckpointIn &cp)
    {
    }
957
    /**
     * Register this TLB's statistics. "local" stats are per-level counts
     * updated by this TLB; "global" stats are accumulated with the
     * per-request count (reqCnt) of the coalesced packets. The miss-rate
     * and latency stats are formulas derived from the raw counters.
     */
    void
    GpuTLB::regStats()
    {
        MemObject::regStats();

        // --- local (this TLB level) counters ---
        localNumTLBAccesses
            .name(name() + ".local_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        localNumTLBHits
            .name(name() + ".local_TLB_hits")
            .desc("Number of TLB hits")
            ;

        localNumTLBMisses
            .name(name() + ".local_TLB_misses")
            .desc("Number of TLB misses")
            ;

        localTLBMissRate
            .name(name() + ".local_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        accessCycles
            .name(name() + ".access_cycles")
            .desc("Cycles spent accessing this TLB level")
            ;

        pageTableCycles
            .name(name() + ".page_table_cycles")
            .desc("Cycles spent accessing the page table")
            ;

        // Formula: percentage of local accesses that missed.
        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

        numUniquePages
            .name(name() + ".unique_pages")
            .desc("Number of unique pages touched")
            ;

        localCycles
            .name(name() + ".local_cycles")
            .desc("Number of cycles spent in queue for all incoming reqs")
            ;

        localLatency
            .name(name() + ".local_latency")
            .desc("Avg. latency over incoming coalesced reqs")
            ;

        // Formula: average queueing latency per local access.
        localLatency = localCycles / localNumTLBAccesses;

        // --- global (coalesced-request weighted) counters ---
        globalNumTLBAccesses
            .name(name() + ".global_TLB_accesses")
            .desc("Number of TLB accesses")
            ;

        globalNumTLBHits
            .name(name() + ".global_TLB_hits")
            .desc("Number of TLB hits")
            ;

        globalNumTLBMisses
            .name(name() + ".global_TLB_misses")
            .desc("Number of TLB misses")
            ;

        globalTLBMissRate
            .name(name() + ".global_TLB_miss_rate")
            .desc("TLB miss rate")
            ;

        // Formula: percentage of global accesses that missed.
        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

        avgReuseDistance
            .name(name() + ".avg_reuse_distance")
            .desc("avg. reuse distance over all pages (in ticks)")
            ;

    }
1040
1041    /**
1042     * Do the TLB lookup for this coalesced request and schedule
1043     * another event <TLB access latency> cycles later.
1044     */
1045
    void
    GpuTLB::issueTLBLookup(PacketPtr pkt)
    {
        assert(pkt);
        assert(pkt->senderState);

        // All requests coalesced into this packet target the same virtual
        // page.
        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

        // Prefetch requests do not perturb the statistics.
        bool update_stats = !sender_state->prefetch;
        ThreadContext * tmp_tc = sender_state->tc;

        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                virt_page_addr);

        // Number of original (pre-coalescing) requests this packet
        // represents at the current TLB level.
        int req_cnt = sender_state->reqCnt.back();

        if (update_stats) {
            // Subtract the start tick now; the matching "+=" in
            // translationReturn() leaves the elapsed time in the stat.
            accessCycles -= (curTick() * req_cnt);
            localCycles -= curTick();
            updatePageFootprint(virt_page_addr);
            globalNumTLBAccesses += req_cnt;
        }

        tlbOutcome lookup_outcome = TLB_MISS;
        const RequestPtr &tmp_req = pkt->req;

        // Access the TLB and figure out if it's a hit or a miss.
        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);

        if (success) {
            lookup_outcome = TLB_HIT;
            // Put the entry in SenderState. A private copy is handed to the
            // sender; the original entry remains owned by this TLB.
            TlbEntry *entry = lookup(tmp_req->getVaddr(), false);
            assert(entry);

            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);

            if (update_stats) {
                // the reqCnt has an entry per level, so its size tells us
                // which level we are in
                sender_state->hitLevel = sender_state->reqCnt.size();
                globalNumTLBHits += req_cnt;
            }
        } else {
            if (update_stats)
                globalNumTLBMisses += req_cnt;
        }

        /*
         * We now know the TLB lookup outcome (if it's a hit or a miss), as well
         * as the TLB access latency.
         *
         * We create and schedule a new TLBEvent which will help us take the
         * appropriate actions (e.g., update TLB on a hit, send request to lower
         * level TLB on a miss, or start a page walk if this was the last-level
         * TLB)
         */
        TLBEvent *tlb_event =
            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

        // Only one in-flight translation per virtual page is supported.
        if (translationReturnEvent.count(virt_page_addr)) {
            panic("Virtual Page Address %#x already has a return event\n",
                  virt_page_addr);
        }

        translationReturnEvent[virt_page_addr] = tlb_event;
        assert(tlb_event);

        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                curTick() + this->ticks(hitLatency));

        // The event fires after this TLB's access latency has elapsed.
        schedule(tlb_event, curTick() + this->ticks(hitLatency));
    }
1126
    // A TLBEvent processes the outcome (hit/miss/page walk/miss return) of
    // a lookup for one virtual page; it runs at CPU tick priority.
    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
                               PacketPtr _pkt)
        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
        outcome(tlb_outcome), pkt(_pkt)
    {
    }
1133
1134    /**
1135     * Do Paging protection checks. If we encounter a page fault, then
1136     * an assertion is fired.
1137     */
1138    void
1139    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
1140            TlbEntry * tlb_entry, Mode mode)
1141    {
1142        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1143        uint32_t flags = pkt->req->getFlags();
1144        bool storeCheck = flags & (StoreCheck << FlagShift);
1145
1146        // Do paging protection checks.
1147        bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1148        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1149
1150        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1151
1152        if ((inUser && !tlb_entry->user) ||
1153            (mode == BaseTLB::Write && badWrite)) {
1154            // The page must have been present to get into the TLB in
1155            // the first place. We'll assume the reserved bits are
1156            // fine even though we're not checking them.
1157            panic("Page fault detected");
1158        }
1159
1160        if (storeCheck && badWrite) {
1161            // This would fault if this were a write, so return a page
1162            // fault that reflects that happening.
1163            panic("Page fault detected");
1164        }
1165    }
1166
    /**
     * handleTranslationReturn is called on a TLB hit,
     * when a TLB miss returns or when a page fault returns.
     * The page-walk path calls it with TLB_MISS as the tlbOutcome.
     */
    void
    GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
            PacketPtr pkt)
    {

        assert(pkt);
        Addr vaddr = pkt->req->getVaddr();

        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;

        TlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
            // On a hit, issueTLBLookup() already stored a copy of the entry
            // in the senderState.
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                    vaddr);

            // We are returning either from a page walk or from a hit at a lower
            // TLB level. The senderState should be "carrying" a pointer to the
            // correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            // Allocate-on-miss policy: install the returned entry in this
            // TLB level and use the inserted copy from here on.
            if (allocationPolicy) {
                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        /**
         * At this point the packet carries an up-to-date tlbEntry pointer
         * in its senderState.
         * Next step is to do the paging protection checks.
         */
        DPRINTF(GPUTLB, "Entry found with vaddr %#x,  doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        pagingProtectionChecks(tc, pkt, local_entry, mode);
        // Combine the frame address with the page offset of the request.
        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        // Since this packet will be sent through the cpu side slave port,
        // it must be converted to a response pkt if it is not one already
        if (pkt->isRequest()) {
            pkt->makeTimingResponse();
        }

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable) {
             pkt->req->setFlags(Request::UNCACHEABLE);
        }

        //send packet back to coalescer
        cpuSidePort[0]->sendTimingResp(pkt);
        //schedule cleanup event
        cleanupQueue.push(virt_page_addr);

        // schedule this only once per cycle.
        // The check is required because we might have multiple translations
        // returning the same cycle
        // this is a maximum priority event and must be on the same cycle
        // as the cleanup event in TLBCoalescer to avoid a race with
        // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
        if (!cleanupEvent.scheduled())
            schedule(cleanupEvent, curTick());
    }
1252
1253    /**
1254     * Here we take the appropriate actions based on the result of the
1255     * TLB lookup.
1256     */
    void
    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
                              PacketPtr pkt)
    {
        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

        assert(translationReturnEvent[virtPageAddr]);
        assert(pkt);

        TranslationState *tmp_sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        // Per-request count at this level; prefetches skip stats.
        int req_cnt = tmp_sender_state->reqCnt.back();
        bool update_stats = !tmp_sender_state->prefetch;


        if (outcome == TLB_HIT) {
            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

            // Close the accounting opened in issueTLBLookup() (which
            // subtracted the start tick from these stats).
            if (update_stats) {
                accessCycles += (req_cnt * curTick());
                localCycles += curTick();
            }

        } else if (outcome == TLB_MISS) {

            DPRINTF(GPUTLB, "This is a TLB miss\n");
            if (update_stats) {
                accessCycles += (req_cnt*curTick());
                localCycles += curTick();
            }

            if (hasMemSidePort) {
                // the one cyle added here represent the delay from when we get
                // the reply back till when we propagate it to the coalescer
                // above.
                if (update_stats) {
                    accessCycles += (req_cnt * 1);
                    localCycles += 1;
                }

                /**
                 * There is a TLB below. Send the coalesced request.
                 * We actually send the very first packet of all the
                 * pending packets for this virtual page address.
                 */
                if (!memSidePort[0]->sendTimingReq(pkt)) {
                    DPRINTF(GPUTLB, "Failed sending translation request to "
                            "lower level TLB for addr %#x\n", virtPageAddr);

                    // Queue for re-send when the lower level sends a retry.
                    memSidePort[0]->retries.push_back(pkt);
                } else {
                    DPRINTF(GPUTLB, "Sent translation request to lower level "
                            "TLB for addr %#x\n", virtPageAddr);
                }
            } else {
                //this is the last level TLB. Start a page walk
                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                        "addr %#x\n", virtPageAddr);

                // Open the page-walk accounting; the matching "+=" happens
                // when the PAGE_WALK outcome is processed below.
                if (update_stats)
                    pageTableCycles -= (req_cnt*curTick());

                // Reuse this page's pending event to model the walk delay.
                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
                assert(tlb_event);
                tlb_event->updateOutcome(PAGE_WALK);
                schedule(tlb_event, curTick() + ticks(missLatency2));
            }
        } else if (outcome == PAGE_WALK) {
            if (update_stats)
                pageTableCycles += (req_cnt*curTick());

            // Need to access the page table and update the TLB
            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                    virtPageAddr);

            TranslationState *sender_state =
                safe_cast<TranslationState*>(pkt->senderState);

            Process *p = sender_state->tc->getProcessPtr();
            Addr vaddr = pkt->req->getVaddr();
    #ifndef NDEBUG
            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
            assert(alignedVaddr == virtPageAddr);
    #endif
            // Functional lookup in the process page table; retry once after
            // letting the process grow the stack if the first lookup fails.
            const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
            if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
                    p->fixupStackFault(vaddr)) {
                pte = p->pTable->lookup(vaddr);
            }

            if (pte) {
                DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                        pte->paddr);

                sender_state->tlbEntry =
                    new TlbEntry(p->pid(), virtPageAddr, pte->paddr, false,
                                 false);
            } else {
                // No translation found; signal failure via a null entry.
                sender_state->tlbEntry = nullptr;
            }

            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else if (outcome == MISS_RETURN) {
            /** we add an extra cycle in the return path of the translation
             * requests in between the various TLB levels.
             */
            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
        } else {
            panic("Unexpected TLB outcome %d", outcome);
        }
    }
1369
    // Dispatch the stored lookup outcome back to the owning TLB.
    void
    GpuTLB::TLBEvent::process()
    {
        tlb->translationReturn(virtPageAddr, outcome, pkt);
    }
1375
    // Human-readable event description used by the event queue for tracing.
    const char*
    GpuTLB::TLBEvent::description() const
    {
        return "trigger translationDoneEvent";
    }
1381
    // Change the outcome this event will act on when it fires (used to turn
    // a pending event into a PAGE_WALK or MISS_RETURN).
    void
    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
    {
        outcome = _outcome;
    }
1387
    // Accessor for the virtual page address this event belongs to.
    Addr
    GpuTLB::TLBEvent::getTLBEventVaddr()
    {
        return virtPageAddr;
    }
1393
1394    /*
1395     * recvTiming receives a coalesced timing request from a TLBCoalescer
1396     * and it calls issueTLBLookup()
1397     * It only rejects the packet if we have exceeded the max
1398     * outstanding number of requests for the TLB
1399     */
1400    bool
1401    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1402    {
1403        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1404            tlb->issueTLBLookup(pkt);
1405            // update number of outstanding translation requests
1406            tlb->outstandingReqs++;
1407            return true;
1408         } else {
1409            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1410                    tlb->outstandingReqs);
1411            return false;
1412         }
1413    }
1414
1415    /**
1416     * handleFuncTranslationReturn is called on a TLB hit,
1417     * when a TLB miss returns or when a page fault returns.
1418     * It updates LRU, inserts the TLB entry on a miss
1419     * depending on the allocation policy and does the required
1420     * protection checks. It does NOT create a new packet to
1421     * update the packet's addr; this is done in hsail-gpu code.
1422     */
    void
    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        Mode mode = sender_state->tlbMode;
        Addr vaddr = pkt->req->getVaddr();

        TlbEntry *local_entry, *new_entry;

        if (tlb_outcome == TLB_HIT) {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                    "%#x\n", vaddr);

            // On a hit, recvFunctional() already stored a copy of the entry
            // in the senderState.
            local_entry = sender_state->tlbEntry;
        } else {
            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                    "%#x\n", vaddr);

            // We are returning either from a page walk or from a hit at a lower
            // TLB level. The senderState should be "carrying" a pointer to the
            // correct TLBEntry.
            new_entry = sender_state->tlbEntry;
            assert(new_entry);
            local_entry = new_entry;

            // Allocate-on-miss policy: install the entry at this level.
            if (allocationPolicy) {
                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);

                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                        virt_page_addr);

                local_entry = insert(virt_page_addr, *new_entry);
            }

            assert(local_entry);
        }

        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                "while paddr was %#x.\n", local_entry->vaddr,
                local_entry->paddr);

        /**
         * Do paging checks if it's a normal functional access.  If it's for a
         * prefetch, then sometimes you can try to prefetch something that
         * won't pass protection. We don't actually want to fault because there
         * is no demand access to deem this a violation.  Just put it in the
         * TLB and it will fault if indeed a future demand access touches it in
         * violation.
         *
         * This feature could be used to explore security issues around
         * speculative memory accesses.
         */
        if (!sender_state->prefetch && sender_state->tlbEntry)
            pagingProtectionChecks(tc, pkt, local_entry, mode);

        // Combine the frame address with the page offset of the request.
        int page_size = local_entry->size();
        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

        pkt->req->setPaddr(paddr);

        if (local_entry->uncacheable)
             pkt->req->setFlags(Request::UNCACHEABLE);
    }
1490
1491    // This is used for atomic translations. Need to
1492    // make it all happen during the same cycle.
    // This is used for atomic translations. Need to
    // make it all happen during the same cycle.
    void
    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
    {
        TranslationState *sender_state =
            safe_cast<TranslationState*>(pkt->senderState);

        ThreadContext *tc = sender_state->tc;
        // Prefetch requests do not perturb the statistics.
        bool update_stats = !sender_state->prefetch;

        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        if (update_stats)
            tlb->updatePageFootprint(virt_page_addr);

        // do the TLB lookup without updating the stats
        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

        // functional mode means no coalescing
        // global metrics are the same as the local metrics
        if (update_stats) {
            tlb->globalNumTLBAccesses++;

            if (success) {
                // reqCnt has one entry per TLB level, so its size gives the
                // level at which the hit occurred.
                sender_state->hitLevel = sender_state->reqCnt.size();
                tlb->globalNumTLBHits++;
            }
        }

        if (!success) {
            if (update_stats)
                tlb->globalNumTLBMisses++;
            if (tlb->hasMemSidePort) {
                // there is a TLB below -> propagate down the TLB hierarchy
                tlb->memSidePort[0]->sendFunctional(pkt);
                // If no valid translation from a prefetch, then just return
                if (sender_state->prefetch && !pkt->req->hasPaddr())
                    return;
            } else {
                // Need to access the page table and update the TLB
                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                        virt_page_addr);

                Process *p = tc->getProcessPtr();

                Addr vaddr = pkt->req->getVaddr();
    #ifndef NDEBUG
                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
                assert(alignedVaddr == virt_page_addr);
    #endif

                // Functional page table lookup; retry once after letting the
                // process grow the stack if the first lookup fails.
                const EmulationPageTable::Entry *pte =
                        p->pTable->lookup(vaddr);
                if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
                        p->fixupStackFault(vaddr)) {
                    pte = p->pTable->lookup(vaddr);
                }

                if (!sender_state->prefetch) {
                    // no PageFaults are permitted after
                    // the second page table lookup
                    assert(pte);

                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                            pte->paddr);

                    sender_state->tlbEntry =
                        new TlbEntry(p->pid(), virt_page_addr,
                                     pte->paddr, false, false);
                } else {
                    // If this was a prefetch, then do the normal thing if it
                    // was a successful translation.  Otherwise, send an empty
                    // TLB entry back so that it can be figured out as empty and
                    // handled accordingly.
                    if (pte) {
                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
                                pte->paddr);

                        sender_state->tlbEntry =
                            new TlbEntry(p->pid(), virt_page_addr,
                                         pte->paddr, false, false);
                    } else {
                        DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
                                alignedVaddr);

                        sender_state->tlbEntry = nullptr;

                        return;
                    }
                }
            }
        } else {
            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                    tlb->lookup(pkt->req->getVaddr()));

            // Hand the sender a private copy of the hitting entry.
            TlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
                                             update_stats);

            assert(entry);

            auto p = sender_state->tc->getProcessPtr();
            sender_state->tlbEntry =
                new TlbEntry(p->pid(), entry->vaddr, entry->paddr,
                             false, false);
        }
        // This is the function that would populate pkt->req with the paddr of
        // the translation. But if no translation happens (i.e Prefetch fails)
        // then the early returns in the above code will keep this function
        // from executing.
        tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
    }
1605
    void
    GpuTLB::CpuSidePort::recvReqRetry()
    {
        // The CPUSidePort never sends anything but replies. No retries
        // expected.
        panic("recvReqRetry called");
    }
1613
1614    AddrRangeList
1615    GpuTLB::CpuSidePort::getAddrRanges() const
1616    {
1617        // currently not checked by the master
1618        AddrRangeList ranges;
1619
1620        return ranges;
1621    }
1622
1623    /**
1624     * MemSidePort receives the packet back.
1625     * We need to call the handleTranslationReturn
1626     * and propagate up the hierarchy.
1627     */
    bool
    GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
    {
        // A translation response for this virtual page arrived from below.
        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                        TheISA::PageBytes);

        DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
                virt_page_addr);

        // The page's pending TLBEvent is re-scheduled one cycle out as a
        // MISS_RETURN so translationReturn() propagates the result upward.
        TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
        assert(tlb_event);
        assert(virt_page_addr == tlb_event->getTLBEventVaddr());

        tlb_event->updateOutcome(MISS_RETURN);
        tlb->schedule(tlb_event, curTick()+tlb->ticks(1));

        return true;
    }
1646
    void
    GpuTLB::MemSidePort::recvReqRetry()
    {
        // No retries should reach the TLB. The retries
        // should only reach the TLBCoalescer.
        panic("recvReqRetry called");
    }
1654
1655    void
1656    GpuTLB::cleanup()
1657    {
1658        while (!cleanupQueue.empty()) {
1659            Addr cleanup_addr = cleanupQueue.front();
1660            cleanupQueue.pop();
1661
1662            // delete TLBEvent
1663            TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1664            delete old_tlb_event;
1665            translationReturnEvent.erase(cleanup_addr);
1666
1667            // update number of outstanding requests
1668            outstandingReqs--;
1669        }
1670
1671        /** the higher level coalescer should retry if it has
1672         * any pending requests.
1673         */
1674        for (int i = 0; i < cpuSidePort.size(); ++i) {
1675            cpuSidePort[i]->sendRetryReq();
1676        }
1677    }
1678
1679    void
1680    GpuTLB::updatePageFootprint(Addr virt_page_addr)
1681    {
1682
1683        std::pair<AccessPatternTable::iterator, bool> ret;
1684
1685        AccessInfo tmp_access_info;
1686        tmp_access_info.lastTimeAccessed = 0;
1687        tmp_access_info.accessesPerPage = 0;
1688        tmp_access_info.totalReuseDistance = 0;
1689        tmp_access_info.sumDistance = 0;
1690        tmp_access_info.meanDistance = 0;
1691
1692        ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1693                                  tmp_access_info));
1694
1695        bool first_page_access = ret.second;
1696
1697        if (first_page_access) {
1698            numUniquePages++;
1699        } else  {
1700            int accessed_before;
1701            accessed_before  = curTick() - ret.first->second.lastTimeAccessed;
1702            ret.first->second.totalReuseDistance += accessed_before;
1703        }
1704
1705        ret.first->second.accessesPerPage++;
1706        ret.first->second.lastTimeAccessed = curTick();
1707
1708        if (accessDistance) {
1709            ret.first->second.localTLBAccesses
1710                .push_back(localNumTLBAccesses.value());
1711        }
1712    }
1713
1714    void
1715    GpuTLB::exitCallback()
1716    {
1717        std::ostream *page_stat_file = nullptr;
1718
1719        if (accessDistance) {
1720
1721            // print per page statistics to a separate file (.csv format)
1722            // simout is the gem5 output directory (default is m5out or the one
1723            // specified with -d
1724            page_stat_file = simout.create(name().c_str())->stream();
1725
1726            // print header
1727            *page_stat_file << "page,max_access_distance,mean_access_distance, "
1728                            << "stddev_distance" << std::endl;
1729        }
1730
1731        // update avg. reuse distance footprint
1732        AccessPatternTable::iterator iter, iter_begin, iter_end;
1733        unsigned int sum_avg_reuse_distance_per_page = 0;
1734
1735        // iterate through all pages seen by this TLB
1736        for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1737            sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1738                                               iter->second.accessesPerPage;
1739
1740            if (accessDistance) {
1741                unsigned int tmp = iter->second.localTLBAccesses[0];
1742                unsigned int prev = tmp;
1743
1744                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1745                    if (i) {
1746                        tmp = prev + 1;
1747                    }
1748
1749                    prev = iter->second.localTLBAccesses[i];
1750                    // update the localTLBAccesses value
1751                    // with the actual differece
1752                    iter->second.localTLBAccesses[i] -= tmp;
1753                    // compute the sum of AccessDistance per page
1754                    // used later for mean
1755                    iter->second.sumDistance +=
1756                        iter->second.localTLBAccesses[i];
1757                }
1758
1759                iter->second.meanDistance =
1760                    iter->second.sumDistance / iter->second.accessesPerPage;
1761
1762                // compute std_dev and max  (we need a second round because we
1763                // need to know the mean value
1764                unsigned int max_distance = 0;
1765                unsigned int stddev_distance = 0;
1766
1767                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1768                    unsigned int tmp_access_distance =
1769                        iter->second.localTLBAccesses[i];
1770
1771                    if (tmp_access_distance > max_distance) {
1772                        max_distance = tmp_access_distance;
1773                    }
1774
1775                    unsigned int diff =
1776                        tmp_access_distance - iter->second.meanDistance;
1777                    stddev_distance += pow(diff, 2);
1778
1779                }
1780
1781                stddev_distance =
1782                    sqrt(stddev_distance/iter->second.accessesPerPage);
1783
1784                if (page_stat_file) {
1785                    *page_stat_file << std::hex << iter->first << ",";
1786                    *page_stat_file << std::dec << max_distance << ",";
1787                    *page_stat_file << std::dec << iter->second.meanDistance
1788                                    << ",";
1789                    *page_stat_file << std::dec << stddev_distance;
1790                    *page_stat_file << std::endl;
1791                }
1792
1793                // erase the localTLBAccesses array
1794                iter->second.localTLBAccesses.clear();
1795            }
1796        }
1797
1798        if (!TLBFootprint.empty()) {
1799            avgReuseDistance =
1800                sum_avg_reuse_distance_per_page / TLBFootprint.size();
1801        }
1802
1803        //clear the TLBFootprint map
1804        TLBFootprint.clear();
1805    }
1806} // namespace X86ISA
1807
X86ISA::GpuTLB*
X86GPUTLBParams::create()
{
    // Factory hook invoked by the Python configuration system:
    // construct the GpuTLB SimObject described by this params struct.
    return new X86ISA::GpuTLB(this);
}
1813
1814