gpu_tlb.cc revision 11523:81332eb10367
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Lisa Hsu
34 */
35
36#include "gpu-compute/gpu_tlb.hh"
37
38#include <cmath>
39#include <cstring>
40
41#include "arch/x86/faults.hh"
42#include "arch/x86/insts/microldstop.hh"
43#include "arch/x86/pagetable.hh"
44#include "arch/x86/pagetable_walker.hh"
45#include "arch/x86/regs/misc.hh"
46#include "arch/x86/x86_traits.hh"
47#include "base/bitfield.hh"
48#include "base/output.hh"
49#include "base/trace.hh"
50#include "cpu/base.hh"
51#include "cpu/thread_context.hh"
52#include "debug/GPUPrefetch.hh"
53#include "debug/GPUTLB.hh"
54#include "mem/packet_access.hh"
55#include "mem/page_table.hh"
56#include "mem/request.hh"
57#include "sim/process.hh"
58
59namespace X86ISA
60{
61
62    GpuTLB::GpuTLB(const Params *p)
63        : MemObject(p), configAddress(0), size(p->size),
64          cleanupEvent(this, false, Event::Maximum_Pri), exitEvent(this)
65    {
66        assoc = p->assoc;
67        assert(assoc <= size);
68        numSets = size/assoc;
69        allocationPolicy = p->allocationPolicy;
70        hasMemSidePort = false;
71        accessDistance = p->accessDistance;
72        clock = p->clk_domain->clockPeriod();
73
74        tlb = new GpuTlbEntry[size];
75        std::memset(tlb, 0, sizeof(GpuTlbEntry) * size);
76
77        freeList.resize(numSets);
78        entryList.resize(numSets);
79
80        for (int set = 0; set < numSets; ++set) {
81            for (int way = 0; way < assoc; ++way) {
82                int x = set*assoc + way;
83                freeList[set].push_back(&tlb[x]);
84            }
85        }
86
87        FA = (size == assoc);
88
89        /**
90         * @warning: the set-associative version assumes you have a
91         * fixed page size of 4KB.
92         * If the page size is greather than 4KB (as defined in the
93         * TheISA::PageBytes), then there are various issues w/ the current
94         * implementation (you'd have the same 8KB page being replicated in
95         * different sets etc)
96         */
97        setMask = numSets - 1;
98
99    #if 0
100        // GpuTLB doesn't yet support full system
101        walker = p->walker;
102        walker->setTLB(this);
103    #endif
104
105        maxCoalescedReqs = p->maxOutstandingReqs;
106
107        // Do not allow maxCoalescedReqs to be more than the TLB associativity
108        if (maxCoalescedReqs > assoc) {
109            maxCoalescedReqs = assoc;
110            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
111        }
112
113        outstandingReqs = 0;
114        hitLatency = p->hitLatency;
115        missLatency1 = p->missLatency1;
116        missLatency2 = p->missLatency2;
117
118        // create the slave ports based on the number of connected ports
119        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
120            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
121                                  name(), i), this, i));
122        }
123
124        // create the master ports based on the number of connected ports
125        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
126            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
127                                  name(), i), this, i));
128        }
129    }
130
131    // fixme: this is never called?
132    GpuTLB::~GpuTLB()
133    {
134        // make sure all the hash-maps are empty
135        assert(translationReturnEvent.empty());
136
137        // delete the TLB
138        delete[] tlb;
139    }
140
141    BaseSlavePort&
142    GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
143    {
144        if (if_name == "slave") {
145            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
146                panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
147            }
148
149            return *cpuSidePort[idx];
150        } else {
151            panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
152        }
153    }
154
155    BaseMasterPort&
156    GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
157    {
158        if (if_name == "master") {
159            if (idx >= static_cast<PortID>(memSidePort.size())) {
160                panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
161            }
162
163            hasMemSidePort = true;
164
165            return *memSidePort[idx];
166        } else {
167            panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
168        }
169    }
170
171    GpuTlbEntry*
172    GpuTLB::insert(Addr vpn, GpuTlbEntry &entry)
173    {
174        GpuTlbEntry *newEntry = nullptr;
175
176        /**
177         * vpn holds the virtual page address
178         * The least significant bits are simply masked
179         */
180        int set = (vpn >> TheISA::PageShift) & setMask;
181
182        if (!freeList[set].empty()) {
183            newEntry = freeList[set].front();
184            freeList[set].pop_front();
185        } else {
186            newEntry = entryList[set].back();
187            entryList[set].pop_back();
188        }
189
190        *newEntry = entry;
191        newEntry->vaddr = vpn;
192        entryList[set].push_front(newEntry);
193
194        return newEntry;
195    }
196
197    GpuTLB::EntryList::iterator
198    GpuTLB::lookupIt(Addr va, bool update_lru)
199    {
200        int set = (va >> TheISA::PageShift) & setMask;
201
202        if (FA) {
203            assert(!set);
204        }
205
206        auto entry = entryList[set].begin();
207        for (; entry != entryList[set].end(); ++entry) {
208            int page_size = (*entry)->size();
209
210            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
211                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
212                        "with size %#x.\n", va, (*entry)->vaddr, page_size);
213
214                if (update_lru) {
215                    entryList[set].push_front(*entry);
216                    entryList[set].erase(entry);
217                    entry = entryList[set].begin();
218                }
219
220                break;
221            }
222        }
223
224        return entry;
225    }
226
227    GpuTlbEntry*
228    GpuTLB::lookup(Addr va, bool update_lru)
229    {
230        int set = (va >> TheISA::PageShift) & setMask;
231
232        auto entry = lookupIt(va, update_lru);
233
234        if (entry == entryList[set].end())
235            return nullptr;
236        else
237            return *entry;
238    }
239
240    void
241    GpuTLB::invalidateAll()
242    {
243        DPRINTF(GPUTLB, "Invalidating all entries.\n");
244
245        for (int i = 0; i < numSets; ++i) {
246            while (!entryList[i].empty()) {
247                GpuTlbEntry *entry = entryList[i].front();
248                entryList[i].pop_front();
249                freeList[i].push_back(entry);
250            }
251        }
252    }
253
254    void
255    GpuTLB::setConfigAddress(uint32_t addr)
256    {
257        configAddress = addr;
258    }
259
260    void
261    GpuTLB::invalidateNonGlobal()
262    {
263        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
264
265        for (int i = 0; i < numSets; ++i) {
266            for (auto entryIt = entryList[i].begin();
267                 entryIt != entryList[i].end();) {
268                if (!(*entryIt)->global) {
269                    freeList[i].push_back(*entryIt);
270                    entryList[i].erase(entryIt++);
271                } else {
272                    ++entryIt;
273                }
274            }
275        }
276    }
277
278    void
279    GpuTLB::demapPage(Addr va, uint64_t asn)
280    {
281
282        int set = (va >> TheISA::PageShift) & setMask;
283        auto entry = lookupIt(va, false);
284
285        if (entry != entryList[set].end()) {
286            freeList[set].push_back(*entry);
287            entryList[set].erase(entry);
288        }
289    }
290
291    Fault
292    GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
293    {
294        DPRINTF(GPUTLB, "Addresses references internal memory.\n");
295        Addr vaddr = req->getVaddr();
296        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
297
298        if (prefix == IntAddrPrefixCPUID) {
299            panic("CPUID memory space not yet implemented!\n");
300        } else if (prefix == IntAddrPrefixMSR) {
301            vaddr = vaddr >> 3;
302            req->setFlags(Request::MMAPPED_IPR);
303            Addr regNum = 0;
304
305            switch (vaddr & ~IntAddrPrefixMask) {
306              case 0x10:
307                regNum = MISCREG_TSC;
308                break;
309              case 0x1B:
310                regNum = MISCREG_APIC_BASE;
311                break;
312              case 0xFE:
313                regNum = MISCREG_MTRRCAP;
314                break;
315              case 0x174:
316                regNum = MISCREG_SYSENTER_CS;
317                break;
318              case 0x175:
319                regNum = MISCREG_SYSENTER_ESP;
320                break;
321              case 0x176:
322                regNum = MISCREG_SYSENTER_EIP;
323                break;
324              case 0x179:
325                regNum = MISCREG_MCG_CAP;
326                break;
327              case 0x17A:
328                regNum = MISCREG_MCG_STATUS;
329                break;
330              case 0x17B:
331                regNum = MISCREG_MCG_CTL;
332                break;
333              case 0x1D9:
334                regNum = MISCREG_DEBUG_CTL_MSR;
335                break;
336              case 0x1DB:
337                regNum = MISCREG_LAST_BRANCH_FROM_IP;
338                break;
339              case 0x1DC:
340                regNum = MISCREG_LAST_BRANCH_TO_IP;
341                break;
342              case 0x1DD:
343                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
344                break;
345              case 0x1DE:
346                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
347                break;
348              case 0x200:
349                regNum = MISCREG_MTRR_PHYS_BASE_0;
350                break;
351              case 0x201:
352                regNum = MISCREG_MTRR_PHYS_MASK_0;
353                break;
354              case 0x202:
355                regNum = MISCREG_MTRR_PHYS_BASE_1;
356                break;
357              case 0x203:
358                regNum = MISCREG_MTRR_PHYS_MASK_1;
359                break;
360              case 0x204:
361                regNum = MISCREG_MTRR_PHYS_BASE_2;
362                break;
363              case 0x205:
364                regNum = MISCREG_MTRR_PHYS_MASK_2;
365                break;
366              case 0x206:
367                regNum = MISCREG_MTRR_PHYS_BASE_3;
368                break;
369              case 0x207:
370                regNum = MISCREG_MTRR_PHYS_MASK_3;
371                break;
372              case 0x208:
373                regNum = MISCREG_MTRR_PHYS_BASE_4;
374                break;
375              case 0x209:
376                regNum = MISCREG_MTRR_PHYS_MASK_4;
377                break;
378              case 0x20A:
379                regNum = MISCREG_MTRR_PHYS_BASE_5;
380                break;
381              case 0x20B:
382                regNum = MISCREG_MTRR_PHYS_MASK_5;
383                break;
384              case 0x20C:
385                regNum = MISCREG_MTRR_PHYS_BASE_6;
386                break;
387              case 0x20D:
388                regNum = MISCREG_MTRR_PHYS_MASK_6;
389                break;
390              case 0x20E:
391                regNum = MISCREG_MTRR_PHYS_BASE_7;
392                break;
393              case 0x20F:
394                regNum = MISCREG_MTRR_PHYS_MASK_7;
395                break;
396              case 0x250:
397                regNum = MISCREG_MTRR_FIX_64K_00000;
398                break;
399              case 0x258:
400                regNum = MISCREG_MTRR_FIX_16K_80000;
401                break;
402              case 0x259:
403                regNum = MISCREG_MTRR_FIX_16K_A0000;
404                break;
405              case 0x268:
406                regNum = MISCREG_MTRR_FIX_4K_C0000;
407                break;
408              case 0x269:
409                regNum = MISCREG_MTRR_FIX_4K_C8000;
410                break;
411              case 0x26A:
412                regNum = MISCREG_MTRR_FIX_4K_D0000;
413                break;
414              case 0x26B:
415                regNum = MISCREG_MTRR_FIX_4K_D8000;
416                break;
417              case 0x26C:
418                regNum = MISCREG_MTRR_FIX_4K_E0000;
419                break;
420              case 0x26D:
421                regNum = MISCREG_MTRR_FIX_4K_E8000;
422                break;
423              case 0x26E:
424                regNum = MISCREG_MTRR_FIX_4K_F0000;
425                break;
426              case 0x26F:
427                regNum = MISCREG_MTRR_FIX_4K_F8000;
428                break;
429              case 0x277:
430                regNum = MISCREG_PAT;
431                break;
432              case 0x2FF:
433                regNum = MISCREG_DEF_TYPE;
434                break;
435              case 0x400:
436                regNum = MISCREG_MC0_CTL;
437                break;
438              case 0x404:
439                regNum = MISCREG_MC1_CTL;
440                break;
441              case 0x408:
442                regNum = MISCREG_MC2_CTL;
443                break;
444              case 0x40C:
445                regNum = MISCREG_MC3_CTL;
446                break;
447              case 0x410:
448                regNum = MISCREG_MC4_CTL;
449                break;
450              case 0x414:
451                regNum = MISCREG_MC5_CTL;
452                break;
453              case 0x418:
454                regNum = MISCREG_MC6_CTL;
455                break;
456              case 0x41C:
457                regNum = MISCREG_MC7_CTL;
458                break;
459              case 0x401:
460                regNum = MISCREG_MC0_STATUS;
461                break;
462              case 0x405:
463                regNum = MISCREG_MC1_STATUS;
464                break;
465              case 0x409:
466                regNum = MISCREG_MC2_STATUS;
467                break;
468              case 0x40D:
469                regNum = MISCREG_MC3_STATUS;
470                break;
471              case 0x411:
472                regNum = MISCREG_MC4_STATUS;
473                break;
474              case 0x415:
475                regNum = MISCREG_MC5_STATUS;
476                break;
477              case 0x419:
478                regNum = MISCREG_MC6_STATUS;
479                break;
480              case 0x41D:
481                regNum = MISCREG_MC7_STATUS;
482                break;
483              case 0x402:
484                regNum = MISCREG_MC0_ADDR;
485                break;
486              case 0x406:
487                regNum = MISCREG_MC1_ADDR;
488                break;
489              case 0x40A:
490                regNum = MISCREG_MC2_ADDR;
491                break;
492              case 0x40E:
493                regNum = MISCREG_MC3_ADDR;
494                break;
495              case 0x412:
496                regNum = MISCREG_MC4_ADDR;
497                break;
498              case 0x416:
499                regNum = MISCREG_MC5_ADDR;
500                break;
501              case 0x41A:
502                regNum = MISCREG_MC6_ADDR;
503                break;
504              case 0x41E:
505                regNum = MISCREG_MC7_ADDR;
506                break;
507              case 0x403:
508                regNum = MISCREG_MC0_MISC;
509                break;
510              case 0x407:
511                regNum = MISCREG_MC1_MISC;
512                break;
513              case 0x40B:
514                regNum = MISCREG_MC2_MISC;
515                break;
516              case 0x40F:
517                regNum = MISCREG_MC3_MISC;
518                break;
519              case 0x413:
520                regNum = MISCREG_MC4_MISC;
521                break;
522              case 0x417:
523                regNum = MISCREG_MC5_MISC;
524                break;
525              case 0x41B:
526                regNum = MISCREG_MC6_MISC;
527                break;
528              case 0x41F:
529                regNum = MISCREG_MC7_MISC;
530                break;
531              case 0xC0000080:
532                regNum = MISCREG_EFER;
533                break;
534              case 0xC0000081:
535                regNum = MISCREG_STAR;
536                break;
537              case 0xC0000082:
538                regNum = MISCREG_LSTAR;
539                break;
540              case 0xC0000083:
541                regNum = MISCREG_CSTAR;
542                break;
543              case 0xC0000084:
544                regNum = MISCREG_SF_MASK;
545                break;
546              case 0xC0000100:
547                regNum = MISCREG_FS_BASE;
548                break;
549              case 0xC0000101:
550                regNum = MISCREG_GS_BASE;
551                break;
552              case 0xC0000102:
553                regNum = MISCREG_KERNEL_GS_BASE;
554                break;
555              case 0xC0000103:
556                regNum = MISCREG_TSC_AUX;
557                break;
558              case 0xC0010000:
559                regNum = MISCREG_PERF_EVT_SEL0;
560                break;
561              case 0xC0010001:
562                regNum = MISCREG_PERF_EVT_SEL1;
563                break;
564              case 0xC0010002:
565                regNum = MISCREG_PERF_EVT_SEL2;
566                break;
567              case 0xC0010003:
568                regNum = MISCREG_PERF_EVT_SEL3;
569                break;
570              case 0xC0010004:
571                regNum = MISCREG_PERF_EVT_CTR0;
572                break;
573              case 0xC0010005:
574                regNum = MISCREG_PERF_EVT_CTR1;
575                break;
576              case 0xC0010006:
577                regNum = MISCREG_PERF_EVT_CTR2;
578                break;
579              case 0xC0010007:
580                regNum = MISCREG_PERF_EVT_CTR3;
581                break;
582              case 0xC0010010:
583                regNum = MISCREG_SYSCFG;
584                break;
585              case 0xC0010016:
586                regNum = MISCREG_IORR_BASE0;
587                break;
588              case 0xC0010017:
589                regNum = MISCREG_IORR_BASE1;
590                break;
591              case 0xC0010018:
592                regNum = MISCREG_IORR_MASK0;
593                break;
594              case 0xC0010019:
595                regNum = MISCREG_IORR_MASK1;
596                break;
597              case 0xC001001A:
598                regNum = MISCREG_TOP_MEM;
599                break;
600              case 0xC001001D:
601                regNum = MISCREG_TOP_MEM2;
602                break;
603              case 0xC0010114:
604                regNum = MISCREG_VM_CR;
605                break;
606              case 0xC0010115:
607                regNum = MISCREG_IGNNE;
608                break;
609              case 0xC0010116:
610                regNum = MISCREG_SMM_CTL;
611                break;
612              case 0xC0010117:
613                regNum = MISCREG_VM_HSAVE_PA;
614                break;
615              default:
616                return std::make_shared<GeneralProtection>(0);
617            }
618            //The index is multiplied by the size of a MiscReg so that
619            //any memory dependence calculations will not see these as
620            //overlapping.
621            req->setPaddr(regNum * sizeof(MiscReg));
622            return NoFault;
623        } else if (prefix == IntAddrPrefixIO) {
624            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
625            // bitmap in the TSS.
626
627            Addr IOPort = vaddr & ~IntAddrPrefixMask;
628            // Make sure the address fits in the expected 16 bit IO address
629            // space.
630            assert(!(IOPort & ~0xFFFF));
631
632            if (IOPort == 0xCF8 && req->getSize() == 4) {
633                req->setFlags(Request::MMAPPED_IPR);
634                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
635            } else if ((IOPort & ~mask(2)) == 0xCFC) {
636                req->setFlags(Request::UNCACHEABLE);
637
638                Addr configAddress =
639                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
640
641                if (bits(configAddress, 31, 31)) {
642                    req->setPaddr(PhysAddrPrefixPciConfig |
643                                  mbits(configAddress, 30, 2) |
644                                  (IOPort & mask(2)));
645                } else {
646                    req->setPaddr(PhysAddrPrefixIO | IOPort);
647                }
648            } else {
649                req->setFlags(Request::UNCACHEABLE);
650                req->setPaddr(PhysAddrPrefixIO | IOPort);
651            }
652            return NoFault;
653        } else {
654            panic("Access to unrecognized internal address space %#x.\n",
655                  prefix);
656        }
657    }
658
659    /**
660     * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
661     * and false on a TLB miss.
662     * Many of the checks about different modes have been converted to
663     * assertions, since these parts of the code are not really used.
664     * On a hit it will update the LRU stack.
665     */
666    bool
667    GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
668    {
669        bool tlb_hit = false;
670    #ifndef NDEBUG
671        uint32_t flags = req->getFlags();
672        int seg = flags & SegmentFlagMask;
673    #endif
674
675        assert(seg != SEGMENT_REG_MS);
676        Addr vaddr = req->getVaddr();
677        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
678        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
679
680        if (m5Reg.prot) {
681            DPRINTF(GPUTLB, "In protected mode.\n");
682            // make sure we are in 64-bit mode
683            assert(m5Reg.mode == LongMode);
684
685            // If paging is enabled, do the translation.
686            if (m5Reg.paging) {
687                DPRINTF(GPUTLB, "Paging enabled.\n");
688                //update LRU stack on a hit
689                GpuTlbEntry *entry = lookup(vaddr, true);
690
691                if (entry)
692                    tlb_hit = true;
693
694                if (!update_stats) {
695                    // functional tlb access for memory initialization
696                    // i.e., memory seeding or instr. seeding -> don't update
697                    // TLB and stats
698                    return tlb_hit;
699                }
700
701                localNumTLBAccesses++;
702
703                if (!entry) {
704                    localNumTLBMisses++;
705                } else {
706                    localNumTLBHits++;
707                }
708            }
709        }
710
711        return tlb_hit;
712    }
713
714    Fault
715    GpuTLB::translate(RequestPtr req, ThreadContext *tc,
716                      Translation *translation, Mode mode,
717                      bool &delayedResponse, bool timing, int &latency)
718    {
719        uint32_t flags = req->getFlags();
720        int seg = flags & SegmentFlagMask;
721        bool storeCheck = flags & (StoreCheck << FlagShift);
722
723        // If this is true, we're dealing with a request
724        // to a non-memory address space.
725        if (seg == SEGMENT_REG_MS) {
726            return translateInt(req, tc);
727        }
728
729        delayedResponse = false;
730        Addr vaddr = req->getVaddr();
731        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
732
733        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
734
735        // If protected mode has been enabled...
736        if (m5Reg.prot) {
737            DPRINTF(GPUTLB, "In protected mode.\n");
738            // If we're not in 64-bit mode, do protection/limit checks
739            if (m5Reg.mode != LongMode) {
740                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
741                        "protection.\n");
742
743                // Check for a null segment selector.
744                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
745                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
746                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
747                    return std::make_shared<GeneralProtection>(0);
748                }
749
750                bool expandDown = false;
751                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
752
753                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
754                    if (!attr.writable && (mode == BaseTLB::Write ||
755                        storeCheck))
756                        return std::make_shared<GeneralProtection>(0);
757
758                    if (!attr.readable && mode == BaseTLB::Read)
759                        return std::make_shared<GeneralProtection>(0);
760
761                    expandDown = attr.expandDown;
762
763                }
764
765                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
766                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
767                // This assumes we're not in 64 bit mode. If we were, the
768                // default address size is 64 bits, overridable to 32.
769                int size = 32;
770                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
771                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
772
773                if ((csAttr.defaultSize && sizeOverride) ||
774                    (!csAttr.defaultSize && !sizeOverride)) {
775                    size = 16;
776                }
777
778                Addr offset = bits(vaddr - base, size - 1, 0);
779                Addr endOffset = offset + req->getSize() - 1;
780
781                if (expandDown) {
782                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
783                    warn_once("Expand down segments are untested.\n");
784
785                    if (offset <= limit || endOffset <= limit)
786                        return std::make_shared<GeneralProtection>(0);
787                } else {
788                    if (offset > limit || endOffset > limit)
789                        return std::make_shared<GeneralProtection>(0);
790                }
791            }
792
793            // If paging is enabled, do the translation.
794            if (m5Reg.paging) {
795                DPRINTF(GPUTLB, "Paging enabled.\n");
796                // The vaddr already has the segment base applied.
797                GpuTlbEntry *entry = lookup(vaddr);
798                localNumTLBAccesses++;
799
800                if (!entry) {
801                    localNumTLBMisses++;
802                    if (timing) {
803                        latency = missLatency1;
804                    }
805
806                    if (FullSystem) {
807                        fatal("GpuTLB doesn't support full-system mode\n");
808                    } else {
809                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
810                                "at pc %#x.\n", vaddr, tc->instAddr());
811
812                        Process *p = tc->getProcessPtr();
813                        GpuTlbEntry newEntry;
814                        bool success = p->pTable->lookup(vaddr, newEntry);
815
816                        if (!success && mode != BaseTLB::Execute) {
817                            // penalize a "page fault" more
818                            if (timing) {
819                                latency += missLatency2;
820                            }
821
822                            if (p->fixupStackFault(vaddr))
823                                success = p->pTable->lookup(vaddr, newEntry);
824                        }
825
826                        if (!success) {
827                            return std::make_shared<PageFault>(vaddr, true,
828                                                               mode, true,
829                                                               false);
830                        } else {
831                            newEntry.valid = success;
832                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
833
834                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
835                                    alignedVaddr, newEntry.pageStart());
836
837                            entry = insert(alignedVaddr, newEntry);
838                        }
839
840                        DPRINTF(GPUTLB, "Miss was serviced.\n");
841                    }
842                } else {
843                    localNumTLBHits++;
844
845                    if (timing) {
846                        latency = hitLatency;
847                    }
848                }
849
850                // Do paging protection checks.
851                bool inUser = (m5Reg.cpl == 3 &&
852                               !(flags & (CPL0FlagBit << FlagShift)));
853
854                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
855                bool badWrite = (!entry->writable && (inUser || cr0.wp));
856
857                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
858                     badWrite)) {
859                    // The page must have been present to get into the TLB in
860                    // the first place. We'll assume the reserved bits are
861                    // fine even though we're not checking them.
862                    return std::make_shared<PageFault>(vaddr, true, mode,
863                                                       inUser, false);
864                }
865
866                if (storeCheck && badWrite) {
867                    // This would fault if this were a write, so return a page
868                    // fault that reflects that happening.
869                    return std::make_shared<PageFault>(vaddr, true,
870                                                       BaseTLB::Write,
871                                                       inUser, false);
872                }
873
874
875                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
876                        "checks.\n", entry->paddr);
877
878                int page_size = entry->size();
879                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
880                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
881                req->setPaddr(paddr);
882
883                if (entry->uncacheable)
884                    req->setFlags(Request::UNCACHEABLE);
885            } else {
886                //Use the address which already has segmentation applied.
887                DPRINTF(GPUTLB, "Paging disabled.\n");
888                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
889                req->setPaddr(vaddr);
890            }
891        } else {
892            // Real mode
893            DPRINTF(GPUTLB, "In real mode.\n");
894            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
895            req->setPaddr(vaddr);
896        }
897
898        // Check for an access to the local APIC
899        if (FullSystem) {
900            LocalApicBase localApicBase =
901                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
902
903            Addr baseAddr = localApicBase.base * PageBytes;
904            Addr paddr = req->getPaddr();
905
906            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
907                // Force the access to be uncacheable.
908                req->setFlags(Request::UNCACHEABLE);
909                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
910                                                  paddr - baseAddr));
911            }
912        }
913
914        return NoFault;
915    };
916
917    Fault
918    GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
919                            int &latency)
920    {
921        bool delayedResponse;
922
923        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
924                                 latency);
925    }
926
927    void
928    GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
929            Translation *translation, Mode mode, int &latency)
930    {
931        bool delayedResponse;
932        assert(translation);
933
934        Fault fault = GpuTLB::translate(req, tc, translation, mode,
935                                        delayedResponse, true, latency);
936
937        if (!delayedResponse)
938            translation->finish(fault, req, tc, mode);
939    }
940
941    Walker*
942    GpuTLB::getWalker()
943    {
944        return walker;
945    }
946
947
948    void
949    GpuTLB::serialize(CheckpointOut &cp) const
950    {
951    }
952
953    void
954    GpuTLB::unserialize(CheckpointIn &cp)
955    {
956    }
957
958    void
959    GpuTLB::regStats()
960    {
961        MemObject::regStats();
962
963        localNumTLBAccesses
964            .name(name() + ".local_TLB_accesses")
965            .desc("Number of TLB accesses")
966            ;
967
968        localNumTLBHits
969            .name(name() + ".local_TLB_hits")
970            .desc("Number of TLB hits")
971            ;
972
973        localNumTLBMisses
974            .name(name() + ".local_TLB_misses")
975            .desc("Number of TLB misses")
976            ;
977
978        localTLBMissRate
979            .name(name() + ".local_TLB_miss_rate")
980            .desc("TLB miss rate")
981            ;
982
983        accessCycles
984            .name(name() + ".access_cycles")
985            .desc("Cycles spent accessing this TLB level")
986            ;
987
988        pageTableCycles
989            .name(name() + ".page_table_cycles")
990            .desc("Cycles spent accessing the page table")
991            ;
992
993        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
994
995        numUniquePages
996            .name(name() + ".unique_pages")
997            .desc("Number of unique pages touched")
998            ;
999
1000        localCycles
1001            .name(name() + ".local_cycles")
1002            .desc("Number of cycles spent in queue for all incoming reqs")
1003            ;
1004
1005        localLatency
1006            .name(name() + ".local_latency")
1007            .desc("Avg. latency over incoming coalesced reqs")
1008            ;
1009
1010        localLatency = localCycles / localNumTLBAccesses;
1011
1012        globalNumTLBAccesses
1013            .name(name() + ".global_TLB_accesses")
1014            .desc("Number of TLB accesses")
1015            ;
1016
1017        globalNumTLBHits
1018            .name(name() + ".global_TLB_hits")
1019            .desc("Number of TLB hits")
1020            ;
1021
1022        globalNumTLBMisses
1023            .name(name() + ".global_TLB_misses")
1024            .desc("Number of TLB misses")
1025            ;
1026
1027        globalTLBMissRate
1028            .name(name() + ".global_TLB_miss_rate")
1029            .desc("TLB miss rate")
1030            ;
1031
1032        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
1033
1034        avgReuseDistance
1035            .name(name() + ".avg_reuse_distance")
1036            .desc("avg. reuse distance over all pages (in ticks)")
1037            ;
1038
1039    }
1040
1041    /**
1042     * Do the TLB lookup for this coalesced request and schedule
1043     * another event <TLB access latency> cycles later.
1044     */
1045
1046    void
1047    GpuTLB::issueTLBLookup(PacketPtr pkt)
1048    {
1049        assert(pkt);
1050        assert(pkt->senderState);
1051
1052        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1053                                        TheISA::PageBytes);
1054
1055        TranslationState *sender_state =
1056                safe_cast<TranslationState*>(pkt->senderState);
1057
1058        bool update_stats = !sender_state->prefetch;
1059        ThreadContext * tmp_tc = sender_state->tc;
1060
1061        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
1062                virt_page_addr);
1063
1064        int req_cnt = sender_state->reqCnt.back();
1065
1066        if (update_stats) {
1067            accessCycles -= (curTick() * req_cnt);
1068            localCycles -= curTick();
1069            updatePageFootprint(virt_page_addr);
1070            globalNumTLBAccesses += req_cnt;
1071        }
1072
1073        tlbOutcome lookup_outcome = TLB_MISS;
1074        RequestPtr tmp_req = pkt->req;
1075
1076        // Access the TLB and figure out if it's a hit or a miss.
1077        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
1078
1079        if (success) {
1080            lookup_outcome = TLB_HIT;
1081            // Put the entry in SenderState
1082            GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false);
1083            assert(entry);
1084
1085            sender_state->tlbEntry =
1086                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
1087
1088            if (update_stats) {
1089                // the reqCnt has an entry per level, so its size tells us
1090                // which level we are in
1091                sender_state->hitLevel = sender_state->reqCnt.size();
1092                globalNumTLBHits += req_cnt;
1093            }
1094        } else {
1095            if (update_stats)
1096                globalNumTLBMisses += req_cnt;
1097        }
1098
1099        /*
1100         * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1101         * as the TLB access latency.
1102         *
1103         * We create and schedule a new TLBEvent which will help us take the
1104         * appropriate actions (e.g., update TLB on a hit, send request to lower
1105         * level TLB on a miss, or start a page walk if this was the last-level
1106         * TLB)
1107         */
1108        TLBEvent *tlb_event =
1109            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
1110
1111        if (translationReturnEvent.count(virt_page_addr)) {
1112            panic("Virtual Page Address %#x already has a return event\n",
1113                  virt_page_addr);
1114        }
1115
1116        translationReturnEvent[virt_page_addr] = tlb_event;
1117        assert(tlb_event);
1118
1119        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
1120                curTick() + this->ticks(hitLatency));
1121
1122        schedule(tlb_event, curTick() + this->ticks(hitLatency));
1123    }
1124
1125    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
1126                               PacketPtr _pkt)
1127        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
1128        outcome(tlb_outcome), pkt(_pkt)
1129    {
1130    }
1131
1132    /**
1133     * Do Paging protection checks. If we encounter a page fault, then
1134     * an assertion is fired.
1135     */
1136    void
1137    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
1138            GpuTlbEntry * tlb_entry, Mode mode)
1139    {
1140        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1141        uint32_t flags = pkt->req->getFlags();
1142        bool storeCheck = flags & (StoreCheck << FlagShift);
1143
1144        // Do paging protection checks.
1145        bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1146        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1147
1148        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1149
1150        if ((inUser && !tlb_entry->user) ||
1151            (mode == BaseTLB::Write && badWrite)) {
1152           // The page must have been present to get into the TLB in
1153           // the first place. We'll assume the reserved bits are
1154           // fine even though we're not checking them.
1155           assert(false);
1156        }
1157
1158        if (storeCheck && badWrite) {
1159           // This would fault if this were a write, so return a page
1160           // fault that reflects that happening.
1161           assert(false);
1162        }
1163    }
1164
1165    /**
1166     * handleTranslationReturn is called on a TLB hit,
1167     * when a TLB miss returns or when a page fault returns.
1168     * The latter calls handelHit with TLB miss as tlbOutcome.
1169     */
1170    void
1171    GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
1172            PacketPtr pkt)
1173    {
1174
1175        assert(pkt);
1176        Addr vaddr = pkt->req->getVaddr();
1177
1178        TranslationState *sender_state =
1179            safe_cast<TranslationState*>(pkt->senderState);
1180
1181        ThreadContext *tc = sender_state->tc;
1182        Mode mode = sender_state->tlbMode;
1183
1184        GpuTlbEntry *local_entry, *new_entry;
1185
1186        if (tlb_outcome == TLB_HIT) {
1187            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
1188            local_entry = sender_state->tlbEntry;
1189        } else {
1190            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
1191                    vaddr);
1192
1193            // We are returning either from a page walk or from a hit at a lower
1194            // TLB level. The senderState should be "carrying" a pointer to the
1195            // correct TLBEntry.
1196            new_entry = sender_state->tlbEntry;
1197            assert(new_entry);
1198            local_entry = new_entry;
1199
1200            if (allocationPolicy) {
1201                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1202                        virt_page_addr);
1203
1204                local_entry = insert(virt_page_addr, *new_entry);
1205            }
1206
1207            assert(local_entry);
1208        }
1209
1210        /**
1211         * At this point the packet carries an up-to-date tlbEntry pointer
1212         * in its senderState.
1213         * Next step is to do the paging protection checks.
1214         */
1215        DPRINTF(GPUTLB, "Entry found with vaddr %#x,  doing protection checks "
1216                "while paddr was %#x.\n", local_entry->vaddr,
1217                local_entry->paddr);
1218
1219        pagingProtectionChecks(tc, pkt, local_entry, mode);
1220        int page_size = local_entry->size();
1221        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1222        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1223
1224        // Since this packet will be sent through the cpu side slave port,
1225        // it must be converted to a response pkt if it is not one already
1226        if (pkt->isRequest()) {
1227            pkt->makeTimingResponse();
1228        }
1229
1230        pkt->req->setPaddr(paddr);
1231
1232        if (local_entry->uncacheable) {
1233             pkt->req->setFlags(Request::UNCACHEABLE);
1234        }
1235
1236        //send packet back to coalescer
1237        cpuSidePort[0]->sendTimingResp(pkt);
1238        //schedule cleanup event
1239        cleanupQueue.push(virt_page_addr);
1240
1241        // schedule this only once per cycle.
1242        // The check is required because we might have multiple translations
1243        // returning the same cycle
1244        // this is a maximum priority event and must be on the same cycle
1245        // as the cleanup event in TLBCoalescer to avoid a race with
1246        // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
1247        if (!cleanupEvent.scheduled())
1248            schedule(cleanupEvent, curTick());
1249    }
1250
1251    /**
1252     * Here we take the appropriate actions based on the result of the
1253     * TLB lookup.
1254     */
1255    void
1256    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
1257                              PacketPtr pkt)
1258    {
1259        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1260
1261        assert(translationReturnEvent[virtPageAddr]);
1262        assert(pkt);
1263
1264        TranslationState *tmp_sender_state =
1265            safe_cast<TranslationState*>(pkt->senderState);
1266
1267        int req_cnt = tmp_sender_state->reqCnt.back();
1268        bool update_stats = !tmp_sender_state->prefetch;
1269
1270
1271        if (outcome == TLB_HIT) {
1272            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1273
1274            if (update_stats) {
1275                accessCycles += (req_cnt * curTick());
1276                localCycles += curTick();
1277            }
1278
1279        } else if (outcome == TLB_MISS) {
1280
1281            DPRINTF(GPUTLB, "This is a TLB miss\n");
1282            if (update_stats) {
1283                accessCycles += (req_cnt*curTick());
1284                localCycles += curTick();
1285            }
1286
1287            if (hasMemSidePort) {
1288                // the one cyle added here represent the delay from when we get
1289                // the reply back till when we propagate it to the coalescer
1290                // above.
1291                if (update_stats) {
1292                    accessCycles += (req_cnt * 1);
1293                    localCycles += 1;
1294                }
1295
1296                /**
1297                 * There is a TLB below. Send the coalesced request.
1298                 * We actually send the very first packet of all the
1299                 * pending packets for this virtual page address.
1300                 */
1301                if (!memSidePort[0]->sendTimingReq(pkt)) {
1302                    DPRINTF(GPUTLB, "Failed sending translation request to "
1303                            "lower level TLB for addr %#x\n", virtPageAddr);
1304
1305                    memSidePort[0]->retries.push_back(pkt);
1306                } else {
1307                    DPRINTF(GPUTLB, "Sent translation request to lower level "
1308                            "TLB for addr %#x\n", virtPageAddr);
1309                }
1310            } else {
1311                //this is the last level TLB. Start a page walk
1312                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1313                        "addr %#x\n", virtPageAddr);
1314
1315                if (update_stats)
1316                    pageTableCycles -= (req_cnt*curTick());
1317
1318                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1319                assert(tlb_event);
1320                tlb_event->updateOutcome(PAGE_WALK);
1321                schedule(tlb_event, curTick() + ticks(missLatency2));
1322            }
1323        } else if (outcome == PAGE_WALK) {
1324            if (update_stats)
1325                pageTableCycles += (req_cnt*curTick());
1326
1327            // Need to access the page table and update the TLB
1328            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1329                    virtPageAddr);
1330
1331            TranslationState *sender_state =
1332                safe_cast<TranslationState*>(pkt->senderState);
1333
1334            Process *p = sender_state->tc->getProcessPtr();
1335            TlbEntry newEntry;
1336            Addr vaddr = pkt->req->getVaddr();
1337    #ifndef NDEBUG
1338            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1339            assert(alignedVaddr == virtPageAddr);
1340    #endif
1341            bool success;
1342            success = p->pTable->lookup(vaddr, newEntry);
1343            if (!success && sender_state->tlbMode != BaseTLB::Execute) {
1344                if (p->fixupStackFault(vaddr)) {
1345                    success = p->pTable->lookup(vaddr, newEntry);
1346                }
1347            }
1348
1349            DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1350                    newEntry.pageStart());
1351
1352            sender_state->tlbEntry =
1353                new GpuTlbEntry(0, newEntry.vaddr, newEntry.paddr, success);
1354
1355            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1356        } else if (outcome == MISS_RETURN) {
1357            /** we add an extra cycle in the return path of the translation
1358             * requests in between the various TLB levels.
1359             */
1360            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1361        } else {
1362            assert(false);
1363        }
1364    }
1365
1366    void
1367    GpuTLB::TLBEvent::process()
1368    {
1369        tlb->translationReturn(virtPageAddr, outcome, pkt);
1370    }
1371
1372    const char*
1373    GpuTLB::TLBEvent::description() const
1374    {
1375        return "trigger translationDoneEvent";
1376    }
1377
1378    void
1379    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
1380    {
1381        outcome = _outcome;
1382    }
1383
1384    Addr
1385    GpuTLB::TLBEvent::getTLBEventVaddr()
1386    {
1387        return virtPageAddr;
1388    }
1389
1390    /*
1391     * recvTiming receives a coalesced timing request from a TLBCoalescer
1392     * and it calls issueTLBLookup()
1393     * It only rejects the packet if we have exceeded the max
1394     * outstanding number of requests for the TLB
1395     */
1396    bool
1397    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1398    {
1399        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1400            tlb->issueTLBLookup(pkt);
1401            // update number of outstanding translation requests
1402            tlb->outstandingReqs++;
1403            return true;
1404         } else {
1405            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1406                    tlb->outstandingReqs);
1407            return false;
1408         }
1409    }
1410
1411    /**
1412     * handleFuncTranslationReturn is called on a TLB hit,
1413     * when a TLB miss returns or when a page fault returns.
1414     * It updates LRU, inserts the TLB entry on a miss
1415     * depending on the allocation policy and does the required
1416     * protection checks. It does NOT create a new packet to
1417     * update the packet's addr; this is done in hsail-gpu code.
1418     */
1419    void
1420    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
1421    {
1422        TranslationState *sender_state =
1423            safe_cast<TranslationState*>(pkt->senderState);
1424
1425        ThreadContext *tc = sender_state->tc;
1426        Mode mode = sender_state->tlbMode;
1427        Addr vaddr = pkt->req->getVaddr();
1428
1429        GpuTlbEntry *local_entry, *new_entry;
1430
1431        if (tlb_outcome == TLB_HIT) {
1432            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1433                    "%#x\n", vaddr);
1434
1435            local_entry = sender_state->tlbEntry;
1436        } else {
1437            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1438                    "%#x\n", vaddr);
1439
1440            // We are returning either from a page walk or from a hit at a lower
1441            // TLB level. The senderState should be "carrying" a pointer to the
1442            // correct TLBEntry.
1443            new_entry = sender_state->tlbEntry;
1444            assert(new_entry);
1445            local_entry = new_entry;
1446
1447            if (allocationPolicy) {
1448                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1449
1450                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1451                        virt_page_addr);
1452
1453                local_entry = insert(virt_page_addr, *new_entry);
1454            }
1455
1456            assert(local_entry);
1457        }
1458
1459        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1460                "while paddr was %#x.\n", local_entry->vaddr,
1461                local_entry->paddr);
1462
1463        // Do paging checks if it's a normal functional access.  If it's for a
1464        // prefetch, then sometimes you can try to prefetch something that won't
1465        // pass protection. We don't actually want to fault becuase there is no
1466        // demand access to deem this a violation.  Just put it in the TLB and
1467        // it will fault if indeed a future demand access touches it in
1468        // violation.
1469        if (!sender_state->prefetch && sender_state->tlbEntry->valid)
1470            pagingProtectionChecks(tc, pkt, local_entry, mode);
1471
1472        int page_size = local_entry->size();
1473        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1474        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1475
1476        pkt->req->setPaddr(paddr);
1477
1478        if (local_entry->uncacheable)
1479             pkt->req->setFlags(Request::UNCACHEABLE);
1480    }
1481
1482    // This is used for atomic translations. Need to
1483    // make it all happen during the same cycle.
1484    void
1485    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
1486    {
1487        TranslationState *sender_state =
1488            safe_cast<TranslationState*>(pkt->senderState);
1489
1490        ThreadContext *tc = sender_state->tc;
1491        bool update_stats = !sender_state->prefetch;
1492
1493        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1494                                        TheISA::PageBytes);
1495
1496        if (update_stats)
1497            tlb->updatePageFootprint(virt_page_addr);
1498
1499        // do the TLB lookup without updating the stats
1500        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1501        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1502
1503        // functional mode means no coalescing
1504        // global metrics are the same as the local metrics
1505        if (update_stats) {
1506            tlb->globalNumTLBAccesses++;
1507
1508            if (success) {
1509                sender_state->hitLevel = sender_state->reqCnt.size();
1510                tlb->globalNumTLBHits++;
1511            }
1512        }
1513
1514        if (!success) {
1515            if (update_stats)
1516                tlb->globalNumTLBMisses++;
1517            if (tlb->hasMemSidePort) {
1518                // there is a TLB below -> propagate down the TLB hierarchy
1519                tlb->memSidePort[0]->sendFunctional(pkt);
1520                // If no valid translation from a prefetch, then just return
1521                if (sender_state->prefetch && !pkt->req->hasPaddr())
1522                    return;
1523            } else {
1524                // Need to access the page table and update the TLB
1525                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1526                        virt_page_addr);
1527
1528                Process *p = tc->getProcessPtr();
1529                TlbEntry newEntry;
1530
1531                Addr vaddr = pkt->req->getVaddr();
1532    #ifndef NDEBUG
1533                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1534                assert(alignedVaddr == virt_page_addr);
1535    #endif
1536
1537                bool success = p->pTable->lookup(vaddr, newEntry);
1538                if (!success && sender_state->tlbMode != BaseTLB::Execute) {
1539                    if (p->fixupStackFault(vaddr))
1540                        success = p->pTable->lookup(vaddr, newEntry);
1541                }
1542
1543                if (!sender_state->prefetch) {
1544                    // no PageFaults are permitted after
1545                    // the second page table lookup
1546                    assert(success);
1547
1548                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1549                           newEntry.pageStart());
1550
1551                    sender_state->tlbEntry = new GpuTlbEntry(0, newEntry.vaddr,
1552                                                             newEntry.paddr,
1553                                                             success);
1554                } else {
1555                    // If this was a prefetch, then do the normal thing if it
1556                    // was a successful translation.  Otherwise, send an empty
1557                    // TLB entry back so that it can be figured out as empty and
1558                    // handled accordingly.
1559                    if (success) {
1560                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1561                               newEntry.pageStart());
1562
1563                        sender_state->tlbEntry = new GpuTlbEntry(0,
1564                                                                 newEntry.vaddr,
1565                                                                 newEntry.paddr,
1566                                                                 success);
1567                    } else {
1568                        DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1569                                alignedVaddr);
1570
1571                        sender_state->tlbEntry = new GpuTlbEntry();
1572
1573                        return;
1574                    }
1575                }
1576            }
1577        } else {
1578            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1579                    tlb->lookup(pkt->req->getVaddr()));
1580
1581            GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1582                                             update_stats);
1583
1584            assert(entry);
1585
1586            sender_state->tlbEntry =
1587                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
1588        }
1589        // This is the function that would populate pkt->req with the paddr of
1590        // the translation. But if no translation happens (i.e Prefetch fails)
1591        // then the early returns in the above code wiill keep this function
1592        // from executing.
1593        tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1594    }
1595
1596    void
1597    GpuTLB::CpuSidePort::recvReqRetry()
1598    {
1599        // The CPUSidePort never sends anything but replies. No retries
1600        // expected.
1601        assert(false);
1602    }
1603
1604    AddrRangeList
1605    GpuTLB::CpuSidePort::getAddrRanges() const
1606    {
1607        // currently not checked by the master
1608        AddrRangeList ranges;
1609
1610        return ranges;
1611    }
1612
1613    /**
1614     * MemSidePort receives the packet back.
1615     * We need to call the handleTranslationReturn
1616     * and propagate up the hierarchy.
1617     */
1618    bool
1619    GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
1620    {
1621        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1622                                        TheISA::PageBytes);
1623
1624        DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1625                virt_page_addr);
1626
1627        TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1628        assert(tlb_event);
1629        assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1630
1631        tlb_event->updateOutcome(MISS_RETURN);
1632        tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1633
1634        return true;
1635    }
1636
1637    void
1638    GpuTLB::MemSidePort::recvReqRetry()
1639    {
1640        // No retries should reach the TLB. The retries
1641        // should only reach the TLBCoalescer.
1642        assert(false);
1643    }
1644
1645    void
1646    GpuTLB::cleanup()
1647    {
1648        while (!cleanupQueue.empty()) {
1649            Addr cleanup_addr = cleanupQueue.front();
1650            cleanupQueue.pop();
1651
1652            // delete TLBEvent
1653            TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1654            delete old_tlb_event;
1655            translationReturnEvent.erase(cleanup_addr);
1656
1657            // update number of outstanding requests
1658            outstandingReqs--;
1659        }
1660
1661        /** the higher level coalescer should retry if it has
1662         * any pending requests.
1663         */
1664        for (int i = 0; i < cpuSidePort.size(); ++i) {
1665            cpuSidePort[i]->sendRetryReq();
1666        }
1667    }
1668
1669    void
1670    GpuTLB::updatePageFootprint(Addr virt_page_addr)
1671    {
1672
1673        std::pair<AccessPatternTable::iterator, bool> ret;
1674
1675        AccessInfo tmp_access_info;
1676        tmp_access_info.lastTimeAccessed = 0;
1677        tmp_access_info.accessesPerPage = 0;
1678        tmp_access_info.totalReuseDistance = 0;
1679        tmp_access_info.sumDistance = 0;
1680        tmp_access_info.meanDistance = 0;
1681
1682        ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1683                                  tmp_access_info));
1684
1685        bool first_page_access = ret.second;
1686
1687        if (first_page_access) {
1688            numUniquePages++;
1689        } else  {
1690            int accessed_before;
1691            accessed_before  = curTick() - ret.first->second.lastTimeAccessed;
1692            ret.first->second.totalReuseDistance += accessed_before;
1693        }
1694
1695        ret.first->second.accessesPerPage++;
1696        ret.first->second.lastTimeAccessed = curTick();
1697
1698        if (accessDistance) {
1699            ret.first->second.localTLBAccesses
1700                .push_back(localNumTLBAccesses.value());
1701        }
1702    }
1703
1704    void
1705    GpuTLB::exitCallback()
1706    {
1707        std::ostream *page_stat_file = nullptr;
1708
1709        if (accessDistance) {
1710
1711            // print per page statistics to a separate file (.csv format)
1712            // simout is the gem5 output directory (default is m5out or the one
1713            // specified with -d
1714            page_stat_file = simout.create(name().c_str())->stream();
1715
1716            // print header
1717            *page_stat_file << "page,max_access_distance,mean_access_distance, "
1718                            << "stddev_distance" << std::endl;
1719        }
1720
1721        // update avg. reuse distance footprint
1722        AccessPatternTable::iterator iter, iter_begin, iter_end;
1723        unsigned int sum_avg_reuse_distance_per_page = 0;
1724
1725        // iterate through all pages seen by this TLB
1726        for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1727            sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1728                                               iter->second.accessesPerPage;
1729
1730            if (accessDistance) {
1731                unsigned int tmp = iter->second.localTLBAccesses[0];
1732                unsigned int prev = tmp;
1733
1734                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1735                    if (i) {
1736                        tmp = prev + 1;
1737                    }
1738
1739                    prev = iter->second.localTLBAccesses[i];
1740                    // update the localTLBAccesses value
1741                    // with the actual differece
1742                    iter->second.localTLBAccesses[i] -= tmp;
1743                    // compute the sum of AccessDistance per page
1744                    // used later for mean
1745                    iter->second.sumDistance +=
1746                        iter->second.localTLBAccesses[i];
1747                }
1748
1749                iter->second.meanDistance =
1750                    iter->second.sumDistance / iter->second.accessesPerPage;
1751
1752                // compute std_dev and max  (we need a second round because we
1753                // need to know the mean value
1754                unsigned int max_distance = 0;
1755                unsigned int stddev_distance = 0;
1756
1757                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1758                    unsigned int tmp_access_distance =
1759                        iter->second.localTLBAccesses[i];
1760
1761                    if (tmp_access_distance > max_distance) {
1762                        max_distance = tmp_access_distance;
1763                    }
1764
1765                    unsigned int diff =
1766                        tmp_access_distance - iter->second.meanDistance;
1767                    stddev_distance += pow(diff, 2);
1768
1769                }
1770
1771                stddev_distance =
1772                    sqrt(stddev_distance/iter->second.accessesPerPage);
1773
1774                if (page_stat_file) {
1775                    *page_stat_file << std::hex << iter->first << ",";
1776                    *page_stat_file << std::dec << max_distance << ",";
1777                    *page_stat_file << std::dec << iter->second.meanDistance
1778                                    << ",";
1779                    *page_stat_file << std::dec << stddev_distance;
1780                    *page_stat_file << std::endl;
1781                }
1782
1783                // erase the localTLBAccesses array
1784                iter->second.localTLBAccesses.clear();
1785            }
1786        }
1787
1788        if (!TLBFootprint.empty()) {
1789            avgReuseDistance =
1790                sum_avg_reuse_distance_per_page / TLBFootprint.size();
1791        }
1792
1793        //clear the TLBFootprint map
1794        TLBFootprint.clear();
1795    }
1796} // namespace X86ISA
1797
1798X86ISA::GpuTLB*
1799X86GPUTLBParams::create()
1800{
1801    return new X86ISA::GpuTLB(this);
1802}
1803
1804