gpu_tlb.cc revision 12663:565c16ffe1d1
1360SN/A/*
21458SN/A * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3360SN/A * All rights reserved.
4360SN/A *
5360SN/A * For use for simulation and test purposes only
6360SN/A *
7360SN/A * Redistribution and use in source and binary forms, with or without
8360SN/A * modification, are permitted provided that the following conditions are met:
9360SN/A *
10360SN/A * 1. Redistributions of source code must retain the above copyright notice,
11360SN/A * this list of conditions and the following disclaimer.
12360SN/A *
13360SN/A * 2. Redistributions in binary form must reproduce the above copyright notice,
14360SN/A * this list of conditions and the following disclaimer in the documentation
15360SN/A * and/or other materials provided with the distribution.
16360SN/A *
17360SN/A * 3. Neither the name of the copyright holder nor the names of its contributors
18360SN/A * may be used to endorse or promote products derived from this software
19360SN/A * without specific prior written permission.
20360SN/A *
21360SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22360SN/A * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23360SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24360SN/A * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25360SN/A * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26360SN/A * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
272665Ssaidi@eecs.umich.edu * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
282665Ssaidi@eecs.umich.edu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
292665Ssaidi@eecs.umich.edu * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30360SN/A * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31360SN/A * POSSIBILITY OF SUCH DAMAGE.
322553SN/A *
332221SN/A * Author: Lisa Hsu
342048SN/A */
35360SN/A
36360SN/A#include "gpu-compute/gpu_tlb.hh"
372680Sktlim@umich.edu
382093SN/A#include <cmath>
392093SN/A#include <cstring>
402093SN/A
412093SN/A#include "arch/x86/faults.hh"
42360SN/A#include "arch/x86/insts/microldstop.hh"
43360SN/A#include "arch/x86/pagetable.hh"
442107SN/A#include "arch/x86/pagetable_walker.hh"
45360SN/A#include "arch/x86/regs/misc.hh"
462149SN/A#include "arch/x86/x86_traits.hh"
472149SN/A#include "base/bitfield.hh"
482093SN/A#include "base/output.hh"
492093SN/A#include "base/trace.hh"
503114Sgblack@eecs.umich.edu#include "cpu/base.hh"
512680Sktlim@umich.edu#include "cpu/thread_context.hh"
522093SN/A#include "debug/GPUPrefetch.hh"
532680Sktlim@umich.edu#include "debug/GPUTLB.hh"
54360SN/A#include "mem/packet_access.hh"
552093SN/A#include "mem/page_table.hh"
562093SN/A#include "mem/request.hh"
572093SN/A#include "sim/process.hh"
582093SN/A
592093SN/Anamespace X86ISA
60360SN/A{
612680Sktlim@umich.edu
622093SN/A    GpuTLB::GpuTLB(const Params *p)
632093SN/A        : MemObject(p), configAddress(0), size(p->size),
64360SN/A          cleanupEvent([this]{ cleanup(); }, name(), false,
652093SN/A                       Event::Maximum_Pri),
662093SN/A          exitEvent([this]{ exitCallback(); }, name())
672093SN/A    {
682093SN/A        assoc = p->assoc;
693114Sgblack@eecs.umich.edu        assert(assoc <= size);
702680Sktlim@umich.edu        numSets = size/assoc;
712093SN/A        allocationPolicy = p->allocationPolicy;
722680Sktlim@umich.edu        hasMemSidePort = false;
732680Sktlim@umich.edu        accessDistance = p->accessDistance;
742064SN/A        clock = p->clk_domain->clockPeriod();
752093SN/A
762064SN/A        tlb.assign(size, GpuTlbEntry());
772093SN/A
782680Sktlim@umich.edu        freeList.resize(numSets);
792093SN/A        entryList.resize(numSets);
802093SN/A
812680Sktlim@umich.edu        for (int set = 0; set < numSets; ++set) {
822093SN/A            for (int way = 0; way < assoc; ++way) {
832093SN/A                int x = set * assoc + way;
84360SN/A                freeList[set].push_back(&tlb.at(x));
852093SN/A            }
862093SN/A        }
872093SN/A
882093SN/A        FA = (size == assoc);
891999SN/A
901999SN/A        /**
912093SN/A         * @warning: the set-associative version assumes you have a
922093SN/A         * fixed page size of 4KB.
93360SN/A         * If the page size is greather than 4KB (as defined in the
942093SN/A         * TheISA::PageBytes), then there are various issues w/ the current
952093SN/A         * implementation (you'd have the same 8KB page being replicated in
963114Sgblack@eecs.umich.edu         * different sets etc)
972680Sktlim@umich.edu         */
982093SN/A        setMask = numSets - 1;
992680Sktlim@umich.edu
1002680Sktlim@umich.edu    #if 0
101360SN/A        // GpuTLB doesn't yet support full system
1022093SN/A        walker = p->walker;
103360SN/A        walker->setTLB(this);
1042093SN/A    #endif
1052680Sktlim@umich.edu
1062093SN/A        maxCoalescedReqs = p->maxOutstandingReqs;
1072680Sktlim@umich.edu
1082093SN/A        // Do not allow maxCoalescedReqs to be more than the TLB associativity
1092093SN/A        if (maxCoalescedReqs > assoc) {
1102093SN/A            maxCoalescedReqs = assoc;
1112093SN/A            cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
1122093SN/A        }
1132093SN/A
1142093SN/A        outstandingReqs = 0;
1152093SN/A        hitLatency = p->hitLatency;
1162093SN/A        missLatency1 = p->missLatency1;
117360SN/A        missLatency2 = p->missLatency2;
118360SN/A
1192093SN/A        // create the slave ports based on the number of connected ports
1202093SN/A        for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
121360SN/A            cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
122360SN/A                                  name(), i), this, i));
1232093SN/A        }
124360SN/A
125360SN/A        // create the master ports based on the number of connected ports
126360SN/A        for (size_t i = 0; i < p->port_master_connection_count; ++i) {
127360SN/A            memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
128360SN/A                                  name(), i), this, i));
129360SN/A        }
130360SN/A    }
131360SN/A
132360SN/A    // fixme: this is never called?
133360SN/A    GpuTLB::~GpuTLB()
134511SN/A    {
135360SN/A        // make sure all the hash-maps are empty
136360SN/A        assert(translationReturnEvent.empty());
137360SN/A    }
138360SN/A
1392553SN/A    BaseSlavePort&
1401999SN/A    GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
141360SN/A    {
142360SN/A        if (if_name == "slave") {
143360SN/A            if (idx >= static_cast<PortID>(cpuSidePort.size())) {
1442238SN/A                panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
145360SN/A            }
146360SN/A
147360SN/A            return *cpuSidePort[idx];
1482238SN/A        } else {
149360SN/A            panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
150360SN/A        }
151360SN/A    }
152360SN/A
153360SN/A    BaseMasterPort&
154360SN/A    GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
155360SN/A    {
156360SN/A        if (if_name == "master") {
157360SN/A            if (idx >= static_cast<PortID>(memSidePort.size())) {
158360SN/A                panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
159360SN/A            }
160360SN/A
161360SN/A            hasMemSidePort = true;
162360SN/A
163360SN/A            return *memSidePort[idx];
164360SN/A        } else {
1653079Sstever@eecs.umich.edu            panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
1662238SN/A        }
167360SN/A    }
168360SN/A
1692553SN/A    GpuTlbEntry*
170360SN/A    GpuTLB::insert(Addr vpn, GpuTlbEntry &entry)
1712238SN/A    {
172360SN/A        GpuTlbEntry *newEntry = nullptr;
173360SN/A
174360SN/A        /**
175360SN/A         * vpn holds the virtual page address
176360SN/A         * The least significant bits are simply masked
177360SN/A         */
1782553SN/A        int set = (vpn >> TheISA::PageShift) & setMask;
179360SN/A
180360SN/A        if (!freeList[set].empty()) {
181360SN/A            newEntry = freeList[set].front();
182360SN/A            freeList[set].pop_front();
183360SN/A        } else {
184360SN/A            newEntry = entryList[set].back();
185360SN/A            entryList[set].pop_back();
186360SN/A        }
187360SN/A
188360SN/A        *newEntry = entry;
189360SN/A        newEntry->vaddr = vpn;
190360SN/A        entryList[set].push_front(newEntry);
1912553SN/A
1922553SN/A        return newEntry;
193360SN/A    }
194360SN/A
1952553SN/A    GpuTLB::EntryList::iterator
196360SN/A    GpuTLB::lookupIt(Addr va, bool update_lru)
197360SN/A    {
198360SN/A        int set = (va >> TheISA::PageShift) & setMask;
199360SN/A
200360SN/A        if (FA) {
201360SN/A            assert(!set);
202360SN/A        }
203360SN/A
204360SN/A        auto entry = entryList[set].begin();
205360SN/A        for (; entry != entryList[set].end(); ++entry) {
206360SN/A            int page_size = (*entry)->size();
207360SN/A
208360SN/A            if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
209360SN/A                DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
210360SN/A                        "with size %#x.\n", va, (*entry)->vaddr, page_size);
211360SN/A
212360SN/A                if (update_lru) {
213360SN/A                    entryList[set].push_front(*entry);
214360SN/A                    entryList[set].erase(entry);
2152553SN/A                    entry = entryList[set].begin();
216360SN/A                }
217360SN/A
218360SN/A                break;
219360SN/A            }
220360SN/A        }
221360SN/A
222360SN/A        return entry;
223360SN/A    }
224360SN/A
225360SN/A    GpuTlbEntry*
226360SN/A    GpuTLB::lookup(Addr va, bool update_lru)
227360SN/A    {
228360SN/A        int set = (va >> TheISA::PageShift) & setMask;
229360SN/A
230360SN/A        auto entry = lookupIt(va, update_lru);
231360SN/A
232360SN/A        if (entry == entryList[set].end())
233360SN/A            return nullptr;
234360SN/A        else
235360SN/A            return *entry;
236360SN/A    }
237360SN/A
238360SN/A    void
239360SN/A    GpuTLB::invalidateAll()
240360SN/A    {
241360SN/A        DPRINTF(GPUTLB, "Invalidating all entries.\n");
242360SN/A
243360SN/A        for (int i = 0; i < numSets; ++i) {
244360SN/A            while (!entryList[i].empty()) {
2452553SN/A                GpuTlbEntry *entry = entryList[i].front();
246360SN/A                entryList[i].pop_front();
2471999SN/A                freeList[i].push_back(entry);
2482553SN/A            }
249360SN/A        }
250360SN/A    }
251360SN/A
252511SN/A    void
253360SN/A    GpuTLB::setConfigAddress(uint32_t addr)
254360SN/A    {
255360SN/A        configAddress = addr;
256360SN/A    }
257360SN/A
258360SN/A    void
259360SN/A    GpuTLB::invalidateNonGlobal()
260360SN/A    {
261360SN/A        DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
262360SN/A
263360SN/A        for (int i = 0; i < numSets; ++i) {
264360SN/A            for (auto entryIt = entryList[i].begin();
265360SN/A                 entryIt != entryList[i].end();) {
266360SN/A                if (!(*entryIt)->global) {
267360SN/A                    freeList[i].push_back(*entryIt);
2682553SN/A                    entryList[i].erase(entryIt++);
269511SN/A                } else {
270360SN/A                    ++entryIt;
271360SN/A                }
272360SN/A            }
273360SN/A        }
274360SN/A    }
275360SN/A
276360SN/A    void
277360SN/A    GpuTLB::demapPage(Addr va, uint64_t asn)
278360SN/A    {
279360SN/A
280360SN/A        int set = (va >> TheISA::PageShift) & setMask;
281360SN/A        auto entry = lookupIt(va, false);
282360SN/A
283360SN/A        if (entry != entryList[set].end()) {
284360SN/A            freeList[set].push_back(*entry);
285360SN/A            entryList[set].erase(entry);
286360SN/A        }
287360SN/A    }
288360SN/A
289360SN/A    Fault
290360SN/A    GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
291360SN/A    {
292360SN/A        DPRINTF(GPUTLB, "Addresses references internal memory.\n");
293360SN/A        Addr vaddr = req->getVaddr();
294360SN/A        Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
295360SN/A
296360SN/A        if (prefix == IntAddrPrefixCPUID) {
297360SN/A            panic("CPUID memory space not yet implemented!\n");
298360SN/A        } else if (prefix == IntAddrPrefixMSR) {
299360SN/A            vaddr = vaddr >> 3;
300360SN/A            req->setFlags(Request::MMAPPED_IPR);
301360SN/A            Addr regNum = 0;
302360SN/A
303360SN/A            switch (vaddr & ~IntAddrPrefixMask) {
304360SN/A              case 0x10:
305360SN/A                regNum = MISCREG_TSC;
306360SN/A                break;
307360SN/A              case 0x1B:
308360SN/A                regNum = MISCREG_APIC_BASE;
309360SN/A                break;
310360SN/A              case 0xFE:
311360SN/A                regNum = MISCREG_MTRRCAP;
312360SN/A                break;
313360SN/A              case 0x174:
314360SN/A                regNum = MISCREG_SYSENTER_CS;
315360SN/A                break;
316360SN/A              case 0x175:
317360SN/A                regNum = MISCREG_SYSENTER_ESP;
318360SN/A                break;
319360SN/A              case 0x176:
320360SN/A                regNum = MISCREG_SYSENTER_EIP;
321360SN/A                break;
322360SN/A              case 0x179:
323360SN/A                regNum = MISCREG_MCG_CAP;
324360SN/A                break;
325360SN/A              case 0x17A:
326360SN/A                regNum = MISCREG_MCG_STATUS;
327360SN/A                break;
328360SN/A              case 0x17B:
329360SN/A                regNum = MISCREG_MCG_CTL;
330360SN/A                break;
331360SN/A              case 0x1D9:
332360SN/A                regNum = MISCREG_DEBUG_CTL_MSR;
333360SN/A                break;
334360SN/A              case 0x1DB:
335360SN/A                regNum = MISCREG_LAST_BRANCH_FROM_IP;
336360SN/A                break;
337360SN/A              case 0x1DC:
338360SN/A                regNum = MISCREG_LAST_BRANCH_TO_IP;
339360SN/A                break;
340360SN/A              case 0x1DD:
341360SN/A                regNum = MISCREG_LAST_EXCEPTION_FROM_IP;
342360SN/A                break;
343360SN/A              case 0x1DE:
344360SN/A                regNum = MISCREG_LAST_EXCEPTION_TO_IP;
345360SN/A                break;
346360SN/A              case 0x200:
347360SN/A                regNum = MISCREG_MTRR_PHYS_BASE_0;
348360SN/A                break;
349360SN/A              case 0x201:
350360SN/A                regNum = MISCREG_MTRR_PHYS_MASK_0;
351360SN/A                break;
352360SN/A              case 0x202:
353360SN/A                regNum = MISCREG_MTRR_PHYS_BASE_1;
354360SN/A                break;
355360SN/A              case 0x203:
356360SN/A                regNum = MISCREG_MTRR_PHYS_MASK_1;
357360SN/A                break;
358360SN/A              case 0x204:
359360SN/A                regNum = MISCREG_MTRR_PHYS_BASE_2;
360360SN/A                break;
361360SN/A              case 0x205:
362360SN/A                regNum = MISCREG_MTRR_PHYS_MASK_2;
363360SN/A                break;
364360SN/A              case 0x206:
365360SN/A                regNum = MISCREG_MTRR_PHYS_BASE_3;
366360SN/A                break;
367360SN/A              case 0x207:
368360SN/A                regNum = MISCREG_MTRR_PHYS_MASK_3;
369360SN/A                break;
370360SN/A              case 0x208:
371360SN/A                regNum = MISCREG_MTRR_PHYS_BASE_4;
372360SN/A                break;
373360SN/A              case 0x209:
374360SN/A                regNum = MISCREG_MTRR_PHYS_MASK_4;
375360SN/A                break;
376360SN/A              case 0x20A:
377360SN/A                regNum = MISCREG_MTRR_PHYS_BASE_5;
378360SN/A                break;
379360SN/A              case 0x20B:
380360SN/A                regNum = MISCREG_MTRR_PHYS_MASK_5;
381360SN/A                break;
382360SN/A              case 0x20C:
383360SN/A                regNum = MISCREG_MTRR_PHYS_BASE_6;
384360SN/A                break;
385360SN/A              case 0x20D:
386360SN/A                regNum = MISCREG_MTRR_PHYS_MASK_6;
387360SN/A                break;
388360SN/A              case 0x20E:
389360SN/A                regNum = MISCREG_MTRR_PHYS_BASE_7;
390360SN/A                break;
391360SN/A              case 0x20F:
392360SN/A                regNum = MISCREG_MTRR_PHYS_MASK_7;
393360SN/A                break;
394360SN/A              case 0x250:
395360SN/A                regNum = MISCREG_MTRR_FIX_64K_00000;
396360SN/A                break;
397360SN/A              case 0x258:
398360SN/A                regNum = MISCREG_MTRR_FIX_16K_80000;
399360SN/A                break;
400360SN/A              case 0x259:
401360SN/A                regNum = MISCREG_MTRR_FIX_16K_A0000;
402360SN/A                break;
403360SN/A              case 0x268:
404360SN/A                regNum = MISCREG_MTRR_FIX_4K_C0000;
405360SN/A                break;
406360SN/A              case 0x269:
407360SN/A                regNum = MISCREG_MTRR_FIX_4K_C8000;
408360SN/A                break;
409360SN/A              case 0x26A:
410360SN/A                regNum = MISCREG_MTRR_FIX_4K_D0000;
411360SN/A                break;
412360SN/A              case 0x26B:
413360SN/A                regNum = MISCREG_MTRR_FIX_4K_D8000;
414360SN/A                break;
415360SN/A              case 0x26C:
416360SN/A                regNum = MISCREG_MTRR_FIX_4K_E0000;
417360SN/A                break;
418360SN/A              case 0x26D:
419360SN/A                regNum = MISCREG_MTRR_FIX_4K_E8000;
420360SN/A                break;
421360SN/A              case 0x26E:
422360SN/A                regNum = MISCREG_MTRR_FIX_4K_F0000;
423360SN/A                break;
424360SN/A              case 0x26F:
425360SN/A                regNum = MISCREG_MTRR_FIX_4K_F8000;
426360SN/A                break;
427360SN/A              case 0x277:
428360SN/A                regNum = MISCREG_PAT;
429360SN/A                break;
430360SN/A              case 0x2FF:
431360SN/A                regNum = MISCREG_DEF_TYPE;
432360SN/A                break;
433360SN/A              case 0x400:
434360SN/A                regNum = MISCREG_MC0_CTL;
435360SN/A                break;
436360SN/A              case 0x404:
437360SN/A                regNum = MISCREG_MC1_CTL;
438360SN/A                break;
439360SN/A              case 0x408:
440360SN/A                regNum = MISCREG_MC2_CTL;
441360SN/A                break;
442360SN/A              case 0x40C:
443360SN/A                regNum = MISCREG_MC3_CTL;
444360SN/A                break;
445360SN/A              case 0x410:
446360SN/A                regNum = MISCREG_MC4_CTL;
447360SN/A                break;
448360SN/A              case 0x414:
449360SN/A                regNum = MISCREG_MC5_CTL;
450511SN/A                break;
451360SN/A              case 0x418:
452360SN/A                regNum = MISCREG_MC6_CTL;
453360SN/A                break;
454360SN/A              case 0x41C:
455360SN/A                regNum = MISCREG_MC7_CTL;
456360SN/A                break;
457360SN/A              case 0x401:
458360SN/A                regNum = MISCREG_MC0_STATUS;
459360SN/A                break;
460360SN/A              case 0x405:
461360SN/A                regNum = MISCREG_MC1_STATUS;
462360SN/A                break;
463360SN/A              case 0x409:
464360SN/A                regNum = MISCREG_MC2_STATUS;
465360SN/A                break;
466360SN/A              case 0x40D:
467360SN/A                regNum = MISCREG_MC3_STATUS;
468360SN/A                break;
469360SN/A              case 0x411:
470360SN/A                regNum = MISCREG_MC4_STATUS;
471360SN/A                break;
472360SN/A              case 0x415:
473360SN/A                regNum = MISCREG_MC5_STATUS;
474360SN/A                break;
475360SN/A              case 0x419:
476360SN/A                regNum = MISCREG_MC6_STATUS;
477360SN/A                break;
478360SN/A              case 0x41D:
479511SN/A                regNum = MISCREG_MC7_STATUS;
480360SN/A                break;
481360SN/A              case 0x402:
482360SN/A                regNum = MISCREG_MC0_ADDR;
483360SN/A                break;
484360SN/A              case 0x406:
485360SN/A                regNum = MISCREG_MC1_ADDR;
4862553SN/A                break;
487360SN/A              case 0x40A:
488360SN/A                regNum = MISCREG_MC2_ADDR;
489360SN/A                break;
4902553SN/A              case 0x40E:
4912553SN/A                regNum = MISCREG_MC3_ADDR;
492360SN/A                break;
493360SN/A              case 0x412:
494360SN/A                regNum = MISCREG_MC4_ADDR;
495360SN/A                break;
496360SN/A              case 0x416:
497360SN/A                regNum = MISCREG_MC5_ADDR;
498360SN/A                break;
499360SN/A              case 0x41A:
500360SN/A                regNum = MISCREG_MC6_ADDR;
501360SN/A                break;
502360SN/A              case 0x41E:
503360SN/A                regNum = MISCREG_MC7_ADDR;
504360SN/A                break;
505360SN/A              case 0x403:
506360SN/A                regNum = MISCREG_MC0_MISC;
507360SN/A                break;
508360SN/A              case 0x407:
509360SN/A                regNum = MISCREG_MC1_MISC;
510360SN/A                break;
511360SN/A              case 0x40B:
512360SN/A                regNum = MISCREG_MC2_MISC;
513360SN/A                break;
514360SN/A              case 0x40F:
515360SN/A                regNum = MISCREG_MC3_MISC;
516360SN/A                break;
517360SN/A              case 0x413:
518360SN/A                regNum = MISCREG_MC4_MISC;
519360SN/A                break;
520360SN/A              case 0x417:
521543SN/A                regNum = MISCREG_MC5_MISC;
522543SN/A                break;
523543SN/A              case 0x41B:
524543SN/A                regNum = MISCREG_MC6_MISC;
525543SN/A                break;
526543SN/A              case 0x41F:
527543SN/A                regNum = MISCREG_MC7_MISC;
528543SN/A                break;
529543SN/A              case 0xC0000080:
530543SN/A                regNum = MISCREG_EFER;
531543SN/A                break;
532543SN/A              case 0xC0000081:
533543SN/A                regNum = MISCREG_STAR;
534543SN/A                break;
535543SN/A              case 0xC0000082:
536543SN/A                regNum = MISCREG_LSTAR;
537543SN/A                break;
538543SN/A              case 0xC0000083:
539543SN/A                regNum = MISCREG_CSTAR;
540543SN/A                break;
541543SN/A              case 0xC0000084:
542543SN/A                regNum = MISCREG_SF_MASK;
543543SN/A                break;
544543SN/A              case 0xC0000100:
545543SN/A                regNum = MISCREG_FS_BASE;
546543SN/A                break;
547543SN/A              case 0xC0000101:
548543SN/A                regNum = MISCREG_GS_BASE;
549543SN/A                break;
550543SN/A              case 0xC0000102:
5511999SN/A                regNum = MISCREG_KERNEL_GS_BASE;
5521999SN/A                break;
5532553SN/A              case 0xC0000103:
5542553SN/A                regNum = MISCREG_TSC_AUX;
5551999SN/A                break;
5561999SN/A              case 0xC0010000:
5571999SN/A                regNum = MISCREG_PERF_EVT_SEL0;
5581999SN/A                break;
5591999SN/A              case 0xC0010001:
5601999SN/A                regNum = MISCREG_PERF_EVT_SEL1;
5611999SN/A                break;
5621999SN/A              case 0xC0010002:
5631999SN/A                regNum = MISCREG_PERF_EVT_SEL2;
5641999SN/A                break;
5651999SN/A              case 0xC0010003:
5661999SN/A                regNum = MISCREG_PERF_EVT_SEL3;
5671999SN/A                break;
5681999SN/A              case 0xC0010004:
569360SN/A                regNum = MISCREG_PERF_EVT_CTR0;
570360SN/A                break;
571360SN/A              case 0xC0010005:
572360SN/A                regNum = MISCREG_PERF_EVT_CTR1;
5732423SN/A                break;
574360SN/A              case 0xC0010006:
575360SN/A                regNum = MISCREG_PERF_EVT_CTR2;
576360SN/A                break;
577360SN/A              case 0xC0010007:
5783114Sgblack@eecs.umich.edu                regNum = MISCREG_PERF_EVT_CTR3;
5793669Sbinkertn@umich.edu                break;
5803114Sgblack@eecs.umich.edu              case 0xC0010010:
5813114Sgblack@eecs.umich.edu                regNum = MISCREG_SYSCFG;
5823114Sgblack@eecs.umich.edu                break;
5833114Sgblack@eecs.umich.edu              case 0xC0010016:
5843114Sgblack@eecs.umich.edu                regNum = MISCREG_IORR_BASE0;
5853114Sgblack@eecs.umich.edu                break;
5862474SN/A              case 0xC0010017:
5873669Sbinkertn@umich.edu                regNum = MISCREG_IORR_BASE1;
5882093SN/A                break;
589360SN/A              case 0xC0010018:
5902423SN/A                regNum = MISCREG_IORR_MASK0;
591360SN/A                break;
5922093SN/A              case 0xC0010019:
5932093SN/A                regNum = MISCREG_IORR_MASK1;
5942093SN/A                break;
5952093SN/A              case 0xC001001A:
5962093SN/A                regNum = MISCREG_TOP_MEM;
5972093SN/A                break;
5982093SN/A              case 0xC001001D:
5992093SN/A                regNum = MISCREG_TOP_MEM2;
6002093SN/A                break;
6012093SN/A              case 0xC0010114:
602                regNum = MISCREG_VM_CR;
603                break;
604              case 0xC0010115:
605                regNum = MISCREG_IGNNE;
606                break;
607              case 0xC0010116:
608                regNum = MISCREG_SMM_CTL;
609                break;
610              case 0xC0010117:
611                regNum = MISCREG_VM_HSAVE_PA;
612                break;
613              default:
614                return std::make_shared<GeneralProtection>(0);
615            }
616            //The index is multiplied by the size of a MiscReg so that
617            //any memory dependence calculations will not see these as
618            //overlapping.
619            req->setPaddr(regNum * sizeof(MiscReg));
620            return NoFault;
621        } else if (prefix == IntAddrPrefixIO) {
622            // TODO If CPL > IOPL or in virtual mode, check the I/O permission
623            // bitmap in the TSS.
624
625            Addr IOPort = vaddr & ~IntAddrPrefixMask;
626            // Make sure the address fits in the expected 16 bit IO address
627            // space.
628            assert(!(IOPort & ~0xFFFF));
629
630            if (IOPort == 0xCF8 && req->getSize() == 4) {
631                req->setFlags(Request::MMAPPED_IPR);
632                req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
633            } else if ((IOPort & ~mask(2)) == 0xCFC) {
634                req->setFlags(Request::UNCACHEABLE);
635
636                Addr configAddress =
637                    tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);
638
639                if (bits(configAddress, 31, 31)) {
640                    req->setPaddr(PhysAddrPrefixPciConfig |
641                                  mbits(configAddress, 30, 2) |
642                                  (IOPort & mask(2)));
643                } else {
644                    req->setPaddr(PhysAddrPrefixIO | IOPort);
645                }
646            } else {
647                req->setFlags(Request::UNCACHEABLE);
648                req->setPaddr(PhysAddrPrefixIO | IOPort);
649            }
650            return NoFault;
651        } else {
652            panic("Access to unrecognized internal address space %#x.\n",
653                  prefix);
654        }
655    }
656
657    /**
658     * TLB_lookup will only perform a TLB lookup returning true on a TLB hit
659     * and false on a TLB miss.
660     * Many of the checks about different modes have been converted to
661     * assertions, since these parts of the code are not really used.
662     * On a hit it will update the LRU stack.
663     */
664    bool
665    GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
666    {
667        bool tlb_hit = false;
668    #ifndef NDEBUG
669        uint32_t flags = req->getFlags();
670        int seg = flags & SegmentFlagMask;
671    #endif
672
673        assert(seg != SEGMENT_REG_MS);
674        Addr vaddr = req->getVaddr();
675        DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
676        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
677
678        if (m5Reg.prot) {
679            DPRINTF(GPUTLB, "In protected mode.\n");
680            // make sure we are in 64-bit mode
681            assert(m5Reg.mode == LongMode);
682
683            // If paging is enabled, do the translation.
684            if (m5Reg.paging) {
685                DPRINTF(GPUTLB, "Paging enabled.\n");
686                //update LRU stack on a hit
687                GpuTlbEntry *entry = lookup(vaddr, true);
688
689                if (entry)
690                    tlb_hit = true;
691
692                if (!update_stats) {
693                    // functional tlb access for memory initialization
694                    // i.e., memory seeding or instr. seeding -> don't update
695                    // TLB and stats
696                    return tlb_hit;
697                }
698
699                localNumTLBAccesses++;
700
701                if (!entry) {
702                    localNumTLBMisses++;
703                } else {
704                    localNumTLBHits++;
705                }
706            }
707        }
708
709        return tlb_hit;
710    }
711
712    Fault
713    GpuTLB::translate(RequestPtr req, ThreadContext *tc,
714                      Translation *translation, Mode mode,
715                      bool &delayedResponse, bool timing, int &latency)
716    {
717        uint32_t flags = req->getFlags();
718        int seg = flags & SegmentFlagMask;
719        bool storeCheck = flags & (StoreCheck << FlagShift);
720
721        // If this is true, we're dealing with a request
722        // to a non-memory address space.
723        if (seg == SEGMENT_REG_MS) {
724            return translateInt(req, tc);
725        }
726
727        delayedResponse = false;
728        Addr vaddr = req->getVaddr();
729        DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
730
731        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
732
733        // If protected mode has been enabled...
734        if (m5Reg.prot) {
735            DPRINTF(GPUTLB, "In protected mode.\n");
736            // If we're not in 64-bit mode, do protection/limit checks
737            if (m5Reg.mode != LongMode) {
738                DPRINTF(GPUTLB, "Not in long mode. Checking segment "
739                        "protection.\n");
740
741                // Check for a null segment selector.
742                if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
743                    seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
744                    && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
745                    return std::make_shared<GeneralProtection>(0);
746                }
747
748                bool expandDown = false;
749                SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
750
751                if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
752                    if (!attr.writable && (mode == BaseTLB::Write ||
753                        storeCheck))
754                        return std::make_shared<GeneralProtection>(0);
755
756                    if (!attr.readable && mode == BaseTLB::Read)
757                        return std::make_shared<GeneralProtection>(0);
758
759                    expandDown = attr.expandDown;
760
761                }
762
763                Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
764                Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
765                // This assumes we're not in 64 bit mode. If we were, the
766                // default address size is 64 bits, overridable to 32.
767                int size = 32;
768                bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
769                SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
770
771                if ((csAttr.defaultSize && sizeOverride) ||
772                    (!csAttr.defaultSize && !sizeOverride)) {
773                    size = 16;
774                }
775
776                Addr offset = bits(vaddr - base, size - 1, 0);
777                Addr endOffset = offset + req->getSize() - 1;
778
779                if (expandDown) {
780                    DPRINTF(GPUTLB, "Checking an expand down segment.\n");
781                    warn_once("Expand down segments are untested.\n");
782
783                    if (offset <= limit || endOffset <= limit)
784                        return std::make_shared<GeneralProtection>(0);
785                } else {
786                    if (offset > limit || endOffset > limit)
787                        return std::make_shared<GeneralProtection>(0);
788                }
789            }
790
791            // If paging is enabled, do the translation.
792            if (m5Reg.paging) {
793                DPRINTF(GPUTLB, "Paging enabled.\n");
794                // The vaddr already has the segment base applied.
795                GpuTlbEntry *entry = lookup(vaddr);
796                localNumTLBAccesses++;
797
798                if (!entry) {
799                    localNumTLBMisses++;
800                    if (timing) {
801                        latency = missLatency1;
802                    }
803
804                    if (FullSystem) {
805                        fatal("GpuTLB doesn't support full-system mode\n");
806                    } else {
807                        DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
808                                "at pc %#x.\n", vaddr, tc->instAddr());
809
810                        Process *p = tc->getProcessPtr();
811                        const EmulationPageTable::Entry *pte =
812                            p->pTable->lookup(vaddr);
813
814                        if (!pte && mode != BaseTLB::Execute) {
815                            // penalize a "page fault" more
816                            if (timing)
817                                latency += missLatency2;
818
819                            if (p->fixupStackFault(vaddr))
820                                pte = p->pTable->lookup(vaddr);
821                        }
822
823                        if (!pte) {
824                            return std::make_shared<PageFault>(vaddr, true,
825                                                               mode, true,
826                                                               false);
827                        } else {
828                            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
829
830                            DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
831                                    alignedVaddr, pte->paddr);
832
833                            GpuTlbEntry gpuEntry(
834                                p->pTable->pid(), alignedVaddr,
835                                pte->paddr, true);
836                            entry = insert(alignedVaddr, gpuEntry);
837                        }
838
839                        DPRINTF(GPUTLB, "Miss was serviced.\n");
840                    }
841                } else {
842                    localNumTLBHits++;
843
844                    if (timing) {
845                        latency = hitLatency;
846                    }
847                }
848
849                // Do paging protection checks.
850                bool inUser = (m5Reg.cpl == 3 &&
851                               !(flags & (CPL0FlagBit << FlagShift)));
852
853                CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
854                bool badWrite = (!entry->writable && (inUser || cr0.wp));
855
856                if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
857                     badWrite)) {
858                    // The page must have been present to get into the TLB in
859                    // the first place. We'll assume the reserved bits are
860                    // fine even though we're not checking them.
861                    return std::make_shared<PageFault>(vaddr, true, mode,
862                                                       inUser, false);
863                }
864
865                if (storeCheck && badWrite) {
866                    // This would fault if this were a write, so return a page
867                    // fault that reflects that happening.
868                    return std::make_shared<PageFault>(vaddr, true,
869                                                       BaseTLB::Write,
870                                                       inUser, false);
871                }
872
873
874                DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
875                        "checks.\n", entry->paddr);
876
877                int page_size = entry->size();
878                Addr paddr = entry->paddr | (vaddr & (page_size - 1));
879                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
880                req->setPaddr(paddr);
881
882                if (entry->uncacheable)
883                    req->setFlags(Request::UNCACHEABLE);
884            } else {
885                //Use the address which already has segmentation applied.
886                DPRINTF(GPUTLB, "Paging disabled.\n");
887                DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
888                req->setPaddr(vaddr);
889            }
890        } else {
891            // Real mode
892            DPRINTF(GPUTLB, "In real mode.\n");
893            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
894            req->setPaddr(vaddr);
895        }
896
897        // Check for an access to the local APIC
898        if (FullSystem) {
899            LocalApicBase localApicBase =
900                tc->readMiscRegNoEffect(MISCREG_APIC_BASE);
901
902            Addr baseAddr = localApicBase.base * PageBytes;
903            Addr paddr = req->getPaddr();
904
905            if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
906                // Force the access to be uncacheable.
907                req->setFlags(Request::UNCACHEABLE);
908                req->setPaddr(x86LocalAPICAddress(tc->contextId(),
909                                                  paddr - baseAddr));
910            }
911        }
912
913        return NoFault;
914    };
915
916    Fault
917    GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
918                            int &latency)
919    {
920        bool delayedResponse;
921
922        return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
923                                 latency);
924    }
925
926    void
927    GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
928            Translation *translation, Mode mode, int &latency)
929    {
930        bool delayedResponse;
931        assert(translation);
932
933        Fault fault = GpuTLB::translate(req, tc, translation, mode,
934                                        delayedResponse, true, latency);
935
936        if (!delayedResponse)
937            translation->finish(fault, req, tc, mode);
938    }
939
940    Walker*
941    GpuTLB::getWalker()
942    {
943        return walker;
944    }
945
946
947    void
948    GpuTLB::serialize(CheckpointOut &cp) const
949    {
950    }
951
952    void
953    GpuTLB::unserialize(CheckpointIn &cp)
954    {
955    }
956
957    void
958    GpuTLB::regStats()
959    {
960        MemObject::regStats();
961
962        localNumTLBAccesses
963            .name(name() + ".local_TLB_accesses")
964            .desc("Number of TLB accesses")
965            ;
966
967        localNumTLBHits
968            .name(name() + ".local_TLB_hits")
969            .desc("Number of TLB hits")
970            ;
971
972        localNumTLBMisses
973            .name(name() + ".local_TLB_misses")
974            .desc("Number of TLB misses")
975            ;
976
977        localTLBMissRate
978            .name(name() + ".local_TLB_miss_rate")
979            .desc("TLB miss rate")
980            ;
981
982        accessCycles
983            .name(name() + ".access_cycles")
984            .desc("Cycles spent accessing this TLB level")
985            ;
986
987        pageTableCycles
988            .name(name() + ".page_table_cycles")
989            .desc("Cycles spent accessing the page table")
990            ;
991
992        localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;
993
994        numUniquePages
995            .name(name() + ".unique_pages")
996            .desc("Number of unique pages touched")
997            ;
998
999        localCycles
1000            .name(name() + ".local_cycles")
1001            .desc("Number of cycles spent in queue for all incoming reqs")
1002            ;
1003
1004        localLatency
1005            .name(name() + ".local_latency")
1006            .desc("Avg. latency over incoming coalesced reqs")
1007            ;
1008
1009        localLatency = localCycles / localNumTLBAccesses;
1010
1011        globalNumTLBAccesses
1012            .name(name() + ".global_TLB_accesses")
1013            .desc("Number of TLB accesses")
1014            ;
1015
1016        globalNumTLBHits
1017            .name(name() + ".global_TLB_hits")
1018            .desc("Number of TLB hits")
1019            ;
1020
1021        globalNumTLBMisses
1022            .name(name() + ".global_TLB_misses")
1023            .desc("Number of TLB misses")
1024            ;
1025
1026        globalTLBMissRate
1027            .name(name() + ".global_TLB_miss_rate")
1028            .desc("TLB miss rate")
1029            ;
1030
1031        globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;
1032
1033        avgReuseDistance
1034            .name(name() + ".avg_reuse_distance")
1035            .desc("avg. reuse distance over all pages (in ticks)")
1036            ;
1037
1038    }
1039
1040    /**
1041     * Do the TLB lookup for this coalesced request and schedule
1042     * another event <TLB access latency> cycles later.
1043     */
1044
1045    void
1046    GpuTLB::issueTLBLookup(PacketPtr pkt)
1047    {
1048        assert(pkt);
1049        assert(pkt->senderState);
1050
1051        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1052                                        TheISA::PageBytes);
1053
1054        TranslationState *sender_state =
1055                safe_cast<TranslationState*>(pkt->senderState);
1056
1057        bool update_stats = !sender_state->prefetch;
1058        ThreadContext * tmp_tc = sender_state->tc;
1059
1060        DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
1061                virt_page_addr);
1062
1063        int req_cnt = sender_state->reqCnt.back();
1064
1065        if (update_stats) {
1066            accessCycles -= (curTick() * req_cnt);
1067            localCycles -= curTick();
1068            updatePageFootprint(virt_page_addr);
1069            globalNumTLBAccesses += req_cnt;
1070        }
1071
1072        tlbOutcome lookup_outcome = TLB_MISS;
1073        RequestPtr tmp_req = pkt->req;
1074
1075        // Access the TLB and figure out if it's a hit or a miss.
1076        bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
1077
1078        if (success) {
1079            lookup_outcome = TLB_HIT;
1080            // Put the entry in SenderState
1081            GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false);
1082            assert(entry);
1083
1084            sender_state->tlbEntry =
1085                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
1086
1087            if (update_stats) {
1088                // the reqCnt has an entry per level, so its size tells us
1089                // which level we are in
1090                sender_state->hitLevel = sender_state->reqCnt.size();
1091                globalNumTLBHits += req_cnt;
1092            }
1093        } else {
1094            if (update_stats)
1095                globalNumTLBMisses += req_cnt;
1096        }
1097
1098        /*
1099         * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1100         * as the TLB access latency.
1101         *
1102         * We create and schedule a new TLBEvent which will help us take the
1103         * appropriate actions (e.g., update TLB on a hit, send request to lower
1104         * level TLB on a miss, or start a page walk if this was the last-level
1105         * TLB)
1106         */
1107        TLBEvent *tlb_event =
1108            new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
1109
1110        if (translationReturnEvent.count(virt_page_addr)) {
1111            panic("Virtual Page Address %#x already has a return event\n",
1112                  virt_page_addr);
1113        }
1114
1115        translationReturnEvent[virt_page_addr] = tlb_event;
1116        assert(tlb_event);
1117
1118        DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
1119                curTick() + this->ticks(hitLatency));
1120
1121        schedule(tlb_event, curTick() + this->ticks(hitLatency));
1122    }
1123
1124    GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
1125                               PacketPtr _pkt)
1126        : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
1127        outcome(tlb_outcome), pkt(_pkt)
1128    {
1129    }
1130
1131    /**
1132     * Do Paging protection checks. If we encounter a page fault, then
1133     * an assertion is fired.
1134     */
1135    void
1136    GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
1137            GpuTlbEntry * tlb_entry, Mode mode)
1138    {
1139        HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1140        uint32_t flags = pkt->req->getFlags();
1141        bool storeCheck = flags & (StoreCheck << FlagShift);
1142
1143        // Do paging protection checks.
1144        bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1145        CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1146
1147        bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1148
1149        if ((inUser && !tlb_entry->user) ||
1150            (mode == BaseTLB::Write && badWrite)) {
1151           // The page must have been present to get into the TLB in
1152           // the first place. We'll assume the reserved bits are
1153           // fine even though we're not checking them.
1154           assert(false);
1155        }
1156
1157        if (storeCheck && badWrite) {
1158           // This would fault if this were a write, so return a page
1159           // fault that reflects that happening.
1160           assert(false);
1161        }
1162    }
1163
1164    /**
1165     * handleTranslationReturn is called on a TLB hit,
1166     * when a TLB miss returns or when a page fault returns.
1167     * The latter calls handelHit with TLB miss as tlbOutcome.
1168     */
1169    void
1170    GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
1171            PacketPtr pkt)
1172    {
1173
1174        assert(pkt);
1175        Addr vaddr = pkt->req->getVaddr();
1176
1177        TranslationState *sender_state =
1178            safe_cast<TranslationState*>(pkt->senderState);
1179
1180        ThreadContext *tc = sender_state->tc;
1181        Mode mode = sender_state->tlbMode;
1182
1183        GpuTlbEntry *local_entry, *new_entry;
1184
1185        if (tlb_outcome == TLB_HIT) {
1186            DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
1187            local_entry = sender_state->tlbEntry;
1188        } else {
1189            DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
1190                    vaddr);
1191
1192            // We are returning either from a page walk or from a hit at a lower
1193            // TLB level. The senderState should be "carrying" a pointer to the
1194            // correct TLBEntry.
1195            new_entry = sender_state->tlbEntry;
1196            assert(new_entry);
1197            local_entry = new_entry;
1198
1199            if (allocationPolicy) {
1200                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1201                        virt_page_addr);
1202
1203                local_entry = insert(virt_page_addr, *new_entry);
1204            }
1205
1206            assert(local_entry);
1207        }
1208
1209        /**
1210         * At this point the packet carries an up-to-date tlbEntry pointer
1211         * in its senderState.
1212         * Next step is to do the paging protection checks.
1213         */
1214        DPRINTF(GPUTLB, "Entry found with vaddr %#x,  doing protection checks "
1215                "while paddr was %#x.\n", local_entry->vaddr,
1216                local_entry->paddr);
1217
1218        pagingProtectionChecks(tc, pkt, local_entry, mode);
1219        int page_size = local_entry->size();
1220        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1221        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1222
1223        // Since this packet will be sent through the cpu side slave port,
1224        // it must be converted to a response pkt if it is not one already
1225        if (pkt->isRequest()) {
1226            pkt->makeTimingResponse();
1227        }
1228
1229        pkt->req->setPaddr(paddr);
1230
1231        if (local_entry->uncacheable) {
1232             pkt->req->setFlags(Request::UNCACHEABLE);
1233        }
1234
1235        //send packet back to coalescer
1236        cpuSidePort[0]->sendTimingResp(pkt);
1237        //schedule cleanup event
1238        cleanupQueue.push(virt_page_addr);
1239
1240        // schedule this only once per cycle.
1241        // The check is required because we might have multiple translations
1242        // returning the same cycle
1243        // this is a maximum priority event and must be on the same cycle
1244        // as the cleanup event in TLBCoalescer to avoid a race with
1245        // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
1246        if (!cleanupEvent.scheduled())
1247            schedule(cleanupEvent, curTick());
1248    }
1249
1250    /**
1251     * Here we take the appropriate actions based on the result of the
1252     * TLB lookup.
1253     */
1254    void
1255    GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
1256                              PacketPtr pkt)
1257    {
1258        DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1259
1260        assert(translationReturnEvent[virtPageAddr]);
1261        assert(pkt);
1262
1263        TranslationState *tmp_sender_state =
1264            safe_cast<TranslationState*>(pkt->senderState);
1265
1266        int req_cnt = tmp_sender_state->reqCnt.back();
1267        bool update_stats = !tmp_sender_state->prefetch;
1268
1269
1270        if (outcome == TLB_HIT) {
1271            handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1272
1273            if (update_stats) {
1274                accessCycles += (req_cnt * curTick());
1275                localCycles += curTick();
1276            }
1277
1278        } else if (outcome == TLB_MISS) {
1279
1280            DPRINTF(GPUTLB, "This is a TLB miss\n");
1281            if (update_stats) {
1282                accessCycles += (req_cnt*curTick());
1283                localCycles += curTick();
1284            }
1285
1286            if (hasMemSidePort) {
1287                // the one cyle added here represent the delay from when we get
1288                // the reply back till when we propagate it to the coalescer
1289                // above.
1290                if (update_stats) {
1291                    accessCycles += (req_cnt * 1);
1292                    localCycles += 1;
1293                }
1294
1295                /**
1296                 * There is a TLB below. Send the coalesced request.
1297                 * We actually send the very first packet of all the
1298                 * pending packets for this virtual page address.
1299                 */
1300                if (!memSidePort[0]->sendTimingReq(pkt)) {
1301                    DPRINTF(GPUTLB, "Failed sending translation request to "
1302                            "lower level TLB for addr %#x\n", virtPageAddr);
1303
1304                    memSidePort[0]->retries.push_back(pkt);
1305                } else {
1306                    DPRINTF(GPUTLB, "Sent translation request to lower level "
1307                            "TLB for addr %#x\n", virtPageAddr);
1308                }
1309            } else {
1310                //this is the last level TLB. Start a page walk
1311                DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1312                        "addr %#x\n", virtPageAddr);
1313
1314                if (update_stats)
1315                    pageTableCycles -= (req_cnt*curTick());
1316
1317                TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1318                assert(tlb_event);
1319                tlb_event->updateOutcome(PAGE_WALK);
1320                schedule(tlb_event, curTick() + ticks(missLatency2));
1321            }
1322        } else if (outcome == PAGE_WALK) {
1323            if (update_stats)
1324                pageTableCycles += (req_cnt*curTick());
1325
1326            // Need to access the page table and update the TLB
1327            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1328                    virtPageAddr);
1329
1330            TranslationState *sender_state =
1331                safe_cast<TranslationState*>(pkt->senderState);
1332
1333            Process *p = sender_state->tc->getProcessPtr();
1334            Addr vaddr = pkt->req->getVaddr();
1335    #ifndef NDEBUG
1336            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1337            assert(alignedVaddr == virtPageAddr);
1338    #endif
1339            const EmulationPageTable::Entry *pte = p->pTable->lookup(vaddr);
1340            if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1341                    p->fixupStackFault(vaddr)) {
1342                pte = p->pTable->lookup(vaddr);
1343            }
1344
1345            if (pte) {
1346                DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1347                        pte->paddr);
1348
1349                sender_state->tlbEntry =
1350                    new GpuTlbEntry(0, virtPageAddr, pte->paddr, true);
1351            } else {
1352                sender_state->tlbEntry =
1353                    new GpuTlbEntry(0, 0, 0, false);
1354            }
1355
1356            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1357        } else if (outcome == MISS_RETURN) {
1358            /** we add an extra cycle in the return path of the translation
1359             * requests in between the various TLB levels.
1360             */
1361            handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1362        } else {
1363            assert(false);
1364        }
1365    }
1366
1367    void
1368    GpuTLB::TLBEvent::process()
1369    {
1370        tlb->translationReturn(virtPageAddr, outcome, pkt);
1371    }
1372
1373    const char*
1374    GpuTLB::TLBEvent::description() const
1375    {
1376        return "trigger translationDoneEvent";
1377    }
1378
1379    void
1380    GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
1381    {
1382        outcome = _outcome;
1383    }
1384
1385    Addr
1386    GpuTLB::TLBEvent::getTLBEventVaddr()
1387    {
1388        return virtPageAddr;
1389    }
1390
1391    /*
1392     * recvTiming receives a coalesced timing request from a TLBCoalescer
1393     * and it calls issueTLBLookup()
1394     * It only rejects the packet if we have exceeded the max
1395     * outstanding number of requests for the TLB
1396     */
1397    bool
1398    GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
1399    {
1400        if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1401            tlb->issueTLBLookup(pkt);
1402            // update number of outstanding translation requests
1403            tlb->outstandingReqs++;
1404            return true;
1405         } else {
1406            DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1407                    tlb->outstandingReqs);
1408            return false;
1409         }
1410    }
1411
1412    /**
1413     * handleFuncTranslationReturn is called on a TLB hit,
1414     * when a TLB miss returns or when a page fault returns.
1415     * It updates LRU, inserts the TLB entry on a miss
1416     * depending on the allocation policy and does the required
1417     * protection checks. It does NOT create a new packet to
1418     * update the packet's addr; this is done in hsail-gpu code.
1419     */
1420    void
1421    GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
1422    {
1423        TranslationState *sender_state =
1424            safe_cast<TranslationState*>(pkt->senderState);
1425
1426        ThreadContext *tc = sender_state->tc;
1427        Mode mode = sender_state->tlbMode;
1428        Addr vaddr = pkt->req->getVaddr();
1429
1430        GpuTlbEntry *local_entry, *new_entry;
1431
1432        if (tlb_outcome == TLB_HIT) {
1433            DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1434                    "%#x\n", vaddr);
1435
1436            local_entry = sender_state->tlbEntry;
1437        } else {
1438            DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1439                    "%#x\n", vaddr);
1440
1441            // We are returning either from a page walk or from a hit at a lower
1442            // TLB level. The senderState should be "carrying" a pointer to the
1443            // correct TLBEntry.
1444            new_entry = sender_state->tlbEntry;
1445            assert(new_entry);
1446            local_entry = new_entry;
1447
1448            if (allocationPolicy) {
1449                Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1450
1451                DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1452                        virt_page_addr);
1453
1454                local_entry = insert(virt_page_addr, *new_entry);
1455            }
1456
1457            assert(local_entry);
1458        }
1459
1460        DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1461                "while paddr was %#x.\n", local_entry->vaddr,
1462                local_entry->paddr);
1463
1464        // Do paging checks if it's a normal functional access.  If it's for a
1465        // prefetch, then sometimes you can try to prefetch something that won't
1466        // pass protection. We don't actually want to fault becuase there is no
1467        // demand access to deem this a violation.  Just put it in the TLB and
1468        // it will fault if indeed a future demand access touches it in
1469        // violation.
1470        if (!sender_state->prefetch && sender_state->tlbEntry->valid)
1471            pagingProtectionChecks(tc, pkt, local_entry, mode);
1472
1473        int page_size = local_entry->size();
1474        Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1475        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1476
1477        pkt->req->setPaddr(paddr);
1478
1479        if (local_entry->uncacheable)
1480             pkt->req->setFlags(Request::UNCACHEABLE);
1481    }
1482
1483    // This is used for atomic translations. Need to
1484    // make it all happen during the same cycle.
1485    void
1486    GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
1487    {
1488        TranslationState *sender_state =
1489            safe_cast<TranslationState*>(pkt->senderState);
1490
1491        ThreadContext *tc = sender_state->tc;
1492        bool update_stats = !sender_state->prefetch;
1493
1494        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1495                                        TheISA::PageBytes);
1496
1497        if (update_stats)
1498            tlb->updatePageFootprint(virt_page_addr);
1499
1500        // do the TLB lookup without updating the stats
1501        bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1502        tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1503
1504        // functional mode means no coalescing
1505        // global metrics are the same as the local metrics
1506        if (update_stats) {
1507            tlb->globalNumTLBAccesses++;
1508
1509            if (success) {
1510                sender_state->hitLevel = sender_state->reqCnt.size();
1511                tlb->globalNumTLBHits++;
1512            }
1513        }
1514
1515        if (!success) {
1516            if (update_stats)
1517                tlb->globalNumTLBMisses++;
1518            if (tlb->hasMemSidePort) {
1519                // there is a TLB below -> propagate down the TLB hierarchy
1520                tlb->memSidePort[0]->sendFunctional(pkt);
1521                // If no valid translation from a prefetch, then just return
1522                if (sender_state->prefetch && !pkt->req->hasPaddr())
1523                    return;
1524            } else {
1525                // Need to access the page table and update the TLB
1526                DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1527                        virt_page_addr);
1528
1529                Process *p = tc->getProcessPtr();
1530
1531                Addr vaddr = pkt->req->getVaddr();
1532    #ifndef NDEBUG
1533                Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1534                assert(alignedVaddr == virt_page_addr);
1535    #endif
1536
1537                const EmulationPageTable::Entry *pte =
1538                        p->pTable->lookup(vaddr);
1539                if (!pte && sender_state->tlbMode != BaseTLB::Execute &&
1540                        p->fixupStackFault(vaddr)) {
1541                    pte = p->pTable->lookup(vaddr);
1542                }
1543
1544                if (!sender_state->prefetch) {
1545                    // no PageFaults are permitted after
1546                    // the second page table lookup
1547                    assert(pte);
1548
1549                    DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1550                            pte->paddr);
1551
1552                    sender_state->tlbEntry =
1553                        new GpuTlbEntry(0, virt_page_addr,
1554                                        pte->paddr, true);
1555                } else {
1556                    // If this was a prefetch, then do the normal thing if it
1557                    // was a successful translation.  Otherwise, send an empty
1558                    // TLB entry back so that it can be figured out as empty and
1559                    // handled accordingly.
1560                    if (pte) {
1561                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1562                                pte->paddr);
1563
1564                        sender_state->tlbEntry =
1565                            new GpuTlbEntry(0, virt_page_addr,
1566                                            pte->paddr, true);
1567                    } else {
1568                        DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1569                                alignedVaddr);
1570
1571                        sender_state->tlbEntry = new GpuTlbEntry();
1572
1573                        return;
1574                    }
1575                }
1576            }
1577        } else {
1578            DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1579                    tlb->lookup(pkt->req->getVaddr()));
1580
1581            GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1582                                             update_stats);
1583
1584            assert(entry);
1585
1586            sender_state->tlbEntry =
1587                new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
1588        }
1589        // This is the function that would populate pkt->req with the paddr of
1590        // the translation. But if no translation happens (i.e. Prefetch fails)
1591        // then the early returns in the above code will keep this function
1592        // from executing.
1593        tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1594    }
1595
1596    void
1597    GpuTLB::CpuSidePort::recvReqRetry()
1598    {
1599        // The CPUSidePort never sends anything but replies. No retries
1600        // expected.
1601        assert(false);
1602    }
1603
1604    AddrRangeList
1605    GpuTLB::CpuSidePort::getAddrRanges() const
1606    {
1607        // currently not checked by the master
1608        AddrRangeList ranges;
1609
1610        return ranges;
1611    }
1612
1613    /**
1614     * MemSidePort receives the packet back.
1615     * We need to call the handleTranslationReturn
1616     * and propagate up the hierarchy.
1617     */
1618    bool
1619    GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
1620    {
1621        Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1622                                        TheISA::PageBytes);
1623
1624        DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1625                virt_page_addr);
1626
1627        TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1628        assert(tlb_event);
1629        assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1630
1631        tlb_event->updateOutcome(MISS_RETURN);
1632        tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1633
1634        return true;
1635    }
1636
1637    void
1638    GpuTLB::MemSidePort::recvReqRetry()
1639    {
1640        // No retries should reach the TLB. The retries
1641        // should only reach the TLBCoalescer.
1642        assert(false);
1643    }
1644
1645    void
1646    GpuTLB::cleanup()
1647    {
1648        while (!cleanupQueue.empty()) {
1649            Addr cleanup_addr = cleanupQueue.front();
1650            cleanupQueue.pop();
1651
1652            // delete TLBEvent
1653            TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1654            delete old_tlb_event;
1655            translationReturnEvent.erase(cleanup_addr);
1656
1657            // update number of outstanding requests
1658            outstandingReqs--;
1659        }
1660
1661        /** the higher level coalescer should retry if it has
1662         * any pending requests.
1663         */
1664        for (int i = 0; i < cpuSidePort.size(); ++i) {
1665            cpuSidePort[i]->sendRetryReq();
1666        }
1667    }
1668
1669    void
1670    GpuTLB::updatePageFootprint(Addr virt_page_addr)
1671    {
1672
1673        std::pair<AccessPatternTable::iterator, bool> ret;
1674
1675        AccessInfo tmp_access_info;
1676        tmp_access_info.lastTimeAccessed = 0;
1677        tmp_access_info.accessesPerPage = 0;
1678        tmp_access_info.totalReuseDistance = 0;
1679        tmp_access_info.sumDistance = 0;
1680        tmp_access_info.meanDistance = 0;
1681
1682        ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1683                                  tmp_access_info));
1684
1685        bool first_page_access = ret.second;
1686
1687        if (first_page_access) {
1688            numUniquePages++;
1689        } else  {
1690            int accessed_before;
1691            accessed_before  = curTick() - ret.first->second.lastTimeAccessed;
1692            ret.first->second.totalReuseDistance += accessed_before;
1693        }
1694
1695        ret.first->second.accessesPerPage++;
1696        ret.first->second.lastTimeAccessed = curTick();
1697
1698        if (accessDistance) {
1699            ret.first->second.localTLBAccesses
1700                .push_back(localNumTLBAccesses.value());
1701        }
1702    }
1703
1704    void
1705    GpuTLB::exitCallback()
1706    {
1707        std::ostream *page_stat_file = nullptr;
1708
1709        if (accessDistance) {
1710
1711            // print per page statistics to a separate file (.csv format)
1712            // simout is the gem5 output directory (default is m5out or the one
1713            // specified with -d
1714            page_stat_file = simout.create(name().c_str())->stream();
1715
1716            // print header
1717            *page_stat_file << "page,max_access_distance,mean_access_distance, "
1718                            << "stddev_distance" << std::endl;
1719        }
1720
1721        // update avg. reuse distance footprint
1722        AccessPatternTable::iterator iter, iter_begin, iter_end;
1723        unsigned int sum_avg_reuse_distance_per_page = 0;
1724
1725        // iterate through all pages seen by this TLB
1726        for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1727            sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1728                                               iter->second.accessesPerPage;
1729
1730            if (accessDistance) {
1731                unsigned int tmp = iter->second.localTLBAccesses[0];
1732                unsigned int prev = tmp;
1733
1734                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1735                    if (i) {
1736                        tmp = prev + 1;
1737                    }
1738
1739                    prev = iter->second.localTLBAccesses[i];
1740                    // update the localTLBAccesses value
1741                    // with the actual differece
1742                    iter->second.localTLBAccesses[i] -= tmp;
1743                    // compute the sum of AccessDistance per page
1744                    // used later for mean
1745                    iter->second.sumDistance +=
1746                        iter->second.localTLBAccesses[i];
1747                }
1748
1749                iter->second.meanDistance =
1750                    iter->second.sumDistance / iter->second.accessesPerPage;
1751
1752                // compute std_dev and max  (we need a second round because we
1753                // need to know the mean value
1754                unsigned int max_distance = 0;
1755                unsigned int stddev_distance = 0;
1756
1757                for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1758                    unsigned int tmp_access_distance =
1759                        iter->second.localTLBAccesses[i];
1760
1761                    if (tmp_access_distance > max_distance) {
1762                        max_distance = tmp_access_distance;
1763                    }
1764
1765                    unsigned int diff =
1766                        tmp_access_distance - iter->second.meanDistance;
1767                    stddev_distance += pow(diff, 2);
1768
1769                }
1770
1771                stddev_distance =
1772                    sqrt(stddev_distance/iter->second.accessesPerPage);
1773
1774                if (page_stat_file) {
1775                    *page_stat_file << std::hex << iter->first << ",";
1776                    *page_stat_file << std::dec << max_distance << ",";
1777                    *page_stat_file << std::dec << iter->second.meanDistance
1778                                    << ",";
1779                    *page_stat_file << std::dec << stddev_distance;
1780                    *page_stat_file << std::endl;
1781                }
1782
1783                // erase the localTLBAccesses array
1784                iter->second.localTLBAccesses.clear();
1785            }
1786        }
1787
1788        if (!TLBFootprint.empty()) {
1789            avgReuseDistance =
1790                sum_avg_reuse_distance_per_page / TLBFootprint.size();
1791        }
1792
1793        //clear the TLBFootprint map
1794        TLBFootprint.clear();
1795    }
1796} // namespace X86ISA
1797
1798X86ISA::GpuTLB*
1799X86GPUTLBParams::create()
1800{
1801    return new X86ISA::GpuTLB(this);
1802}
1803
1804