tlb_coalescer.cc revision 11308:7d8836fd043d
11060SN/A/*
21762SN/A * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
31060SN/A * All rights reserved.
41060SN/A *
51060SN/A * For use for simulation and test purposes only
61060SN/A *
71060SN/A * Redistribution and use in source and binary forms, with or without
81060SN/A * modification, are permitted provided that the following conditions are met:
91060SN/A *
101060SN/A * 1. Redistributions of source code must retain the above copyright notice,
111060SN/A * this list of conditions and the following disclaimer.
121060SN/A *
131060SN/A * 2. Redistributions in binary form must reproduce the above copyright notice,
141060SN/A * this list of conditions and the following disclaimer in the documentation
151060SN/A * and/or other materials provided with the distribution.
161060SN/A *
171060SN/A * 3. Neither the name of the copyright holder nor the names of its contributors
181060SN/A * may be used to endorse or promote products derived from this software
191060SN/A * without specific prior written permission.
201060SN/A *
211060SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
221060SN/A * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231060SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241060SN/A * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
251060SN/A * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
261060SN/A * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
272665Ssaidi@eecs.umich.edu * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
282665Ssaidi@eecs.umich.edu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
291060SN/A * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
301060SN/A * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
311464SN/A * POSSIBILITY OF SUCH DAMAGE.
321464SN/A *
331060SN/A * Author: Lisa Hsu
342292SN/A */
351464SN/A
361060SN/A#include "gpu-compute/tlb_coalescer.hh"
372669Sktlim@umich.edu
381060SN/A#include <cstring>
391060SN/A
401858SN/A#include "debug/GPUTLB.hh"
411464SN/A
421464SN/ATLBCoalescer::TLBCoalescer(const Params *p) : MemObject(p),
432669Sktlim@umich.edu    clock(p->clk_domain->clockPeriod()), TLBProbesPerCycle(p->probesPerCycle),
441060SN/A    coalescingWindow(p->coalescingWindow),
452669Sktlim@umich.edu    disableCoalescing(p->disableCoalescing), probeTLBEvent(this),
462292SN/A    cleanupEvent(this)
472292SN/A{
481717SN/A    // create the slave ports based on the number of connected ports
491717SN/A    for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
501717SN/A        cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", name(), i),
511717SN/A                                              this, i));
522292SN/A    }
531060SN/A
541060SN/A    // create the master ports based on the number of connected ports
551060SN/A    for (size_t i = 0; i < p->port_master_connection_count; ++i) {
561060SN/A        memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", name(), i),
571060SN/A                                              this, i));
581060SN/A    }
591061SN/A}
601061SN/A
611060SN/ABaseSlavePort&
621060SN/ATLBCoalescer::getSlavePort(const std::string &if_name, PortID idx)
631061SN/A{
641060SN/A    if (if_name == "slave") {
651060SN/A        if (idx >= static_cast<PortID>(cpuSidePort.size())) {
661060SN/A            panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
671060SN/A        }
682292SN/A
691060SN/A        return *cpuSidePort[idx];
702292SN/A    } else {
712107SN/A        panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
722292SN/A    }
732292SN/A}
742292SN/A
752107SN/ABaseMasterPort&
762292SN/ATLBCoalescer::getMasterPort(const std::string &if_name, PortID idx)
772107SN/A{
781060SN/A    if (if_name == "master") {
792292SN/A        if (idx >= static_cast<PortID>(memSidePort.size())) {
802292SN/A            panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
812292SN/A        }
822292SN/A
832292SN/A        return *memSidePort[idx];
842292SN/A    } else {
851060SN/A        panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
862292SN/A    }
872292SN/A}
881060SN/A
891060SN/A/*
902292SN/A * This method returns true if the <incoming_pkt>
912107SN/A * can be coalesced with <coalesced_pkt> and false otherwise.
921060SN/A * A given set of rules is checked.
931060SN/A * The rules can potentially be modified based on the TLB level.
941060SN/A */
951060SN/Abool
961060SN/ATLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt)
971060SN/A{
982292SN/A    if (disableCoalescing)
991060SN/A        return false;
1001060SN/A
1012292SN/A    TheISA::GpuTLB::TranslationState *incoming_state =
1022292SN/A      safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->senderState);
1032292SN/A
1042292SN/A    TheISA::GpuTLB::TranslationState *coalesced_state =
1052292SN/A     safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->senderState);
1062292SN/A
1072292SN/A    // Rule 1: Coalesce requests only if they
1081060SN/A    // fall within the same virtual page
1092132SN/A    Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(),
1101060SN/A                                             TheISA::PageBytes);
1112292SN/A
1122292SN/A    Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(),
1132292SN/A                                              TheISA::PageBytes);
1142292SN/A
1152292SN/A    if (incoming_virt_page_addr != coalesced_virt_page_addr)
1162292SN/A        return false;
1172292SN/A
1182292SN/A    //* Rule 2: Coalesce requests only if they
1191060SN/A    // share a TLB Mode, i.e. they are both read
1202132SN/A    // or write requests.
1211060SN/A    BaseTLB::Mode incoming_mode = incoming_state->tlbMode;
1221060SN/A    BaseTLB::Mode coalesced_mode = coalesced_state->tlbMode;
1231060SN/A
1241060SN/A    if (incoming_mode != coalesced_mode)
1252132SN/A        return false;
1262132SN/A
1271060SN/A    // when we can coalesce a packet update the reqCnt
1281684SN/A    // that is the number of packets represented by
1291060SN/A    // this coalesced packet
1301060SN/A    if (!incoming_state->prefetch)
1311060SN/A        coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
1321060SN/A
1332292SN/A    return true;
1342292SN/A}
1352292SN/A
1362292SN/A/*
1372292SN/A * We need to update the physical addresses of all the translation requests
1382292SN/A * that were coalesced into the one that just returned.
1392292SN/A */
1402292SN/Avoid
1411060SN/ATLBCoalescer::updatePhysAddresses(PacketPtr pkt)
1421464SN/A{
1431464SN/A    Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
1441464SN/A
1452308SN/A    DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n",
1462308SN/A            issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);
1472308SN/A
1481060SN/A    TheISA::GpuTLB::TranslationState *sender_state =
1491060SN/A        safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
1501060SN/A
1511060SN/A    TheISA::GpuTlbEntry *tlb_entry = sender_state->tlbEntry;
1521060SN/A    assert(tlb_entry);
1531060SN/A    Addr first_entry_vaddr = tlb_entry->vaddr;
1541060SN/A    Addr first_entry_paddr = tlb_entry->paddr;
1551060SN/A    int page_size = tlb_entry->size();
1561060SN/A    bool uncacheable = tlb_entry->uncacheable;
1571060SN/A    int first_hit_level = sender_state->hitLevel;
1581060SN/A    bool valid = tlb_entry->valid;
1591060SN/A
1602292SN/A    // Get the physical page address of the translated request
1612292SN/A    // Using the page_size specified in the TLBEntry allows us
1622292SN/A    // to support different page sizes.
1631060SN/A    Addr phys_page_paddr = pkt->req->getPaddr();
1641060SN/A    phys_page_paddr &= ~(page_size - 1);
1651060SN/A
1661060SN/A    for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
1671060SN/A        PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i];
1681060SN/A        TheISA::GpuTLB::TranslationState *sender_state =
1692292SN/A            safe_cast<TheISA::GpuTLB::TranslationState*>(
1702292SN/A                    local_pkt->senderState);
1712292SN/A
1722292SN/A        // we are sending the packet back, so pop the reqCnt associated
1732292SN/A        // with this level in the TLB hiearchy
1742292SN/A        if (!sender_state->prefetch)
1751060SN/A            sender_state->reqCnt.pop_back();
1761060SN/A
1771060SN/A        /*
1781060SN/A         * Only the first packet from this coalesced request has been
1791060SN/A         * translated. Grab the translated phys. page addr and update the
1801060SN/A         * physical addresses of the remaining packets with the appropriate
1811060SN/A         * page offsets.
1821060SN/A         */
1831060SN/A        if (i) {
1841060SN/A            Addr paddr = phys_page_paddr;
1851060SN/A            paddr |= (local_pkt->req->getVaddr() & (page_size - 1));
1861060SN/A            local_pkt->req->setPaddr(paddr);
1871060SN/A
1881060SN/A            if (uncacheable)
1891060SN/A                local_pkt->req->setFlags(Request::UNCACHEABLE);
1902292SN/A
1912292SN/A            // update senderState->tlbEntry, so we can insert
1922292SN/A            // the correct TLBEentry in the TLBs above.
1931060SN/A            sender_state->tlbEntry =
1941060SN/A                new TheISA::GpuTlbEntry(0, first_entry_vaddr, first_entry_paddr,
1951060SN/A                                        valid);
1962680Sktlim@umich.edu
1972292SN/A            // update the hitLevel for all uncoalesced reqs
1981060SN/A            // so that each packet knows where it hit
1991060SN/A            // (used for statistics in the CUs)
2002132SN/A            sender_state->hitLevel = first_hit_level;
2011060SN/A        }
2022292SN/A
2032669Sktlim@umich.edu        SlavePort *return_port = sender_state->ports.back();
2042669Sktlim@umich.edu        sender_state->ports.pop_back();
2052669Sktlim@umich.edu
2062669Sktlim@umich.edu        // Translation is done - Convert to a response pkt if necessary and
2072669Sktlim@umich.edu        // send the translation back
2082292SN/A        if (local_pkt->isRequest()) {
2091060SN/A            local_pkt->makeTimingResponse();
2101060SN/A        }
2111060SN/A
2121060SN/A        return_port->sendTimingResp(local_pkt);
2131060SN/A    }
2141060SN/A
2151060SN/A    // schedule clean up for end of this cycle
2161060SN/A    // This is a maximum priority event and must be on
2171060SN/A    // the same cycle as GPUTLB cleanup event to prevent
2181060SN/A    // race conditions with an IssueProbeEvent caused by
2191060SN/A    // MemSidePort::recvReqRetry
2201060SN/A    cleanupQueue.push(virt_page_addr);
2211060SN/A
2221060SN/A    if (!cleanupEvent.scheduled())
2231060SN/A        schedule(cleanupEvent, curTick());
2241060SN/A}
2251060SN/A
2261060SN/A// Receive translation requests, create a coalesced request,
2271060SN/A// and send them to the TLB (TLBProbesPerCycle)
2281060SN/Abool
2291060SN/ATLBCoalescer::CpuSidePort::recvTimingReq(PacketPtr pkt)
2301464SN/A{
2311464SN/A    // first packet of a coalesced request
2321464SN/A    PacketPtr first_packet = nullptr;
2331464SN/A    // true if we are able to do coalescing
2341464SN/A    bool didCoalesce = false;
2351060SN/A    // number of coalesced reqs for a given window
2361464SN/A    int coalescedReq_cnt = 0;
2371464SN/A
2381464SN/A    TheISA::GpuTLB::TranslationState *sender_state =
2391464SN/A        safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
2401060SN/A
2411060SN/A    // push back the port to remember the path back
2421060SN/A    sender_state->ports.push_back(this);
2431060SN/A
2441060SN/A    bool update_stats = !sender_state->prefetch;
2451060SN/A
2461060SN/A    if (update_stats) {
2471060SN/A        // if reqCnt is empty then this packet does not represent
2481060SN/A        // multiple uncoalesced reqs(pkts) but just a single pkt.
2491060SN/A        // If it does though then the reqCnt for each level in the
2501060SN/A        // hierarchy accumulates the total number of reqs this packet
2511060SN/A        // represents
2521060SN/A        int req_cnt = 1;
2531060SN/A
2541060SN/A        if (!sender_state->reqCnt.empty())
2551060SN/A            req_cnt = sender_state->reqCnt.back();
2562292SN/A
2572292SN/A        sender_state->reqCnt.push_back(req_cnt);
2582292SN/A
2592292SN/A        // update statistics
2602292SN/A        coalescer->uncoalescedAccesses++;
2612292SN/A        req_cnt = sender_state->reqCnt.back();
2621060SN/A        DPRINTF(GPUTLB, "receiving pkt w/ req_cnt %d\n", req_cnt);
2631060SN/A        coalescer->queuingCycles -= (curTick() * req_cnt);
2641060SN/A        coalescer->localqueuingCycles -= curTick();
2651060SN/A    }
2662292SN/A
2672292SN/A    // FIXME if you want to coalesce not based on the issueTime
2682292SN/A    // of the packets (i.e., from the compute unit's perspective)
2692292SN/A    // but based on when they reached this coalescer then
2702292SN/A    // remove the following if statement and use curTick() or
2712292SN/A    // coalescingWindow for the tick_index.
2722292SN/A    if (!sender_state->issueTime)
2732292SN/A       sender_state->issueTime = curTick();
2741060SN/A
2751060SN/A    // The tick index is used as a key to the coalescerFIFO hashmap.
2762292SN/A    // It is shared by all candidates that fall within the
2772292SN/A    // given coalescingWindow.
2782292SN/A    int64_t tick_index = sender_state->issueTime / coalescer->coalescingWindow;
2792107SN/A
2801060SN/A    if (coalescer->coalescerFIFO.count(tick_index)) {
2811060SN/A        coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size();
2821060SN/A    }
2831060SN/A
2841464SN/A    // see if we can coalesce the incoming pkt with another
2851684SN/A    // coalesced request with the same tick_index
2861464SN/A    for (int i = 0; i < coalescedReq_cnt; ++i) {
2871060SN/A        first_packet = coalescer->coalescerFIFO[tick_index][i][0];
2881464SN/A
2892292SN/A        if (coalescer->canCoalesce(pkt, first_packet)) {
2902292SN/A            coalescer->coalescerFIFO[tick_index][i].push_back(pkt);
2912292SN/A
2922292SN/A            DPRINTF(GPUTLB, "Coalesced req %i w/ tick_index %d has %d reqs\n",
2932292SN/A                    i, tick_index,
2942292SN/A                    coalescer->coalescerFIFO[tick_index][i].size());
2952292SN/A
2962292SN/A            didCoalesce = true;
2972669Sktlim@umich.edu            break;
2982669Sktlim@umich.edu        }
2992669Sktlim@umich.edu    }
3002669Sktlim@umich.edu
3012669Sktlim@umich.edu    // if this is the first request for this tick_index
3022669Sktlim@umich.edu    // or we did not manage to coalesce, update stats
3031060SN/A    // and make necessary allocations.
3041060SN/A    if (!coalescedReq_cnt || !didCoalesce) {
3051060SN/A        if (update_stats)
3061060SN/A            coalescer->coalescedAccesses++;
3071060SN/A
3081060SN/A        std::vector<PacketPtr> new_array;
3091060SN/A        new_array.push_back(pkt);
3101060SN/A        coalescer->coalescerFIFO[tick_index].push_back(new_array);
3112132SN/A
3121060SN/A        DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after "
3131060SN/A                "push\n", tick_index,
3141060SN/A                coalescer->coalescerFIFO[tick_index].size());
3151060SN/A    }
3162292SN/A
3171060SN/A    //schedule probeTLBEvent next cycle to send the
3181060SN/A    //coalesced requests to the TLB
3191060SN/A    if (!coalescer->probeTLBEvent.scheduled()) {
3201684SN/A        coalescer->schedule(coalescer->probeTLBEvent,
3211684SN/A                curTick() + coalescer->ticks(1));
3221684SN/A    }
3231060SN/A
3241060SN/A    return true;
3251060SN/A}
3261060SN/A
3271060SN/Avoid
3281060SN/ATLBCoalescer::CpuSidePort::recvReqRetry()
3291060SN/A{
3301060SN/A    assert(false);
3311060SN/A}
3322292SN/A
3331060SN/Avoid
3341060SN/ATLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt)
3352292SN/A{
3361060SN/A
3371060SN/A    TheISA::GpuTLB::TranslationState *sender_state =
3381060SN/A        safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
3391060SN/A
3401060SN/A    bool update_stats = !sender_state->prefetch;
3411060SN/A
3421060SN/A    if (update_stats)
3431060SN/A        coalescer->uncoalescedAccesses++;
3442336SN/A
3452336SN/A    // If there is a pending timing request for this virtual address
3461060SN/A    // print a warning message. This is a temporary caveat of
3471060SN/A    // the current simulator where atomic and timing requests can
3481060SN/A    // coexist. FIXME remove this check/warning in the future.
3491060SN/A    Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
3501060SN/A    int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
3511060SN/A
3521060SN/A    if (map_count) {
3531060SN/A        DPRINTF(GPUTLB, "Warning! Functional access to addr %#x sees timing "
3541060SN/A                "req. pending\n", virt_page_addr);
3551060SN/A    }
3561060SN/A
3571060SN/A    coalescer->memSidePort[0]->sendFunctional(pkt);
3581060SN/A}
3591060SN/A
3602292SN/AAddrRangeList
3612292SN/ATLBCoalescer::CpuSidePort::getAddrRanges() const
3622292SN/A{
3632292SN/A    // currently not checked by the master
3641060SN/A    AddrRangeList ranges;
3651060SN/A
3661060SN/A    return ranges;
3672292SN/A}
3682336SN/A
3692308SN/Abool
3702292SN/ATLBCoalescer::MemSidePort::recvTimingResp(PacketPtr pkt)
3712292SN/A{
3722292SN/A    // a translation completed and returned
3732292SN/A    coalescer->updatePhysAddresses(pkt);
3742292SN/A
3752292SN/A    return true;
3762292SN/A}
3772292SN/A
3782292SN/Avoid
3792292SN/ATLBCoalescer::MemSidePort::recvReqRetry()
3802292SN/A{
3812292SN/A    //we've receeived a retry. Schedule a probeTLBEvent
3822292SN/A    if (!coalescer->probeTLBEvent.scheduled())
3832292SN/A        coalescer->schedule(coalescer->probeTLBEvent,
3842292SN/A                curTick() + coalescer->ticks(1));
3852292SN/A}
3862292SN/A
3872292SN/Avoid
3882292SN/ATLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
3892292SN/A{
3902292SN/A    fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
3912292SN/A}
3922292SN/A
3932292SN/ATLBCoalescer::IssueProbeEvent::IssueProbeEvent(TLBCoalescer * _coalescer)
3942292SN/A    : Event(CPU_Tick_Pri), coalescer(_coalescer)
3952292SN/A{
3962292SN/A}
3972292SN/A
3982292SN/Aconst char*
3992292SN/ATLBCoalescer::IssueProbeEvent::description() const
4002292SN/A{
4012292SN/A    return "Probe the TLB below";
4022292SN/A}
4032292SN/A
4042292SN/A/*
4052292SN/A * Here we scan the coalescer FIFO and issue the max
4062292SN/A * number of permitted probes to the TLB below. We
4071060SN/A * permit bypassing of coalesced requests for the same
4081464SN/A * tick_index.
4091464SN/A *
4101464SN/A * We do not access the next tick_index unless we've
4111464SN/A * drained the previous one. The coalesced requests
4121464SN/A * that are successfully sent are moved to the
4131464SN/A * issuedTranslationsTable table (the table which keeps
4142292SN/A * track of the outstanding reqs)
4152292SN/A */
4161684SN/Avoid
4172292SN/ATLBCoalescer::IssueProbeEvent::process()
4181060SN/A{
4191060SN/A    // number of TLB probes sent so far
4201060SN/A    int sent_probes = 0;
4211060SN/A    // rejected denotes a blocking event
4221060SN/A    bool rejected = false;
4231060SN/A
4241060SN/A    // It is set to true either when the recvTiming of the TLB below
4251060SN/A    // returns false or when there is another outstanding request for the
4262292SN/A    // same virt. page.
4271060SN/A
4281060SN/A    DPRINTF(GPUTLB, "triggered TLBCoalescer IssueProbeEvent\n");
4292292SN/A
4301060SN/A    for (auto iter = coalescer->coalescerFIFO.begin();
4311684SN/A         iter != coalescer->coalescerFIFO.end() && !rejected; ) {
4321464SN/A        int coalescedReq_cnt = iter->second.size();
4331684SN/A        int i = 0;
4341684SN/A        int vector_index = 0;
4351464SN/A
4361684SN/A        DPRINTF(GPUTLB, "coalescedReq_cnt is %d for tick_index %d\n",
4371684SN/A               coalescedReq_cnt, iter->first);
4381464SN/A
4391060SN/A        while (i < coalescedReq_cnt) {
4402308SN/A            ++i;
4411060SN/A            PacketPtr first_packet = iter->second[vector_index][0];
4422308SN/A
4431060SN/A            // compute virtual page address for this request
4441060SN/A            Addr virt_page_addr = roundDown(first_packet->req->getVaddr(),
4452308SN/A                    TheISA::PageBytes);
4461060SN/A
4472308SN/A            // is there another outstanding request for the same page addr?
4482308SN/A            int pending_reqs =
4491060SN/A                coalescer->issuedTranslationsTable.count(virt_page_addr);
4502308SN/A
4512308SN/A            if (pending_reqs) {
4522308SN/A                DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
4532308SN/A                        "page %#x\n", virt_page_addr);
4541060SN/A
4552308SN/A                ++vector_index;
4562308SN/A                rejected = true;
4572308SN/A
4581060SN/A                continue;
4591060SN/A            }
4602190SN/A
4612292SN/A            // send the coalesced request for virt_page_addr
4622190SN/A            if (!coalescer->memSidePort[0]->sendTimingReq(first_packet)) {
4632331SN/A                DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
4642292SN/A                       virt_page_addr);
4652190SN/A
4661684SN/A                // No need for a retries queue since we are already buffering
4671464SN/A                // the coalesced request in coalescerFIFO.
4681464SN/A                rejected = true;
4691464SN/A                ++vector_index;
4701464SN/A            } else {
4711464SN/A                TheISA::GpuTLB::TranslationState *tmp_sender_state =
4721684SN/A                    safe_cast<TheISA::GpuTLB::TranslationState*>
4731464SN/A                    (first_packet->senderState);
4741464SN/A
4752292SN/A                bool update_stats = !tmp_sender_state->prefetch;
4761464SN/A
4771464SN/A                if (update_stats) {
4782308SN/A                    // req_cnt is total number of packets represented
4792308SN/A                    // by the one we just sent counting all the way from
4802308SN/A                    // the top of TLB hiearchy (i.e., from the CU)
4812308SN/A                    int req_cnt = tmp_sender_state->reqCnt.back();
4821060SN/A                    coalescer->queuingCycles += (curTick() * req_cnt);
4831060SN/A
4841060SN/A                    DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
4851060SN/A                            coalescer->name(), req_cnt);
4861060SN/A
4871060SN/A                    // pkt_cnt is number of packets we coalesced into the one
4881060SN/A                    // we just sent but only at this coalescer level
4891060SN/A                    int pkt_cnt = iter->second[vector_index].size();
4901060SN/A                    coalescer->localqueuingCycles += (curTick() * pkt_cnt);
4911060SN/A                }
4921464SN/A
4931060SN/A                DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
4941060SN/A                       virt_page_addr);
4951060SN/A
4961060SN/A                //copy coalescedReq to issuedTranslationsTable
4971060SN/A                coalescer->issuedTranslationsTable[virt_page_addr]
4981464SN/A                    = iter->second[vector_index];
4991060SN/A
5001060SN/A                //erase the entry of this coalesced req
5011060SN/A                iter->second.erase(iter->second.begin() + vector_index);
5021060SN/A
5031061SN/A                if (iter->second.empty())
5041061SN/A                    assert(i == coalescedReq_cnt);
5051061SN/A
5061060SN/A                sent_probes++;
5071060SN/A                if (sent_probes == coalescer->TLBProbesPerCycle)
5081060SN/A                   return;
5092292SN/A            }
5102292SN/A        }
5112292SN/A
5122292SN/A        //if there are no more coalesced reqs for this tick_index
5132292SN/A        //erase the hash_map with the first iterator
5142292SN/A        if (iter->second.empty()) {
5151060SN/A            coalescer->coalescerFIFO.erase(iter++);
5161060SN/A        } else {
5171060SN/A            ++iter;
5181060SN/A        }
5191060SN/A    }
5201060SN/A}
5212292SN/A
5222292SN/ATLBCoalescer::CleanupEvent::CleanupEvent(TLBCoalescer* _coalescer)
5232292SN/A    : Event(Maximum_Pri), coalescer(_coalescer)
5242292SN/A{
5252292SN/A}
5262292SN/A
5272292SN/Aconst char*
5282292SN/ATLBCoalescer::CleanupEvent::description() const
5291060SN/A{
5302292SN/A    return "Cleanup issuedTranslationsTable hashmap";
5311060SN/A}
5321060SN/A
5331464SN/Avoid
5341060SN/ATLBCoalescer::CleanupEvent::process()
5352292SN/A{
5362292SN/A    while (!coalescer->cleanupQueue.empty()) {
5372292SN/A        Addr cleanup_addr = coalescer->cleanupQueue.front();
5382292SN/A        coalescer->cleanupQueue.pop();
5392292SN/A        coalescer->issuedTranslationsTable.erase(cleanup_addr);
5402292SN/A
5412292SN/A        DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
5422292SN/A                cleanup_addr);
5432292SN/A    }
5442292SN/A}
5452292SN/A
5462292SN/Avoid
5472292SN/ATLBCoalescer::regStats()
5482292SN/A{
5492292SN/A    uncoalescedAccesses
5502292SN/A        .name(name() + ".uncoalesced_accesses")
5512292SN/A        .desc("Number of uncoalesced TLB accesses")
5522292SN/A        ;
5532292SN/A
5542292SN/A    coalescedAccesses
5552292SN/A        .name(name() + ".coalesced_accesses")
5562292SN/A        .desc("Number of coalesced TLB accesses")
5572292SN/A        ;
5582292SN/A
5592292SN/A    queuingCycles
5602292SN/A        .name(name() + ".queuing_cycles")
5612292SN/A        .desc("Number of cycles spent in queue")
5622292SN/A        ;
5632292SN/A
5642292SN/A    localqueuingCycles
5652292SN/A        .name(name() + ".local_queuing_cycles")
5662292SN/A        .desc("Number of cycles spent in queue for all incoming reqs")
5672292SN/A        ;
5682292SN/A
5692292SN/A    localLatency
5702292SN/A        .name(name() + ".local_latency")
5712292SN/A        .desc("Avg. latency over all incoming pkts")
5722292SN/A        ;
5732292SN/A
5741060SN/A    localLatency = localqueuingCycles / uncoalescedAccesses;
5751464SN/A}
5761060SN/A
5771060SN/A
5782308SN/ATLBCoalescer*
5792308SN/ATLBCoalescerParams::create()
5802308SN/A{
5812308SN/A    return new TLBCoalescer(this);
5822308SN/A}
5832190SN/A
5842292SN/A