tlb_coalescer.cc revision 11308:7d8836fd043d
11060SN/A/* 21762SN/A * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 31060SN/A * All rights reserved. 41060SN/A * 51060SN/A * For use for simulation and test purposes only 61060SN/A * 71060SN/A * Redistribution and use in source and binary forms, with or without 81060SN/A * modification, are permitted provided that the following conditions are met: 91060SN/A * 101060SN/A * 1. Redistributions of source code must retain the above copyright notice, 111060SN/A * this list of conditions and the following disclaimer. 121060SN/A * 131060SN/A * 2. Redistributions in binary form must reproduce the above copyright notice, 141060SN/A * this list of conditions and the following disclaimer in the documentation 151060SN/A * and/or other materials provided with the distribution. 161060SN/A * 171060SN/A * 3. Neither the name of the copyright holder nor the names of its contributors 181060SN/A * may be used to endorse or promote products derived from this software 191060SN/A * without specific prior written permission. 201060SN/A * 211060SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 221060SN/A * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231060SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241060SN/A * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 251060SN/A * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 261060SN/A * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 272665Ssaidi@eecs.umich.edu * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 282665Ssaidi@eecs.umich.edu * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 291060SN/A * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 301060SN/A * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 311464SN/A * POSSIBILITY OF SUCH DAMAGE. 321464SN/A * 331060SN/A * Author: Lisa Hsu 342292SN/A */ 351464SN/A 361060SN/A#include "gpu-compute/tlb_coalescer.hh" 372669Sktlim@umich.edu 381060SN/A#include <cstring> 391060SN/A 401858SN/A#include "debug/GPUTLB.hh" 411464SN/A 421464SN/ATLBCoalescer::TLBCoalescer(const Params *p) : MemObject(p), 432669Sktlim@umich.edu clock(p->clk_domain->clockPeriod()), TLBProbesPerCycle(p->probesPerCycle), 441060SN/A coalescingWindow(p->coalescingWindow), 452669Sktlim@umich.edu disableCoalescing(p->disableCoalescing), probeTLBEvent(this), 462292SN/A cleanupEvent(this) 472292SN/A{ 481717SN/A // create the slave ports based on the number of connected ports 491717SN/A for (size_t i = 0; i < p->port_slave_connection_count; ++i) { 501717SN/A cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", name(), i), 511717SN/A this, i)); 522292SN/A } 531060SN/A 541060SN/A // create the master ports based on the number of connected ports 551060SN/A for (size_t i = 0; i < p->port_master_connection_count; ++i) { 561060SN/A memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", name(), i), 571060SN/A this, i)); 581060SN/A } 591061SN/A} 601061SN/A 611060SN/ABaseSlavePort& 621060SN/ATLBCoalescer::getSlavePort(const std::string &if_name, PortID idx) 631061SN/A{ 641060SN/A if (if_name == "slave") { 651060SN/A if (idx >= static_cast<PortID>(cpuSidePort.size())) { 661060SN/A panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx); 671060SN/A } 682292SN/A 691060SN/A return *cpuSidePort[idx]; 702292SN/A } else { 712107SN/A panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name); 722292SN/A } 732292SN/A} 742292SN/A 752107SN/ABaseMasterPort& 762292SN/ATLBCoalescer::getMasterPort(const std::string &if_name, PortID idx) 772107SN/A{ 781060SN/A if (if_name == "master") { 792292SN/A if (idx >= static_cast<PortID>(memSidePort.size())) { 802292SN/A panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx); 812292SN/A } 822292SN/A 832292SN/A return *memSidePort[idx]; 842292SN/A } else { 851060SN/A panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name); 862292SN/A } 872292SN/A} 881060SN/A 891060SN/A/* 902292SN/A * This method returns true if the <incoming_pkt> 912107SN/A * can be coalesced with <coalesced_pkt> and false otherwise. 921060SN/A * A given set of rules is checked. 931060SN/A * The rules can potentially be modified based on the TLB level. 941060SN/A */ 951060SN/Abool 961060SN/ATLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt) 971060SN/A{ 982292SN/A if (disableCoalescing) 991060SN/A return false; 1001060SN/A 1012292SN/A TheISA::GpuTLB::TranslationState *incoming_state = 1022292SN/A safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->senderState); 1032292SN/A 1042292SN/A TheISA::GpuTLB::TranslationState *coalesced_state = 1052292SN/A safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->senderState); 1062292SN/A 1072292SN/A // Rule 1: Coalesce requests only if they 1081060SN/A // fall within the same virtual page 1092132SN/A Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(), 1101060SN/A TheISA::PageBytes); 1112292SN/A 1122292SN/A Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(), 1132292SN/A TheISA::PageBytes); 1142292SN/A 1152292SN/A if (incoming_virt_page_addr != coalesced_virt_page_addr) 1162292SN/A return false; 1172292SN/A 1182292SN/A //* Rule 2: Coalesce requests only if they 1191060SN/A // share a TLB Mode, i.e. they are both read 1202132SN/A // or write requests. 1211060SN/A BaseTLB::Mode incoming_mode = incoming_state->tlbMode; 1221060SN/A BaseTLB::Mode coalesced_mode = coalesced_state->tlbMode; 1231060SN/A 1241060SN/A if (incoming_mode != coalesced_mode) 1252132SN/A return false; 1262132SN/A 1271060SN/A // when we can coalesce a packet update the reqCnt 1281684SN/A // that is the number of packets represented by 1291060SN/A // this coalesced packet 1301060SN/A if (!incoming_state->prefetch) 1311060SN/A coalesced_state->reqCnt.back() += incoming_state->reqCnt.back(); 1321060SN/A 1332292SN/A return true; 1342292SN/A} 1352292SN/A 1362292SN/A/* 1372292SN/A * We need to update the physical addresses of all the translation requests 1382292SN/A * that were coalesced into the one that just returned. 1392292SN/A */ 1402292SN/Avoid 1411060SN/ATLBCoalescer::updatePhysAddresses(PacketPtr pkt) 1421464SN/A{ 1431464SN/A Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes); 1441464SN/A 1452308SN/A DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n", 1462308SN/A issuedTranslationsTable[virt_page_addr].size(), virt_page_addr); 1472308SN/A 1481060SN/A TheISA::GpuTLB::TranslationState *sender_state = 1491060SN/A safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState); 1501060SN/A 1511060SN/A TheISA::GpuTlbEntry *tlb_entry = sender_state->tlbEntry; 1521060SN/A assert(tlb_entry); 1531060SN/A Addr first_entry_vaddr = tlb_entry->vaddr; 1541060SN/A Addr first_entry_paddr = tlb_entry->paddr; 1551060SN/A int page_size = tlb_entry->size(); 1561060SN/A bool uncacheable = tlb_entry->uncacheable; 1571060SN/A int first_hit_level = sender_state->hitLevel; 1581060SN/A bool valid = tlb_entry->valid; 1591060SN/A 1602292SN/A // Get the physical page address of the translated request 1612292SN/A // Using the page_size specified in the TLBEntry allows us 1622292SN/A // to support different page sizes. 1631060SN/A Addr phys_page_paddr = pkt->req->getPaddr(); 1641060SN/A phys_page_paddr &= ~(page_size - 1); 1651060SN/A 1661060SN/A for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) { 1671060SN/A PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i]; 1681060SN/A TheISA::GpuTLB::TranslationState *sender_state = 1692292SN/A safe_cast<TheISA::GpuTLB::TranslationState*>( 1702292SN/A local_pkt->senderState); 1712292SN/A 1722292SN/A // we are sending the packet back, so pop the reqCnt associated 1732292SN/A // with this level in the TLB hiearchy 1742292SN/A if (!sender_state->prefetch) 1751060SN/A sender_state->reqCnt.pop_back(); 1761060SN/A 1771060SN/A /* 1781060SN/A * Only the first packet from this coalesced request has been 1791060SN/A * translated. Grab the translated phys. page addr and update the 1801060SN/A * physical addresses of the remaining packets with the appropriate 1811060SN/A * page offsets. 1821060SN/A */ 1831060SN/A if (i) { 1841060SN/A Addr paddr = phys_page_paddr; 1851060SN/A paddr |= (local_pkt->req->getVaddr() & (page_size - 1)); 1861060SN/A local_pkt->req->setPaddr(paddr); 1871060SN/A 1881060SN/A if (uncacheable) 1891060SN/A local_pkt->req->setFlags(Request::UNCACHEABLE); 1902292SN/A 1912292SN/A // update senderState->tlbEntry, so we can insert 1922292SN/A // the correct TLBEentry in the TLBs above. 1931060SN/A sender_state->tlbEntry = 1941060SN/A new TheISA::GpuTlbEntry(0, first_entry_vaddr, first_entry_paddr, 1951060SN/A valid); 1962680Sktlim@umich.edu 1972292SN/A // update the hitLevel for all uncoalesced reqs 1981060SN/A // so that each packet knows where it hit 1991060SN/A // (used for statistics in the CUs) 2002132SN/A sender_state->hitLevel = first_hit_level; 2011060SN/A } 2022292SN/A 2032669Sktlim@umich.edu SlavePort *return_port = sender_state->ports.back(); 2042669Sktlim@umich.edu sender_state->ports.pop_back(); 2052669Sktlim@umich.edu 2062669Sktlim@umich.edu // Translation is done - Convert to a response pkt if necessary and 2072669Sktlim@umich.edu // send the translation back 2082292SN/A if (local_pkt->isRequest()) { 2091060SN/A local_pkt->makeTimingResponse(); 2101060SN/A } 2111060SN/A 2121060SN/A return_port->sendTimingResp(local_pkt); 2131060SN/A } 2141060SN/A 2151060SN/A // schedule clean up for end of this cycle 2161060SN/A // This is a maximum priority event and must be on 2171060SN/A // the same cycle as GPUTLB cleanup event to prevent 2181060SN/A // race conditions with an IssueProbeEvent caused by 2191060SN/A // MemSidePort::recvReqRetry 2201060SN/A cleanupQueue.push(virt_page_addr); 2211060SN/A 2221060SN/A if (!cleanupEvent.scheduled()) 2231060SN/A schedule(cleanupEvent, curTick()); 2241060SN/A} 2251060SN/A 2261060SN/A// Receive translation requests, create a coalesced request, 2271060SN/A// and send them to the TLB (TLBProbesPerCycle) 2281060SN/Abool 2291060SN/ATLBCoalescer::CpuSidePort::recvTimingReq(PacketPtr pkt) 2301464SN/A{ 2311464SN/A // first packet of a coalesced request 2321464SN/A PacketPtr first_packet = nullptr; 2331464SN/A // true if we are able to do coalescing 2341464SN/A bool didCoalesce = false; 2351060SN/A // number of coalesced reqs for a given window 2361464SN/A int coalescedReq_cnt = 0; 2371464SN/A 2381464SN/A TheISA::GpuTLB::TranslationState *sender_state = 2391464SN/A safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState); 2401060SN/A 2411060SN/A // push back the port to remember the path back 2421060SN/A sender_state->ports.push_back(this); 2431060SN/A 2441060SN/A bool update_stats = !sender_state->prefetch; 2451060SN/A 2461060SN/A if (update_stats) { 2471060SN/A // if reqCnt is empty then this packet does not represent 2481060SN/A // multiple uncoalesced reqs(pkts) but just a single pkt. 2491060SN/A // If it does though then the reqCnt for each level in the 2501060SN/A // hierarchy accumulates the total number of reqs this packet 2511060SN/A // represents 2521060SN/A int req_cnt = 1; 2531060SN/A 2541060SN/A if (!sender_state->reqCnt.empty()) 2551060SN/A req_cnt = sender_state->reqCnt.back(); 2562292SN/A 2572292SN/A sender_state->reqCnt.push_back(req_cnt); 2582292SN/A 2592292SN/A // update statistics 2602292SN/A coalescer->uncoalescedAccesses++; 2612292SN/A req_cnt = sender_state->reqCnt.back(); 2621060SN/A DPRINTF(GPUTLB, "receiving pkt w/ req_cnt %d\n", req_cnt); 2631060SN/A coalescer->queuingCycles -= (curTick() * req_cnt); 2641060SN/A coalescer->localqueuingCycles -= curTick(); 2651060SN/A } 2662292SN/A 2672292SN/A // FIXME if you want to coalesce not based on the issueTime 2682292SN/A // of the packets (i.e., from the compute unit's perspective) 2692292SN/A // but based on when they reached this coalescer then 2702292SN/A // remove the following if statement and use curTick() or 2712292SN/A // coalescingWindow for the tick_index. 2722292SN/A if (!sender_state->issueTime) 2732292SN/A sender_state->issueTime = curTick(); 2741060SN/A 2751060SN/A // The tick index is used as a key to the coalescerFIFO hashmap. 2762292SN/A // It is shared by all candidates that fall within the 2772292SN/A // given coalescingWindow. 2782292SN/A int64_t tick_index = sender_state->issueTime / coalescer->coalescingWindow; 2792107SN/A 2801060SN/A if (coalescer->coalescerFIFO.count(tick_index)) { 2811060SN/A coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size(); 2821060SN/A } 2831060SN/A 2841464SN/A // see if we can coalesce the incoming pkt with another 2851684SN/A // coalesced request with the same tick_index 2861464SN/A for (int i = 0; i < coalescedReq_cnt; ++i) { 2871060SN/A first_packet = coalescer->coalescerFIFO[tick_index][i][0]; 2881464SN/A 2892292SN/A if (coalescer->canCoalesce(pkt, first_packet)) { 2902292SN/A coalescer->coalescerFIFO[tick_index][i].push_back(pkt); 2912292SN/A 2922292SN/A DPRINTF(GPUTLB, "Coalesced req %i w/ tick_index %d has %d reqs\n", 2932292SN/A i, tick_index, 2942292SN/A coalescer->coalescerFIFO[tick_index][i].size()); 2952292SN/A 2962292SN/A didCoalesce = true; 2972669Sktlim@umich.edu break; 2982669Sktlim@umich.edu } 2992669Sktlim@umich.edu } 3002669Sktlim@umich.edu 3012669Sktlim@umich.edu // if this is the first request for this tick_index 3022669Sktlim@umich.edu // or we did not manage to coalesce, update stats 3031060SN/A // and make necessary allocations. 3041060SN/A if (!coalescedReq_cnt || !didCoalesce) { 3051060SN/A if (update_stats) 3061060SN/A coalescer->coalescedAccesses++; 3071060SN/A 3081060SN/A std::vector<PacketPtr> new_array; 3091060SN/A new_array.push_back(pkt); 3101060SN/A coalescer->coalescerFIFO[tick_index].push_back(new_array); 3112132SN/A 3121060SN/A DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after " 3131060SN/A "push\n", tick_index, 3141060SN/A coalescer->coalescerFIFO[tick_index].size()); 3151060SN/A } 3162292SN/A 3171060SN/A //schedule probeTLBEvent next cycle to send the 3181060SN/A //coalesced requests to the TLB 3191060SN/A if (!coalescer->probeTLBEvent.scheduled()) { 3201684SN/A coalescer->schedule(coalescer->probeTLBEvent, 3211684SN/A curTick() + coalescer->ticks(1)); 3221684SN/A } 3231060SN/A 3241060SN/A return true; 3251060SN/A} 3261060SN/A 3271060SN/Avoid 3281060SN/ATLBCoalescer::CpuSidePort::recvReqRetry() 3291060SN/A{ 3301060SN/A assert(false); 3311060SN/A} 3322292SN/A 3331060SN/Avoid 3341060SN/ATLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt) 3352292SN/A{ 3361060SN/A 3371060SN/A TheISA::GpuTLB::TranslationState *sender_state = 3381060SN/A safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState); 3391060SN/A 3401060SN/A bool update_stats = !sender_state->prefetch; 3411060SN/A 3421060SN/A if (update_stats) 3431060SN/A coalescer->uncoalescedAccesses++; 3442336SN/A 3452336SN/A // If there is a pending timing request for this virtual address 3461060SN/A // print a warning message. This is a temporary caveat of 3471060SN/A // the current simulator where atomic and timing requests can 3481060SN/A // coexist. FIXME remove this check/warning in the future. 3491060SN/A Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes); 3501060SN/A int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr); 3511060SN/A 3521060SN/A if (map_count) { 3531060SN/A DPRINTF(GPUTLB, "Warning! Functional access to addr %#x sees timing " 3541060SN/A "req. pending\n", virt_page_addr); 3551060SN/A } 3561060SN/A 3571060SN/A coalescer->memSidePort[0]->sendFunctional(pkt); 3581060SN/A} 3591060SN/A 3602292SN/AAddrRangeList 3612292SN/ATLBCoalescer::CpuSidePort::getAddrRanges() const 3622292SN/A{ 3632292SN/A // currently not checked by the master 3641060SN/A AddrRangeList ranges; 3651060SN/A 3661060SN/A return ranges; 3672292SN/A} 3682336SN/A 3692308SN/Abool 3702292SN/ATLBCoalescer::MemSidePort::recvTimingResp(PacketPtr pkt) 3712292SN/A{ 3722292SN/A // a translation completed and returned 3732292SN/A coalescer->updatePhysAddresses(pkt); 3742292SN/A 3752292SN/A return true; 3762292SN/A} 3772292SN/A 3782292SN/Avoid 3792292SN/ATLBCoalescer::MemSidePort::recvReqRetry() 3802292SN/A{ 3812292SN/A //we've receeived a retry. Schedule a probeTLBEvent 3822292SN/A if (!coalescer->probeTLBEvent.scheduled()) 3832292SN/A coalescer->schedule(coalescer->probeTLBEvent, 3842292SN/A curTick() + coalescer->ticks(1)); 3852292SN/A} 3862292SN/A 3872292SN/Avoid 3882292SN/ATLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt) 3892292SN/A{ 3902292SN/A fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n"); 3912292SN/A} 3922292SN/A 3932292SN/ATLBCoalescer::IssueProbeEvent::IssueProbeEvent(TLBCoalescer * _coalescer) 3942292SN/A : Event(CPU_Tick_Pri), coalescer(_coalescer) 3952292SN/A{ 3962292SN/A} 3972292SN/A 3982292SN/Aconst char* 3992292SN/ATLBCoalescer::IssueProbeEvent::description() const 4002292SN/A{ 4012292SN/A return "Probe the TLB below"; 4022292SN/A} 4032292SN/A 4042292SN/A/* 4052292SN/A * Here we scan the coalescer FIFO and issue the max 4062292SN/A * number of permitted probes to the TLB below. We 4071060SN/A * permit bypassing of coalesced requests for the same 4081464SN/A * tick_index. 4091464SN/A * 4101464SN/A * We do not access the next tick_index unless we've 4111464SN/A * drained the previous one. The coalesced requests 4121464SN/A * that are successfully sent are moved to the 4131464SN/A * issuedTranslationsTable table (the table which keeps 4142292SN/A * track of the outstanding reqs) 4152292SN/A */ 4161684SN/Avoid 4172292SN/ATLBCoalescer::IssueProbeEvent::process() 4181060SN/A{ 4191060SN/A // number of TLB probes sent so far 4201060SN/A int sent_probes = 0; 4211060SN/A // rejected denotes a blocking event 4221060SN/A bool rejected = false; 4231060SN/A 4241060SN/A // It is set to true either when the recvTiming of the TLB below 4251060SN/A // returns false or when there is another outstanding request for the 4262292SN/A // same virt. page. 4271060SN/A 4281060SN/A DPRINTF(GPUTLB, "triggered TLBCoalescer IssueProbeEvent\n"); 4292292SN/A 4301060SN/A for (auto iter = coalescer->coalescerFIFO.begin(); 4311684SN/A iter != coalescer->coalescerFIFO.end() && !rejected; ) { 4321464SN/A int coalescedReq_cnt = iter->second.size(); 4331684SN/A int i = 0; 4341684SN/A int vector_index = 0; 4351464SN/A 4361684SN/A DPRINTF(GPUTLB, "coalescedReq_cnt is %d for tick_index %d\n", 4371684SN/A coalescedReq_cnt, iter->first); 4381464SN/A 4391060SN/A while (i < coalescedReq_cnt) { 4402308SN/A ++i; 4411060SN/A PacketPtr first_packet = iter->second[vector_index][0]; 4422308SN/A 4431060SN/A // compute virtual page address for this request 4441060SN/A Addr virt_page_addr = roundDown(first_packet->req->getVaddr(), 4452308SN/A TheISA::PageBytes); 4461060SN/A 4472308SN/A // is there another outstanding request for the same page addr? 4482308SN/A int pending_reqs = 4491060SN/A coalescer->issuedTranslationsTable.count(virt_page_addr); 4502308SN/A 4512308SN/A if (pending_reqs) { 4522308SN/A DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for " 4532308SN/A "page %#x\n", virt_page_addr); 4541060SN/A 4552308SN/A ++vector_index; 4562308SN/A rejected = true; 4572308SN/A 4581060SN/A continue; 4591060SN/A } 4602190SN/A 4612292SN/A // send the coalesced request for virt_page_addr 4622190SN/A if (!coalescer->memSidePort[0]->sendTimingReq(first_packet)) { 4632331SN/A DPRINTF(GPUTLB, "Failed to send TLB request for page %#x", 4642292SN/A virt_page_addr); 4652190SN/A 4661684SN/A // No need for a retries queue since we are already buffering 4671464SN/A // the coalesced request in coalescerFIFO. 4681464SN/A rejected = true; 4691464SN/A ++vector_index; 4701464SN/A } else { 4711464SN/A TheISA::GpuTLB::TranslationState *tmp_sender_state = 4721684SN/A safe_cast<TheISA::GpuTLB::TranslationState*> 4731464SN/A (first_packet->senderState); 4741464SN/A 4752292SN/A bool update_stats = !tmp_sender_state->prefetch; 4761464SN/A 4771464SN/A if (update_stats) { 4782308SN/A // req_cnt is total number of packets represented 4792308SN/A // by the one we just sent counting all the way from 4802308SN/A // the top of TLB hiearchy (i.e., from the CU) 4812308SN/A int req_cnt = tmp_sender_state->reqCnt.back(); 4821060SN/A coalescer->queuingCycles += (curTick() * req_cnt); 4831060SN/A 4841060SN/A DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n", 4851060SN/A coalescer->name(), req_cnt); 4861060SN/A 4871060SN/A // pkt_cnt is number of packets we coalesced into the one 4881060SN/A // we just sent but only at this coalescer level 4891060SN/A int pkt_cnt = iter->second[vector_index].size(); 4901060SN/A coalescer->localqueuingCycles += (curTick() * pkt_cnt); 4911060SN/A } 4921464SN/A 4931060SN/A DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x", 4941060SN/A virt_page_addr); 4951060SN/A 4961060SN/A //copy coalescedReq to issuedTranslationsTable 4971060SN/A coalescer->issuedTranslationsTable[virt_page_addr] 4981464SN/A = iter->second[vector_index]; 4991060SN/A 5001060SN/A //erase the entry of this coalesced req 5011060SN/A iter->second.erase(iter->second.begin() + vector_index); 5021060SN/A 5031061SN/A if (iter->second.empty()) 5041061SN/A assert(i == coalescedReq_cnt); 5051061SN/A 5061060SN/A sent_probes++; 5071060SN/A if (sent_probes == coalescer->TLBProbesPerCycle) 5081060SN/A return; 5092292SN/A } 5102292SN/A } 5112292SN/A 5122292SN/A //if there are no more coalesced reqs for this tick_index 5132292SN/A //erase the hash_map with the first iterator 5142292SN/A if (iter->second.empty()) { 5151060SN/A coalescer->coalescerFIFO.erase(iter++); 5161060SN/A } else { 5171060SN/A ++iter; 5181060SN/A } 5191060SN/A } 5201060SN/A} 5212292SN/A 5222292SN/ATLBCoalescer::CleanupEvent::CleanupEvent(TLBCoalescer* _coalescer) 5232292SN/A : Event(Maximum_Pri), coalescer(_coalescer) 5242292SN/A{ 5252292SN/A} 5262292SN/A 5272292SN/Aconst char* 5282292SN/ATLBCoalescer::CleanupEvent::description() const 5291060SN/A{ 5302292SN/A return "Cleanup issuedTranslationsTable hashmap"; 5311060SN/A} 5321060SN/A 5331464SN/Avoid 5341060SN/ATLBCoalescer::CleanupEvent::process() 5352292SN/A{ 5362292SN/A while (!coalescer->cleanupQueue.empty()) { 5372292SN/A Addr cleanup_addr = coalescer->cleanupQueue.front(); 5382292SN/A coalescer->cleanupQueue.pop(); 5392292SN/A coalescer->issuedTranslationsTable.erase(cleanup_addr); 5402292SN/A 5412292SN/A DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n", 5422292SN/A cleanup_addr); 5432292SN/A } 5442292SN/A} 5452292SN/A 5462292SN/Avoid 5472292SN/ATLBCoalescer::regStats() 5482292SN/A{ 5492292SN/A uncoalescedAccesses 5502292SN/A .name(name() + ".uncoalesced_accesses") 5512292SN/A .desc("Number of uncoalesced TLB accesses") 5522292SN/A ; 5532292SN/A 5542292SN/A coalescedAccesses 5552292SN/A .name(name() + ".coalesced_accesses") 5562292SN/A .desc("Number of coalesced TLB accesses") 5572292SN/A ; 5582292SN/A 5592292SN/A queuingCycles 5602292SN/A .name(name() + ".queuing_cycles") 5612292SN/A .desc("Number of cycles spent in queue") 5622292SN/A ; 5632292SN/A 5642292SN/A localqueuingCycles 5652292SN/A .name(name() + ".local_queuing_cycles") 5662292SN/A .desc("Number of cycles spent in queue for all incoming reqs") 5672292SN/A ; 5682292SN/A 5692292SN/A localLatency 5702292SN/A .name(name() + ".local_latency") 5712292SN/A .desc("Avg. latency over all incoming pkts") 5722292SN/A ; 5732292SN/A 5741060SN/A localLatency = localqueuingCycles / uncoalescedAccesses; 5751464SN/A} 5761060SN/A 5771060SN/A 5782308SN/ATLBCoalescer* 5792308SN/ATLBCoalescerParams::create() 5802308SN/A{ 5812308SN/A return new TLBCoalescer(this); 5822308SN/A} 5832190SN/A 5842292SN/A