tlb_coalescer.cc (12717:2e2c211644d2) tlb_coalescer.cc (13449:2f7efa89c58b)
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Lisa Hsu
34 */
35
36#include "gpu-compute/tlb_coalescer.hh"
37
38#include <cstring>
39
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Lisa Hsu
34 */
35
36#include "gpu-compute/tlb_coalescer.hh"
37
38#include <cstring>
39
40#include "base/logging.hh"
40#include "debug/GPUTLB.hh"
41#include "sim/process.hh"
42
43TLBCoalescer::TLBCoalescer(const Params *p)
44 : MemObject(p),
45 clock(p->clk_domain->clockPeriod()),
46 TLBProbesPerCycle(p->probesPerCycle),
47 coalescingWindow(p->coalescingWindow),
48 disableCoalescing(p->disableCoalescing),
49 probeTLBEvent([this]{ processProbeTLBEvent(); },
50 "Probe the TLB below",
51 false, Event::CPU_Tick_Pri),
52 cleanupEvent([this]{ processCleanupEvent(); },
53 "Cleanup issuedTranslationsTable hashmap",
54 false, Event::Maximum_Pri)
55{
56 // create the slave ports based on the number of connected ports
57 for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
58 cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", name(), i),
59 this, i));
60 }
61
62 // create the master ports based on the number of connected ports
63 for (size_t i = 0; i < p->port_master_connection_count; ++i) {
64 memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", name(), i),
65 this, i));
66 }
67}
68
69BaseSlavePort&
70TLBCoalescer::getSlavePort(const std::string &if_name, PortID idx)
71{
72 if (if_name == "slave") {
73 if (idx >= static_cast<PortID>(cpuSidePort.size())) {
74 panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
75 }
76
77 return *cpuSidePort[idx];
78 } else {
79 panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
80 }
81}
82
83BaseMasterPort&
84TLBCoalescer::getMasterPort(const std::string &if_name, PortID idx)
85{
86 if (if_name == "master") {
87 if (idx >= static_cast<PortID>(memSidePort.size())) {
88 panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
89 }
90
91 return *memSidePort[idx];
92 } else {
93 panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
94 }
95}
96
97/*
98 * This method returns true if the <incoming_pkt>
99 * can be coalesced with <coalesced_pkt> and false otherwise.
100 * A given set of rules is checked.
101 * The rules can potentially be modified based on the TLB level.
102 */
103bool
104TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt)
105{
106 if (disableCoalescing)
107 return false;
108
109 TheISA::GpuTLB::TranslationState *incoming_state =
110 safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->senderState);
111
112 TheISA::GpuTLB::TranslationState *coalesced_state =
113 safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->senderState);
114
115 // Rule 1: Coalesce requests only if they
116 // fall within the same virtual page
117 Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(),
118 TheISA::PageBytes);
119
120 Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(),
121 TheISA::PageBytes);
122
123 if (incoming_virt_page_addr != coalesced_virt_page_addr)
124 return false;
125
126 //* Rule 2: Coalesce requests only if they
127 // share a TLB Mode, i.e. they are both read
128 // or write requests.
129 BaseTLB::Mode incoming_mode = incoming_state->tlbMode;
130 BaseTLB::Mode coalesced_mode = coalesced_state->tlbMode;
131
132 if (incoming_mode != coalesced_mode)
133 return false;
134
135 // when we can coalesce a packet update the reqCnt
136 // that is the number of packets represented by
137 // this coalesced packet
138 if (!incoming_state->prefetch)
139 coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
140
141 return true;
142}
143
144/*
145 * We need to update the physical addresses of all the translation requests
146 * that were coalesced into the one that just returned.
147 */
148void
149TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
150{
151 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
152
153 DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n",
154 issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);
155
156 TheISA::GpuTLB::TranslationState *sender_state =
157 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
158
159 TheISA::TlbEntry *tlb_entry = sender_state->tlbEntry;
160 assert(tlb_entry);
161 Addr first_entry_vaddr = tlb_entry->vaddr;
162 Addr first_entry_paddr = tlb_entry->paddr;
163 int page_size = tlb_entry->size();
164 bool uncacheable = tlb_entry->uncacheable;
165 int first_hit_level = sender_state->hitLevel;
166
167 // Get the physical page address of the translated request
168 // Using the page_size specified in the TLBEntry allows us
169 // to support different page sizes.
170 Addr phys_page_paddr = pkt->req->getPaddr();
171 phys_page_paddr &= ~(page_size - 1);
172
173 for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
174 PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i];
175 TheISA::GpuTLB::TranslationState *sender_state =
176 safe_cast<TheISA::GpuTLB::TranslationState*>(
177 local_pkt->senderState);
178
179 // we are sending the packet back, so pop the reqCnt associated
180 // with this level in the TLB hiearchy
181 if (!sender_state->prefetch)
182 sender_state->reqCnt.pop_back();
183
184 /*
185 * Only the first packet from this coalesced request has been
186 * translated. Grab the translated phys. page addr and update the
187 * physical addresses of the remaining packets with the appropriate
188 * page offsets.
189 */
190 if (i) {
191 Addr paddr = phys_page_paddr;
192 paddr |= (local_pkt->req->getVaddr() & (page_size - 1));
193 local_pkt->req->setPaddr(paddr);
194
195 if (uncacheable)
196 local_pkt->req->setFlags(Request::UNCACHEABLE);
197
198 // update senderState->tlbEntry, so we can insert
199 // the correct TLBEentry in the TLBs above.
200 auto p = sender_state->tc->getProcessPtr();
201 sender_state->tlbEntry =
202 new TheISA::TlbEntry(p->pid(), first_entry_vaddr,
203 first_entry_paddr, false, false);
204
205 // update the hitLevel for all uncoalesced reqs
206 // so that each packet knows where it hit
207 // (used for statistics in the CUs)
208 sender_state->hitLevel = first_hit_level;
209 }
210
211 SlavePort *return_port = sender_state->ports.back();
212 sender_state->ports.pop_back();
213
214 // Translation is done - Convert to a response pkt if necessary and
215 // send the translation back
216 if (local_pkt->isRequest()) {
217 local_pkt->makeTimingResponse();
218 }
219
220 return_port->sendTimingResp(local_pkt);
221 }
222
223 // schedule clean up for end of this cycle
224 // This is a maximum priority event and must be on
225 // the same cycle as GPUTLB cleanup event to prevent
226 // race conditions with an IssueProbeEvent caused by
227 // MemSidePort::recvReqRetry
228 cleanupQueue.push(virt_page_addr);
229
230 if (!cleanupEvent.scheduled())
231 schedule(cleanupEvent, curTick());
232}
233
234// Receive translation requests, create a coalesced request,
235// and send them to the TLB (TLBProbesPerCycle)
236bool
237TLBCoalescer::CpuSidePort::recvTimingReq(PacketPtr pkt)
238{
239 // first packet of a coalesced request
240 PacketPtr first_packet = nullptr;
241 // true if we are able to do coalescing
242 bool didCoalesce = false;
243 // number of coalesced reqs for a given window
244 int coalescedReq_cnt = 0;
245
246 TheISA::GpuTLB::TranslationState *sender_state =
247 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
248
249 // push back the port to remember the path back
250 sender_state->ports.push_back(this);
251
252 bool update_stats = !sender_state->prefetch;
253
254 if (update_stats) {
255 // if reqCnt is empty then this packet does not represent
256 // multiple uncoalesced reqs(pkts) but just a single pkt.
257 // If it does though then the reqCnt for each level in the
258 // hierarchy accumulates the total number of reqs this packet
259 // represents
260 int req_cnt = 1;
261
262 if (!sender_state->reqCnt.empty())
263 req_cnt = sender_state->reqCnt.back();
264
265 sender_state->reqCnt.push_back(req_cnt);
266
267 // update statistics
268 coalescer->uncoalescedAccesses++;
269 req_cnt = sender_state->reqCnt.back();
270 DPRINTF(GPUTLB, "receiving pkt w/ req_cnt %d\n", req_cnt);
271 coalescer->queuingCycles -= (curTick() * req_cnt);
272 coalescer->localqueuingCycles -= curTick();
273 }
274
275 // FIXME if you want to coalesce not based on the issueTime
276 // of the packets (i.e., from the compute unit's perspective)
277 // but based on when they reached this coalescer then
278 // remove the following if statement and use curTick() or
279 // coalescingWindow for the tick_index.
280 if (!sender_state->issueTime)
281 sender_state->issueTime = curTick();
282
283 // The tick index is used as a key to the coalescerFIFO hashmap.
284 // It is shared by all candidates that fall within the
285 // given coalescingWindow.
286 int64_t tick_index = sender_state->issueTime / coalescer->coalescingWindow;
287
288 if (coalescer->coalescerFIFO.count(tick_index)) {
289 coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size();
290 }
291
292 // see if we can coalesce the incoming pkt with another
293 // coalesced request with the same tick_index
294 for (int i = 0; i < coalescedReq_cnt; ++i) {
295 first_packet = coalescer->coalescerFIFO[tick_index][i][0];
296
297 if (coalescer->canCoalesce(pkt, first_packet)) {
298 coalescer->coalescerFIFO[tick_index][i].push_back(pkt);
299
300 DPRINTF(GPUTLB, "Coalesced req %i w/ tick_index %d has %d reqs\n",
301 i, tick_index,
302 coalescer->coalescerFIFO[tick_index][i].size());
303
304 didCoalesce = true;
305 break;
306 }
307 }
308
309 // if this is the first request for this tick_index
310 // or we did not manage to coalesce, update stats
311 // and make necessary allocations.
312 if (!coalescedReq_cnt || !didCoalesce) {
313 if (update_stats)
314 coalescer->coalescedAccesses++;
315
316 std::vector<PacketPtr> new_array;
317 new_array.push_back(pkt);
318 coalescer->coalescerFIFO[tick_index].push_back(new_array);
319
320 DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after "
321 "push\n", tick_index,
322 coalescer->coalescerFIFO[tick_index].size());
323 }
324
325 //schedule probeTLBEvent next cycle to send the
326 //coalesced requests to the TLB
327 if (!coalescer->probeTLBEvent.scheduled()) {
328 coalescer->schedule(coalescer->probeTLBEvent,
329 curTick() + coalescer->ticks(1));
330 }
331
332 return true;
333}
334
335void
336TLBCoalescer::CpuSidePort::recvReqRetry()
337{
41#include "debug/GPUTLB.hh"
42#include "sim/process.hh"
43
44TLBCoalescer::TLBCoalescer(const Params *p)
45 : MemObject(p),
46 clock(p->clk_domain->clockPeriod()),
47 TLBProbesPerCycle(p->probesPerCycle),
48 coalescingWindow(p->coalescingWindow),
49 disableCoalescing(p->disableCoalescing),
50 probeTLBEvent([this]{ processProbeTLBEvent(); },
51 "Probe the TLB below",
52 false, Event::CPU_Tick_Pri),
53 cleanupEvent([this]{ processCleanupEvent(); },
54 "Cleanup issuedTranslationsTable hashmap",
55 false, Event::Maximum_Pri)
56{
57 // create the slave ports based on the number of connected ports
58 for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
59 cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d", name(), i),
60 this, i));
61 }
62
63 // create the master ports based on the number of connected ports
64 for (size_t i = 0; i < p->port_master_connection_count; ++i) {
65 memSidePort.push_back(new MemSidePort(csprintf("%s-port%d", name(), i),
66 this, i));
67 }
68}
69
70BaseSlavePort&
71TLBCoalescer::getSlavePort(const std::string &if_name, PortID idx)
72{
73 if (if_name == "slave") {
74 if (idx >= static_cast<PortID>(cpuSidePort.size())) {
75 panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
76 }
77
78 return *cpuSidePort[idx];
79 } else {
80 panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
81 }
82}
83
84BaseMasterPort&
85TLBCoalescer::getMasterPort(const std::string &if_name, PortID idx)
86{
87 if (if_name == "master") {
88 if (idx >= static_cast<PortID>(memSidePort.size())) {
89 panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
90 }
91
92 return *memSidePort[idx];
93 } else {
94 panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
95 }
96}
97
98/*
99 * This method returns true if the <incoming_pkt>
100 * can be coalesced with <coalesced_pkt> and false otherwise.
101 * A given set of rules is checked.
102 * The rules can potentially be modified based on the TLB level.
103 */
104bool
105TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt)
106{
107 if (disableCoalescing)
108 return false;
109
110 TheISA::GpuTLB::TranslationState *incoming_state =
111 safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->senderState);
112
113 TheISA::GpuTLB::TranslationState *coalesced_state =
114 safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->senderState);
115
116 // Rule 1: Coalesce requests only if they
117 // fall within the same virtual page
118 Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(),
119 TheISA::PageBytes);
120
121 Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(),
122 TheISA::PageBytes);
123
124 if (incoming_virt_page_addr != coalesced_virt_page_addr)
125 return false;
126
127 //* Rule 2: Coalesce requests only if they
128 // share a TLB Mode, i.e. they are both read
129 // or write requests.
130 BaseTLB::Mode incoming_mode = incoming_state->tlbMode;
131 BaseTLB::Mode coalesced_mode = coalesced_state->tlbMode;
132
133 if (incoming_mode != coalesced_mode)
134 return false;
135
136 // when we can coalesce a packet update the reqCnt
137 // that is the number of packets represented by
138 // this coalesced packet
139 if (!incoming_state->prefetch)
140 coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
141
142 return true;
143}
144
145/*
146 * We need to update the physical addresses of all the translation requests
147 * that were coalesced into the one that just returned.
148 */
149void
150TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
151{
152 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
153
154 DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n",
155 issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);
156
157 TheISA::GpuTLB::TranslationState *sender_state =
158 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
159
160 TheISA::TlbEntry *tlb_entry = sender_state->tlbEntry;
161 assert(tlb_entry);
162 Addr first_entry_vaddr = tlb_entry->vaddr;
163 Addr first_entry_paddr = tlb_entry->paddr;
164 int page_size = tlb_entry->size();
165 bool uncacheable = tlb_entry->uncacheable;
166 int first_hit_level = sender_state->hitLevel;
167
168 // Get the physical page address of the translated request
169 // Using the page_size specified in the TLBEntry allows us
170 // to support different page sizes.
171 Addr phys_page_paddr = pkt->req->getPaddr();
172 phys_page_paddr &= ~(page_size - 1);
173
174 for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
175 PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i];
176 TheISA::GpuTLB::TranslationState *sender_state =
177 safe_cast<TheISA::GpuTLB::TranslationState*>(
178 local_pkt->senderState);
179
180 // we are sending the packet back, so pop the reqCnt associated
181 // with this level in the TLB hiearchy
182 if (!sender_state->prefetch)
183 sender_state->reqCnt.pop_back();
184
185 /*
186 * Only the first packet from this coalesced request has been
187 * translated. Grab the translated phys. page addr and update the
188 * physical addresses of the remaining packets with the appropriate
189 * page offsets.
190 */
191 if (i) {
192 Addr paddr = phys_page_paddr;
193 paddr |= (local_pkt->req->getVaddr() & (page_size - 1));
194 local_pkt->req->setPaddr(paddr);
195
196 if (uncacheable)
197 local_pkt->req->setFlags(Request::UNCACHEABLE);
198
199 // update senderState->tlbEntry, so we can insert
200 // the correct TLBEentry in the TLBs above.
201 auto p = sender_state->tc->getProcessPtr();
202 sender_state->tlbEntry =
203 new TheISA::TlbEntry(p->pid(), first_entry_vaddr,
204 first_entry_paddr, false, false);
205
206 // update the hitLevel for all uncoalesced reqs
207 // so that each packet knows where it hit
208 // (used for statistics in the CUs)
209 sender_state->hitLevel = first_hit_level;
210 }
211
212 SlavePort *return_port = sender_state->ports.back();
213 sender_state->ports.pop_back();
214
215 // Translation is done - Convert to a response pkt if necessary and
216 // send the translation back
217 if (local_pkt->isRequest()) {
218 local_pkt->makeTimingResponse();
219 }
220
221 return_port->sendTimingResp(local_pkt);
222 }
223
224 // schedule clean up for end of this cycle
225 // This is a maximum priority event and must be on
226 // the same cycle as GPUTLB cleanup event to prevent
227 // race conditions with an IssueProbeEvent caused by
228 // MemSidePort::recvReqRetry
229 cleanupQueue.push(virt_page_addr);
230
231 if (!cleanupEvent.scheduled())
232 schedule(cleanupEvent, curTick());
233}
234
235// Receive translation requests, create a coalesced request,
236// and send them to the TLB (TLBProbesPerCycle)
237bool
238TLBCoalescer::CpuSidePort::recvTimingReq(PacketPtr pkt)
239{
240 // first packet of a coalesced request
241 PacketPtr first_packet = nullptr;
242 // true if we are able to do coalescing
243 bool didCoalesce = false;
244 // number of coalesced reqs for a given window
245 int coalescedReq_cnt = 0;
246
247 TheISA::GpuTLB::TranslationState *sender_state =
248 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
249
250 // push back the port to remember the path back
251 sender_state->ports.push_back(this);
252
253 bool update_stats = !sender_state->prefetch;
254
255 if (update_stats) {
256 // if reqCnt is empty then this packet does not represent
257 // multiple uncoalesced reqs(pkts) but just a single pkt.
258 // If it does though then the reqCnt for each level in the
259 // hierarchy accumulates the total number of reqs this packet
260 // represents
261 int req_cnt = 1;
262
263 if (!sender_state->reqCnt.empty())
264 req_cnt = sender_state->reqCnt.back();
265
266 sender_state->reqCnt.push_back(req_cnt);
267
268 // update statistics
269 coalescer->uncoalescedAccesses++;
270 req_cnt = sender_state->reqCnt.back();
271 DPRINTF(GPUTLB, "receiving pkt w/ req_cnt %d\n", req_cnt);
272 coalescer->queuingCycles -= (curTick() * req_cnt);
273 coalescer->localqueuingCycles -= curTick();
274 }
275
276 // FIXME if you want to coalesce not based on the issueTime
277 // of the packets (i.e., from the compute unit's perspective)
278 // but based on when they reached this coalescer then
279 // remove the following if statement and use curTick() or
280 // coalescingWindow for the tick_index.
281 if (!sender_state->issueTime)
282 sender_state->issueTime = curTick();
283
284 // The tick index is used as a key to the coalescerFIFO hashmap.
285 // It is shared by all candidates that fall within the
286 // given coalescingWindow.
287 int64_t tick_index = sender_state->issueTime / coalescer->coalescingWindow;
288
289 if (coalescer->coalescerFIFO.count(tick_index)) {
290 coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size();
291 }
292
293 // see if we can coalesce the incoming pkt with another
294 // coalesced request with the same tick_index
295 for (int i = 0; i < coalescedReq_cnt; ++i) {
296 first_packet = coalescer->coalescerFIFO[tick_index][i][0];
297
298 if (coalescer->canCoalesce(pkt, first_packet)) {
299 coalescer->coalescerFIFO[tick_index][i].push_back(pkt);
300
301 DPRINTF(GPUTLB, "Coalesced req %i w/ tick_index %d has %d reqs\n",
302 i, tick_index,
303 coalescer->coalescerFIFO[tick_index][i].size());
304
305 didCoalesce = true;
306 break;
307 }
308 }
309
310 // if this is the first request for this tick_index
311 // or we did not manage to coalesce, update stats
312 // and make necessary allocations.
313 if (!coalescedReq_cnt || !didCoalesce) {
314 if (update_stats)
315 coalescer->coalescedAccesses++;
316
317 std::vector<PacketPtr> new_array;
318 new_array.push_back(pkt);
319 coalescer->coalescerFIFO[tick_index].push_back(new_array);
320
321 DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after "
322 "push\n", tick_index,
323 coalescer->coalescerFIFO[tick_index].size());
324 }
325
326 //schedule probeTLBEvent next cycle to send the
327 //coalesced requests to the TLB
328 if (!coalescer->probeTLBEvent.scheduled()) {
329 coalescer->schedule(coalescer->probeTLBEvent,
330 curTick() + coalescer->ticks(1));
331 }
332
333 return true;
334}
335
336void
337TLBCoalescer::CpuSidePort::recvReqRetry()
338{
338 assert(false);
339 panic("recvReqRetry called");
339}
340
341void
342TLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt)
343{
344
345 TheISA::GpuTLB::TranslationState *sender_state =
346 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
347
348 bool update_stats = !sender_state->prefetch;
349
350 if (update_stats)
351 coalescer->uncoalescedAccesses++;
352
353 // If there is a pending timing request for this virtual address
354 // print a warning message. This is a temporary caveat of
355 // the current simulator where atomic and timing requests can
356 // coexist. FIXME remove this check/warning in the future.
357 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
358 int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
359
360 if (map_count) {
361 DPRINTF(GPUTLB, "Warning! Functional access to addr %#x sees timing "
362 "req. pending\n", virt_page_addr);
363 }
364
365 coalescer->memSidePort[0]->sendFunctional(pkt);
366}
367
368AddrRangeList
369TLBCoalescer::CpuSidePort::getAddrRanges() const
370{
371 // currently not checked by the master
372 AddrRangeList ranges;
373
374 return ranges;
375}
376
377bool
378TLBCoalescer::MemSidePort::recvTimingResp(PacketPtr pkt)
379{
380 // a translation completed and returned
381 coalescer->updatePhysAddresses(pkt);
382
383 return true;
384}
385
386void
387TLBCoalescer::MemSidePort::recvReqRetry()
388{
389 //we've receeived a retry. Schedule a probeTLBEvent
390 if (!coalescer->probeTLBEvent.scheduled())
391 coalescer->schedule(coalescer->probeTLBEvent,
392 curTick() + coalescer->ticks(1));
393}
394
395void
396TLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
397{
398 fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
399}
400
401/*
402 * Here we scan the coalescer FIFO and issue the max
403 * number of permitted probes to the TLB below. We
404 * permit bypassing of coalesced requests for the same
405 * tick_index.
406 *
407 * We do not access the next tick_index unless we've
408 * drained the previous one. The coalesced requests
409 * that are successfully sent are moved to the
410 * issuedTranslationsTable table (the table which keeps
411 * track of the outstanding reqs)
412 */
413void
414TLBCoalescer::processProbeTLBEvent()
415{
416 // number of TLB probes sent so far
417 int sent_probes = 0;
418 // rejected denotes a blocking event
419 bool rejected = false;
420
421 // It is set to true either when the recvTiming of the TLB below
422 // returns false or when there is another outstanding request for the
423 // same virt. page.
424
425 DPRINTF(GPUTLB, "triggered TLBCoalescer %s\n", __func__);
426
427 for (auto iter = coalescerFIFO.begin();
428 iter != coalescerFIFO.end() && !rejected; ) {
429 int coalescedReq_cnt = iter->second.size();
430 int i = 0;
431 int vector_index = 0;
432
433 DPRINTF(GPUTLB, "coalescedReq_cnt is %d for tick_index %d\n",
434 coalescedReq_cnt, iter->first);
435
436 while (i < coalescedReq_cnt) {
437 ++i;
438 PacketPtr first_packet = iter->second[vector_index][0];
439
440 // compute virtual page address for this request
441 Addr virt_page_addr = roundDown(first_packet->req->getVaddr(),
442 TheISA::PageBytes);
443
444 // is there another outstanding request for the same page addr?
445 int pending_reqs =
446 issuedTranslationsTable.count(virt_page_addr);
447
448 if (pending_reqs) {
449 DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
450 "page %#x\n", virt_page_addr);
451
452 ++vector_index;
453 rejected = true;
454
455 continue;
456 }
457
458 // send the coalesced request for virt_page_addr
459 if (!memSidePort[0]->sendTimingReq(first_packet)) {
460 DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
461 virt_page_addr);
462
463 // No need for a retries queue since we are already buffering
464 // the coalesced request in coalescerFIFO.
465 rejected = true;
466 ++vector_index;
467 } else {
468 TheISA::GpuTLB::TranslationState *tmp_sender_state =
469 safe_cast<TheISA::GpuTLB::TranslationState*>
470 (first_packet->senderState);
471
472 bool update_stats = !tmp_sender_state->prefetch;
473
474 if (update_stats) {
475 // req_cnt is total number of packets represented
476 // by the one we just sent counting all the way from
477 // the top of TLB hiearchy (i.e., from the CU)
478 int req_cnt = tmp_sender_state->reqCnt.back();
479 queuingCycles += (curTick() * req_cnt);
480
481 DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
482 name(), req_cnt);
483
484 // pkt_cnt is number of packets we coalesced into the one
485 // we just sent but only at this coalescer level
486 int pkt_cnt = iter->second[vector_index].size();
487 localqueuingCycles += (curTick() * pkt_cnt);
488 }
489
490 DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
491 virt_page_addr);
492
493 //copy coalescedReq to issuedTranslationsTable
494 issuedTranslationsTable[virt_page_addr]
495 = iter->second[vector_index];
496
497 //erase the entry of this coalesced req
498 iter->second.erase(iter->second.begin() + vector_index);
499
500 if (iter->second.empty())
501 assert(i == coalescedReq_cnt);
502
503 sent_probes++;
504 if (sent_probes == TLBProbesPerCycle)
505 return;
506 }
507 }
508
509 //if there are no more coalesced reqs for this tick_index
510 //erase the hash_map with the first iterator
511 if (iter->second.empty()) {
512 coalescerFIFO.erase(iter++);
513 } else {
514 ++iter;
515 }
516 }
517}
518
519void
520TLBCoalescer::processCleanupEvent()
521{
522 while (!cleanupQueue.empty()) {
523 Addr cleanup_addr = cleanupQueue.front();
524 cleanupQueue.pop();
525 issuedTranslationsTable.erase(cleanup_addr);
526
527 DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
528 cleanup_addr);
529 }
530}
531
532void
533TLBCoalescer::regStats()
534{
535 MemObject::regStats();
536
537 uncoalescedAccesses
538 .name(name() + ".uncoalesced_accesses")
539 .desc("Number of uncoalesced TLB accesses")
540 ;
541
542 coalescedAccesses
543 .name(name() + ".coalesced_accesses")
544 .desc("Number of coalesced TLB accesses")
545 ;
546
547 queuingCycles
548 .name(name() + ".queuing_cycles")
549 .desc("Number of cycles spent in queue")
550 ;
551
552 localqueuingCycles
553 .name(name() + ".local_queuing_cycles")
554 .desc("Number of cycles spent in queue for all incoming reqs")
555 ;
556
557 localLatency
558 .name(name() + ".local_latency")
559 .desc("Avg. latency over all incoming pkts")
560 ;
561
562 localLatency = localqueuingCycles / uncoalescedAccesses;
563}
564
565
566TLBCoalescer*
567TLBCoalescerParams::create()
568{
569 return new TLBCoalescer(this);
570}
571
340}
341
342void
343TLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt)
344{
345
346 TheISA::GpuTLB::TranslationState *sender_state =
347 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
348
349 bool update_stats = !sender_state->prefetch;
350
351 if (update_stats)
352 coalescer->uncoalescedAccesses++;
353
354 // If there is a pending timing request for this virtual address
355 // print a warning message. This is a temporary caveat of
356 // the current simulator where atomic and timing requests can
357 // coexist. FIXME remove this check/warning in the future.
358 Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
359 int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
360
361 if (map_count) {
362 DPRINTF(GPUTLB, "Warning! Functional access to addr %#x sees timing "
363 "req. pending\n", virt_page_addr);
364 }
365
366 coalescer->memSidePort[0]->sendFunctional(pkt);
367}
368
369AddrRangeList
370TLBCoalescer::CpuSidePort::getAddrRanges() const
371{
372 // currently not checked by the master
373 AddrRangeList ranges;
374
375 return ranges;
376}
377
378bool
379TLBCoalescer::MemSidePort::recvTimingResp(PacketPtr pkt)
380{
381 // a translation completed and returned
382 coalescer->updatePhysAddresses(pkt);
383
384 return true;
385}
386
387void
388TLBCoalescer::MemSidePort::recvReqRetry()
389{
390 //we've receeived a retry. Schedule a probeTLBEvent
391 if (!coalescer->probeTLBEvent.scheduled())
392 coalescer->schedule(coalescer->probeTLBEvent,
393 curTick() + coalescer->ticks(1));
394}
395
void
TLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
{
    // Functional requests are only expected on the CPU side of the
    // coalescer; receiving one here is a usage error.
    fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
}
401
402/*
403 * Here we scan the coalescer FIFO and issue the max
404 * number of permitted probes to the TLB below. We
405 * permit bypassing of coalesced requests for the same
406 * tick_index.
407 *
408 * We do not access the next tick_index unless we've
409 * drained the previous one. The coalesced requests
410 * that are successfully sent are moved to the
411 * issuedTranslationsTable table (the table which keeps
412 * track of the outstanding reqs)
413 */
414void
415TLBCoalescer::processProbeTLBEvent()
416{
417 // number of TLB probes sent so far
418 int sent_probes = 0;
419 // rejected denotes a blocking event
420 bool rejected = false;
421
422 // It is set to true either when the recvTiming of the TLB below
423 // returns false or when there is another outstanding request for the
424 // same virt. page.
425
426 DPRINTF(GPUTLB, "triggered TLBCoalescer %s\n", __func__);
427
428 for (auto iter = coalescerFIFO.begin();
429 iter != coalescerFIFO.end() && !rejected; ) {
430 int coalescedReq_cnt = iter->second.size();
431 int i = 0;
432 int vector_index = 0;
433
434 DPRINTF(GPUTLB, "coalescedReq_cnt is %d for tick_index %d\n",
435 coalescedReq_cnt, iter->first);
436
437 while (i < coalescedReq_cnt) {
438 ++i;
439 PacketPtr first_packet = iter->second[vector_index][0];
440
441 // compute virtual page address for this request
442 Addr virt_page_addr = roundDown(first_packet->req->getVaddr(),
443 TheISA::PageBytes);
444
445 // is there another outstanding request for the same page addr?
446 int pending_reqs =
447 issuedTranslationsTable.count(virt_page_addr);
448
449 if (pending_reqs) {
450 DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
451 "page %#x\n", virt_page_addr);
452
453 ++vector_index;
454 rejected = true;
455
456 continue;
457 }
458
459 // send the coalesced request for virt_page_addr
460 if (!memSidePort[0]->sendTimingReq(first_packet)) {
461 DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
462 virt_page_addr);
463
464 // No need for a retries queue since we are already buffering
465 // the coalesced request in coalescerFIFO.
466 rejected = true;
467 ++vector_index;
468 } else {
469 TheISA::GpuTLB::TranslationState *tmp_sender_state =
470 safe_cast<TheISA::GpuTLB::TranslationState*>
471 (first_packet->senderState);
472
473 bool update_stats = !tmp_sender_state->prefetch;
474
475 if (update_stats) {
476 // req_cnt is total number of packets represented
477 // by the one we just sent counting all the way from
478 // the top of TLB hiearchy (i.e., from the CU)
479 int req_cnt = tmp_sender_state->reqCnt.back();
480 queuingCycles += (curTick() * req_cnt);
481
482 DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
483 name(), req_cnt);
484
485 // pkt_cnt is number of packets we coalesced into the one
486 // we just sent but only at this coalescer level
487 int pkt_cnt = iter->second[vector_index].size();
488 localqueuingCycles += (curTick() * pkt_cnt);
489 }
490
491 DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
492 virt_page_addr);
493
494 //copy coalescedReq to issuedTranslationsTable
495 issuedTranslationsTable[virt_page_addr]
496 = iter->second[vector_index];
497
498 //erase the entry of this coalesced req
499 iter->second.erase(iter->second.begin() + vector_index);
500
501 if (iter->second.empty())
502 assert(i == coalescedReq_cnt);
503
504 sent_probes++;
505 if (sent_probes == TLBProbesPerCycle)
506 return;
507 }
508 }
509
510 //if there are no more coalesced reqs for this tick_index
511 //erase the hash_map with the first iterator
512 if (iter->second.empty()) {
513 coalescerFIFO.erase(iter++);
514 } else {
515 ++iter;
516 }
517 }
518}
519
520void
521TLBCoalescer::processCleanupEvent()
522{
523 while (!cleanupQueue.empty()) {
524 Addr cleanup_addr = cleanupQueue.front();
525 cleanupQueue.pop();
526 issuedTranslationsTable.erase(cleanup_addr);
527
528 DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
529 cleanup_addr);
530 }
531}
532
533void
534TLBCoalescer::regStats()
535{
536 MemObject::regStats();
537
538 uncoalescedAccesses
539 .name(name() + ".uncoalesced_accesses")
540 .desc("Number of uncoalesced TLB accesses")
541 ;
542
543 coalescedAccesses
544 .name(name() + ".coalesced_accesses")
545 .desc("Number of coalesced TLB accesses")
546 ;
547
548 queuingCycles
549 .name(name() + ".queuing_cycles")
550 .desc("Number of cycles spent in queue")
551 ;
552
553 localqueuingCycles
554 .name(name() + ".local_queuing_cycles")
555 .desc("Number of cycles spent in queue for all incoming reqs")
556 ;
557
558 localLatency
559 .name(name() + ".local_latency")
560 .desc("Avg. latency over all incoming pkts")
561 ;
562
563 localLatency = localqueuingCycles / uncoalescedAccesses;
564}
565
566
567TLBCoalescer*
568TLBCoalescerParams::create()
569{
570 return new TLBCoalescer(this);
571}
572