1/* 2 * Copyright (c) 2014-2015 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Mitch Hayenga 38 */ 39 40#include "mem/cache/prefetch/queued.hh" 41 42#include <cassert> 43 44#include "arch/generic/tlb.hh" 45#include "base/logging.hh" 46#include "base/trace.hh" 47#include "debug/HWPrefetch.hh" 48#include "mem/cache/base.hh" 49#include "mem/request.hh" 50#include "params/QueuedPrefetcher.hh" 51 52void 53QueuedPrefetcher::DeferredPacket::createPkt(Addr paddr, unsigned blk_size, 54 MasterID mid, bool tag_prefetch, 55 Tick t) { 56 /* Create a prefetch memory request */ 57 RequestPtr req = std::make_shared<Request>(paddr, blk_size, 0, mid); 58 59 if (pfInfo.isSecure()) { 60 req->setFlags(Request::SECURE); 61 } 62 req->taskId(ContextSwitchTaskId::Prefetcher); 63 pkt = new Packet(req, MemCmd::HardPFReq); 64 pkt->allocate(); 65 if (tag_prefetch && pfInfo.hasPC()) { 66 // Tag prefetch packet with accessing pc 67 pkt->req->setPC(pfInfo.getPC()); 68 } 69 tick = t; 70} 71 72void 73QueuedPrefetcher::DeferredPacket::startTranslation(BaseTLB *tlb) 74{ 75 assert(translationRequest != nullptr); 76 if (!ongoingTranslation) { 77 ongoingTranslation = true; 78 // Prefetchers only operate in Timing mode 79 tlb->translateTiming(translationRequest, tc, this, BaseTLB::Read); 80 } 81} 82 83void 84QueuedPrefetcher::DeferredPacket::finish(const Fault &fault, 85 const RequestPtr &req, ThreadContext *tc, BaseTLB::Mode mode) 86{ 87 assert(ongoingTranslation); 88 ongoingTranslation = false; 89 bool failed = (fault != NoFault); 90 owner->translationComplete(this, failed); 91} 92 93QueuedPrefetcher::QueuedPrefetcher(const QueuedPrefetcherParams *p) 94 : BasePrefetcher(p), queueSize(p->queue_size), 95 missingTranslationQueueSize( 96 p->max_prefetch_requests_with_pending_translation), 97 latency(p->latency), queueSquash(p->queue_squash), 98 queueFilter(p->queue_filter), cacheSnoop(p->cache_snoop), 99 tagPrefetch(p->tag_prefetch), 100 throttleControlPct(p->throttle_control_percentage) 101{ 102} 103 104QueuedPrefetcher::~QueuedPrefetcher() 105{ 106 // Delete the queued prefetch packets 107 for (DeferredPacket &p : pfq) { 108 delete p.pkt; 109 } 110} 111 112size_t 113QueuedPrefetcher::getMaxPermittedPrefetches(size_t total) const 114{ 115 /** 116 * Throttle generated prefetches based in the accuracy of the prefetcher. 117 * Accuracy is computed based in the ratio of useful prefetches with 118 * respect to the number of issued prefetches. 119 * 120 * The throttleControlPct controls how many of the candidate addresses 121 * generated by the prefetcher will be finally turned into prefetch 122 * requests 123 * - If set to 100, all candidates can be discarded (one request 124 * will always be allowed to be generated) 125 * - Setting it to 0 will disable the throttle control, so requests are 126 * created for all candidates 127 * - If set to 60, 40% of candidates will generate a request, and the 128 * remaining 60% will be generated depending on the current accuracy 129 */ 130 131 size_t max_pfs = total; 132 if (total > 0 && issuedPrefetches > 0) { 133 size_t throttle_pfs = (total * throttleControlPct) / 100; 134 size_t min_pfs = (total - throttle_pfs) == 0 ? 135 1 : (total - throttle_pfs); 136 max_pfs = min_pfs + (total - min_pfs) * 137 usefulPrefetches / issuedPrefetches; 138 } 139 return max_pfs; 140} 141 142void 143QueuedPrefetcher::notify(const PacketPtr &pkt, const PrefetchInfo &pfi) 144{ 145 Addr blk_addr = blockAddress(pfi.getAddr()); 146 bool is_secure = pfi.isSecure(); 147 148 // Squash queued prefetches if demand miss to same line 149 if (queueSquash) { 150 auto itr = pfq.begin(); 151 while (itr != pfq.end()) { 152 if (itr->pfInfo.getAddr() == blk_addr && 153 itr->pfInfo.isSecure() == is_secure) { 154 delete itr->pkt; 155 itr = pfq.erase(itr); 156 } else { 157 ++itr; 158 } 159 } 160 } 161 162 // Calculate prefetches given this access 163 std::vector<AddrPriority> addresses; 164 calculatePrefetch(pfi, addresses); 165 166 // Get the maximu number of prefetches that we are allowed to generate 167 size_t max_pfs = getMaxPermittedPrefetches(addresses.size()); 168 169 // Queue up generated prefetches 170 size_t num_pfs = 0; 171 for (AddrPriority& addr_prio : addresses) { 172 173 // Block align prefetch address 174 addr_prio.first = blockAddress(addr_prio.first); 175 176 if (!samePage(addr_prio.first, pfi.getAddr())) { 177 pfSpanPage += 1; 178 } 179 180 bool can_cross_page = (tlb != nullptr); 181 if (can_cross_page || samePage(addr_prio.first, pfi.getAddr())) { 182 PrefetchInfo new_pfi(pfi,addr_prio.first); 183 pfIdentified++; 184 DPRINTF(HWPrefetch, "Found a pf candidate addr: %#x, " 185 "inserting into prefetch queue.\n", new_pfi.getAddr()); 186 // Create and insert the request 187 insert(pkt, new_pfi, addr_prio.second); 188 num_pfs += 1; 189 if (num_pfs == max_pfs) { 190 break; 191 } 192 } else { 193 DPRINTF(HWPrefetch, "Ignoring page crossing prefetch.\n"); 194 } 195 } 196} 197 198PacketPtr 199QueuedPrefetcher::getPacket() 200{ 201 DPRINTF(HWPrefetch, "Requesting a prefetch to issue.\n"); 202 203 if (pfq.empty()) { 204 // If the queue is empty, attempt first to fill it with requests 205 // from the queue of missing translations 206 processMissingTranslations(queueSize); 207 } 208 209 if (pfq.empty()) { 210 DPRINTF(HWPrefetch, "No hardware prefetches available.\n"); 211 return nullptr; 212 } 213 214 PacketPtr pkt = pfq.front().pkt; 215 pfq.pop_front(); 216 217 pfIssued++; 218 issuedPrefetches += 1; 219 assert(pkt != nullptr); 220 DPRINTF(HWPrefetch, "Generating prefetch for %#x.\n", pkt->getAddr()); 221 222 processMissingTranslations(queueSize - pfq.size()); 223 return pkt; 224} 225 226void 227QueuedPrefetcher::regStats() 228{ 229 BasePrefetcher::regStats(); 230 231 pfIdentified 232 .name(name() + ".pfIdentified") 233 .desc("number of prefetch candidates identified"); 234 235 pfBufferHit 236 .name(name() + ".pfBufferHit") 237 .desc("number of redundant prefetches already in prefetch queue"); 238 239 pfInCache 240 .name(name() + ".pfInCache") 241 .desc("number of redundant prefetches already in cache/mshr dropped"); 242 243 pfRemovedFull 244 .name(name() + ".pfRemovedFull") 245 .desc("number of prefetches dropped due to prefetch queue size"); 246 247 pfSpanPage 248 .name(name() + ".pfSpanPage") 249 .desc("number of prefetches that crossed the page"); 250} 251 252 253void 254QueuedPrefetcher::processMissingTranslations(unsigned max) 255{ 256 unsigned count = 0; 257 iterator it = pfqMissingTranslation.begin(); 258 while (it != pfqMissingTranslation.end() && count < max) { 259 DeferredPacket &dp = *it; 260 // Increase the iterator first because dp.startTranslation can end up 261 // calling finishTranslation, which will erase "it" 262 it++; 263 dp.startTranslation(tlb); 264 count += 1; 265 } 266} 267 268void 269QueuedPrefetcher::translationComplete(DeferredPacket *dp, bool failed) 270{ 271 auto it = pfqMissingTranslation.begin(); 272 while (it != pfqMissingTranslation.end()) { 273 if (&(*it) == dp) { 274 break; 275 } 276 it++; 277 } 278 assert(it != pfqMissingTranslation.end()); 279 if (!failed) { 280 DPRINTF(HWPrefetch, "%s Translation of vaddr %#x succeeded: " 281 "paddr %#x \n", tlb->name(), 282 it->translationRequest->getVaddr(), 283 it->translationRequest->getPaddr()); 284 Addr target_paddr = it->translationRequest->getPaddr(); 285 // check if this prefetch is already redundant 286 if (cacheSnoop && (inCache(target_paddr, it->pfInfo.isSecure()) || 287 inMissQueue(target_paddr, it->pfInfo.isSecure()))) { 288 pfInCache++; 289 DPRINTF(HWPrefetch, "Dropping redundant in " 290 "cache/MSHR prefetch addr:%#x\n", target_paddr); 291 } else { 292 Tick pf_time = curTick() + clockPeriod() * latency; 293 it->createPkt(it->translationRequest->getPaddr(), blkSize, 294 masterId, tagPrefetch, pf_time); 295 addToQueue(pfq, *it); 296 } 297 } else { 298 DPRINTF(HWPrefetch, "%s Translation of vaddr %#x failed, dropping " 299 "prefetch request %#x \n", tlb->name(), 300 it->translationRequest->getVaddr()); 301 } 302 pfqMissingTranslation.erase(it); 303} 304 305bool 306QueuedPrefetcher::alreadyInQueue(std::list<DeferredPacket> &queue, 307 const PrefetchInfo &pfi, int32_t priority) 308{ 309 bool found = false; 310 iterator it; 311 for (it = queue.begin(); it != queue.end() && !found; it++) { 312 found = it->pfInfo.sameAddr(pfi); 313 } 314 315 /* If the address is already in the queue, update priority and leave */ 316 if (it != queue.end()) { 317 pfBufferHit++; 318 if (it->priority < priority) { 319 /* Update priority value and position in the queue */ 320 it->priority = priority; 321 iterator prev = it; 322 while (prev != queue.begin()) { 323 prev--; 324 /* If the packet has higher priority, swap */ 325 if (*it > *prev) { 326 std::swap(*it, *prev); 327 it = prev; 328 } 329 } 330 DPRINTF(HWPrefetch, "Prefetch addr already in " 331 "prefetch queue, priority updated\n"); 332 } else { 333 DPRINTF(HWPrefetch, "Prefetch addr already in " 334 "prefetch queue\n"); 335 } 336 } 337 return found; 338} 339 340RequestPtr 341QueuedPrefetcher::createPrefetchRequest(Addr addr, PrefetchInfo const &pfi, 342 PacketPtr pkt) 343{ 344 RequestPtr translation_req = std::make_shared<Request>(pkt->req->getAsid(), 345 addr, blkSize, pkt->req->getFlags(), masterId, pfi.getPC(), 346 pkt->req->contextId()); 347 translation_req->setFlags(Request::PREFETCH); 348 return translation_req; 349} 350 351void 352QueuedPrefetcher::insert(const PacketPtr &pkt, PrefetchInfo &new_pfi, 353 int32_t priority) 354{ 355 if (queueFilter) { 356 if (alreadyInQueue(pfq, new_pfi, priority)) { 357 return; 358 } 359 if (alreadyInQueue(pfqMissingTranslation, new_pfi, priority)) { 360 return; 361 } 362 } 363 364 /* 365 * Physical address computation 366 * if the prefetch is within the same page 367 * using VA: add the computed stride to the original PA 368 * using PA: no actions needed 369 * if we are page crossing 370 * using VA: Create a translaion request and enqueue the corresponding 371 * deferred packet to the queue of pending translations 372 * using PA: use the provided VA to obtain the target VA, then attempt to 373 * translate the resulting address 374 */ 375 376 Addr orig_addr = useVirtualAddresses ? 377 pkt->req->getVaddr() : pkt->req->getPaddr(); 378 bool positive_stride = new_pfi.getAddr() >= orig_addr; 379 Addr stride = positive_stride ? 380 (new_pfi.getAddr() - orig_addr) : (orig_addr - new_pfi.getAddr()); 381 382 Addr target_paddr; 383 bool has_target_pa = false; 384 RequestPtr translation_req = nullptr; 385 if (samePage(orig_addr, new_pfi.getAddr())) { 386 if (useVirtualAddresses) { 387 // if we trained with virtual addresses, 388 // compute the target PA using the original PA and adding the 389 // prefetch stride (difference between target VA and original VA) 390 target_paddr = positive_stride ? (pkt->req->getPaddr() + stride) : 391 (pkt->req->getPaddr() - stride); 392 } else { 393 target_paddr = new_pfi.getAddr(); 394 } 395 has_target_pa = true; 396 } else { 397 // Page crossing reference 398 399 // ContextID is needed for translation 400 if (!pkt->req->hasContextId()) { 401 return; 402 } 403 if (useVirtualAddresses) { 404 has_target_pa = false; 405 translation_req = createPrefetchRequest(new_pfi.getAddr(), new_pfi, 406 pkt); 407 } else if (pkt->req->hasVaddr()) { 408 has_target_pa = false; 409 // Compute the target VA using req->getVaddr + stride 410 Addr target_vaddr = positive_stride ? 411 (pkt->req->getVaddr() + stride) : 412 (pkt->req->getVaddr() - stride); 413 translation_req = createPrefetchRequest(target_vaddr, new_pfi, 414 pkt); 415 } else { 416 // Using PA for training but the request does not have a VA, 417 // unable to process this page crossing prefetch. 418 return; 419 } 420 } 421 if (has_target_pa && cacheSnoop && 422 (inCache(target_paddr, new_pfi.isSecure()) || 423 inMissQueue(target_paddr, new_pfi.isSecure()))) { 424 pfInCache++; 425 DPRINTF(HWPrefetch, "Dropping redundant in " 426 "cache/MSHR prefetch addr:%#x\n", target_paddr); 427 return; 428 } 429 430 /* Create the packet and find the spot to insert it */ 431 DeferredPacket dpp(this, new_pfi, 0, priority); 432 if (has_target_pa) { 433 Tick pf_time = curTick() + clockPeriod() * latency; 434 dpp.createPkt(target_paddr, blkSize, masterId, tagPrefetch, pf_time); 435 DPRINTF(HWPrefetch, "Prefetch queued. " 436 "addr:%#x priority: %3d tick:%lld.\n", 437 new_pfi.getAddr(), priority, pf_time); 438 addToQueue(pfq, dpp); 439 } else { 440 // Add the translation request and try to resolve it later 441 dpp.setTranslationRequest(translation_req); 442 dpp.tc = cache->system->getThreadContext(translation_req->contextId()); 443 DPRINTF(HWPrefetch, "Prefetch queued with no translation. " 444 "addr:%#x priority: %3d\n", new_pfi.getAddr(), priority); 445 addToQueue(pfqMissingTranslation, dpp); 446 } 447} 448 449void 450QueuedPrefetcher::addToQueue(std::list<DeferredPacket> &queue, 451 DeferredPacket &dpp) 452{ 453 /* Verify prefetch buffer space for request */ 454 if (queue.size() == queueSize) { 455 pfRemovedFull++; 456 /* Lowest priority packet */ 457 iterator it = queue.end(); 458 panic_if (it == queue.begin(), 459 "Prefetch queue is both full and empty!"); 460 --it; 461 /* Look for oldest in that level of priority */ 462 panic_if (it == queue.begin(), 463 "Prefetch queue is full with 1 element!"); 464 iterator prev = it; 465 bool cont = true; 466 /* While not at the head of the queue */ 467 while (cont && prev != queue.begin()) { 468 prev--; 469 /* While at the same level of priority */ 470 cont = prev->priority == it->priority; 471 if (cont) 472 /* update pointer */ 473 it = prev; 474 } 475 DPRINTF(HWPrefetch, "Prefetch queue full, removing lowest priority " 476 "oldest packet, addr: %#x\n",it->pfInfo.getAddr()); 477 delete it->pkt; 478 queue.erase(it); 479 } 480 481 if (queue.size() == 0) { 482 queue.emplace_back(dpp); 483 } else { 484 iterator it = queue.end(); 485 do { 486 --it; 487 } while (it != queue.begin() && dpp > *it); 488 /* If we reach the head, we have to see if the new element is new head 489 * or not */ 490 if (it == queue.begin() && dpp <= *it) 491 it++; 492 queue.insert(it, dpp); 493 } 494} 495