Prefetcher.cc revision 11523:81332eb10367
1/* 2 * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include "debug/RubyPrefetcher.hh" 30#include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh" 31#include "mem/ruby/structures/Prefetcher.hh" 32#include "mem/ruby/system/RubySystem.hh" 33 34Prefetcher* 35PrefetcherParams::create() 36{ 37 return new Prefetcher(this); 38} 39 40Prefetcher::Prefetcher(const Params *p) 41 : SimObject(p), m_num_streams(p->num_streams), 42 m_array(p->num_streams), m_train_misses(p->train_misses), 43 m_num_startup_pfs(p->num_startup_pfs), m_num_unit_filters(p->unit_filter), 44 m_num_nonunit_filters(p->nonunit_filter), 45 m_unit_filter(p->unit_filter, 0), 46 m_negative_filter(p->unit_filter, 0), 47 m_nonunit_filter(p->nonunit_filter, 0), 48 m_prefetch_cross_pages(p->cross_page), 49 m_page_shift(p->sys->getPageShift()) 50{ 51 assert(m_num_streams > 0); 52 assert(m_num_startup_pfs <= MAX_PF_INFLIGHT); 53 54 // create +1 stride filter 55 m_unit_filter_index = 0; 56 m_unit_filter_hit = new uint32_t[m_num_unit_filters]; 57 for (uint32_t i =0; i < m_num_unit_filters; i++) { 58 m_unit_filter_hit[i] = 0; 59 } 60 61 // create -1 stride filter 62 m_negative_filter_index = 0; 63 m_negative_filter_hit = new uint32_t[m_num_unit_filters]; 64 for (int i =0; i < m_num_unit_filters; i++) { 65 m_negative_filter_hit[i] = 0; 66 } 67 68 // create nonunit stride filter 69 m_nonunit_index = 0; 70 m_nonunit_stride = new int[m_num_nonunit_filters]; 71 m_nonunit_hit = new uint32_t[m_num_nonunit_filters]; 72 for (int i =0; i < m_num_nonunit_filters; i++) { 73 m_nonunit_stride[i] = 0; 74 m_nonunit_hit[i] = 0; 75 } 76} 77 78Prefetcher::~Prefetcher() 79{ 80 delete m_unit_filter_hit; 81 delete m_negative_filter_hit; 82 delete m_nonunit_stride; 83 delete m_nonunit_hit; 84} 85 86void 87Prefetcher::regStats() 88{ 89 SimObject::regStats(); 90 91 numMissObserved 92 .name(name() + ".miss_observed") 93 .desc("number of misses observed") 94 ; 95 96 numAllocatedStreams 97 .name(name() + ".allocated_streams") 98 .desc("number of streams allocated for prefetching") 99 ; 100 101 numPrefetchRequested 102 .name(name() + ".prefetches_requested") 103 .desc("number of prefetch requests made") 104 ; 105 106 numPrefetchAccepted 107 .name(name() + ".prefetches_accepted") 108 .desc("number of prefetch requests accepted") 109 ; 110 111 numDroppedPrefetches 112 .name(name() + ".dropped_prefetches") 113 .desc("number of prefetch requests dropped") 114 ; 115 116 numHits 117 .name(name() + ".hits") 118 .desc("number of prefetched blocks accessed") 119 ; 120 121 numPartialHits 122 .name(name() + ".partial_hits") 123 .desc("number of misses observed for a block being prefetched") 124 ; 125 126 numPagesCrossed 127 .name(name() + ".pages_crossed") 128 .desc("number of prefetches across pages") 129 ; 130 131 numMissedPrefetchedBlocks 132 .name(name() + ".misses_on_prefetched_blocks") 133 .desc("number of misses for blocks that were prefetched, yet missed") 134 ; 135} 136 137void 138Prefetcher::observeMiss(Addr address, const RubyRequestType& type) 139{ 140 DPRINTF(RubyPrefetcher, "Observed miss for %#x\n", address); 141 Addr line_addr = makeLineAddress(address); 142 numMissObserved++; 143 144 // check to see if we have already issued a prefetch for this block 145 uint32_t index = 0; 146 PrefetchEntry *pfEntry = getPrefetchEntry(line_addr, index); 147 if (pfEntry != NULL) { 148 if (pfEntry->requestIssued[index]) { 149 if (pfEntry->requestCompleted[index]) { 150 // We prefetched too early and now the prefetch block no 151 // longer exists in the cache 152 numMissedPrefetchedBlocks++; 153 return; 154 } else { 155 // The controller has issued the prefetch request, 156 // but the request for the block arrived earlier. 157 numPartialHits++; 158 observePfHit(line_addr); 159 return; 160 } 161 } else { 162 // The request is still in the prefetch queue of the controller. 163 // Or was evicted because of other requests. 164 return; 165 } 166 } 167 168 // check to see if this address is in the unit stride filter 169 bool alloc = false; 170 bool hit = accessUnitFilter(m_unit_filter, m_unit_filter_hit, 171 m_unit_filter_index, line_addr, 1, alloc); 172 if (alloc) { 173 // allocate a new prefetch stream 174 initializeStream(line_addr, 1, getLRUindex(), type); 175 } 176 if (hit) { 177 DPRINTF(RubyPrefetcher, " *** hit in unit stride buffer\n"); 178 return; 179 } 180 181 hit = accessUnitFilter(m_negative_filter, m_negative_filter_hit, 182 m_negative_filter_index, line_addr, -1, alloc); 183 if (alloc) { 184 // allocate a new prefetch stream 185 initializeStream(line_addr, -1, getLRUindex(), type); 186 } 187 if (hit) { 188 DPRINTF(RubyPrefetcher, " *** hit in unit negative unit buffer\n"); 189 return; 190 } 191 192 // check to see if this address is in the non-unit stride filter 193 int stride = 0; // NULL value 194 hit = accessNonunitFilter(address, &stride, alloc); 195 if (alloc) { 196 assert(stride != 0); // ensure non-zero stride prefetches 197 initializeStream(line_addr, stride, getLRUindex(), type); 198 } 199 if (hit) { 200 DPRINTF(RubyPrefetcher, " *** hit in non-unit stride buffer\n"); 201 return; 202 } 203} 204 205void 206Prefetcher::observePfMiss(Addr address) 207{ 208 numPartialHits++; 209 DPRINTF(RubyPrefetcher, "Observed partial hit for %#x\n", address); 210 issueNextPrefetch(address, NULL); 211} 212 213void 214Prefetcher::observePfHit(Addr address) 215{ 216 numHits++; 217 DPRINTF(RubyPrefetcher, "Observed hit for %#x\n", address); 218 issueNextPrefetch(address, NULL); 219} 220 221void 222Prefetcher::issueNextPrefetch(Addr address, PrefetchEntry *stream) 223{ 224 // get our corresponding stream fetcher 225 if (stream == NULL) { 226 uint32_t index = 0; 227 stream = getPrefetchEntry(address, index); 228 } 229 230 // if (for some reason), this stream is unallocated, return. 231 if (stream == NULL) { 232 DPRINTF(RubyPrefetcher, "Unallocated stream, returning\n"); 233 return; 234 } 235 236 // extend this prefetching stream by 1 (or more) 237 Addr page_addr = pageAddress(stream->m_address); 238 Addr line_addr = makeNextStrideAddress(stream->m_address, 239 stream->m_stride); 240 241 // possibly stop prefetching at page boundaries 242 if (page_addr != pageAddress(line_addr)) { 243 numPagesCrossed++; 244 if (!m_prefetch_cross_pages) { 245 // Deallocate the stream since we are not prefetching 246 // across page boundries 247 stream->m_is_valid = false; 248 return; 249 } 250 } 251 252 // launch next prefetch 253 stream->m_address = line_addr; 254 stream->m_use_time = m_controller->curCycle(); 255 DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr); 256 m_controller->enqueuePrefetch(line_addr, stream->m_type); 257} 258 259uint32_t 260Prefetcher::getLRUindex(void) 261{ 262 uint32_t lru_index = 0; 263 Cycles lru_access = m_array[lru_index].m_use_time; 264 265 for (uint32_t i = 0; i < m_num_streams; i++) { 266 if (!m_array[i].m_is_valid) { 267 return i; 268 } 269 if (m_array[i].m_use_time < lru_access) { 270 lru_access = m_array[i].m_use_time; 271 lru_index = i; 272 } 273 } 274 275 return lru_index; 276} 277 278void 279Prefetcher::clearNonunitEntry(uint32_t index) 280{ 281 m_nonunit_filter[index] = 0; 282 m_nonunit_stride[index] = 0; 283 m_nonunit_hit[index] = 0; 284} 285 286void 287Prefetcher::initializeStream(Addr address, int stride, 288 uint32_t index, const RubyRequestType& type) 289{ 290 numAllocatedStreams++; 291 292 // initialize the stream prefetcher 293 PrefetchEntry *mystream = &(m_array[index]); 294 mystream->m_address = makeLineAddress(address); 295 mystream->m_stride = stride; 296 mystream->m_use_time = m_controller->curCycle(); 297 mystream->m_is_valid = true; 298 mystream->m_type = type; 299 300 // create a number of initial prefetches for this stream 301 Addr page_addr = pageAddress(mystream->m_address); 302 Addr line_addr = makeLineAddress(mystream->m_address); 303 304 // insert a number of prefetches into the prefetch table 305 for (int k = 0; k < m_num_startup_pfs; k++) { 306 line_addr = makeNextStrideAddress(line_addr, stride); 307 // possibly stop prefetching at page boundaries 308 if (page_addr != pageAddress(line_addr)) { 309 numPagesCrossed++; 310 if (!m_prefetch_cross_pages) { 311 // deallocate this stream prefetcher 312 mystream->m_is_valid = false; 313 return; 314 } 315 } 316 317 // launch prefetch 318 numPrefetchRequested++; 319 DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr); 320 m_controller->enqueuePrefetch(line_addr, m_array[index].m_type); 321 } 322 323 // update the address to be the last address prefetched 324 mystream->m_address = line_addr; 325} 326 327PrefetchEntry * 328Prefetcher::getPrefetchEntry(Addr address, uint32_t &index) 329{ 330 // search all streams for a match 331 for (int i = 0; i < m_num_streams; i++) { 332 // search all the outstanding prefetches for this stream 333 if (m_array[i].m_is_valid) { 334 for (int j = 0; j < m_num_startup_pfs; j++) { 335 if (makeNextStrideAddress(m_array[i].m_address, 336 -(m_array[i].m_stride*j)) == address) { 337 return &(m_array[i]); 338 } 339 } 340 } 341 } 342 return NULL; 343} 344 345bool 346Prefetcher::accessUnitFilter(std::vector<Addr>& filter_table, 347 uint32_t *filter_hit, uint32_t &index, Addr address, 348 int stride, bool &alloc) 349{ 350 //reset the alloc flag 351 alloc = false; 352 353 Addr line_addr = makeLineAddress(address); 354 for (int i = 0; i < m_num_unit_filters; i++) { 355 if (filter_table[i] == line_addr) { 356 filter_table[i] = makeNextStrideAddress(filter_table[i], stride); 357 filter_hit[i]++; 358 if (filter_hit[i] >= m_train_misses) { 359 alloc = true; 360 } 361 return true; 362 } 363 } 364 365 // enter this address in the table 366 int local_index = index; 367 filter_table[local_index] = makeNextStrideAddress(line_addr, stride); 368 filter_hit[local_index] = 0; 369 local_index = local_index + 1; 370 if (local_index >= m_num_unit_filters) { 371 local_index = 0; 372 } 373 374 index = local_index; 375 return false; 376} 377 378bool 379Prefetcher::accessNonunitFilter(Addr address, int *stride, 380 bool &alloc) 381{ 382 //reset the alloc flag 383 alloc = false; 384 385 /// look for non-unit strides based on a (user-defined) page size 386 Addr page_addr = pageAddress(address); 387 Addr line_addr = makeLineAddress(address); 388 389 for (uint32_t i = 0; i < m_num_nonunit_filters; i++) { 390 if (pageAddress(m_nonunit_filter[i]) == page_addr) { 391 // hit in the non-unit filter 392 // compute the actual stride (for this reference) 393 int delta = line_addr - m_nonunit_filter[i]; 394 395 if (delta != 0) { 396 // no zero stride prefetches 397 // check that the stride matches (for the last N times) 398 if (delta == m_nonunit_stride[i]) { 399 // -> stride hit 400 // increment count (if > 2) allocate stream 401 m_nonunit_hit[i]++; 402 if (m_nonunit_hit[i] > m_train_misses) { 403 // This stride HAS to be the multiplicative constant of 404 // dataBlockBytes (bc makeNextStrideAddress is 405 // calculated based on this multiplicative constant!) 406 *stride = m_nonunit_stride[i] / 407 RubySystem::getBlockSizeBytes(); 408 409 // clear this filter entry 410 clearNonunitEntry(i); 411 alloc = true; 412 } 413 } else { 414 // delta didn't match ... reset m_nonunit_hit count for 415 // this entry 416 m_nonunit_hit[i] = 0; 417 } 418 419 // update the last address seen & the stride 420 m_nonunit_stride[i] = delta; 421 m_nonunit_filter[i] = line_addr; 422 return true; 423 } else { 424 return false; 425 } 426 } 427 } 428 429 // not found: enter this address in the table 430 m_nonunit_filter[m_nonunit_index] = line_addr; 431 m_nonunit_stride[m_nonunit_index] = 0; 432 m_nonunit_hit[m_nonunit_index] = 0; 433 434 m_nonunit_index = m_nonunit_index + 1; 435 if (m_nonunit_index >= m_num_nonunit_filters) { 436 m_nonunit_index = 0; 437 } 438 return false; 439} 440 441void 442Prefetcher::print(std::ostream& out) const 443{ 444 out << name() << " Prefetcher State\n"; 445 // print out unit filter 446 out << "unit table:\n"; 447 for (int i = 0; i < m_num_unit_filters; i++) { 448 out << m_unit_filter[i] << std::endl; 449 } 450 451 out << "negative table:\n"; 452 for (int i = 0; i < m_num_unit_filters; i++) { 453 out << m_negative_filter[i] << std::endl; 454 } 455 456 // print out non-unit stride filter 457 out << "non-unit table:\n"; 458 for (int i = 0; i < m_num_nonunit_filters; i++) { 459 out << m_nonunit_filter[i] << " " 460 << m_nonunit_stride[i] << " " 461 << m_nonunit_hit[i] << std::endl; 462 } 463 464 // print out allocated stream buffers 465 out << "streams:\n"; 466 for (int i = 0; i < m_num_streams; i++) { 467 out << m_array[i].m_address << " " 468 << m_array[i].m_stride << " " 469 << m_array[i].m_is_valid << " " 470 << m_array[i].m_use_time << std::endl; 471 } 472} 473 474Addr 475Prefetcher::pageAddress(Addr addr) const 476{ 477 return maskLowOrderBits(addr, m_page_shift); 478} 479