// Prefetcher.cc revision 11108:6342ddf6d733
1/* 2 * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29#include "debug/RubyPrefetcher.hh" 30#include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh" 31#include "mem/ruby/structures/Prefetcher.hh" 32#include "mem/ruby/system/RubySystem.hh" 33 34Prefetcher* 35PrefetcherParams::create() 36{ 37 return new Prefetcher(this); 38} 39 40Prefetcher::Prefetcher(const Params *p) 41 : SimObject(p), m_num_streams(p->num_streams), 42 m_array(p->num_streams), m_train_misses(p->train_misses), 43 m_num_startup_pfs(p->num_startup_pfs), m_num_unit_filters(p->unit_filter), 44 m_num_nonunit_filters(p->nonunit_filter), 45 m_unit_filter(p->unit_filter, 0), 46 m_negative_filter(p->unit_filter, 0), 47 m_nonunit_filter(p->nonunit_filter, 0), 48 m_prefetch_cross_pages(p->cross_page), 49 m_page_shift(p->sys->getPageShift()) 50{ 51 assert(m_num_streams > 0); 52 assert(m_num_startup_pfs <= MAX_PF_INFLIGHT); 53 54 // create +1 stride filter 55 m_unit_filter_index = 0; 56 m_unit_filter_hit = new uint32_t[m_num_unit_filters]; 57 for (uint32_t i =0; i < m_num_unit_filters; i++) { 58 m_unit_filter_hit[i] = 0; 59 } 60 61 // create -1 stride filter 62 m_negative_filter_index = 0; 63 m_negative_filter_hit = new uint32_t[m_num_unit_filters]; 64 for (int i =0; i < m_num_unit_filters; i++) { 65 m_negative_filter_hit[i] = 0; 66 } 67 68 // create nonunit stride filter 69 m_nonunit_index = 0; 70 m_nonunit_stride = new int[m_num_nonunit_filters]; 71 m_nonunit_hit = new uint32_t[m_num_nonunit_filters]; 72 for (int i =0; i < m_num_nonunit_filters; i++) { 73 m_nonunit_stride[i] = 0; 74 m_nonunit_hit[i] = 0; 75 } 76} 77 78Prefetcher::~Prefetcher() 79{ 80 delete m_unit_filter_hit; 81 delete m_negative_filter_hit; 82 delete m_nonunit_stride; 83 delete m_nonunit_hit; 84} 85 86void 87Prefetcher::regStats() 88{ 89 numMissObserved 90 .name(name() + ".miss_observed") 91 .desc("number of misses observed") 92 ; 93 94 numAllocatedStreams 95 .name(name() + ".allocated_streams") 96 .desc("number of streams allocated for prefetching") 97 ; 98 99 
numPrefetchRequested 100 .name(name() + ".prefetches_requested") 101 .desc("number of prefetch requests made") 102 ; 103 104 numPrefetchAccepted 105 .name(name() + ".prefetches_accepted") 106 .desc("number of prefetch requests accepted") 107 ; 108 109 numDroppedPrefetches 110 .name(name() + ".dropped_prefetches") 111 .desc("number of prefetch requests dropped") 112 ; 113 114 numHits 115 .name(name() + ".hits") 116 .desc("number of prefetched blocks accessed") 117 ; 118 119 numPartialHits 120 .name(name() + ".partial_hits") 121 .desc("number of misses observed for a block being prefetched") 122 ; 123 124 numPagesCrossed 125 .name(name() + ".pages_crossed") 126 .desc("number of prefetches across pages") 127 ; 128 129 numMissedPrefetchedBlocks 130 .name(name() + ".misses_on_prefetched_blocks") 131 .desc("number of misses for blocks that were prefetched, yet missed") 132 ; 133} 134 135void 136Prefetcher::observeMiss(Addr address, const RubyRequestType& type) 137{ 138 DPRINTF(RubyPrefetcher, "Observed miss for %s\n", address); 139 Addr line_addr = makeLineAddress(address); 140 numMissObserved++; 141 142 // check to see if we have already issued a prefetch for this block 143 uint32_t index = 0; 144 PrefetchEntry *pfEntry = getPrefetchEntry(line_addr, index); 145 if (pfEntry != NULL) { 146 if (pfEntry->requestIssued[index]) { 147 if (pfEntry->requestCompleted[index]) { 148 // We prefetched too early and now the prefetch block no 149 // longer exists in the cache 150 numMissedPrefetchedBlocks++; 151 return; 152 } else { 153 // The controller has issued the prefetch request, 154 // but the request for the block arrived earlier. 155 numPartialHits++; 156 observePfHit(line_addr); 157 return; 158 } 159 } else { 160 // The request is still in the prefetch queue of the controller. 161 // Or was evicted because of other requests. 
162 return; 163 } 164 } 165 166 // check to see if this address is in the unit stride filter 167 bool alloc = false; 168 bool hit = accessUnitFilter(m_unit_filter, m_unit_filter_hit, 169 m_unit_filter_index, line_addr, 1, alloc); 170 if (alloc) { 171 // allocate a new prefetch stream 172 initializeStream(line_addr, 1, getLRUindex(), type); 173 } 174 if (hit) { 175 DPRINTF(RubyPrefetcher, " *** hit in unit stride buffer\n"); 176 return; 177 } 178 179 hit = accessUnitFilter(m_negative_filter, m_negative_filter_hit, 180 m_negative_filter_index, line_addr, -1, alloc); 181 if (alloc) { 182 // allocate a new prefetch stream 183 initializeStream(line_addr, -1, getLRUindex(), type); 184 } 185 if (hit) { 186 DPRINTF(RubyPrefetcher, " *** hit in unit negative unit buffer\n"); 187 return; 188 } 189 190 // check to see if this address is in the non-unit stride filter 191 int stride = 0; // NULL value 192 hit = accessNonunitFilter(address, &stride, alloc); 193 if (alloc) { 194 assert(stride != 0); // ensure non-zero stride prefetches 195 initializeStream(line_addr, stride, getLRUindex(), type); 196 } 197 if (hit) { 198 DPRINTF(RubyPrefetcher, " *** hit in non-unit stride buffer\n"); 199 return; 200 } 201} 202 203void 204Prefetcher::observePfMiss(Addr address) 205{ 206 numPartialHits++; 207 DPRINTF(RubyPrefetcher, "Observed partial hit for %s\n", address); 208 issueNextPrefetch(address, NULL); 209} 210 211void 212Prefetcher::observePfHit(Addr address) 213{ 214 numHits++; 215 DPRINTF(RubyPrefetcher, "Observed hit for %s\n", address); 216 issueNextPrefetch(address, NULL); 217} 218 219void 220Prefetcher::issueNextPrefetch(Addr address, PrefetchEntry *stream) 221{ 222 // get our corresponding stream fetcher 223 if (stream == NULL) { 224 uint32_t index = 0; 225 stream = getPrefetchEntry(address, index); 226 } 227 228 // if (for some reason), this stream is unallocated, return. 
229 if (stream == NULL) { 230 DPRINTF(RubyPrefetcher, "Unallocated stream, returning\n"); 231 return; 232 } 233 234 // extend this prefetching stream by 1 (or more) 235 Addr page_addr = pageAddress(stream->m_address); 236 Addr line_addr = makeNextStrideAddress(stream->m_address, 237 stream->m_stride); 238 239 // possibly stop prefetching at page boundaries 240 if (page_addr != pageAddress(line_addr)) { 241 numPagesCrossed++; 242 if (!m_prefetch_cross_pages) { 243 // Deallocate the stream since we are not prefetching 244 // across page boundries 245 stream->m_is_valid = false; 246 return; 247 } 248 } 249 250 // launch next prefetch 251 stream->m_address = line_addr; 252 stream->m_use_time = m_controller->curCycle(); 253 DPRINTF(RubyPrefetcher, "Requesting prefetch for %s\n", line_addr); 254 m_controller->enqueuePrefetch(line_addr, stream->m_type); 255} 256 257uint32_t 258Prefetcher::getLRUindex(void) 259{ 260 uint32_t lru_index = 0; 261 Cycles lru_access = m_array[lru_index].m_use_time; 262 263 for (uint32_t i = 0; i < m_num_streams; i++) { 264 if (!m_array[i].m_is_valid) { 265 return i; 266 } 267 if (m_array[i].m_use_time < lru_access) { 268 lru_access = m_array[i].m_use_time; 269 lru_index = i; 270 } 271 } 272 273 return lru_index; 274} 275 276void 277Prefetcher::clearNonunitEntry(uint32_t index) 278{ 279 m_nonunit_filter[index] = 0; 280 m_nonunit_stride[index] = 0; 281 m_nonunit_hit[index] = 0; 282} 283 284void 285Prefetcher::initializeStream(Addr address, int stride, 286 uint32_t index, const RubyRequestType& type) 287{ 288 numAllocatedStreams++; 289 290 // initialize the stream prefetcher 291 PrefetchEntry *mystream = &(m_array[index]); 292 mystream->m_address = makeLineAddress(address); 293 mystream->m_stride = stride; 294 mystream->m_use_time = m_controller->curCycle(); 295 mystream->m_is_valid = true; 296 mystream->m_type = type; 297 298 // create a number of initial prefetches for this stream 299 Addr page_addr = pageAddress(mystream->m_address); 300 Addr 
line_addr = makeLineAddress(mystream->m_address); 301 302 // insert a number of prefetches into the prefetch table 303 for (int k = 0; k < m_num_startup_pfs; k++) { 304 line_addr = makeNextStrideAddress(line_addr, stride); 305 // possibly stop prefetching at page boundaries 306 if (page_addr != pageAddress(line_addr)) { 307 numPagesCrossed++; 308 if (!m_prefetch_cross_pages) { 309 // deallocate this stream prefetcher 310 mystream->m_is_valid = false; 311 return; 312 } 313 } 314 315 // launch prefetch 316 numPrefetchRequested++; 317 DPRINTF(RubyPrefetcher, "Requesting prefetch for %s\n", line_addr); 318 m_controller->enqueuePrefetch(line_addr, m_array[index].m_type); 319 } 320 321 // update the address to be the last address prefetched 322 mystream->m_address = line_addr; 323} 324 325PrefetchEntry * 326Prefetcher::getPrefetchEntry(Addr address, uint32_t &index) 327{ 328 // search all streams for a match 329 for (int i = 0; i < m_num_streams; i++) { 330 // search all the outstanding prefetches for this stream 331 if (m_array[i].m_is_valid) { 332 for (int j = 0; j < m_num_startup_pfs; j++) { 333 if (makeNextStrideAddress(m_array[i].m_address, 334 -(m_array[i].m_stride*j)) == address) { 335 return &(m_array[i]); 336 } 337 } 338 } 339 } 340 return NULL; 341} 342 343bool 344Prefetcher::accessUnitFilter(std::vector<Addr>& filter_table, 345 uint32_t *filter_hit, uint32_t &index, Addr address, 346 int stride, bool &alloc) 347{ 348 //reset the alloc flag 349 alloc = false; 350 351 Addr line_addr = makeLineAddress(address); 352 for (int i = 0; i < m_num_unit_filters; i++) { 353 if (filter_table[i] == line_addr) { 354 filter_table[i] = makeNextStrideAddress(filter_table[i], stride); 355 filter_hit[i]++; 356 if (filter_hit[i] >= m_train_misses) { 357 alloc = true; 358 } 359 return true; 360 } 361 } 362 363 // enter this address in the table 364 int local_index = index; 365 filter_table[local_index] = makeNextStrideAddress(line_addr, stride); 366 filter_hit[local_index] = 0; 367 
local_index = local_index + 1; 368 if (local_index >= m_num_unit_filters) { 369 local_index = 0; 370 } 371 372 index = local_index; 373 return false; 374} 375 376bool 377Prefetcher::accessNonunitFilter(Addr address, int *stride, 378 bool &alloc) 379{ 380 //reset the alloc flag 381 alloc = false; 382 383 /// look for non-unit strides based on a (user-defined) page size 384 Addr page_addr = pageAddress(address); 385 Addr line_addr = makeLineAddress(address); 386 387 for (uint32_t i = 0; i < m_num_nonunit_filters; i++) { 388 if (pageAddress(m_nonunit_filter[i]) == page_addr) { 389 // hit in the non-unit filter 390 // compute the actual stride (for this reference) 391 int delta = line_addr - m_nonunit_filter[i]; 392 393 if (delta != 0) { 394 // no zero stride prefetches 395 // check that the stride matches (for the last N times) 396 if (delta == m_nonunit_stride[i]) { 397 // -> stride hit 398 // increment count (if > 2) allocate stream 399 m_nonunit_hit[i]++; 400 if (m_nonunit_hit[i] > m_train_misses) { 401 // This stride HAS to be the multiplicative constant of 402 // dataBlockBytes (bc makeNextStrideAddress is 403 // calculated based on this multiplicative constant!) 404 *stride = m_nonunit_stride[i] / 405 RubySystem::getBlockSizeBytes(); 406 407 // clear this filter entry 408 clearNonunitEntry(i); 409 alloc = true; 410 } 411 } else { 412 // delta didn't match ... 
reset m_nonunit_hit count for 413 // this entry 414 m_nonunit_hit[i] = 0; 415 } 416 417 // update the last address seen & the stride 418 m_nonunit_stride[i] = delta; 419 m_nonunit_filter[i] = line_addr; 420 return true; 421 } else { 422 return false; 423 } 424 } 425 } 426 427 // not found: enter this address in the table 428 m_nonunit_filter[m_nonunit_index] = line_addr; 429 m_nonunit_stride[m_nonunit_index] = 0; 430 m_nonunit_hit[m_nonunit_index] = 0; 431 432 m_nonunit_index = m_nonunit_index + 1; 433 if (m_nonunit_index >= m_num_nonunit_filters) { 434 m_nonunit_index = 0; 435 } 436 return false; 437} 438 439void 440Prefetcher::print(std::ostream& out) const 441{ 442 out << name() << " Prefetcher State\n"; 443 // print out unit filter 444 out << "unit table:\n"; 445 for (int i = 0; i < m_num_unit_filters; i++) { 446 out << m_unit_filter[i] << std::endl; 447 } 448 449 out << "negative table:\n"; 450 for (int i = 0; i < m_num_unit_filters; i++) { 451 out << m_negative_filter[i] << std::endl; 452 } 453 454 // print out non-unit stride filter 455 out << "non-unit table:\n"; 456 for (int i = 0; i < m_num_nonunit_filters; i++) { 457 out << m_nonunit_filter[i] << " " 458 << m_nonunit_stride[i] << " " 459 << m_nonunit_hit[i] << std::endl; 460 } 461 462 // print out allocated stream buffers 463 out << "streams:\n"; 464 for (int i = 0; i < m_num_streams; i++) { 465 out << m_array[i].m_address << " " 466 << m_array[i].m_stride << " " 467 << m_array[i].m_is_valid << " " 468 << m_array[i].m_use_time << std::endl; 469 } 470} 471 472Addr 473Prefetcher::pageAddress(Addr addr) const 474{ 475 return maskLowOrderBits(addr, m_page_shift); 476} 477