1/* 2 * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include "mem/ruby/structures/Prefetcher.hh" 30 31#include "debug/RubyPrefetcher.hh" 32#include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh" 33#include "mem/ruby/system/RubySystem.hh" 34 35Prefetcher* 36PrefetcherParams::create() 37{ 38 return new Prefetcher(this); 39} 40 41Prefetcher::Prefetcher(const Params *p) 42 : SimObject(p), m_num_streams(p->num_streams), 43 m_array(p->num_streams), m_train_misses(p->train_misses), 44 m_num_startup_pfs(p->num_startup_pfs), m_num_unit_filters(p->unit_filter), 45 m_num_nonunit_filters(p->nonunit_filter), 46 m_unit_filter(p->unit_filter, 0), 47 m_negative_filter(p->unit_filter, 0), 48 m_nonunit_filter(p->nonunit_filter, 0), 49 m_prefetch_cross_pages(p->cross_page), 50 m_page_shift(p->sys->getPageShift()) 51{ 52 assert(m_num_streams > 0); 53 assert(m_num_startup_pfs <= MAX_PF_INFLIGHT); 54 55 // create +1 stride filter 56 m_unit_filter_index = 0; 57 m_unit_filter_hit = new uint32_t[m_num_unit_filters]; 58 for (uint32_t i =0; i < m_num_unit_filters; i++) { 59 m_unit_filter_hit[i] = 0; 60 } 61 62 // create -1 stride filter 63 m_negative_filter_index = 0; 64 m_negative_filter_hit = new uint32_t[m_num_unit_filters]; 65 for (int i =0; i < m_num_unit_filters; i++) { 66 m_negative_filter_hit[i] = 0; 67 } 68 69 // create nonunit stride filter 70 m_nonunit_index = 0; 71 m_nonunit_stride = new int[m_num_nonunit_filters]; 72 m_nonunit_hit = new uint32_t[m_num_nonunit_filters]; 73 for (int i =0; i < m_num_nonunit_filters; i++) { 74 m_nonunit_stride[i] = 0; 75 m_nonunit_hit[i] = 0; 76 } 77} 78 79Prefetcher::~Prefetcher() 80{ 81 delete m_unit_filter_hit; 82 delete m_negative_filter_hit; 83 delete m_nonunit_stride; 84 delete m_nonunit_hit; 85} 86 87void 88Prefetcher::regStats() 89{ 90 SimObject::regStats(); 91 92 numMissObserved 93 .name(name() + ".miss_observed") 94 .desc("number of misses observed") 95 ; 96 97 numAllocatedStreams 98 .name(name() + ".allocated_streams") 99 .desc("number of streams allocated for prefetching") 100 ; 101 102 numPrefetchRequested 103 .name(name() + ".prefetches_requested") 104 .desc("number of prefetch requests made") 105 ; 106 107 numPrefetchAccepted 108 .name(name() + ".prefetches_accepted") 109 .desc("number of prefetch requests accepted") 110 ; 111 112 numDroppedPrefetches 113 .name(name() + ".dropped_prefetches") 114 .desc("number of prefetch requests dropped") 115 ; 116 117 numHits 118 .name(name() + ".hits") 119 .desc("number of prefetched blocks accessed") 120 ; 121 122 numPartialHits 123 .name(name() + ".partial_hits") 124 .desc("number of misses observed for a block being prefetched") 125 ; 126 127 numPagesCrossed 128 .name(name() + ".pages_crossed") 129 .desc("number of prefetches across pages") 130 ; 131 132 numMissedPrefetchedBlocks 133 .name(name() + ".misses_on_prefetched_blocks") 134 .desc("number of misses for blocks that were prefetched, yet missed") 135 ; 136} 137 138void 139Prefetcher::observeMiss(Addr address, const RubyRequestType& type) 140{ 141 DPRINTF(RubyPrefetcher, "Observed miss for %#x\n", address); 142 Addr line_addr = makeLineAddress(address); 143 numMissObserved++; 144 145 // check to see if we have already issued a prefetch for this block 146 uint32_t index = 0; 147 PrefetchEntry *pfEntry = getPrefetchEntry(line_addr, index); 148 if (pfEntry != NULL) { 149 if (pfEntry->requestIssued[index]) { 150 if (pfEntry->requestCompleted[index]) { 151 // We prefetched too early and now the prefetch block no 152 // longer exists in the cache 153 numMissedPrefetchedBlocks++; 154 return; 155 } else { 156 // The controller has issued the prefetch request, 157 // but the request for the block arrived earlier. 158 numPartialHits++; 159 observePfHit(line_addr); 160 return; 161 } 162 } else { 163 // The request is still in the prefetch queue of the controller. 164 // Or was evicted because of other requests. 165 return; 166 } 167 } 168 169 // check to see if this address is in the unit stride filter 170 bool alloc = false; 171 bool hit = accessUnitFilter(m_unit_filter, m_unit_filter_hit, 172 m_unit_filter_index, line_addr, 1, alloc); 173 if (alloc) { 174 // allocate a new prefetch stream 175 initializeStream(line_addr, 1, getLRUindex(), type); 176 } 177 if (hit) { 178 DPRINTF(RubyPrefetcher, " *** hit in unit stride buffer\n"); 179 return; 180 } 181 182 hit = accessUnitFilter(m_negative_filter, m_negative_filter_hit, 183 m_negative_filter_index, line_addr, -1, alloc); 184 if (alloc) { 185 // allocate a new prefetch stream 186 initializeStream(line_addr, -1, getLRUindex(), type); 187 } 188 if (hit) { 189 DPRINTF(RubyPrefetcher, " *** hit in unit negative unit buffer\n"); 190 return; 191 } 192 193 // check to see if this address is in the non-unit stride filter 194 int stride = 0; // NULL value 195 hit = accessNonunitFilter(address, &stride, alloc); 196 if (alloc) { 197 assert(stride != 0); // ensure non-zero stride prefetches 198 initializeStream(line_addr, stride, getLRUindex(), type); 199 } 200 if (hit) { 201 DPRINTF(RubyPrefetcher, " *** hit in non-unit stride buffer\n"); 202 return; 203 } 204} 205 206void 207Prefetcher::observePfMiss(Addr address) 208{ 209 numPartialHits++; 210 DPRINTF(RubyPrefetcher, "Observed partial hit for %#x\n", address); 211 issueNextPrefetch(address, NULL); 212} 213 214void 215Prefetcher::observePfHit(Addr address) 216{ 217 numHits++; 218 DPRINTF(RubyPrefetcher, "Observed hit for %#x\n", address); 219 issueNextPrefetch(address, NULL); 220} 221 222void 223Prefetcher::issueNextPrefetch(Addr address, PrefetchEntry *stream) 224{ 225 // get our corresponding stream fetcher 226 if (stream == NULL) { 227 uint32_t index = 0; 228 stream = getPrefetchEntry(address, index); 229 } 230 231 // if (for some reason), this stream is unallocated, return. 232 if (stream == NULL) { 233 DPRINTF(RubyPrefetcher, "Unallocated stream, returning\n"); 234 return; 235 } 236 237 // extend this prefetching stream by 1 (or more) 238 Addr page_addr = pageAddress(stream->m_address); 239 Addr line_addr = makeNextStrideAddress(stream->m_address, 240 stream->m_stride); 241 242 // possibly stop prefetching at page boundaries 243 if (page_addr != pageAddress(line_addr)) { 244 numPagesCrossed++; 245 if (!m_prefetch_cross_pages) { 246 // Deallocate the stream since we are not prefetching 247 // across page boundries 248 stream->m_is_valid = false; 249 return; 250 } 251 } 252 253 // launch next prefetch 254 stream->m_address = line_addr; 255 stream->m_use_time = m_controller->curCycle(); 256 DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr); 257 m_controller->enqueuePrefetch(line_addr, stream->m_type); 258} 259 260uint32_t 261Prefetcher::getLRUindex(void) 262{ 263 uint32_t lru_index = 0; 264 Cycles lru_access = m_array[lru_index].m_use_time; 265 266 for (uint32_t i = 0; i < m_num_streams; i++) { 267 if (!m_array[i].m_is_valid) { 268 return i; 269 } 270 if (m_array[i].m_use_time < lru_access) { 271 lru_access = m_array[i].m_use_time; 272 lru_index = i; 273 } 274 } 275 276 return lru_index; 277} 278 279void 280Prefetcher::clearNonunitEntry(uint32_t index) 281{ 282 m_nonunit_filter[index] = 0; 283 m_nonunit_stride[index] = 0; 284 m_nonunit_hit[index] = 0; 285} 286 287void 288Prefetcher::initializeStream(Addr address, int stride, 289 uint32_t index, const RubyRequestType& type) 290{ 291 numAllocatedStreams++; 292 293 // initialize the stream prefetcher 294 PrefetchEntry *mystream = &(m_array[index]); 295 mystream->m_address = makeLineAddress(address); 296 mystream->m_stride = stride; 297 mystream->m_use_time = m_controller->curCycle(); 298 mystream->m_is_valid = true; 299 mystream->m_type = type; 300 301 // create a number of initial prefetches for this stream 302 Addr page_addr = pageAddress(mystream->m_address); 303 Addr line_addr = makeLineAddress(mystream->m_address); 304 305 // insert a number of prefetches into the prefetch table 306 for (int k = 0; k < m_num_startup_pfs; k++) { 307 line_addr = makeNextStrideAddress(line_addr, stride); 308 // possibly stop prefetching at page boundaries 309 if (page_addr != pageAddress(line_addr)) { 310 numPagesCrossed++; 311 if (!m_prefetch_cross_pages) { 312 // deallocate this stream prefetcher 313 mystream->m_is_valid = false; 314 return; 315 } 316 } 317 318 // launch prefetch 319 numPrefetchRequested++; 320 DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr); 321 m_controller->enqueuePrefetch(line_addr, m_array[index].m_type); 322 } 323 324 // update the address to be the last address prefetched 325 mystream->m_address = line_addr; 326} 327 328PrefetchEntry * 329Prefetcher::getPrefetchEntry(Addr address, uint32_t &index) 330{ 331 // search all streams for a match 332 for (int i = 0; i < m_num_streams; i++) { 333 // search all the outstanding prefetches for this stream 334 if (m_array[i].m_is_valid) { 335 for (int j = 0; j < m_num_startup_pfs; j++) { 336 if (makeNextStrideAddress(m_array[i].m_address, 337 -(m_array[i].m_stride*j)) == address) { 338 return &(m_array[i]); 339 } 340 } 341 } 342 } 343 return NULL; 344} 345 346bool 347Prefetcher::accessUnitFilter(std::vector<Addr>& filter_table, 348 uint32_t *filter_hit, uint32_t &index, Addr address, 349 int stride, bool &alloc) 350{ 351 //reset the alloc flag 352 alloc = false; 353 354 Addr line_addr = makeLineAddress(address); 355 for (int i = 0; i < m_num_unit_filters; i++) { 356 if (filter_table[i] == line_addr) { 357 filter_table[i] = makeNextStrideAddress(filter_table[i], stride); 358 filter_hit[i]++; 359 if (filter_hit[i] >= m_train_misses) { 360 alloc = true; 361 } 362 return true; 363 } 364 } 365 366 // enter this address in the table 367 int local_index = index; 368 filter_table[local_index] = makeNextStrideAddress(line_addr, stride); 369 filter_hit[local_index] = 0; 370 local_index = local_index + 1; 371 if (local_index >= m_num_unit_filters) { 372 local_index = 0; 373 } 374 375 index = local_index; 376 return false; 377} 378 379bool 380Prefetcher::accessNonunitFilter(Addr address, int *stride, 381 bool &alloc) 382{ 383 //reset the alloc flag 384 alloc = false; 385 386 /// look for non-unit strides based on a (user-defined) page size 387 Addr page_addr = pageAddress(address); 388 Addr line_addr = makeLineAddress(address); 389 390 for (uint32_t i = 0; i < m_num_nonunit_filters; i++) { 391 if (pageAddress(m_nonunit_filter[i]) == page_addr) { 392 // hit in the non-unit filter 393 // compute the actual stride (for this reference) 394 int delta = line_addr - m_nonunit_filter[i]; 395 396 if (delta != 0) { 397 // no zero stride prefetches 398 // check that the stride matches (for the last N times) 399 if (delta == m_nonunit_stride[i]) { 400 // -> stride hit 401 // increment count (if > 2) allocate stream 402 m_nonunit_hit[i]++; 403 if (m_nonunit_hit[i] > m_train_misses) { 404 // This stride HAS to be the multiplicative constant of 405 // dataBlockBytes (bc makeNextStrideAddress is 406 // calculated based on this multiplicative constant!) 407 *stride = m_nonunit_stride[i] / 408 RubySystem::getBlockSizeBytes(); 409 410 // clear this filter entry 411 clearNonunitEntry(i); 412 alloc = true; 413 } 414 } else { 415 // delta didn't match ... reset m_nonunit_hit count for 416 // this entry 417 m_nonunit_hit[i] = 0; 418 } 419 420 // update the last address seen & the stride 421 m_nonunit_stride[i] = delta; 422 m_nonunit_filter[i] = line_addr; 423 return true; 424 } else { 425 return false; 426 } 427 } 428 } 429 430 // not found: enter this address in the table 431 m_nonunit_filter[m_nonunit_index] = line_addr; 432 m_nonunit_stride[m_nonunit_index] = 0; 433 m_nonunit_hit[m_nonunit_index] = 0; 434 435 m_nonunit_index = m_nonunit_index + 1; 436 if (m_nonunit_index >= m_num_nonunit_filters) { 437 m_nonunit_index = 0; 438 } 439 return false; 440} 441 442void 443Prefetcher::print(std::ostream& out) const 444{ 445 out << name() << " Prefetcher State\n"; 446 // print out unit filter 447 out << "unit table:\n"; 448 for (int i = 0; i < m_num_unit_filters; i++) { 449 out << m_unit_filter[i] << std::endl; 450 } 451 452 out << "negative table:\n"; 453 for (int i = 0; i < m_num_unit_filters; i++) { 454 out << m_negative_filter[i] << std::endl; 455 } 456 457 // print out non-unit stride filter 458 out << "non-unit table:\n"; 459 for (int i = 0; i < m_num_nonunit_filters; i++) { 460 out << m_nonunit_filter[i] << " " 461 << m_nonunit_stride[i] << " " 462 << m_nonunit_hit[i] << std::endl; 463 } 464 465 // print out allocated stream buffers 466 out << "streams:\n"; 467 for (int i = 0; i < m_num_streams; i++) { 468 out << m_array[i].m_address << " " 469 << m_array[i].m_stride << " " 470 << m_array[i].m_is_valid << " " 471 << m_array[i].m_use_time << std::endl; 472 } 473} 474 475Addr 476Prefetcher::pageAddress(Addr addr) const 477{ 478 return maskLowOrderBits(addr, m_page_shift); 479} 480