Prefetcher.cc revision 9507
1/* 2 * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include "debug/RubyPrefetcher.hh" 30#include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh" 31#include "mem/ruby/structures/Prefetcher.hh" 32#include "mem/ruby/system/System.hh" 33 34Prefetcher* 35PrefetcherParams::create() 36{ 37 return new Prefetcher(this); 38} 39 40Prefetcher::Prefetcher(const Params *p) 41 : SimObject(p), m_num_streams(p->num_streams), 42 m_array(p->num_streams), m_train_misses(p->train_misses), 43 m_num_startup_pfs(p->num_startup_pfs), m_num_unit_filters(p->unit_filter), 44 m_num_nonunit_filters(p->nonunit_filter), 45 m_unit_filter(p->unit_filter, Address(0)), 46 m_negative_filter(p->unit_filter, Address(0)), 47 m_nonunit_filter(p->nonunit_filter, Address(0)), 48 m_prefetch_cross_pages(p->cross_page) 49{ 50 assert(m_num_streams > 0); 51 assert(m_num_startup_pfs <= MAX_PF_INFLIGHT); 52 53 // create +1 stride filter 54 m_unit_filter_index = 0; 55 m_unit_filter_hit = new uint32_t[m_num_unit_filters]; 56 for (uint32_t i =0; i < m_num_unit_filters; i++) { 57 m_unit_filter_hit[i] = 0; 58 } 59 60 // create -1 stride filter 61 m_negative_filter_index = 0; 62 m_negative_filter_hit = new uint32_t[m_num_unit_filters]; 63 for (int i =0; i < m_num_unit_filters; i++) { 64 m_negative_filter_hit[i] = 0; 65 } 66 67 // create nonunit stride filter 68 m_nonunit_index = 0; 69 m_nonunit_stride = new int[m_num_nonunit_filters]; 70 m_nonunit_hit = new uint32_t[m_num_nonunit_filters]; 71 for (int i =0; i < m_num_nonunit_filters; i++) { 72 m_nonunit_stride[i] = 0; 73 m_nonunit_hit[i] = 0; 74 } 75} 76 77Prefetcher::~Prefetcher() 78{ 79 delete m_unit_filter_hit; 80 delete m_negative_filter_hit; 81 delete m_nonunit_stride; 82 delete m_nonunit_hit; 83} 84 85void 86Prefetcher::regStats() 87{ 88 numMissObserved 89 .name(name() + ".miss_observed") 90 .desc("number of misses observed") 91 ; 92 93 numAllocatedStreams 94 .name(name() + ".allocated_streams") 95 .desc("number of streams allocated for prefetching") 96 ; 97 98 numPrefetchRequested 99 .name(name() + ".prefetches_requested") 100 .desc("number of prefetch requests made") 101 ; 102 103 numPrefetchAccepted 104 .name(name() + ".prefetches_accepted") 105 .desc("number of prefetch requests accepted") 106 ; 107 108 numDroppedPrefetches 109 .name(name() + ".dropped_prefetches") 110 .desc("number of prefetch requests dropped") 111 ; 112 113 numHits 114 .name(name() + ".hits") 115 .desc("number of prefetched blocks accessed") 116 ; 117 118 numPartialHits 119 .name(name() + ".partial_hits") 120 .desc("number of misses observed for a block being prefetched") 121 ; 122 123 numPagesCrossed 124 .name(name() + ".pages_crossed") 125 .desc("number of prefetches across pages") 126 ; 127 128 numMissedPrefetchedBlocks 129 .name(name() + ".misses_on_prefetched_blocks") 130 .desc("number of misses for blocks that were prefetched, yet missed") 131 ; 132} 133 134void 135Prefetcher::observeMiss(const Address& address, const RubyRequestType& type) 136{ 137 DPRINTF(RubyPrefetcher, "Observed miss for %s\n", address); 138 Address line_addr = line_address(address); 139 numMissObserved++; 140 141 // check to see if we have already issued a prefetch for this block 142 uint32_t index = 0; 143 PrefetchEntry *pfEntry = getPrefetchEntry(line_addr, index); 144 if (pfEntry != NULL) { 145 if (pfEntry->requestIssued[index]) { 146 if (pfEntry->requestCompleted[index]) { 147 // We prefetched too early and now the prefetch block no 148 // longer exists in the cache 149 numMissedPrefetchedBlocks++; 150 return; 151 } else { 152 // The controller has issued the prefetch request, 153 // but the request for the block arrived earlier. 154 numPartialHits++; 155 observePfHit(line_addr); 156 return; 157 } 158 } else { 159 // The request is still in the prefetch queue of the controller. 160 // Or was evicted because of other requests. 161 return; 162 } 163 } 164 165 // check to see if this address is in the unit stride filter 166 bool alloc = false; 167 bool hit = accessUnitFilter(m_unit_filter, m_unit_filter_hit, 168 m_unit_filter_index, line_addr, 1, alloc); 169 if (alloc) { 170 // allocate a new prefetch stream 171 initializeStream(line_addr, 1, getLRUindex(), type); 172 } 173 if (hit) { 174 DPRINTF(RubyPrefetcher, " *** hit in unit stride buffer\n"); 175 return; 176 } 177 178 hit = accessUnitFilter(m_negative_filter, m_negative_filter_hit, 179 m_negative_filter_index, line_addr, -1, alloc); 180 if (alloc) { 181 // allocate a new prefetch stream 182 initializeStream(line_addr, -1, getLRUindex(), type); 183 } 184 if (hit) { 185 DPRINTF(RubyPrefetcher, " *** hit in unit negative unit buffer\n"); 186 return; 187 } 188 189 // check to see if this address is in the non-unit stride filter 190 int stride = 0; // NULL value 191 hit = accessNonunitFilter(address, &stride, alloc); 192 if (alloc) { 193 assert(stride != 0); // ensure non-zero stride prefetches 194 initializeStream(line_addr, stride, getLRUindex(), type); 195 } 196 if (hit) { 197 DPRINTF(RubyPrefetcher, " *** hit in non-unit stride buffer\n"); 198 return; 199 } 200} 201 202void 203Prefetcher::observePfMiss(const Address& address) 204{ 205 numPartialHits++; 206 DPRINTF(RubyPrefetcher, "Observed partial hit for %s\n", address); 207 issueNextPrefetch(address, NULL); 208} 209 210void 211Prefetcher::observePfHit(const Address& address) 212{ 213 numHits++; 214 DPRINTF(RubyPrefetcher, "Observed hit for %s\n", address); 215 issueNextPrefetch(address, NULL); 216} 217 218void 219Prefetcher::issueNextPrefetch(const Address &address, PrefetchEntry *stream) 220{ 221 // get our corresponding stream fetcher 222 if (stream == NULL) { 223 uint32_t index = 0; 224 stream = getPrefetchEntry(address, index); 225 } 226 227 // if (for some reason), this stream is unallocated, return. 228 if (stream == NULL) { 229 DPRINTF(RubyPrefetcher, "Unallocated stream, returning\n"); 230 return; 231 } 232 233 // extend this prefetching stream by 1 (or more) 234 Address page_addr = page_address(stream->m_address); 235 Address line_addr = next_stride_address(stream->m_address, 236 stream->m_stride); 237 238 // possibly stop prefetching at page boundaries 239 if (page_addr != page_address(line_addr)) { 240 numPagesCrossed++; 241 if (!m_prefetch_cross_pages) { 242 // Deallocate the stream since we are not prefetching 243 // across page boundries 244 stream->m_is_valid = false; 245 return; 246 } 247 } 248 249 // launch next prefetch 250 stream->m_address = line_addr; 251 stream->m_use_time = m_controller->curCycle(); 252 DPRINTF(RubyPrefetcher, "Requesting prefetch for %s\n", line_addr); 253 m_controller->enqueuePrefetch(line_addr, stream->m_type); 254} 255 256uint32_t 257Prefetcher::getLRUindex(void) 258{ 259 uint32_t lru_index = 0; 260 Cycles lru_access = m_array[lru_index].m_use_time; 261 262 for (uint32_t i = 0; i < m_num_streams; i++) { 263 if (!m_array[i].m_is_valid) { 264 return i; 265 } 266 if (m_array[i].m_use_time < lru_access) { 267 lru_access = m_array[i].m_use_time; 268 lru_index = i; 269 } 270 } 271 272 return lru_index; 273} 274 275void 276Prefetcher::clearNonunitEntry(uint32_t index) 277{ 278 m_nonunit_filter[index].setAddress(0); 279 m_nonunit_stride[index] = 0; 280 m_nonunit_hit[index] = 0; 281} 282 283void 284Prefetcher::initializeStream(const Address& address, int stride, 285 uint32_t index, const RubyRequestType& type) 286{ 287 numAllocatedStreams++; 288 289 // initialize the stream prefetcher 290 PrefetchEntry *mystream = &(m_array[index]); 291 mystream->m_address = line_address(address); 292 mystream->m_stride = stride; 293 mystream->m_use_time = m_controller->curCycle(); 294 mystream->m_is_valid = true; 295 mystream->m_type = type; 296 297 // create a number of initial prefetches for this stream 298 Address page_addr = page_address(mystream->m_address); 299 Address line_addr = line_address(mystream->m_address); 300 Address prev_addr = line_addr; 301 302 // insert a number of prefetches into the prefetch table 303 for (int k = 0; k < m_num_startup_pfs; k++) { 304 line_addr = next_stride_address(line_addr, stride); 305 // possibly stop prefetching at page boundaries 306 if (page_addr != page_address(line_addr)) { 307 numPagesCrossed++; 308 if (!m_prefetch_cross_pages) { 309 // deallocate this stream prefetcher 310 mystream->m_is_valid = false; 311 return; 312 } 313 } 314 315 // launch prefetch 316 numPrefetchRequested++; 317 DPRINTF(RubyPrefetcher, "Requesting prefetch for %s\n", line_addr); 318 m_controller->enqueuePrefetch(line_addr, m_array[index].m_type); 319 prev_addr = line_addr; 320 } 321 322 // update the address to be the last address prefetched 323 mystream->m_address = line_addr; 324} 325 326PrefetchEntry * 327Prefetcher::getPrefetchEntry(const Address &address, uint32_t &index) 328{ 329 // search all streams for a match 330 for (int i = 0; i < m_num_streams; i++) { 331 // search all the outstanding prefetches for this stream 332 if (m_array[i].m_is_valid) { 333 for (int j = 0; j < m_num_startup_pfs; j++) { 334 if (next_stride_address(m_array[i].m_address, 335 -(m_array[i].m_stride*j)) == address) { 336 return &(m_array[i]); 337 } 338 } 339 } 340 } 341 return NULL; 342} 343 344bool 345Prefetcher::accessUnitFilter(std::vector<Address>& filter_table, 346 uint32_t *filter_hit, uint32_t &index, const Address &address, 347 int stride, bool &alloc) 348{ 349 //reset the alloc flag 350 alloc = false; 351 352 Address line_addr = line_address(address); 353 for (int i = 0; i < m_num_unit_filters; i++) { 354 if (filter_table[i] == line_addr) { 355 filter_table[i] = next_stride_address(filter_table[i], stride); 356 filter_hit[i]++; 357 if (filter_hit[i] >= m_train_misses) { 358 alloc = true; 359 } 360 return true; 361 } 362 } 363 364 // enter this address in the table 365 int local_index = index; 366 filter_table[local_index] = next_stride_address(line_addr, stride); 367 filter_hit[local_index] = 0; 368 local_index = local_index + 1; 369 if (local_index >= m_num_unit_filters) { 370 local_index = 0; 371 } 372 373 index = local_index; 374 return false; 375} 376 377bool 378Prefetcher::accessNonunitFilter(const Address& address, int *stride, 379 bool &alloc) 380{ 381 //reset the alloc flag 382 alloc = false; 383 384 /// look for non-unit strides based on a (user-defined) page size 385 Address page_addr = page_address(address); 386 Address line_addr = line_address(address); 387 388 for (uint32_t i = 0; i < m_num_nonunit_filters; i++) { 389 if (page_address(m_nonunit_filter[i]) == page_addr) { 390 // hit in the non-unit filter 391 // compute the actual stride (for this reference) 392 int delta = line_addr.getAddress() - m_nonunit_filter[i].getAddress(); 393 394 if (delta != 0) { 395 // no zero stride prefetches 396 // check that the stride matches (for the last N times) 397 if (delta == m_nonunit_stride[i]) { 398 // -> stride hit 399 // increment count (if > 2) allocate stream 400 m_nonunit_hit[i]++; 401 if (m_nonunit_hit[i] > m_train_misses) { 402 //This stride HAS to be the multiplicative constant of 403 //dataBlockBytes (bc next_stride_address is calculated based 404 //on this multiplicative constant!) 405 *stride = m_nonunit_stride[i]/RubySystem::getBlockSizeBytes(); 406 407 // clear this filter entry 408 clearNonunitEntry(i); 409 alloc = true; 410 } 411 } else { 412 // delta didn't match ... reset m_nonunit_hit count for this entry 413 m_nonunit_hit[i] = 0; 414 } 415 416 // update the last address seen & the stride 417 m_nonunit_stride[i] = delta; 418 m_nonunit_filter[i] = line_addr; 419 return true; 420 } else { 421 return false; 422 } 423 } 424 } 425 426 // not found: enter this address in the table 427 m_nonunit_filter[m_nonunit_index] = line_addr; 428 m_nonunit_stride[m_nonunit_index] = 0; 429 m_nonunit_hit[m_nonunit_index] = 0; 430 431 m_nonunit_index = m_nonunit_index + 1; 432 if (m_nonunit_index >= m_num_nonunit_filters) { 433 m_nonunit_index = 0; 434 } 435 return false; 436} 437 438void 439Prefetcher::print(std::ostream& out) const 440{ 441 out << name() << " Prefetcher State\n"; 442 // print out unit filter 443 out << "unit table:\n"; 444 for (int i = 0; i < m_num_unit_filters; i++) { 445 out << m_unit_filter[i] << std::endl; 446 } 447 448 out << "negative table:\n"; 449 for (int i = 0; i < m_num_unit_filters; i++) { 450 out << m_negative_filter[i] << std::endl; 451 } 452 453 // print out non-unit stride filter 454 out << "non-unit table:\n"; 455 for (int i = 0; i < m_num_nonunit_filters; i++) { 456 out << m_nonunit_filter[i] << " " 457 << m_nonunit_stride[i] << " " 458 << m_nonunit_hit[i] << std::endl; 459 } 460 461 // print out allocated stream buffers 462 out << "streams:\n"; 463 for (int i = 0; i < m_num_streams; i++) { 464 out << m_array[i].m_address << " " 465 << m_array[i].m_stride << " " 466 << m_array[i].m_is_valid << " " 467 << m_array[i].m_use_time << std::endl; 468 } 469} 470