Deleted Added
sdiff udiff text old ( 11523:81332eb10367 ) new ( 11793:ef606668d247 )
full compact
1/*
2 * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "mem/ruby/structures/Prefetcher.hh"
30
31#include "debug/RubyPrefetcher.hh"
32#include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
33#include "mem/ruby/system/RubySystem.hh"
34
35Prefetcher*
36PrefetcherParams::create()
37{
38 return new Prefetcher(this);
39}
40
41Prefetcher::Prefetcher(const Params *p)
42 : SimObject(p), m_num_streams(p->num_streams),
43 m_array(p->num_streams), m_train_misses(p->train_misses),
44 m_num_startup_pfs(p->num_startup_pfs), m_num_unit_filters(p->unit_filter),
45 m_num_nonunit_filters(p->nonunit_filter),
46 m_unit_filter(p->unit_filter, 0),
47 m_negative_filter(p->unit_filter, 0),
48 m_nonunit_filter(p->nonunit_filter, 0),
49 m_prefetch_cross_pages(p->cross_page),
50 m_page_shift(p->sys->getPageShift())
51{
52 assert(m_num_streams > 0);
53 assert(m_num_startup_pfs <= MAX_PF_INFLIGHT);
54
55 // create +1 stride filter
56 m_unit_filter_index = 0;
57 m_unit_filter_hit = new uint32_t[m_num_unit_filters];
58 for (uint32_t i =0; i < m_num_unit_filters; i++) {
59 m_unit_filter_hit[i] = 0;
60 }
61
62 // create -1 stride filter
63 m_negative_filter_index = 0;
64 m_negative_filter_hit = new uint32_t[m_num_unit_filters];
65 for (int i =0; i < m_num_unit_filters; i++) {
66 m_negative_filter_hit[i] = 0;
67 }
68
69 // create nonunit stride filter
70 m_nonunit_index = 0;
71 m_nonunit_stride = new int[m_num_nonunit_filters];
72 m_nonunit_hit = new uint32_t[m_num_nonunit_filters];
73 for (int i =0; i < m_num_nonunit_filters; i++) {
74 m_nonunit_stride[i] = 0;
75 m_nonunit_hit[i] = 0;
76 }
77}
78
79Prefetcher::~Prefetcher()
80{
81 delete m_unit_filter_hit;
82 delete m_negative_filter_hit;
83 delete m_nonunit_stride;
84 delete m_nonunit_hit;
85}
86
87void
88Prefetcher::regStats()
89{
90 SimObject::regStats();
91
92 numMissObserved
93 .name(name() + ".miss_observed")
94 .desc("number of misses observed")
95 ;
96
97 numAllocatedStreams
98 .name(name() + ".allocated_streams")
99 .desc("number of streams allocated for prefetching")
100 ;
101
102 numPrefetchRequested
103 .name(name() + ".prefetches_requested")
104 .desc("number of prefetch requests made")
105 ;
106
107 numPrefetchAccepted
108 .name(name() + ".prefetches_accepted")
109 .desc("number of prefetch requests accepted")
110 ;
111
112 numDroppedPrefetches
113 .name(name() + ".dropped_prefetches")
114 .desc("number of prefetch requests dropped")
115 ;
116
117 numHits
118 .name(name() + ".hits")
119 .desc("number of prefetched blocks accessed")
120 ;
121
122 numPartialHits
123 .name(name() + ".partial_hits")
124 .desc("number of misses observed for a block being prefetched")
125 ;
126
127 numPagesCrossed
128 .name(name() + ".pages_crossed")
129 .desc("number of prefetches across pages")
130 ;
131
132 numMissedPrefetchedBlocks
133 .name(name() + ".misses_on_prefetched_blocks")
134 .desc("number of misses for blocks that were prefetched, yet missed")
135 ;
136}
137
138void
139Prefetcher::observeMiss(Addr address, const RubyRequestType& type)
140{
141 DPRINTF(RubyPrefetcher, "Observed miss for %#x\n", address);
142 Addr line_addr = makeLineAddress(address);
143 numMissObserved++;
144
145 // check to see if we have already issued a prefetch for this block
146 uint32_t index = 0;
147 PrefetchEntry *pfEntry = getPrefetchEntry(line_addr, index);
148 if (pfEntry != NULL) {
149 if (pfEntry->requestIssued[index]) {
150 if (pfEntry->requestCompleted[index]) {
151 // We prefetched too early and now the prefetch block no
152 // longer exists in the cache
153 numMissedPrefetchedBlocks++;
154 return;
155 } else {
156 // The controller has issued the prefetch request,
157 // but the request for the block arrived earlier.
158 numPartialHits++;
159 observePfHit(line_addr);
160 return;
161 }
162 } else {
163 // The request is still in the prefetch queue of the controller.
164 // Or was evicted because of other requests.
165 return;
166 }
167 }
168
169 // check to see if this address is in the unit stride filter
170 bool alloc = false;
171 bool hit = accessUnitFilter(m_unit_filter, m_unit_filter_hit,
172 m_unit_filter_index, line_addr, 1, alloc);
173 if (alloc) {
174 // allocate a new prefetch stream
175 initializeStream(line_addr, 1, getLRUindex(), type);
176 }
177 if (hit) {
178 DPRINTF(RubyPrefetcher, " *** hit in unit stride buffer\n");
179 return;
180 }
181
182 hit = accessUnitFilter(m_negative_filter, m_negative_filter_hit,
183 m_negative_filter_index, line_addr, -1, alloc);
184 if (alloc) {
185 // allocate a new prefetch stream
186 initializeStream(line_addr, -1, getLRUindex(), type);
187 }
188 if (hit) {
189 DPRINTF(RubyPrefetcher, " *** hit in unit negative unit buffer\n");
190 return;
191 }
192
193 // check to see if this address is in the non-unit stride filter
194 int stride = 0; // NULL value
195 hit = accessNonunitFilter(address, &stride, alloc);
196 if (alloc) {
197 assert(stride != 0); // ensure non-zero stride prefetches
198 initializeStream(line_addr, stride, getLRUindex(), type);
199 }
200 if (hit) {
201 DPRINTF(RubyPrefetcher, " *** hit in non-unit stride buffer\n");
202 return;
203 }
204}
205
206void
207Prefetcher::observePfMiss(Addr address)
208{
209 numPartialHits++;
210 DPRINTF(RubyPrefetcher, "Observed partial hit for %#x\n", address);
211 issueNextPrefetch(address, NULL);
212}
213
214void
215Prefetcher::observePfHit(Addr address)
216{
217 numHits++;
218 DPRINTF(RubyPrefetcher, "Observed hit for %#x\n", address);
219 issueNextPrefetch(address, NULL);
220}
221
222void
223Prefetcher::issueNextPrefetch(Addr address, PrefetchEntry *stream)
224{
225 // get our corresponding stream fetcher
226 if (stream == NULL) {
227 uint32_t index = 0;
228 stream = getPrefetchEntry(address, index);
229 }
230
231 // if (for some reason), this stream is unallocated, return.
232 if (stream == NULL) {
233 DPRINTF(RubyPrefetcher, "Unallocated stream, returning\n");
234 return;
235 }
236
237 // extend this prefetching stream by 1 (or more)
238 Addr page_addr = pageAddress(stream->m_address);
239 Addr line_addr = makeNextStrideAddress(stream->m_address,
240 stream->m_stride);
241
242 // possibly stop prefetching at page boundaries
243 if (page_addr != pageAddress(line_addr)) {
244 numPagesCrossed++;
245 if (!m_prefetch_cross_pages) {
246 // Deallocate the stream since we are not prefetching
247 // across page boundries
248 stream->m_is_valid = false;
249 return;
250 }
251 }
252
253 // launch next prefetch
254 stream->m_address = line_addr;
255 stream->m_use_time = m_controller->curCycle();
256 DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr);
257 m_controller->enqueuePrefetch(line_addr, stream->m_type);
258}
259
260uint32_t
261Prefetcher::getLRUindex(void)
262{
263 uint32_t lru_index = 0;
264 Cycles lru_access = m_array[lru_index].m_use_time;
265
266 for (uint32_t i = 0; i < m_num_streams; i++) {
267 if (!m_array[i].m_is_valid) {
268 return i;
269 }
270 if (m_array[i].m_use_time < lru_access) {
271 lru_access = m_array[i].m_use_time;
272 lru_index = i;
273 }
274 }
275
276 return lru_index;
277}
278
279void
280Prefetcher::clearNonunitEntry(uint32_t index)
281{
282 m_nonunit_filter[index] = 0;
283 m_nonunit_stride[index] = 0;
284 m_nonunit_hit[index] = 0;
285}
286
287void
288Prefetcher::initializeStream(Addr address, int stride,
289 uint32_t index, const RubyRequestType& type)
290{
291 numAllocatedStreams++;
292
293 // initialize the stream prefetcher
294 PrefetchEntry *mystream = &(m_array[index]);
295 mystream->m_address = makeLineAddress(address);
296 mystream->m_stride = stride;
297 mystream->m_use_time = m_controller->curCycle();
298 mystream->m_is_valid = true;
299 mystream->m_type = type;
300
301 // create a number of initial prefetches for this stream
302 Addr page_addr = pageAddress(mystream->m_address);
303 Addr line_addr = makeLineAddress(mystream->m_address);
304
305 // insert a number of prefetches into the prefetch table
306 for (int k = 0; k < m_num_startup_pfs; k++) {
307 line_addr = makeNextStrideAddress(line_addr, stride);
308 // possibly stop prefetching at page boundaries
309 if (page_addr != pageAddress(line_addr)) {
310 numPagesCrossed++;
311 if (!m_prefetch_cross_pages) {
312 // deallocate this stream prefetcher
313 mystream->m_is_valid = false;
314 return;
315 }
316 }
317
318 // launch prefetch
319 numPrefetchRequested++;
320 DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr);
321 m_controller->enqueuePrefetch(line_addr, m_array[index].m_type);
322 }
323
324 // update the address to be the last address prefetched
325 mystream->m_address = line_addr;
326}
327
328PrefetchEntry *
329Prefetcher::getPrefetchEntry(Addr address, uint32_t &index)
330{
331 // search all streams for a match
332 for (int i = 0; i < m_num_streams; i++) {
333 // search all the outstanding prefetches for this stream
334 if (m_array[i].m_is_valid) {
335 for (int j = 0; j < m_num_startup_pfs; j++) {
336 if (makeNextStrideAddress(m_array[i].m_address,
337 -(m_array[i].m_stride*j)) == address) {
338 return &(m_array[i]);
339 }
340 }
341 }
342 }
343 return NULL;
344}
345
346bool
347Prefetcher::accessUnitFilter(std::vector<Addr>& filter_table,
348 uint32_t *filter_hit, uint32_t &index, Addr address,
349 int stride, bool &alloc)
350{
351 //reset the alloc flag
352 alloc = false;
353
354 Addr line_addr = makeLineAddress(address);
355 for (int i = 0; i < m_num_unit_filters; i++) {
356 if (filter_table[i] == line_addr) {
357 filter_table[i] = makeNextStrideAddress(filter_table[i], stride);
358 filter_hit[i]++;
359 if (filter_hit[i] >= m_train_misses) {
360 alloc = true;
361 }
362 return true;
363 }
364 }
365
366 // enter this address in the table
367 int local_index = index;
368 filter_table[local_index] = makeNextStrideAddress(line_addr, stride);
369 filter_hit[local_index] = 0;
370 local_index = local_index + 1;
371 if (local_index >= m_num_unit_filters) {
372 local_index = 0;
373 }
374
375 index = local_index;
376 return false;
377}
378
379bool
380Prefetcher::accessNonunitFilter(Addr address, int *stride,
381 bool &alloc)
382{
383 //reset the alloc flag
384 alloc = false;
385
386 /// look for non-unit strides based on a (user-defined) page size
387 Addr page_addr = pageAddress(address);
388 Addr line_addr = makeLineAddress(address);
389
390 for (uint32_t i = 0; i < m_num_nonunit_filters; i++) {
391 if (pageAddress(m_nonunit_filter[i]) == page_addr) {
392 // hit in the non-unit filter
393 // compute the actual stride (for this reference)
394 int delta = line_addr - m_nonunit_filter[i];
395
396 if (delta != 0) {
397 // no zero stride prefetches
398 // check that the stride matches (for the last N times)
399 if (delta == m_nonunit_stride[i]) {
400 // -> stride hit
401 // increment count (if > 2) allocate stream
402 m_nonunit_hit[i]++;
403 if (m_nonunit_hit[i] > m_train_misses) {
404 // This stride HAS to be the multiplicative constant of
405 // dataBlockBytes (bc makeNextStrideAddress is
406 // calculated based on this multiplicative constant!)
407 *stride = m_nonunit_stride[i] /
408 RubySystem::getBlockSizeBytes();
409
410 // clear this filter entry
411 clearNonunitEntry(i);
412 alloc = true;
413 }
414 } else {
415 // delta didn't match ... reset m_nonunit_hit count for
416 // this entry
417 m_nonunit_hit[i] = 0;
418 }
419
420 // update the last address seen & the stride
421 m_nonunit_stride[i] = delta;
422 m_nonunit_filter[i] = line_addr;
423 return true;
424 } else {
425 return false;
426 }
427 }
428 }
429
430 // not found: enter this address in the table
431 m_nonunit_filter[m_nonunit_index] = line_addr;
432 m_nonunit_stride[m_nonunit_index] = 0;
433 m_nonunit_hit[m_nonunit_index] = 0;
434
435 m_nonunit_index = m_nonunit_index + 1;
436 if (m_nonunit_index >= m_num_nonunit_filters) {
437 m_nonunit_index = 0;
438 }
439 return false;
440}
441
442void
443Prefetcher::print(std::ostream& out) const
444{
445 out << name() << " Prefetcher State\n";
446 // print out unit filter
447 out << "unit table:\n";
448 for (int i = 0; i < m_num_unit_filters; i++) {
449 out << m_unit_filter[i] << std::endl;
450 }
451
452 out << "negative table:\n";
453 for (int i = 0; i < m_num_unit_filters; i++) {
454 out << m_negative_filter[i] << std::endl;
455 }
456
457 // print out non-unit stride filter
458 out << "non-unit table:\n";
459 for (int i = 0; i < m_num_nonunit_filters; i++) {
460 out << m_nonunit_filter[i] << " "
461 << m_nonunit_stride[i] << " "
462 << m_nonunit_hit[i] << std::endl;
463 }
464
465 // print out allocated stream buffers
466 out << "streams:\n";
467 for (int i = 0; i < m_num_streams; i++) {
468 out << m_array[i].m_address << " "
469 << m_array[i].m_stride << " "
470 << m_array[i].m_is_valid << " "
471 << m_array[i].m_use_time << std::endl;
472 }
473}
474
475Addr
476Prefetcher::pageAddress(Addr addr) const
477{
478 return maskLowOrderBits(addr, m_page_shift);
479}