Prefetcher.cc revision 11430:bd1c6789c33f
1/*
2 * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "debug/RubyPrefetcher.hh"
30#include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
31#include "mem/ruby/structures/Prefetcher.hh"
32#include "mem/ruby/system/RubySystem.hh"
33
34Prefetcher*
35PrefetcherParams::create()
36{
37    return new Prefetcher(this);
38}
39
40Prefetcher::Prefetcher(const Params *p)
41    : SimObject(p), m_num_streams(p->num_streams),
42    m_array(p->num_streams), m_train_misses(p->train_misses),
43    m_num_startup_pfs(p->num_startup_pfs), m_num_unit_filters(p->unit_filter),
44    m_num_nonunit_filters(p->nonunit_filter),
45    m_unit_filter(p->unit_filter, 0),
46    m_negative_filter(p->unit_filter, 0),
47    m_nonunit_filter(p->nonunit_filter, 0),
48    m_prefetch_cross_pages(p->cross_page),
49    m_page_shift(p->sys->getPageShift())
50{
51    assert(m_num_streams > 0);
52    assert(m_num_startup_pfs <= MAX_PF_INFLIGHT);
53
54    // create +1 stride filter
55    m_unit_filter_index = 0;
56    m_unit_filter_hit = new uint32_t[m_num_unit_filters];
57    for (uint32_t i =0; i < m_num_unit_filters; i++) {
58        m_unit_filter_hit[i] = 0;
59    }
60
61    // create -1 stride filter
62    m_negative_filter_index = 0;
63    m_negative_filter_hit = new uint32_t[m_num_unit_filters];
64    for (int i =0; i < m_num_unit_filters; i++) {
65        m_negative_filter_hit[i] = 0;
66    }
67
68    // create nonunit stride filter
69    m_nonunit_index = 0;
70    m_nonunit_stride = new int[m_num_nonunit_filters];
71    m_nonunit_hit    = new uint32_t[m_num_nonunit_filters];
72    for (int i =0; i < m_num_nonunit_filters; i++) {
73        m_nonunit_stride[i] = 0;
74        m_nonunit_hit[i]    = 0;
75    }
76}
77
78Prefetcher::~Prefetcher()
79{
80    delete m_unit_filter_hit;
81    delete m_negative_filter_hit;
82    delete m_nonunit_stride;
83    delete m_nonunit_hit;
84}
85
86void
87Prefetcher::regStats()
88{
89    numMissObserved
90        .name(name() + ".miss_observed")
91        .desc("number of misses observed")
92        ;
93
94    numAllocatedStreams
95        .name(name() + ".allocated_streams")
96        .desc("number of streams allocated for prefetching")
97        ;
98
99    numPrefetchRequested
100        .name(name() + ".prefetches_requested")
101        .desc("number of prefetch requests made")
102        ;
103
104    numPrefetchAccepted
105        .name(name() + ".prefetches_accepted")
106        .desc("number of prefetch requests accepted")
107        ;
108
109    numDroppedPrefetches
110        .name(name() + ".dropped_prefetches")
111        .desc("number of prefetch requests dropped")
112        ;
113
114    numHits
115        .name(name() + ".hits")
116        .desc("number of prefetched blocks accessed")
117        ;
118
119    numPartialHits
120        .name(name() + ".partial_hits")
121        .desc("number of misses observed for a block being prefetched")
122        ;
123
124    numPagesCrossed
125        .name(name() + ".pages_crossed")
126        .desc("number of prefetches across pages")
127        ;
128
129    numMissedPrefetchedBlocks
130        .name(name() + ".misses_on_prefetched_blocks")
131        .desc("number of misses for blocks that were prefetched, yet missed")
132        ;
133}
134
135void
136Prefetcher::observeMiss(Addr address, const RubyRequestType& type)
137{
138    DPRINTF(RubyPrefetcher, "Observed miss for %#x\n", address);
139    Addr line_addr = makeLineAddress(address);
140    numMissObserved++;
141
142    // check to see if we have already issued a prefetch for this block
143    uint32_t index = 0;
144    PrefetchEntry *pfEntry = getPrefetchEntry(line_addr, index);
145    if (pfEntry != NULL) {
146        if (pfEntry->requestIssued[index]) {
147            if (pfEntry->requestCompleted[index]) {
148                // We prefetched too early and now the prefetch block no
149                // longer exists in the cache
150                numMissedPrefetchedBlocks++;
151                return;
152            } else {
153                // The controller has issued the prefetch request,
154                // but the request for the block arrived earlier.
155                numPartialHits++;
156                observePfHit(line_addr);
157                return;
158            }
159        } else {
160            // The request is still in the prefetch queue of the controller.
161            // Or was evicted because of other requests.
162            return;
163        }
164    }
165
166    // check to see if this address is in the unit stride filter
167    bool alloc = false;
168    bool hit = accessUnitFilter(m_unit_filter, m_unit_filter_hit,
169                                m_unit_filter_index, line_addr, 1, alloc);
170    if (alloc) {
171        // allocate a new prefetch stream
172        initializeStream(line_addr, 1, getLRUindex(), type);
173    }
174    if (hit) {
175        DPRINTF(RubyPrefetcher, "  *** hit in unit stride buffer\n");
176        return;
177    }
178
179    hit = accessUnitFilter(m_negative_filter, m_negative_filter_hit,
180        m_negative_filter_index, line_addr, -1, alloc);
181    if (alloc) {
182        // allocate a new prefetch stream
183        initializeStream(line_addr, -1, getLRUindex(), type);
184    }
185    if (hit) {
186        DPRINTF(RubyPrefetcher, "  *** hit in unit negative unit buffer\n");
187        return;
188    }
189
190    // check to see if this address is in the non-unit stride filter
191    int stride = 0;  // NULL value
192    hit = accessNonunitFilter(address, &stride, alloc);
193    if (alloc) {
194        assert(stride != 0);  // ensure non-zero stride prefetches
195        initializeStream(line_addr, stride, getLRUindex(), type);
196    }
197    if (hit) {
198        DPRINTF(RubyPrefetcher, "  *** hit in non-unit stride buffer\n");
199        return;
200    }
201}
202
203void
204Prefetcher::observePfMiss(Addr address)
205{
206    numPartialHits++;
207    DPRINTF(RubyPrefetcher, "Observed partial hit for %#x\n", address);
208    issueNextPrefetch(address, NULL);
209}
210
211void
212Prefetcher::observePfHit(Addr address)
213{
214    numHits++;
215    DPRINTF(RubyPrefetcher, "Observed hit for %#x\n", address);
216    issueNextPrefetch(address, NULL);
217}
218
219void
220Prefetcher::issueNextPrefetch(Addr address, PrefetchEntry *stream)
221{
222    // get our corresponding stream fetcher
223    if (stream == NULL) {
224        uint32_t index = 0;
225        stream = getPrefetchEntry(address, index);
226    }
227
228    // if (for some reason), this stream is unallocated, return.
229    if (stream == NULL) {
230        DPRINTF(RubyPrefetcher, "Unallocated stream, returning\n");
231        return;
232    }
233
234    // extend this prefetching stream by 1 (or more)
235    Addr page_addr = pageAddress(stream->m_address);
236    Addr line_addr = makeNextStrideAddress(stream->m_address,
237                                         stream->m_stride);
238
239    // possibly stop prefetching at page boundaries
240    if (page_addr != pageAddress(line_addr)) {
241        numPagesCrossed++;
242        if (!m_prefetch_cross_pages) {
243            // Deallocate the stream since we are not prefetching
244            // across page boundries
245            stream->m_is_valid = false;
246            return;
247        }
248    }
249
250    // launch next prefetch
251    stream->m_address = line_addr;
252    stream->m_use_time = m_controller->curCycle();
253    DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr);
254    m_controller->enqueuePrefetch(line_addr, stream->m_type);
255}
256
257uint32_t
258Prefetcher::getLRUindex(void)
259{
260    uint32_t lru_index = 0;
261    Cycles lru_access = m_array[lru_index].m_use_time;
262
263    for (uint32_t i = 0; i < m_num_streams; i++) {
264        if (!m_array[i].m_is_valid) {
265            return i;
266        }
267        if (m_array[i].m_use_time < lru_access) {
268            lru_access = m_array[i].m_use_time;
269            lru_index = i;
270        }
271    }
272
273    return lru_index;
274}
275
276void
277Prefetcher::clearNonunitEntry(uint32_t index)
278{
279    m_nonunit_filter[index] = 0;
280    m_nonunit_stride[index] = 0;
281    m_nonunit_hit[index]    = 0;
282}
283
284void
285Prefetcher::initializeStream(Addr address, int stride,
286     uint32_t index, const RubyRequestType& type)
287{
288    numAllocatedStreams++;
289
290    // initialize the stream prefetcher
291    PrefetchEntry *mystream = &(m_array[index]);
292    mystream->m_address = makeLineAddress(address);
293    mystream->m_stride = stride;
294    mystream->m_use_time = m_controller->curCycle();
295    mystream->m_is_valid = true;
296    mystream->m_type = type;
297
298    // create a number of initial prefetches for this stream
299    Addr page_addr = pageAddress(mystream->m_address);
300    Addr line_addr = makeLineAddress(mystream->m_address);
301
302    // insert a number of prefetches into the prefetch table
303    for (int k = 0; k < m_num_startup_pfs; k++) {
304        line_addr = makeNextStrideAddress(line_addr, stride);
305        // possibly stop prefetching at page boundaries
306        if (page_addr != pageAddress(line_addr)) {
307            numPagesCrossed++;
308            if (!m_prefetch_cross_pages) {
309                // deallocate this stream prefetcher
310                mystream->m_is_valid = false;
311                return;
312            }
313        }
314
315        // launch prefetch
316        numPrefetchRequested++;
317        DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr);
318        m_controller->enqueuePrefetch(line_addr, m_array[index].m_type);
319    }
320
321    // update the address to be the last address prefetched
322    mystream->m_address = line_addr;
323}
324
325PrefetchEntry *
326Prefetcher::getPrefetchEntry(Addr address, uint32_t &index)
327{
328    // search all streams for a match
329    for (int i = 0; i < m_num_streams; i++) {
330        // search all the outstanding prefetches for this stream
331        if (m_array[i].m_is_valid) {
332            for (int j = 0; j < m_num_startup_pfs; j++) {
333                if (makeNextStrideAddress(m_array[i].m_address,
334                    -(m_array[i].m_stride*j)) == address) {
335                    return &(m_array[i]);
336                }
337            }
338        }
339    }
340    return NULL;
341}
342
343bool
344Prefetcher::accessUnitFilter(std::vector<Addr>& filter_table,
345    uint32_t *filter_hit, uint32_t &index, Addr address,
346    int stride, bool &alloc)
347{
348    //reset the alloc flag
349    alloc = false;
350
351    Addr line_addr = makeLineAddress(address);
352    for (int i = 0; i < m_num_unit_filters; i++) {
353        if (filter_table[i] == line_addr) {
354            filter_table[i] = makeNextStrideAddress(filter_table[i], stride);
355            filter_hit[i]++;
356            if (filter_hit[i] >= m_train_misses) {
357                alloc = true;
358            }
359            return true;
360        }
361    }
362
363    // enter this address in the table
364    int local_index = index;
365    filter_table[local_index] = makeNextStrideAddress(line_addr, stride);
366    filter_hit[local_index] = 0;
367    local_index = local_index + 1;
368    if (local_index >= m_num_unit_filters) {
369        local_index = 0;
370    }
371
372    index = local_index;
373    return false;
374}
375
376bool
377Prefetcher::accessNonunitFilter(Addr address, int *stride,
378    bool &alloc)
379{
380    //reset the alloc flag
381    alloc = false;
382
383    /// look for non-unit strides based on a (user-defined) page size
384    Addr page_addr = pageAddress(address);
385    Addr line_addr = makeLineAddress(address);
386
387    for (uint32_t i = 0; i < m_num_nonunit_filters; i++) {
388        if (pageAddress(m_nonunit_filter[i]) == page_addr) {
389            // hit in the non-unit filter
390            // compute the actual stride (for this reference)
391            int delta = line_addr - m_nonunit_filter[i];
392
393            if (delta != 0) {
394                // no zero stride prefetches
395                // check that the stride matches (for the last N times)
396                if (delta == m_nonunit_stride[i]) {
397                    // -> stride hit
398                    // increment count (if > 2) allocate stream
399                    m_nonunit_hit[i]++;
400                    if (m_nonunit_hit[i] > m_train_misses) {
401                        // This stride HAS to be the multiplicative constant of
402                        // dataBlockBytes (bc makeNextStrideAddress is
403                        // calculated based on this multiplicative constant!)
404                        *stride = m_nonunit_stride[i] /
405                                    RubySystem::getBlockSizeBytes();
406
407                        // clear this filter entry
408                        clearNonunitEntry(i);
409                        alloc = true;
410                    }
411                } else {
412                    // delta didn't match ... reset m_nonunit_hit count for
413                    // this entry
414                    m_nonunit_hit[i] = 0;
415                }
416
417                // update the last address seen & the stride
418                m_nonunit_stride[i] = delta;
419                m_nonunit_filter[i] = line_addr;
420                return true;
421            } else {
422                return false;
423            }
424        }
425    }
426
427    // not found: enter this address in the table
428    m_nonunit_filter[m_nonunit_index] = line_addr;
429    m_nonunit_stride[m_nonunit_index] = 0;
430    m_nonunit_hit[m_nonunit_index]    = 0;
431
432    m_nonunit_index = m_nonunit_index + 1;
433    if (m_nonunit_index >= m_num_nonunit_filters) {
434        m_nonunit_index = 0;
435    }
436    return false;
437}
438
439void
440Prefetcher::print(std::ostream& out) const
441{
442    out << name() << " Prefetcher State\n";
443    // print out unit filter
444    out << "unit table:\n";
445    for (int i = 0; i < m_num_unit_filters; i++) {
446        out << m_unit_filter[i] << std::endl;
447    }
448
449    out << "negative table:\n";
450    for (int i = 0; i < m_num_unit_filters; i++) {
451        out << m_negative_filter[i] << std::endl;
452    }
453
454    // print out non-unit stride filter
455    out << "non-unit table:\n";
456    for (int i = 0; i < m_num_nonunit_filters; i++) {
457        out << m_nonunit_filter[i] << " "
458            << m_nonunit_stride[i] << " "
459            << m_nonunit_hit[i] << std::endl;
460    }
461
462    // print out allocated stream buffers
463    out << "streams:\n";
464    for (int i = 0; i < m_num_streams; i++) {
465        out << m_array[i].m_address << " "
466            << m_array[i].m_stride << " "
467            << m_array[i].m_is_valid << " "
468            << m_array[i].m_use_time << std::endl;
469    }
470}
471
472Addr
473Prefetcher::pageAddress(Addr addr) const
474{
475    return maskLowOrderBits(addr, m_page_shift);
476}
477