Prefetcher.cc revision 11523:81332eb10367
1/*
2 * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "debug/RubyPrefetcher.hh"
30#include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
31#include "mem/ruby/structures/Prefetcher.hh"
32#include "mem/ruby/system/RubySystem.hh"
33
34Prefetcher*
35PrefetcherParams::create()
36{
37    return new Prefetcher(this);
38}
39
40Prefetcher::Prefetcher(const Params *p)
41    : SimObject(p), m_num_streams(p->num_streams),
42    m_array(p->num_streams), m_train_misses(p->train_misses),
43    m_num_startup_pfs(p->num_startup_pfs), m_num_unit_filters(p->unit_filter),
44    m_num_nonunit_filters(p->nonunit_filter),
45    m_unit_filter(p->unit_filter, 0),
46    m_negative_filter(p->unit_filter, 0),
47    m_nonunit_filter(p->nonunit_filter, 0),
48    m_prefetch_cross_pages(p->cross_page),
49    m_page_shift(p->sys->getPageShift())
50{
51    assert(m_num_streams > 0);
52    assert(m_num_startup_pfs <= MAX_PF_INFLIGHT);
53
54    // create +1 stride filter
55    m_unit_filter_index = 0;
56    m_unit_filter_hit = new uint32_t[m_num_unit_filters];
57    for (uint32_t i =0; i < m_num_unit_filters; i++) {
58        m_unit_filter_hit[i] = 0;
59    }
60
61    // create -1 stride filter
62    m_negative_filter_index = 0;
63    m_negative_filter_hit = new uint32_t[m_num_unit_filters];
64    for (int i =0; i < m_num_unit_filters; i++) {
65        m_negative_filter_hit[i] = 0;
66    }
67
68    // create nonunit stride filter
69    m_nonunit_index = 0;
70    m_nonunit_stride = new int[m_num_nonunit_filters];
71    m_nonunit_hit    = new uint32_t[m_num_nonunit_filters];
72    for (int i =0; i < m_num_nonunit_filters; i++) {
73        m_nonunit_stride[i] = 0;
74        m_nonunit_hit[i]    = 0;
75    }
76}
77
78Prefetcher::~Prefetcher()
79{
80    delete m_unit_filter_hit;
81    delete m_negative_filter_hit;
82    delete m_nonunit_stride;
83    delete m_nonunit_hit;
84}
85
86void
87Prefetcher::regStats()
88{
89    SimObject::regStats();
90
91    numMissObserved
92        .name(name() + ".miss_observed")
93        .desc("number of misses observed")
94        ;
95
96    numAllocatedStreams
97        .name(name() + ".allocated_streams")
98        .desc("number of streams allocated for prefetching")
99        ;
100
101    numPrefetchRequested
102        .name(name() + ".prefetches_requested")
103        .desc("number of prefetch requests made")
104        ;
105
106    numPrefetchAccepted
107        .name(name() + ".prefetches_accepted")
108        .desc("number of prefetch requests accepted")
109        ;
110
111    numDroppedPrefetches
112        .name(name() + ".dropped_prefetches")
113        .desc("number of prefetch requests dropped")
114        ;
115
116    numHits
117        .name(name() + ".hits")
118        .desc("number of prefetched blocks accessed")
119        ;
120
121    numPartialHits
122        .name(name() + ".partial_hits")
123        .desc("number of misses observed for a block being prefetched")
124        ;
125
126    numPagesCrossed
127        .name(name() + ".pages_crossed")
128        .desc("number of prefetches across pages")
129        ;
130
131    numMissedPrefetchedBlocks
132        .name(name() + ".misses_on_prefetched_blocks")
133        .desc("number of misses for blocks that were prefetched, yet missed")
134        ;
135}
136
137void
138Prefetcher::observeMiss(Addr address, const RubyRequestType& type)
139{
140    DPRINTF(RubyPrefetcher, "Observed miss for %#x\n", address);
141    Addr line_addr = makeLineAddress(address);
142    numMissObserved++;
143
144    // check to see if we have already issued a prefetch for this block
145    uint32_t index = 0;
146    PrefetchEntry *pfEntry = getPrefetchEntry(line_addr, index);
147    if (pfEntry != NULL) {
148        if (pfEntry->requestIssued[index]) {
149            if (pfEntry->requestCompleted[index]) {
150                // We prefetched too early and now the prefetch block no
151                // longer exists in the cache
152                numMissedPrefetchedBlocks++;
153                return;
154            } else {
155                // The controller has issued the prefetch request,
156                // but the request for the block arrived earlier.
157                numPartialHits++;
158                observePfHit(line_addr);
159                return;
160            }
161        } else {
162            // The request is still in the prefetch queue of the controller.
163            // Or was evicted because of other requests.
164            return;
165        }
166    }
167
168    // check to see if this address is in the unit stride filter
169    bool alloc = false;
170    bool hit = accessUnitFilter(m_unit_filter, m_unit_filter_hit,
171                                m_unit_filter_index, line_addr, 1, alloc);
172    if (alloc) {
173        // allocate a new prefetch stream
174        initializeStream(line_addr, 1, getLRUindex(), type);
175    }
176    if (hit) {
177        DPRINTF(RubyPrefetcher, "  *** hit in unit stride buffer\n");
178        return;
179    }
180
181    hit = accessUnitFilter(m_negative_filter, m_negative_filter_hit,
182        m_negative_filter_index, line_addr, -1, alloc);
183    if (alloc) {
184        // allocate a new prefetch stream
185        initializeStream(line_addr, -1, getLRUindex(), type);
186    }
187    if (hit) {
188        DPRINTF(RubyPrefetcher, "  *** hit in unit negative unit buffer\n");
189        return;
190    }
191
192    // check to see if this address is in the non-unit stride filter
193    int stride = 0;  // NULL value
194    hit = accessNonunitFilter(address, &stride, alloc);
195    if (alloc) {
196        assert(stride != 0);  // ensure non-zero stride prefetches
197        initializeStream(line_addr, stride, getLRUindex(), type);
198    }
199    if (hit) {
200        DPRINTF(RubyPrefetcher, "  *** hit in non-unit stride buffer\n");
201        return;
202    }
203}
204
205void
206Prefetcher::observePfMiss(Addr address)
207{
208    numPartialHits++;
209    DPRINTF(RubyPrefetcher, "Observed partial hit for %#x\n", address);
210    issueNextPrefetch(address, NULL);
211}
212
213void
214Prefetcher::observePfHit(Addr address)
215{
216    numHits++;
217    DPRINTF(RubyPrefetcher, "Observed hit for %#x\n", address);
218    issueNextPrefetch(address, NULL);
219}
220
221void
222Prefetcher::issueNextPrefetch(Addr address, PrefetchEntry *stream)
223{
224    // get our corresponding stream fetcher
225    if (stream == NULL) {
226        uint32_t index = 0;
227        stream = getPrefetchEntry(address, index);
228    }
229
230    // if (for some reason), this stream is unallocated, return.
231    if (stream == NULL) {
232        DPRINTF(RubyPrefetcher, "Unallocated stream, returning\n");
233        return;
234    }
235
236    // extend this prefetching stream by 1 (or more)
237    Addr page_addr = pageAddress(stream->m_address);
238    Addr line_addr = makeNextStrideAddress(stream->m_address,
239                                         stream->m_stride);
240
241    // possibly stop prefetching at page boundaries
242    if (page_addr != pageAddress(line_addr)) {
243        numPagesCrossed++;
244        if (!m_prefetch_cross_pages) {
245            // Deallocate the stream since we are not prefetching
246            // across page boundries
247            stream->m_is_valid = false;
248            return;
249        }
250    }
251
252    // launch next prefetch
253    stream->m_address = line_addr;
254    stream->m_use_time = m_controller->curCycle();
255    DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr);
256    m_controller->enqueuePrefetch(line_addr, stream->m_type);
257}
258
259uint32_t
260Prefetcher::getLRUindex(void)
261{
262    uint32_t lru_index = 0;
263    Cycles lru_access = m_array[lru_index].m_use_time;
264
265    for (uint32_t i = 0; i < m_num_streams; i++) {
266        if (!m_array[i].m_is_valid) {
267            return i;
268        }
269        if (m_array[i].m_use_time < lru_access) {
270            lru_access = m_array[i].m_use_time;
271            lru_index = i;
272        }
273    }
274
275    return lru_index;
276}
277
278void
279Prefetcher::clearNonunitEntry(uint32_t index)
280{
281    m_nonunit_filter[index] = 0;
282    m_nonunit_stride[index] = 0;
283    m_nonunit_hit[index]    = 0;
284}
285
286void
287Prefetcher::initializeStream(Addr address, int stride,
288     uint32_t index, const RubyRequestType& type)
289{
290    numAllocatedStreams++;
291
292    // initialize the stream prefetcher
293    PrefetchEntry *mystream = &(m_array[index]);
294    mystream->m_address = makeLineAddress(address);
295    mystream->m_stride = stride;
296    mystream->m_use_time = m_controller->curCycle();
297    mystream->m_is_valid = true;
298    mystream->m_type = type;
299
300    // create a number of initial prefetches for this stream
301    Addr page_addr = pageAddress(mystream->m_address);
302    Addr line_addr = makeLineAddress(mystream->m_address);
303
304    // insert a number of prefetches into the prefetch table
305    for (int k = 0; k < m_num_startup_pfs; k++) {
306        line_addr = makeNextStrideAddress(line_addr, stride);
307        // possibly stop prefetching at page boundaries
308        if (page_addr != pageAddress(line_addr)) {
309            numPagesCrossed++;
310            if (!m_prefetch_cross_pages) {
311                // deallocate this stream prefetcher
312                mystream->m_is_valid = false;
313                return;
314            }
315        }
316
317        // launch prefetch
318        numPrefetchRequested++;
319        DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr);
320        m_controller->enqueuePrefetch(line_addr, m_array[index].m_type);
321    }
322
323    // update the address to be the last address prefetched
324    mystream->m_address = line_addr;
325}
326
327PrefetchEntry *
328Prefetcher::getPrefetchEntry(Addr address, uint32_t &index)
329{
330    // search all streams for a match
331    for (int i = 0; i < m_num_streams; i++) {
332        // search all the outstanding prefetches for this stream
333        if (m_array[i].m_is_valid) {
334            for (int j = 0; j < m_num_startup_pfs; j++) {
335                if (makeNextStrideAddress(m_array[i].m_address,
336                    -(m_array[i].m_stride*j)) == address) {
337                    return &(m_array[i]);
338                }
339            }
340        }
341    }
342    return NULL;
343}
344
345bool
346Prefetcher::accessUnitFilter(std::vector<Addr>& filter_table,
347    uint32_t *filter_hit, uint32_t &index, Addr address,
348    int stride, bool &alloc)
349{
350    //reset the alloc flag
351    alloc = false;
352
353    Addr line_addr = makeLineAddress(address);
354    for (int i = 0; i < m_num_unit_filters; i++) {
355        if (filter_table[i] == line_addr) {
356            filter_table[i] = makeNextStrideAddress(filter_table[i], stride);
357            filter_hit[i]++;
358            if (filter_hit[i] >= m_train_misses) {
359                alloc = true;
360            }
361            return true;
362        }
363    }
364
365    // enter this address in the table
366    int local_index = index;
367    filter_table[local_index] = makeNextStrideAddress(line_addr, stride);
368    filter_hit[local_index] = 0;
369    local_index = local_index + 1;
370    if (local_index >= m_num_unit_filters) {
371        local_index = 0;
372    }
373
374    index = local_index;
375    return false;
376}
377
378bool
379Prefetcher::accessNonunitFilter(Addr address, int *stride,
380    bool &alloc)
381{
382    //reset the alloc flag
383    alloc = false;
384
385    /// look for non-unit strides based on a (user-defined) page size
386    Addr page_addr = pageAddress(address);
387    Addr line_addr = makeLineAddress(address);
388
389    for (uint32_t i = 0; i < m_num_nonunit_filters; i++) {
390        if (pageAddress(m_nonunit_filter[i]) == page_addr) {
391            // hit in the non-unit filter
392            // compute the actual stride (for this reference)
393            int delta = line_addr - m_nonunit_filter[i];
394
395            if (delta != 0) {
396                // no zero stride prefetches
397                // check that the stride matches (for the last N times)
398                if (delta == m_nonunit_stride[i]) {
399                    // -> stride hit
400                    // increment count (if > 2) allocate stream
401                    m_nonunit_hit[i]++;
402                    if (m_nonunit_hit[i] > m_train_misses) {
403                        // This stride HAS to be the multiplicative constant of
404                        // dataBlockBytes (bc makeNextStrideAddress is
405                        // calculated based on this multiplicative constant!)
406                        *stride = m_nonunit_stride[i] /
407                                    RubySystem::getBlockSizeBytes();
408
409                        // clear this filter entry
410                        clearNonunitEntry(i);
411                        alloc = true;
412                    }
413                } else {
414                    // delta didn't match ... reset m_nonunit_hit count for
415                    // this entry
416                    m_nonunit_hit[i] = 0;
417                }
418
419                // update the last address seen & the stride
420                m_nonunit_stride[i] = delta;
421                m_nonunit_filter[i] = line_addr;
422                return true;
423            } else {
424                return false;
425            }
426        }
427    }
428
429    // not found: enter this address in the table
430    m_nonunit_filter[m_nonunit_index] = line_addr;
431    m_nonunit_stride[m_nonunit_index] = 0;
432    m_nonunit_hit[m_nonunit_index]    = 0;
433
434    m_nonunit_index = m_nonunit_index + 1;
435    if (m_nonunit_index >= m_num_nonunit_filters) {
436        m_nonunit_index = 0;
437    }
438    return false;
439}
440
441void
442Prefetcher::print(std::ostream& out) const
443{
444    out << name() << " Prefetcher State\n";
445    // print out unit filter
446    out << "unit table:\n";
447    for (int i = 0; i < m_num_unit_filters; i++) {
448        out << m_unit_filter[i] << std::endl;
449    }
450
451    out << "negative table:\n";
452    for (int i = 0; i < m_num_unit_filters; i++) {
453        out << m_negative_filter[i] << std::endl;
454    }
455
456    // print out non-unit stride filter
457    out << "non-unit table:\n";
458    for (int i = 0; i < m_num_nonunit_filters; i++) {
459        out << m_nonunit_filter[i] << " "
460            << m_nonunit_stride[i] << " "
461            << m_nonunit_hit[i] << std::endl;
462    }
463
464    // print out allocated stream buffers
465    out << "streams:\n";
466    for (int i = 0; i < m_num_streams; i++) {
467        out << m_array[i].m_address << " "
468            << m_array[i].m_stride << " "
469            << m_array[i].m_is_valid << " "
470            << m_array[i].m_use_time << std::endl;
471    }
472}
473
474Addr
475Prefetcher::pageAddress(Addr addr) const
476{
477    return maskLowOrderBits(addr, m_page_shift);
478}
479