Prefetcher.cc revision 9507
1/*
2 * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "debug/RubyPrefetcher.hh"
30#include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
31#include "mem/ruby/structures/Prefetcher.hh"
32#include "mem/ruby/system/System.hh"
33
34Prefetcher*
35PrefetcherParams::create()
36{
37    return new Prefetcher(this);
38}
39
40Prefetcher::Prefetcher(const Params *p)
41    : SimObject(p), m_num_streams(p->num_streams),
42    m_array(p->num_streams), m_train_misses(p->train_misses),
43    m_num_startup_pfs(p->num_startup_pfs), m_num_unit_filters(p->unit_filter),
44    m_num_nonunit_filters(p->nonunit_filter),
45    m_unit_filter(p->unit_filter, Address(0)),
46    m_negative_filter(p->unit_filter, Address(0)),
47    m_nonunit_filter(p->nonunit_filter, Address(0)),
48    m_prefetch_cross_pages(p->cross_page)
49{
50    assert(m_num_streams > 0);
51    assert(m_num_startup_pfs <= MAX_PF_INFLIGHT);
52
53    // create +1 stride filter
54    m_unit_filter_index = 0;
55    m_unit_filter_hit = new uint32_t[m_num_unit_filters];
56    for (uint32_t i =0; i < m_num_unit_filters; i++) {
57        m_unit_filter_hit[i] = 0;
58    }
59
60    // create -1 stride filter
61    m_negative_filter_index = 0;
62    m_negative_filter_hit = new uint32_t[m_num_unit_filters];
63    for (int i =0; i < m_num_unit_filters; i++) {
64        m_negative_filter_hit[i] = 0;
65    }
66
67    // create nonunit stride filter
68    m_nonunit_index = 0;
69    m_nonunit_stride = new int[m_num_nonunit_filters];
70    m_nonunit_hit    = new uint32_t[m_num_nonunit_filters];
71    for (int i =0; i < m_num_nonunit_filters; i++) {
72        m_nonunit_stride[i] = 0;
73        m_nonunit_hit[i]    = 0;
74    }
75}
76
77Prefetcher::~Prefetcher()
78{
79    delete m_unit_filter_hit;
80    delete m_negative_filter_hit;
81    delete m_nonunit_stride;
82    delete m_nonunit_hit;
83}
84
85void
86Prefetcher::regStats()
87{
88    numMissObserved
89        .name(name() + ".miss_observed")
90        .desc("number of misses observed")
91        ;
92
93    numAllocatedStreams
94        .name(name() + ".allocated_streams")
95        .desc("number of streams allocated for prefetching")
96        ;
97
98    numPrefetchRequested
99        .name(name() + ".prefetches_requested")
100        .desc("number of prefetch requests made")
101        ;
102
103    numPrefetchAccepted
104        .name(name() + ".prefetches_accepted")
105        .desc("number of prefetch requests accepted")
106        ;
107
108    numDroppedPrefetches
109        .name(name() + ".dropped_prefetches")
110        .desc("number of prefetch requests dropped")
111        ;
112
113    numHits
114        .name(name() + ".hits")
115        .desc("number of prefetched blocks accessed")
116        ;
117
118    numPartialHits
119        .name(name() + ".partial_hits")
120        .desc("number of misses observed for a block being prefetched")
121        ;
122
123    numPagesCrossed
124        .name(name() + ".pages_crossed")
125        .desc("number of prefetches across pages")
126        ;
127
128    numMissedPrefetchedBlocks
129        .name(name() + ".misses_on_prefetched_blocks")
130        .desc("number of misses for blocks that were prefetched, yet missed")
131        ;
132}
133
134void
135Prefetcher::observeMiss(const Address& address, const RubyRequestType& type)
136{
137    DPRINTF(RubyPrefetcher, "Observed miss for %s\n", address);
138    Address line_addr = line_address(address);
139    numMissObserved++;
140
141    // check to see if we have already issued a prefetch for this block
142    uint32_t index = 0;
143    PrefetchEntry *pfEntry = getPrefetchEntry(line_addr, index);
144    if (pfEntry != NULL) {
145        if (pfEntry->requestIssued[index]) {
146            if (pfEntry->requestCompleted[index]) {
147                // We prefetched too early and now the prefetch block no
148                // longer exists in the cache
149                numMissedPrefetchedBlocks++;
150                return;
151            } else {
152                // The controller has issued the prefetch request,
153                // but the request for the block arrived earlier.
154                numPartialHits++;
155                observePfHit(line_addr);
156                return;
157            }
158        } else {
159            // The request is still in the prefetch queue of the controller.
160            // Or was evicted because of other requests.
161            return;
162        }
163    }
164
165    // check to see if this address is in the unit stride filter
166    bool alloc = false;
167    bool hit = accessUnitFilter(m_unit_filter, m_unit_filter_hit,
168                                m_unit_filter_index, line_addr, 1, alloc);
169    if (alloc) {
170        // allocate a new prefetch stream
171        initializeStream(line_addr, 1, getLRUindex(), type);
172    }
173    if (hit) {
174        DPRINTF(RubyPrefetcher, "  *** hit in unit stride buffer\n");
175        return;
176    }
177
178    hit = accessUnitFilter(m_negative_filter, m_negative_filter_hit,
179        m_negative_filter_index, line_addr, -1, alloc);
180    if (alloc) {
181        // allocate a new prefetch stream
182        initializeStream(line_addr, -1, getLRUindex(), type);
183    }
184    if (hit) {
185        DPRINTF(RubyPrefetcher, "  *** hit in unit negative unit buffer\n");
186        return;
187    }
188
189    // check to see if this address is in the non-unit stride filter
190    int stride = 0;  // NULL value
191    hit = accessNonunitFilter(address, &stride, alloc);
192    if (alloc) {
193        assert(stride != 0);  // ensure non-zero stride prefetches
194        initializeStream(line_addr, stride, getLRUindex(), type);
195    }
196    if (hit) {
197        DPRINTF(RubyPrefetcher, "  *** hit in non-unit stride buffer\n");
198        return;
199    }
200}
201
202void
203Prefetcher::observePfMiss(const Address& address)
204{
205    numPartialHits++;
206    DPRINTF(RubyPrefetcher, "Observed partial hit for %s\n", address);
207    issueNextPrefetch(address, NULL);
208}
209
210void
211Prefetcher::observePfHit(const Address& address)
212{
213    numHits++;
214    DPRINTF(RubyPrefetcher, "Observed hit for %s\n", address);
215    issueNextPrefetch(address, NULL);
216}
217
218void
219Prefetcher::issueNextPrefetch(const Address &address, PrefetchEntry *stream)
220{
221    // get our corresponding stream fetcher
222    if (stream == NULL) {
223        uint32_t index = 0;
224        stream = getPrefetchEntry(address, index);
225    }
226
227    // if (for some reason), this stream is unallocated, return.
228    if (stream == NULL) {
229        DPRINTF(RubyPrefetcher, "Unallocated stream, returning\n");
230        return;
231    }
232
233    // extend this prefetching stream by 1 (or more)
234    Address page_addr = page_address(stream->m_address);
235    Address line_addr = next_stride_address(stream->m_address,
236                                            stream->m_stride);
237
238    // possibly stop prefetching at page boundaries
239    if (page_addr != page_address(line_addr)) {
240        numPagesCrossed++;
241        if (!m_prefetch_cross_pages) {
242            // Deallocate the stream since we are not prefetching
243            // across page boundries
244            stream->m_is_valid = false;
245            return;
246        }
247    }
248
249    // launch next prefetch
250    stream->m_address = line_addr;
251    stream->m_use_time = m_controller->curCycle();
252    DPRINTF(RubyPrefetcher, "Requesting prefetch for %s\n", line_addr);
253    m_controller->enqueuePrefetch(line_addr, stream->m_type);
254}
255
256uint32_t
257Prefetcher::getLRUindex(void)
258{
259    uint32_t lru_index = 0;
260    Cycles lru_access = m_array[lru_index].m_use_time;
261
262    for (uint32_t i = 0; i < m_num_streams; i++) {
263        if (!m_array[i].m_is_valid) {
264            return i;
265        }
266        if (m_array[i].m_use_time < lru_access) {
267            lru_access = m_array[i].m_use_time;
268            lru_index = i;
269        }
270    }
271
272    return lru_index;
273}
274
275void
276Prefetcher::clearNonunitEntry(uint32_t index)
277{
278    m_nonunit_filter[index].setAddress(0);
279    m_nonunit_stride[index] = 0;
280    m_nonunit_hit[index]    = 0;
281}
282
283void
284Prefetcher::initializeStream(const Address& address, int stride,
285     uint32_t index, const RubyRequestType& type)
286{
287    numAllocatedStreams++;
288
289    // initialize the stream prefetcher
290    PrefetchEntry *mystream = &(m_array[index]);
291    mystream->m_address = line_address(address);
292    mystream->m_stride = stride;
293    mystream->m_use_time = m_controller->curCycle();
294    mystream->m_is_valid = true;
295    mystream->m_type = type;
296
297    // create a number of initial prefetches for this stream
298    Address page_addr = page_address(mystream->m_address);
299    Address line_addr = line_address(mystream->m_address);
300    Address prev_addr = line_addr;
301
302    // insert a number of prefetches into the prefetch table
303    for (int k = 0; k < m_num_startup_pfs; k++) {
304        line_addr = next_stride_address(line_addr, stride);
305        // possibly stop prefetching at page boundaries
306        if (page_addr != page_address(line_addr)) {
307            numPagesCrossed++;
308            if (!m_prefetch_cross_pages) {
309                // deallocate this stream prefetcher
310                mystream->m_is_valid = false;
311                return;
312            }
313        }
314
315        // launch prefetch
316        numPrefetchRequested++;
317        DPRINTF(RubyPrefetcher, "Requesting prefetch for %s\n", line_addr);
318        m_controller->enqueuePrefetch(line_addr, m_array[index].m_type);
319        prev_addr = line_addr;
320    }
321
322    // update the address to be the last address prefetched
323    mystream->m_address = line_addr;
324}
325
326PrefetchEntry *
327Prefetcher::getPrefetchEntry(const Address &address, uint32_t &index)
328{
329    // search all streams for a match
330    for (int i = 0; i < m_num_streams; i++) {
331        // search all the outstanding prefetches for this stream
332        if (m_array[i].m_is_valid) {
333            for (int j = 0; j < m_num_startup_pfs; j++) {
334                if (next_stride_address(m_array[i].m_address,
335                    -(m_array[i].m_stride*j)) == address) {
336                    return &(m_array[i]);
337                }
338            }
339        }
340    }
341    return NULL;
342}
343
344bool
345Prefetcher::accessUnitFilter(std::vector<Address>& filter_table,
346    uint32_t *filter_hit, uint32_t &index, const Address &address,
347    int stride, bool &alloc)
348{
349    //reset the alloc flag
350    alloc = false;
351
352    Address line_addr = line_address(address);
353    for (int i = 0; i < m_num_unit_filters; i++) {
354        if (filter_table[i] == line_addr) {
355            filter_table[i] = next_stride_address(filter_table[i], stride);
356            filter_hit[i]++;
357            if (filter_hit[i] >= m_train_misses) {
358                alloc = true;
359            }
360            return true;
361        }
362    }
363
364    // enter this address in the table
365    int local_index = index;
366    filter_table[local_index] = next_stride_address(line_addr, stride);
367    filter_hit[local_index] = 0;
368    local_index = local_index + 1;
369    if (local_index >= m_num_unit_filters) {
370        local_index = 0;
371    }
372
373    index = local_index;
374    return false;
375}
376
377bool
378Prefetcher::accessNonunitFilter(const Address& address, int *stride,
379    bool &alloc)
380{
381    //reset the alloc flag
382    alloc = false;
383
384    /// look for non-unit strides based on a (user-defined) page size
385    Address page_addr = page_address(address);
386    Address line_addr = line_address(address);
387
388    for (uint32_t i = 0; i < m_num_nonunit_filters; i++) {
389        if (page_address(m_nonunit_filter[i]) == page_addr) {
390            // hit in the non-unit filter
391            // compute the actual stride (for this reference)
392            int delta = line_addr.getAddress() - m_nonunit_filter[i].getAddress();
393
394            if (delta != 0) {
395                // no zero stride prefetches
396                // check that the stride matches (for the last N times)
397                if (delta == m_nonunit_stride[i]) {
398                    // -> stride hit
399                    // increment count (if > 2) allocate stream
400                    m_nonunit_hit[i]++;
401                    if (m_nonunit_hit[i] > m_train_misses) {
402                        //This stride HAS to be the multiplicative constant of
403                        //dataBlockBytes (bc next_stride_address is calculated based
404                        //on this multiplicative constant!)
405                        *stride = m_nonunit_stride[i]/RubySystem::getBlockSizeBytes();
406
407                        // clear this filter entry
408                        clearNonunitEntry(i);
409                        alloc = true;
410                    }
411                } else {
412                    // delta didn't match ... reset m_nonunit_hit count for this entry
413                    m_nonunit_hit[i] = 0;
414                }
415
416                // update the last address seen & the stride
417                m_nonunit_stride[i] = delta;
418                m_nonunit_filter[i] = line_addr;
419                return true;
420            } else {
421                return false;
422            }
423        }
424    }
425
426    // not found: enter this address in the table
427    m_nonunit_filter[m_nonunit_index] = line_addr;
428    m_nonunit_stride[m_nonunit_index] = 0;
429    m_nonunit_hit[m_nonunit_index]    = 0;
430
431    m_nonunit_index = m_nonunit_index + 1;
432    if (m_nonunit_index >= m_num_nonunit_filters) {
433        m_nonunit_index = 0;
434    }
435    return false;
436}
437
438void
439Prefetcher::print(std::ostream& out) const
440{
441    out << name() << " Prefetcher State\n";
442    // print out unit filter
443    out << "unit table:\n";
444    for (int i = 0; i < m_num_unit_filters; i++) {
445        out << m_unit_filter[i] << std::endl;
446    }
447
448    out << "negative table:\n";
449    for (int i = 0; i < m_num_unit_filters; i++) {
450        out << m_negative_filter[i] << std::endl;
451    }
452
453    // print out non-unit stride filter
454    out << "non-unit table:\n";
455    for (int i = 0; i < m_num_nonunit_filters; i++) {
456        out << m_nonunit_filter[i] << " "
457            << m_nonunit_stride[i] << " "
458            << m_nonunit_hit[i] << std::endl;
459    }
460
461    // print out allocated stream buffers
462    out << "streams:\n";
463    for (int i = 0; i < m_num_streams; i++) {
464        out << m_array[i].m_address << " "
465            << m_array[i].m_stride << " "
466            << m_array[i].m_is_valid << " "
467            << m_array[i].m_use_time << std::endl;
468    }
469}
470