1/*
2 * Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "mem/ruby/structures/Prefetcher.hh"
30
31#include "debug/RubyPrefetcher.hh"
32#include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
33#include "mem/ruby/system/RubySystem.hh"
34
35Prefetcher*
36PrefetcherParams::create()
37{
38    return new Prefetcher(this);
39}
40
41Prefetcher::Prefetcher(const Params *p)
42    : SimObject(p), m_num_streams(p->num_streams),
43    m_array(p->num_streams), m_train_misses(p->train_misses),
44    m_num_startup_pfs(p->num_startup_pfs), m_num_unit_filters(p->unit_filter),
45    m_num_nonunit_filters(p->nonunit_filter),
46    m_unit_filter(p->unit_filter, 0),
47    m_negative_filter(p->unit_filter, 0),
48    m_nonunit_filter(p->nonunit_filter, 0),
49    m_prefetch_cross_pages(p->cross_page),
50    m_page_shift(p->sys->getPageShift())
51{
52    assert(m_num_streams > 0);
53    assert(m_num_startup_pfs <= MAX_PF_INFLIGHT);
54
55    // create +1 stride filter
56    m_unit_filter_index = 0;
57    m_unit_filter_hit = new uint32_t[m_num_unit_filters];
58    for (uint32_t i =0; i < m_num_unit_filters; i++) {
59        m_unit_filter_hit[i] = 0;
60    }
61
62    // create -1 stride filter
63    m_negative_filter_index = 0;
64    m_negative_filter_hit = new uint32_t[m_num_unit_filters];
65    for (int i =0; i < m_num_unit_filters; i++) {
66        m_negative_filter_hit[i] = 0;
67    }
68
69    // create nonunit stride filter
70    m_nonunit_index = 0;
71    m_nonunit_stride = new int[m_num_nonunit_filters];
72    m_nonunit_hit    = new uint32_t[m_num_nonunit_filters];
73    for (int i =0; i < m_num_nonunit_filters; i++) {
74        m_nonunit_stride[i] = 0;
75        m_nonunit_hit[i]    = 0;
76    }
77}
78
79Prefetcher::~Prefetcher()
80{
81    delete m_unit_filter_hit;
82    delete m_negative_filter_hit;
83    delete m_nonunit_stride;
84    delete m_nonunit_hit;
85}
86
87void
88Prefetcher::regStats()
89{
90    SimObject::regStats();
91
92    numMissObserved
93        .name(name() + ".miss_observed")
94        .desc("number of misses observed")
95        ;
96
97    numAllocatedStreams
98        .name(name() + ".allocated_streams")
99        .desc("number of streams allocated for prefetching")
100        ;
101
102    numPrefetchRequested
103        .name(name() + ".prefetches_requested")
104        .desc("number of prefetch requests made")
105        ;
106
107    numPrefetchAccepted
108        .name(name() + ".prefetches_accepted")
109        .desc("number of prefetch requests accepted")
110        ;
111
112    numDroppedPrefetches
113        .name(name() + ".dropped_prefetches")
114        .desc("number of prefetch requests dropped")
115        ;
116
117    numHits
118        .name(name() + ".hits")
119        .desc("number of prefetched blocks accessed")
120        ;
121
122    numPartialHits
123        .name(name() + ".partial_hits")
124        .desc("number of misses observed for a block being prefetched")
125        ;
126
127    numPagesCrossed
128        .name(name() + ".pages_crossed")
129        .desc("number of prefetches across pages")
130        ;
131
132    numMissedPrefetchedBlocks
133        .name(name() + ".misses_on_prefetched_blocks")
134        .desc("number of misses for blocks that were prefetched, yet missed")
135        ;
136}
137
138void
139Prefetcher::observeMiss(Addr address, const RubyRequestType& type)
140{
141    DPRINTF(RubyPrefetcher, "Observed miss for %#x\n", address);
142    Addr line_addr = makeLineAddress(address);
143    numMissObserved++;
144
145    // check to see if we have already issued a prefetch for this block
146    uint32_t index = 0;
147    PrefetchEntry *pfEntry = getPrefetchEntry(line_addr, index);
148    if (pfEntry != NULL) {
149        if (pfEntry->requestIssued[index]) {
150            if (pfEntry->requestCompleted[index]) {
151                // We prefetched too early and now the prefetch block no
152                // longer exists in the cache
153                numMissedPrefetchedBlocks++;
154                return;
155            } else {
156                // The controller has issued the prefetch request,
157                // but the request for the block arrived earlier.
158                numPartialHits++;
159                observePfHit(line_addr);
160                return;
161            }
162        } else {
163            // The request is still in the prefetch queue of the controller.
164            // Or was evicted because of other requests.
165            return;
166        }
167    }
168
169    // check to see if this address is in the unit stride filter
170    bool alloc = false;
171    bool hit = accessUnitFilter(m_unit_filter, m_unit_filter_hit,
172                                m_unit_filter_index, line_addr, 1, alloc);
173    if (alloc) {
174        // allocate a new prefetch stream
175        initializeStream(line_addr, 1, getLRUindex(), type);
176    }
177    if (hit) {
178        DPRINTF(RubyPrefetcher, "  *** hit in unit stride buffer\n");
179        return;
180    }
181
182    hit = accessUnitFilter(m_negative_filter, m_negative_filter_hit,
183        m_negative_filter_index, line_addr, -1, alloc);
184    if (alloc) {
185        // allocate a new prefetch stream
186        initializeStream(line_addr, -1, getLRUindex(), type);
187    }
188    if (hit) {
189        DPRINTF(RubyPrefetcher, "  *** hit in unit negative unit buffer\n");
190        return;
191    }
192
193    // check to see if this address is in the non-unit stride filter
194    int stride = 0;  // NULL value
195    hit = accessNonunitFilter(address, &stride, alloc);
196    if (alloc) {
197        assert(stride != 0);  // ensure non-zero stride prefetches
198        initializeStream(line_addr, stride, getLRUindex(), type);
199    }
200    if (hit) {
201        DPRINTF(RubyPrefetcher, "  *** hit in non-unit stride buffer\n");
202        return;
203    }
204}
205
206void
207Prefetcher::observePfMiss(Addr address)
208{
209    numPartialHits++;
210    DPRINTF(RubyPrefetcher, "Observed partial hit for %#x\n", address);
211    issueNextPrefetch(address, NULL);
212}
213
214void
215Prefetcher::observePfHit(Addr address)
216{
217    numHits++;
218    DPRINTF(RubyPrefetcher, "Observed hit for %#x\n", address);
219    issueNextPrefetch(address, NULL);
220}
221
222void
223Prefetcher::issueNextPrefetch(Addr address, PrefetchEntry *stream)
224{
225    // get our corresponding stream fetcher
226    if (stream == NULL) {
227        uint32_t index = 0;
228        stream = getPrefetchEntry(address, index);
229    }
230
231    // if (for some reason), this stream is unallocated, return.
232    if (stream == NULL) {
233        DPRINTF(RubyPrefetcher, "Unallocated stream, returning\n");
234        return;
235    }
236
237    // extend this prefetching stream by 1 (or more)
238    Addr page_addr = pageAddress(stream->m_address);
239    Addr line_addr = makeNextStrideAddress(stream->m_address,
240                                         stream->m_stride);
241
242    // possibly stop prefetching at page boundaries
243    if (page_addr != pageAddress(line_addr)) {
244        numPagesCrossed++;
245        if (!m_prefetch_cross_pages) {
246            // Deallocate the stream since we are not prefetching
247            // across page boundries
248            stream->m_is_valid = false;
249            return;
250        }
251    }
252
253    // launch next prefetch
254    stream->m_address = line_addr;
255    stream->m_use_time = m_controller->curCycle();
256    DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr);
257    m_controller->enqueuePrefetch(line_addr, stream->m_type);
258}
259
260uint32_t
261Prefetcher::getLRUindex(void)
262{
263    uint32_t lru_index = 0;
264    Cycles lru_access = m_array[lru_index].m_use_time;
265
266    for (uint32_t i = 0; i < m_num_streams; i++) {
267        if (!m_array[i].m_is_valid) {
268            return i;
269        }
270        if (m_array[i].m_use_time < lru_access) {
271            lru_access = m_array[i].m_use_time;
272            lru_index = i;
273        }
274    }
275
276    return lru_index;
277}
278
279void
280Prefetcher::clearNonunitEntry(uint32_t index)
281{
282    m_nonunit_filter[index] = 0;
283    m_nonunit_stride[index] = 0;
284    m_nonunit_hit[index]    = 0;
285}
286
287void
288Prefetcher::initializeStream(Addr address, int stride,
289     uint32_t index, const RubyRequestType& type)
290{
291    numAllocatedStreams++;
292
293    // initialize the stream prefetcher
294    PrefetchEntry *mystream = &(m_array[index]);
295    mystream->m_address = makeLineAddress(address);
296    mystream->m_stride = stride;
297    mystream->m_use_time = m_controller->curCycle();
298    mystream->m_is_valid = true;
299    mystream->m_type = type;
300
301    // create a number of initial prefetches for this stream
302    Addr page_addr = pageAddress(mystream->m_address);
303    Addr line_addr = makeLineAddress(mystream->m_address);
304
305    // insert a number of prefetches into the prefetch table
306    for (int k = 0; k < m_num_startup_pfs; k++) {
307        line_addr = makeNextStrideAddress(line_addr, stride);
308        // possibly stop prefetching at page boundaries
309        if (page_addr != pageAddress(line_addr)) {
310            numPagesCrossed++;
311            if (!m_prefetch_cross_pages) {
312                // deallocate this stream prefetcher
313                mystream->m_is_valid = false;
314                return;
315            }
316        }
317
318        // launch prefetch
319        numPrefetchRequested++;
320        DPRINTF(RubyPrefetcher, "Requesting prefetch for %#x\n", line_addr);
321        m_controller->enqueuePrefetch(line_addr, m_array[index].m_type);
322    }
323
324    // update the address to be the last address prefetched
325    mystream->m_address = line_addr;
326}
327
328PrefetchEntry *
329Prefetcher::getPrefetchEntry(Addr address, uint32_t &index)
330{
331    // search all streams for a match
332    for (int i = 0; i < m_num_streams; i++) {
333        // search all the outstanding prefetches for this stream
334        if (m_array[i].m_is_valid) {
335            for (int j = 0; j < m_num_startup_pfs; j++) {
336                if (makeNextStrideAddress(m_array[i].m_address,
337                    -(m_array[i].m_stride*j)) == address) {
338                    return &(m_array[i]);
339                }
340            }
341        }
342    }
343    return NULL;
344}
345
346bool
347Prefetcher::accessUnitFilter(std::vector<Addr>& filter_table,
348    uint32_t *filter_hit, uint32_t &index, Addr address,
349    int stride, bool &alloc)
350{
351    //reset the alloc flag
352    alloc = false;
353
354    Addr line_addr = makeLineAddress(address);
355    for (int i = 0; i < m_num_unit_filters; i++) {
356        if (filter_table[i] == line_addr) {
357            filter_table[i] = makeNextStrideAddress(filter_table[i], stride);
358            filter_hit[i]++;
359            if (filter_hit[i] >= m_train_misses) {
360                alloc = true;
361            }
362            return true;
363        }
364    }
365
366    // enter this address in the table
367    int local_index = index;
368    filter_table[local_index] = makeNextStrideAddress(line_addr, stride);
369    filter_hit[local_index] = 0;
370    local_index = local_index + 1;
371    if (local_index >= m_num_unit_filters) {
372        local_index = 0;
373    }
374
375    index = local_index;
376    return false;
377}
378
379bool
380Prefetcher::accessNonunitFilter(Addr address, int *stride,
381    bool &alloc)
382{
383    //reset the alloc flag
384    alloc = false;
385
386    /// look for non-unit strides based on a (user-defined) page size
387    Addr page_addr = pageAddress(address);
388    Addr line_addr = makeLineAddress(address);
389
390    for (uint32_t i = 0; i < m_num_nonunit_filters; i++) {
391        if (pageAddress(m_nonunit_filter[i]) == page_addr) {
392            // hit in the non-unit filter
393            // compute the actual stride (for this reference)
394            int delta = line_addr - m_nonunit_filter[i];
395
396            if (delta != 0) {
397                // no zero stride prefetches
398                // check that the stride matches (for the last N times)
399                if (delta == m_nonunit_stride[i]) {
400                    // -> stride hit
401                    // increment count (if > 2) allocate stream
402                    m_nonunit_hit[i]++;
403                    if (m_nonunit_hit[i] > m_train_misses) {
404                        // This stride HAS to be the multiplicative constant of
405                        // dataBlockBytes (bc makeNextStrideAddress is
406                        // calculated based on this multiplicative constant!)
407                        *stride = m_nonunit_stride[i] /
408                                    RubySystem::getBlockSizeBytes();
409
410                        // clear this filter entry
411                        clearNonunitEntry(i);
412                        alloc = true;
413                    }
414                } else {
415                    // delta didn't match ... reset m_nonunit_hit count for
416                    // this entry
417                    m_nonunit_hit[i] = 0;
418                }
419
420                // update the last address seen & the stride
421                m_nonunit_stride[i] = delta;
422                m_nonunit_filter[i] = line_addr;
423                return true;
424            } else {
425                return false;
426            }
427        }
428    }
429
430    // not found: enter this address in the table
431    m_nonunit_filter[m_nonunit_index] = line_addr;
432    m_nonunit_stride[m_nonunit_index] = 0;
433    m_nonunit_hit[m_nonunit_index]    = 0;
434
435    m_nonunit_index = m_nonunit_index + 1;
436    if (m_nonunit_index >= m_num_nonunit_filters) {
437        m_nonunit_index = 0;
438    }
439    return false;
440}
441
442void
443Prefetcher::print(std::ostream& out) const
444{
445    out << name() << " Prefetcher State\n";
446    // print out unit filter
447    out << "unit table:\n";
448    for (int i = 0; i < m_num_unit_filters; i++) {
449        out << m_unit_filter[i] << std::endl;
450    }
451
452    out << "negative table:\n";
453    for (int i = 0; i < m_num_unit_filters; i++) {
454        out << m_negative_filter[i] << std::endl;
455    }
456
457    // print out non-unit stride filter
458    out << "non-unit table:\n";
459    for (int i = 0; i < m_num_nonunit_filters; i++) {
460        out << m_nonunit_filter[i] << " "
461            << m_nonunit_stride[i] << " "
462            << m_nonunit_hit[i] << std::endl;
463    }
464
465    // print out allocated stream buffers
466    out << "streams:\n";
467    for (int i = 0; i < m_num_streams; i++) {
468        out << m_array[i].m_address << " "
469            << m_array[i].m_stride << " "
470            << m_array[i].m_is_valid << " "
471            << m_array[i].m_use_time << std::endl;
472    }
473}
474
475Addr
476Prefetcher::pageAddress(Addr addr) const
477{
478    return maskLowOrderBits(addr, m_page_shift);
479}
480