1/** 2 * Copyright (c) 2018 Metempsy Technology Consulting 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Authors: Ivan Pizarro 29 */ 30 31#include "mem/cache/prefetch/bop.hh" 32 33#include "debug/HWPrefetch.hh" 34#include "params/BOPPrefetcher.hh" 35 36BOPPrefetcher::BOPPrefetcher(const BOPPrefetcherParams *p) 37 : QueuedPrefetcher(p), 38 scoreMax(p->score_max), roundMax(p->round_max), 39 badScore(p->bad_score), rrEntries(p->rr_size), 40 tagMask((1 << p->tag_bits) - 1), 41 delayQueueEnabled(p->delay_queue_enable), 42 delayQueueSize(p->delay_queue_size), 43 delayTicks(cyclesToTicks(p->delay_queue_cycles)), 44 delayQueueEvent([this]{ delayQueueEventWrapper(); }, name()), 45 issuePrefetchRequests(false), bestOffset(1), phaseBestOffset(0), 46 bestScore(0), round(0) 47{ 48 if (!isPowerOf2(rrEntries)) { 49 fatal("%s: number of RR entries is not power of 2\n", name()); 50 } 51 if (!isPowerOf2(blkSize)) { 52 fatal("%s: cache line size is not power of 2\n", name()); 53 } 54 if (!(p->negative_offsets_enable && (p->offset_list_size % 2 == 0))) { 55 fatal("%s: negative offsets enabled with odd offset list size\n", 56 name()); 57 } 58 59 rrLeft.resize(rrEntries); 60 rrRight.resize(rrEntries); 61 62 // Following the paper implementation, a list with the specified number 63 // of offsets which are of the form 2^i * 3^j * 5^k with i,j,k >= 0 64 const int factors[] = { 2, 3, 5 }; 65 unsigned int i = 0; 66 int64_t offset_i = 1; 67 68 while (i < p->offset_list_size) 69 { 70 int64_t offset = offset_i; 71 72 for (int n : factors) { 73 while ((offset % n) == 0) { 74 offset /= n; 75 } 76 } 77 78 if (offset == 1) { 79 offsetsList.push_back(OffsetListEntry(offset_i, 0)); 80 i++; 81 // If we want to use negative offsets, add also the negative value 82 // of the offset just calculated 83 if (p->negative_offsets_enable) { 84 offsetsList.push_back(OffsetListEntry(-offset_i, 0)); 85 i++; 86 } 87 } 88 89 offset_i++; 90 } 91 92 offsetsListIterator = offsetsList.begin(); 93} 94 95void 96BOPPrefetcher::delayQueueEventWrapper() 97{ 98 while (!delayQueue.empty() && 99 delayQueue.front().processTick <= curTick()) 100 { 101 Addr addr_x = delayQueue.front().baseAddr; 102 insertIntoRR(addr_x, RRWay::Left); 103 delayQueue.pop_front(); 104 } 105 106 // Schedule an event for the next element if there is one 107 if (!delayQueue.empty()) { 108 schedule(delayQueueEvent, delayQueue.front().processTick); 109 } 110} 111 112unsigned int 113BOPPrefetcher::hash(Addr addr, unsigned int way) const 114{ 115 Addr hash1 = addr >> way; 116 Addr hash2 = hash1 >> floorLog2(rrEntries); 117 return (hash1 ^ hash2) & (Addr)(rrEntries - 1); 118} 119 120void 121BOPPrefetcher::insertIntoRR(Addr addr, unsigned int way) 122{ 123 switch (way) { 124 case RRWay::Left: 125 rrLeft[hash(addr, RRWay::Left)] = addr; 126 break; 127 case RRWay::Right: 128 rrRight[hash(addr, RRWay::Right)] = addr; 129 break; 130 } 131} 132 133void 134BOPPrefetcher::insertIntoDelayQueue(Addr x) 135{ 136 if (delayQueue.size() == delayQueueSize) { 137 return; 138 } 139 140 // Add the address to the delay queue and schedule an event to process 141 // it after the specified delay cycles 142 Tick process_tick = curTick() + delayTicks; 143 144 delayQueue.push_back(DelayQueueEntry(x, process_tick)); 145 146 if (!delayQueueEvent.scheduled()) { 147 schedule(delayQueueEvent, process_tick); 148 } 149} 150 151void 152BOPPrefetcher::resetScores() 153{ 154 for (auto& it : offsetsList) { 155 it.second = 0; 156 } 157} 158 159inline Addr 160BOPPrefetcher::tag(Addr addr) const 161{ 162 return (addr >> blkSize) & tagMask; 163} 164 165bool 166BOPPrefetcher::testRR(Addr addr) const 167{ 168 for (auto& it : rrLeft) { 169 if (it == addr) { 170 return true; 171 } 172 } 173 174 for (auto& it : rrRight) { 175 if (it == addr) { 176 return true; 177 } 178 } 179 180 return false; 181} 182 183void 184BOPPrefetcher::bestOffsetLearning(Addr x) 185{ 186 Addr offset_addr = (*offsetsListIterator).first; 187 Addr lookup_addr = x - offset_addr; 188 189 // There was a hit in the RR table, increment the score for this offset 190 if (testRR(lookup_addr)) { 191 DPRINTF(HWPrefetch, "Address %#lx found in the RR table\n", x); 192 (*offsetsListIterator).second++; 193 if ((*offsetsListIterator).second > bestScore) { 194 bestScore = (*offsetsListIterator).second; 195 phaseBestOffset = (*offsetsListIterator).first; 196 DPRINTF(HWPrefetch, "New best score is %lu\n", bestScore); 197 } 198 } 199 200 offsetsListIterator++; 201 202 // All the offsets in the list were visited meaning that a learning 203 // phase finished. Check if 204 if (offsetsListIterator == offsetsList.end()) { 205 offsetsListIterator = offsetsList.begin(); 206 round++; 207 208 // Check if the best offset must be updated if: 209 // (1) One of the scores equals SCORE_MAX 210 // (2) The number of rounds equals ROUND_MAX 211 if ((bestScore >= scoreMax) || (round == roundMax)) { 212 bestOffset = phaseBestOffset; 213 round = 0; 214 bestScore = 0; 215 phaseBestOffset = 0; 216 resetScores(); 217 issuePrefetchRequests = true; 218 } else if (phaseBestOffset <= badScore) { 219 issuePrefetchRequests = false; 220 } 221 } 222} 223 224void 225BOPPrefetcher::calculatePrefetch(const PrefetchInfo &pfi, 226 std::vector<AddrPriority> &addresses) 227{ 228 Addr addr = pfi.getAddr(); 229 Addr tag_x = tag(addr); 230 231 if (delayQueueEnabled) { 232 insertIntoDelayQueue(tag_x); 233 } else { 234 insertIntoRR(tag_x, RRWay::Left); 235 } 236 237 // Go through the nth offset and update the score, the best score and the 238 // current best offset if a better one is found 239 bestOffsetLearning(tag_x); 240 241 // This prefetcher is a degree 1 prefetch, so it will only generate one 242 // prefetch at most per access 243 if (issuePrefetchRequests) { 244 Addr prefetch_addr = addr + (bestOffset << lBlkSize); 245 addresses.push_back(AddrPriority(prefetch_addr, 0)); 246 DPRINTF(HWPrefetch, "Generated prefetch %#lx\n", prefetch_addr); 247 } 248} 249 250void 251BOPPrefetcher::notifyFill(const PacketPtr& pkt) 252{ 253 // Only insert into the RR right way if it's the pkt is a HWP 254 if (!pkt->cmd.isHWPrefetch()) return; 255 256 Addr tag_y = tag(pkt->getAddr()); 257 258 if (issuePrefetchRequests) { 259 insertIntoRR(tag_y - bestOffset, RRWay::Right); 260 } 261} 262 263BOPPrefetcher* 264BOPPrefetcherParams::create() 265{ 266 return new BOPPrefetcher(this); 267} 268