1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its 18 * contributors may be used to endorse or promote products derived from this 19 * software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Authors: Lisa Hsu 34 */ 35 36#ifndef __TLB_COALESCER_HH__ 37#define __TLB_COALESCER_HH__ 38 39#include <list> 40#include <queue> 41#include <string> 42#include <vector> 43 44#include "arch/generic/tlb.hh" 45#include "arch/isa.hh" 46#include "arch/isa_traits.hh" 47#include "arch/x86/pagetable.hh" 48#include "arch/x86/regs/segment.hh" 49#include "base/logging.hh" 50#include "base/statistics.hh" 51#include "gpu-compute/gpu_tlb.hh" 52#include "mem/port.hh" 53#include "mem/request.hh" 54#include "params/TLBCoalescer.hh" 55#include "sim/clocked_object.hh" 56 57class BaseTLB; 58class Packet; 59class ThreadContext; 60 61/** 62 * The TLBCoalescer is a ClockedObject sitting on the front side (CPUSide) of 63 * each TLB. It receives packets and issues coalesced requests to the 64 * TLB below it. It controls how requests are coalesced (the rules) 65 * and the permitted number of TLB probes per cycle (i.e., how many 66 * coalesced requests it feeds the TLB per cycle). 67 */ 68class TLBCoalescer : public ClockedObject 69{ 70 protected: 71 // TLB clock: will inherit clock from shader's clock period in terms 72 // of nuber of ticks of curTime (aka global simulation clock) 73 // The assignment of TLB clock from shader clock is done in the 74 // python config files. 75 int clock; 76 77 public: 78 typedef TLBCoalescerParams Params; 79 TLBCoalescer(const Params *p); 80 ~TLBCoalescer() { } 81 82 // Number of TLB probes per cycle. Parameterizable - default 2. 83 int TLBProbesPerCycle; 84 85 // Consider coalescing across that many ticks. 86 // Paraemterizable - default 1. 87 int coalescingWindow; 88 89 // Each coalesced request consists of multiple packets 90 // that all fall within the same virtual page 91 typedef std::vector<PacketPtr> coalescedReq; 92 93 // disables coalescing when true 94 bool disableCoalescing; 95 96 /* 97 * This is a hash map with <tick_index> as a key. 98 * It contains a vector of coalescedReqs per <tick_index>. 99 * Requests are buffered here until they can be issued to 100 * the TLB, at which point they are copied to the 101 * issuedTranslationsTable hash map. 102 * 103 * In terms of coalescing, we coalesce requests in a given 104 * window of x cycles by using tick_index = issueTime/x as a 105 * key, where x = coalescingWindow. issueTime is the issueTime 106 * of the pkt from the ComputeUnit's perspective, but another 107 * option is to change it to curTick(), so we coalesce based 108 * on the receive time. 109 */ 110 typedef std::unordered_map<int64_t, std::vector<coalescedReq>> CoalescingFIFO; 111 112 CoalescingFIFO coalescerFIFO; 113 114 /* 115 * issuedTranslationsTabler: a hash_map indexed by virtual page 116 * address. Each hash_map entry has a vector of PacketPtr associated 117 * with it denoting the different packets that share an outstanding 118 * coalesced translation request for the same virtual page. 119 * 120 * The rules that determine which requests we can coalesce are 121 * specified in the canCoalesce() method. 122 */ 123 typedef std::unordered_map<Addr, coalescedReq> CoalescingTable; 124 125 CoalescingTable issuedTranslationsTable; 126 127 // number of packets the coalescer receives 128 Stats::Scalar uncoalescedAccesses; 129 // number packets the coalescer send to the TLB 130 Stats::Scalar coalescedAccesses; 131 132 // Number of cycles the coalesced requests spend waiting in 133 // coalescerFIFO. For each packet the coalescer receives we take into 134 // account the number of all uncoalesced requests this pkt "represents" 135 Stats::Scalar queuingCycles; 136 137 // On average how much time a request from the 138 // uncoalescedAccesses that reaches the TLB 139 // spends waiting? 140 Stats::Scalar localqueuingCycles; 141 // localqueuingCycles/uncoalescedAccesses 142 Stats::Formula localLatency; 143 144 bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2); 145 void updatePhysAddresses(PacketPtr pkt); 146 void regStats() override; 147 148 // Clock related functions. Maps to-and-from 149 // Simulation ticks and object clocks. 150 Tick frequency() const { return SimClock::Frequency / clock; } 151 Tick ticks(int numCycles) const { return (Tick)clock * numCycles; } 152 Tick curCycle() const { return curTick() / clock; } 153 Tick tickToCycles(Tick val) const { return val / clock;} 154 155 class CpuSidePort : public SlavePort 156 { 157 public: 158 CpuSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer, 159 PortID _index) 160 : SlavePort(_name, tlb_coalescer), coalescer(tlb_coalescer), 161 index(_index) { } 162 163 protected: 164 TLBCoalescer *coalescer; 165 int index; 166 167 virtual bool recvTimingReq(PacketPtr pkt); 168 virtual Tick recvAtomic(PacketPtr pkt) { return 0; } 169 virtual void recvFunctional(PacketPtr pkt); 170 virtual void recvRangeChange() { } 171 virtual void recvReqRetry(); 172 173 virtual void 174 recvRespRetry() 175 { 176 fatal("recvRespRetry() is not implemented in the TLB coalescer.\n"); 177 } 178 179 virtual AddrRangeList getAddrRanges() const; 180 }; 181 182 class MemSidePort : public MasterPort 183 { 184 public: 185 MemSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer, 186 PortID _index) 187 : MasterPort(_name, tlb_coalescer), coalescer(tlb_coalescer), 188 index(_index) { } 189 190 std::deque<PacketPtr> retries; 191 192 protected: 193 TLBCoalescer *coalescer; 194 int index; 195 196 virtual bool recvTimingResp(PacketPtr pkt); 197 virtual Tick recvAtomic(PacketPtr pkt) { return 0; } 198 virtual void recvFunctional(PacketPtr pkt); 199 virtual void recvRangeChange() { } 200 virtual void recvReqRetry(); 201 202 virtual void 203 recvRespRetry() 204 { 205 fatal("recvRespRetry() not implemented in TLB coalescer"); 206 } 207 }; 208 209 // Coalescer slave ports on the cpu Side 210 std::vector<CpuSidePort*> cpuSidePort; 211 // Coalescer master ports on the memory side 212 std::vector<MemSidePort*> memSidePort; 213 214 Port &getPort(const std::string &if_name, 215 PortID idx=InvalidPortID) override; 216 217 void processProbeTLBEvent(); 218 /// This event issues the TLB probes 219 EventFunctionWrapper probeTLBEvent; 220 221 void processCleanupEvent(); 222 /// The cleanupEvent is scheduled after a TLBEvent triggers 223 /// in order to free memory and do the required clean-up 224 EventFunctionWrapper cleanupEvent; 225 226 // this FIFO queue keeps track of the virt. page 227 // addresses that are pending cleanup 228 std::queue<Addr> cleanupQueue; 229}; 230 231#endif // __TLB_COALESCER_HH__ 232