Cross Reference: /gem5/src/cpu/o3/lsq

Deleted Added

sdiff udiff text old ( 2674:6d4afef73a20 ) new ( 2678:1f86b91dc3bb )

full compact

lsq_unit.hh (2674:6d4afef73a20)	lsq_unit.hh (2678:1f86b91dc3bb)
1/* 2 * Copyright (c) 2004-2006 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 / 28 29#ifndef __CPU_O3_LSQ_UNIT_HH__ 30#define __CPU_O3_LSQ_UNIT_HH__ 31 32#include <algorithm> 33#include <map> 34#include <queue> 35 36#include "arch/faults.hh" 37#include "config/full_system.hh" 38#include "base/hashmap.hh" 39#include "cpu/inst_seq.hh" 40#include "mem/packet.hh" 41#include "mem/port.hh" 42//#include "mem/page_table.hh" 43//#include "sim/debug.hh" 44//#include "sim/sim_object.hh" 45 46/* 47 * Class that implements the actual LQ and SQ for each specific 48 * thread. Both are circular queues; load entries are freed upon 49 * committing, while store entries are freed once they writeback. The 50 * LSQUnit tracks if there are memory ordering violations, and also 51 * detects partial load to store forwarding cases (a store only has 52 * part of a load's data) that requires the load to wait until the 53 * store writes back. In the former case it holds onto the instruction 54 * until the dependence unit looks at it, and in the latter it stalls 55 * the LSQ until the store writes back. At that point the load is 56 * replayed. 57 / 58template <class Impl> 59class LSQUnit { 60 protected: 61 typedef TheISA::IntReg IntReg; 62 public: 63 typedef typename Impl::Params Params; 64 typedef typename Impl::FullCPU FullCPU; 65 typedef typename Impl::DynInstPtr DynInstPtr; 66 typedef typename Impl::CPUPol::IEW IEW; 67 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 68 69 public: 70 /* Constructs an LSQ unit. init() must be called prior to use. / 71 LSQUnit(); 72 73 /* Initializes the LSQ unit with the specified number of entries. / 74 void init(Params params, unsigned maxLQEntries, 75 unsigned maxSQEntries, unsigned id); 76 77 /** Returns the name of the LSQ unit. / 78 std::string name() const; 79 80 /* Sets the CPU pointer. / 81 void setCPU(FullCPU cpu_ptr); 82 83 /** Sets the IEW stage pointer. / 84 void setIEW(IEW iew_ptr) 85 { iewStage = iew_ptr; } 86 87 /** Sets the page table pointer. / 88// void setPageTable(PageTable pt_ptr); 89 90 /** Switches out LSQ unit. / 91 void switchOut(); 92 93 /* Takes over from another CPU's thread. / 94 void takeOverFrom(); 95 96 /* Returns if the LSQ is switched out. / 97 bool isSwitchedOut() { return switchedOut; } 98 99 /* Ticks the LSQ unit, which in this case only resets the number of 100 * used cache ports. 101 * @todo: Move the number of used ports up to the LSQ level so it can 102 * be shared by all LSQ units. 103 / 104* void tick() { usedPorts = 0; } 105 106 /** Inserts an instruction. / 107* void insert(DynInstPtr &inst); 108 /** Inserts a load instruction. / 109* void insertLoad(DynInstPtr &load_inst); 110 /** Inserts a store instruction. / 111* void insertStore(DynInstPtr &store_inst); 112 113 /** Executes a load instruction. / 114* Fault executeLoad(DynInstPtr &inst); 115 116 Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; } 117 /** Executes a store instruction. / 118* Fault executeStore(DynInstPtr &inst); 119 120 /** Commits the head load. / 121* void commitLoad(); 122 /** Commits loads older than a specific sequence number. / 123* void commitLoads(InstSeqNum &youngest_inst); 124 125 /** Commits stores older than a specific sequence number. / 126* void commitStores(InstSeqNum &youngest_inst); 127 128 /** Writes back stores. / 129* void writebackStores(); 130 131 void completeDataAccess(PacketPtr pkt); 132	1/* 2 * Copyright (c) 2004-2006 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 / 28 29#ifndef __CPU_O3_LSQ_UNIT_HH__ 30#define __CPU_O3_LSQ_UNIT_HH__ 31 32#include <algorithm> 33#include <map> 34#include <queue> 35 36#include "arch/faults.hh" 37#include "config/full_system.hh" 38#include "base/hashmap.hh" 39#include "cpu/inst_seq.hh" 40#include "mem/packet.hh" 41#include "mem/port.hh" 42//#include "mem/page_table.hh" 43//#include "sim/debug.hh" 44//#include "sim/sim_object.hh" 45 46/* 47 * Class that implements the actual LQ and SQ for each specific 48 * thread. Both are circular queues; load entries are freed upon 49 * committing, while store entries are freed once they writeback. The 50 * LSQUnit tracks if there are memory ordering violations, and also 51 * detects partial load to store forwarding cases (a store only has 52 * part of a load's data) that requires the load to wait until the 53 * store writes back. In the former case it holds onto the instruction 54 * until the dependence unit looks at it, and in the latter it stalls 55 * the LSQ until the store writes back. At that point the load is 56 * replayed. 57 / 58template <class Impl> 59class LSQUnit { 60 protected: 61 typedef TheISA::IntReg IntReg; 62 public: 63 typedef typename Impl::Params Params; 64 typedef typename Impl::FullCPU FullCPU; 65 typedef typename Impl::DynInstPtr DynInstPtr; 66 typedef typename Impl::CPUPol::IEW IEW; 67 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 68 69 public: 70 /* Constructs an LSQ unit. init() must be called prior to use. / 71 LSQUnit(); 72 73 /* Initializes the LSQ unit with the specified number of entries. / 74 void init(Params params, unsigned maxLQEntries, 75 unsigned maxSQEntries, unsigned id); 76 77 /** Returns the name of the LSQ unit. / 78 std::string name() const; 79 80 /* Sets the CPU pointer. / 81 void setCPU(FullCPU cpu_ptr); 82 83 /** Sets the IEW stage pointer. / 84 void setIEW(IEW iew_ptr) 85 { iewStage = iew_ptr; } 86 87 /** Sets the page table pointer. / 88// void setPageTable(PageTable pt_ptr); 89 90 /** Switches out LSQ unit. / 91 void switchOut(); 92 93 /* Takes over from another CPU's thread. / 94 void takeOverFrom(); 95 96 /* Returns if the LSQ is switched out. / 97 bool isSwitchedOut() { return switchedOut; } 98 99 /* Ticks the LSQ unit, which in this case only resets the number of 100 * used cache ports. 101 * @todo: Move the number of used ports up to the LSQ level so it can 102 * be shared by all LSQ units. 103 / 104* void tick() { usedPorts = 0; } 105 106 /** Inserts an instruction. / 107* void insert(DynInstPtr &inst); 108 /** Inserts a load instruction. / 109* void insertLoad(DynInstPtr &load_inst); 110 /** Inserts a store instruction. / 111* void insertStore(DynInstPtr &store_inst); 112 113 /** Executes a load instruction. / 114* Fault executeLoad(DynInstPtr &inst); 115 116 Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; } 117 /** Executes a store instruction. / 118* Fault executeStore(DynInstPtr &inst); 119 120 /** Commits the head load. / 121* void commitLoad(); 122 /** Commits loads older than a specific sequence number. / 123* void commitLoads(InstSeqNum &youngest_inst); 124 125 /** Commits stores older than a specific sequence number. / 126* void commitStores(InstSeqNum &youngest_inst); 127 128 /** Writes back stores. / 129* void writebackStores(); 130 131 void completeDataAccess(PacketPtr pkt); 132
133 void completeStoreDataAccess(DynInstPtr &inst); 134
135 // @todo: Include stats in the LSQ unit. 136 //void regStats(); 137 138 /** Clears all the entries in the LQ. / 139* void clearLQ(); 140 141 /** Clears all the entries in the SQ. / 142* void clearSQ(); 143 144 /** Resizes the LQ to a given size. / 145* void resizeLQ(unsigned size); 146 147 /** Resizes the SQ to a given size. / 148* void resizeSQ(unsigned size); 149 150 /** Squashes all instructions younger than a specific sequence number. / 151* void squash(const InstSeqNum &squashed_num); 152 153 /** Returns if there is a memory ordering violation. Value is reset upon 154 * call to getMemDepViolator(). 155 / 156* bool violation() { return memDepViolator; } 157 158 /** Returns the memory ordering violator. / 159* DynInstPtr getMemDepViolator(); 160 161 /** Returns if a load became blocked due to the memory system. / 162* bool loadBlocked() 163 { return isLoadBlocked; } 164 165 /** Clears the signal that a load became blocked. / 166* void clearLoadBlocked() 167 { isLoadBlocked = false; } 168 169 /** Returns if the blocked load was handled. / 170* bool isLoadBlockedHandled() 171 { return loadBlockedHandled; } 172 173 /** Records the blocked load as being handled. / 174* void setLoadBlockedHandled() 175 { loadBlockedHandled = true; } 176 177 /** Returns the number of free entries (min of free LQ and SQ entries). / 178* unsigned numFreeEntries(); 179 180 /** Returns the number of loads ready to execute. / 181* int numLoadsReady(); 182 183 /** Returns the number of loads in the LQ. / 184* int numLoads() { return loads; } 185 186 /** Returns the number of stores in the SQ. / 187* int numStores() { return stores; } 188 189 /** Returns if either the LQ or SQ is full. / 190* bool isFull() { return lqFull() \|\| sqFull(); } 191 192 /** Returns if the LQ is full. / 193* bool lqFull() { return loads >= (LQEntries - 1); } 194 195 /** Returns if the SQ is full. / 196* bool sqFull() { return stores >= (SQEntries - 1); } 197 198 /** Returns the number of instructions in the LSQ. / 199* unsigned getCount() { return loads + stores; } 200 201 /** Returns if there are any stores to writeback. / 202* bool hasStoresToWB() { return storesToWB; } 203 204 /** Returns the number of stores to writeback. / 205* int numStoresToWB() { return storesToWB; } 206 207 /** Returns if the LSQ unit will writeback on this cycle. / 208* bool willWB() { return storeQueue[storeWBIdx].canWB &&	133 // @todo: Include stats in the LSQ unit. 134 //void regStats(); 135 136 /** Clears all the entries in the LQ. / 137* void clearLQ(); 138 139 /** Clears all the entries in the SQ. / 140* void clearSQ(); 141 142 /** Resizes the LQ to a given size. / 143* void resizeLQ(unsigned size); 144 145 /** Resizes the SQ to a given size. / 146* void resizeSQ(unsigned size); 147 148 /** Squashes all instructions younger than a specific sequence number. / 149* void squash(const InstSeqNum &squashed_num); 150 151 /** Returns if there is a memory ordering violation. Value is reset upon 152 * call to getMemDepViolator(). 153 / 154* bool violation() { return memDepViolator; } 155 156 /** Returns the memory ordering violator. / 157* DynInstPtr getMemDepViolator(); 158 159 /** Returns if a load became blocked due to the memory system. / 160* bool loadBlocked() 161 { return isLoadBlocked; } 162 163 /** Clears the signal that a load became blocked. / 164* void clearLoadBlocked() 165 { isLoadBlocked = false; } 166 167 /** Returns if the blocked load was handled. / 168* bool isLoadBlockedHandled() 169 { return loadBlockedHandled; } 170 171 /** Records the blocked load as being handled. / 172* void setLoadBlockedHandled() 173 { loadBlockedHandled = true; } 174 175 /** Returns the number of free entries (min of free LQ and SQ entries). / 176* unsigned numFreeEntries(); 177 178 /** Returns the number of loads ready to execute. / 179* int numLoadsReady(); 180 181 /** Returns the number of loads in the LQ. / 182* int numLoads() { return loads; } 183 184 /** Returns the number of stores in the SQ. / 185* int numStores() { return stores; } 186 187 /** Returns if either the LQ or SQ is full. / 188* bool isFull() { return lqFull() \|\| sqFull(); } 189 190 /** Returns if the LQ is full. / 191* bool lqFull() { return loads >= (LQEntries - 1); } 192 193 /** Returns if the SQ is full. / 194* bool sqFull() { return stores >= (SQEntries - 1); } 195 196 /** Returns the number of instructions in the LSQ. / 197* unsigned getCount() { return loads + stores; } 198 199 /** Returns if there are any stores to writeback. / 200* bool hasStoresToWB() { return storesToWB; } 201 202 /** Returns the number of stores to writeback. / 203* int numStoresToWB() { return storesToWB; } 204 205 /** Returns if the LSQ unit will writeback on this cycle. / 206* bool willWB() { return storeQueue[storeWBIdx].canWB &&
209 !storeQueue[storeWBIdx].completed/* && 210 !dcacheInterface->isBlocked()*/; }	207 !storeQueue[storeWBIdx].completed && 208 !isStoreBlocked; }
211 212 private:	209 210 private:
	211 void writeback(DynInstPtr &inst, PacketPtr pkt); 212
213 /** Completes the store at the specified index. / 214* void completeStore(int store_idx); 215 216 /** Increments the given store index (circular queue). / 217* inline void incrStIdx(int &store_idx); 218 /** Decrements the given store index (circular queue). / 219* inline void decrStIdx(int &store_idx); 220 /** Increments the given load index (circular queue). / 221* inline void incrLdIdx(int &load_idx); 222 /** Decrements the given load index (circular queue). / 223* inline void decrLdIdx(int &load_idx); 224 225 public: 226 /** Debugging function to dump instructions in the LSQ. / 227* void dumpInsts(); 228 229 private: 230 /** Pointer to the CPU. / 231* FullCPU cpu; 232* 233 /** Pointer to the IEW stage. / 234* IEW iewStage; 235* 236 MemObject mem; 237* 238 class DcachePort : public Port 239 { 240 protected: 241 FullCPU cpu; 242* LSQUnit lsq; 243* 244 public: 245 DcachePort(FullCPU _cpu, LSQUnit _lsq) 246 : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) 247 { } 248 249 protected: 250 virtual Tick recvAtomic(PacketPtr pkt); 251 252 virtual void recvFunctional(PacketPtr pkt); 253 254 virtual void recvStatusChange(Status status); 255 256 virtual void getDeviceAddressRanges(AddrRangeList &resp, 257 AddrRangeList &snoop) 258 { resp.clear(); snoop.clear(); } 259 260 virtual bool recvTiming(PacketPtr pkt); 261 262 virtual void recvRetry(); 263 }; 264 265 /** Pointer to the D-cache. / 266* DcachePort dcachePort; 267*	213 /** Completes the store at the specified index. / 214* void completeStore(int store_idx); 215 216 /** Increments the given store index (circular queue). / 217* inline void incrStIdx(int &store_idx); 218 /** Decrements the given store index (circular queue). / 219* inline void decrStIdx(int &store_idx); 220 /** Increments the given load index (circular queue). / 221* inline void incrLdIdx(int &load_idx); 222 /** Decrements the given load index (circular queue). / 223* inline void decrLdIdx(int &load_idx); 224 225 public: 226 /** Debugging function to dump instructions in the LSQ. / 227* void dumpInsts(); 228 229 private: 230 /** Pointer to the CPU. / 231* FullCPU cpu; 232* 233 /** Pointer to the IEW stage. / 234* IEW iewStage; 235* 236 MemObject mem; 237* 238 class DcachePort : public Port 239 { 240 protected: 241 FullCPU cpu; 242* LSQUnit lsq; 243* 244 public: 245 DcachePort(FullCPU _cpu, LSQUnit _lsq) 246 : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) 247 { } 248 249 protected: 250 virtual Tick recvAtomic(PacketPtr pkt); 251 252 virtual void recvFunctional(PacketPtr pkt); 253 254 virtual void recvStatusChange(Status status); 255 256 virtual void getDeviceAddressRanges(AddrRangeList &resp, 257 AddrRangeList &snoop) 258 { resp.clear(); snoop.clear(); } 259 260 virtual bool recvTiming(PacketPtr pkt); 261 262 virtual void recvRetry(); 263 }; 264 265 /** Pointer to the D-cache. / 266* DcachePort dcachePort; 267*
	268 class LSQSenderState : public Packet::SenderState 269 { 270 public: 271 LSQSenderState() 272 : noWB(false) 273 { } 274 275// protected: 276 DynInstPtr inst; 277 bool isLoad; 278 int idx; 279 bool noWB; 280 }; 281
268 /** Pointer to the page table. / 269// PageTable pTable; 270	282 /** Pointer to the page table. / 283// PageTable pTable; 284
	285 class WritebackEvent : public Event { 286 public: 287 /** Constructs a writeback event. / 288* WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit lsq_ptr); 289* 290 /** Processes the writeback event. / 291* void process(); 292 293 /** Returns the description of this event. / 294* const char description(); 295* 296 private: 297 DynInstPtr inst; 298 299 PacketPtr pkt; 300 301 /** The pointer to the LSQ unit that issued the store. / 302* LSQUnit<Impl> lsqPtr; 303* }; 304
271 public: 272 struct SQEntry { 273 /** Constructs an empty store queue entry. / 274* SQEntry() 275 : inst(NULL), req(NULL), size(0), data(0), 276 canWB(0), committed(0), completed(0) 277 { } 278 279 /** Constructs a store queue entry for a given instruction. / 280* SQEntry(DynInstPtr &_inst) 281 : inst(_inst), req(NULL), size(0), data(0), 282 canWB(0), committed(0), completed(0) 283 { } 284 285 /** The store instruction. / 286* DynInstPtr inst; 287 /** The request for the store. / 288* RequestPtr req; 289 /** The size of the store. / 290* int size; 291 /** The store data. / 292* IntReg data; 293 /** Whether or not the store can writeback. / 294* bool canWB; 295 /** Whether or not the store is committed. / 296* bool committed; 297 /** Whether or not the store is completed. / 298* bool completed; 299 }; 300 301 private: 302 /** The LSQUnit thread id. / 303* unsigned lsqID; 304 305 /** The store queue. / 306* std::vector<SQEntry> storeQueue; 307 308 /** The load queue. / 309* std::vector<DynInstPtr> loadQueue; 310 311 /** The number of LQ entries, plus a sentinel entry (circular queue). 312 * @todo: Consider having var that records the true number of LQ entries. 313 / 314* unsigned LQEntries; 315 /** The number of SQ entries, plus a sentinel entry (circular queue). 316 * @todo: Consider having var that records the true number of SQ entries. 317 / 318* unsigned SQEntries; 319 320 /** The number of load instructions in the LQ. / 321* int loads; 322 /** The number of store instructions in the SQ. / 323* int stores; 324 /** The number of store instructions in the SQ waiting to writeback. / 325* int storesToWB; 326 327 /** The index of the head instruction in the LQ. / 328* int loadHead; 329 /** The index of the tail instruction in the LQ. / 330* int loadTail; 331 332 /** The index of the head instruction in the SQ. / 333* int storeHead; 334 /** The index of the first instruction that may be ready to be 335 * written back, and has not yet been written back. 336 / 337* int storeWBIdx; 338 /** The index of the tail instruction in the SQ. / 339* int storeTail; 340 341 /// @todo Consider moving to a more advanced model with write vs read ports 342 /** The number of cache ports available each cycle. / 343* int cachePorts; 344 345 /** The number of used cache ports in this cycle. / 346* int usedPorts; 347 348 /** Is the LSQ switched out. / 349* bool switchedOut; 350 351 //list<InstSeqNum> mshrSeqNums; 352 353 /** Wire to read information from the issue stage time queue. / 354* typename TimeBuffer<IssueStruct>::wire fromIssue; 355 356 /** Whether or not the LSQ is stalled. / 357* bool stalled; 358 /** The store that causes the stall due to partial store to load 359 * forwarding. 360 / 361* InstSeqNum stallingStoreIsn; 362 /** The index of the above store. / 363* int stallingLoadIdx; 364	305 public: 306 struct SQEntry { 307 /** Constructs an empty store queue entry. / 308* SQEntry() 309 : inst(NULL), req(NULL), size(0), data(0), 310 canWB(0), committed(0), completed(0) 311 { } 312 313 /** Constructs a store queue entry for a given instruction. / 314* SQEntry(DynInstPtr &_inst) 315 : inst(_inst), req(NULL), size(0), data(0), 316 canWB(0), committed(0), completed(0) 317 { } 318 319 /** The store instruction. / 320* DynInstPtr inst; 321 /** The request for the store. / 322* RequestPtr req; 323 /** The size of the store. / 324* int size; 325 /** The store data. / 326* IntReg data; 327 /** Whether or not the store can writeback. / 328* bool canWB; 329 /** Whether or not the store is committed. / 330* bool committed; 331 /** Whether or not the store is completed. / 332* bool completed; 333 }; 334 335 private: 336 /** The LSQUnit thread id. / 337* unsigned lsqID; 338 339 /** The store queue. / 340* std::vector<SQEntry> storeQueue; 341 342 /** The load queue. / 343* std::vector<DynInstPtr> loadQueue; 344 345 /** The number of LQ entries, plus a sentinel entry (circular queue). 346 * @todo: Consider having var that records the true number of LQ entries. 347 / 348* unsigned LQEntries; 349 /** The number of SQ entries, plus a sentinel entry (circular queue). 350 * @todo: Consider having var that records the true number of SQ entries. 351 / 352* unsigned SQEntries; 353 354 /** The number of load instructions in the LQ. / 355* int loads; 356 /** The number of store instructions in the SQ. / 357* int stores; 358 /** The number of store instructions in the SQ waiting to writeback. / 359* int storesToWB; 360 361 /** The index of the head instruction in the LQ. / 362* int loadHead; 363 /** The index of the tail instruction in the LQ. / 364* int loadTail; 365 366 /** The index of the head instruction in the SQ. / 367* int storeHead; 368 /** The index of the first instruction that may be ready to be 369 * written back, and has not yet been written back. 370 / 371* int storeWBIdx; 372 /** The index of the tail instruction in the SQ. / 373* int storeTail; 374 375 /// @todo Consider moving to a more advanced model with write vs read ports 376 /** The number of cache ports available each cycle. / 377* int cachePorts; 378 379 /** The number of used cache ports in this cycle. / 380* int usedPorts; 381 382 /** Is the LSQ switched out. / 383* bool switchedOut; 384 385 //list<InstSeqNum> mshrSeqNums; 386 387 /** Wire to read information from the issue stage time queue. / 388* typename TimeBuffer<IssueStruct>::wire fromIssue; 389 390 /** Whether or not the LSQ is stalled. / 391* bool stalled; 392 /** The store that causes the stall due to partial store to load 393 * forwarding. 394 / 395* InstSeqNum stallingStoreIsn; 396 /** The index of the above store. / 397* int stallingLoadIdx; 398
	399 bool isStoreBlocked; 400
365 /** Whether or not a load is blocked due to the memory system. / 366* bool isLoadBlocked; 367 368 /** Has the blocked load been handled. / 369* bool loadBlockedHandled; 370 371 /** The sequence number of the blocked load. / 372* InstSeqNum blockedLoadSeqNum; 373 374 /** The oldest load that caused a memory ordering violation. / 375* DynInstPtr memDepViolator; 376 377 // Will also need how many read/write ports the Dcache has. Or keep track 378 // of that in stage that is one level up, and only call executeLoad/Store 379 // the appropriate number of times. 380/* 381 // total number of loads forwaded from LSQ stores 382 Stats::Vector<> lsq_forw_loads; 383 384 // total number of loads ignored due to invalid addresses 385 Stats::Vector<> inv_addr_loads; 386 387 // total number of software prefetches ignored due to invalid addresses 388 Stats::Vector<> inv_addr_swpfs; 389 390 // total non-speculative bogus addresses seen (debug var) 391 Counter sim_invalid_addrs; 392 Stats::Vector<> fu_busy; //cumulative fu busy 393 394 // ready loads blocked due to memory disambiguation 395 Stats::Vector<> lsq_blocked_loads; 396 397 Stats::Scalar<> lsqInversion; 398/ 399* public: 400 /** Executes the load at the given index. / 401* template <class T> 402 Fault read(Request req, T &data, int load_idx); 403* 404 /** Executes the store at the given index. / 405* template <class T> 406 Fault write(Request req, T &data, int store_idx); 407* 408 /** Returns the index of the head load instruction. / 409* int getLoadHead() { return loadHead; } 410 /** Returns the sequence number of the head load instruction. / 411* InstSeqNum getLoadHeadSeqNum() 412 { 413 if (loadQueue[loadHead]) { 414 return loadQueue[loadHead]->seqNum; 415 } else { 416 return 0; 417 } 418 419 } 420 421 /** Returns the index of the head store instruction. / 422* int getStoreHead() { return storeHead; } 423 /** Returns the sequence number of the head store instruction. / 424* InstSeqNum getStoreHeadSeqNum() 425 { 426 if (storeQueue[storeHead].inst) { 427 return storeQueue[storeHead].inst->seqNum; 428 } else { 429 return 0; 430 } 431 432 } 433 434 /** Returns whether or not the LSQ unit is stalled. / 435* bool isStalled() { return stalled; } 436}; 437 438template <class Impl> 439template <class T> 440Fault 441LSQUnit<Impl>::read(Request req, T &data, int load_idx) 442{ 443* DynInstPtr load_inst = loadQueue[load_idx]; 444 445 assert(load_inst); 446 447 assert(!load_inst->isExecuted()); 448 449 // Make sure this isn't an uncacheable access 450 // A bit of a hackish way to get uncached accesses to work only if they're 451 // at the head of the LSQ and are ready to commit (at the head of the ROB 452 // too). 453 if (req->getFlags() & UNCACHEABLE && 454 (load_idx != loadHead \|\| !load_inst->reachedCommit)) { 455 iewStage->rescheduleMemInst(load_inst); 456 return TheISA::genMachineCheckFault(); 457 } 458 459 // Check the SQ for any previous stores that might lead to forwarding 460 int store_idx = load_inst->sqIdx; 461 462 int store_size = 0; 463 464 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " 465 "storeHead: %i addr: %#x\n", 466 load_idx, store_idx, storeHead, req->getPaddr()); 467 468#if 0 469 if (req->getFlags() & LOCKED) { 470 cpu->lockAddr = req->getPaddr(); 471 cpu->lockFlag = true; 472 } 473#endif 474 475 while (store_idx != -1) { 476 // End once we've reached the top of the LSQ 477 if (store_idx == storeWBIdx) { 478 break; 479 } 480 481 // Move the index to one younger 482 if (--store_idx < 0) 483 store_idx += SQEntries; 484 485 assert(storeQueue[store_idx].inst); 486 487 store_size = storeQueue[store_idx].size; 488 489 if (store_size == 0) 490 continue; 491 492 // Check if the store data is within the lower and upper bounds of 493 // addresses that the request needs. 494 bool store_has_lower_limit = 495 req->getVaddr() >= storeQueue[store_idx].inst->effAddr; 496 bool store_has_upper_limit = 497 (req->getVaddr() + req->getSize()) <= 498 (storeQueue[store_idx].inst->effAddr + store_size); 499 bool lower_load_has_store_part = 500 req->getVaddr() < (storeQueue[store_idx].inst->effAddr + 501 store_size); 502 bool upper_load_has_store_part = 503 (req->getVaddr() + req->getSize()) > 504 storeQueue[store_idx].inst->effAddr; 505 506 // If the store's data has all of the data needed, we can forward. 507 if (store_has_lower_limit && store_has_upper_limit) { 508 // Get shift amount for offset into the store's data. 509 int shift_amt = req->getVaddr() & (store_size - 1); 510 // @todo: Magic number, assumes byte addressing 511 shift_amt = shift_amt << 3; 512 513 // Cast this to type T? 514 data = storeQueue[store_idx].data >> shift_amt; 515 516 assert(!load_inst->memData); 517 load_inst->memData = new uint8_t[64]; 518 519 memcpy(load_inst->memData, &data, req->getSize()); 520 521 DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " 522 "addr %#x, data %#x\n", 523 store_idx, req->getVaddr(), *(load_inst->memData));	401 /** Whether or not a load is blocked due to the memory system. / 402* bool isLoadBlocked; 403 404 /** Has the blocked load been handled. / 405* bool loadBlockedHandled; 406 407 /** The sequence number of the blocked load. / 408* InstSeqNum blockedLoadSeqNum; 409 410 /** The oldest load that caused a memory ordering violation. / 411* DynInstPtr memDepViolator; 412 413 // Will also need how many read/write ports the Dcache has. Or keep track 414 // of that in stage that is one level up, and only call executeLoad/Store 415 // the appropriate number of times. 416/* 417 // total number of loads forwaded from LSQ stores 418 Stats::Vector<> lsq_forw_loads; 419 420 // total number of loads ignored due to invalid addresses 421 Stats::Vector<> inv_addr_loads; 422 423 // total number of software prefetches ignored due to invalid addresses 424 Stats::Vector<> inv_addr_swpfs; 425 426 // total non-speculative bogus addresses seen (debug var) 427 Counter sim_invalid_addrs; 428 Stats::Vector<> fu_busy; //cumulative fu busy 429 430 // ready loads blocked due to memory disambiguation 431 Stats::Vector<> lsq_blocked_loads; 432 433 Stats::Scalar<> lsqInversion; 434/ 435* public: 436 /** Executes the load at the given index. / 437* template <class T> 438 Fault read(Request req, T &data, int load_idx); 439* 440 /** Executes the store at the given index. / 441* template <class T> 442 Fault write(Request req, T &data, int store_idx); 443* 444 /** Returns the index of the head load instruction. / 445* int getLoadHead() { return loadHead; } 446 /** Returns the sequence number of the head load instruction. / 447* InstSeqNum getLoadHeadSeqNum() 448 { 449 if (loadQueue[loadHead]) { 450 return loadQueue[loadHead]->seqNum; 451 } else { 452 return 0; 453 } 454 455 } 456 457 /** Returns the index of the head store instruction. / 458* int getStoreHead() { return storeHead; } 459 /** Returns the sequence number of the head store instruction. / 460* InstSeqNum getStoreHeadSeqNum() 461 { 462 if (storeQueue[storeHead].inst) { 463 return storeQueue[storeHead].inst->seqNum; 464 } else { 465 return 0; 466 } 467 468 } 469 470 /** Returns whether or not the LSQ unit is stalled. / 471* bool isStalled() { return stalled; } 472}; 473 474template <class Impl> 475template <class T> 476Fault 477LSQUnit<Impl>::read(Request req, T &data, int load_idx) 478{ 479* DynInstPtr load_inst = loadQueue[load_idx]; 480 481 assert(load_inst); 482 483 assert(!load_inst->isExecuted()); 484 485 // Make sure this isn't an uncacheable access 486 // A bit of a hackish way to get uncached accesses to work only if they're 487 // at the head of the LSQ and are ready to commit (at the head of the ROB 488 // too). 489 if (req->getFlags() & UNCACHEABLE && 490 (load_idx != loadHead \|\| !load_inst->reachedCommit)) { 491 iewStage->rescheduleMemInst(load_inst); 492 return TheISA::genMachineCheckFault(); 493 } 494 495 // Check the SQ for any previous stores that might lead to forwarding 496 int store_idx = load_inst->sqIdx; 497 498 int store_size = 0; 499 500 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " 501 "storeHead: %i addr: %#x\n", 502 load_idx, store_idx, storeHead, req->getPaddr()); 503 504#if 0 505 if (req->getFlags() & LOCKED) { 506 cpu->lockAddr = req->getPaddr(); 507 cpu->lockFlag = true; 508 } 509#endif 510 511 while (store_idx != -1) { 512 // End once we've reached the top of the LSQ 513 if (store_idx == storeWBIdx) { 514 break; 515 } 516 517 // Move the index to one younger 518 if (--store_idx < 0) 519 store_idx += SQEntries; 520 521 assert(storeQueue[store_idx].inst); 522 523 store_size = storeQueue[store_idx].size; 524 525 if (store_size == 0) 526 continue; 527 528 // Check if the store data is within the lower and upper bounds of 529 // addresses that the request needs. 530 bool store_has_lower_limit = 531 req->getVaddr() >= storeQueue[store_idx].inst->effAddr; 532 bool store_has_upper_limit = 533 (req->getVaddr() + req->getSize()) <= 534 (storeQueue[store_idx].inst->effAddr + store_size); 535 bool lower_load_has_store_part = 536 req->getVaddr() < (storeQueue[store_idx].inst->effAddr + 537 store_size); 538 bool upper_load_has_store_part = 539 (req->getVaddr() + req->getSize()) > 540 storeQueue[store_idx].inst->effAddr; 541 542 // If the store's data has all of the data needed, we can forward. 543 if (store_has_lower_limit && store_has_upper_limit) { 544 // Get shift amount for offset into the store's data. 545 int shift_amt = req->getVaddr() & (store_size - 1); 546 // @todo: Magic number, assumes byte addressing 547 shift_amt = shift_amt << 3; 548 549 // Cast this to type T? 550 data = storeQueue[store_idx].data >> shift_amt; 551 552 assert(!load_inst->memData); 553 load_inst->memData = new uint8_t[64]; 554 555 memcpy(load_inst->memData, &data, req->getSize()); 556 557 DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " 558 "addr %#x, data %#x\n", 559 store_idx, req->getVaddr(), *(load_inst->memData));
524/* 525 typename LdWritebackEvent wb = 526* new typename LdWritebackEvent(load_inst, 527 iewStage);
528	560
	561 PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); 562 data_pkt->dataStatic(load_inst->memData); 563 564 WritebackEvent wb = new WritebackEvent(load_inst, data_pkt, this); 565*
529 // We'll say this has a 1 cycle load-store forwarding latency 530 // for now. 531 // @todo: Need to make this a parameter. 532 wb->schedule(curTick);	566 // We'll say this has a 1 cycle load-store forwarding latency 567 // for now. 568 // @todo: Need to make this a parameter. 569 wb->schedule(curTick);
533*/	570
534 // Should keep track of stat for forwarded data 535 return NoFault; 536 } else if ((store_has_lower_limit && lower_load_has_store_part) \|\| 537 (store_has_upper_limit && upper_load_has_store_part) \|\| 538 (lower_load_has_store_part && upper_load_has_store_part)) { 539 // This is the partial store-load forwarding case where a store 540 // has only part of the load's data. 541 542 // If it's already been written back, then don't worry about 543 // stalling on it. 544 if (storeQueue[store_idx].completed) { 545 continue; 546 } 547 548 // Must stall load and force it to retry, so long as it's the oldest 549 // load that needs to do so. 550 if (!stalled \|\| 551 (stalled && 552 load_inst->seqNum < 553 loadQueue[stallingLoadIdx]->seqNum)) { 554 stalled = true; 555 stallingStoreIsn = storeQueue[store_idx].inst->seqNum; 556 stallingLoadIdx = load_idx; 557 } 558 559 // Tell IQ/mem dep unit that this instruction will need to be 560 // rescheduled eventually 561 iewStage->rescheduleMemInst(load_inst); 562 563 // Do not generate a writeback event as this instruction is not 564 // complete. 565 DPRINTF(LSQUnit, "Load-store forwarding mis-match. " 566 "Store idx %i to load addr %#x\n", 567 store_idx, req->getVaddr()); 568 569 return NoFault; 570 } 571 } 572 573 // If there's no forwarding case, then go access memory 574 DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", 575 load_inst->seqNum, load_inst->readPC()); 576 577 assert(!load_inst->memData); 578 load_inst->memData = new uint8_t[64]; 579 580 ++usedPorts; 581 582 DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", 583 load_inst->readPC()); 584 585 PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); 586 data_pkt->dataStatic(load_inst->memData); 587	571 // Should keep track of stat for forwarded data 572 return NoFault; 573 } else if ((store_has_lower_limit && lower_load_has_store_part) \|\| 574 (store_has_upper_limit && upper_load_has_store_part) \|\| 575 (lower_load_has_store_part && upper_load_has_store_part)) { 576 // This is the partial store-load forwarding case where a store 577 // has only part of the load's data. 578 579 // If it's already been written back, then don't worry about 580 // stalling on it. 581 if (storeQueue[store_idx].completed) { 582 continue; 583 } 584 585 // Must stall load and force it to retry, so long as it's the oldest 586 // load that needs to do so. 587 if (!stalled \|\| 588 (stalled && 589 load_inst->seqNum < 590 loadQueue[stallingLoadIdx]->seqNum)) { 591 stalled = true; 592 stallingStoreIsn = storeQueue[store_idx].inst->seqNum; 593 stallingLoadIdx = load_idx; 594 } 595 596 // Tell IQ/mem dep unit that this instruction will need to be 597 // rescheduled eventually 598 iewStage->rescheduleMemInst(load_inst); 599 600 // Do not generate a writeback event as this instruction is not 601 // complete. 602 DPRINTF(LSQUnit, "Load-store forwarding mis-match. " 603 "Store idx %i to load addr %#x\n", 604 store_idx, req->getVaddr()); 605 606 return NoFault; 607 } 608 } 609 610 // If there's no forwarding case, then go access memory 611 DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", 612 load_inst->seqNum, load_inst->readPC()); 613 614 assert(!load_inst->memData); 615 load_inst->memData = new uint8_t[64]; 616 617 ++usedPorts; 618 619 DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", 620 load_inst->readPC()); 621 622 PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); 623 data_pkt->dataStatic(load_inst->memData); 624
	625 LSQSenderState state = new LSQSenderState; 626* state->isLoad = true; 627 state->idx = load_idx; 628 state->inst = load_inst; 629 data_pkt->senderState = state; 630
588 // if we have a cache, do cache access too 589 if (!dcachePort->sendTiming(data_pkt)) { 590 // There's an older load that's already going to squash. 591 if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) 592 return NoFault; 593 594 // Record that the load was blocked due to memory. This 595 // load will squash all instructions after it, be 596 // refetched, and re-executed. 597 isLoadBlocked = true; 598 loadBlockedHandled = false; 599 blockedLoadSeqNum = load_inst->seqNum; 600 // No fault occurred, even though the interface is blocked. 601 return NoFault; 602 } 603 604 if (data_pkt->result != Packet::Success) { 605 DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); 606 DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", 607 load_inst->seqNum); 608 } else { 609 DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); 610 DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", 611 load_inst->seqNum); 612 } 613 614 return NoFault; 615} 616 617template <class Impl> 618template <class T> 619Fault 620LSQUnit<Impl>::write(Request req, T &data, int store_idx) 621{ 622* assert(storeQueue[store_idx].inst); 623 624 DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x" 625 " \| storeHead:%i [sn:%i]\n", 626 store_idx, req->getPaddr(), data, storeHead, 627 storeQueue[store_idx].inst->seqNum); 628 629 storeQueue[store_idx].req = req; 630 storeQueue[store_idx].size = sizeof(T); 631 storeQueue[store_idx].data = data; 632 633 // This function only writes the data to the store queue, so no fault 634 // can happen here. 635 return NoFault; 636} 637 638#endif // __CPU_O3_LSQ_UNIT_HH__	631 // if we have a cache, do cache access too 632 if (!dcachePort->sendTiming(data_pkt)) { 633 // There's an older load that's already going to squash. 634 if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) 635 return NoFault; 636 637 // Record that the load was blocked due to memory. This 638 // load will squash all instructions after it, be 639 // refetched, and re-executed. 640 isLoadBlocked = true; 641 loadBlockedHandled = false; 642 blockedLoadSeqNum = load_inst->seqNum; 643 // No fault occurred, even though the interface is blocked. 644 return NoFault; 645 } 646 647 if (data_pkt->result != Packet::Success) { 648 DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); 649 DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", 650 load_inst->seqNum); 651 } else { 652 DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); 653 DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", 654 load_inst->seqNum); 655 } 656 657 return NoFault; 658} 659 660template <class Impl> 661template <class T> 662Fault 663LSQUnit<Impl>::write(Request req, T &data, int store_idx) 664{ 665* assert(storeQueue[store_idx].inst); 666 667 DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x" 668 " \| storeHead:%i [sn:%i]\n", 669 store_idx, req->getPaddr(), data, storeHead, 670 storeQueue[store_idx].inst->seqNum); 671 672 storeQueue[store_idx].req = req; 673 storeQueue[store_idx].size = sizeof(T); 674 storeQueue[store_idx].data = data; 675 676 // This function only writes the data to the store queue, so no fault 677 // can happen here. 678 return NoFault; 679} 680 681#endif // __CPU_O3_LSQ_UNIT_HH__