Cross Reference: /gem5/src/cpu/o3/lsq

Deleted Added

sdiff udiff text old ( 2689:dbf969c18a65 ) new ( 2693:18c6be231eb1 )

full compact

lsq_unit.hh (2689:dbf969c18a65)	lsq_unit.hh (2693:18c6be231eb1)
1/* 2 * Copyright (c) 2004-2006 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Authors: Kevin Lim 29 * Korey Sewell 30 / 31 32#ifndef __CPU_O3_LSQ_UNIT_HH__ 33#define __CPU_O3_LSQ_UNIT_HH__ 34 35#include <algorithm> 36#include <map> 37#include <queue> 38 39#include "arch/faults.hh" 40#include "config/full_system.hh" 41#include "base/hashmap.hh" 42#include "cpu/inst_seq.hh" 43#include "mem/packet.hh" 44#include "mem/port.hh" 45//#include "mem/page_table.hh" 46//#include "sim/debug.hh" 47//#include "sim/sim_object.hh" 48 49/* 50 * Class that implements the actual LQ and SQ for each specific 51 * thread. Both are circular queues; load entries are freed upon 52 * committing, while store entries are freed once they writeback. The 53 * LSQUnit tracks if there are memory ordering violations, and also 54 * detects partial load to store forwarding cases (a store only has 55 * part of a load's data) that requires the load to wait until the 56 * store writes back. In the former case it holds onto the instruction 57 * until the dependence unit looks at it, and in the latter it stalls 58 * the LSQ until the store writes back. At that point the load is 59 * replayed. 60 / 61template <class Impl> 62class LSQUnit { 63 protected: 64 typedef TheISA::IntReg IntReg; 65 public: 66 typedef typename Impl::Params Params; 67 typedef typename Impl::FullCPU FullCPU; 68 typedef typename Impl::DynInstPtr DynInstPtr; 69 typedef typename Impl::CPUPol::IEW IEW; 70 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 71 72 public: 73 /* Constructs an LSQ unit. init() must be called prior to use. / 74 LSQUnit(); 75 76 /* Initializes the LSQ unit with the specified number of entries. / 77 void init(Params params, unsigned maxLQEntries, 78 unsigned maxSQEntries, unsigned id); 79 80 /** Returns the name of the LSQ unit. / 81 std::string name() const; 82 83 /* Sets the CPU pointer. / 84 void setCPU(FullCPU cpu_ptr); 85 86 /** Sets the IEW stage pointer. / 87 void setIEW(IEW iew_ptr) 88 { iewStage = iew_ptr; } 89 90 /** Sets the page table pointer. / 91// void setPageTable(PageTable pt_ptr); 92 93 /** Switches out LSQ unit. / 94 void switchOut(); 95 96 /* Takes over from another CPU's thread. / 97 void takeOverFrom(); 98 99 /* Returns if the LSQ is switched out. / 100* bool isSwitchedOut() { return switchedOut; } 101 102 /** Ticks the LSQ unit, which in this case only resets the number of 103 * used cache ports. 104 * @todo: Move the number of used ports up to the LSQ level so it can 105 * be shared by all LSQ units. 106 / 107* void tick() { usedPorts = 0; } 108 109 /** Inserts an instruction. / 110* void insert(DynInstPtr &inst); 111 /** Inserts a load instruction. / 112* void insertLoad(DynInstPtr &load_inst); 113 /** Inserts a store instruction. / 114* void insertStore(DynInstPtr &store_inst); 115 116 /** Executes a load instruction. / 117* Fault executeLoad(DynInstPtr &inst); 118 119 Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; } 120 /** Executes a store instruction. / 121* Fault executeStore(DynInstPtr &inst); 122 123 /** Commits the head load. / 124* void commitLoad(); 125 /** Commits loads older than a specific sequence number. / 126* void commitLoads(InstSeqNum &youngest_inst); 127 128 /** Commits stores older than a specific sequence number. / 129* void commitStores(InstSeqNum &youngest_inst); 130 131 /** Writes back stores. / 132* void writebackStores(); 133 134 void completeDataAccess(PacketPtr pkt); 135 136 // @todo: Include stats in the LSQ unit. 137 //void regStats(); 138 139 /** Clears all the entries in the LQ. / 140* void clearLQ(); 141 142 /** Clears all the entries in the SQ. / 143* void clearSQ(); 144 145 /** Resizes the LQ to a given size. / 146* void resizeLQ(unsigned size); 147 148 /** Resizes the SQ to a given size. / 149* void resizeSQ(unsigned size); 150 151 /** Squashes all instructions younger than a specific sequence number. / 152* void squash(const InstSeqNum &squashed_num); 153 154 /** Returns if there is a memory ordering violation. Value is reset upon 155 * call to getMemDepViolator(). 156 / 157* bool violation() { return memDepViolator; } 158 159 /** Returns the memory ordering violator. / 160* DynInstPtr getMemDepViolator(); 161 162 /** Returns if a load became blocked due to the memory system. / 163* bool loadBlocked() 164 { return isLoadBlocked; } 165 166 /** Clears the signal that a load became blocked. / 167* void clearLoadBlocked() 168 { isLoadBlocked = false; } 169 170 /** Returns if the blocked load was handled. / 171* bool isLoadBlockedHandled() 172 { return loadBlockedHandled; } 173 174 /** Records the blocked load as being handled. / 175* void setLoadBlockedHandled() 176 { loadBlockedHandled = true; } 177 178 /** Returns the number of free entries (min of free LQ and SQ entries). / 179* unsigned numFreeEntries(); 180 181 /** Returns the number of loads ready to execute. / 182* int numLoadsReady(); 183 184 /** Returns the number of loads in the LQ. / 185* int numLoads() { return loads; } 186 187 /** Returns the number of stores in the SQ. / 188* int numStores() { return stores; } 189 190 /** Returns if either the LQ or SQ is full. / 191* bool isFull() { return lqFull() \|\| sqFull(); } 192 193 /** Returns if the LQ is full. / 194* bool lqFull() { return loads >= (LQEntries - 1); } 195 196 /** Returns if the SQ is full. / 197* bool sqFull() { return stores >= (SQEntries - 1); } 198 199 /** Returns the number of instructions in the LSQ. / 200* unsigned getCount() { return loads + stores; } 201 202 /** Returns if there are any stores to writeback. / 203* bool hasStoresToWB() { return storesToWB; } 204 205 /** Returns the number of stores to writeback. / 206* int numStoresToWB() { return storesToWB; } 207 208 /** Returns if the LSQ unit will writeback on this cycle. / 209* bool willWB() { return storeQueue[storeWBIdx].canWB && 210 !storeQueue[storeWBIdx].completed && 211 !isStoreBlocked; } 212 213 private: 214 void writeback(DynInstPtr &inst, PacketPtr pkt); 215	1/* 2 * Copyright (c) 2004-2006 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Authors: Kevin Lim 29 * Korey Sewell 30 / 31 32#ifndef __CPU_O3_LSQ_UNIT_HH__ 33#define __CPU_O3_LSQ_UNIT_HH__ 34 35#include <algorithm> 36#include <map> 37#include <queue> 38 39#include "arch/faults.hh" 40#include "config/full_system.hh" 41#include "base/hashmap.hh" 42#include "cpu/inst_seq.hh" 43#include "mem/packet.hh" 44#include "mem/port.hh" 45//#include "mem/page_table.hh" 46//#include "sim/debug.hh" 47//#include "sim/sim_object.hh" 48 49/* 50 * Class that implements the actual LQ and SQ for each specific 51 * thread. Both are circular queues; load entries are freed upon 52 * committing, while store entries are freed once they writeback. The 53 * LSQUnit tracks if there are memory ordering violations, and also 54 * detects partial load to store forwarding cases (a store only has 55 * part of a load's data) that requires the load to wait until the 56 * store writes back. In the former case it holds onto the instruction 57 * until the dependence unit looks at it, and in the latter it stalls 58 * the LSQ until the store writes back. At that point the load is 59 * replayed. 60 / 61template <class Impl> 62class LSQUnit { 63 protected: 64 typedef TheISA::IntReg IntReg; 65 public: 66 typedef typename Impl::Params Params; 67 typedef typename Impl::FullCPU FullCPU; 68 typedef typename Impl::DynInstPtr DynInstPtr; 69 typedef typename Impl::CPUPol::IEW IEW; 70 typedef typename Impl::CPUPol::IssueStruct IssueStruct; 71 72 public: 73 /* Constructs an LSQ unit. init() must be called prior to use. / 74 LSQUnit(); 75 76 /* Initializes the LSQ unit with the specified number of entries. / 77 void init(Params params, unsigned maxLQEntries, 78 unsigned maxSQEntries, unsigned id); 79 80 /** Returns the name of the LSQ unit. / 81 std::string name() const; 82 83 /* Sets the CPU pointer. / 84 void setCPU(FullCPU cpu_ptr); 85 86 /** Sets the IEW stage pointer. / 87 void setIEW(IEW iew_ptr) 88 { iewStage = iew_ptr; } 89 90 /** Sets the page table pointer. / 91// void setPageTable(PageTable pt_ptr); 92 93 /** Switches out LSQ unit. / 94 void switchOut(); 95 96 /* Takes over from another CPU's thread. / 97 void takeOverFrom(); 98 99 /* Returns if the LSQ is switched out. / 100* bool isSwitchedOut() { return switchedOut; } 101 102 /** Ticks the LSQ unit, which in this case only resets the number of 103 * used cache ports. 104 * @todo: Move the number of used ports up to the LSQ level so it can 105 * be shared by all LSQ units. 106 / 107* void tick() { usedPorts = 0; } 108 109 /** Inserts an instruction. / 110* void insert(DynInstPtr &inst); 111 /** Inserts a load instruction. / 112* void insertLoad(DynInstPtr &load_inst); 113 /** Inserts a store instruction. / 114* void insertStore(DynInstPtr &store_inst); 115 116 /** Executes a load instruction. / 117* Fault executeLoad(DynInstPtr &inst); 118 119 Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; } 120 /** Executes a store instruction. / 121* Fault executeStore(DynInstPtr &inst); 122 123 /** Commits the head load. / 124* void commitLoad(); 125 /** Commits loads older than a specific sequence number. / 126* void commitLoads(InstSeqNum &youngest_inst); 127 128 /** Commits stores older than a specific sequence number. / 129* void commitStores(InstSeqNum &youngest_inst); 130 131 /** Writes back stores. / 132* void writebackStores(); 133 134 void completeDataAccess(PacketPtr pkt); 135 136 // @todo: Include stats in the LSQ unit. 137 //void regStats(); 138 139 /** Clears all the entries in the LQ. / 140* void clearLQ(); 141 142 /** Clears all the entries in the SQ. / 143* void clearSQ(); 144 145 /** Resizes the LQ to a given size. / 146* void resizeLQ(unsigned size); 147 148 /** Resizes the SQ to a given size. / 149* void resizeSQ(unsigned size); 150 151 /** Squashes all instructions younger than a specific sequence number. / 152* void squash(const InstSeqNum &squashed_num); 153 154 /** Returns if there is a memory ordering violation. Value is reset upon 155 * call to getMemDepViolator(). 156 / 157* bool violation() { return memDepViolator; } 158 159 /** Returns the memory ordering violator. / 160* DynInstPtr getMemDepViolator(); 161 162 /** Returns if a load became blocked due to the memory system. / 163* bool loadBlocked() 164 { return isLoadBlocked; } 165 166 /** Clears the signal that a load became blocked. / 167* void clearLoadBlocked() 168 { isLoadBlocked = false; } 169 170 /** Returns if the blocked load was handled. / 171* bool isLoadBlockedHandled() 172 { return loadBlockedHandled; } 173 174 /** Records the blocked load as being handled. / 175* void setLoadBlockedHandled() 176 { loadBlockedHandled = true; } 177 178 /** Returns the number of free entries (min of free LQ and SQ entries). / 179* unsigned numFreeEntries(); 180 181 /** Returns the number of loads ready to execute. / 182* int numLoadsReady(); 183 184 /** Returns the number of loads in the LQ. / 185* int numLoads() { return loads; } 186 187 /** Returns the number of stores in the SQ. / 188* int numStores() { return stores; } 189 190 /** Returns if either the LQ or SQ is full. / 191* bool isFull() { return lqFull() \|\| sqFull(); } 192 193 /** Returns if the LQ is full. / 194* bool lqFull() { return loads >= (LQEntries - 1); } 195 196 /** Returns if the SQ is full. / 197* bool sqFull() { return stores >= (SQEntries - 1); } 198 199 /** Returns the number of instructions in the LSQ. / 200* unsigned getCount() { return loads + stores; } 201 202 /** Returns if there are any stores to writeback. / 203* bool hasStoresToWB() { return storesToWB; } 204 205 /** Returns the number of stores to writeback. / 206* int numStoresToWB() { return storesToWB; } 207 208 /** Returns if the LSQ unit will writeback on this cycle. / 209* bool willWB() { return storeQueue[storeWBIdx].canWB && 210 !storeQueue[storeWBIdx].completed && 211 !isStoreBlocked; } 212 213 private: 214 void writeback(DynInstPtr &inst, PacketPtr pkt); 215
	216 void storePostSend(Packet pkt); 217*
216 /** Completes the store at the specified index. / 217* void completeStore(int store_idx); 218	218 /** Completes the store at the specified index. / 219* void completeStore(int store_idx); 220
	221 /** Handles doing the retry. / 222* void recvRetry(); 223
219 /** Increments the given store index (circular queue). / 220* inline void incrStIdx(int &store_idx); 221 /** Decrements the given store index (circular queue). / 222* inline void decrStIdx(int &store_idx); 223 /** Increments the given load index (circular queue). / 224* inline void incrLdIdx(int &load_idx); 225 /** Decrements the given load index (circular queue). / 226* inline void decrLdIdx(int &load_idx); 227 228 public: 229 /** Debugging function to dump instructions in the LSQ. / 230* void dumpInsts(); 231 232 private: 233 /** Pointer to the CPU. / 234* FullCPU cpu; 235* 236 /** Pointer to the IEW stage. / 237* IEW iewStage; 238* 239 MemObject mem; 240* 241 class DcachePort : public Port 242 { 243 protected: 244 FullCPU cpu; 245* LSQUnit lsq; 246* 247 public: 248 DcachePort(FullCPU _cpu, LSQUnit _lsq) 249 : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) 250 { } 251 252 protected: 253 virtual Tick recvAtomic(PacketPtr pkt); 254 255 virtual void recvFunctional(PacketPtr pkt); 256 257 virtual void recvStatusChange(Status status); 258 259 virtual void getDeviceAddressRanges(AddrRangeList &resp, 260 AddrRangeList &snoop) 261 { resp.clear(); snoop.clear(); } 262 263 virtual bool recvTiming(PacketPtr pkt); 264 265 virtual void recvRetry(); 266 }; 267 268 /** Pointer to the D-cache. / 269* DcachePort dcachePort; 270* 271 class LSQSenderState : public Packet::SenderState 272 { 273 public: 274 LSQSenderState() 275 : noWB(false) 276 { } 277 278// protected: 279 DynInstPtr inst; 280 bool isLoad; 281 int idx; 282 bool noWB; 283 }; 284 285 /** Pointer to the page table. / 286// PageTable pTable; 287 288 class WritebackEvent : public Event { 289 public: 290 /** Constructs a writeback event. / 291* WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit lsq_ptr); 292* 293 /** Processes the writeback event. / 294* void process(); 295 296 /** Returns the description of this event. / 297* const char description(); 298* 299 private: 300 DynInstPtr inst; 301 302 PacketPtr pkt; 303 304 /** The pointer to the LSQ unit that issued the store. / 305* LSQUnit<Impl> lsqPtr; 306* }; 307 308 public: 309 struct SQEntry { 310 /** Constructs an empty store queue entry. / 311* SQEntry() 312 : inst(NULL), req(NULL), size(0), data(0), 313 canWB(0), committed(0), completed(0) 314 { } 315 316 /** Constructs a store queue entry for a given instruction. / 317* SQEntry(DynInstPtr &_inst) 318 : inst(_inst), req(NULL), size(0), data(0), 319 canWB(0), committed(0), completed(0) 320 { } 321 322 /** The store instruction. / 323* DynInstPtr inst; 324 /** The request for the store. / 325* RequestPtr req; 326 /** The size of the store. / 327* int size; 328 /** The store data. / 329* IntReg data; 330 /** Whether or not the store can writeback. / 331* bool canWB; 332 /** Whether or not the store is committed. / 333* bool committed; 334 /** Whether or not the store is completed. / 335* bool completed; 336 }; 337 338 private: 339 /** The LSQUnit thread id. / 340* unsigned lsqID; 341 342 /** The store queue. / 343* std::vector<SQEntry> storeQueue; 344 345 /** The load queue. / 346* std::vector<DynInstPtr> loadQueue; 347 348 /** The number of LQ entries, plus a sentinel entry (circular queue). 349 * @todo: Consider having var that records the true number of LQ entries. 350 / 351* unsigned LQEntries; 352 /** The number of SQ entries, plus a sentinel entry (circular queue). 353 * @todo: Consider having var that records the true number of SQ entries. 354 / 355* unsigned SQEntries; 356 357 /** The number of load instructions in the LQ. / 358* int loads; 359 /** The number of store instructions in the SQ. / 360* int stores; 361 /** The number of store instructions in the SQ waiting to writeback. / 362* int storesToWB; 363 364 /** The index of the head instruction in the LQ. / 365* int loadHead; 366 /** The index of the tail instruction in the LQ. / 367* int loadTail; 368 369 /** The index of the head instruction in the SQ. / 370* int storeHead; 371 /** The index of the first instruction that may be ready to be 372 * written back, and has not yet been written back. 373 / 374* int storeWBIdx; 375 /** The index of the tail instruction in the SQ. / 376* int storeTail; 377 378 /// @todo Consider moving to a more advanced model with write vs read ports 379 /** The number of cache ports available each cycle. / 380* int cachePorts; 381 382 /** The number of used cache ports in this cycle. / 383* int usedPorts; 384 385 /** Is the LSQ switched out. / 386* bool switchedOut; 387 388 //list<InstSeqNum> mshrSeqNums; 389 390 /** Wire to read information from the issue stage time queue. / 391* typename TimeBuffer<IssueStruct>::wire fromIssue; 392 393 /** Whether or not the LSQ is stalled. / 394* bool stalled; 395 /** The store that causes the stall due to partial store to load 396 * forwarding. 397 / 398* InstSeqNum stallingStoreIsn; 399 /** The index of the above store. / 400* int stallingLoadIdx; 401	224 /** Increments the given store index (circular queue). / 225* inline void incrStIdx(int &store_idx); 226 /** Decrements the given store index (circular queue). / 227* inline void decrStIdx(int &store_idx); 228 /** Increments the given load index (circular queue). / 229* inline void incrLdIdx(int &load_idx); 230 /** Decrements the given load index (circular queue). / 231* inline void decrLdIdx(int &load_idx); 232 233 public: 234 /** Debugging function to dump instructions in the LSQ. / 235* void dumpInsts(); 236 237 private: 238 /** Pointer to the CPU. / 239* FullCPU cpu; 240* 241 /** Pointer to the IEW stage. / 242* IEW iewStage; 243* 244 MemObject mem; 245* 246 class DcachePort : public Port 247 { 248 protected: 249 FullCPU cpu; 250* LSQUnit lsq; 251* 252 public: 253 DcachePort(FullCPU _cpu, LSQUnit _lsq) 254 : Port(_lsq->name() + "-dport"), cpu(_cpu), lsq(_lsq) 255 { } 256 257 protected: 258 virtual Tick recvAtomic(PacketPtr pkt); 259 260 virtual void recvFunctional(PacketPtr pkt); 261 262 virtual void recvStatusChange(Status status); 263 264 virtual void getDeviceAddressRanges(AddrRangeList &resp, 265 AddrRangeList &snoop) 266 { resp.clear(); snoop.clear(); } 267 268 virtual bool recvTiming(PacketPtr pkt); 269 270 virtual void recvRetry(); 271 }; 272 273 /** Pointer to the D-cache. / 274* DcachePort dcachePort; 275* 276 class LSQSenderState : public Packet::SenderState 277 { 278 public: 279 LSQSenderState() 280 : noWB(false) 281 { } 282 283// protected: 284 DynInstPtr inst; 285 bool isLoad; 286 int idx; 287 bool noWB; 288 }; 289 290 /** Pointer to the page table. / 291// PageTable pTable; 292 293 class WritebackEvent : public Event { 294 public: 295 /** Constructs a writeback event. / 296* WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit lsq_ptr); 297* 298 /** Processes the writeback event. / 299* void process(); 300 301 /** Returns the description of this event. / 302* const char description(); 303* 304 private: 305 DynInstPtr inst; 306 307 PacketPtr pkt; 308 309 /** The pointer to the LSQ unit that issued the store. / 310* LSQUnit<Impl> lsqPtr; 311* }; 312 313 public: 314 struct SQEntry { 315 /** Constructs an empty store queue entry. / 316* SQEntry() 317 : inst(NULL), req(NULL), size(0), data(0), 318 canWB(0), committed(0), completed(0) 319 { } 320 321 /** Constructs a store queue entry for a given instruction. / 322* SQEntry(DynInstPtr &_inst) 323 : inst(_inst), req(NULL), size(0), data(0), 324 canWB(0), committed(0), completed(0) 325 { } 326 327 /** The store instruction. / 328* DynInstPtr inst; 329 /** The request for the store. / 330* RequestPtr req; 331 /** The size of the store. / 332* int size; 333 /** The store data. / 334* IntReg data; 335 /** Whether or not the store can writeback. / 336* bool canWB; 337 /** Whether or not the store is committed. / 338* bool committed; 339 /** Whether or not the store is completed. / 340* bool completed; 341 }; 342 343 private: 344 /** The LSQUnit thread id. / 345* unsigned lsqID; 346 347 /** The store queue. / 348* std::vector<SQEntry> storeQueue; 349 350 /** The load queue. / 351* std::vector<DynInstPtr> loadQueue; 352 353 /** The number of LQ entries, plus a sentinel entry (circular queue). 354 * @todo: Consider having var that records the true number of LQ entries. 355 / 356* unsigned LQEntries; 357 /** The number of SQ entries, plus a sentinel entry (circular queue). 358 * @todo: Consider having var that records the true number of SQ entries. 359 / 360* unsigned SQEntries; 361 362 /** The number of load instructions in the LQ. / 363* int loads; 364 /** The number of store instructions in the SQ. / 365* int stores; 366 /** The number of store instructions in the SQ waiting to writeback. / 367* int storesToWB; 368 369 /** The index of the head instruction in the LQ. / 370* int loadHead; 371 /** The index of the tail instruction in the LQ. / 372* int loadTail; 373 374 /** The index of the head instruction in the SQ. / 375* int storeHead; 376 /** The index of the first instruction that may be ready to be 377 * written back, and has not yet been written back. 378 / 379* int storeWBIdx; 380 /** The index of the tail instruction in the SQ. / 381* int storeTail; 382 383 /// @todo Consider moving to a more advanced model with write vs read ports 384 /** The number of cache ports available each cycle. / 385* int cachePorts; 386 387 /** The number of used cache ports in this cycle. / 388* int usedPorts; 389 390 /** Is the LSQ switched out. / 391* bool switchedOut; 392 393 //list<InstSeqNum> mshrSeqNums; 394 395 /** Wire to read information from the issue stage time queue. / 396* typename TimeBuffer<IssueStruct>::wire fromIssue; 397 398 /** Whether or not the LSQ is stalled. / 399* bool stalled; 400 /** The store that causes the stall due to partial store to load 401 * forwarding. 402 / 403* InstSeqNum stallingStoreIsn; 404 /** The index of the above store. / 405* int stallingLoadIdx; 406
	407 PacketPtr sendingPkt; 408
402 bool isStoreBlocked; 403 404 /** Whether or not a load is blocked due to the memory system. / 405* bool isLoadBlocked; 406 407 /** Has the blocked load been handled. / 408* bool loadBlockedHandled; 409 410 /** The sequence number of the blocked load. / 411* InstSeqNum blockedLoadSeqNum; 412 413 /** The oldest load that caused a memory ordering violation. / 414* DynInstPtr memDepViolator; 415 416 // Will also need how many read/write ports the Dcache has. Or keep track 417 // of that in stage that is one level up, and only call executeLoad/Store 418 // the appropriate number of times. 419/* 420 // total number of loads forwaded from LSQ stores 421 Stats::Vector<> lsq_forw_loads; 422 423 // total number of loads ignored due to invalid addresses 424 Stats::Vector<> inv_addr_loads; 425 426 // total number of software prefetches ignored due to invalid addresses 427 Stats::Vector<> inv_addr_swpfs; 428 429 // total non-speculative bogus addresses seen (debug var) 430 Counter sim_invalid_addrs; 431 Stats::Vector<> fu_busy; //cumulative fu busy 432 433 // ready loads blocked due to memory disambiguation 434 Stats::Vector<> lsq_blocked_loads; 435 436 Stats::Scalar<> lsqInversion; 437/ 438* public: 439 /** Executes the load at the given index. / 440* template <class T> 441 Fault read(Request req, T &data, int load_idx); 442* 443 /** Executes the store at the given index. / 444* template <class T> 445 Fault write(Request req, T &data, int store_idx); 446* 447 /** Returns the index of the head load instruction. / 448* int getLoadHead() { return loadHead; } 449 /** Returns the sequence number of the head load instruction. / 450* InstSeqNum getLoadHeadSeqNum() 451 { 452 if (loadQueue[loadHead]) { 453 return loadQueue[loadHead]->seqNum; 454 } else { 455 return 0; 456 } 457 458 } 459 460 /** Returns the index of the head store instruction. / 461* int getStoreHead() { return storeHead; } 462 /** Returns the sequence number of the head store instruction. / 463* InstSeqNum getStoreHeadSeqNum() 464 { 465 if (storeQueue[storeHead].inst) { 466 return storeQueue[storeHead].inst->seqNum; 467 } else { 468 return 0; 469 } 470 471 } 472 473 /** Returns whether or not the LSQ unit is stalled. / 474* bool isStalled() { return stalled; } 475}; 476 477template <class Impl> 478template <class T> 479Fault 480LSQUnit<Impl>::read(Request req, T &data, int load_idx) 481{ 482* DynInstPtr load_inst = loadQueue[load_idx]; 483 484 assert(load_inst); 485 486 assert(!load_inst->isExecuted()); 487 488 // Make sure this isn't an uncacheable access 489 // A bit of a hackish way to get uncached accesses to work only if they're 490 // at the head of the LSQ and are ready to commit (at the head of the ROB 491 // too). 492 if (req->getFlags() & UNCACHEABLE && 493 (load_idx != loadHead \|\| !load_inst->reachedCommit)) { 494 iewStage->rescheduleMemInst(load_inst); 495 return TheISA::genMachineCheckFault(); 496 } 497 498 // Check the SQ for any previous stores that might lead to forwarding 499 int store_idx = load_inst->sqIdx; 500 501 int store_size = 0; 502 503 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " 504 "storeHead: %i addr: %#x\n", 505 load_idx, store_idx, storeHead, req->getPaddr()); 506	409 bool isStoreBlocked; 410 411 /** Whether or not a load is blocked due to the memory system. / 412* bool isLoadBlocked; 413 414 /** Has the blocked load been handled. / 415* bool loadBlockedHandled; 416 417 /** The sequence number of the blocked load. / 418* InstSeqNum blockedLoadSeqNum; 419 420 /** The oldest load that caused a memory ordering violation. / 421* DynInstPtr memDepViolator; 422 423 // Will also need how many read/write ports the Dcache has. Or keep track 424 // of that in stage that is one level up, and only call executeLoad/Store 425 // the appropriate number of times. 426/* 427 // total number of loads forwaded from LSQ stores 428 Stats::Vector<> lsq_forw_loads; 429 430 // total number of loads ignored due to invalid addresses 431 Stats::Vector<> inv_addr_loads; 432 433 // total number of software prefetches ignored due to invalid addresses 434 Stats::Vector<> inv_addr_swpfs; 435 436 // total non-speculative bogus addresses seen (debug var) 437 Counter sim_invalid_addrs; 438 Stats::Vector<> fu_busy; //cumulative fu busy 439 440 // ready loads blocked due to memory disambiguation 441 Stats::Vector<> lsq_blocked_loads; 442 443 Stats::Scalar<> lsqInversion; 444/ 445* public: 446 /** Executes the load at the given index. / 447* template <class T> 448 Fault read(Request req, T &data, int load_idx); 449* 450 /** Executes the store at the given index. / 451* template <class T> 452 Fault write(Request req, T &data, int store_idx); 453* 454 /** Returns the index of the head load instruction. / 455* int getLoadHead() { return loadHead; } 456 /** Returns the sequence number of the head load instruction. / 457* InstSeqNum getLoadHeadSeqNum() 458 { 459 if (loadQueue[loadHead]) { 460 return loadQueue[loadHead]->seqNum; 461 } else { 462 return 0; 463 } 464 465 } 466 467 /** Returns the index of the head store instruction. / 468* int getStoreHead() { return storeHead; } 469 /** Returns the sequence number of the head store instruction. / 470* InstSeqNum getStoreHeadSeqNum() 471 { 472 if (storeQueue[storeHead].inst) { 473 return storeQueue[storeHead].inst->seqNum; 474 } else { 475 return 0; 476 } 477 478 } 479 480 /** Returns whether or not the LSQ unit is stalled. / 481* bool isStalled() { return stalled; } 482}; 483 484template <class Impl> 485template <class T> 486Fault 487LSQUnit<Impl>::read(Request req, T &data, int load_idx) 488{ 489* DynInstPtr load_inst = loadQueue[load_idx]; 490 491 assert(load_inst); 492 493 assert(!load_inst->isExecuted()); 494 495 // Make sure this isn't an uncacheable access 496 // A bit of a hackish way to get uncached accesses to work only if they're 497 // at the head of the LSQ and are ready to commit (at the head of the ROB 498 // too). 499 if (req->getFlags() & UNCACHEABLE && 500 (load_idx != loadHead \|\| !load_inst->reachedCommit)) { 501 iewStage->rescheduleMemInst(load_inst); 502 return TheISA::genMachineCheckFault(); 503 } 504 505 // Check the SQ for any previous stores that might lead to forwarding 506 int store_idx = load_inst->sqIdx; 507 508 int store_size = 0; 509 510 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " 511 "storeHead: %i addr: %#x\n", 512 load_idx, store_idx, storeHead, req->getPaddr()); 513
507#if 0	514#if FULL_SYSTEM
508 if (req->getFlags() & LOCKED) { 509 cpu->lockAddr = req->getPaddr(); 510 cpu->lockFlag = true; 511 } 512#endif 513 514 while (store_idx != -1) { 515 // End once we've reached the top of the LSQ 516 if (store_idx == storeWBIdx) { 517 break; 518 } 519 520 // Move the index to one younger 521 if (--store_idx < 0) 522 store_idx += SQEntries; 523 524 assert(storeQueue[store_idx].inst); 525 526 store_size = storeQueue[store_idx].size; 527 528 if (store_size == 0) 529 continue; 530 531 // Check if the store data is within the lower and upper bounds of 532 // addresses that the request needs. 533 bool store_has_lower_limit = 534 req->getVaddr() >= storeQueue[store_idx].inst->effAddr; 535 bool store_has_upper_limit = 536 (req->getVaddr() + req->getSize()) <= 537 (storeQueue[store_idx].inst->effAddr + store_size); 538 bool lower_load_has_store_part = 539 req->getVaddr() < (storeQueue[store_idx].inst->effAddr + 540 store_size); 541 bool upper_load_has_store_part = 542 (req->getVaddr() + req->getSize()) > 543 storeQueue[store_idx].inst->effAddr; 544 545 // If the store's data has all of the data needed, we can forward. 546 if (store_has_lower_limit && store_has_upper_limit) { 547 // Get shift amount for offset into the store's data. 548 int shift_amt = req->getVaddr() & (store_size - 1); 549 // @todo: Magic number, assumes byte addressing 550 shift_amt = shift_amt << 3; 551 552 // Cast this to type T? 553 data = storeQueue[store_idx].data >> shift_amt; 554 555 assert(!load_inst->memData); 556 load_inst->memData = new uint8_t[64]; 557 558 memcpy(load_inst->memData, &data, req->getSize()); 559 560 DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " 561 "addr %#x, data %#x\n",	515 if (req->getFlags() & LOCKED) { 516 cpu->lockAddr = req->getPaddr(); 517 cpu->lockFlag = true; 518 } 519#endif 520 521 while (store_idx != -1) { 522 // End once we've reached the top of the LSQ 523 if (store_idx == storeWBIdx) { 524 break; 525 } 526 527 // Move the index to one younger 528 if (--store_idx < 0) 529 store_idx += SQEntries; 530 531 assert(storeQueue[store_idx].inst); 532 533 store_size = storeQueue[store_idx].size; 534 535 if (store_size == 0) 536 continue; 537 538 // Check if the store data is within the lower and upper bounds of 539 // addresses that the request needs. 540 bool store_has_lower_limit = 541 req->getVaddr() >= storeQueue[store_idx].inst->effAddr; 542 bool store_has_upper_limit = 543 (req->getVaddr() + req->getSize()) <= 544 (storeQueue[store_idx].inst->effAddr + store_size); 545 bool lower_load_has_store_part = 546 req->getVaddr() < (storeQueue[store_idx].inst->effAddr + 547 store_size); 548 bool upper_load_has_store_part = 549 (req->getVaddr() + req->getSize()) > 550 storeQueue[store_idx].inst->effAddr; 551 552 // If the store's data has all of the data needed, we can forward. 553 if (store_has_lower_limit && store_has_upper_limit) { 554 // Get shift amount for offset into the store's data. 555 int shift_amt = req->getVaddr() & (store_size - 1); 556 // @todo: Magic number, assumes byte addressing 557 shift_amt = shift_amt << 3; 558 559 // Cast this to type T? 560 data = storeQueue[store_idx].data >> shift_amt; 561 562 assert(!load_inst->memData); 563 load_inst->memData = new uint8_t[64]; 564 565 memcpy(load_inst->memData, &data, req->getSize()); 566 567 DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " 568 "addr %#x, data %#x\n",
562 store_idx, req->getVaddr(), *(load_inst->memData));	569 store_idx, req->getVaddr(), data);
563 564 PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); 565 data_pkt->dataStatic(load_inst->memData); 566 567 WritebackEvent wb = new WritebackEvent(load_inst, data_pkt, this); 568* 569 // We'll say this has a 1 cycle load-store forwarding latency 570 // for now. 571 // @todo: Need to make this a parameter. 572 wb->schedule(curTick); 573 574 // Should keep track of stat for forwarded data 575 return NoFault; 576 } else if ((store_has_lower_limit && lower_load_has_store_part) \|\| 577 (store_has_upper_limit && upper_load_has_store_part) \|\| 578 (lower_load_has_store_part && upper_load_has_store_part)) { 579 // This is the partial store-load forwarding case where a store 580 // has only part of the load's data. 581 582 // If it's already been written back, then don't worry about 583 // stalling on it. 584 if (storeQueue[store_idx].completed) { 585 continue; 586 } 587 588 // Must stall load and force it to retry, so long as it's the oldest 589 // load that needs to do so. 590 if (!stalled \|\| 591 (stalled && 592 load_inst->seqNum < 593 loadQueue[stallingLoadIdx]->seqNum)) { 594 stalled = true; 595 stallingStoreIsn = storeQueue[store_idx].inst->seqNum; 596 stallingLoadIdx = load_idx; 597 } 598 599 // Tell IQ/mem dep unit that this instruction will need to be 600 // rescheduled eventually 601 iewStage->rescheduleMemInst(load_inst); 602 603 // Do not generate a writeback event as this instruction is not 604 // complete. 605 DPRINTF(LSQUnit, "Load-store forwarding mis-match. " 606 "Store idx %i to load addr %#x\n", 607 store_idx, req->getVaddr()); 608 609 return NoFault; 610 } 611 } 612 613 // If there's no forwarding case, then go access memory 614 DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", 615 load_inst->seqNum, load_inst->readPC()); 616 617 assert(!load_inst->memData); 618 load_inst->memData = new uint8_t[64]; 619 620 ++usedPorts; 621 622 DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", 623 load_inst->readPC()); 624 625 PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); 626 data_pkt->dataStatic(load_inst->memData); 627 628 LSQSenderState state = new LSQSenderState; 629* state->isLoad = true; 630 state->idx = load_idx; 631 state->inst = load_inst; 632 data_pkt->senderState = state; 633 634 // if we have a cache, do cache access too 635 if (!dcachePort->sendTiming(data_pkt)) { 636 // There's an older load that's already going to squash. 637 if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) 638 return NoFault; 639 640 // Record that the load was blocked due to memory. This 641 // load will squash all instructions after it, be 642 // refetched, and re-executed. 643 isLoadBlocked = true; 644 loadBlockedHandled = false; 645 blockedLoadSeqNum = load_inst->seqNum; 646 // No fault occurred, even though the interface is blocked. 647 return NoFault; 648 } 649 650 if (data_pkt->result != Packet::Success) { 651 DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); 652 DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", 653 load_inst->seqNum); 654 } else { 655 DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); 656 DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", 657 load_inst->seqNum); 658 } 659 660 return NoFault; 661} 662 663template <class Impl> 664template <class T> 665Fault 666LSQUnit<Impl>::write(Request req, T &data, int store_idx) 667{ 668* assert(storeQueue[store_idx].inst); 669 670 DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x" 671 " \| storeHead:%i [sn:%i]\n", 672 store_idx, req->getPaddr(), data, storeHead, 673 storeQueue[store_idx].inst->seqNum); 674 675 storeQueue[store_idx].req = req; 676 storeQueue[store_idx].size = sizeof(T); 677 storeQueue[store_idx].data = data; 678 679 // This function only writes the data to the store queue, so no fault 680 // can happen here. 681 return NoFault; 682} 683 684#endif // __CPU_O3_LSQ_UNIT_HH__	570 571 PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); 572 data_pkt->dataStatic(load_inst->memData); 573 574 WritebackEvent wb = new WritebackEvent(load_inst, data_pkt, this); 575* 576 // We'll say this has a 1 cycle load-store forwarding latency 577 // for now. 578 // @todo: Need to make this a parameter. 579 wb->schedule(curTick); 580 581 // Should keep track of stat for forwarded data 582 return NoFault; 583 } else if ((store_has_lower_limit && lower_load_has_store_part) \|\| 584 (store_has_upper_limit && upper_load_has_store_part) \|\| 585 (lower_load_has_store_part && upper_load_has_store_part)) { 586 // This is the partial store-load forwarding case where a store 587 // has only part of the load's data. 588 589 // If it's already been written back, then don't worry about 590 // stalling on it. 591 if (storeQueue[store_idx].completed) { 592 continue; 593 } 594 595 // Must stall load and force it to retry, so long as it's the oldest 596 // load that needs to do so. 597 if (!stalled \|\| 598 (stalled && 599 load_inst->seqNum < 600 loadQueue[stallingLoadIdx]->seqNum)) { 601 stalled = true; 602 stallingStoreIsn = storeQueue[store_idx].inst->seqNum; 603 stallingLoadIdx = load_idx; 604 } 605 606 // Tell IQ/mem dep unit that this instruction will need to be 607 // rescheduled eventually 608 iewStage->rescheduleMemInst(load_inst); 609 610 // Do not generate a writeback event as this instruction is not 611 // complete. 612 DPRINTF(LSQUnit, "Load-store forwarding mis-match. " 613 "Store idx %i to load addr %#x\n", 614 store_idx, req->getVaddr()); 615 616 return NoFault; 617 } 618 } 619 620 // If there's no forwarding case, then go access memory 621 DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n", 622 load_inst->seqNum, load_inst->readPC()); 623 624 assert(!load_inst->memData); 625 load_inst->memData = new uint8_t[64]; 626 627 ++usedPorts; 628 629 DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n", 630 load_inst->readPC()); 631 632 PacketPtr data_pkt = new Packet(req, Packet::ReadReq, Packet::Broadcast); 633 data_pkt->dataStatic(load_inst->memData); 634 635 LSQSenderState state = new LSQSenderState; 636* state->isLoad = true; 637 state->idx = load_idx; 638 state->inst = load_inst; 639 data_pkt->senderState = state; 640 641 // if we have a cache, do cache access too 642 if (!dcachePort->sendTiming(data_pkt)) { 643 // There's an older load that's already going to squash. 644 if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) 645 return NoFault; 646 647 // Record that the load was blocked due to memory. This 648 // load will squash all instructions after it, be 649 // refetched, and re-executed. 650 isLoadBlocked = true; 651 loadBlockedHandled = false; 652 blockedLoadSeqNum = load_inst->seqNum; 653 // No fault occurred, even though the interface is blocked. 654 return NoFault; 655 } 656 657 if (data_pkt->result != Packet::Success) { 658 DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n"); 659 DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n", 660 load_inst->seqNum); 661 } else { 662 DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n"); 663 DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n", 664 load_inst->seqNum); 665 } 666 667 return NoFault; 668} 669 670template <class Impl> 671template <class T> 672Fault 673LSQUnit<Impl>::write(Request req, T &data, int store_idx) 674{ 675* assert(storeQueue[store_idx].inst); 676 677 DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x" 678 " \| storeHead:%i [sn:%i]\n", 679 store_idx, req->getPaddr(), data, storeHead, 680 storeQueue[store_idx].inst->seqNum); 681 682 storeQueue[store_idx].req = req; 683 storeQueue[store_idx].size = sizeof(T); 684 storeQueue[store_idx].data = data; 685 686 // This function only writes the data to the store queue, so no fault 687 // can happen here. 688 return NoFault; 689} 690 691#endif // __CPU_O3_LSQ_UNIT_HH__