lsq_unit.hh (6658:f4de76601762) lsq_unit.hh (6974:4d4903a3e7c5)
1/*
2 * Copyright (c) 2004-2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;

--- 202 unchanged lines hidden (view full) ---

211
212 /** Handles doing the retry. */
213 void recvRetry();
214
215 private:
216 /** Writes back the instruction, sending it to IEW. */
217 void writeback(DynInstPtr &inst, PacketPtr pkt);
218
1/*
2 * Copyright (c) 2004-2006 The Regents of The University of Michigan
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;

--- 202 unchanged lines hidden (view full) ---

211
212 /** Handles doing the retry. */
213 void recvRetry();
214
215 private:
216 /** Writes back the instruction, sending it to IEW. */
217 void writeback(DynInstPtr &inst, PacketPtr pkt);
218
219 /** Writes back a store that couldn't be completed the previous cycle. */
220 void writebackPendingStore();
221
219 /** Handles completing the send of a store to memory. */
220 void storePostSend(PacketPtr pkt);
221
222 /** Completes the store at the specified index. */
223 void completeStore(int store_idx);
224
222 /** Handles completing the send of a store to memory. */
223 void storePostSend(PacketPtr pkt);
224
225 /** Completes the store at the specified index. */
226 void completeStore(int store_idx);
227
228 /** Attempts to send a store to the cache. */
229 bool sendStore(PacketPtr data_pkt);
230
225 /** Increments the given store index (circular queue). */
226 inline void incrStIdx(int &store_idx);
227 /** Decrements the given store index (circular queue). */
228 inline void decrStIdx(int &store_idx);
229 /** Increments the given load index (circular queue). */
230 inline void incrLdIdx(int &load_idx);
231 /** Decrements the given load index (circular queue). */
232 inline void decrLdIdx(int &load_idx);

--- 16 unchanged lines hidden (view full) ---

249 Port *dcachePort;
250
251 /** Derived class to hold any sender state the LSQ needs. */
252 class LSQSenderState : public Packet::SenderState, public FastAlloc
253 {
254 public:
255 /** Default constructor. */
256 LSQSenderState()
231 /** Increments the given store index (circular queue). */
232 inline void incrStIdx(int &store_idx);
233 /** Decrements the given store index (circular queue). */
234 inline void decrStIdx(int &store_idx);
235 /** Increments the given load index (circular queue). */
236 inline void incrLdIdx(int &load_idx);
237 /** Decrements the given load index (circular queue). */
238 inline void decrLdIdx(int &load_idx);

--- 16 unchanged lines hidden (view full) ---

255 Port *dcachePort;
256
257 /** Derived class to hold any sender state the LSQ needs. */
258 class LSQSenderState : public Packet::SenderState, public FastAlloc
259 {
260 public:
261 /** Default constructor. */
262 LSQSenderState()
257 : noWB(false)
263 : noWB(false), isSplit(false), pktToSend(false), outstanding(1),
264 mainPkt(NULL), pendingPacket(NULL)
258 { }
259
260 /** Instruction that initiated the access to memory. */
261 DynInstPtr inst;
262 /** Whether or not it is a load. */
263 bool isLoad;
264 /** The LQ/SQ index of the instruction. */
265 int idx;
266 /** Whether or not the instruction will need to writeback. */
267 bool noWB;
265 { }
266
267 /** Instruction that initiated the access to memory. */
268 DynInstPtr inst;
269 /** Whether or not it is a load. */
270 bool isLoad;
271 /** The LQ/SQ index of the instruction. */
272 int idx;
273 /** Whether or not the instruction will need to writeback. */
274 bool noWB;
275 /** Whether or not this access is split in two. */
276 bool isSplit;
277 /** Whether or not there is a packet that needs sending. */
278 bool pktToSend;
279 /** Number of outstanding packets to complete. */
280 int outstanding;
281 /** The main packet from a split load, used during writeback. */
282 PacketPtr mainPkt;
283 /** A second packet from a split store that needs sending. */
284 PacketPtr pendingPacket;
285
286 /** Marks one packet of this access as complete by decrementing the
 * outstanding count; returns true once that count reaches zero. */
287 inline bool complete() { return --outstanding == 0; }
268 };
269
270 /** Writeback event, specifically for when stores forward data to loads. */
271 class WritebackEvent : public Event {
272 public:
273 /** Constructs a writeback event. */
274 WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);
275

--- 21 unchanged lines hidden (view full) ---

297 : inst(NULL), req(NULL), size(0),
298 canWB(0), committed(0), completed(0)
299 {
300 std::memset(data, 0, sizeof(data));
301 }
302
303 /** Constructs a store queue entry for a given instruction. */
304 SQEntry(DynInstPtr &_inst)
288 };
289
290 /** Writeback event, specifically for when stores forward data to loads. */
291 class WritebackEvent : public Event {
292 public:
293 /** Constructs a writeback event. */
294 WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);
295

--- 21 unchanged lines hidden (view full) ---

317 : inst(NULL), req(NULL), size(0),
318 canWB(0), committed(0), completed(0)
319 {
320 std::memset(data, 0, sizeof(data));
321 }
322
323 /** Constructs a store queue entry for a given instruction. */
324 SQEntry(DynInstPtr &_inst)
305 : inst(_inst), req(NULL), size(0),
306 canWB(0), committed(0), completed(0)
325 : inst(_inst), req(NULL), sreqLow(NULL), sreqHigh(NULL), size(0),
326 isSplit(0), canWB(0), committed(0), completed(0)
307 {
308 std::memset(data, 0, sizeof(data));
309 }
310
311 /** The store instruction. */
312 DynInstPtr inst;
313 /** The request for the store. */
314 RequestPtr req;
327 {
328 std::memset(data, 0, sizeof(data));
329 }
330
331 /** The store instruction. */
332 DynInstPtr inst;
333 /** The request for the store. */
334 RequestPtr req;
335 /** The split requests for the store. */
336 RequestPtr sreqLow;
337 RequestPtr sreqHigh;
315 /** The size of the store. */
316 int size;
317 /** The store data. */
318 char data[sizeof(IntReg)];
338 /** The size of the store. */
339 int size;
340 /** The store data. */
341 char data[sizeof(IntReg)];
342 /** Whether or not the store is split into two requests. */
343 bool isSplit;
319 /** Whether or not the store can writeback. */
320 bool canWB;
321 /** Whether or not the store is committed. */
322 bool committed;
323 /** Whether or not the store is completed. */
324 bool completed;
325 };
326

--- 74 unchanged lines hidden (view full) ---

401 bool loadBlockedHandled;
402
403 /** The sequence number of the blocked load. */
404 InstSeqNum blockedLoadSeqNum;
405
406 /** The oldest load that caused a memory ordering violation. */
407 DynInstPtr memDepViolator;
408
344 /** Whether or not the store can writeback. */
345 bool canWB;
346 /** Whether or not the store is committed. */
347 bool committed;
348 /** Whether or not the store is completed. */
349 bool completed;
350 };
351

--- 74 unchanged lines hidden (view full) ---

426 bool loadBlockedHandled;
427
428 /** The sequence number of the blocked load. */
429 InstSeqNum blockedLoadSeqNum;
430
431 /** The oldest load that caused a memory ordering violation. */
432 DynInstPtr memDepViolator;
433
434 /** Whether or not there is a packet that couldn't be sent because of
435 * a lack of cache ports. */
436 bool hasPendingPkt;
437
438 /** The packet that is pending free cache ports. */
439 PacketPtr pendingPkt;
440
409 // Will also need how many read/write ports the Dcache has. Or keep track
410 // of that in stage that is one level up, and only call executeLoad/Store
411 // the appropriate number of times.
412 /** Total number of loads forwarded from LSQ stores. */
413 Stats::Scalar lsqForwLoads;
414
415 /** Total number of loads ignored due to invalid addresses. */
416 Stats::Scalar invAddrLoads;

--- 21 unchanged lines hidden (view full) ---

438 Stats::Scalar lsqRescheduledLoads;
439
440 /** Number of times the LSQ is blocked due to the cache. */
441 Stats::Scalar lsqCacheBlocked;
442
443 public:
444 /** Executes the load at the given index. */
445 template <class T>
441 // Will also need how many read/write ports the Dcache has. Or keep track
442 // of that in stage that is one level up, and only call executeLoad/Store
443 // the appropriate number of times.
444 /** Total number of loads forwarded from LSQ stores. */
445 Stats::Scalar lsqForwLoads;
446
447 /** Total number of loads ignored due to invalid addresses. */
448 Stats::Scalar invAddrLoads;

--- 21 unchanged lines hidden (view full) ---

470 Stats::Scalar lsqRescheduledLoads;
471
472 /** Number of times the LSQ is blocked due to the cache. */
473 Stats::Scalar lsqCacheBlocked;
474
475 public:
476 /** Executes the load at the given index. */
477 template <class T>
446 Fault read(Request *req, T &data, int load_idx);
478 Fault read(Request *req, Request *sreqLow, Request *sreqHigh, T &data,
479 int load_idx);
447
448 /** Executes the store at the given index. */
449 template <class T>
480
481 /** Executes the store at the given index. */
482 template <class T>
450 Fault write(Request *req, T &data, int store_idx);
483 Fault write(Request *req, Request *sreqLow, Request *sreqHigh, T &data,
484 int store_idx);
451
452 /** Returns the index of the head load instruction. */
453 int getLoadHead() { return loadHead; }
454 /** Returns the sequence number of the head load instruction. */
455 InstSeqNum getLoadHeadSeqNum()
456 {
457 if (loadQueue[loadHead]) {
458 return loadQueue[loadHead]->seqNum;

--- 18 unchanged lines hidden (view full) ---

477
478 /** Returns whether or not the LSQ unit is stalled. */
479 bool isStalled() { return stalled; }
480};
481
482template <class Impl>
483template <class T>
484Fault
485
486 /** Returns the index of the head load instruction. */
487 int getLoadHead() { return loadHead; }
488 /** Returns the sequence number of the head load instruction. */
489 InstSeqNum getLoadHeadSeqNum()
490 {
491 if (loadQueue[loadHead]) {
492 return loadQueue[loadHead]->seqNum;

--- 18 unchanged lines hidden (view full) ---

511
512 /** Returns whether or not the LSQ unit is stalled. */
513 bool isStalled() { return stalled; }
514};
515
516template <class Impl>
517template <class T>
518Fault
485LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
519LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
520 T &data, int load_idx)
486{
487 DynInstPtr load_inst = loadQueue[load_idx];
488
489 assert(load_inst);
490
491 assert(!load_inst->isExecuted());
492
493 // Make sure this isn't an uncacheable access

--- 4 unchanged lines hidden (view full) ---

498 (load_idx != loadHead || !load_inst->isAtCommit())) {
499 iewStage->rescheduleMemInst(load_inst);
500 ++lsqRescheduledLoads;
501
502 // Must delete request now that it wasn't handed off to
503 // memory. This is quite ugly. @todo: Figure out the proper
504 // place to really handle request deletes.
505 delete req;
521{
522 DynInstPtr load_inst = loadQueue[load_idx];
523
524 assert(load_inst);
525
526 assert(!load_inst->isExecuted());
527
528 // Make sure this isn't an uncacheable access

--- 4 unchanged lines hidden (view full) ---

533 (load_idx != loadHead || !load_inst->isAtCommit())) {
534 iewStage->rescheduleMemInst(load_inst);
535 ++lsqRescheduledLoads;
536
537 // Must delete request now that it wasn't handed off to
538 // memory. This is quite ugly. @todo: Figure out the proper
539 // place to really handle request deletes.
540 delete req;
541 if (TheISA::HasUnalignedMemAcc && sreqLow) {
542 delete sreqLow;
543 delete sreqHigh;
544 }
506 return TheISA::genMachineCheckFault();
507 }
508
509 // Check the SQ for any previous stores that might lead to forwarding
510 int store_idx = load_inst->sqIdx;
511
512 int store_size = 0;
513
514 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
545 return TheISA::genMachineCheckFault();
546 }
547
548 // Check the SQ for any previous stores that might lead to forwarding
549 int store_idx = load_inst->sqIdx;
550
551 int store_size = 0;
552
553 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
515 "storeHead: %i addr: %#x\n",
516 load_idx, store_idx, storeHead, req->getPaddr());
554 "storeHead: %i addr: %#x%s\n",
555 load_idx, store_idx, storeHead, req->getPaddr(),
556 sreqLow ? " split" : "");
517
518 if (req->isLLSC()) {
557
558 if (req->isLLSC()) {
559 assert(!sreqLow);
519 // Disable recording the result temporarily. Writing to misc
520 // regs normally updates the result, but this is not the
521 // desired behavior when handling store conditionals.
522 load_inst->recordResult = false;
523 TheISA::handleLockedRead(load_inst.get(), req);
524 load_inst->recordResult = true;
525 }
526

--- 55 unchanged lines hidden (view full) ---

582
583 WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
584
585 // We'll say this has a 1 cycle load-store forwarding latency
586 // for now.
587 // @todo: Need to make this a parameter.
588 cpu->schedule(wb, curTick);
589
560 // Disable recording the result temporarily. Writing to misc
561 // regs normally updates the result, but this is not the
562 // desired behavior when handling store conditionals.
563 load_inst->recordResult = false;
564 TheISA::handleLockedRead(load_inst.get(), req);
565 load_inst->recordResult = true;
566 }
567

--- 55 unchanged lines hidden (view full) ---

623
624 WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
625
626 // We'll say this has a 1 cycle load-store forwarding latency
627 // for now.
628 // @todo: Need to make this a parameter.
629 cpu->schedule(wb, curTick);
630
631 // Don't need to do anything special for split loads.
632 if (TheISA::HasUnalignedMemAcc && sreqLow) {
633 delete sreqLow;
634 delete sreqHigh;
635 }
636
590 ++lsqForwLoads;
591 return NoFault;
592 } else if ((store_has_lower_limit && lower_load_has_store_part) ||
593 (store_has_upper_limit && upper_load_has_store_part) ||
594 (lower_load_has_store_part && upper_load_has_store_part)) {
595 // This is the partial store-load forwarding case where a store
596 // has only part of the load's data.
597

--- 27 unchanged lines hidden (view full) ---

625 DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
626 "Store idx %i to load addr %#x\n",
627 store_idx, req->getVaddr());
628
629 // Must delete request now that it wasn't handed off to
630 // memory. This is quite ugly. @todo: Figure out the
631 // proper place to really handle request deletes.
632 delete req;
637 ++lsqForwLoads;
638 return NoFault;
639 } else if ((store_has_lower_limit && lower_load_has_store_part) ||
640 (store_has_upper_limit && upper_load_has_store_part) ||
641 (lower_load_has_store_part && upper_load_has_store_part)) {
642 // This is the partial store-load forwarding case where a store
643 // has only part of the load's data.
644

--- 27 unchanged lines hidden (view full) ---

672 DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
673 "Store idx %i to load addr %#x\n",
674 store_idx, req->getVaddr());
675
676 // Must delete request now that it wasn't handed off to
677 // memory. This is quite ugly. @todo: Figure out the
678 // proper place to really handle request deletes.
679 delete req;
680 if (TheISA::HasUnalignedMemAcc && sreqLow) {
681 delete sreqLow;
682 delete sreqHigh;
683 }
633
634 return NoFault;
635 }
636 }
637
638 // If there's no forwarding case, then go access memory
639 DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n",
640 load_inst->seqNum, load_inst->readPC());
641
642 assert(!load_inst->memData);
643 load_inst->memData = new uint8_t[64];
644
645 ++usedPorts;
646
647 // If the cache is not blocked, do the cache access.
684
685 return NoFault;
686 }
687 }
688
689 // If there's no forwarding case, then go access memory
690 DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n",
691 load_inst->seqNum, load_inst->readPC());
692
693 assert(!load_inst->memData);
694 load_inst->memData = new uint8_t[64];
695
696 ++usedPorts;
697
698 // If the cache is not blocked, do the cache access.
699 bool completedFirst = false;
648 if (!lsq->cacheBlocked()) {
700 if (!lsq->cacheBlocked()) {
649 PacketPtr data_pkt =
650 new Packet(req,
651 (req->isLLSC() ?
652 MemCmd::LoadLockedReq : MemCmd::ReadReq),
653 Packet::Broadcast);
701 MemCmd command =
702 req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq;
703 PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast);
704 PacketPtr fst_data_pkt = NULL;
705 PacketPtr snd_data_pkt = NULL;
706
654 data_pkt->dataStatic(load_inst->memData);
655
656 LSQSenderState *state = new LSQSenderState;
657 state->isLoad = true;
658 state->idx = load_idx;
659 state->inst = load_inst;
660 data_pkt->senderState = state;
661
707 data_pkt->dataStatic(load_inst->memData);
708
709 LSQSenderState *state = new LSQSenderState;
710 state->isLoad = true;
711 state->idx = load_idx;
712 state->inst = load_inst;
713 data_pkt->senderState = state;
714
662 if (!dcachePort->sendTiming(data_pkt)) {
715 if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
716
717 // Point the first packet at the main data packet.
718 fst_data_pkt = data_pkt;
719 } else {
720
721 // Create the split packets.
722 fst_data_pkt = new Packet(sreqLow, command, Packet::Broadcast);
723 snd_data_pkt = new Packet(sreqHigh, command, Packet::Broadcast);
724
725 fst_data_pkt->dataStatic(load_inst->memData);
726 snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
727
728 fst_data_pkt->senderState = state;
729 snd_data_pkt->senderState = state;
730
731 state->isSplit = true;
732 state->outstanding = 2;
733 state->mainPkt = data_pkt;
734 }
735
736 if (!dcachePort->sendTiming(fst_data_pkt)) {
663 // Delete state and data packet because a load retry
664 // initiates a pipeline restart; it does not retry.
665 delete state;
666 delete data_pkt->req;
667 delete data_pkt;
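737 // Delete state and data packet because a load retry
738 // initiates a pipeline restart; it does not retry.
// NOTE(review): for a split load the deletes below free the split
// requests through fst_data_pkt->req / snd_data_pkt->req (the packets
// were built from sreqLow / sreqHigh) without clearing sreqLow and
// sreqHigh, and the cache-blocked cleanup further down deletes both
// again while completedFirst is still false. Confirm whether that
// cleanup is reachable after a failed send; if it is, the two pointers
// should be set to NULL here, as is already done for req.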
739 delete state;
740 delete data_pkt->req;
741 delete data_pkt;
742 if (TheISA::HasUnalignedMemAcc && sreqLow) {
743 delete fst_data_pkt->req;
744 delete fst_data_pkt;
745 delete snd_data_pkt->req;
746 delete snd_data_pkt;
747 }
668
669 req = NULL;
670
671 // If the access didn't succeed, tell the LSQ by setting
672 // the retry thread id.
673 lsq->setRetryTid(lsqID);
748
749 req = NULL;
750
751 // If the access didn't succeed, tell the LSQ by setting
752 // the retry thread id.
753 lsq->setRetryTid(lsqID);
754 } else if (TheISA::HasUnalignedMemAcc && sreqLow) {
755 completedFirst = true;
756
757 // The first packet was sent without problems, so send this one
758 // too. If there is a problem with this packet then the whole
759 // load will be squashed, so indicate this to the state object.
760 // The first packet will return in completeDataAccess and be
761 // handled there.
762 ++usedPorts;
763 if (!dcachePort->sendTiming(snd_data_pkt)) {
764
765 // The main packet will be deleted in completeDataAccess.
766 delete snd_data_pkt->req;
767 delete snd_data_pkt;
768
769 state->complete();
770
771 req = NULL;
772
773 lsq->setRetryTid(lsqID);
774 }
674 }
675 }
676
677 // If the cache was blocked, or has become blocked due to the access,
678 // handle it.
679 if (lsq->cacheBlocked()) {
680 if (req)
681 delete req;
775 }
776 }
777
778 // If the cache was blocked, or has become blocked due to the access,
779 // handle it.
780 if (lsq->cacheBlocked()) {
781 if (req)
782 delete req;
783 if (TheISA::HasUnalignedMemAcc && sreqLow && !completedFirst) {
784 delete sreqLow;
785 delete sreqHigh;
786 }
682
683 ++lsqCacheBlocked;
684
685 iewStage->decrWb(load_inst->seqNum);
686 // There's an older load that's already going to squash.
687 if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
688 return NoFault;
689

--- 8 unchanged lines hidden (view full) ---

698 }
699
700 return NoFault;
701}
702
703template <class Impl>
704template <class T>
705Fault
787
788 ++lsqCacheBlocked;
789
790 iewStage->decrWb(load_inst->seqNum);
791 // There's an older load that's already going to squash.
792 if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
793 return NoFault;
794

--- 8 unchanged lines hidden (view full) ---

803 }
804
805 return NoFault;
806}
807
808template <class Impl>
809template <class T>
810Fault
706LSQUnit<Impl>::write(Request *req, T &data, int store_idx)
811LSQUnit<Impl>::write(Request *req, Request *sreqLow, Request *sreqHigh,
812 T &data, int store_idx)
707{
708 assert(storeQueue[store_idx].inst);
709
710 DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
711 " | storeHead:%i [sn:%i]\n",
712 store_idx, req->getPaddr(), data, storeHead,
713 storeQueue[store_idx].inst->seqNum);
714
715 storeQueue[store_idx].req = req;
813{
814 assert(storeQueue[store_idx].inst);
815
816 DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
817 " | storeHead:%i [sn:%i]\n",
818 store_idx, req->getPaddr(), data, storeHead,
819 storeQueue[store_idx].inst->seqNum);
820
821 storeQueue[store_idx].req = req;
822 storeQueue[store_idx].sreqLow = sreqLow;
823 storeQueue[store_idx].sreqHigh = sreqHigh;
716 storeQueue[store_idx].size = sizeof(T);
717 assert(sizeof(T) <= sizeof(storeQueue[store_idx].data));
718
719 T gData = htog(data);
720 memcpy(storeQueue[store_idx].data, &gData, sizeof(T));
721
722 // This function only writes the data to the store queue, so no fault
723 // can happen here.
724 return NoFault;
725}
726
727#endif // __CPU_O3_LSQ_UNIT_HH__
824 storeQueue[store_idx].size = sizeof(T);
825 assert(sizeof(T) <= sizeof(storeQueue[store_idx].data));
826
827 T gData = htog(data);
828 memcpy(storeQueue[store_idx].data, &gData, sizeof(T));
829
830 // This function only writes the data to the store queue, so no fault
831 // can happen here.
832 return NoFault;
833}
834
835#endif // __CPU_O3_LSQ_UNIT_HH__