lsq_unit.hh (6658:f4de76601762) | lsq_unit.hh (6974:4d4903a3e7c5) |
---|---|
1/* 2 * Copyright (c) 2004-2006 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; --- 202 unchanged lines hidden (view full) --- 211 212 /** Handles doing the retry. */ 213 void recvRetry(); 214 215 private: 216 /** Writes back the instruction, sending it to IEW. */ 217 void writeback(DynInstPtr &inst, PacketPtr pkt); 218 | 1/* 2 * Copyright (c) 2004-2006 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; --- 202 unchanged lines hidden (view full) --- 211 212 /** Handles doing the retry. */ 213 void recvRetry(); 214 215 private: 216 /** Writes back the instruction, sending it to IEW. */ 217 void writeback(DynInstPtr &inst, PacketPtr pkt); 218 |
219 /** Writes back a store that couldn't be completed the previous cycle. */ 220 void writebackPendingStore(); 221 |
|
219 /** Handles completing the send of a store to memory. */ 220 void storePostSend(PacketPtr pkt); 221 222 /** Completes the store at the specified index. */ 223 void completeStore(int store_idx); 224 | 222 /** Handles completing the send of a store to memory. */ 223 void storePostSend(PacketPtr pkt); 224 225 /** Completes the store at the specified index. */ 226 void completeStore(int store_idx); 227 |
228 /** Attempts to send a store to the cache. */ 229 bool sendStore(PacketPtr data_pkt); 230 |
|
225 /** Increments the given store index (circular queue). */ 226 inline void incrStIdx(int &store_idx); 227 /** Decrements the given store index (circular queue). */ 228 inline void decrStIdx(int &store_idx); 229 /** Increments the given load index (circular queue). */ 230 inline void incrLdIdx(int &load_idx); 231 /** Decrements the given load index (circular queue). */ 232 inline void decrLdIdx(int &load_idx); --- 16 unchanged lines hidden (view full) --- 249 Port *dcachePort; 250 251 /** Derived class to hold any sender state the LSQ needs. */ 252 class LSQSenderState : public Packet::SenderState, public FastAlloc 253 { 254 public: 255 /** Default constructor. */ 256 LSQSenderState() | 231 /** Increments the given store index (circular queue). */ 232 inline void incrStIdx(int &store_idx); 233 /** Decrements the given store index (circular queue). */ 234 inline void decrStIdx(int &store_idx); 235 /** Increments the given load index (circular queue). */ 236 inline void incrLdIdx(int &load_idx); 237 /** Decrements the given load index (circular queue). */ 238 inline void decrLdIdx(int &load_idx); --- 16 unchanged lines hidden (view full) --- 255 Port *dcachePort; 256 257 /** Derived class to hold any sender state the LSQ needs. */ 258 class LSQSenderState : public Packet::SenderState, public FastAlloc 259 { 260 public: 261 /** Default constructor. */ 262 LSQSenderState() |
257 : noWB(false) | 263 : noWB(false), isSplit(false), pktToSend(false), outstanding(1), 264 mainPkt(NULL), pendingPacket(NULL) |
258 { } 259 260 /** Instruction who initiated the access to memory. */ 261 DynInstPtr inst; 262 /** Whether or not it is a load. */ 263 bool isLoad; 264 /** The LQ/SQ index of the instruction. */ 265 int idx; 266 /** Whether or not the instruction will need to writeback. */ 267 bool noWB; | 265 { } 266 267 /** Instruction who initiated the access to memory. */ 268 DynInstPtr inst; 269 /** Whether or not it is a load. */ 270 bool isLoad; 271 /** The LQ/SQ index of the instruction. */ 272 int idx; 273 /** Whether or not the instruction will need to writeback. */ 274 bool noWB; |
275 /** Whether or not this access is split in two. */ 276 bool isSplit; 277 /** Whether or not there is a packet that needs sending. */ 278 bool pktToSend; 279 /** Number of outstanding packets to complete. */ 280 int outstanding; 281 /** The main packet from a split load, used during writeback. */ 282 PacketPtr mainPkt; 283 /** A second packet from a split store that needs sending. */ 284 PacketPtr pendingPacket; 285 286 /** Completes a packet and returns whether the access is finished. */ 287 inline bool complete() { return --outstanding == 0; } |
|
268 }; 269 270 /** Writeback event, specifically for when stores forward data to loads. */ 271 class WritebackEvent : public Event { 272 public: 273 /** Constructs a writeback event. */ 274 WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr); 275 --- 21 unchanged lines hidden (view full) --- 297 : inst(NULL), req(NULL), size(0), 298 canWB(0), committed(0), completed(0) 299 { 300 std::memset(data, 0, sizeof(data)); 301 } 302 303 /** Constructs a store queue entry for a given instruction. */ 304 SQEntry(DynInstPtr &_inst) | 288 }; 289 290 /** Writeback event, specifically for when stores forward data to loads. */ 291 class WritebackEvent : public Event { 292 public: 293 /** Constructs a writeback event. */ 294 WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr); 295 --- 21 unchanged lines hidden (view full) --- 317 : inst(NULL), req(NULL), size(0), 318 canWB(0), committed(0), completed(0) 319 { 320 std::memset(data, 0, sizeof(data)); 321 } 322 323 /** Constructs a store queue entry for a given instruction. */ 324 SQEntry(DynInstPtr &_inst) |
305 : inst(_inst), req(NULL), size(0), 306 canWB(0), committed(0), completed(0) | 325 : inst(_inst), req(NULL), sreqLow(NULL), sreqHigh(NULL), size(0), 326 isSplit(0), canWB(0), committed(0), completed(0) |
307 { 308 std::memset(data, 0, sizeof(data)); 309 } 310 311 /** The store instruction. */ 312 DynInstPtr inst; 313 /** The request for the store. */ 314 RequestPtr req; | 327 { 328 std::memset(data, 0, sizeof(data)); 329 } 330 331 /** The store instruction. */ 332 DynInstPtr inst; 333 /** The request for the store. */ 334 RequestPtr req; |
335 /** The split requests for the store. */ 336 RequestPtr sreqLow; 337 RequestPtr sreqHigh; |
|
315 /** The size of the store. */ 316 int size; 317 /** The store data. */ 318 char data[sizeof(IntReg)]; | 338 /** The size of the store. */ 339 int size; 340 /** The store data. */ 341 char data[sizeof(IntReg)]; |
342 /** Whether or not the store is split into two requests. */ 343 bool isSplit; |
|
319 /** Whether or not the store can writeback. */ 320 bool canWB; 321 /** Whether or not the store is committed. */ 322 bool committed; 323 /** Whether or not the store is completed. */ 324 bool completed; 325 }; 326 --- 74 unchanged lines hidden (view full) --- 401 bool loadBlockedHandled; 402 403 /** The sequence number of the blocked load. */ 404 InstSeqNum blockedLoadSeqNum; 405 406 /** The oldest load that caused a memory ordering violation. */ 407 DynInstPtr memDepViolator; 408 | 344 /** Whether or not the store can writeback. */ 345 bool canWB; 346 /** Whether or not the store is committed. */ 347 bool committed; 348 /** Whether or not the store is completed. */ 349 bool completed; 350 }; 351 --- 74 unchanged lines hidden (view full) --- 426 bool loadBlockedHandled; 427 428 /** The sequence number of the blocked load. */ 429 InstSeqNum blockedLoadSeqNum; 430 431 /** The oldest load that caused a memory ordering violation. */ 432 DynInstPtr memDepViolator; 433 |
434 /** Whether or not there is a packet that couldn't be sent because of 435 * a lack of cache ports. */ 436 bool hasPendingPkt; 437 438 /** The packet that is pending free cache ports. */ 439 PacketPtr pendingPkt; 440 |
|
409 // Will also need how many read/write ports the Dcache has. Or keep track 410 // of that in stage that is one level up, and only call executeLoad/Store 411 // the appropriate number of times. 412 /** Total number of loads forwaded from LSQ stores. */ 413 Stats::Scalar lsqForwLoads; 414 415 /** Total number of loads ignored due to invalid addresses. */ 416 Stats::Scalar invAddrLoads; --- 21 unchanged lines hidden (view full) --- 438 Stats::Scalar lsqRescheduledLoads; 439 440 /** Number of times the LSQ is blocked due to the cache. */ 441 Stats::Scalar lsqCacheBlocked; 442 443 public: 444 /** Executes the load at the given index. */ 445 template <class T> | 441 // Will also need how many read/write ports the Dcache has. Or keep track 442 // of that in stage that is one level up, and only call executeLoad/Store 443 // the appropriate number of times. 444 /** Total number of loads forwaded from LSQ stores. */ 445 Stats::Scalar lsqForwLoads; 446 447 /** Total number of loads ignored due to invalid addresses. */ 448 Stats::Scalar invAddrLoads; --- 21 unchanged lines hidden (view full) --- 470 Stats::Scalar lsqRescheduledLoads; 471 472 /** Number of times the LSQ is blocked due to the cache. */ 473 Stats::Scalar lsqCacheBlocked; 474 475 public: 476 /** Executes the load at the given index. */ 477 template <class T> |
446 Fault read(Request *req, T &data, int load_idx); | 478 Fault read(Request *req, Request *sreqLow, Request *sreqHigh, T &data, 479 int load_idx); |
447 448 /** Executes the store at the given index. */ 449 template <class T> | 480 481 /** Executes the store at the given index. */ 482 template <class T> |
450 Fault write(Request *req, T &data, int store_idx); | 483 Fault write(Request *req, Request *sreqLow, Request *sreqHigh, T &data, 484 int store_idx); |
451 452 /** Returns the index of the head load instruction. */ 453 int getLoadHead() { return loadHead; } 454 /** Returns the sequence number of the head load instruction. */ 455 InstSeqNum getLoadHeadSeqNum() 456 { 457 if (loadQueue[loadHead]) { 458 return loadQueue[loadHead]->seqNum; --- 18 unchanged lines hidden (view full) --- 477 478 /** Returns whether or not the LSQ unit is stalled. */ 479 bool isStalled() { return stalled; } 480}; 481 482template <class Impl> 483template <class T> 484Fault | 485 486 /** Returns the index of the head load instruction. */ 487 int getLoadHead() { return loadHead; } 488 /** Returns the sequence number of the head load instruction. */ 489 InstSeqNum getLoadHeadSeqNum() 490 { 491 if (loadQueue[loadHead]) { 492 return loadQueue[loadHead]->seqNum; --- 18 unchanged lines hidden (view full) --- 511 512 /** Returns whether or not the LSQ unit is stalled. */ 513 bool isStalled() { return stalled; } 514}; 515 516template <class Impl> 517template <class T> 518Fault |
485LSQUnit<Impl>::read(Request *req, T &data, int load_idx) | 519LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh, 520 T &data, int load_idx) |
486{ 487 DynInstPtr load_inst = loadQueue[load_idx]; 488 489 assert(load_inst); 490 491 assert(!load_inst->isExecuted()); 492 493 // Make sure this isn't an uncacheable access --- 4 unchanged lines hidden (view full) --- 498 (load_idx != loadHead || !load_inst->isAtCommit())) { 499 iewStage->rescheduleMemInst(load_inst); 500 ++lsqRescheduledLoads; 501 502 // Must delete request now that it wasn't handed off to 503 // memory. This is quite ugly. @todo: Figure out the proper 504 // place to really handle request deletes. 505 delete req; | 521{ 522 DynInstPtr load_inst = loadQueue[load_idx]; 523 524 assert(load_inst); 525 526 assert(!load_inst->isExecuted()); 527 528 // Make sure this isn't an uncacheable access --- 4 unchanged lines hidden (view full) --- 533 (load_idx != loadHead || !load_inst->isAtCommit())) { 534 iewStage->rescheduleMemInst(load_inst); 535 ++lsqRescheduledLoads; 536 537 // Must delete request now that it wasn't handed off to 538 // memory. This is quite ugly. @todo: Figure out the proper 539 // place to really handle request deletes. 540 delete req; |
541 if (TheISA::HasUnalignedMemAcc && sreqLow) { 542 delete sreqLow; 543 delete sreqHigh; 544 } |
|
506 return TheISA::genMachineCheckFault(); 507 } 508 509 // Check the SQ for any previous stores that might lead to forwarding 510 int store_idx = load_inst->sqIdx; 511 512 int store_size = 0; 513 514 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " | 545 return TheISA::genMachineCheckFault(); 546 } 547 548 // Check the SQ for any previous stores that might lead to forwarding 549 int store_idx = load_inst->sqIdx; 550 551 int store_size = 0; 552 553 DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " |
515 "storeHead: %i addr: %#x\n", 516 load_idx, store_idx, storeHead, req->getPaddr()); | 554 "storeHead: %i addr: %#x%s\n", 555 load_idx, store_idx, storeHead, req->getPaddr(), 556 sreqLow ? " split" : ""); |
517 518 if (req->isLLSC()) { | 557 558 if (req->isLLSC()) { |
559 assert(!sreqLow); |
|
519 // Disable recording the result temporarily. Writing to misc 520 // regs normally updates the result, but this is not the 521 // desired behavior when handling store conditionals. 522 load_inst->recordResult = false; 523 TheISA::handleLockedRead(load_inst.get(), req); 524 load_inst->recordResult = true; 525 } 526 --- 55 unchanged lines hidden (view full) --- 582 583 WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this); 584 585 // We'll say this has a 1 cycle load-store forwarding latency 586 // for now. 587 // @todo: Need to make this a parameter. 588 cpu->schedule(wb, curTick); 589 | 560 // Disable recording the result temporarily. Writing to misc 561 // regs normally updates the result, but this is not the 562 // desired behavior when handling store conditionals. 563 load_inst->recordResult = false; 564 TheISA::handleLockedRead(load_inst.get(), req); 565 load_inst->recordResult = true; 566 } 567 --- 55 unchanged lines hidden (view full) --- 623 624 WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this); 625 626 // We'll say this has a 1 cycle load-store forwarding latency 627 // for now. 628 // @todo: Need to make this a parameter. 629 cpu->schedule(wb, curTick); 630 |
631 // Don't need to do anything special for split loads. 632 if (TheISA::HasUnalignedMemAcc && sreqLow) { 633 delete sreqLow; 634 delete sreqHigh; 635 } 636 |
|
590 ++lsqForwLoads; 591 return NoFault; 592 } else if ((store_has_lower_limit && lower_load_has_store_part) || 593 (store_has_upper_limit && upper_load_has_store_part) || 594 (lower_load_has_store_part && upper_load_has_store_part)) { 595 // This is the partial store-load forwarding case where a store 596 // has only part of the load's data. 597 --- 27 unchanged lines hidden (view full) --- 625 DPRINTF(LSQUnit, "Load-store forwarding mis-match. " 626 "Store idx %i to load addr %#x\n", 627 store_idx, req->getVaddr()); 628 629 // Must delete request now that it wasn't handed off to 630 // memory. This is quite ugly. @todo: Figure out the 631 // proper place to really handle request deletes. 632 delete req; | 637 ++lsqForwLoads; 638 return NoFault; 639 } else if ((store_has_lower_limit && lower_load_has_store_part) || 640 (store_has_upper_limit && upper_load_has_store_part) || 641 (lower_load_has_store_part && upper_load_has_store_part)) { 642 // This is the partial store-load forwarding case where a store 643 // has only part of the load's data. 644 --- 27 unchanged lines hidden (view full) --- 672 DPRINTF(LSQUnit, "Load-store forwarding mis-match. " 673 "Store idx %i to load addr %#x\n", 674 store_idx, req->getVaddr()); 675 676 // Must delete request now that it wasn't handed off to 677 // memory. This is quite ugly. @todo: Figure out the 678 // proper place to really handle request deletes. 679 delete req; |
680 if (TheISA::HasUnalignedMemAcc && sreqLow) { 681 delete sreqLow; 682 delete sreqHigh; 683 } |
|
633 634 return NoFault; 635 } 636 } 637 638 // If there's no forwarding case, then go access memory 639 DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n", 640 load_inst->seqNum, load_inst->readPC()); 641 642 assert(!load_inst->memData); 643 load_inst->memData = new uint8_t[64]; 644 645 ++usedPorts; 646 647 // if we the cache is not blocked, do cache access | 684 685 return NoFault; 686 } 687 } 688 689 // If there's no forwarding case, then go access memory 690 DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n", 691 load_inst->seqNum, load_inst->readPC()); 692 693 assert(!load_inst->memData); 694 load_inst->memData = new uint8_t[64]; 695 696 ++usedPorts; 697 698 // if we the cache is not blocked, do cache access |
699 bool completedFirst = false; |
|
648 if (!lsq->cacheBlocked()) { | 700 if (!lsq->cacheBlocked()) { |
649 PacketPtr data_pkt = 650 new Packet(req, 651 (req->isLLSC() ? 652 MemCmd::LoadLockedReq : MemCmd::ReadReq), 653 Packet::Broadcast); | 701 MemCmd command = 702 req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq; 703 PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast); 704 PacketPtr fst_data_pkt = NULL; 705 PacketPtr snd_data_pkt = NULL; 706 |
654 data_pkt->dataStatic(load_inst->memData); 655 656 LSQSenderState *state = new LSQSenderState; 657 state->isLoad = true; 658 state->idx = load_idx; 659 state->inst = load_inst; 660 data_pkt->senderState = state; 661 | 707 data_pkt->dataStatic(load_inst->memData); 708 709 LSQSenderState *state = new LSQSenderState; 710 state->isLoad = true; 711 state->idx = load_idx; 712 state->inst = load_inst; 713 data_pkt->senderState = state; 714 |
662 if (!dcachePort->sendTiming(data_pkt)) { | 715 if (!TheISA::HasUnalignedMemAcc || !sreqLow) { 716 717 // Point the first packet at the main data packet. 718 fst_data_pkt = data_pkt; 719 } else { 720 721 // Create the split packets. 722 fst_data_pkt = new Packet(sreqLow, command, Packet::Broadcast); 723 snd_data_pkt = new Packet(sreqHigh, command, Packet::Broadcast); 724 725 fst_data_pkt->dataStatic(load_inst->memData); 726 snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize()); 727 728 fst_data_pkt->senderState = state; 729 snd_data_pkt->senderState = state; 730 731 state->isSplit = true; 732 state->outstanding = 2; 733 state->mainPkt = data_pkt; 734 } 735 736 if (!dcachePort->sendTiming(fst_data_pkt)) { |
663 // Delete state and data packet because a load retry 664 // initiates a pipeline restart; it does not retry. 665 delete state; 666 delete data_pkt->req; 667 delete data_pkt; | 737 // Delete state and data packet because a load retry 738 // initiates a pipeline restart; it does not retry. 739 delete state; 740 delete data_pkt->req; 741 delete data_pkt; |
742 if (TheISA::HasUnalignedMemAcc && sreqLow) { 743 delete fst_data_pkt->req; 744 delete fst_data_pkt; 745 delete snd_data_pkt->req; 746 delete snd_data_pkt; 747 } |
|
668 669 req = NULL; 670 671 // If the access didn't succeed, tell the LSQ by setting 672 // the retry thread id. 673 lsq->setRetryTid(lsqID); | 748 749 req = NULL; 750 751 // If the access didn't succeed, tell the LSQ by setting 752 // the retry thread id. 753 lsq->setRetryTid(lsqID); |
754 } else if (TheISA::HasUnalignedMemAcc && sreqLow) { 755 completedFirst = true; 756 757 // The first packet was sent without problems, so send this one 758 // too. If there is a problem with this packet then the whole 759 // load will be squashed, so indicate this to the state object. 760 // The first packet will return in completeDataAccess and be 761 // handled there. 762 ++usedPorts; 763 if (!dcachePort->sendTiming(snd_data_pkt)) { 764 765 // The main packet will be deleted in completeDataAccess. 766 delete snd_data_pkt->req; 767 delete snd_data_pkt; 768 769 state->complete(); 770 771 req = NULL; 772 773 lsq->setRetryTid(lsqID); 774 } |
|
674 } 675 } 676 677 // If the cache was blocked, or has become blocked due to the access, 678 // handle it. 679 if (lsq->cacheBlocked()) { 680 if (req) 681 delete req; | 775 } 776 } 777 778 // If the cache was blocked, or has become blocked due to the access, 779 // handle it. 780 if (lsq->cacheBlocked()) { 781 if (req) 782 delete req; |
783 if (TheISA::HasUnalignedMemAcc && sreqLow && !completedFirst) { 784 delete sreqLow; 785 delete sreqHigh; 786 } |
|
682 683 ++lsqCacheBlocked; 684 685 iewStage->decrWb(load_inst->seqNum); 686 // There's an older load that's already going to squash. 687 if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) 688 return NoFault; 689 --- 8 unchanged lines hidden (view full) --- 698 } 699 700 return NoFault; 701} 702 703template <class Impl> 704template <class T> 705Fault | 787 788 ++lsqCacheBlocked; 789 790 iewStage->decrWb(load_inst->seqNum); 791 // There's an older load that's already going to squash. 792 if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum) 793 return NoFault; 794 --- 8 unchanged lines hidden (view full) --- 803 } 804 805 return NoFault; 806} 807 808template <class Impl> 809template <class T> 810Fault |
706LSQUnit<Impl>::write(Request *req, T &data, int store_idx) | 811LSQUnit<Impl>::write(Request *req, Request *sreqLow, Request *sreqHigh, 812 T &data, int store_idx) |
707{ 708 assert(storeQueue[store_idx].inst); 709 710 DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x" 711 " | storeHead:%i [sn:%i]\n", 712 store_idx, req->getPaddr(), data, storeHead, 713 storeQueue[store_idx].inst->seqNum); 714 715 storeQueue[store_idx].req = req; | 813{ 814 assert(storeQueue[store_idx].inst); 815 816 DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x" 817 " | storeHead:%i [sn:%i]\n", 818 store_idx, req->getPaddr(), data, storeHead, 819 storeQueue[store_idx].inst->seqNum); 820 821 storeQueue[store_idx].req = req; |
822 storeQueue[store_idx].sreqLow = sreqLow; 823 storeQueue[store_idx].sreqHigh = sreqHigh; |
|
716 storeQueue[store_idx].size = sizeof(T); 717 assert(sizeof(T) <= sizeof(storeQueue[store_idx].data)); 718 719 T gData = htog(data); 720 memcpy(storeQueue[store_idx].data, &gData, sizeof(T)); 721 722 // This function only writes the data to the store queue, so no fault 723 // can happen here. 724 return NoFault; 725} 726 727#endif // __CPU_O3_LSQ_UNIT_HH__ | 824 storeQueue[store_idx].size = sizeof(T); 825 assert(sizeof(T) <= sizeof(storeQueue[store_idx].data)); 826 827 T gData = htog(data); 828 memcpy(storeQueue[store_idx].data, &gData, sizeof(T)); 829 830 // This function only writes the data to the store queue, so no fault 831 // can happen here. 832 return NoFault; 833} 834 835#endif // __CPU_O3_LSQ_UNIT_HH__ |