fetch_impl.hh (3791:f1783bae1afe) | fetch_impl.hh (3795:60ecc96c3cee) |
---|---|
1/* 2 * Copyright (c) 2004-2006 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; --- 305 unchanged lines hidden (view full) --- 314template<class Impl> 315void 316DefaultFetch<Impl>::initStage() 317{ 318 // Setup PC and nextPC with initial state. 319 for (int tid = 0; tid < numThreads; tid++) { 320 PC[tid] = cpu->readPC(tid); 321 nextPC[tid] = cpu->readNextPC(tid); | 1/* 2 * Copyright (c) 2004-2006 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; --- 305 unchanged lines hidden (view full) --- 314template<class Impl> 315void 316DefaultFetch<Impl>::initStage() 317{ 318 // Setup PC and nextPC with initial state. 319 for (int tid = 0; tid < numThreads; tid++) { 320 PC[tid] = cpu->readPC(tid); 321 nextPC[tid] = cpu->readNextPC(tid); |
322#if ISA_HAS_DELAY_SLOT | |
323 nextNPC[tid] = cpu->readNextNPC(tid); | 322 nextNPC[tid] = cpu->readNextNPC(tid); |
324#endif | |
325 } 326 327 // Size of cache block. 328 cacheBlkSize = icachePort->peerBlockSize(); 329 330 // Create mask to get rid of offset bits. 331 cacheBlkMask = (cacheBlkSize - 1); 332 --- 166 unchanged lines hidden (view full) --- 499{ 500 // Do branch prediction check here. 501 // A bit of a misnomer...next_PC is actually the current PC until 502 // this function updates it. 503 bool predict_taken; 504 505 if (!inst->isControl()) { 506#if ISA_HAS_DELAY_SLOT | 323 } 324 325 // Size of cache block. 326 cacheBlkSize = icachePort->peerBlockSize(); 327 328 // Create mask to get rid of offset bits. 329 cacheBlkMask = (cacheBlkSize - 1); 330 --- 166 unchanged lines hidden (view full) --- 497{ 498 // Do branch prediction check here. 499 // A bit of a misnomer...next_PC is actually the current PC until 500 // this function updates it. 501 bool predict_taken; 502 503 if (!inst->isControl()) { 504#if ISA_HAS_DELAY_SLOT |
507 Addr cur_PC = next_PC; 508 next_PC = cur_PC + instSize; //next_NPC; 509 next_NPC = cur_PC + (2 * instSize);//next_NPC + instSize; 510 inst->setPredTarg(next_NPC); | 505 next_PC = next_NPC; 506 next_NPC = next_NPC + instSize; 507 inst->setPredTarg(next_PC, next_NPC); |
511#else 512 next_PC = next_PC + instSize; | 508#else 509 next_PC = next_PC + instSize; |
513 inst->setPredTarg(next_PC); | 510 inst->setPredTarg(next_PC, next_PC + sizeof(TheISA::MachInst)); |
514#endif | 511#endif |
512 inst->setPredTaken(false); |
|
515 return false; 516 } 517 518 int tid = inst->threadNumber; 519#if ISA_HAS_DELAY_SLOT 520 Addr pred_PC = next_PC; 521 predict_taken = branchPred.predict(inst, pred_PC, tid); 522 523 if (predict_taken) { | 513 return false; 514 } 515 516 int tid = inst->threadNumber; 517#if ISA_HAS_DELAY_SLOT 518 Addr pred_PC = next_PC; 519 predict_taken = branchPred.predict(inst, pred_PC, tid); 520 521 if (predict_taken) { |
524 DPRINTF(Fetch, "[tid:%i]: Branch predicted to be true.\n", tid); | 522 DPRINTF(Fetch, "[tid:%i]: Branch predicted to be taken.\n", tid); |
525 } else { | 523 } else { |
526 DPRINTF(Fetch, "[tid:%i]: Branch predicted to be false.\n", tid); | 524 DPRINTF(Fetch, "[tid:%i]: Branch predicted to be not taken.\n", tid); |
527 } 528 | 525 } 526 |
527 next_PC = next_NPC; |
|
529 if (predict_taken) { | 528 if (predict_taken) { |
530 next_PC = next_NPC; | |
531 next_NPC = pred_PC; | 529 next_NPC = pred_PC; |
532 | |
533 // Update delay slot info 534 ++delaySlotInfo[tid].numInsts; 535 delaySlotInfo[tid].targetAddr = pred_PC; 536 DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid, 537 delaySlotInfo[tid].numInsts); | 530 // Update delay slot info 531 ++delaySlotInfo[tid].numInsts; 532 delaySlotInfo[tid].targetAddr = pred_PC; 533 DPRINTF(Fetch, "[tid:%i]: %i delay slot inst(s) to process.\n", tid, 534 delaySlotInfo[tid].numInsts); |
538 } else { // !predict_taken 539 if (inst->isCondDelaySlot()) { 540 next_PC = pred_PC; 541 // The delay slot is skipped here if there is on 542 // prediction 543 } else { 544 next_PC = next_NPC; 545 // No need to declare a delay slot here since 546 // there is no for the pred. target to jump 547 } 548 | 535 } else { |
549 next_NPC = next_NPC + instSize; 550 } 551#else 552 predict_taken = branchPred.predict(inst, next_PC, tid); 553#endif | 536 next_NPC = next_NPC + instSize; 537 } 538#else 539 predict_taken = branchPred.predict(inst, next_PC, tid); 540#endif |
541 DPRINTF(Fetch, "[tid:%i]: Branch predicted to go to %#x and then %#x.\n", 542 tid, next_PC, next_NPC); 543 inst->setPredTarg(next_PC, next_NPC); 544 inst->setPredTaken(predict_taken); |
|
554 555 ++fetchedBranches; 556 557 if (predict_taken) { 558 ++predictedBranches; 559 } 560 561 return predict_taken; --- 104 unchanged lines hidden (view full) --- 666 } 667 668 ret_fault = fault; 669 return true; 670} 671 672template <class Impl> 673inline void | 545 546 ++fetchedBranches; 547 548 if (predict_taken) { 549 ++predictedBranches; 550 } 551 552 return predict_taken; --- 104 unchanged lines hidden (view full) --- 657 } 658 659 ret_fault = fault; 660 return true; 661} 662 663template <class Impl> 664inline void |
674DefaultFetch<Impl>::doSquash(const Addr &new_PC, unsigned tid) | 665DefaultFetch<Impl>::doSquash(const Addr &new_PC, 666 const Addr &new_NPC, unsigned tid) |
675{ | 667{ |
676 DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x.\n", 677 tid, new_PC); | 668 DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n", 669 tid, new_PC, new_NPC); |
678 679 PC[tid] = new_PC; | 670 671 PC[tid] = new_PC; |
680 nextPC[tid] = new_PC + instSize; 681 nextNPC[tid] = new_PC + (2 * instSize); | 672 nextPC[tid] = new_NPC; 673 nextNPC[tid] = new_NPC + instSize; |
682 683 // Clear the icache miss if it's outstanding. 684 if (fetchStatus[tid] == IcacheWaitResponse) { 685 DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n", 686 tid); 687 memReq[tid] = NULL; 688 } 689 --- 9 unchanged lines hidden (view full) --- 699 700 fetchStatus[tid] = Squashing; 701 702 ++fetchSquashCycles; 703} 704 705template<class Impl> 706void | 674 675 // Clear the icache miss if it's outstanding. 676 if (fetchStatus[tid] == IcacheWaitResponse) { 677 DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n", 678 tid); 679 memReq[tid] = NULL; 680 } 681 --- 9 unchanged lines hidden (view full) --- 691 692 fetchStatus[tid] = Squashing; 693 694 ++fetchSquashCycles; 695} 696 697template<class Impl> 698void |
707DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, | 699DefaultFetch<Impl>::squashFromDecode(const Addr &new_PC, const Addr &new_NPC, |
708 const InstSeqNum &seq_num, 709 unsigned tid) 710{ 711 DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid); 712 | 700 const InstSeqNum &seq_num, 701 unsigned tid) 702{ 703 DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n",tid); 704 |
713 doSquash(new_PC, tid); | 705 doSquash(new_PC, new_NPC, tid); |
714 715#if ISA_HAS_DELAY_SLOT 716 if (seq_num <= delaySlotInfo[tid].branchSeqNum) { 717 delaySlotInfo[tid].numInsts = 0; 718 delaySlotInfo[tid].targetAddr = 0; 719 delaySlotInfo[tid].targetReady = false; 720 } 721#endif --- 66 unchanged lines hidden (view full) --- 788 cpu->deactivateStage(O3CPU::FetchIdx); 789 } 790 791 return Inactive; 792} 793 794template <class Impl> 795void | 706 707#if ISA_HAS_DELAY_SLOT 708 if (seq_num <= delaySlotInfo[tid].branchSeqNum) { 709 delaySlotInfo[tid].numInsts = 0; 710 delaySlotInfo[tid].targetAddr = 0; 711 delaySlotInfo[tid].targetReady = false; 712 } 713#endif --- 66 unchanged lines hidden (view full) --- 780 cpu->deactivateStage(O3CPU::FetchIdx); 781 } 782 783 return Inactive; 784} 785 786template <class Impl> 787void |
796DefaultFetch<Impl>::squash(const Addr &new_PC, const InstSeqNum &seq_num, | 788DefaultFetch<Impl>::squash(const Addr &new_PC, const Addr &new_NPC, 789 const InstSeqNum &seq_num, |
797 bool squash_delay_slot, unsigned tid) 798{ 799 DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); 800 | 790 bool squash_delay_slot, unsigned tid) 791{ 792 DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n",tid); 793 |
801 doSquash(new_PC, tid); | 794 doSquash(new_PC, new_NPC, tid); |
802 803#if ISA_HAS_DELAY_SLOT 804 if (seq_num <= delaySlotInfo[tid].branchSeqNum) { 805 delaySlotInfo[tid].numInsts = 0; 806 delaySlotInfo[tid].targetAddr = 0; 807 delaySlotInfo[tid].targetReady = false; 808 } 809 --- 113 unchanged lines hidden (view full) --- 923 924#if ISA_HAS_DELAY_SLOT 925 InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum; 926#else 927 InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum; 928#endif 929 // In any case, squash. 930 squash(fromCommit->commitInfo[tid].nextPC, | 795 796#if ISA_HAS_DELAY_SLOT 797 if (seq_num <= delaySlotInfo[tid].branchSeqNum) { 798 delaySlotInfo[tid].numInsts = 0; 799 delaySlotInfo[tid].targetAddr = 0; 800 delaySlotInfo[tid].targetReady = false; 801 } 802 --- 113 unchanged lines hidden (view full) --- 916 917#if ISA_HAS_DELAY_SLOT 918 InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].bdelayDoneSeqNum; 919#else 920 InstSeqNum doneSeqNum = fromCommit->commitInfo[tid].doneSeqNum; 921#endif 922 // In any case, squash. 923 squash(fromCommit->commitInfo[tid].nextPC, |
924 fromCommit->commitInfo[tid].nextNPC, |
|
931 doneSeqNum, 932 fromCommit->commitInfo[tid].squashDelaySlot, 933 tid); 934 935 // Also check if there's a mispredict that happened. 936 if (fromCommit->commitInfo[tid].branchMispredict) { 937 branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, 938 fromCommit->commitInfo[tid].nextPC, --- 41 unchanged lines hidden (view full) --- 980 981#if ISA_HAS_DELAY_SLOT 982 InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum; 983#else 984 InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum; 985#endif 986 // Squash unless we're already squashing 987 squashFromDecode(fromDecode->decodeInfo[tid].nextPC, | 925 doneSeqNum, 926 fromCommit->commitInfo[tid].squashDelaySlot, 927 tid); 928 929 // Also check if there's a mispredict that happened. 930 if (fromCommit->commitInfo[tid].branchMispredict) { 931 branchPred.squash(fromCommit->commitInfo[tid].doneSeqNum, 932 fromCommit->commitInfo[tid].nextPC, --- 41 unchanged lines hidden (view full) --- 974 975#if ISA_HAS_DELAY_SLOT 976 InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].bdelayDoneSeqNum; 977#else 978 InstSeqNum doneSeqNum = fromDecode->decodeInfo[tid].doneSeqNum; 979#endif 980 // Squash unless we're already squashing 981 squashFromDecode(fromDecode->decodeInfo[tid].nextPC, |
982 fromDecode->decodeInfo[tid].nextNPC, |
|
988 doneSeqNum, 989 tid); 990 991 return true; 992 } 993 } 994 995 if (checkStall(tid) && --- 40 unchanged lines hidden (view full) --- 1036 return; 1037 } 1038 1039 DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); 1040 1041 // The current PC. 1042 Addr &fetch_PC = PC[tid]; 1043 | 983 doneSeqNum, 984 tid); 985 986 return true; 987 } 988 } 989 990 if (checkStall(tid) && --- 40 unchanged lines hidden (view full) --- 1031 return; 1032 } 1033 1034 DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); 1035 1036 // The current PC. 1037 Addr &fetch_PC = PC[tid]; 1038 |
1039 Addr &fetch_NPC = nextPC[tid]; 1040 |
|
1044 // Fault code for memory access. 1045 Fault fault = NoFault; 1046 1047 // If returning from the delay of a cache miss, then update the status 1048 // to running, otherwise do the cache access. Possibly move this up 1049 // to tick() function. 1050 if (fetchStatus[tid] == IcacheAccessComplete) { 1051 DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", --- 40 unchanged lines hidden (view full) --- 1092 // If we had a stall due to an icache miss, then return. 1093 if (fetchStatus[tid] == IcacheWaitResponse) { 1094 ++icacheStallCycles; 1095 status_change = true; 1096 return; 1097 } 1098 1099 Addr next_PC = fetch_PC; | 1041 // Fault code for memory access. 1042 Fault fault = NoFault; 1043 1044 // If returning from the delay of a cache miss, then update the status 1045 // to running, otherwise do the cache access. Possibly move this up 1046 // to tick() function. 1047 if (fetchStatus[tid] == IcacheAccessComplete) { 1048 DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", --- 40 unchanged lines hidden (view full) --- 1089 // If we had a stall due to an icache miss, then return. 1090 if (fetchStatus[tid] == IcacheWaitResponse) { 1091 ++icacheStallCycles; 1092 status_change = true; 1093 return; 1094 } 1095 1096 Addr next_PC = fetch_PC; |
1100 Addr next_NPC = next_PC + instSize; | 1097 Addr next_NPC = fetch_NPC; 1098 |
1101 InstSeqNum inst_seq; 1102 MachInst inst; 1103 ExtMachInst ext_inst; 1104 // @todo: Fix this hack. 1105 unsigned offset = (fetch_PC & cacheBlkMask) & ~3; 1106 1107 if (fault == NoFault) { 1108 // If the read of the first instruction was successful, then grab the --- 30 unchanged lines hidden (view full) --- 1139 ext_inst = TheISA::makeExtMI(inst, fetch_PC); 1140#elif THE_ISA == SPARC_ISA 1141 ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC()); 1142#elif THE_ISA == MIPS_ISA 1143 ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC()); 1144#endif 1145 1146 // Create a new DynInst from the instruction fetched. | 1099 InstSeqNum inst_seq; 1100 MachInst inst; 1101 ExtMachInst ext_inst; 1102 // @todo: Fix this hack. 1103 unsigned offset = (fetch_PC & cacheBlkMask) & ~3; 1104 1105 if (fault == NoFault) { 1106 // If the read of the first instruction was successful, then grab the --- 30 unchanged lines hidden (view full) --- 1137 ext_inst = TheISA::makeExtMI(inst, fetch_PC); 1138#elif THE_ISA == SPARC_ISA 1139 ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC()); 1140#elif THE_ISA == MIPS_ISA 1141 ext_inst = TheISA::makeExtMI(inst, cpu->thread[tid]->getTC()); 1142#endif 1143 1144 // Create a new DynInst from the instruction fetched. |
1147 DynInstPtr instruction = new DynInst(ext_inst, fetch_PC, 1148 next_PC, | 1145 DynInstPtr instruction = new DynInst(ext_inst, 1146 fetch_PC, fetch_NPC, 1147 next_PC, next_NPC, |
1149 inst_seq, cpu); 1150 instruction->setTid(tid); 1151 1152 instruction->setASID(tid); 1153 1154 instruction->setThreadState(cpu->thread[tid]); 1155 1156 DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " --- 81 unchanged lines hidden (view full) --- 1238 1239 // Now that fetching is completed, update the PC to signify what the next 1240 // cycle will be. 1241 if (fault == NoFault) { 1242#if ISA_HAS_DELAY_SLOT 1243 if (delaySlotInfo[tid].targetReady && 1244 delaySlotInfo[tid].numInsts == 0) { 1245 // Set PC to target | 1148 inst_seq, cpu); 1149 instruction->setTid(tid); 1150 1151 instruction->setASID(tid); 1152 1153 instruction->setThreadState(cpu->thread[tid]); 1154 1155 DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x created " --- 81 unchanged lines hidden (view full) --- 1237 1238 // Now that fetching is completed, update the PC to signify what the next 1239 // cycle will be. 1240 if (fault == NoFault) { 1241#if ISA_HAS_DELAY_SLOT 1242 if (delaySlotInfo[tid].targetReady && 1243 delaySlotInfo[tid].numInsts == 0) { 1244 // Set PC to target |
1246 PC[tid] = delaySlotInfo[tid].targetAddr; //next_PC 1247 nextPC[tid] = next_PC + instSize; //next_NPC 1248 nextNPC[tid] = next_PC + (2 * instSize); | 1245 PC[tid] = next_PC; 1246 nextPC[tid] = next_NPC; 1247 nextNPC[tid] = next_NPC + instSize; |
1249 1250 delaySlotInfo[tid].targetReady = false; 1251 } else { 1252 PC[tid] = next_PC; 1253 nextPC[tid] = next_NPC; 1254 nextNPC[tid] = next_NPC + instSize; 1255 } 1256 --- 224 unchanged lines hidden --- | 1248 1249 delaySlotInfo[tid].targetReady = false; 1250 } else { 1251 PC[tid] = next_PC; 1252 nextPC[tid] = next_NPC; 1253 nextNPC[tid] = next_NPC + instSize; 1254 } 1255 --- 224 unchanged lines hidden --- |