Sequencer.cc (12133:ca42be3276af) Sequencer.cc (12334:e0ab29a34764)
1/*
2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "mem/ruby/system/Sequencer.hh"
30
31#include "arch/x86/ldstflags.hh"
32#include "base/misc.hh"
32#include "base/logging.hh"
33#include "base/str.hh"
34#include "cpu/testers/rubytest/RubyTester.hh"
35#include "debug/MemoryAccess.hh"
36#include "debug/ProtocolTrace.hh"
37#include "debug/RubySequencer.hh"
38#include "debug/RubyStats.hh"
39#include "mem/packet.hh"
40#include "mem/protocol/PrefetchBit.hh"
41#include "mem/protocol/RubyAccessMode.hh"
42#include "mem/ruby/profiler/Profiler.hh"
43#include "mem/ruby/slicc_interface/RubyRequest.hh"
44#include "mem/ruby/system/RubySystem.hh"
45#include "sim/system.hh"
46
47using namespace std;
48
49Sequencer *
50RubySequencerParams::create()
51{
52 return new Sequencer(this);
53}
54
55Sequencer::Sequencer(const Params *p)
56 : RubyPort(p), m_IncompleteTimes(MachineType_NUM),
57 deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check")
58{
59 m_outstanding_count = 0;
60
61 m_instCache_ptr = p->icache;
62 m_dataCache_ptr = p->dcache;
63 m_data_cache_hit_latency = p->dcache_hit_latency;
64 m_inst_cache_hit_latency = p->icache_hit_latency;
65 m_max_outstanding_requests = p->max_outstanding_requests;
66 m_deadlock_threshold = p->deadlock_threshold;
67
68 m_coreId = p->coreid; // for tracking the two CorePair sequencers
69 assert(m_max_outstanding_requests > 0);
70 assert(m_deadlock_threshold > 0);
71 assert(m_instCache_ptr != NULL);
72 assert(m_dataCache_ptr != NULL);
73 assert(m_data_cache_hit_latency > 0);
74 assert(m_inst_cache_hit_latency > 0);
75
76 m_runningGarnetStandalone = p->garnet_standalone;
77}
78
79Sequencer::~Sequencer()
80{
81}
82
83void
84Sequencer::wakeup()
85{
86 assert(drainState() != DrainState::Draining);
87
88 // Check for deadlock of any of the requests
89 Cycles current_time = curCycle();
90
91 // Check across all outstanding requests
92 int total_outstanding = 0;
93
94 RequestTable::iterator read = m_readRequestTable.begin();
95 RequestTable::iterator read_end = m_readRequestTable.end();
96 for (; read != read_end; ++read) {
97 SequencerRequest* request = read->second;
98 if (current_time - request->issue_time < m_deadlock_threshold)
99 continue;
100
101 panic("Possible Deadlock detected. Aborting!\n"
102 "version: %d request.paddr: 0x%x m_readRequestTable: %d "
103 "current time: %u issue_time: %d difference: %d\n", m_version,
104 request->pkt->getAddr(), m_readRequestTable.size(),
105 current_time * clockPeriod(), request->issue_time * clockPeriod(),
106 (current_time * clockPeriod()) - (request->issue_time * clockPeriod()));
107 }
108
109 RequestTable::iterator write = m_writeRequestTable.begin();
110 RequestTable::iterator write_end = m_writeRequestTable.end();
111 for (; write != write_end; ++write) {
112 SequencerRequest* request = write->second;
113 if (current_time - request->issue_time < m_deadlock_threshold)
114 continue;
115
116 panic("Possible Deadlock detected. Aborting!\n"
117 "version: %d request.paddr: 0x%x m_writeRequestTable: %d "
118 "current time: %u issue_time: %d difference: %d\n", m_version,
119 request->pkt->getAddr(), m_writeRequestTable.size(),
120 current_time * clockPeriod(), request->issue_time * clockPeriod(),
121 (current_time * clockPeriod()) - (request->issue_time * clockPeriod()));
122 }
123
124 total_outstanding += m_writeRequestTable.size();
125 total_outstanding += m_readRequestTable.size();
126
127 assert(m_outstanding_count == total_outstanding);
128
129 if (m_outstanding_count > 0) {
130 // If there are still outstanding requests, keep checking
131 schedule(deadlockCheckEvent, clockEdge(m_deadlock_threshold));
132 }
133}
134
135void Sequencer::resetStats()
136{
137 m_latencyHist.reset();
138 m_hitLatencyHist.reset();
139 m_missLatencyHist.reset();
140 for (int i = 0; i < RubyRequestType_NUM; i++) {
141 m_typeLatencyHist[i]->reset();
142 m_hitTypeLatencyHist[i]->reset();
143 m_missTypeLatencyHist[i]->reset();
144 for (int j = 0; j < MachineType_NUM; j++) {
145 m_hitTypeMachLatencyHist[i][j]->reset();
146 m_missTypeMachLatencyHist[i][j]->reset();
147 }
148 }
149
150 for (int i = 0; i < MachineType_NUM; i++) {
151 m_missMachLatencyHist[i]->reset();
152 m_hitMachLatencyHist[i]->reset();
153
154 m_IssueToInitialDelayHist[i]->reset();
155 m_InitialToForwardDelayHist[i]->reset();
156 m_ForwardToFirstResponseDelayHist[i]->reset();
157 m_FirstResponseToCompletionDelayHist[i]->reset();
158
159 m_IncompleteTimes[i] = 0;
160 }
161}
162
 163// Insert the request into the correct request table. Return
 164// RequestStatus_Aliased if the line already has an outstanding request.
165RequestStatus
166Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
167{
168 assert(m_outstanding_count ==
169 (m_writeRequestTable.size() + m_readRequestTable.size()));
170
171 // See if we should schedule a deadlock check
172 if (!deadlockCheckEvent.scheduled() &&
173 drainState() != DrainState::Draining) {
174 schedule(deadlockCheckEvent, clockEdge(m_deadlock_threshold));
175 }
176
177 Addr line_addr = makeLineAddress(pkt->getAddr());
178
179 // Check if the line is blocked for a Locked_RMW
180 if (m_controller->isBlocked(line_addr) &&
181 (request_type != RubyRequestType_Locked_RMW_Write)) {
182 // Return that this request's cache line address aliases with
183 // a prior request that locked the cache line. The request cannot
184 // proceed until the cache line is unlocked by a Locked_RMW_Write
185 return RequestStatus_Aliased;
186 }
187
 188 // Create a default entry, mapping the address to NULL; the cast is
 189 // there to make gcc 4.4 happy.
190 RequestTable::value_type default_entry(line_addr,
191 (SequencerRequest*) NULL);
192
193 if ((request_type == RubyRequestType_ST) ||
194 (request_type == RubyRequestType_RMW_Read) ||
195 (request_type == RubyRequestType_RMW_Write) ||
196 (request_type == RubyRequestType_Load_Linked) ||
197 (request_type == RubyRequestType_Store_Conditional) ||
198 (request_type == RubyRequestType_Locked_RMW_Read) ||
199 (request_type == RubyRequestType_Locked_RMW_Write) ||
200 (request_type == RubyRequestType_FLUSH)) {
201
202 // Check if there is any outstanding read request for the same
203 // cache line.
204 if (m_readRequestTable.count(line_addr) > 0) {
205 m_store_waiting_on_load++;
206 return RequestStatus_Aliased;
207 }
208
209 pair<RequestTable::iterator, bool> r =
210 m_writeRequestTable.insert(default_entry);
211 if (r.second) {
212 RequestTable::iterator i = r.first;
213 i->second = new SequencerRequest(pkt, request_type, curCycle());
214 m_outstanding_count++;
215 } else {
216 // There is an outstanding write request for the cache line
217 m_store_waiting_on_store++;
218 return RequestStatus_Aliased;
219 }
220 } else {
221 // Check if there is any outstanding write request for the same
222 // cache line.
223 if (m_writeRequestTable.count(line_addr) > 0) {
224 m_load_waiting_on_store++;
225 return RequestStatus_Aliased;
226 }
227
228 pair<RequestTable::iterator, bool> r =
229 m_readRequestTable.insert(default_entry);
230
231 if (r.second) {
232 RequestTable::iterator i = r.first;
233 i->second = new SequencerRequest(pkt, request_type, curCycle());
234 m_outstanding_count++;
235 } else {
236 // There is an outstanding read request for the cache line
237 m_load_waiting_on_load++;
238 return RequestStatus_Aliased;
239 }
240 }
241
242 m_outstandReqHist.sample(m_outstanding_count);
243 assert(m_outstanding_count ==
244 (m_writeRequestTable.size() + m_readRequestTable.size()));
245
246 return RequestStatus_Ready;
247}
248
249void
250Sequencer::markRemoved()
251{
252 m_outstanding_count--;
253 assert(m_outstanding_count ==
254 m_writeRequestTable.size() + m_readRequestTable.size());
255}
256
257void
258Sequencer::invalidateSC(Addr address)
259{
260 AbstractCacheEntry *e = m_dataCache_ptr->lookup(address);
261 // The controller has lost the coherence permissions, hence the lock
262 // on the cache line maintained by the cache should be cleared.
263 if (e && e->isLocked(m_version)) {
264 e->clearLocked();
265 }
266}
267
268bool
269Sequencer::handleLlsc(Addr address, SequencerRequest* request)
270{
271 AbstractCacheEntry *e = m_dataCache_ptr->lookup(address);
272 if (!e)
273 return true;
274
275 // The success flag indicates whether the LLSC operation was successful.
276 // LL ops will always succeed, but SC may fail if the cache line is no
277 // longer locked.
278 bool success = true;
279 if (request->m_type == RubyRequestType_Store_Conditional) {
280 if (!e->isLocked(m_version)) {
281 //
282 // For failed SC requests, indicate the failure to the cpu by
283 // setting the extra data to zero.
284 //
285 request->pkt->req->setExtraData(0);
286 success = false;
287 } else {
288 //
289 // For successful SC requests, indicate the success to the cpu by
290 // setting the extra data to one.
291 //
292 request->pkt->req->setExtraData(1);
293 }
294 //
295 // Independent of success, all SC operations must clear the lock
296 //
297 e->clearLocked();
298 } else if (request->m_type == RubyRequestType_Load_Linked) {
299 //
300 // Note: To fully follow Alpha LLSC semantics, should the LL clear any
301 // previously locked cache lines?
302 //
303 e->setLocked(m_version);
304 } else if (e->isLocked(m_version)) {
305 //
306 // Normal writes should clear the locked address
307 //
308 e->clearLocked();
309 }
310 return success;
311}
312
313void
314Sequencer::recordMissLatency(const Cycles cycles, const RubyRequestType type,
315 const MachineType respondingMach,
316 bool isExternalHit, Cycles issuedTime,
317 Cycles initialRequestTime,
318 Cycles forwardRequestTime,
319 Cycles firstResponseTime, Cycles completionTime)
320{
321 m_latencyHist.sample(cycles);
322 m_typeLatencyHist[type]->sample(cycles);
323
324 if (isExternalHit) {
325 m_missLatencyHist.sample(cycles);
326 m_missTypeLatencyHist[type]->sample(cycles);
327
328 if (respondingMach != MachineType_NUM) {
329 m_missMachLatencyHist[respondingMach]->sample(cycles);
330 m_missTypeMachLatencyHist[type][respondingMach]->sample(cycles);
331
332 if ((issuedTime <= initialRequestTime) &&
333 (initialRequestTime <= forwardRequestTime) &&
334 (forwardRequestTime <= firstResponseTime) &&
335 (firstResponseTime <= completionTime)) {
336
337 m_IssueToInitialDelayHist[respondingMach]->sample(
338 initialRequestTime - issuedTime);
339 m_InitialToForwardDelayHist[respondingMach]->sample(
340 forwardRequestTime - initialRequestTime);
341 m_ForwardToFirstResponseDelayHist[respondingMach]->sample(
342 firstResponseTime - forwardRequestTime);
343 m_FirstResponseToCompletionDelayHist[respondingMach]->sample(
344 completionTime - firstResponseTime);
345 } else {
346 m_IncompleteTimes[respondingMach]++;
347 }
348 }
349 } else {
350 m_hitLatencyHist.sample(cycles);
351 m_hitTypeLatencyHist[type]->sample(cycles);
352
353 if (respondingMach != MachineType_NUM) {
354 m_hitMachLatencyHist[respondingMach]->sample(cycles);
355 m_hitTypeMachLatencyHist[type][respondingMach]->sample(cycles);
356 }
357 }
358}
359
360void
361Sequencer::writeCallback(Addr address, DataBlock& data,
362 const bool externalHit, const MachineType mach,
363 const Cycles initialRequestTime,
364 const Cycles forwardRequestTime,
365 const Cycles firstResponseTime)
366{
367 assert(address == makeLineAddress(address));
368 assert(m_writeRequestTable.count(makeLineAddress(address)));
369
370 RequestTable::iterator i = m_writeRequestTable.find(address);
371 assert(i != m_writeRequestTable.end());
372 SequencerRequest* request = i->second;
373
374 m_writeRequestTable.erase(i);
375 markRemoved();
376
377 assert((request->m_type == RubyRequestType_ST) ||
378 (request->m_type == RubyRequestType_ATOMIC) ||
379 (request->m_type == RubyRequestType_RMW_Read) ||
380 (request->m_type == RubyRequestType_RMW_Write) ||
381 (request->m_type == RubyRequestType_Load_Linked) ||
382 (request->m_type == RubyRequestType_Store_Conditional) ||
383 (request->m_type == RubyRequestType_Locked_RMW_Read) ||
384 (request->m_type == RubyRequestType_Locked_RMW_Write) ||
385 (request->m_type == RubyRequestType_FLUSH));
386
387 //
388 // For Alpha, properly handle LL, SC, and write requests with respect to
389 // locked cache blocks.
390 //
 391 // Not valid for the Garnet_standalone protocol
392 //
393 bool success = true;
394 if (!m_runningGarnetStandalone)
395 success = handleLlsc(address, request);
396
397 // Handle SLICC block_on behavior for Locked_RMW accesses. NOTE: the
398 // address variable here is assumed to be a line address, so when
399 // blocking buffers, must check line addresses.
400 if (request->m_type == RubyRequestType_Locked_RMW_Read) {
401 // blockOnQueue blocks all first-level cache controller queues
402 // waiting on memory accesses for the specified address that go to
403 // the specified queue. In this case, a Locked_RMW_Write must go to
404 // the mandatory_q before unblocking the first-level controller.
405 // This will block standard loads, stores, ifetches, etc.
406 m_controller->blockOnQueue(address, m_mandatory_q_ptr);
407 } else if (request->m_type == RubyRequestType_Locked_RMW_Write) {
408 m_controller->unblock(address);
409 }
410
411 hitCallback(request, data, success, mach, externalHit,
412 initialRequestTime, forwardRequestTime, firstResponseTime);
413}
414
415void
416Sequencer::readCallback(Addr address, DataBlock& data,
417 bool externalHit, const MachineType mach,
418 Cycles initialRequestTime,
419 Cycles forwardRequestTime,
420 Cycles firstResponseTime)
421{
422 assert(address == makeLineAddress(address));
423 assert(m_readRequestTable.count(makeLineAddress(address)));
424
425 RequestTable::iterator i = m_readRequestTable.find(address);
426 assert(i != m_readRequestTable.end());
427 SequencerRequest* request = i->second;
428
429 m_readRequestTable.erase(i);
430 markRemoved();
431
432 assert((request->m_type == RubyRequestType_LD) ||
433 (request->m_type == RubyRequestType_IFETCH));
434
435 hitCallback(request, data, true, mach, externalHit,
436 initialRequestTime, forwardRequestTime, firstResponseTime);
437}
438
439void
440Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
441 bool llscSuccess,
442 const MachineType mach, const bool externalHit,
443 const Cycles initialRequestTime,
444 const Cycles forwardRequestTime,
445 const Cycles firstResponseTime)
446{
447 warn_once("Replacement policy updates recently became the responsibility "
448 "of SLICC state machines. Make sure to setMRU() near callbacks "
449 "in .sm files!");
450
451 PacketPtr pkt = srequest->pkt;
452 Addr request_address(pkt->getAddr());
453 RubyRequestType type = srequest->m_type;
454 Cycles issued_time = srequest->issue_time;
455
456 assert(curCycle() >= issued_time);
457 Cycles total_latency = curCycle() - issued_time;
458
459 // Profile the latency for all demand accesses.
460 recordMissLatency(total_latency, type, mach, externalHit, issued_time,
461 initialRequestTime, forwardRequestTime,
462 firstResponseTime, curCycle());
463
464 DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %#x %d cycles\n",
465 curTick(), m_version, "Seq",
466 llscSuccess ? "Done" : "SC_Failed", "", "",
467 printAddress(request_address), total_latency);
468
469 // update the data unless it is a non-data-carrying flush
470 if (RubySystem::getWarmupEnabled()) {
471 data.setData(pkt->getConstPtr<uint8_t>(),
472 getOffset(request_address), pkt->getSize());
473 } else if (!pkt->isFlush()) {
474 if ((type == RubyRequestType_LD) ||
475 (type == RubyRequestType_IFETCH) ||
476 (type == RubyRequestType_RMW_Read) ||
477 (type == RubyRequestType_Locked_RMW_Read) ||
478 (type == RubyRequestType_Load_Linked)) {
479 memcpy(pkt->getPtr<uint8_t>(),
480 data.getData(getOffset(request_address), pkt->getSize()),
481 pkt->getSize());
482 DPRINTF(RubySequencer, "read data %s\n", data);
483 } else if (pkt->req->isSwap()) {
484 std::vector<uint8_t> overwrite_val(pkt->getSize());
485 memcpy(&overwrite_val[0], pkt->getConstPtr<uint8_t>(),
486 pkt->getSize());
487 memcpy(pkt->getPtr<uint8_t>(),
488 data.getData(getOffset(request_address), pkt->getSize()),
489 pkt->getSize());
490 data.setData(&overwrite_val[0],
491 getOffset(request_address), pkt->getSize());
492 DPRINTF(RubySequencer, "swap data %s\n", data);
493 } else if (type != RubyRequestType_Store_Conditional || llscSuccess) {
 494 // Stores of all types set the actual data here, apart from
 495 // failed Store Conditional requests
496 data.setData(pkt->getConstPtr<uint8_t>(),
497 getOffset(request_address), pkt->getSize());
498 DPRINTF(RubySequencer, "set data %s\n", data);
499 }
500 }
501
502 // If using the RubyTester, update the RubyTester sender state's
 503 // subBlock with the received data. The tester will later access
504 // this state.
505 if (m_usingRubyTester) {
506 DPRINTF(RubySequencer, "hitCallback %s 0x%x using RubyTester\n",
507 pkt->cmdString(), pkt->getAddr());
508 RubyTester::SenderState* testerSenderState =
509 pkt->findNextSenderState<RubyTester::SenderState>();
510 assert(testerSenderState);
511 testerSenderState->subBlock.mergeFrom(data);
512 }
513
514 delete srequest;
515
516 RubySystem *rs = m_ruby_system;
517 if (RubySystem::getWarmupEnabled()) {
518 assert(pkt->req);
519 delete pkt->req;
520 delete pkt;
521 rs->m_cache_recorder->enqueueNextFetchRequest();
522 } else if (RubySystem::getCooldownEnabled()) {
523 delete pkt;
524 rs->m_cache_recorder->enqueueNextFlushRequest();
525 } else {
526 ruby_hit_callback(pkt);
527 testDrainComplete();
528 }
529}
530
531bool
532Sequencer::empty() const
533{
534 return m_writeRequestTable.empty() && m_readRequestTable.empty();
535}
536
537RequestStatus
538Sequencer::makeRequest(PacketPtr pkt)
539{
540 if (m_outstanding_count >= m_max_outstanding_requests) {
541 return RequestStatus_BufferFull;
542 }
543
544 RubyRequestType primary_type = RubyRequestType_NULL;
545 RubyRequestType secondary_type = RubyRequestType_NULL;
546
547 if (pkt->isLLSC()) {
548 //
549 // Alpha LL/SC instructions need to be handled carefully by the cache
550 // coherence protocol to ensure they follow the proper semantics. In
551 // particular, by identifying the operations as atomic, the protocol
552 // should understand that migratory sharing optimizations should not
553 // be performed (i.e. a load between the LL and SC should not steal
554 // away exclusive permission).
555 //
556 if (pkt->isWrite()) {
557 DPRINTF(RubySequencer, "Issuing SC\n");
558 primary_type = RubyRequestType_Store_Conditional;
559 } else {
560 DPRINTF(RubySequencer, "Issuing LL\n");
561 assert(pkt->isRead());
562 primary_type = RubyRequestType_Load_Linked;
563 }
564 secondary_type = RubyRequestType_ATOMIC;
565 } else if (pkt->req->isLockedRMW()) {
566 //
567 // x86 locked instructions are translated to store cache coherence
568 // requests because these requests should always be treated as read
569 // exclusive operations and should leverage any migratory sharing
570 // optimization built into the protocol.
571 //
572 if (pkt->isWrite()) {
573 DPRINTF(RubySequencer, "Issuing Locked RMW Write\n");
574 primary_type = RubyRequestType_Locked_RMW_Write;
575 } else {
576 DPRINTF(RubySequencer, "Issuing Locked RMW Read\n");
577 assert(pkt->isRead());
578 primary_type = RubyRequestType_Locked_RMW_Read;
579 }
580 secondary_type = RubyRequestType_ST;
581 } else {
582 //
583 // To support SwapReq, we need to check isWrite() first: a SwapReq
584 // should always be treated like a write, but since a SwapReq implies
585 // both isWrite() and isRead() are true, check isWrite() first here.
586 //
587 if (pkt->isWrite()) {
588 //
589 // Note: M5 packets do not differentiate ST from RMW_Write
590 //
591 primary_type = secondary_type = RubyRequestType_ST;
592 } else if (pkt->isRead()) {
593 if (pkt->req->isInstFetch()) {
594 primary_type = secondary_type = RubyRequestType_IFETCH;
595 } else {
596 bool storeCheck = false;
 597 // Only X86 needs the store check
598 if (system->getArch() == Arch::X86ISA) {
599 uint32_t flags = pkt->req->getFlags();
600 storeCheck = flags &
601 (X86ISA::StoreCheck << X86ISA::FlagShift);
602 }
603 if (storeCheck) {
604 primary_type = RubyRequestType_RMW_Read;
605 secondary_type = RubyRequestType_ST;
606 } else {
607 primary_type = secondary_type = RubyRequestType_LD;
608 }
609 }
610 } else if (pkt->isFlush()) {
611 primary_type = secondary_type = RubyRequestType_FLUSH;
612 } else {
613 panic("Unsupported ruby packet type\n");
614 }
615 }
616
617 RequestStatus status = insertRequest(pkt, primary_type);
618 if (status != RequestStatus_Ready)
619 return status;
620
621 issueRequest(pkt, secondary_type);
622
623 // TODO: issue hardware prefetches here
624 return RequestStatus_Issued;
625}
626
627void
628Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
629{
630 assert(pkt != NULL);
631 ContextID proc_id = pkt->req->hasContextId() ?
632 pkt->req->contextId() : InvalidContextID;
633
634 ContextID core_id = coreId();
635
636 // If valid, copy the pc to the ruby request
637 Addr pc = 0;
638 if (pkt->req->hasPC()) {
639 pc = pkt->req->getPC();
640 }
641
 642 // Check if the packet has data; for example, prefetch and flush
 643 // requests do not.
644 std::shared_ptr<RubyRequest> msg =
645 std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
646 pkt->isFlush() ?
647 nullptr : pkt->getPtr<uint8_t>(),
648 pkt->getSize(), pc, secondary_type,
649 RubyAccessMode_Supervisor, pkt,
650 PrefetchBit_No, proc_id, core_id);
651
652 DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %#x %s\n",
653 curTick(), m_version, "Seq", "Begin", "", "",
654 printAddress(msg->getPhysicalAddress()),
655 RubyRequestType_to_string(secondary_type));
656
657 // The Sequencer currently assesses instruction and data cache hit latency
658 // for the top-level caches at the beginning of a memory access.
659 // TODO: Eventually, this latency should be moved to represent the actual
660 // cache access latency portion of the memory access. This will require
661 // changing cache controller protocol files to assess the latency on the
662 // access response path.
663 Cycles latency(0); // Initialize to zero to catch misconfigured latency
664 if (secondary_type == RubyRequestType_IFETCH)
665 latency = m_inst_cache_hit_latency;
666 else
667 latency = m_data_cache_hit_latency;
668
669 // Send the message to the cache controller
670 assert(latency > 0);
671
672 assert(m_mandatory_q_ptr != NULL);
673 m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(latency));
674}
675
676template <class KEY, class VALUE>
677std::ostream &
678operator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map)
679{
680 auto i = map.begin();
681 auto end = map.end();
682
683 out << "[";
684 for (; i != end; ++i)
685 out << " " << i->first << "=" << i->second;
686 out << " ]";
687
688 return out;
689}
690
691void
692Sequencer::print(ostream& out) const
693{
694 out << "[Sequencer: " << m_version
695 << ", outstanding requests: " << m_outstanding_count
696 << ", read request table: " << m_readRequestTable
697 << ", write request table: " << m_writeRequestTable
698 << "]";
699}
700
 701// This can be called from setState whenever coherence permissions are
 702// upgraded. When invoked, coherence violations will be checked for the
 703// given block.
704void
705Sequencer::checkCoherence(Addr addr)
706{
707#ifdef CHECK_COHERENCE
708 m_ruby_system->checkGlobalCoherenceInvariant(addr);
709#endif
710}
711
712void
713Sequencer::recordRequestType(SequencerRequestType requestType) {
714 DPRINTF(RubyStats, "Recorded statistic: %s\n",
715 SequencerRequestType_to_string(requestType));
716}
717
718
719void
720Sequencer::evictionCallback(Addr address)
721{
722 ruby_eviction_callback(address);
723}
724
725void
726Sequencer::regStats()
727{
728 RubyPort::regStats();
729
730 m_store_waiting_on_load
731 .name(name() + ".store_waiting_on_load")
732 .desc("Number of times a store aliased with a pending load")
733 .flags(Stats::nozero);
734 m_store_waiting_on_store
735 .name(name() + ".store_waiting_on_store")
736 .desc("Number of times a store aliased with a pending store")
737 .flags(Stats::nozero);
738 m_load_waiting_on_load
739 .name(name() + ".load_waiting_on_load")
740 .desc("Number of times a load aliased with a pending load")
741 .flags(Stats::nozero);
742 m_load_waiting_on_store
743 .name(name() + ".load_waiting_on_store")
744 .desc("Number of times a load aliased with a pending store")
745 .flags(Stats::nozero);
746
747 // These statistical variables are not for display.
748 // The profiler will collate these across different
749 // sequencers and display those collated statistics.
750 m_outstandReqHist.init(10);
751 m_latencyHist.init(10);
752 m_hitLatencyHist.init(10);
753 m_missLatencyHist.init(10);
754
755 for (int i = 0; i < RubyRequestType_NUM; i++) {
756 m_typeLatencyHist.push_back(new Stats::Histogram());
757 m_typeLatencyHist[i]->init(10);
758
759 m_hitTypeLatencyHist.push_back(new Stats::Histogram());
760 m_hitTypeLatencyHist[i]->init(10);
761
762 m_missTypeLatencyHist.push_back(new Stats::Histogram());
763 m_missTypeLatencyHist[i]->init(10);
764 }
765
766 for (int i = 0; i < MachineType_NUM; i++) {
767 m_hitMachLatencyHist.push_back(new Stats::Histogram());
768 m_hitMachLatencyHist[i]->init(10);
769
770 m_missMachLatencyHist.push_back(new Stats::Histogram());
771 m_missMachLatencyHist[i]->init(10);
772
773 m_IssueToInitialDelayHist.push_back(new Stats::Histogram());
774 m_IssueToInitialDelayHist[i]->init(10);
775
776 m_InitialToForwardDelayHist.push_back(new Stats::Histogram());
777 m_InitialToForwardDelayHist[i]->init(10);
778
779 m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram());
780 m_ForwardToFirstResponseDelayHist[i]->init(10);
781
782 m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram());
783 m_FirstResponseToCompletionDelayHist[i]->init(10);
784 }
785
786 for (int i = 0; i < RubyRequestType_NUM; i++) {
787 m_hitTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
788 m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
789
790 for (int j = 0; j < MachineType_NUM; j++) {
791 m_hitTypeMachLatencyHist[i].push_back(new Stats::Histogram());
792 m_hitTypeMachLatencyHist[i][j]->init(10);
793
794 m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram());
795 m_missTypeMachLatencyHist[i][j]->init(10);
796 }
797 }
798}