RubySystem.cc revision 10991
1/* 2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include <fcntl.h> 30#include <zlib.h> 31 32#include <cstdio> 33#include <list> 34 35#include "base/intmath.hh" 36#include "base/statistics.hh" 37#include "debug/RubyCacheTrace.hh" 38#include "debug/RubySystem.hh" 39#include "mem/ruby/common/Address.hh" 40#include "mem/ruby/network/Network.hh" 41#include "mem/ruby/system/System.hh" 42#include "mem/simple_mem.hh" 43#include "sim/eventq.hh" 44#include "sim/simulate.hh" 45 46using namespace std; 47 48int RubySystem::m_random_seed; 49bool RubySystem::m_randomization; 50uint32_t RubySystem::m_block_size_bytes; 51uint32_t RubySystem::m_block_size_bits; 52uint32_t RubySystem::m_memory_size_bits; 53bool RubySystem::m_warmup_enabled = false; 54// To look forward to allowing multiple RubySystem instances, track the number 55// of RubySystems that need to be warmed up on checkpoint restore. 56unsigned RubySystem::m_systems_to_warmup = 0; 57bool RubySystem::m_cooldown_enabled = false; 58 59RubySystem::RubySystem(const Params *p) 60 : ClockedObject(p), m_access_backing_store(p->access_backing_store), 61 m_cache_recorder(NULL) 62{ 63 m_random_seed = p->random_seed; 64 srandom(m_random_seed); 65 m_randomization = p->randomization; 66 67 m_block_size_bytes = p->block_size_bytes; 68 assert(isPowerOf2(m_block_size_bytes)); 69 m_block_size_bits = floorLog2(m_block_size_bytes); 70 m_memory_size_bits = p->memory_size_bits; 71 72 // Resize to the size of different machine types 73 m_abstract_controls.resize(MachineType_NUM); 74 75 // Collate the statistics before they are printed. 76 Stats::registerDumpCallback(new RubyStatsCallback(this)); 77 // Create the profiler 78 m_profiler = new Profiler(p, this); 79 m_phys_mem = p->phys_mem; 80} 81 82void 83RubySystem::registerNetwork(Network* network_ptr) 84{ 85 m_network = network_ptr; 86} 87 88void 89RubySystem::registerAbstractController(AbstractController* cntrl) 90{ 91 m_abs_cntrl_vec.push_back(cntrl); 92 93 MachineID id = cntrl->getMachineID(); 94 m_abstract_controls[id.getType()][id.getNum()] = cntrl; 95} 96 97RubySystem::~RubySystem() 98{ 99 delete m_network; 100 delete m_profiler; 101} 102 103void 104RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace, 105 uint64 cache_trace_size, 106 uint64 block_size_bytes) 107{ 108 vector<Sequencer*> sequencer_map; 109 Sequencer* sequencer_ptr = NULL; 110 111 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 112 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 113 if (sequencer_ptr == NULL) { 114 sequencer_ptr = sequencer_map[cntrl]; 115 } 116 } 117 118 assert(sequencer_ptr != NULL); 119 120 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 121 if (sequencer_map[cntrl] == NULL) { 122 sequencer_map[cntrl] = sequencer_ptr; 123 } 124 } 125 126 // Remove the old CacheRecorder if it's still hanging about. 127 if (m_cache_recorder != NULL) { 128 delete m_cache_recorder; 129 } 130 131 // Create the CacheRecorder and record the cache trace 132 m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, 133 sequencer_map, block_size_bytes); 134} 135 136void 137RubySystem::memWriteback() 138{ 139 m_cooldown_enabled = true; 140 141 // Make the trace so we know what to write back. 142 DPRINTF(RubyCacheTrace, "Recording Cache Trace\n"); 143 makeCacheRecorder(NULL, 0, getBlockSizeBytes()); 144 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 145 m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder); 146 } 147 DPRINTF(RubyCacheTrace, "Cache Trace Complete\n"); 148 149 // save the current tick value 150 Tick curtick_original = curTick(); 151 DPRINTF(RubyCacheTrace, "Recording current tick %ld\n", curtick_original); 152 153 // Deschedule all prior events on the event queue, but record the tick they 154 // were scheduled at so they can be restored correctly later. 155 list<pair<Event*, Tick> > original_events; 156 while (!eventq->empty()) { 157 Event *curr_head = eventq->getHead(); 158 if (curr_head->isAutoDelete()) { 159 DPRINTF(RubyCacheTrace, "Event %s auto-deletes when descheduled," 160 " not recording\n", curr_head->name()); 161 } else { 162 original_events.push_back(make_pair(curr_head, curr_head->when())); 163 } 164 eventq->deschedule(curr_head); 165 } 166 167 // Schedule an event to start cache cooldown 168 DPRINTF(RubyCacheTrace, "Starting cache flush\n"); 169 enqueueRubyEvent(curTick()); 170 simulate(); 171 DPRINTF(RubyCacheTrace, "Cache flush complete\n"); 172 173 // Deschedule any events left on the event queue. 174 while (!eventq->empty()) { 175 eventq->deschedule(eventq->getHead()); 176 } 177 178 // Restore curTick 179 setCurTick(curtick_original); 180 181 // Restore all events that were originally on the event queue. This is 182 // done after setting curTick back to its original value so that events do 183 // not seem to be scheduled in the past. 184 while (!original_events.empty()) { 185 pair<Event*, Tick> event = original_events.back(); 186 eventq->schedule(event.first, event.second); 187 original_events.pop_back(); 188 } 189 190 // No longer flushing back to memory. 191 m_cooldown_enabled = false; 192 193 // There are several issues with continuing simulation after calling 194 // memWriteback() at the moment, that stem from taking events off the 195 // queue, simulating again, and then putting them back on, whilst 196 // pretending that no time has passed. One is that some events will have 197 // been deleted, so can't be put back. Another is that any object 198 // recording the tick something happens may end up storing a tick in the 199 // future. A simple warning here alerts the user that things may not work 200 // as expected. 201 warn_once("Ruby memory writeback is experimental. Continuing simulation " 202 "afterwards may not always work as intended."); 203 204 // Keep the cache recorder around so that we can dump the trace if a 205 // checkpoint is immediately taken. 206} 207 208void 209RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename, 210 uint64 uncompressed_trace_size) 211{ 212 // Create the checkpoint file for the memory 213 string thefile = CheckpointIn::dir() + "/" + filename.c_str(); 214 215 int fd = creat(thefile.c_str(), 0664); 216 if (fd < 0) { 217 perror("creat"); 218 fatal("Can't open memory trace file '%s'\n", filename); 219 } 220 221 gzFile compressedMemory = gzdopen(fd, "wb"); 222 if (compressedMemory == NULL) 223 fatal("Insufficient memory to allocate compression state for %s\n", 224 filename); 225 226 if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) != 227 uncompressed_trace_size) { 228 fatal("Write failed on memory trace file '%s'\n", filename); 229 } 230 231 if (gzclose(compressedMemory)) { 232 fatal("Close failed on memory trace file '%s'\n", filename); 233 } 234 delete[] raw_data; 235} 236 237void 238RubySystem::serializeOld(CheckpointOut &cp) 239{ 240 // Store the cache-block size, so we are able to restore on systems with a 241 // different cache-block size. CacheRecorder depends on the correct 242 // cache-block size upon unserializing. 243 uint64 block_size_bytes = getBlockSizeBytes(); 244 SERIALIZE_SCALAR(block_size_bytes); 245 246 // Check that there's a valid trace to use. If not, then memory won't be 247 // up-to-date and the simulation will probably fail when restoring from the 248 // checkpoint. 249 if (m_cache_recorder == NULL) { 250 fatal("Call memWriteback() before serialize() to create ruby trace"); 251 } 252 253 // Aggregate the trace entries together into a single array 254 uint8_t *raw_data = new uint8_t[4096]; 255 uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data, 256 4096); 257 string cache_trace_file = name() + ".cache.gz"; 258 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size); 259 260 SERIALIZE_SCALAR(cache_trace_file); 261 SERIALIZE_SCALAR(cache_trace_size); 262 263 // Now finished with the cache recorder. 264 delete m_cache_recorder; 265 m_cache_recorder = NULL; 266} 267 268void 269RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data, 270 uint64& uncompressed_trace_size) 271{ 272 // Read the trace file 273 gzFile compressedTrace; 274 275 // trace file 276 int fd = open(filename.c_str(), O_RDONLY); 277 if (fd < 0) { 278 perror("open"); 279 fatal("Unable to open trace file %s", filename); 280 } 281 282 compressedTrace = gzdopen(fd, "rb"); 283 if (compressedTrace == NULL) { 284 fatal("Insufficient memory to allocate compression state for %s\n", 285 filename); 286 } 287 288 raw_data = new uint8_t[uncompressed_trace_size]; 289 if (gzread(compressedTrace, raw_data, uncompressed_trace_size) < 290 uncompressed_trace_size) { 291 fatal("Unable to read complete trace from file %s\n", filename); 292 } 293 294 if (gzclose(compressedTrace)) { 295 fatal("Failed to close cache trace file '%s'\n", filename); 296 } 297} 298 299void 300RubySystem::unserialize(CheckpointIn &cp) 301{ 302 uint8_t *uncompressed_trace = NULL; 303 304 // This value should be set to the checkpoint-system's block-size. 305 // Optional, as checkpoints without it can be run if the 306 // checkpoint-system's block-size == current block-size. 307 uint64 block_size_bytes = getBlockSizeBytes(); 308 UNSERIALIZE_OPT_SCALAR(block_size_bytes); 309 310 string cache_trace_file; 311 uint64 cache_trace_size = 0; 312 313 UNSERIALIZE_SCALAR(cache_trace_file); 314 UNSERIALIZE_SCALAR(cache_trace_size); 315 cache_trace_file = cp.cptDir + "/" + cache_trace_file; 316 317 readCompressedTrace(cache_trace_file, uncompressed_trace, 318 cache_trace_size); 319 m_warmup_enabled = true; 320 m_systems_to_warmup++; 321 322 // Create the cache recorder that will hang around until startup. 323 makeCacheRecorder(uncompressed_trace, cache_trace_size, block_size_bytes); 324} 325 326void 327RubySystem::startup() 328{ 329 330 // Ruby restores state from a checkpoint by resetting the clock to 0 and 331 // playing the requests that can possibly re-generate the cache state. 332 // The clock value is set to the actual checkpointed value once all the 333 // requests have been executed. 334 // 335 // This way of restoring state is pretty finicky. For example, if a 336 // Ruby component reads time before the state has been restored, it would 337 // cache this value and hence its clock would not be reset to 0, when 338 // Ruby resets the global clock. This can potentially result in a 339 // deadlock. 340 // 341 // The solution is that no Ruby component should read time before the 342 // simulation starts. And then one also needs to hope that the time 343 // Ruby finishes restoring the state is less than the time when the 344 // state was checkpointed. 345 346 if (m_warmup_enabled) { 347 DPRINTF(RubyCacheTrace, "Starting ruby cache warmup\n"); 348 // save the current tick value 349 Tick curtick_original = curTick(); 350 // save the event queue head 351 Event* eventq_head = eventq->replaceHead(NULL); 352 // set curTick to 0 and reset Ruby System's clock 353 setCurTick(0); 354 resetClock(); 355 356 // Schedule an event to start cache warmup 357 enqueueRubyEvent(curTick()); 358 simulate(); 359 360 delete m_cache_recorder; 361 m_cache_recorder = NULL; 362 m_systems_to_warmup--; 363 if (m_systems_to_warmup == 0) { 364 m_warmup_enabled = false; 365 } 366 367 // Restore eventq head 368 eventq_head = eventq->replaceHead(eventq_head); 369 // Restore curTick and Ruby System's clock 370 setCurTick(curtick_original); 371 resetClock(); 372 } 373 374 resetStats(); 375} 376 377void 378RubySystem::RubyEvent::process() 379{ 380 if (RubySystem::getWarmupEnabled()) { 381 m_ruby_system->m_cache_recorder->enqueueNextFetchRequest(); 382 } else if (RubySystem::getCooldownEnabled()) { 383 m_ruby_system->m_cache_recorder->enqueueNextFlushRequest(); 384 } 385} 386 387void 388RubySystem::resetStats() 389{ 390 m_start_cycle = curCycle(); 391} 392 393bool 394RubySystem::functionalRead(PacketPtr pkt) 395{ 396 Address address(pkt->getAddr()); 397 Address line_address(address); 398 line_address.makeLineAddress(); 399 400 AccessPermission access_perm = AccessPermission_NotPresent; 401 int num_controllers = m_abs_cntrl_vec.size(); 402 403 DPRINTF(RubySystem, "Functional Read request for %s\n",address); 404 405 unsigned int num_ro = 0; 406 unsigned int num_rw = 0; 407 unsigned int num_busy = 0; 408 unsigned int num_backing_store = 0; 409 unsigned int num_invalid = 0; 410 411 // In this loop we count the number of controllers that have the given 412 // address in read only, read write and busy states. 413 for (unsigned int i = 0; i < num_controllers; ++i) { 414 access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address); 415 if (access_perm == AccessPermission_Read_Only) 416 num_ro++; 417 else if (access_perm == AccessPermission_Read_Write) 418 num_rw++; 419 else if (access_perm == AccessPermission_Busy) 420 num_busy++; 421 else if (access_perm == AccessPermission_Backing_Store) 422 // See RubySlicc_Exports.sm for details, but Backing_Store is meant 423 // to represent blocks in memory *for Broadcast/Snooping protocols*, 424 // where memory has no idea whether it has an exclusive copy of data 425 // or not. 426 num_backing_store++; 427 else if (access_perm == AccessPermission_Invalid || 428 access_perm == AccessPermission_NotPresent) 429 num_invalid++; 430 } 431 assert(num_rw <= 1); 432 433 // This if case is meant to capture what happens in a Broadcast/Snoop 434 // protocol where the block does not exist in the cache hierarchy. You 435 // only want to read from the Backing_Store memory if there is no copy in 436 // the cache hierarchy, otherwise you want to try to read the RO or RW 437 // copies existing in the cache hierarchy (covered by the else statement). 438 // The reason is because the Backing_Store memory could easily be stale, if 439 // there are copies floating around the cache hierarchy, so you want to read 440 // it only if it's not in the cache hierarchy at all. 441 if (num_invalid == (num_controllers - 1) && num_backing_store == 1) { 442 DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n"); 443 for (unsigned int i = 0; i < num_controllers; ++i) { 444 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 445 if (access_perm == AccessPermission_Backing_Store) { 446 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); 447 return true; 448 } 449 } 450 } else if (num_ro > 0 || num_rw == 1) { 451 // In Broadcast/Snoop protocols, this covers if you know the block 452 // exists somewhere in the caching hierarchy, then you want to read any 453 // valid RO or RW block. In directory protocols, same thing, you want 454 // to read any valid readable copy of the block. 455 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n", 456 num_busy, num_ro, num_rw); 457 // In this loop, we try to figure which controller has a read only or 458 // a read write copy of the given address. Any valid copy would suffice 459 // for a functional read. 460 for (unsigned int i = 0;i < num_controllers;++i) { 461 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 462 if (access_perm == AccessPermission_Read_Only || 463 access_perm == AccessPermission_Read_Write) { 464 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); 465 return true; 466 } 467 } 468 } 469 470 return false; 471} 472 473// The function searches through all the buffers that exist in different 474// cache, directory and memory controllers, and in the network components 475// and writes the data portion of those that hold the address specified 476// in the packet. 477bool 478RubySystem::functionalWrite(PacketPtr pkt) 479{ 480 Address addr(pkt->getAddr()); 481 Address line_addr = line_address(addr); 482 AccessPermission access_perm = AccessPermission_NotPresent; 483 int num_controllers = m_abs_cntrl_vec.size(); 484 485 DPRINTF(RubySystem, "Functional Write request for %s\n",addr); 486 487 uint32_t M5_VAR_USED num_functional_writes = 0; 488 489 for (unsigned int i = 0; i < num_controllers;++i) { 490 num_functional_writes += 491 m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt); 492 493 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr); 494 if (access_perm != AccessPermission_Invalid && 495 access_perm != AccessPermission_NotPresent) { 496 num_functional_writes += 497 m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt); 498 } 499 } 500 501 num_functional_writes += m_network->functionalWrite(pkt); 502 DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes); 503 504 return true; 505} 506 507#ifdef CHECK_COHERENCE 508// This code will check for cases if the given cache block is exclusive in 509// one node and shared in another-- a coherence violation 510// 511// To use, the SLICC specification must call sequencer.checkCoherence(address) 512// when the controller changes to a state with new permissions. Do this 513// in setState. The SLICC spec must also define methods "isBlockShared" 514// and "isBlockExclusive" that are specific to that protocol 515// 516void 517RubySystem::checkGlobalCoherenceInvariant(const Address& addr) 518{ 519#if 0 520 NodeID exclusive = -1; 521 bool sharedDetected = false; 522 NodeID lastShared = -1; 523 524 for (int i = 0; i < m_chip_vector.size(); i++) { 525 if (m_chip_vector[i]->isBlockExclusive(addr)) { 526 if (exclusive != -1) { 527 // coherence violation 528 WARN_EXPR(exclusive); 529 WARN_EXPR(m_chip_vector[i]->getID()); 530 WARN_EXPR(addr); 531 WARN_EXPR(getTime()); 532 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips"); 533 } else if (sharedDetected) { 534 WARN_EXPR(lastShared); 535 WARN_EXPR(m_chip_vector[i]->getID()); 536 WARN_EXPR(addr); 537 WARN_EXPR(getTime()); 538 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 539 } else { 540 exclusive = m_chip_vector[i]->getID(); 541 } 542 } else if (m_chip_vector[i]->isBlockShared(addr)) { 543 sharedDetected = true; 544 lastShared = m_chip_vector[i]->getID(); 545 546 if (exclusive != -1) { 547 WARN_EXPR(lastShared); 548 WARN_EXPR(exclusive); 549 WARN_EXPR(addr); 550 WARN_EXPR(getTime()); 551 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 552 } 553 } 554 } 555#endif 556} 557#endif 558 559RubySystem * 560RubySystemParams::create() 561{ 562 return new RubySystem(this); 563} 564