// RubySystem.cc revision 10163
1/* 2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29#include <fcntl.h> 30#include <zlib.h> 31 32#include <cstdio> 33 34#include "base/intmath.hh" 35#include "base/statistics.hh" 36#include "debug/RubyCacheTrace.hh" 37#include "debug/RubySystem.hh" 38#include "mem/ruby/common/Address.hh" 39#include "mem/ruby/network/Network.hh" 40#include "mem/ruby/system/System.hh" 41#include "sim/eventq.hh" 42#include "sim/simulate.hh" 43 44using namespace std; 45 46int RubySystem::m_random_seed; 47bool RubySystem::m_randomization; 48uint32_t RubySystem::m_block_size_bytes; 49uint32_t RubySystem::m_block_size_bits; 50uint64_t RubySystem::m_memory_size_bytes; 51uint32_t RubySystem::m_memory_size_bits; 52 53RubySystem::RubySystem(const Params *p) 54 : ClockedObject(p) 55{ 56 if (g_system_ptr != NULL) 57 fatal("Only one RubySystem object currently allowed.\n"); 58 59 m_random_seed = p->random_seed; 60 srandom(m_random_seed); 61 m_randomization = p->randomization; 62 63 m_block_size_bytes = p->block_size_bytes; 64 assert(isPowerOf2(m_block_size_bytes)); 65 m_block_size_bits = floorLog2(m_block_size_bytes); 66 67 m_memory_size_bytes = p->mem_size; 68 if (m_memory_size_bytes == 0) { 69 m_memory_size_bits = 0; 70 } else { 71 m_memory_size_bits = ceilLog2(m_memory_size_bytes); 72 } 73 74 if (p->no_mem_vec) { 75 m_mem_vec = NULL; 76 } else { 77 m_mem_vec = new MemoryVector; 78 m_mem_vec->resize(m_memory_size_bytes); 79 } 80 81 m_warmup_enabled = false; 82 m_cooldown_enabled = false; 83 84 // Setup the global variables used in Ruby 85 g_system_ptr = this; 86 87 // Resize to the size of different machine types 88 g_abs_controls.resize(MachineType_NUM); 89 90 // Collate the statistics before they are printed. 
91 Stats::registerDumpCallback(new RubyStatsCallback(this)); 92 // Create the profiler 93 m_profiler = new Profiler(p); 94} 95 96void 97RubySystem::registerNetwork(Network* network_ptr) 98{ 99 m_network = network_ptr; 100} 101 102void 103RubySystem::registerAbstractController(AbstractController* cntrl) 104{ 105 m_abs_cntrl_vec.push_back(cntrl); 106 107 MachineID id = cntrl->getMachineID(); 108 g_abs_controls[id.getType()][id.getNum()] = cntrl; 109} 110 111void 112RubySystem::registerSparseMemory(SparseMemory* s) 113{ 114 m_sparse_memory_vector.push_back(s); 115} 116 117void 118RubySystem::registerMemController(MemoryControl *mc) { 119 m_memory_controller_vec.push_back(mc); 120} 121 122RubySystem::~RubySystem() 123{ 124 delete m_network; 125 delete m_profiler; 126 if (m_mem_vec) 127 delete m_mem_vec; 128} 129 130void 131RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename, 132 uint64 uncompressed_trace_size) 133{ 134 // Create the checkpoint file for the memory 135 string thefile = Checkpoint::dir() + "/" + filename.c_str(); 136 137 int fd = creat(thefile.c_str(), 0664); 138 if (fd < 0) { 139 perror("creat"); 140 fatal("Can't open memory trace file '%s'\n", filename); 141 } 142 143 gzFile compressedMemory = gzdopen(fd, "wb"); 144 if (compressedMemory == NULL) 145 fatal("Insufficient memory to allocate compression state for %s\n", 146 filename); 147 148 if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) != 149 uncompressed_trace_size) { 150 fatal("Write failed on memory trace file '%s'\n", filename); 151 } 152 153 if (gzclose(compressedMemory)) { 154 fatal("Close failed on memory trace file '%s'\n", filename); 155 } 156 delete raw_data; 157} 158 159void 160RubySystem::serialize(std::ostream &os) 161{ 162 m_cooldown_enabled = true; 163 164 vector<Sequencer*> sequencer_map; 165 Sequencer* sequencer_ptr = NULL; 166 int cntrl_id = -1; 167 168 169 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 170 
sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 171 if (sequencer_ptr == NULL) { 172 sequencer_ptr = sequencer_map[cntrl]; 173 cntrl_id = cntrl; 174 } 175 } 176 177 assert(sequencer_ptr != NULL); 178 179 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 180 if (sequencer_map[cntrl] == NULL) { 181 sequencer_map[cntrl] = sequencer_ptr; 182 } 183 } 184 185 // Store the cache-block size, so we are able to restore on systems with a 186 // different cache-block size. CacheRecorder depends on the correct 187 // cache-block size upon unserializing. 188 uint64 block_size_bytes = getBlockSizeBytes(); 189 SERIALIZE_SCALAR(block_size_bytes); 190 191 DPRINTF(RubyCacheTrace, "Recording Cache Trace\n"); 192 // Create the CacheRecorder and record the cache trace 193 m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map, 194 block_size_bytes); 195 196 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 197 m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder); 198 } 199 200 DPRINTF(RubyCacheTrace, "Cache Trace Complete\n"); 201 // save the current tick value 202 Tick curtick_original = curTick(); 203 // save the event queue head 204 Event* eventq_head = eventq->replaceHead(NULL); 205 DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n", 206 curtick_original); 207 208 // Schedule an event to start cache cooldown 209 DPRINTF(RubyCacheTrace, "Starting cache flush\n"); 210 enqueueRubyEvent(curTick()); 211 simulate(); 212 DPRINTF(RubyCacheTrace, "Cache flush complete\n"); 213 214 // Restore eventq head 215 eventq_head = eventq->replaceHead(eventq_head); 216 // Restore curTick 217 setCurTick(curtick_original); 218 219 uint8_t *raw_data = NULL; 220 221 if (m_mem_vec != NULL) { 222 uint64 memory_trace_size = m_mem_vec->collatePages(raw_data); 223 224 string memory_trace_file = name() + ".memory.gz"; 225 writeCompressedTrace(raw_data, memory_trace_file, 226 memory_trace_size); 227 228 
SERIALIZE_SCALAR(memory_trace_file); 229 SERIALIZE_SCALAR(memory_trace_size); 230 231 } else { 232 for (int i = 0; i < m_sparse_memory_vector.size(); ++i) { 233 m_sparse_memory_vector[i]->recordBlocks(cntrl_id, 234 m_cache_recorder); 235 } 236 } 237 238 // Aggergate the trace entries together into a single array 239 raw_data = new uint8_t[4096]; 240 uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data, 241 4096); 242 string cache_trace_file = name() + ".cache.gz"; 243 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size); 244 245 SERIALIZE_SCALAR(cache_trace_file); 246 SERIALIZE_SCALAR(cache_trace_size); 247 248 m_cooldown_enabled = false; 249} 250 251void 252RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data, 253 uint64& uncompressed_trace_size) 254{ 255 // Read the trace file 256 gzFile compressedTrace; 257 258 // trace file 259 int fd = open(filename.c_str(), O_RDONLY); 260 if (fd < 0) { 261 perror("open"); 262 fatal("Unable to open trace file %s", filename); 263 } 264 265 compressedTrace = gzdopen(fd, "rb"); 266 if (compressedTrace == NULL) { 267 fatal("Insufficient memory to allocate compression state for %s\n", 268 filename); 269 } 270 271 raw_data = new uint8_t[uncompressed_trace_size]; 272 if (gzread(compressedTrace, raw_data, uncompressed_trace_size) < 273 uncompressed_trace_size) { 274 fatal("Unable to read complete trace from file %s\n", filename); 275 } 276 277 if (gzclose(compressedTrace)) { 278 fatal("Failed to close cache trace file '%s'\n", filename); 279 } 280} 281 282void 283RubySystem::unserialize(Checkpoint *cp, const string §ion) 284{ 285 uint8_t *uncompressed_trace = NULL; 286 287 // This value should be set to the checkpoint-system's block-size. 288 // Optional, as checkpoints without it can be run if the 289 // checkpoint-system's block-size == current block-size. 
290 uint64 block_size_bytes = getBlockSizeBytes(); 291 UNSERIALIZE_OPT_SCALAR(block_size_bytes); 292 293 if (m_mem_vec != NULL) { 294 string memory_trace_file; 295 uint64 memory_trace_size = 0; 296 297 UNSERIALIZE_SCALAR(memory_trace_file); 298 UNSERIALIZE_SCALAR(memory_trace_size); 299 memory_trace_file = cp->cptDir + "/" + memory_trace_file; 300 301 readCompressedTrace(memory_trace_file, uncompressed_trace, 302 memory_trace_size); 303 m_mem_vec->populatePages(uncompressed_trace); 304 305 delete [] uncompressed_trace; 306 uncompressed_trace = NULL; 307 } 308 309 string cache_trace_file; 310 uint64 cache_trace_size = 0; 311 312 UNSERIALIZE_SCALAR(cache_trace_file); 313 UNSERIALIZE_SCALAR(cache_trace_size); 314 cache_trace_file = cp->cptDir + "/" + cache_trace_file; 315 316 readCompressedTrace(cache_trace_file, uncompressed_trace, 317 cache_trace_size); 318 m_warmup_enabled = true; 319 320 vector<Sequencer*> sequencer_map; 321 Sequencer* t = NULL; 322 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 323 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 324 if (t == NULL) t = sequencer_map[cntrl]; 325 } 326 327 assert(t != NULL); 328 329 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 330 if (sequencer_map[cntrl] == NULL) { 331 sequencer_map[cntrl] = t; 332 } 333 } 334 335 m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, 336 sequencer_map, block_size_bytes); 337} 338 339void 340RubySystem::startup() 341{ 342 343 // Ruby restores state from a checkpoint by resetting the clock to 0 and 344 // playing the requests that can possibly re-generate the cache state. 345 // The clock value is set to the actual checkpointed value once all the 346 // requests have been executed. 347 // 348 // This way of restoring state is pretty finicky. 
For example, if a 349 // Ruby component reads time before the state has been restored, it would 350 // cache this value and hence its clock would not be reset to 0, when 351 // Ruby resets the global clock. This can potentially result in a 352 // deadlock. 353 // 354 // The solution is that no Ruby component should read time before the 355 // simulation starts. And then one also needs to hope that the time 356 // Ruby finishes restoring the state is less than the time when the 357 // state was checkpointed. 358 359 if (m_warmup_enabled) { 360 // save the current tick value 361 Tick curtick_original = curTick(); 362 // save the event queue head 363 Event* eventq_head = eventq->replaceHead(NULL); 364 // set curTick to 0 and reset Ruby System's clock 365 setCurTick(0); 366 resetClock(); 367 368 // Schedule an event to start cache warmup 369 enqueueRubyEvent(curTick()); 370 simulate(); 371 372 delete m_cache_recorder; 373 m_cache_recorder = NULL; 374 m_warmup_enabled = false; 375 376 // reset DRAM so that it's not waiting for events on the old event 377 // queue 378 for (int i = 0; i < m_memory_controller_vec.size(); ++i) { 379 m_memory_controller_vec[i]->reset(); 380 } 381 382 // Restore eventq head 383 eventq_head = eventq->replaceHead(eventq_head); 384 // Restore curTick and Ruby System's clock 385 setCurTick(curtick_original); 386 resetClock(); 387 } 388 389 resetStats(); 390} 391 392void 393RubySystem::RubyEvent::process() 394{ 395 if (ruby_system->m_warmup_enabled) { 396 ruby_system->m_cache_recorder->enqueueNextFetchRequest(); 397 } else if (ruby_system->m_cooldown_enabled) { 398 ruby_system->m_cache_recorder->enqueueNextFlushRequest(); 399 } 400} 401 402void 403RubySystem::resetStats() 404{ 405 g_ruby_start = curCycle(); 406} 407 408bool 409RubySystem::functionalRead(PacketPtr pkt) 410{ 411 Address address(pkt->getAddr()); 412 Address line_address(address); 413 line_address.makeLineAddress(); 414 415 AccessPermission access_perm = AccessPermission_NotPresent; 
416 int num_controllers = m_abs_cntrl_vec.size(); 417 418 DPRINTF(RubySystem, "Functional Read request for %s\n",address); 419 420 unsigned int num_ro = 0; 421 unsigned int num_rw = 0; 422 unsigned int num_busy = 0; 423 unsigned int num_backing_store = 0; 424 unsigned int num_invalid = 0; 425 426 // In this loop we count the number of controllers that have the given 427 // address in read only, read write and busy states. 428 for (unsigned int i = 0; i < num_controllers; ++i) { 429 access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address); 430 if (access_perm == AccessPermission_Read_Only) 431 num_ro++; 432 else if (access_perm == AccessPermission_Read_Write) 433 num_rw++; 434 else if (access_perm == AccessPermission_Busy) 435 num_busy++; 436 else if (access_perm == AccessPermission_Backing_Store) 437 // See RubySlicc_Exports.sm for details, but Backing_Store is meant 438 // to represent blocks in memory *for Broadcast/Snooping protocols*, 439 // where memory has no idea whether it has an exclusive copy of data 440 // or not. 441 num_backing_store++; 442 else if (access_perm == AccessPermission_Invalid || 443 access_perm == AccessPermission_NotPresent) 444 num_invalid++; 445 } 446 assert(num_rw <= 1); 447 448 uint8_t *data = pkt->getPtr<uint8_t>(true); 449 unsigned int size_in_bytes = pkt->getSize(); 450 unsigned startByte = address.getAddress() - line_address.getAddress(); 451 452 // This if case is meant to capture what happens in a Broadcast/Snoop 453 // protocol where the block does not exist in the cache hierarchy. You 454 // only want to read from the Backing_Store memory if there is no copy in 455 // the cache hierarchy, otherwise you want to try to read the RO or RW 456 // copies existing in the cache hierarchy (covered by the else statement). 
457 // The reason is because the Backing_Store memory could easily be stale, if 458 // there are copies floating around the cache hierarchy, so you want to read 459 // it only if it's not in the cache hierarchy at all. 460 if (num_invalid == (num_controllers - 1) && 461 num_backing_store == 1) { 462 DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n"); 463 for (unsigned int i = 0; i < num_controllers; ++i) { 464 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 465 if (access_perm == AccessPermission_Backing_Store) { 466 DataBlock& block = m_abs_cntrl_vec[i]-> 467 getDataBlock(line_address); 468 469 DPRINTF(RubySystem, "reading from %s block %s\n", 470 m_abs_cntrl_vec[i]->name(), block); 471 for (unsigned j = 0; j < size_in_bytes; ++j) { 472 data[j] = block.getByte(j + startByte); 473 } 474 return true; 475 } 476 } 477 } else if (num_ro > 0 || num_rw == 1) { 478 // In Broadcast/Snoop protocols, this covers if you know the block 479 // exists somewhere in the caching hierarchy, then you want to read any 480 // valid RO or RW block. In directory protocols, same thing, you want 481 // to read any valid readable copy of the block. 482 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n", 483 num_busy, num_ro, num_rw); 484 // In this loop, we try to figure which controller has a read only or 485 // a read write copy of the given address. Any valid copy would suffice 486 // for a functional read. 
487 for (unsigned int i = 0;i < num_controllers;++i) { 488 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 489 if (access_perm == AccessPermission_Read_Only || 490 access_perm == AccessPermission_Read_Write) { 491 DataBlock& block = m_abs_cntrl_vec[i]-> 492 getDataBlock(line_address); 493 494 DPRINTF(RubySystem, "reading from %s block %s\n", 495 m_abs_cntrl_vec[i]->name(), block); 496 for (unsigned j = 0; j < size_in_bytes; ++j) { 497 data[j] = block.getByte(j + startByte); 498 } 499 return true; 500 } 501 } 502 } 503 504 return false; 505} 506 507// The function searches through all the buffers that exist in different 508// cache, directory and memory controllers, and in the network components 509// and writes the data portion of those that hold the address specified 510// in the packet. 511bool 512RubySystem::functionalWrite(PacketPtr pkt) 513{ 514 Address addr(pkt->getAddr()); 515 Address line_addr = line_address(addr); 516 AccessPermission access_perm = AccessPermission_NotPresent; 517 int num_controllers = m_abs_cntrl_vec.size(); 518 519 DPRINTF(RubySystem, "Functional Write request for %s\n",addr); 520 521 uint8_t *data = pkt->getPtr<uint8_t>(true); 522 unsigned int size_in_bytes = pkt->getSize(); 523 unsigned startByte = addr.getAddress() - line_addr.getAddress(); 524 525 uint32_t M5_VAR_USED num_functional_writes = 0; 526 527 for (unsigned int i = 0; i < num_controllers;++i) { 528 num_functional_writes += 529 m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt); 530 531 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr); 532 if (access_perm != AccessPermission_Invalid && 533 access_perm != AccessPermission_NotPresent) { 534 535 num_functional_writes++; 536 537 DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr); 538 DPRINTF(RubySystem, "%s\n",block); 539 for (unsigned j = 0; j < size_in_bytes; ++j) { 540 block.setByte(j + startByte, data[j]); 541 } 542 DPRINTF(RubySystem, "%s\n",block); 543 } 544 } 545 546 for 
(unsigned int i = 0; i < m_memory_controller_vec.size() ;++i) { 547 num_functional_writes += 548 m_memory_controller_vec[i]->functionalWriteBuffers(pkt); 549 } 550 551 num_functional_writes += m_network->functionalWrite(pkt); 552 DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes); 553 554 return true; 555} 556 557#ifdef CHECK_COHERENCE 558// This code will check for cases if the given cache block is exclusive in 559// one node and shared in another-- a coherence violation 560// 561// To use, the SLICC specification must call sequencer.checkCoherence(address) 562// when the controller changes to a state with new permissions. Do this 563// in setState. The SLICC spec must also define methods "isBlockShared" 564// and "isBlockExclusive" that are specific to that protocol 565// 566void 567RubySystem::checkGlobalCoherenceInvariant(const Address& addr) 568{ 569#if 0 570 NodeID exclusive = -1; 571 bool sharedDetected = false; 572 NodeID lastShared = -1; 573 574 for (int i = 0; i < m_chip_vector.size(); i++) { 575 if (m_chip_vector[i]->isBlockExclusive(addr)) { 576 if (exclusive != -1) { 577 // coherence violation 578 WARN_EXPR(exclusive); 579 WARN_EXPR(m_chip_vector[i]->getID()); 580 WARN_EXPR(addr); 581 WARN_EXPR(getTime()); 582 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips"); 583 } else if (sharedDetected) { 584 WARN_EXPR(lastShared); 585 WARN_EXPR(m_chip_vector[i]->getID()); 586 WARN_EXPR(addr); 587 WARN_EXPR(getTime()); 588 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 589 } else { 590 exclusive = m_chip_vector[i]->getID(); 591 } 592 } else if (m_chip_vector[i]->isBlockShared(addr)) { 593 sharedDetected = true; 594 lastShared = m_chip_vector[i]->getID(); 595 596 if (exclusive != -1) { 597 WARN_EXPR(lastShared); 598 WARN_EXPR(exclusive); 599 WARN_EXPR(addr); 600 WARN_EXPR(getTime()); 601 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 602 } 603 } 604 } 605#endif 606} 607#endif 
608 609RubySystem * 610RubySystemParams::create() 611{ 612 return new RubySystem(this); 613} 614