// RubySystem.cc (gem5, revision 9670)
/*
 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29#include <fcntl.h> 30#include <zlib.h> 31 32#include <cstdio> 33 34#include "base/intmath.hh" 35#include "base/statistics.hh" 36#include "debug/RubyCacheTrace.hh" 37#include "debug/RubySystem.hh" 38#include "mem/ruby/common/Address.hh" 39#include "mem/ruby/network/Network.hh" 40#include "mem/ruby/profiler/Profiler.hh" 41#include "mem/ruby/system/System.hh" 42#include "sim/eventq.hh" 43#include "sim/simulate.hh" 44 45using namespace std; 46 47int RubySystem::m_random_seed; 48bool RubySystem::m_randomization; 49uint32_t RubySystem::m_block_size_bytes; 50uint32_t RubySystem::m_block_size_bits; 51uint64_t RubySystem::m_memory_size_bytes; 52uint32_t RubySystem::m_memory_size_bits; 53 54RubySystem::RubySystem(const Params *p) 55 : ClockedObject(p) 56{ 57 if (g_system_ptr != NULL) 58 fatal("Only one RubySystem object currently allowed.\n"); 59 60 m_random_seed = p->random_seed; 61 srandom(m_random_seed); 62 m_randomization = p->randomization; 63 64 m_block_size_bytes = p->block_size_bytes; 65 assert(isPowerOf2(m_block_size_bytes)); 66 m_block_size_bits = floorLog2(m_block_size_bytes); 67 68 m_memory_size_bytes = p->mem_size; 69 if (m_memory_size_bytes == 0) { 70 m_memory_size_bits = 0; 71 } else { 72 m_memory_size_bits = ceilLog2(m_memory_size_bytes); 73 } 74 75 if (p->no_mem_vec) { 76 m_mem_vec_ptr = NULL; 77 } else { 78 m_mem_vec_ptr = new MemoryVector; 79 m_mem_vec_ptr->resize(m_memory_size_bytes); 80 } 81 82 // Print ruby configuration and stats at exit and when asked for 83 Stats::registerDumpCallback(new RubyDumpStatsCallback(p->stats_filename, 84 this)); 85 86 m_warmup_enabled = false; 87 m_cooldown_enabled = false; 88 89 // Setup the global variables used in Ruby 90 g_system_ptr = this; 91 92 // Resize to the size of different machine types 93 g_abs_controls.resize(MachineType_NUM); 94} 95 96void 97RubySystem::registerNetwork(Network* network_ptr) 98{ 99 m_network_ptr = network_ptr; 100} 101 102void 103RubySystem::registerProfiler(Profiler* 
profiler_ptr) 104{ 105 m_profiler_ptr = profiler_ptr; 106} 107 108void 109RubySystem::registerAbstractController(AbstractController* cntrl) 110{ 111 m_abs_cntrl_vec.push_back(cntrl); 112 113 MachineID id = cntrl->getMachineID(); 114 g_abs_controls[id.getType()][id.getNum()] = cntrl; 115} 116 117void 118RubySystem::registerSparseMemory(SparseMemory* s) 119{ 120 m_sparse_memory_vector.push_back(s); 121} 122 123void 124RubySystem::registerMemController(MemoryControl *mc) { 125 m_memory_controller_vec.push_back(mc); 126} 127 128RubySystem::~RubySystem() 129{ 130 delete m_network_ptr; 131 delete m_profiler_ptr; 132 if (m_mem_vec_ptr) 133 delete m_mem_vec_ptr; 134} 135 136void 137RubySystem::printStats(ostream& out) 138{ 139 const time_t T = time(NULL); 140 tm *localTime = localtime(&T); 141 char buf[100]; 142 strftime(buf, 100, "%b/%d/%Y %H:%M:%S", localTime); 143 144 out << "Real time: " << buf << endl; 145 146 m_profiler_ptr->printStats(out); 147 m_network_ptr->printStats(out); 148 149 for (uint32_t i = 0;i < g_abs_controls.size(); ++i) { 150 for (map<uint32_t, AbstractController *>::iterator it = 151 g_abs_controls[i].begin(); 152 it != g_abs_controls[i].end(); ++it) { 153 154 ((*it).second)->printStats(out); 155 } 156 } 157} 158 159void 160RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename, 161 uint64 uncompressed_trace_size) 162{ 163 // Create the checkpoint file for the memory 164 string thefile = Checkpoint::dir() + "/" + filename.c_str(); 165 166 int fd = creat(thefile.c_str(), 0664); 167 if (fd < 0) { 168 perror("creat"); 169 fatal("Can't open memory trace file '%s'\n", filename); 170 } 171 172 gzFile compressedMemory = gzdopen(fd, "wb"); 173 if (compressedMemory == NULL) 174 fatal("Insufficient memory to allocate compression state for %s\n", 175 filename); 176 177 if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) != 178 uncompressed_trace_size) { 179 fatal("Write failed on memory trace file '%s'\n", filename); 180 } 181 182 if 
(gzclose(compressedMemory)) { 183 fatal("Close failed on memory trace file '%s'\n", filename); 184 } 185 delete raw_data; 186} 187 188void 189RubySystem::serialize(std::ostream &os) 190{ 191 m_cooldown_enabled = true; 192 193 vector<Sequencer*> sequencer_map; 194 Sequencer* sequencer_ptr = NULL; 195 int cntrl_id = -1; 196 197 198 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 199 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 200 if (sequencer_ptr == NULL) { 201 sequencer_ptr = sequencer_map[cntrl]; 202 cntrl_id = cntrl; 203 } 204 } 205 206 assert(sequencer_ptr != NULL); 207 208 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 209 if (sequencer_map[cntrl] == NULL) { 210 sequencer_map[cntrl] = sequencer_ptr; 211 } 212 } 213 214 DPRINTF(RubyCacheTrace, "Recording Cache Trace\n"); 215 // Create the CacheRecorder and record the cache trace 216 m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map); 217 218 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 219 m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder); 220 } 221 222 DPRINTF(RubyCacheTrace, "Cache Trace Complete\n"); 223 // save the current tick value 224 Tick curtick_original = curTick(); 225 // save the event queue head 226 Event* eventq_head = eventq->replaceHead(NULL); 227 DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n", 228 curtick_original); 229 230 // Schedule an event to start cache cooldown 231 DPRINTF(RubyCacheTrace, "Starting cache flush\n"); 232 enqueueRubyEvent(curTick()); 233 simulate(); 234 DPRINTF(RubyCacheTrace, "Cache flush complete\n"); 235 236 // Restore eventq head 237 eventq_head = eventq->replaceHead(eventq_head); 238 // Restore curTick 239 setCurTick(curtick_original); 240 241 uint8_t *raw_data = NULL; 242 243 if (m_mem_vec_ptr != NULL) { 244 uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data); 245 246 string memory_trace_file = name() + ".memory.gz"; 247 
writeCompressedTrace(raw_data, memory_trace_file, 248 memory_trace_size); 249 250 SERIALIZE_SCALAR(memory_trace_file); 251 SERIALIZE_SCALAR(memory_trace_size); 252 253 } else { 254 for (int i = 0; i < m_sparse_memory_vector.size(); ++i) { 255 m_sparse_memory_vector[i]->recordBlocks(cntrl_id, 256 m_cache_recorder); 257 } 258 } 259 260 // Aggergate the trace entries together into a single array 261 raw_data = new uint8_t[4096]; 262 uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data, 263 4096); 264 string cache_trace_file = name() + ".cache.gz"; 265 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size); 266 267 SERIALIZE_SCALAR(cache_trace_file); 268 SERIALIZE_SCALAR(cache_trace_size); 269 270 m_cooldown_enabled = false; 271} 272 273void 274RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data, 275 uint64& uncompressed_trace_size) 276{ 277 // Read the trace file 278 gzFile compressedTrace; 279 280 // trace file 281 int fd = open(filename.c_str(), O_RDONLY); 282 if (fd < 0) { 283 perror("open"); 284 fatal("Unable to open trace file %s", filename); 285 } 286 287 compressedTrace = gzdopen(fd, "rb"); 288 if (compressedTrace == NULL) { 289 fatal("Insufficient memory to allocate compression state for %s\n", 290 filename); 291 } 292 293 raw_data = new uint8_t[uncompressed_trace_size]; 294 if (gzread(compressedTrace, raw_data, uncompressed_trace_size) < 295 uncompressed_trace_size) { 296 fatal("Unable to read complete trace from file %s\n", filename); 297 } 298 299 if (gzclose(compressedTrace)) { 300 fatal("Failed to close cache trace file '%s'\n", filename); 301 } 302} 303 304void 305RubySystem::unserialize(Checkpoint *cp, const string §ion) 306{ 307 uint8_t *uncompressed_trace = NULL; 308 309 if (m_mem_vec_ptr != NULL) { 310 string memory_trace_file; 311 uint64 memory_trace_size = 0; 312 313 UNSERIALIZE_SCALAR(memory_trace_file); 314 UNSERIALIZE_SCALAR(memory_trace_size); 315 memory_trace_file = cp->cptDir + "/" + 
memory_trace_file; 316 317 readCompressedTrace(memory_trace_file, uncompressed_trace, 318 memory_trace_size); 319 m_mem_vec_ptr->populatePages(uncompressed_trace); 320 321 delete [] uncompressed_trace; 322 uncompressed_trace = NULL; 323 } 324 325 string cache_trace_file; 326 uint64 cache_trace_size = 0; 327 328 UNSERIALIZE_SCALAR(cache_trace_file); 329 UNSERIALIZE_SCALAR(cache_trace_size); 330 cache_trace_file = cp->cptDir + "/" + cache_trace_file; 331 332 readCompressedTrace(cache_trace_file, uncompressed_trace, 333 cache_trace_size); 334 m_warmup_enabled = true; 335 336 vector<Sequencer*> sequencer_map; 337 Sequencer* t = NULL; 338 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 339 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 340 if (t == NULL) t = sequencer_map[cntrl]; 341 } 342 343 assert(t != NULL); 344 345 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 346 if (sequencer_map[cntrl] == NULL) { 347 sequencer_map[cntrl] = t; 348 } 349 } 350 351 m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, 352 sequencer_map); 353} 354 355void 356RubySystem::startup() 357{ 358 359 // Ruby restores state from a checkpoint by resetting the clock to 0 and 360 // playing the requests that can possibly re-generate the cache state. 361 // The clock value is set to the actual checkpointed value once all the 362 // requests have been executed. 363 // 364 // This way of restoring state is pretty finicky. For example, if a 365 // Ruby component reads time before the state has been restored, it would 366 // cache this value and hence its clock would not be reset to 0, when 367 // Ruby resets the global clock. This can potentially result in a 368 // deadlock. 369 // 370 // The solution is that no Ruby component should read time before the 371 // simulation starts. And then one also needs to hope that the time 372 // Ruby finishes restoring the state is less than the time when the 373 // state was checkpointed. 
374 375 if (m_warmup_enabled) { 376 // save the current tick value 377 Tick curtick_original = curTick(); 378 // save the event queue head 379 Event* eventq_head = eventq->replaceHead(NULL); 380 // set curTick to 0 and reset Ruby System's clock 381 setCurTick(0); 382 resetClock(); 383 384 // Schedule an event to start cache warmup 385 enqueueRubyEvent(curTick()); 386 simulate(); 387 388 delete m_cache_recorder; 389 m_cache_recorder = NULL; 390 m_warmup_enabled = false; 391 392 // reset DRAM so that it's not waiting for events on the old event 393 // queue 394 for (int i = 0; i < m_memory_controller_vec.size(); ++i) { 395 m_memory_controller_vec[i]->reset(); 396 } 397 398 // Restore eventq head 399 eventq_head = eventq->replaceHead(eventq_head); 400 // Restore curTick and Ruby System's clock 401 setCurTick(curtick_original); 402 resetClock(); 403 } 404 405 resetStats(); 406} 407 408void 409RubySystem::RubyEvent::process() 410{ 411 if (ruby_system->m_warmup_enabled) { 412 ruby_system->m_cache_recorder->enqueueNextFetchRequest(); 413 } else if (ruby_system->m_cooldown_enabled) { 414 ruby_system->m_cache_recorder->enqueueNextFlushRequest(); 415 } 416} 417 418void 419RubySystem::resetStats() 420{ 421 m_profiler_ptr->clearStats(); 422 m_network_ptr->clearStats(); 423 for (uint32_t cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 424 m_abs_cntrl_vec[cntrl]->clearStats(); 425 } 426 427 g_ruby_start = curCycle(); 428} 429 430bool 431RubySystem::functionalRead(PacketPtr pkt) 432{ 433 Address address(pkt->getAddr()); 434 Address line_address(address); 435 line_address.makeLineAddress(); 436 437 AccessPermission access_perm = AccessPermission_NotPresent; 438 int num_controllers = m_abs_cntrl_vec.size(); 439 440 DPRINTF(RubySystem, "Functional Read request for %s\n",address); 441 442 unsigned int num_ro = 0; 443 unsigned int num_rw = 0; 444 unsigned int num_busy = 0; 445 unsigned int num_backing_store = 0; 446 unsigned int num_invalid = 0; 447 448 // In this loop we count 
the number of controllers that have the given 449 // address in read only, read write and busy states. 450 for (unsigned int i = 0; i < num_controllers; ++i) { 451 access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address); 452 if (access_perm == AccessPermission_Read_Only) 453 num_ro++; 454 else if (access_perm == AccessPermission_Read_Write) 455 num_rw++; 456 else if (access_perm == AccessPermission_Busy) 457 num_busy++; 458 else if (access_perm == AccessPermission_Backing_Store) 459 // See RubySlicc_Exports.sm for details, but Backing_Store is meant 460 // to represent blocks in memory *for Broadcast/Snooping protocols*, 461 // where memory has no idea whether it has an exclusive copy of data 462 // or not. 463 num_backing_store++; 464 else if (access_perm == AccessPermission_Invalid || 465 access_perm == AccessPermission_NotPresent) 466 num_invalid++; 467 } 468 assert(num_rw <= 1); 469 470 uint8_t *data = pkt->getPtr<uint8_t>(true); 471 unsigned int size_in_bytes = pkt->getSize(); 472 unsigned startByte = address.getAddress() - line_address.getAddress(); 473 474 // This if case is meant to capture what happens in a Broadcast/Snoop 475 // protocol where the block does not exist in the cache hierarchy. You 476 // only want to read from the Backing_Store memory if there is no copy in 477 // the cache hierarchy, otherwise you want to try to read the RO or RW 478 // copies existing in the cache hierarchy (covered by the else statement). 479 // The reason is because the Backing_Store memory could easily be stale, if 480 // there are copies floating around the cache hierarchy, so you want to read 481 // it only if it's not in the cache hierarchy at all. 
482 if (num_invalid == (num_controllers - 1) && 483 num_backing_store == 1) { 484 DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n"); 485 for (unsigned int i = 0; i < num_controllers; ++i) { 486 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 487 if (access_perm == AccessPermission_Backing_Store) { 488 DataBlock& block = m_abs_cntrl_vec[i]-> 489 getDataBlock(line_address); 490 491 DPRINTF(RubySystem, "reading from %s block %s\n", 492 m_abs_cntrl_vec[i]->name(), block); 493 for (unsigned j = 0; j < size_in_bytes; ++j) { 494 data[j] = block.getByte(j + startByte); 495 } 496 return true; 497 } 498 } 499 } else if (num_ro > 0 || num_rw == 1) { 500 // In Broadcast/Snoop protocols, this covers if you know the block 501 // exists somewhere in the caching hierarchy, then you want to read any 502 // valid RO or RW block. In directory protocols, same thing, you want 503 // to read any valid readable copy of the block. 504 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n", 505 num_busy, num_ro, num_rw); 506 // In this loop, we try to figure which controller has a read only or 507 // a read write copy of the given address. Any valid copy would suffice 508 // for a functional read. 
509 for (unsigned int i = 0;i < num_controllers;++i) { 510 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 511 if (access_perm == AccessPermission_Read_Only || 512 access_perm == AccessPermission_Read_Write) { 513 DataBlock& block = m_abs_cntrl_vec[i]-> 514 getDataBlock(line_address); 515 516 DPRINTF(RubySystem, "reading from %s block %s\n", 517 m_abs_cntrl_vec[i]->name(), block); 518 for (unsigned j = 0; j < size_in_bytes; ++j) { 519 data[j] = block.getByte(j + startByte); 520 } 521 return true; 522 } 523 } 524 } 525 526 return false; 527} 528 529// The function searches through all the buffers that exist in different 530// cache, directory and memory controllers, and in the network components 531// and writes the data portion of those that hold the address specified 532// in the packet. 533bool 534RubySystem::functionalWrite(PacketPtr pkt) 535{ 536 Address addr(pkt->getAddr()); 537 Address line_addr = line_address(addr); 538 AccessPermission access_perm = AccessPermission_NotPresent; 539 int num_controllers = m_abs_cntrl_vec.size(); 540 541 DPRINTF(RubySystem, "Functional Write request for %s\n",addr); 542 543 uint8_t *data = pkt->getPtr<uint8_t>(true); 544 unsigned int size_in_bytes = pkt->getSize(); 545 unsigned startByte = addr.getAddress() - line_addr.getAddress(); 546 547 uint32_t M5_VAR_USED num_functional_writes = 0; 548 549 for (unsigned int i = 0; i < num_controllers;++i) { 550 num_functional_writes += 551 m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt); 552 553 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr); 554 if (access_perm != AccessPermission_Invalid && 555 access_perm != AccessPermission_NotPresent) { 556 557 num_functional_writes++; 558 559 DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr); 560 DPRINTF(RubySystem, "%s\n",block); 561 for (unsigned j = 0; j < size_in_bytes; ++j) { 562 block.setByte(j + startByte, data[j]); 563 } 564 DPRINTF(RubySystem, "%s\n",block); 565 } 566 } 567 568 for 
(unsigned int i = 0; i < m_memory_controller_vec.size() ;++i) { 569 num_functional_writes += 570 m_memory_controller_vec[i]->functionalWriteBuffers(pkt); 571 } 572 573 num_functional_writes += m_network_ptr->functionalWrite(pkt); 574 DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes); 575 576 return true; 577} 578 579#ifdef CHECK_COHERENCE 580// This code will check for cases if the given cache block is exclusive in 581// one node and shared in another-- a coherence violation 582// 583// To use, the SLICC specification must call sequencer.checkCoherence(address) 584// when the controller changes to a state with new permissions. Do this 585// in setState. The SLICC spec must also define methods "isBlockShared" 586// and "isBlockExclusive" that are specific to that protocol 587// 588void 589RubySystem::checkGlobalCoherenceInvariant(const Address& addr) 590{ 591#if 0 592 NodeID exclusive = -1; 593 bool sharedDetected = false; 594 NodeID lastShared = -1; 595 596 for (int i = 0; i < m_chip_vector.size(); i++) { 597 if (m_chip_vector[i]->isBlockExclusive(addr)) { 598 if (exclusive != -1) { 599 // coherence violation 600 WARN_EXPR(exclusive); 601 WARN_EXPR(m_chip_vector[i]->getID()); 602 WARN_EXPR(addr); 603 WARN_EXPR(getTime()); 604 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips"); 605 } else if (sharedDetected) { 606 WARN_EXPR(lastShared); 607 WARN_EXPR(m_chip_vector[i]->getID()); 608 WARN_EXPR(addr); 609 WARN_EXPR(getTime()); 610 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 611 } else { 612 exclusive = m_chip_vector[i]->getID(); 613 } 614 } else if (m_chip_vector[i]->isBlockShared(addr)) { 615 sharedDetected = true; 616 lastShared = m_chip_vector[i]->getID(); 617 618 if (exclusive != -1) { 619 WARN_EXPR(lastShared); 620 WARN_EXPR(exclusive); 621 WARN_EXPR(addr); 622 WARN_EXPR(getTime()); 623 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 624 } 625 } 626 } 627#endif 628} 
629#endif 630 631RubySystem * 632RubySystemParams::create() 633{ 634 return new RubySystem(this); 635} 636 637/** 638 * virtual process function that is invoked when the callback 639 * queue is executed. 640 */ 641void 642RubyDumpStatsCallback::process() 643{ 644 ruby_system->printStats(*os); 645} 646