// RubySystem.cc revision 9350
1/* 2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <fcntl.h>
#include <zlib.h>

#include <cstdio>

#include "base/intmath.hh"
#include "base/statistics.hh"
#include "debug/RubyCacheTrace.hh"
#include "debug/RubySystem.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"
#include "sim/eventq.hh"
#include "sim/simulate.hh"

using namespace std;

// Definitions of RubySystem's static configuration members; the values are
// filled in by the constructor and shared by every Ruby component.
int RubySystem::m_random_seed;
bool RubySystem::m_randomization;
int RubySystem::m_block_size_bytes;
int RubySystem::m_block_size_bits;
uint64 RubySystem::m_memory_size_bytes;
int RubySystem::m_memory_size_bits;

// Construct the (single) RubySystem from the Python-generated Params.
// Publishes itself through the global g_system_ptr, seeds the random number
// generator, derives block/memory size shifts, and optionally allocates the
// backing MemoryVector.
RubySystem::RubySystem(const Params *p)
    : ClockedObject(p)
{
    // Ruby relies on the g_system_ptr singleton; a second instance is fatal.
    if (g_system_ptr != NULL)
        fatal("Only one RubySystem object currently allowed.\n");

    m_random_seed = p->random_seed;
    srandom(m_random_seed);
    m_randomization = p->randomization;

    // Cache line size must be a power of two so address math can use shifts.
    m_block_size_bytes = p->block_size_bytes;
    assert(isPowerOf2(m_block_size_bytes));
    m_block_size_bits = floorLog2(m_block_size_bytes);

    m_memory_size_bytes = p->mem_size;
    if (m_memory_size_bytes == 0) {
        m_memory_size_bits = 0;
    } else {
        m_memory_size_bits = floorLog2(m_memory_size_bytes);
    }

    g_system_ptr = this;
    // no_mem_vec means a sparse-memory configuration: no flat backing store.
    if (p->no_mem_vec) {
        m_mem_vec_ptr = NULL;
    } else {
        m_mem_vec_ptr = new MemoryVector;
        m_mem_vec_ptr->resize(m_memory_size_bytes);
    }

    // Print ruby configuration and stats at exit and when asked for
    Stats::registerDumpCallback(new RubyDumpStatsCallback(p->stats_filename,
                                                          this));

    // Warmup/cooldown are only enabled around checkpoint restore/save.
    m_warmup_enabled = false;
    m_cooldown_enabled = false;
}

// SimObject init hook: clear stats so the profiler's start time matches the
// (possibly restored) curTick().
void
RubySystem::init()
{
    m_profiler_ptr->clearStats();
    m_network_ptr->clearStats();
}

// Called by the network during construction to register itself.
void
RubySystem::registerNetwork(Network* network_ptr)
{
    m_network_ptr = network_ptr;
}

// Called by the profiler during construction to register itself.
void
RubySystem::registerProfiler(Profiler* profiler_ptr)
{
    m_profiler_ptr = profiler_ptr;
}
// Register a cache/directory controller with the system.
void
RubySystem::registerAbstractController(AbstractController* cntrl)
{
    m_abs_cntrl_vec.push_back(cntrl);
}

// Register a sparse memory (used when there is no flat MemoryVector).
void
RubySystem::registerSparseMemory(SparseMemory* s)
{
    m_sparse_memory_vector.push_back(s);
}

// Register a DRAM memory controller.
void
RubySystem::registerMemController(MemoryControl *mc) {
    m_memory_controller_vec.push_back(mc);
}

RubySystem::~RubySystem()
{
    delete m_network_ptr;
    delete m_profiler_ptr;
    if (m_mem_vec_ptr)
        delete m_mem_vec_ptr;
}

// Print a wall-clock timestamp followed by profiler and network statistics.
void
RubySystem::printStats(ostream& out)
{
    const time_t T = time(NULL);
    tm *localTime = localtime(&T);
    char buf[100];
    strftime(buf, 100, "%b/%d/%Y %H:%M:%S", localTime);

    out << "Real time: " << buf << endl;

    m_profiler_ptr->printStats(out);
    m_network_ptr->printStats(out);
}

// Compress raw_data into <checkpoint-dir>/<filename> with zlib and free the
// buffer.  Takes ownership of raw_data, which must have been allocated with
// new [].
void
RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
                                 uint64 uncompressed_trace_size)
{
    // Create the checkpoint file for the memory
    string thefile = Checkpoint::dir() + "/" + filename.c_str();

    int fd = creat(thefile.c_str(), 0664);
    if (fd < 0) {
        perror("creat");
        fatal("Can't open memory trace file '%s'\n", filename);
    }

    gzFile compressedMemory = gzdopen(fd, "wb");
    if (compressedMemory == NULL)
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);

    // gzwrite() returns the number of uncompressed bytes consumed (an int,
    // 0 or -1 on error); anything short of the requested size is a failure.
    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
        (int) uncompressed_trace_size) {
        fatal("Write failed on memory trace file '%s'\n", filename);
    }

    if (gzclose(compressedMemory)) {
        fatal("Close failed on memory trace file '%s'\n", filename);
    }
    // BUGFIX: raw_data is an array allocated with new [], so it must be
    // released with delete [] -- plain delete on an array is undefined
    // behavior.
    delete [] raw_data;
}

// Checkpoint the Ruby system: flush the caches into a CacheRecorder trace on
// a private event queue (so the real queue and curTick are untouched), then
// write the memory image and the cache trace as compressed files next to the
// checkpoint and record their names/sizes in the checkpoint itself.
void
RubySystem::serialize(std::ostream &os)
{
    m_cooldown_enabled = true;

    // Build a per-controller Sequencer map; controllers without their own
    // sequencer (e.g. directories) are assigned the first one found so the
    // CacheRecorder can issue flush requests for them.
    vector<Sequencer*> sequencer_map;
    Sequencer* sequencer_ptr = NULL;
    int cntrl_id = -1;

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (sequencer_ptr == NULL) {
            sequencer_ptr = sequencer_map[cntrl];
            cntrl_id = cntrl;
        }
    }

    assert(sequencer_ptr != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = sequencer_ptr;
        }
    }

    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
    // Create the CacheRecorder and record the cache trace
    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
    }

    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
    // save the current tick value
    Tick curtick_original = curTick();
    // save the event queue head
    Event* eventq_head = eventq->replaceHead(NULL);
    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
            curtick_original);

    // Schedule an event to start cache cooldown
    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
    enqueueRubyEvent(curTick());
    simulate();
    DPRINTF(RubyCacheTrace, "Cache flush complete\n");

    // Restore eventq head
    eventq_head = eventq->replaceHead(eventq_head);
    // Restore curTick
    curTick(curtick_original);

    uint8_t *raw_data = NULL;

    if (m_mem_vec_ptr != NULL) {
        // Flat backing store: dump the memory image as its own gzip file.
        // writeCompressedTrace() takes ownership of (and frees) raw_data.
        uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);

        string memory_trace_file = name() + ".memory.gz";
        writeCompressedTrace(raw_data, memory_trace_file,
                             memory_trace_size);

        SERIALIZE_SCALAR(memory_trace_file);
        SERIALIZE_SCALAR(memory_trace_size);

    } else {
        // Sparse memory: record the resident blocks into the cache trace.
        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
                                                    m_cache_recorder);
        }
    }

    // Aggregate the trace entries together into a single array.
    // aggregateRecords() may grow the buffer via the pointer reference.
    raw_data = new uint8_t[4096];
    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
                                                                 4096);
    string cache_trace_file = name() + ".cache.gz";
    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);

    SERIALIZE_SCALAR(cache_trace_file);
    SERIALIZE_SCALAR(cache_trace_size);

    // BUGFIX: the CacheRecorder allocated above was never released, leaking
    // once per checkpoint.  Its trace has been written out, so free it now.
    delete m_cache_recorder;
    m_cache_recorder = NULL;

    m_cooldown_enabled = false;
}

// Read a gzip-compressed trace file into a freshly allocated buffer.  On
// return raw_data points to new []-allocated memory of
// uncompressed_trace_size bytes; the caller owns it (delete []).
void
RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
                                uint64& uncompressed_trace_size)
{
    // Read the trace file
    gzFile compressedTrace;

    // trace file
    int fd = open(filename.c_str(), O_RDONLY);
    if (fd < 0) {
        perror("open");
        fatal("Unable to open trace file %s", filename);
    }

    compressedTrace = gzdopen(fd, "rb");
    if (compressedTrace == NULL) {
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);
    }

    raw_data = new uint8_t[uncompressed_trace_size];

    // BUGFIX: gzread() returns an int and yields -1 on error.  The original
    // comparison promoted that int to uint64, so -1 became a huge value and
    // the short-read check silently passed.  Check the error case first.
    int bytes_read = gzread(compressedTrace, raw_data,
                            uncompressed_trace_size);
    if (bytes_read < 0 || (uint64)bytes_read < uncompressed_trace_size) {
        fatal("Unable to read complete trace from file %s\n", filename);
    }

    if (gzclose(compressedTrace)) {
        fatal("Failed to close cache trace file '%s'\n", filename);
    }
}

// Restore from a checkpoint: repopulate the memory image (if present) and
// hand the cache trace to a CacheRecorder; the actual cache warmup replay
// happens later in startup().
void
RubySystem::unserialize(Checkpoint *cp, const string &section)
{
    //
    // The main purpose for clearing stats in the unserialize process is so
    // that the profiler can correctly set its start time to the unserialized
    // value of curTick()
    //
    resetStats();
    uint8_t *uncompressed_trace = NULL;

    if (m_mem_vec_ptr != NULL) {
        string memory_trace_file;
        uint64 memory_trace_size = 0;

        UNSERIALIZE_SCALAR(memory_trace_file);
        UNSERIALIZE_SCALAR(memory_trace_size);
        memory_trace_file = cp->cptDir + "/" + memory_trace_file;

        readCompressedTrace(memory_trace_file, uncompressed_trace,
                            memory_trace_size);
        m_mem_vec_ptr->populatePages(uncompressed_trace);

        // BUGFIX: the buffer comes from new uint8_t[], so delete [] is
        // required (plain delete on an array is undefined behavior).
        delete [] uncompressed_trace;
        uncompressed_trace = NULL;
    }

    string cache_trace_file;
    uint64 cache_trace_size = 0;

    UNSERIALIZE_SCALAR(cache_trace_file);
    UNSERIALIZE_SCALAR(cache_trace_size);
    cache_trace_file = cp->cptDir + "/" + cache_trace_file;

    readCompressedTrace(cache_trace_file, uncompressed_trace,
                        cache_trace_size);
    m_warmup_enabled = true;

    // Same sequencer-map construction as serialize(): controllers without a
    // sequencer borrow the first valid one so warmup fetches can be issued.
    vector<Sequencer*> sequencer_map;
    Sequencer* t = NULL;
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (t == NULL) t = sequencer_map[cntrl];
    }

    assert(t != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = t;
        }
    }

    // The CacheRecorder takes the trace buffer; it is replayed and the
    // recorder deleted in startup().
    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
                                         sequencer_map);
}

// After a checkpoint restore, replay the recorded cache trace at tick 0 on a
// private event queue, then restore the original queue head and tick.
void
RubySystem::startup()
{
    if (m_warmup_enabled) {
        // save the current tick value
        Tick curtick_original = curTick();
        // save the event queue head
        Event* eventq_head = eventq->replaceHead(NULL);
        // set curTick to 0 and reset Ruby System's clock
        curTick(0);
        resetClock();

        // Schedule an event to start cache warmup
        enqueueRubyEvent(curTick());
        simulate();

        delete m_cache_recorder;
        m_cache_recorder = NULL;
        m_warmup_enabled = false;

        // reset DRAM so that it's not waiting for events on the old event
        // queue
        for (int i = 0; i < m_memory_controller_vec.size(); ++i) {
            m_memory_controller_vec[i]->reset();
        }

        // Restore eventq head
        eventq_head = eventq->replaceHead(eventq_head);
        // Restore curTick and Ruby System's clock
        curTick(curtick_original);
        resetClock();
    }
}

// Driver event for warmup/cooldown: each firing pushes the next fetch (when
// warming up) or flush (when cooling down) request from the CacheRecorder.
void
RubySystem::RubyEvent::process()
{
    if (ruby_system->m_warmup_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
    } else if (ruby_system->m_cooldown_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
    }
}

void
RubySystem::resetStats()
{
    m_profiler_ptr->clearStats();
    m_network_ptr->clearStats();
}

// Functionally read the data for pkt's address out of the coherent state:
// prefer any RO/RW copy in the hierarchy, fall back to Backing_Store only
// when no cache holds the block, and finally search in-flight messages.
bool
RubySystem::functionalRead(PacketPtr pkt)
{
    Address address(pkt->getAddr());
    Address line_address(address);
    line_address.makeLineAddress();

    AccessPermission access_perm = AccessPermission_NotPresent;
    int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Read request for %s\n",address);

    unsigned int num_ro = 0;
    unsigned int num_rw = 0;
    unsigned int num_busy = 0;
    unsigned int num_backing_store = 0;
    unsigned int num_invalid = 0;

    // In this loop we count the number of controllers that have the given
    // address in read only, read write and busy states.
    for (unsigned int i = 0; i < num_controllers; ++i) {
        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
        if (access_perm == AccessPermission_Read_Only)
            num_ro++;
        else if (access_perm == AccessPermission_Read_Write)
            num_rw++;
        else if (access_perm == AccessPermission_Busy)
            num_busy++;
        else if (access_perm == AccessPermission_Backing_Store)
            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
            // to represent blocks in memory *for Broadcast/Snooping protocols*,
            // where memory has no idea whether it has an exclusive copy of data
            // or not.
            num_backing_store++;
        else if (access_perm == AccessPermission_Invalid ||
                 access_perm == AccessPermission_NotPresent)
            num_invalid++;
    }
    // At most one writer may exist at any time.
    assert(num_rw <= 1);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    // Offset of the request within its cache line.
    unsigned startByte = address.getAddress() - line_address.getAddress();

    // This if case is meant to capture what happens in a Broadcast/Snoop
    // protocol where the block does not exist in the cache hierarchy. You
    // only want to read from the Backing_Store memory if there is no copy in
    // the cache hierarchy, otherwise you want to try to read the RO or RW
    // copies existing in the cache hierarchy (covered by the else statement).
    // The reason is because the Backing_Store memory could easily be stale, if
    // there are copies floating around the cache hierarchy, so you want to read
    // it only if it's not in the cache hierarchy at all.
    if (num_invalid == (num_controllers - 1) &&
        num_backing_store == 1) {
        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
        for (unsigned int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Backing_Store) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                // (renamed from a shadowing 'i' in the original)
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    } else if (num_ro > 0 || num_rw == 1) {
        // In Broadcast/Snoop protocols, this covers if you know the block
        // exists somewhere in the caching hierarchy, then you want to read any
        // valid RO or RW block. In directory protocols, same thing, you want
        // to read any valid readable copy of the block.
        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
                num_busy, num_ro, num_rw);
        // In this loop, we try to figure which controller has a read only or
        // a read write copy of the given address. Any valid copy would suffice
        // for a functional read.
        for (unsigned int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Read_Only ||
                access_perm == AccessPermission_Read_Write) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                // (renamed from a shadowing 'i' in the original)
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    }

    // Since we are here, this means that none of the controllers hold this
    // address in a stable/base state. The function searches through all the
    // buffers that exist in different cache, directory and memory
    // controllers, and in the network components and reads the data portion
    // of the first message that holds address specified in the packet.
    for (unsigned int i = 0; i < num_controllers; ++i) {
        if (m_abs_cntrl_vec[i]->functionalReadBuffers(pkt)) {
            return true;
        }
    }

    for (unsigned int i = 0; i < m_memory_controller_vec.size(); ++i) {
        if (m_memory_controller_vec[i]->functionalReadBuffers(pkt)) {
            return true;
        }
    }

    if (m_network_ptr->functionalRead(pkt)) {
        return true;
    }
    return false;
}

// The function searches through all the buffers that exist in different
// cache, directory and memory controllers, and in the network components
// and writes the data portion of those that hold the address specified
// in the packet.
// Functionally write pkt's data into every held copy of the target line and
// into any in-flight messages that carry it.  Always returns true: the
// write is applied to each controller whose permission for the line is
// anything other than Invalid/NotPresent.
bool
RubySystem::functionalWrite(PacketPtr pkt)
{
    Address addr(pkt->getAddr());
    Address line_addr = line_address(addr);
    AccessPermission access_perm = AccessPermission_NotPresent;
    int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Write request for %s\n",addr);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    // Offset of the request within its cache line.
    unsigned startByte = addr.getAddress() - line_addr.getAddress();

    for (unsigned int i = 0; i < num_controllers;++i) {
        // Update any queued messages in this controller's buffers first.
        m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);

        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
        if (access_perm != AccessPermission_Invalid &&
            access_perm != AccessPermission_NotPresent) {

            DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr);
            DPRINTF(RubySystem, "%s\n",block);
            // NOTE(review): inner 'i' shadows the controller index; it only
            // indexes data[], and the loop ends before the outer 'i' is
            // reused, so behavior is unaffected.
            for (unsigned i = 0; i < size_in_bytes; ++i) {
                block.setByte(i + startByte, data[i]);
            }
            DPRINTF(RubySystem, "%s\n",block);
        }
    }

    // Count of updated messages, kept only for the debug printout below
    // (M5_VAR_USED silences the unused warning in non-debug builds).
    uint32_t M5_VAR_USED num_functional_writes = 0;
    for (unsigned int i = 0; i < m_memory_controller_vec.size() ;++i) {
        num_functional_writes +=
            m_memory_controller_vec[i]->functionalWriteBuffers(pkt);
    }

    num_functional_writes += m_network_ptr->functionalWrite(pkt);
    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);

    return true;
}

#ifdef CHECK_COHERENCE
// This code will check for cases if the given cache block is exclusive in
// one node and shared in another-- a coherence violation
//
// To use, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions. Do this
// in setState. The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol
//
// NOTE(review): the body below is compiled out with #if 0 and references a
// chip vector that no longer appears in this file; it is retained only as
// historical documentation of the intended invariant.
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif

// Python-generated params factory: builds the singleton RubySystem.
RubySystem *
RubySystemParams::create()
{
    return new RubySystem(this);
}

/**
 * virtual process function that is invoked when the callback
 * queue is executed.
 */
void
RubyDumpStatsCallback::process()
{
    ruby_system->printStats(*os);
}