// RubySystem.cc revision 9302
1/* 2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29#include <fcntl.h> 30#include <zlib.h> 31 32#include <cstdio> 33 34#include "base/intmath.hh" 35#include "base/output.hh" 36#include "debug/RubyCacheTrace.hh" 37#include "debug/RubySystem.hh" 38#include "mem/ruby/common/Address.hh" 39#include "mem/ruby/network/Network.hh" 40#include "mem/ruby/profiler/Profiler.hh" 41#include "mem/ruby/system/System.hh" 42#include "sim/eventq.hh" 43#include "sim/simulate.hh" 44 45using namespace std; 46 47int RubySystem::m_random_seed; 48bool RubySystem::m_randomization; 49int RubySystem::m_block_size_bytes; 50int RubySystem::m_block_size_bits; 51uint64 RubySystem::m_memory_size_bytes; 52int RubySystem::m_memory_size_bits; 53 54RubySystem::RubySystem(const Params *p) 55 : ClockedObject(p) 56{ 57 if (g_system_ptr != NULL) 58 fatal("Only one RubySystem object currently allowed.\n"); 59 60 m_random_seed = p->random_seed; 61 srandom(m_random_seed); 62 m_randomization = p->randomization; 63 64 m_block_size_bytes = p->block_size_bytes; 65 assert(isPowerOf2(m_block_size_bytes)); 66 m_block_size_bits = floorLog2(m_block_size_bytes); 67 68 m_memory_size_bytes = p->mem_size; 69 if (m_memory_size_bytes == 0) { 70 m_memory_size_bits = 0; 71 } else { 72 m_memory_size_bits = floorLog2(m_memory_size_bytes); 73 } 74 75 g_system_ptr = this; 76 if (p->no_mem_vec) { 77 m_mem_vec_ptr = NULL; 78 } else { 79 m_mem_vec_ptr = new MemoryVector; 80 m_mem_vec_ptr->resize(m_memory_size_bytes); 81 } 82 83 // Print ruby configuration and stats at exit 84 registerExitCallback(new RubyExitCallback(p->stats_filename, this)); 85 86 m_warmup_enabled = false; 87 m_cooldown_enabled = false; 88} 89 90void 91RubySystem::init() 92{ 93 m_profiler_ptr->clearStats(); 94} 95 96void 97RubySystem::registerNetwork(Network* network_ptr) 98{ 99 m_network_ptr = network_ptr; 100} 101 102void 103RubySystem::registerProfiler(Profiler* profiler_ptr) 104{ 105 m_profiler_ptr = profiler_ptr; 106} 107 108void 109RubySystem::registerAbstractController(AbstractController* cntrl) 
110{ 111 m_abs_cntrl_vec.push_back(cntrl); 112} 113 114void 115RubySystem::registerSparseMemory(SparseMemory* s) 116{ 117 m_sparse_memory_vector.push_back(s); 118} 119 120void 121RubySystem::registerMemController(MemoryControl *mc) { 122 m_memory_controller_vec.push_back(mc); 123} 124 125RubySystem::~RubySystem() 126{ 127 delete m_network_ptr; 128 delete m_profiler_ptr; 129 if (m_mem_vec_ptr) 130 delete m_mem_vec_ptr; 131} 132 133void 134RubySystem::printStats(ostream& out) 135{ 136 const time_t T = time(NULL); 137 tm *localTime = localtime(&T); 138 char buf[100]; 139 strftime(buf, 100, "%b/%d/%Y %H:%M:%S", localTime); 140 141 out << "Real time: " << buf << endl; 142 143 m_profiler_ptr->printStats(out); 144 m_network_ptr->printStats(out); 145} 146 147void 148RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename, 149 uint64 uncompressed_trace_size) 150{ 151 // Create the checkpoint file for the memory 152 string thefile = Checkpoint::dir() + "/" + filename.c_str(); 153 154 int fd = creat(thefile.c_str(), 0664); 155 if (fd < 0) { 156 perror("creat"); 157 fatal("Can't open memory trace file '%s'\n", filename); 158 } 159 160 gzFile compressedMemory = gzdopen(fd, "wb"); 161 if (compressedMemory == NULL) 162 fatal("Insufficient memory to allocate compression state for %s\n", 163 filename); 164 165 if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) != 166 uncompressed_trace_size) { 167 fatal("Write failed on memory trace file '%s'\n", filename); 168 } 169 170 if (gzclose(compressedMemory)) { 171 fatal("Close failed on memory trace file '%s'\n", filename); 172 } 173 delete raw_data; 174} 175 176void 177RubySystem::serialize(std::ostream &os) 178{ 179 m_cooldown_enabled = true; 180 181 vector<Sequencer*> sequencer_map; 182 Sequencer* sequencer_ptr = NULL; 183 int cntrl_id = -1; 184 185 186 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 187 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 188 if (sequencer_ptr == 
NULL) { 189 sequencer_ptr = sequencer_map[cntrl]; 190 cntrl_id = cntrl; 191 } 192 } 193 194 assert(sequencer_ptr != NULL); 195 196 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 197 if (sequencer_map[cntrl] == NULL) { 198 sequencer_map[cntrl] = sequencer_ptr; 199 } 200 } 201 202 DPRINTF(RubyCacheTrace, "Recording Cache Trace\n"); 203 // Create the CacheRecorder and record the cache trace 204 m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map); 205 206 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 207 m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder); 208 } 209 210 DPRINTF(RubyCacheTrace, "Cache Trace Complete\n"); 211 // save the current tick value 212 Tick curtick_original = curTick(); 213 // save the event queue head 214 Event* eventq_head = eventq->replaceHead(NULL); 215 DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n", 216 curtick_original); 217 218 // Schedule an event to start cache cooldown 219 DPRINTF(RubyCacheTrace, "Starting cache flush\n"); 220 enqueueRubyEvent(curTick()); 221 simulate(); 222 DPRINTF(RubyCacheTrace, "Cache flush complete\n"); 223 224 // Restore eventq head 225 eventq_head = eventq->replaceHead(eventq_head); 226 // Restore curTick 227 curTick(curtick_original); 228 229 uint8_t *raw_data = NULL; 230 231 if (m_mem_vec_ptr != NULL) { 232 uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data); 233 234 string memory_trace_file = name() + ".memory.gz"; 235 writeCompressedTrace(raw_data, memory_trace_file, 236 memory_trace_size); 237 238 SERIALIZE_SCALAR(memory_trace_file); 239 SERIALIZE_SCALAR(memory_trace_size); 240 241 } else { 242 for (int i = 0; i < m_sparse_memory_vector.size(); ++i) { 243 m_sparse_memory_vector[i]->recordBlocks(cntrl_id, 244 m_cache_recorder); 245 } 246 } 247 248 // Aggergate the trace entries together into a single array 249 raw_data = new uint8_t[4096]; 250 uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data, 251 
4096); 252 string cache_trace_file = name() + ".cache.gz"; 253 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size); 254 255 SERIALIZE_SCALAR(cache_trace_file); 256 SERIALIZE_SCALAR(cache_trace_size); 257 258 m_cooldown_enabled = false; 259} 260 261void 262RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data, 263 uint64& uncompressed_trace_size) 264{ 265 // Read the trace file 266 gzFile compressedTrace; 267 268 // trace file 269 int fd = open(filename.c_str(), O_RDONLY); 270 if (fd < 0) { 271 perror("open"); 272 fatal("Unable to open trace file %s", filename); 273 } 274 275 compressedTrace = gzdopen(fd, "rb"); 276 if (compressedTrace == NULL) { 277 fatal("Insufficient memory to allocate compression state for %s\n", 278 filename); 279 } 280 281 raw_data = new uint8_t[uncompressed_trace_size]; 282 if (gzread(compressedTrace, raw_data, uncompressed_trace_size) < 283 uncompressed_trace_size) { 284 fatal("Unable to read complete trace from file %s\n", filename); 285 } 286 287 if (gzclose(compressedTrace)) { 288 fatal("Failed to close cache trace file '%s'\n", filename); 289 } 290} 291 292void 293RubySystem::unserialize(Checkpoint *cp, const string §ion) 294{ 295 // 296 // The main purpose for clearing stats in the unserialize process is so 297 // that the profiler can correctly set its start time to the unserialized 298 // value of curTick() 299 // 300 clearStats(); 301 uint8_t *uncompressed_trace = NULL; 302 303 if (m_mem_vec_ptr != NULL) { 304 string memory_trace_file; 305 uint64 memory_trace_size = 0; 306 307 UNSERIALIZE_SCALAR(memory_trace_file); 308 UNSERIALIZE_SCALAR(memory_trace_size); 309 memory_trace_file = cp->cptDir + "/" + memory_trace_file; 310 311 readCompressedTrace(memory_trace_file, uncompressed_trace, 312 memory_trace_size); 313 m_mem_vec_ptr->populatePages(uncompressed_trace); 314 315 delete uncompressed_trace; 316 uncompressed_trace = NULL; 317 } 318 319 string cache_trace_file; 320 uint64 cache_trace_size = 0; 321 322 
UNSERIALIZE_SCALAR(cache_trace_file); 323 UNSERIALIZE_SCALAR(cache_trace_size); 324 cache_trace_file = cp->cptDir + "/" + cache_trace_file; 325 326 readCompressedTrace(cache_trace_file, uncompressed_trace, 327 cache_trace_size); 328 m_warmup_enabled = true; 329 330 vector<Sequencer*> sequencer_map; 331 Sequencer* t = NULL; 332 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 333 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 334 if (t == NULL) t = sequencer_map[cntrl]; 335 } 336 337 assert(t != NULL); 338 339 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 340 if (sequencer_map[cntrl] == NULL) { 341 sequencer_map[cntrl] = t; 342 } 343 } 344 345 m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, 346 sequencer_map); 347} 348 349void 350RubySystem::startup() 351{ 352 if (m_warmup_enabled) { 353 // save the current tick value 354 Tick curtick_original = curTick(); 355 // save the event queue head 356 Event* eventq_head = eventq->replaceHead(NULL); 357 // set curTick to 0 and reset Ruby System's clock 358 curTick(0); 359 resetClock(); 360 361 // Schedule an event to start cache warmup 362 enqueueRubyEvent(curTick()); 363 simulate(); 364 365 delete m_cache_recorder; 366 m_cache_recorder = NULL; 367 m_warmup_enabled = false; 368 369 // reset DRAM so that it's not waiting for events on the old event 370 // queue 371 for (int i = 0; i < m_memory_controller_vec.size(); ++i) { 372 m_memory_controller_vec[i]->reset(); 373 } 374 375 // Restore eventq head 376 eventq_head = eventq->replaceHead(eventq_head); 377 // Restore curTick and Ruby System's clock 378 curTick(curtick_original); 379 resetClock(); 380 } 381} 382 383void 384RubySystem::RubyEvent::process() 385{ 386 if (ruby_system->m_warmup_enabled) { 387 ruby_system->m_cache_recorder->enqueueNextFetchRequest(); 388 } else if (ruby_system->m_cooldown_enabled) { 389 ruby_system->m_cache_recorder->enqueueNextFlushRequest(); 390 } 391} 392 393void 
394RubySystem::clearStats() const 395{ 396 m_profiler_ptr->clearStats(); 397 m_network_ptr->clearStats(); 398} 399 400bool 401RubySystem::functionalRead(PacketPtr pkt) 402{ 403 Address address(pkt->getAddr()); 404 Address line_address(address); 405 line_address.makeLineAddress(); 406 407 AccessPermission access_perm = AccessPermission_NotPresent; 408 int num_controllers = m_abs_cntrl_vec.size(); 409 410 DPRINTF(RubySystem, "Functional Read request for %s\n",address); 411 412 unsigned int num_ro = 0; 413 unsigned int num_rw = 0; 414 unsigned int num_busy = 0; 415 unsigned int num_backing_store = 0; 416 unsigned int num_invalid = 0; 417 418 // In this loop we count the number of controllers that have the given 419 // address in read only, read write and busy states. 420 for (unsigned int i = 0; i < num_controllers; ++i) { 421 access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address); 422 if (access_perm == AccessPermission_Read_Only) 423 num_ro++; 424 else if (access_perm == AccessPermission_Read_Write) 425 num_rw++; 426 else if (access_perm == AccessPermission_Busy) 427 num_busy++; 428 else if (access_perm == AccessPermission_Backing_Store) 429 // See RubySlicc_Exports.sm for details, but Backing_Store is meant 430 // to represent blocks in memory *for Broadcast/Snooping protocols*, 431 // where memory has no idea whether it has an exclusive copy of data 432 // or not. 433 num_backing_store++; 434 else if (access_perm == AccessPermission_Invalid || 435 access_perm == AccessPermission_NotPresent) 436 num_invalid++; 437 } 438 assert(num_rw <= 1); 439 440 uint8_t *data = pkt->getPtr<uint8_t>(true); 441 unsigned int size_in_bytes = pkt->getSize(); 442 unsigned startByte = address.getAddress() - line_address.getAddress(); 443 444 // This if case is meant to capture what happens in a Broadcast/Snoop 445 // protocol where the block does not exist in the cache hierarchy. 
You 446 // only want to read from the Backing_Store memory if there is no copy in 447 // the cache hierarchy, otherwise you want to try to read the RO or RW 448 // copies existing in the cache hierarchy (covered by the else statement). 449 // The reason is because the Backing_Store memory could easily be stale, if 450 // there are copies floating around the cache hierarchy, so you want to read 451 // it only if it's not in the cache hierarchy at all. 452 if (num_invalid == (num_controllers - 1) && 453 num_backing_store == 1) { 454 DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n"); 455 for (unsigned int i = 0; i < num_controllers; ++i) { 456 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 457 if (access_perm == AccessPermission_Backing_Store) { 458 DataBlock& block = m_abs_cntrl_vec[i]-> 459 getDataBlock(line_address); 460 461 DPRINTF(RubySystem, "reading from %s block %s\n", 462 m_abs_cntrl_vec[i]->name(), block); 463 for (unsigned i = 0; i < size_in_bytes; ++i) { 464 data[i] = block.getByte(i + startByte); 465 } 466 return true; 467 } 468 } 469 } else if (num_ro > 0 || num_rw == 1) { 470 // In Broadcast/Snoop protocols, this covers if you know the block 471 // exists somewhere in the caching hierarchy, then you want to read any 472 // valid RO or RW block. In directory protocols, same thing, you want 473 // to read any valid readable copy of the block. 474 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n", 475 num_busy, num_ro, num_rw); 476 // In this loop, we try to figure which controller has a read only or 477 // a read write copy of the given address. Any valid copy would suffice 478 // for a functional read. 
479 for (unsigned int i = 0;i < num_controllers;++i) { 480 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 481 if (access_perm == AccessPermission_Read_Only || 482 access_perm == AccessPermission_Read_Write) { 483 DataBlock& block = m_abs_cntrl_vec[i]-> 484 getDataBlock(line_address); 485 486 DPRINTF(RubySystem, "reading from %s block %s\n", 487 m_abs_cntrl_vec[i]->name(), block); 488 for (unsigned i = 0; i < size_in_bytes; ++i) { 489 data[i] = block.getByte(i + startByte); 490 } 491 return true; 492 } 493 } 494 } 495 496 // Since we are here, this means that none of the controllers hold this 497 // address in a stable/base state. The function searches through all the 498 // buffers that exist in different cache, directory and memory 499 // controllers, and in the network components and reads the data portion 500 // of the first message that holds address specified in the packet. 501 for (unsigned int i = 0; i < num_controllers;++i) { 502 if (m_abs_cntrl_vec[i]->functionalReadBuffers(pkt)) { 503 return true; 504 } 505 } 506 507 for (unsigned int i = 0; i < m_memory_controller_vec.size(); ++i) { 508 if (m_memory_controller_vec[i]->functionalReadBuffers(pkt)) { 509 return true; 510 } 511 } 512 513 if (m_network_ptr->functionalRead(pkt)) { 514 return true; 515 } 516 return false; 517} 518 519// The function searches through all the buffers that exist in different 520// cache, directory and memory controllers, and in the network components 521// and writes the data portion of those that hold the address specified 522// in the packet. 
bool
RubySystem::functionalWrite(PacketPtr pkt)
{
    Address addr(pkt->getAddr());
    Address line_addr = line_address(addr);
    AccessPermission access_perm = AccessPermission_NotPresent;
    int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Write request for %s\n",addr);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    // Offset of the request within its cache line.
    unsigned startByte = addr.getAddress() - line_addr.getAddress();

    // Write the data into every controller that holds the line in any
    // valid state, and into any in-flight messages buffered there.
    for (unsigned int i = 0; i < num_controllers;++i) {
        m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);

        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
        if (access_perm != AccessPermission_Invalid &&
            access_perm != AccessPermission_NotPresent) {

            DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr);
            DPRINTF(RubySystem, "%s\n",block);
            // NOTE(review): the inner loop variable shadows the outer
            // controller index 'i'; behavior is correct because only the
            // inner one is used here, but a rename would aid readability.
            for (unsigned i = 0; i < size_in_bytes; ++i) {
                block.setByte(i + startByte, data[i]);
            }
            DPRINTF(RubySystem, "%s\n",block);
        }
    }

    // Also patch messages queued in memory controllers and the network.
    uint32_t M5_VAR_USED num_functional_writes = 0;
    for (unsigned int i = 0; i < m_memory_controller_vec.size() ;++i) {
        num_functional_writes +=
            m_memory_controller_vec[i]->functionalWriteBuffers(pkt);
    }

    num_functional_writes += m_network_ptr->functionalWrite(pkt);
    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);

    return true;
}

#ifdef CHECK_COHERENCE
// This code will check for cases if the given cache block is exclusive in
// one node and shared in another-- a coherence violation
//
// To use, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions. Do this
// in setState. The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol
//
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
    // NOTE(review): the body below is disabled (#if 0) — it predates the
    // current controller organization (m_chip_vector no longer exists).
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif

// Factory hook invoked by the python configuration system.
RubySystem *
RubySystemParams::create()
{
    return new RubySystem(this);
}

/**
 * virtual process function that is invoked when the callback
 * queue is executed.
 */
void
RubyExitCallback::process()
{
    // Dump the final Ruby stats to the configured stats file at exit.
    std::ostream *os = simout.create(stats_filename);
    ruby_system->printStats(*os);
}