// RubySystem.cc, revision 9300
/*
 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29#include <fcntl.h> 30#include <zlib.h> 31 32#include <cstdio> 33 34#include "base/intmath.hh" 35#include "base/output.hh" 36#include "debug/RubyCacheTrace.hh" 37#include "debug/RubySystem.hh" 38#include "mem/ruby/common/Address.hh" 39#include "mem/ruby/network/Network.hh" 40#include "mem/ruby/profiler/Profiler.hh" 41#include "mem/ruby/system/System.hh" 42#include "sim/eventq.hh" 43#include "sim/simulate.hh" 44 45using namespace std; 46 47int RubySystem::m_random_seed; 48bool RubySystem::m_randomization; 49int RubySystem::m_block_size_bytes; 50int RubySystem::m_block_size_bits; 51uint64 RubySystem::m_memory_size_bytes; 52int RubySystem::m_memory_size_bits; 53 54RubySystem::RubySystem(const Params *p) 55 : ClockedObject(p) 56{ 57 if (g_system_ptr != NULL) 58 fatal("Only one RubySystem object currently allowed.\n"); 59 60 m_random_seed = p->random_seed; 61 srandom(m_random_seed); 62 m_randomization = p->randomization; 63 64 m_block_size_bytes = p->block_size_bytes; 65 assert(isPowerOf2(m_block_size_bytes)); 66 m_block_size_bits = floorLog2(m_block_size_bytes); 67 68 m_memory_size_bytes = p->mem_size; 69 if (m_memory_size_bytes == 0) { 70 m_memory_size_bits = 0; 71 } else { 72 m_memory_size_bits = floorLog2(m_memory_size_bytes); 73 } 74 75 g_system_ptr = this; 76 if (p->no_mem_vec) { 77 m_mem_vec_ptr = NULL; 78 } else { 79 m_mem_vec_ptr = new MemoryVector; 80 m_mem_vec_ptr->resize(m_memory_size_bytes); 81 } 82 83 // Print ruby configuration and stats at exit 84 registerExitCallback(new RubyExitCallback(p->stats_filename, this)); 85 86 m_warmup_enabled = false; 87 m_cooldown_enabled = false; 88} 89 90void 91RubySystem::init() 92{ 93 m_profiler_ptr->clearStats(); 94} 95 96void 97RubySystem::registerNetwork(Network* network_ptr) 98{ 99 m_network_ptr = network_ptr; 100} 101 102void 103RubySystem::registerProfiler(Profiler* profiler_ptr) 104{ 105 m_profiler_ptr = profiler_ptr; 106} 107 108void 109RubySystem::registerAbstractController(AbstractController* cntrl) 
110{ 111 m_abs_cntrl_vec.push_back(cntrl); 112} 113 114void 115RubySystem::registerSparseMemory(SparseMemory* s) 116{ 117 m_sparse_memory_vector.push_back(s); 118} 119 120void 121RubySystem::registerMemController(MemoryControl *mc) { 122 m_memory_controller_vec.push_back(mc); 123} 124 125RubySystem::~RubySystem() 126{ 127 delete m_network_ptr; 128 delete m_profiler_ptr; 129 if (m_mem_vec_ptr) 130 delete m_mem_vec_ptr; 131} 132 133void 134RubySystem::printStats(ostream& out) 135{ 136 const time_t T = time(NULL); 137 tm *localTime = localtime(&T); 138 char buf[100]; 139 strftime(buf, 100, "%b/%d/%Y %H:%M:%S", localTime); 140 141 out << "Real time: " << buf << endl; 142 143 m_profiler_ptr->printStats(out); 144 m_network_ptr->printStats(out); 145} 146 147void 148RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename, 149 uint64 uncompressed_trace_size) 150{ 151 // Create the checkpoint file for the memory 152 string thefile = Checkpoint::dir() + "/" + filename.c_str(); 153 154 int fd = creat(thefile.c_str(), 0664); 155 if (fd < 0) { 156 perror("creat"); 157 fatal("Can't open memory trace file '%s'\n", filename); 158 } 159 160 gzFile compressedMemory = gzdopen(fd, "wb"); 161 if (compressedMemory == NULL) 162 fatal("Insufficient memory to allocate compression state for %s\n", 163 filename); 164 165 if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) != 166 uncompressed_trace_size) { 167 fatal("Write failed on memory trace file '%s'\n", filename); 168 } 169 170 if (gzclose(compressedMemory)) { 171 fatal("Close failed on memory trace file '%s'\n", filename); 172 } 173 delete raw_data; 174} 175 176void 177RubySystem::serialize(std::ostream &os) 178{ 179 m_cooldown_enabled = true; 180 181 vector<Sequencer*> sequencer_map; 182 Sequencer* sequencer_ptr = NULL; 183 int cntrl_id = -1; 184 185 186 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 187 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 188 if (sequencer_ptr == 
NULL) { 189 sequencer_ptr = sequencer_map[cntrl]; 190 cntrl_id = cntrl; 191 } 192 } 193 194 assert(sequencer_ptr != NULL); 195 196 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 197 if (sequencer_map[cntrl] == NULL) { 198 sequencer_map[cntrl] = sequencer_ptr; 199 } 200 } 201 202 DPRINTF(RubyCacheTrace, "Recording Cache Trace\n"); 203 // Create the CacheRecorder and record the cache trace 204 m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map); 205 206 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 207 m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder); 208 } 209 210 DPRINTF(RubyCacheTrace, "Cache Trace Complete\n"); 211 // save the current tick value 212 Tick curtick_original = curTick(); 213 // save the event queue head 214 Event* eventq_head = eventq->replaceHead(NULL); 215 DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n", 216 curtick_original); 217 218 // Schedule an event to start cache cooldown 219 DPRINTF(RubyCacheTrace, "Starting cache flush\n"); 220 enqueueRubyEvent(curTick()); 221 simulate(); 222 DPRINTF(RubyCacheTrace, "Cache flush complete\n"); 223 224 // Restore eventq head 225 eventq_head = eventq->replaceHead(eventq_head); 226 // Restore curTick 227 curTick(curtick_original); 228 229 uint8_t *raw_data = NULL; 230 231 if (m_mem_vec_ptr != NULL) { 232 uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data); 233 234 string memory_trace_file = name() + ".memory.gz"; 235 writeCompressedTrace(raw_data, memory_trace_file, 236 memory_trace_size); 237 238 SERIALIZE_SCALAR(memory_trace_file); 239 SERIALIZE_SCALAR(memory_trace_size); 240 241 } else { 242 for (int i = 0; i < m_sparse_memory_vector.size(); ++i) { 243 m_sparse_memory_vector[i]->recordBlocks(cntrl_id, 244 m_cache_recorder); 245 } 246 } 247 248 // Aggergate the trace entries together into a single array 249 raw_data = new uint8_t[4096]; 250 uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data, 251 
4096); 252 string cache_trace_file = name() + ".cache.gz"; 253 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size); 254 255 SERIALIZE_SCALAR(cache_trace_file); 256 SERIALIZE_SCALAR(cache_trace_size); 257 258 m_cooldown_enabled = false; 259} 260 261void 262RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data, 263 uint64& uncompressed_trace_size) 264{ 265 // Read the trace file 266 gzFile compressedTrace; 267 268 // trace file 269 int fd = open(filename.c_str(), O_RDONLY); 270 if (fd < 0) { 271 perror("open"); 272 fatal("Unable to open trace file %s", filename); 273 } 274 275 compressedTrace = gzdopen(fd, "rb"); 276 if (compressedTrace == NULL) { 277 fatal("Insufficient memory to allocate compression state for %s\n", 278 filename); 279 } 280 281 raw_data = new uint8_t[uncompressed_trace_size]; 282 if (gzread(compressedTrace, raw_data, uncompressed_trace_size) < 283 uncompressed_trace_size) { 284 fatal("Unable to read complete trace from file %s\n", filename); 285 } 286 287 if (gzclose(compressedTrace)) { 288 fatal("Failed to close cache trace file '%s'\n", filename); 289 } 290} 291 292void 293RubySystem::unserialize(Checkpoint *cp, const string §ion) 294{ 295 // 296 // The main purpose for clearing stats in the unserialize process is so 297 // that the profiler can correctly set its start time to the unserialized 298 // value of curTick() 299 // 300 clearStats(); 301 uint8_t *uncompressed_trace = NULL; 302 303 if (m_mem_vec_ptr != NULL) { 304 string memory_trace_file; 305 uint64 memory_trace_size = 0; 306 307 UNSERIALIZE_SCALAR(memory_trace_file); 308 UNSERIALIZE_SCALAR(memory_trace_size); 309 memory_trace_file = cp->cptDir + "/" + memory_trace_file; 310 311 readCompressedTrace(memory_trace_file, uncompressed_trace, 312 memory_trace_size); 313 m_mem_vec_ptr->populatePages(uncompressed_trace); 314 315 delete uncompressed_trace; 316 uncompressed_trace = NULL; 317 } 318 319 string cache_trace_file; 320 uint64 cache_trace_size = 0; 321 322 
UNSERIALIZE_SCALAR(cache_trace_file); 323 UNSERIALIZE_SCALAR(cache_trace_size); 324 cache_trace_file = cp->cptDir + "/" + cache_trace_file; 325 326 readCompressedTrace(cache_trace_file, uncompressed_trace, 327 cache_trace_size); 328 m_warmup_enabled = true; 329 330 vector<Sequencer*> sequencer_map; 331 Sequencer* t = NULL; 332 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 333 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 334 if (t == NULL) t = sequencer_map[cntrl]; 335 } 336 337 assert(t != NULL); 338 339 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 340 if (sequencer_map[cntrl] == NULL) { 341 sequencer_map[cntrl] = t; 342 } 343 } 344 345 m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, 346 sequencer_map); 347} 348 349void 350RubySystem::startup() 351{ 352 if (m_warmup_enabled) { 353 // save the current tick value 354 Tick curtick_original = curTick(); 355 // save the event queue head 356 Event* eventq_head = eventq->replaceHead(NULL); 357 // set curTick to 0 and reset Ruby System's clock 358 curTick(0); 359 resetClock(); 360 361 // Schedule an event to start cache warmup 362 enqueueRubyEvent(curTick()); 363 simulate(); 364 365 delete m_cache_recorder; 366 m_cache_recorder = NULL; 367 m_warmup_enabled = false; 368 369 // reset DRAM so that it's not waiting for events on the old event 370 // queue 371 for (int i = 0; i < m_memory_controller_vec.size(); ++i) { 372 m_memory_controller_vec[i]->reset(); 373 } 374 375 // Restore eventq head 376 eventq_head = eventq->replaceHead(eventq_head); 377 // Restore curTick and Ruby System's clock 378 curTick(curtick_original); 379 resetClock(); 380 } 381} 382 383void 384RubySystem::RubyEvent::process() 385{ 386 if (ruby_system->m_warmup_enabled) { 387 ruby_system->m_cache_recorder->enqueueNextFetchRequest(); 388 } else if (ruby_system->m_cooldown_enabled) { 389 ruby_system->m_cache_recorder->enqueueNextFlushRequest(); 390 } 391} 392 393void 
394RubySystem::clearStats() const 395{ 396 m_profiler_ptr->clearStats(); 397 m_network_ptr->clearStats(); 398} 399 400bool 401RubySystem::functionalRead(PacketPtr pkt) 402{ 403 Address address(pkt->getAddr()); 404 Address line_address(address); 405 line_address.makeLineAddress(); 406 407 AccessPermission access_perm = AccessPermission_NotPresent; 408 int num_controllers = m_abs_cntrl_vec.size(); 409 410 DPRINTF(RubySystem, "Functional Read request for %s\n",address); 411 412 unsigned int num_ro = 0; 413 unsigned int num_rw = 0; 414 unsigned int num_busy = 0; 415 unsigned int num_backing_store = 0; 416 unsigned int num_invalid = 0; 417 418 // In this loop we count the number of controllers that have the given 419 // address in read only, read write and busy states. 420 for (int i = 0; i < num_controllers; ++i) { 421 access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address); 422 if (access_perm == AccessPermission_Read_Only) 423 num_ro++; 424 else if (access_perm == AccessPermission_Read_Write) 425 num_rw++; 426 else if (access_perm == AccessPermission_Busy) 427 num_busy++; 428 else if (access_perm == AccessPermission_Backing_Store) 429 // See RubySlicc_Exports.sm for details, but Backing_Store is meant 430 // to represent blocks in memory *for Broadcast/Snooping protocols*, 431 // where memory has no idea whether it has an exclusive copy of data 432 // or not. 433 num_backing_store++; 434 else if (access_perm == AccessPermission_Invalid || 435 access_perm == AccessPermission_NotPresent) 436 num_invalid++; 437 } 438 assert(num_rw <= 1); 439 440 uint8_t *data = pkt->getPtr<uint8_t>(true); 441 unsigned int size_in_bytes = pkt->getSize(); 442 unsigned startByte = address.getAddress() - line_address.getAddress(); 443 444 // This if case is meant to capture what happens in a Broadcast/Snoop 445 // protocol where the block does not exist in the cache hierarchy. 
You 446 // only want to read from the Backing_Store memory if there is no copy in 447 // the cache hierarchy, otherwise you want to try to read the RO or RW 448 // copies existing in the cache hierarchy (covered by the else statement). 449 // The reason is because the Backing_Store memory could easily be stale, if 450 // there are copies floating around the cache hierarchy, so you want to read 451 // it only if it's not in the cache hierarchy at all. 452 if (num_invalid == (num_controllers - 1) && 453 num_backing_store == 1) { 454 DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n"); 455 for (int i = 0; i < num_controllers; ++i) { 456 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 457 if (access_perm == AccessPermission_Backing_Store) { 458 DataBlock& block = m_abs_cntrl_vec[i]-> 459 getDataBlock(line_address); 460 461 DPRINTF(RubySystem, "reading from %s block %s\n", 462 m_abs_cntrl_vec[i]->name(), block); 463 for (unsigned i = 0; i < size_in_bytes; ++i) { 464 data[i] = block.getByte(i + startByte); 465 } 466 return true; 467 } 468 } 469 } else { 470 // In Broadcast/Snoop protocols, this covers if you know the block 471 // exists somewhere in the caching hierarchy, then you want to read any 472 // valid RO or RW block. In directory protocols, same thing, you want 473 // to read any valid readable copy of the block. 474 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n", 475 num_busy, num_ro, num_rw); 476 // In this loop, we try to figure which controller has a read only or 477 // a read write copy of the given address. Any valid copy would suffice 478 // for a functional read. 
479 for (int i = 0;i < num_controllers;++i) { 480 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 481 if (access_perm == AccessPermission_Read_Only || 482 access_perm == AccessPermission_Read_Write) { 483 DataBlock& block = m_abs_cntrl_vec[i]-> 484 getDataBlock(line_address); 485 486 DPRINTF(RubySystem, "reading from %s block %s\n", 487 m_abs_cntrl_vec[i]->name(), block); 488 for (unsigned i = 0; i < size_in_bytes; ++i) { 489 data[i] = block.getByte(i + startByte); 490 } 491 return true; 492 } 493 } 494 } 495 return false; 496} 497 498bool 499RubySystem::functionalWrite(PacketPtr pkt) 500{ 501 Address addr(pkt->getAddr()); 502 Address line_addr = line_address(addr); 503 AccessPermission access_perm = AccessPermission_NotPresent; 504 int num_controllers = m_abs_cntrl_vec.size(); 505 506 DPRINTF(RubySystem, "Functional Write request for %s\n",addr); 507 508 unsigned int num_ro = 0; 509 unsigned int num_rw = 0; 510 unsigned int num_busy = 0; 511 unsigned int num_backing_store = 0; 512 unsigned int num_invalid = 0; 513 514 // In this loop we count the number of controllers that have the given 515 // address in read only, read write and busy states. 516 for (int i = 0;i < num_controllers;++i) { 517 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr); 518 if (access_perm == AccessPermission_Read_Only) 519 num_ro++; 520 else if (access_perm == AccessPermission_Read_Write) 521 num_rw++; 522 else if (access_perm == AccessPermission_Busy) 523 num_busy++; 524 else if (access_perm == AccessPermission_Backing_Store) 525 // See RubySlicc_Exports.sm for details, but Backing_Store is meant 526 // to represent blocks in memory *for Broadcast/Snooping protocols*, 527 // where memory has no idea whether it has an exclusive copy of data 528 // or not. 
529 num_backing_store++; 530 else if (access_perm == AccessPermission_Invalid || 531 access_perm == AccessPermission_NotPresent) 532 num_invalid++; 533 } 534 535 // If the number of read write copies is more than 1, then there is bug in 536 // coherence protocol. Otherwise, if all copies are in stable states, i.e. 537 // num_busy == 0, we update all the copies. If there is at least one copy 538 // in busy state, then we check if there is read write copy. If yes, then 539 // also we let the access go through. Or, if there is no copy in the cache 540 // hierarchy at all, we still want to do the write to the memory 541 // (Backing_Store) instead of failing. 542 543 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n", 544 num_busy, num_ro, num_rw); 545 assert(num_rw <= 1); 546 547 uint8_t *data = pkt->getPtr<uint8_t>(true); 548 unsigned int size_in_bytes = pkt->getSize(); 549 unsigned startByte = addr.getAddress() - line_addr.getAddress(); 550 551 if ((num_busy == 0 && num_ro > 0) || num_rw == 1 || 552 (num_invalid == (num_controllers - 1) && num_backing_store == 1)) { 553 for (int i = 0; i < num_controllers;++i) { 554 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr); 555 if (access_perm == AccessPermission_Read_Only || 556 access_perm == AccessPermission_Read_Write|| 557 access_perm == AccessPermission_Maybe_Stale || 558 access_perm == AccessPermission_Backing_Store) { 559 560 DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr); 561 DPRINTF(RubySystem, "%s\n",block); 562 for (unsigned i = 0; i < size_in_bytes; ++i) { 563 block.setByte(i + startByte, data[i]); 564 } 565 DPRINTF(RubySystem, "%s\n",block); 566 } 567 } 568 return true; 569 } 570 return false; 571} 572 573#ifdef CHECK_COHERENCE 574// This code will check for cases if the given cache block is exclusive in 575// one node and shared in another-- a coherence violation 576// 577// To use, the SLICC specification must call sequencer.checkCoherence(address) 578// when the 
controller changes to a state with new permissions. Do this 579// in setState. The SLICC spec must also define methods "isBlockShared" 580// and "isBlockExclusive" that are specific to that protocol 581// 582void 583RubySystem::checkGlobalCoherenceInvariant(const Address& addr) 584{ 585#if 0 586 NodeID exclusive = -1; 587 bool sharedDetected = false; 588 NodeID lastShared = -1; 589 590 for (int i = 0; i < m_chip_vector.size(); i++) { 591 if (m_chip_vector[i]->isBlockExclusive(addr)) { 592 if (exclusive != -1) { 593 // coherence violation 594 WARN_EXPR(exclusive); 595 WARN_EXPR(m_chip_vector[i]->getID()); 596 WARN_EXPR(addr); 597 WARN_EXPR(getTime()); 598 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips"); 599 } else if (sharedDetected) { 600 WARN_EXPR(lastShared); 601 WARN_EXPR(m_chip_vector[i]->getID()); 602 WARN_EXPR(addr); 603 WARN_EXPR(getTime()); 604 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 605 } else { 606 exclusive = m_chip_vector[i]->getID(); 607 } 608 } else if (m_chip_vector[i]->isBlockShared(addr)) { 609 sharedDetected = true; 610 lastShared = m_chip_vector[i]->getID(); 611 612 if (exclusive != -1) { 613 WARN_EXPR(lastShared); 614 WARN_EXPR(exclusive); 615 WARN_EXPR(addr); 616 WARN_EXPR(getTime()); 617 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 618 } 619 } 620 } 621#endif 622} 623#endif 624 625RubySystem * 626RubySystemParams::create() 627{ 628 return new RubySystem(this); 629} 630 631/** 632 * virtual process function that is invoked when the callback 633 * queue is executed. 634 */ 635void 636RubyExitCallback::process() 637{ 638 std::ostream *os = simout.create(stats_filename); 639 ruby_system->printStats(*os); 640} 641