// RubySystem.cc revision 9572
1/* 2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29#include <fcntl.h> 30#include <zlib.h> 31 32#include <cstdio> 33 34#include "base/intmath.hh" 35#include "base/statistics.hh" 36#include "debug/RubyCacheTrace.hh" 37#include "debug/RubySystem.hh" 38#include "mem/ruby/common/Address.hh" 39#include "mem/ruby/network/Network.hh" 40#include "mem/ruby/profiler/Profiler.hh" 41#include "mem/ruby/system/System.hh" 42#include "sim/eventq.hh" 43#include "sim/simulate.hh" 44 45using namespace std; 46 47int RubySystem::m_random_seed; 48bool RubySystem::m_randomization; 49uint32_t RubySystem::m_block_size_bytes; 50uint32_t RubySystem::m_block_size_bits; 51uint64_t RubySystem::m_memory_size_bytes; 52uint32_t RubySystem::m_memory_size_bits; 53 54RubySystem::RubySystem(const Params *p) 55 : ClockedObject(p) 56{ 57 if (g_system_ptr != NULL) 58 fatal("Only one RubySystem object currently allowed.\n"); 59 60 m_random_seed = p->random_seed; 61 srandom(m_random_seed); 62 m_randomization = p->randomization; 63 64 m_block_size_bytes = p->block_size_bytes; 65 assert(isPowerOf2(m_block_size_bytes)); 66 m_block_size_bits = floorLog2(m_block_size_bytes); 67 68 m_memory_size_bytes = p->mem_size; 69 if (m_memory_size_bytes == 0) { 70 m_memory_size_bits = 0; 71 } else { 72 m_memory_size_bits = ceilLog2(m_memory_size_bytes); 73 } 74 75 if (p->no_mem_vec) { 76 m_mem_vec_ptr = NULL; 77 } else { 78 m_mem_vec_ptr = new MemoryVector; 79 m_mem_vec_ptr->resize(m_memory_size_bytes); 80 } 81 82 // Print ruby configuration and stats at exit and when asked for 83 Stats::registerDumpCallback(new RubyDumpStatsCallback(p->stats_filename, 84 this)); 85 86 m_warmup_enabled = false; 87 m_cooldown_enabled = false; 88 89 // Setup the global variables used in Ruby 90 g_system_ptr = this; 91 92 // Resize to the size of different machine types 93 g_abs_controls.resize(MachineType_NUM); 94} 95 96void 97RubySystem::init() 98{ 99 m_profiler_ptr->clearStats(); 100 m_network_ptr->clearStats(); 101} 102 103void 104RubySystem::registerNetwork(Network* 
network_ptr) 105{ 106 m_network_ptr = network_ptr; 107} 108 109void 110RubySystem::registerProfiler(Profiler* profiler_ptr) 111{ 112 m_profiler_ptr = profiler_ptr; 113} 114 115void 116RubySystem::registerAbstractController(AbstractController* cntrl) 117{ 118 m_abs_cntrl_vec.push_back(cntrl); 119 120 MachineID id = cntrl->getMachineID(); 121 g_abs_controls[id.getType()][id.getNum()] = cntrl; 122} 123 124void 125RubySystem::registerSparseMemory(SparseMemory* s) 126{ 127 m_sparse_memory_vector.push_back(s); 128} 129 130void 131RubySystem::registerMemController(MemoryControl *mc) { 132 m_memory_controller_vec.push_back(mc); 133} 134 135RubySystem::~RubySystem() 136{ 137 delete m_network_ptr; 138 delete m_profiler_ptr; 139 if (m_mem_vec_ptr) 140 delete m_mem_vec_ptr; 141} 142 143void 144RubySystem::printStats(ostream& out) 145{ 146 const time_t T = time(NULL); 147 tm *localTime = localtime(&T); 148 char buf[100]; 149 strftime(buf, 100, "%b/%d/%Y %H:%M:%S", localTime); 150 151 out << "Real time: " << buf << endl; 152 153 m_profiler_ptr->printStats(out); 154 m_network_ptr->printStats(out); 155 156 for (uint32_t i = 0;i < g_abs_controls.size(); ++i) { 157 for (map<uint32_t, AbstractController *>::iterator it = 158 g_abs_controls[i].begin(); 159 it != g_abs_controls[i].end(); ++it) { 160 161 ((*it).second)->printStats(out); 162 } 163 } 164} 165 166void 167RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename, 168 uint64 uncompressed_trace_size) 169{ 170 // Create the checkpoint file for the memory 171 string thefile = Checkpoint::dir() + "/" + filename.c_str(); 172 173 int fd = creat(thefile.c_str(), 0664); 174 if (fd < 0) { 175 perror("creat"); 176 fatal("Can't open memory trace file '%s'\n", filename); 177 } 178 179 gzFile compressedMemory = gzdopen(fd, "wb"); 180 if (compressedMemory == NULL) 181 fatal("Insufficient memory to allocate compression state for %s\n", 182 filename); 183 184 if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) != 185 
uncompressed_trace_size) { 186 fatal("Write failed on memory trace file '%s'\n", filename); 187 } 188 189 if (gzclose(compressedMemory)) { 190 fatal("Close failed on memory trace file '%s'\n", filename); 191 } 192 delete raw_data; 193} 194 195void 196RubySystem::serialize(std::ostream &os) 197{ 198 m_cooldown_enabled = true; 199 200 vector<Sequencer*> sequencer_map; 201 Sequencer* sequencer_ptr = NULL; 202 int cntrl_id = -1; 203 204 205 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 206 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 207 if (sequencer_ptr == NULL) { 208 sequencer_ptr = sequencer_map[cntrl]; 209 cntrl_id = cntrl; 210 } 211 } 212 213 assert(sequencer_ptr != NULL); 214 215 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 216 if (sequencer_map[cntrl] == NULL) { 217 sequencer_map[cntrl] = sequencer_ptr; 218 } 219 } 220 221 DPRINTF(RubyCacheTrace, "Recording Cache Trace\n"); 222 // Create the CacheRecorder and record the cache trace 223 m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map); 224 225 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 226 m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder); 227 } 228 229 DPRINTF(RubyCacheTrace, "Cache Trace Complete\n"); 230 // save the current tick value 231 Tick curtick_original = curTick(); 232 // save the event queue head 233 Event* eventq_head = eventq->replaceHead(NULL); 234 DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n", 235 curtick_original); 236 237 // Schedule an event to start cache cooldown 238 DPRINTF(RubyCacheTrace, "Starting cache flush\n"); 239 enqueueRubyEvent(curTick()); 240 simulate(); 241 DPRINTF(RubyCacheTrace, "Cache flush complete\n"); 242 243 // Restore eventq head 244 eventq_head = eventq->replaceHead(eventq_head); 245 // Restore curTick 246 setCurTick(curtick_original); 247 248 uint8_t *raw_data = NULL; 249 250 if (m_mem_vec_ptr != NULL) { 251 uint64 memory_trace_size = 
m_mem_vec_ptr->collatePages(raw_data); 252 253 string memory_trace_file = name() + ".memory.gz"; 254 writeCompressedTrace(raw_data, memory_trace_file, 255 memory_trace_size); 256 257 SERIALIZE_SCALAR(memory_trace_file); 258 SERIALIZE_SCALAR(memory_trace_size); 259 260 } else { 261 for (int i = 0; i < m_sparse_memory_vector.size(); ++i) { 262 m_sparse_memory_vector[i]->recordBlocks(cntrl_id, 263 m_cache_recorder); 264 } 265 } 266 267 // Aggergate the trace entries together into a single array 268 raw_data = new uint8_t[4096]; 269 uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data, 270 4096); 271 string cache_trace_file = name() + ".cache.gz"; 272 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size); 273 274 SERIALIZE_SCALAR(cache_trace_file); 275 SERIALIZE_SCALAR(cache_trace_size); 276 277 m_cooldown_enabled = false; 278} 279 280void 281RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data, 282 uint64& uncompressed_trace_size) 283{ 284 // Read the trace file 285 gzFile compressedTrace; 286 287 // trace file 288 int fd = open(filename.c_str(), O_RDONLY); 289 if (fd < 0) { 290 perror("open"); 291 fatal("Unable to open trace file %s", filename); 292 } 293 294 compressedTrace = gzdopen(fd, "rb"); 295 if (compressedTrace == NULL) { 296 fatal("Insufficient memory to allocate compression state for %s\n", 297 filename); 298 } 299 300 raw_data = new uint8_t[uncompressed_trace_size]; 301 if (gzread(compressedTrace, raw_data, uncompressed_trace_size) < 302 uncompressed_trace_size) { 303 fatal("Unable to read complete trace from file %s\n", filename); 304 } 305 306 if (gzclose(compressedTrace)) { 307 fatal("Failed to close cache trace file '%s'\n", filename); 308 } 309} 310 311void 312RubySystem::unserialize(Checkpoint *cp, const string §ion) 313{ 314 // 315 // The main purpose for clearing stats in the unserialize process is so 316 // that the profiler can correctly set its start time to the unserialized 317 // value of curTick() 
318 // 319 resetStats(); 320 uint8_t *uncompressed_trace = NULL; 321 322 if (m_mem_vec_ptr != NULL) { 323 string memory_trace_file; 324 uint64 memory_trace_size = 0; 325 326 UNSERIALIZE_SCALAR(memory_trace_file); 327 UNSERIALIZE_SCALAR(memory_trace_size); 328 memory_trace_file = cp->cptDir + "/" + memory_trace_file; 329 330 readCompressedTrace(memory_trace_file, uncompressed_trace, 331 memory_trace_size); 332 m_mem_vec_ptr->populatePages(uncompressed_trace); 333 334 delete uncompressed_trace; 335 uncompressed_trace = NULL; 336 } 337 338 string cache_trace_file; 339 uint64 cache_trace_size = 0; 340 341 UNSERIALIZE_SCALAR(cache_trace_file); 342 UNSERIALIZE_SCALAR(cache_trace_size); 343 cache_trace_file = cp->cptDir + "/" + cache_trace_file; 344 345 readCompressedTrace(cache_trace_file, uncompressed_trace, 346 cache_trace_size); 347 m_warmup_enabled = true; 348 349 vector<Sequencer*> sequencer_map; 350 Sequencer* t = NULL; 351 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 352 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 353 if (t == NULL) t = sequencer_map[cntrl]; 354 } 355 356 assert(t != NULL); 357 358 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 359 if (sequencer_map[cntrl] == NULL) { 360 sequencer_map[cntrl] = t; 361 } 362 } 363 364 m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, 365 sequencer_map); 366} 367 368void 369RubySystem::startup() 370{ 371 if (m_warmup_enabled) { 372 // save the current tick value 373 Tick curtick_original = curTick(); 374 // save the event queue head 375 Event* eventq_head = eventq->replaceHead(NULL); 376 // set curTick to 0 and reset Ruby System's clock 377 setCurTick(0); 378 resetClock(); 379 380 // Schedule an event to start cache warmup 381 enqueueRubyEvent(curTick()); 382 simulate(); 383 384 delete m_cache_recorder; 385 m_cache_recorder = NULL; 386 m_warmup_enabled = false; 387 388 // reset DRAM so that it's not waiting for events on the old event 389 
// queue 390 for (int i = 0; i < m_memory_controller_vec.size(); ++i) { 391 m_memory_controller_vec[i]->reset(); 392 } 393 394 // Restore eventq head 395 eventq_head = eventq->replaceHead(eventq_head); 396 // Restore curTick and Ruby System's clock 397 setCurTick(curtick_original); 398 resetClock(); 399 } 400} 401 402void 403RubySystem::RubyEvent::process() 404{ 405 if (ruby_system->m_warmup_enabled) { 406 ruby_system->m_cache_recorder->enqueueNextFetchRequest(); 407 } else if (ruby_system->m_cooldown_enabled) { 408 ruby_system->m_cache_recorder->enqueueNextFlushRequest(); 409 } 410} 411 412void 413RubySystem::resetStats() 414{ 415 m_profiler_ptr->clearStats(); 416 m_network_ptr->clearStats(); 417 for (uint32_t cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 418 m_abs_cntrl_vec[cntrl]->clearStats(); 419 } 420} 421 422bool 423RubySystem::functionalRead(PacketPtr pkt) 424{ 425 Address address(pkt->getAddr()); 426 Address line_address(address); 427 line_address.makeLineAddress(); 428 429 AccessPermission access_perm = AccessPermission_NotPresent; 430 int num_controllers = m_abs_cntrl_vec.size(); 431 432 DPRINTF(RubySystem, "Functional Read request for %s\n",address); 433 434 unsigned int num_ro = 0; 435 unsigned int num_rw = 0; 436 unsigned int num_busy = 0; 437 unsigned int num_backing_store = 0; 438 unsigned int num_invalid = 0; 439 440 // In this loop we count the number of controllers that have the given 441 // address in read only, read write and busy states. 
442 for (unsigned int i = 0; i < num_controllers; ++i) { 443 access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address); 444 if (access_perm == AccessPermission_Read_Only) 445 num_ro++; 446 else if (access_perm == AccessPermission_Read_Write) 447 num_rw++; 448 else if (access_perm == AccessPermission_Busy) 449 num_busy++; 450 else if (access_perm == AccessPermission_Backing_Store) 451 // See RubySlicc_Exports.sm for details, but Backing_Store is meant 452 // to represent blocks in memory *for Broadcast/Snooping protocols*, 453 // where memory has no idea whether it has an exclusive copy of data 454 // or not. 455 num_backing_store++; 456 else if (access_perm == AccessPermission_Invalid || 457 access_perm == AccessPermission_NotPresent) 458 num_invalid++; 459 } 460 assert(num_rw <= 1); 461 462 uint8_t *data = pkt->getPtr<uint8_t>(true); 463 unsigned int size_in_bytes = pkt->getSize(); 464 unsigned startByte = address.getAddress() - line_address.getAddress(); 465 466 // This if case is meant to capture what happens in a Broadcast/Snoop 467 // protocol where the block does not exist in the cache hierarchy. You 468 // only want to read from the Backing_Store memory if there is no copy in 469 // the cache hierarchy, otherwise you want to try to read the RO or RW 470 // copies existing in the cache hierarchy (covered by the else statement). 471 // The reason is because the Backing_Store memory could easily be stale, if 472 // there are copies floating around the cache hierarchy, so you want to read 473 // it only if it's not in the cache hierarchy at all. 
474 if (num_invalid == (num_controllers - 1) && 475 num_backing_store == 1) { 476 DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n"); 477 for (unsigned int i = 0; i < num_controllers; ++i) { 478 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 479 if (access_perm == AccessPermission_Backing_Store) { 480 DataBlock& block = m_abs_cntrl_vec[i]-> 481 getDataBlock(line_address); 482 483 DPRINTF(RubySystem, "reading from %s block %s\n", 484 m_abs_cntrl_vec[i]->name(), block); 485 for (unsigned j = 0; j < size_in_bytes; ++j) { 486 data[j] = block.getByte(j + startByte); 487 } 488 return true; 489 } 490 } 491 } else if (num_ro > 0 || num_rw == 1) { 492 // In Broadcast/Snoop protocols, this covers if you know the block 493 // exists somewhere in the caching hierarchy, then you want to read any 494 // valid RO or RW block. In directory protocols, same thing, you want 495 // to read any valid readable copy of the block. 496 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n", 497 num_busy, num_ro, num_rw); 498 // In this loop, we try to figure which controller has a read only or 499 // a read write copy of the given address. Any valid copy would suffice 500 // for a functional read. 
501 for (unsigned int i = 0;i < num_controllers;++i) { 502 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 503 if (access_perm == AccessPermission_Read_Only || 504 access_perm == AccessPermission_Read_Write) { 505 DataBlock& block = m_abs_cntrl_vec[i]-> 506 getDataBlock(line_address); 507 508 DPRINTF(RubySystem, "reading from %s block %s\n", 509 m_abs_cntrl_vec[i]->name(), block); 510 for (unsigned j = 0; j < size_in_bytes; ++j) { 511 data[j] = block.getByte(j + startByte); 512 } 513 return true; 514 } 515 } 516 } 517 518 return false; 519} 520 521// The function searches through all the buffers that exist in different 522// cache, directory and memory controllers, and in the network components 523// and writes the data portion of those that hold the address specified 524// in the packet. 525bool 526RubySystem::functionalWrite(PacketPtr pkt) 527{ 528 Address addr(pkt->getAddr()); 529 Address line_addr = line_address(addr); 530 AccessPermission access_perm = AccessPermission_NotPresent; 531 int num_controllers = m_abs_cntrl_vec.size(); 532 533 DPRINTF(RubySystem, "Functional Write request for %s\n",addr); 534 535 uint8_t *data = pkt->getPtr<uint8_t>(true); 536 unsigned int size_in_bytes = pkt->getSize(); 537 unsigned startByte = addr.getAddress() - line_addr.getAddress(); 538 539 uint32_t M5_VAR_USED num_functional_writes = 0; 540 541 for (unsigned int i = 0; i < num_controllers;++i) { 542 num_functional_writes += 543 m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt); 544 545 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr); 546 if (access_perm != AccessPermission_Invalid && 547 access_perm != AccessPermission_NotPresent) { 548 549 num_functional_writes++; 550 551 DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr); 552 DPRINTF(RubySystem, "%s\n",block); 553 for (unsigned j = 0; j < size_in_bytes; ++j) { 554 block.setByte(j + startByte, data[j]); 555 } 556 DPRINTF(RubySystem, "%s\n",block); 557 } 558 } 559 560 for 
(unsigned int i = 0; i < m_memory_controller_vec.size() ;++i) { 561 num_functional_writes += 562 m_memory_controller_vec[i]->functionalWriteBuffers(pkt); 563 } 564 565 num_functional_writes += m_network_ptr->functionalWrite(pkt); 566 DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes); 567 568 return true; 569} 570 571#ifdef CHECK_COHERENCE 572// This code will check for cases if the given cache block is exclusive in 573// one node and shared in another-- a coherence violation 574// 575// To use, the SLICC specification must call sequencer.checkCoherence(address) 576// when the controller changes to a state with new permissions. Do this 577// in setState. The SLICC spec must also define methods "isBlockShared" 578// and "isBlockExclusive" that are specific to that protocol 579// 580void 581RubySystem::checkGlobalCoherenceInvariant(const Address& addr) 582{ 583#if 0 584 NodeID exclusive = -1; 585 bool sharedDetected = false; 586 NodeID lastShared = -1; 587 588 for (int i = 0; i < m_chip_vector.size(); i++) { 589 if (m_chip_vector[i]->isBlockExclusive(addr)) { 590 if (exclusive != -1) { 591 // coherence violation 592 WARN_EXPR(exclusive); 593 WARN_EXPR(m_chip_vector[i]->getID()); 594 WARN_EXPR(addr); 595 WARN_EXPR(getTime()); 596 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips"); 597 } else if (sharedDetected) { 598 WARN_EXPR(lastShared); 599 WARN_EXPR(m_chip_vector[i]->getID()); 600 WARN_EXPR(addr); 601 WARN_EXPR(getTime()); 602 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 603 } else { 604 exclusive = m_chip_vector[i]->getID(); 605 } 606 } else if (m_chip_vector[i]->isBlockShared(addr)) { 607 sharedDetected = true; 608 lastShared = m_chip_vector[i]->getID(); 609 610 if (exclusive != -1) { 611 WARN_EXPR(lastShared); 612 WARN_EXPR(exclusive); 613 WARN_EXPR(addr); 614 WARN_EXPR(getTime()); 615 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 616 } 617 } 618 } 619#endif 620} 
621#endif 622 623RubySystem * 624RubySystemParams::create() 625{ 626 return new RubySystem(this); 627} 628 629/** 630 * virtual process function that is invoked when the callback 631 * queue is executed. 632 */ 633void 634RubyDumpStatsCallback::process() 635{ 636 ruby_system->printStats(*os); 637} 638