RubySystem.cc revision 10917
1/* 2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include <fcntl.h> 30#include <zlib.h> 31 32#include <cstdio> 33 34#include "base/intmath.hh" 35#include "base/statistics.hh" 36#include "debug/RubyCacheTrace.hh" 37#include "debug/RubySystem.hh" 38#include "mem/ruby/common/Address.hh" 39#include "mem/ruby/network/Network.hh" 40#include "mem/ruby/system/System.hh" 41#include "mem/simple_mem.hh" 42#include "sim/eventq.hh" 43#include "sim/simulate.hh" 44 45using namespace std; 46 47int RubySystem::m_random_seed; 48bool RubySystem::m_randomization; 49uint32_t RubySystem::m_block_size_bytes; 50uint32_t RubySystem::m_block_size_bits; 51uint32_t RubySystem::m_memory_size_bits; 52bool RubySystem::m_warmup_enabled = false; 53// To look forward to allowing multiple RubySystem instances, track the number 54// of RubySystems that need to be warmed up on checkpoint restore. 55unsigned RubySystem::m_systems_to_warmup = 0; 56bool RubySystem::m_cooldown_enabled = false; 57 58RubySystem::RubySystem(const Params *p) 59 : ClockedObject(p), m_access_backing_store(p->access_backing_store) 60{ 61 if (g_system_ptr != NULL) 62 fatal("Only one RubySystem object currently allowed.\n"); 63 64 m_random_seed = p->random_seed; 65 srandom(m_random_seed); 66 m_randomization = p->randomization; 67 68 m_block_size_bytes = p->block_size_bytes; 69 assert(isPowerOf2(m_block_size_bytes)); 70 m_block_size_bits = floorLog2(m_block_size_bytes); 71 m_memory_size_bits = p->memory_size_bits; 72 73 // Setup the global variables used in Ruby 74 g_system_ptr = this; 75 76 // Resize to the size of different machine types 77 g_abs_controls.resize(MachineType_NUM); 78 79 // Collate the statistics before they are printed. 80 Stats::registerDumpCallback(new RubyStatsCallback(this)); 81 // Create the profiler 82 m_profiler = new Profiler(p); 83 m_phys_mem = p->phys_mem; 84} 85 86void 87RubySystem::registerNetwork(Network* network_ptr) 88{ 89 m_network = network_ptr; 90} 91 92void 93RubySystem::registerAbstractController(AbstractController* cntrl) 94{ 95 m_abs_cntrl_vec.push_back(cntrl); 96 97 MachineID id = cntrl->getMachineID(); 98 g_abs_controls[id.getType()][id.getNum()] = cntrl; 99} 100 101RubySystem::~RubySystem() 102{ 103 delete m_network; 104 delete m_profiler; 105} 106 107void 108RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename, 109 uint64 uncompressed_trace_size) 110{ 111 // Create the checkpoint file for the memory 112 string thefile = CheckpointIn::dir() + "/" + filename.c_str(); 113 114 int fd = creat(thefile.c_str(), 0664); 115 if (fd < 0) { 116 perror("creat"); 117 fatal("Can't open memory trace file '%s'\n", filename); 118 } 119 120 gzFile compressedMemory = gzdopen(fd, "wb"); 121 if (compressedMemory == NULL) 122 fatal("Insufficient memory to allocate compression state for %s\n", 123 filename); 124 125 if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) != 126 uncompressed_trace_size) { 127 fatal("Write failed on memory trace file '%s'\n", filename); 128 } 129 130 if (gzclose(compressedMemory)) { 131 fatal("Close failed on memory trace file '%s'\n", filename); 132 } 133 delete[] raw_data; 134} 135 136void 137RubySystem::serializeOld(CheckpointOut &cp) 138{ 139 m_cooldown_enabled = true; 140 vector<Sequencer*> sequencer_map; 141 Sequencer* sequencer_ptr = NULL; 142 143 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 144 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 145 if (sequencer_ptr == NULL) { 146 sequencer_ptr = sequencer_map[cntrl]; 147 } 148 } 149 150 assert(sequencer_ptr != NULL); 151 152 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 153 if (sequencer_map[cntrl] == NULL) { 154 sequencer_map[cntrl] = sequencer_ptr; 155 } 156 } 157 158 // Store the cache-block size, so we are able to restore on systems with a 159 // different cache-block size. CacheRecorder depends on the correct 160 // cache-block size upon unserializing. 161 uint64 block_size_bytes = getBlockSizeBytes(); 162 SERIALIZE_SCALAR(block_size_bytes); 163 164 DPRINTF(RubyCacheTrace, "Recording Cache Trace\n"); 165 // Create the CacheRecorder and record the cache trace 166 m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map, 167 block_size_bytes); 168 169 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 170 m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder); 171 } 172 173 DPRINTF(RubyCacheTrace, "Cache Trace Complete\n"); 174 // save the current tick value 175 Tick curtick_original = curTick(); 176 // save the event queue head 177 Event* eventq_head = eventq->replaceHead(NULL); 178 DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n", 179 curtick_original); 180 181 // Schedule an event to start cache cooldown 182 DPRINTF(RubyCacheTrace, "Starting cache flush\n"); 183 enqueueRubyEvent(curTick()); 184 simulate(); 185 DPRINTF(RubyCacheTrace, "Cache flush complete\n"); 186 187 // Restore eventq head 188 eventq_head = eventq->replaceHead(eventq_head); 189 // Restore curTick 190 setCurTick(curtick_original); 191 192 // Aggregate the trace entries together into a single array 193 uint8_t *raw_data = new uint8_t[4096]; 194 uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data, 195 4096); 196 string cache_trace_file = name() + ".cache.gz"; 197 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size); 198 199 SERIALIZE_SCALAR(cache_trace_file); 200 SERIALIZE_SCALAR(cache_trace_size); 201 202 m_cooldown_enabled = false; 203} 204 205void 206RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data, 207 uint64& uncompressed_trace_size) 208{ 209 // Read the trace file 210 gzFile compressedTrace; 211 212 // trace file 213 int fd = open(filename.c_str(), O_RDONLY); 214 if (fd < 0) { 215 perror("open"); 216 fatal("Unable to open trace file %s", filename); 217 } 218 219 compressedTrace = gzdopen(fd, "rb"); 220 if (compressedTrace == NULL) { 221 fatal("Insufficient memory to allocate compression state for %s\n", 222 filename); 223 } 224 225 raw_data = new uint8_t[uncompressed_trace_size]; 226 if (gzread(compressedTrace, raw_data, uncompressed_trace_size) < 227 uncompressed_trace_size) { 228 fatal("Unable to read complete trace from file %s\n", filename); 229 } 230 231 if (gzclose(compressedTrace)) { 232 fatal("Failed to close cache trace file '%s'\n", filename); 233 } 234} 235 236void 237RubySystem::unserialize(CheckpointIn &cp) 238{ 239 uint8_t *uncompressed_trace = NULL; 240 241 // This value should be set to the checkpoint-system's block-size. 242 // Optional, as checkpoints without it can be run if the 243 // checkpoint-system's block-size == current block-size. 244 uint64 block_size_bytes = getBlockSizeBytes(); 245 UNSERIALIZE_OPT_SCALAR(block_size_bytes); 246 247 string cache_trace_file; 248 uint64 cache_trace_size = 0; 249 250 UNSERIALIZE_SCALAR(cache_trace_file); 251 UNSERIALIZE_SCALAR(cache_trace_size); 252 cache_trace_file = cp.cptDir + "/" + cache_trace_file; 253 254 readCompressedTrace(cache_trace_file, uncompressed_trace, 255 cache_trace_size); 256 m_warmup_enabled = true; 257 m_systems_to_warmup++; 258 259 vector<Sequencer*> sequencer_map; 260 Sequencer* t = NULL; 261 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 262 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 263 if (t == NULL) t = sequencer_map[cntrl]; 264 } 265 266 assert(t != NULL); 267 268 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 269 if (sequencer_map[cntrl] == NULL) { 270 sequencer_map[cntrl] = t; 271 } 272 } 273 274 m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, 275 sequencer_map, block_size_bytes); 276} 277 278void 279RubySystem::startup() 280{ 281 282 // Ruby restores state from a checkpoint by resetting the clock to 0 and 283 // playing the requests that can possibly re-generate the cache state. 284 // The clock value is set to the actual checkpointed value once all the 285 // requests have been executed. 286 // 287 // This way of restoring state is pretty finicky. For example, if a 288 // Ruby component reads time before the state has been restored, it would 289 // cache this value and hence its clock would not be reset to 0, when 290 // Ruby resets the global clock. This can potentially result in a 291 // deadlock. 292 // 293 // The solution is that no Ruby component should read time before the 294 // simulation starts. And then one also needs to hope that the time 295 // Ruby finishes restoring the state is less than the time when the 296 // state was checkpointed. 297 298 if (m_warmup_enabled) { 299 // save the current tick value 300 Tick curtick_original = curTick(); 301 // save the event queue head 302 Event* eventq_head = eventq->replaceHead(NULL); 303 // set curTick to 0 and reset Ruby System's clock 304 setCurTick(0); 305 resetClock(); 306 307 // Schedule an event to start cache warmup 308 enqueueRubyEvent(curTick()); 309 simulate(); 310 311 delete m_cache_recorder; 312 m_cache_recorder = NULL; 313 m_systems_to_warmup--; 314 if (m_systems_to_warmup == 0) { 315 m_warmup_enabled = false; 316 } 317 318 // Restore eventq head 319 eventq_head = eventq->replaceHead(eventq_head); 320 // Restore curTick and Ruby System's clock 321 setCurTick(curtick_original); 322 resetClock(); 323 } 324 325 resetStats(); 326} 327 328void 329RubySystem::RubyEvent::process() 330{ 331 if (RubySystem::getWarmupEnabled()) { 332 ruby_system->m_cache_recorder->enqueueNextFetchRequest(); 333 } else if (RubySystem::getCooldownEnabled()) { 334 ruby_system->m_cache_recorder->enqueueNextFlushRequest(); 335 } 336} 337 338void 339RubySystem::resetStats() 340{ 341 g_ruby_start = curCycle(); 342} 343 344bool 345RubySystem::functionalRead(PacketPtr pkt) 346{ 347 Address address(pkt->getAddr()); 348 Address line_address(address); 349 line_address.makeLineAddress(); 350 351 AccessPermission access_perm = AccessPermission_NotPresent; 352 int num_controllers = m_abs_cntrl_vec.size(); 353 354 DPRINTF(RubySystem, "Functional Read request for %s\n",address); 355 356 unsigned int num_ro = 0; 357 unsigned int num_rw = 0; 358 unsigned int num_busy = 0; 359 unsigned int num_backing_store = 0; 360 unsigned int num_invalid = 0; 361 362 // In this loop we count the number of controllers that have the given 363 // address in read only, read write and busy states. 364 for (unsigned int i = 0; i < num_controllers; ++i) { 365 access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address); 366 if (access_perm == AccessPermission_Read_Only) 367 num_ro++; 368 else if (access_perm == AccessPermission_Read_Write) 369 num_rw++; 370 else if (access_perm == AccessPermission_Busy) 371 num_busy++; 372 else if (access_perm == AccessPermission_Backing_Store) 373 // See RubySlicc_Exports.sm for details, but Backing_Store is meant 374 // to represent blocks in memory *for Broadcast/Snooping protocols*, 375 // where memory has no idea whether it has an exclusive copy of data 376 // or not. 377 num_backing_store++; 378 else if (access_perm == AccessPermission_Invalid || 379 access_perm == AccessPermission_NotPresent) 380 num_invalid++; 381 } 382 assert(num_rw <= 1); 383 384 // This if case is meant to capture what happens in a Broadcast/Snoop 385 // protocol where the block does not exist in the cache hierarchy. You 386 // only want to read from the Backing_Store memory if there is no copy in 387 // the cache hierarchy, otherwise you want to try to read the RO or RW 388 // copies existing in the cache hierarchy (covered by the else statement). 389 // The reason is because the Backing_Store memory could easily be stale, if 390 // there are copies floating around the cache hierarchy, so you want to read 391 // it only if it's not in the cache hierarchy at all. 392 if (num_invalid == (num_controllers - 1) && num_backing_store == 1) { 393 DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n"); 394 for (unsigned int i = 0; i < num_controllers; ++i) { 395 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 396 if (access_perm == AccessPermission_Backing_Store) { 397 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); 398 return true; 399 } 400 } 401 } else if (num_ro > 0 || num_rw == 1) { 402 // In Broadcast/Snoop protocols, this covers if you know the block 403 // exists somewhere in the caching hierarchy, then you want to read any 404 // valid RO or RW block. In directory protocols, same thing, you want 405 // to read any valid readable copy of the block. 406 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n", 407 num_busy, num_ro, num_rw); 408 // In this loop, we try to figure which controller has a read only or 409 // a read write copy of the given address. Any valid copy would suffice 410 // for a functional read. 411 for (unsigned int i = 0;i < num_controllers;++i) { 412 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 413 if (access_perm == AccessPermission_Read_Only || 414 access_perm == AccessPermission_Read_Write) { 415 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); 416 return true; 417 } 418 } 419 } 420 421 return false; 422} 423 424// The function searches through all the buffers that exist in different 425// cache, directory and memory controllers, and in the network components 426// and writes the data portion of those that hold the address specified 427// in the packet. 428bool 429RubySystem::functionalWrite(PacketPtr pkt) 430{ 431 Address addr(pkt->getAddr()); 432 Address line_addr = line_address(addr); 433 AccessPermission access_perm = AccessPermission_NotPresent; 434 int num_controllers = m_abs_cntrl_vec.size(); 435 436 DPRINTF(RubySystem, "Functional Write request for %s\n",addr); 437 438 uint32_t M5_VAR_USED num_functional_writes = 0; 439 440 for (unsigned int i = 0; i < num_controllers;++i) { 441 num_functional_writes += 442 m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt); 443 444 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr); 445 if (access_perm != AccessPermission_Invalid && 446 access_perm != AccessPermission_NotPresent) { 447 num_functional_writes += 448 m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt); 449 } 450 } 451 452 num_functional_writes += m_network->functionalWrite(pkt); 453 DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes); 454 455 return true; 456} 457 458#ifdef CHECK_COHERENCE 459// This code will check for cases if the given cache block is exclusive in 460// one node and shared in another-- a coherence violation 461// 462// To use, the SLICC specification must call sequencer.checkCoherence(address) 463// when the controller changes to a state with new permissions. Do this 464// in setState. The SLICC spec must also define methods "isBlockShared" 465// and "isBlockExclusive" that are specific to that protocol 466// 467void 468RubySystem::checkGlobalCoherenceInvariant(const Address& addr) 469{ 470#if 0 471 NodeID exclusive = -1; 472 bool sharedDetected = false; 473 NodeID lastShared = -1; 474 475 for (int i = 0; i < m_chip_vector.size(); i++) { 476 if (m_chip_vector[i]->isBlockExclusive(addr)) { 477 if (exclusive != -1) { 478 // coherence violation 479 WARN_EXPR(exclusive); 480 WARN_EXPR(m_chip_vector[i]->getID()); 481 WARN_EXPR(addr); 482 WARN_EXPR(getTime()); 483 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips"); 484 } else if (sharedDetected) { 485 WARN_EXPR(lastShared); 486 WARN_EXPR(m_chip_vector[i]->getID()); 487 WARN_EXPR(addr); 488 WARN_EXPR(getTime()); 489 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 490 } else { 491 exclusive = m_chip_vector[i]->getID(); 492 } 493 } else if (m_chip_vector[i]->isBlockShared(addr)) { 494 sharedDetected = true; 495 lastShared = m_chip_vector[i]->getID(); 496 497 if (exclusive != -1) { 498 WARN_EXPR(lastShared); 499 WARN_EXPR(exclusive); 500 WARN_EXPR(addr); 501 WARN_EXPR(getTime()); 502 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 503 } 504 } 505 } 506#endif 507} 508#endif 509 510RubySystem * 511RubySystemParams::create() 512{ 513 return new RubySystem(this); 514} 515