// RubySystem.cc -- revision 10525
1/* 2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29#include <fcntl.h> 30#include <zlib.h> 31 32#include <cstdio> 33 34#include "base/intmath.hh" 35#include "base/statistics.hh" 36#include "debug/RubyCacheTrace.hh" 37#include "debug/RubySystem.hh" 38#include "mem/ruby/common/Address.hh" 39#include "mem/ruby/network/Network.hh" 40#include "mem/ruby/system/System.hh" 41#include "sim/eventq.hh" 42#include "sim/simulate.hh" 43 44using namespace std; 45 46int RubySystem::m_random_seed; 47bool RubySystem::m_randomization; 48uint32_t RubySystem::m_block_size_bytes; 49uint32_t RubySystem::m_block_size_bits; 50uint32_t RubySystem::m_memory_size_bits; 51 52RubySystem::RubySystem(const Params *p) 53 : ClockedObject(p) 54{ 55 if (g_system_ptr != NULL) 56 fatal("Only one RubySystem object currently allowed.\n"); 57 58 m_random_seed = p->random_seed; 59 srandom(m_random_seed); 60 m_randomization = p->randomization; 61 62 m_block_size_bytes = p->block_size_bytes; 63 assert(isPowerOf2(m_block_size_bytes)); 64 m_block_size_bits = floorLog2(m_block_size_bytes); 65 m_memory_size_bits = p->memory_size_bits; 66 67 m_warmup_enabled = false; 68 m_cooldown_enabled = false; 69 70 // Setup the global variables used in Ruby 71 g_system_ptr = this; 72 73 // Resize to the size of different machine types 74 g_abs_controls.resize(MachineType_NUM); 75 76 // Collate the statistics before they are printed. 
77 Stats::registerDumpCallback(new RubyStatsCallback(this)); 78 // Create the profiler 79 m_profiler = new Profiler(p); 80 m_phys_mem = p->phys_mem; 81} 82 83void 84RubySystem::registerNetwork(Network* network_ptr) 85{ 86 m_network = network_ptr; 87} 88 89void 90RubySystem::registerAbstractController(AbstractController* cntrl) 91{ 92 m_abs_cntrl_vec.push_back(cntrl); 93 94 MachineID id = cntrl->getMachineID(); 95 g_abs_controls[id.getType()][id.getNum()] = cntrl; 96} 97 98RubySystem::~RubySystem() 99{ 100 delete m_network; 101 delete m_profiler; 102} 103 104void 105RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename, 106 uint64 uncompressed_trace_size) 107{ 108 // Create the checkpoint file for the memory 109 string thefile = Checkpoint::dir() + "/" + filename.c_str(); 110 111 int fd = creat(thefile.c_str(), 0664); 112 if (fd < 0) { 113 perror("creat"); 114 fatal("Can't open memory trace file '%s'\n", filename); 115 } 116 117 gzFile compressedMemory = gzdopen(fd, "wb"); 118 if (compressedMemory == NULL) 119 fatal("Insufficient memory to allocate compression state for %s\n", 120 filename); 121 122 if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) != 123 uncompressed_trace_size) { 124 fatal("Write failed on memory trace file '%s'\n", filename); 125 } 126 127 if (gzclose(compressedMemory)) { 128 fatal("Close failed on memory trace file '%s'\n", filename); 129 } 130 delete[] raw_data; 131} 132 133void 134RubySystem::serialize(std::ostream &os) 135{ 136 m_cooldown_enabled = true; 137 vector<Sequencer*> sequencer_map; 138 Sequencer* sequencer_ptr = NULL; 139 140 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 141 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 142 if (sequencer_ptr == NULL) { 143 sequencer_ptr = sequencer_map[cntrl]; 144 } 145 } 146 147 assert(sequencer_ptr != NULL); 148 149 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 150 if (sequencer_map[cntrl] == NULL) { 151 
sequencer_map[cntrl] = sequencer_ptr; 152 } 153 } 154 155 // Store the cache-block size, so we are able to restore on systems with a 156 // different cache-block size. CacheRecorder depends on the correct 157 // cache-block size upon unserializing. 158 uint64 block_size_bytes = getBlockSizeBytes(); 159 SERIALIZE_SCALAR(block_size_bytes); 160 161 DPRINTF(RubyCacheTrace, "Recording Cache Trace\n"); 162 // Create the CacheRecorder and record the cache trace 163 m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map, 164 block_size_bytes); 165 166 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 167 m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder); 168 } 169 170 DPRINTF(RubyCacheTrace, "Cache Trace Complete\n"); 171 // save the current tick value 172 Tick curtick_original = curTick(); 173 // save the event queue head 174 Event* eventq_head = eventq->replaceHead(NULL); 175 DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n", 176 curtick_original); 177 178 // Schedule an event to start cache cooldown 179 DPRINTF(RubyCacheTrace, "Starting cache flush\n"); 180 enqueueRubyEvent(curTick()); 181 simulate(); 182 DPRINTF(RubyCacheTrace, "Cache flush complete\n"); 183 184 // Restore eventq head 185 eventq_head = eventq->replaceHead(eventq_head); 186 // Restore curTick 187 setCurTick(curtick_original); 188 189 // Aggergate the trace entries together into a single array 190 uint8_t *raw_data = new uint8_t[4096]; 191 uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data, 192 4096); 193 string cache_trace_file = name() + ".cache.gz"; 194 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size); 195 196 SERIALIZE_SCALAR(cache_trace_file); 197 SERIALIZE_SCALAR(cache_trace_size); 198 199 m_cooldown_enabled = false; 200} 201 202void 203RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data, 204 uint64& uncompressed_trace_size) 205{ 206 // Read the trace file 207 gzFile compressedTrace; 208 209 
// trace file 210 int fd = open(filename.c_str(), O_RDONLY); 211 if (fd < 0) { 212 perror("open"); 213 fatal("Unable to open trace file %s", filename); 214 } 215 216 compressedTrace = gzdopen(fd, "rb"); 217 if (compressedTrace == NULL) { 218 fatal("Insufficient memory to allocate compression state for %s\n", 219 filename); 220 } 221 222 raw_data = new uint8_t[uncompressed_trace_size]; 223 if (gzread(compressedTrace, raw_data, uncompressed_trace_size) < 224 uncompressed_trace_size) { 225 fatal("Unable to read complete trace from file %s\n", filename); 226 } 227 228 if (gzclose(compressedTrace)) { 229 fatal("Failed to close cache trace file '%s'\n", filename); 230 } 231} 232 233void 234RubySystem::unserialize(Checkpoint *cp, const string §ion) 235{ 236 uint8_t *uncompressed_trace = NULL; 237 238 // This value should be set to the checkpoint-system's block-size. 239 // Optional, as checkpoints without it can be run if the 240 // checkpoint-system's block-size == current block-size. 241 uint64 block_size_bytes = getBlockSizeBytes(); 242 UNSERIALIZE_OPT_SCALAR(block_size_bytes); 243 244 string cache_trace_file; 245 uint64 cache_trace_size = 0; 246 247 UNSERIALIZE_SCALAR(cache_trace_file); 248 UNSERIALIZE_SCALAR(cache_trace_size); 249 cache_trace_file = cp->cptDir + "/" + cache_trace_file; 250 251 readCompressedTrace(cache_trace_file, uncompressed_trace, 252 cache_trace_size); 253 m_warmup_enabled = true; 254 255 vector<Sequencer*> sequencer_map; 256 Sequencer* t = NULL; 257 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 258 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 259 if (t == NULL) t = sequencer_map[cntrl]; 260 } 261 262 assert(t != NULL); 263 264 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 265 if (sequencer_map[cntrl] == NULL) { 266 sequencer_map[cntrl] = t; 267 } 268 } 269 270 m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, 271 sequencer_map, block_size_bytes); 272} 273 274void 
275RubySystem::startup() 276{ 277 278 // Ruby restores state from a checkpoint by resetting the clock to 0 and 279 // playing the requests that can possibly re-generate the cache state. 280 // The clock value is set to the actual checkpointed value once all the 281 // requests have been executed. 282 // 283 // This way of restoring state is pretty finicky. For example, if a 284 // Ruby component reads time before the state has been restored, it would 285 // cache this value and hence its clock would not be reset to 0, when 286 // Ruby resets the global clock. This can potentially result in a 287 // deadlock. 288 // 289 // The solution is that no Ruby component should read time before the 290 // simulation starts. And then one also needs to hope that the time 291 // Ruby finishes restoring the state is less than the time when the 292 // state was checkpointed. 293 294 if (m_warmup_enabled) { 295 // save the current tick value 296 Tick curtick_original = curTick(); 297 // save the event queue head 298 Event* eventq_head = eventq->replaceHead(NULL); 299 // set curTick to 0 and reset Ruby System's clock 300 setCurTick(0); 301 resetClock(); 302 303 // Schedule an event to start cache warmup 304 enqueueRubyEvent(curTick()); 305 simulate(); 306 307 delete m_cache_recorder; 308 m_cache_recorder = NULL; 309 m_warmup_enabled = false; 310 311 // Restore eventq head 312 eventq_head = eventq->replaceHead(eventq_head); 313 // Restore curTick and Ruby System's clock 314 setCurTick(curtick_original); 315 resetClock(); 316 } 317 318 resetStats(); 319} 320 321void 322RubySystem::RubyEvent::process() 323{ 324 if (ruby_system->m_warmup_enabled) { 325 ruby_system->m_cache_recorder->enqueueNextFetchRequest(); 326 } else if (ruby_system->m_cooldown_enabled) { 327 ruby_system->m_cache_recorder->enqueueNextFlushRequest(); 328 } 329} 330 331void 332RubySystem::resetStats() 333{ 334 g_ruby_start = curCycle(); 335} 336 337bool 338RubySystem::functionalRead(PacketPtr pkt) 339{ 340 Address 
address(pkt->getAddr()); 341 Address line_address(address); 342 line_address.makeLineAddress(); 343 344 AccessPermission access_perm = AccessPermission_NotPresent; 345 int num_controllers = m_abs_cntrl_vec.size(); 346 347 DPRINTF(RubySystem, "Functional Read request for %s\n",address); 348 349 unsigned int num_ro = 0; 350 unsigned int num_rw = 0; 351 unsigned int num_busy = 0; 352 unsigned int num_backing_store = 0; 353 unsigned int num_invalid = 0; 354 355 // In this loop we count the number of controllers that have the given 356 // address in read only, read write and busy states. 357 for (unsigned int i = 0; i < num_controllers; ++i) { 358 access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address); 359 if (access_perm == AccessPermission_Read_Only) 360 num_ro++; 361 else if (access_perm == AccessPermission_Read_Write) 362 num_rw++; 363 else if (access_perm == AccessPermission_Busy) 364 num_busy++; 365 else if (access_perm == AccessPermission_Backing_Store) 366 // See RubySlicc_Exports.sm for details, but Backing_Store is meant 367 // to represent blocks in memory *for Broadcast/Snooping protocols*, 368 // where memory has no idea whether it has an exclusive copy of data 369 // or not. 370 num_backing_store++; 371 else if (access_perm == AccessPermission_Invalid || 372 access_perm == AccessPermission_NotPresent) 373 num_invalid++; 374 } 375 assert(num_rw <= 1); 376 377 // This if case is meant to capture what happens in a Broadcast/Snoop 378 // protocol where the block does not exist in the cache hierarchy. You 379 // only want to read from the Backing_Store memory if there is no copy in 380 // the cache hierarchy, otherwise you want to try to read the RO or RW 381 // copies existing in the cache hierarchy (covered by the else statement). 382 // The reason is because the Backing_Store memory could easily be stale, if 383 // there are copies floating around the cache hierarchy, so you want to read 384 // it only if it's not in the cache hierarchy at all. 
385 if (num_invalid == (num_controllers - 1) && num_backing_store == 1) { 386 DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n"); 387 for (unsigned int i = 0; i < num_controllers; ++i) { 388 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 389 if (access_perm == AccessPermission_Backing_Store) { 390 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); 391 return true; 392 } 393 } 394 } else if (num_ro > 0 || num_rw == 1) { 395 // In Broadcast/Snoop protocols, this covers if you know the block 396 // exists somewhere in the caching hierarchy, then you want to read any 397 // valid RO or RW block. In directory protocols, same thing, you want 398 // to read any valid readable copy of the block. 399 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n", 400 num_busy, num_ro, num_rw); 401 // In this loop, we try to figure which controller has a read only or 402 // a read write copy of the given address. Any valid copy would suffice 403 // for a functional read. 404 for (unsigned int i = 0;i < num_controllers;++i) { 405 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 406 if (access_perm == AccessPermission_Read_Only || 407 access_perm == AccessPermission_Read_Write) { 408 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); 409 return true; 410 } 411 } 412 } 413 414 return false; 415} 416 417// The function searches through all the buffers that exist in different 418// cache, directory and memory controllers, and in the network components 419// and writes the data portion of those that hold the address specified 420// in the packet. 
421bool 422RubySystem::functionalWrite(PacketPtr pkt) 423{ 424 Address addr(pkt->getAddr()); 425 Address line_addr = line_address(addr); 426 AccessPermission access_perm = AccessPermission_NotPresent; 427 int num_controllers = m_abs_cntrl_vec.size(); 428 429 DPRINTF(RubySystem, "Functional Write request for %s\n",addr); 430 431 uint32_t M5_VAR_USED num_functional_writes = 0; 432 433 for (unsigned int i = 0; i < num_controllers;++i) { 434 num_functional_writes += 435 m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt); 436 437 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr); 438 if (access_perm != AccessPermission_Invalid && 439 access_perm != AccessPermission_NotPresent) { 440 num_functional_writes += 441 m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt); 442 } 443 } 444 445 num_functional_writes += m_network->functionalWrite(pkt); 446 DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes); 447 448 return true; 449} 450 451#ifdef CHECK_COHERENCE 452// This code will check for cases if the given cache block is exclusive in 453// one node and shared in another-- a coherence violation 454// 455// To use, the SLICC specification must call sequencer.checkCoherence(address) 456// when the controller changes to a state with new permissions. Do this 457// in setState. 
The SLICC spec must also define methods "isBlockShared" 458// and "isBlockExclusive" that are specific to that protocol 459// 460void 461RubySystem::checkGlobalCoherenceInvariant(const Address& addr) 462{ 463#if 0 464 NodeID exclusive = -1; 465 bool sharedDetected = false; 466 NodeID lastShared = -1; 467 468 for (int i = 0; i < m_chip_vector.size(); i++) { 469 if (m_chip_vector[i]->isBlockExclusive(addr)) { 470 if (exclusive != -1) { 471 // coherence violation 472 WARN_EXPR(exclusive); 473 WARN_EXPR(m_chip_vector[i]->getID()); 474 WARN_EXPR(addr); 475 WARN_EXPR(getTime()); 476 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips"); 477 } else if (sharedDetected) { 478 WARN_EXPR(lastShared); 479 WARN_EXPR(m_chip_vector[i]->getID()); 480 WARN_EXPR(addr); 481 WARN_EXPR(getTime()); 482 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 483 } else { 484 exclusive = m_chip_vector[i]->getID(); 485 } 486 } else if (m_chip_vector[i]->isBlockShared(addr)) { 487 sharedDetected = true; 488 lastShared = m_chip_vector[i]->getID(); 489 490 if (exclusive != -1) { 491 WARN_EXPR(lastShared); 492 WARN_EXPR(exclusive); 493 WARN_EXPR(addr); 494 WARN_EXPR(getTime()); 495 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 496 } 497 } 498 } 499#endif 500} 501#endif 502 503RubySystem * 504RubySystemParams::create() 505{ 506 return new RubySystem(this); 507} 508