// RubySystem.cc, revision 10706
1/* 2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29#include <fcntl.h> 30#include <zlib.h> 31 32#include <cstdio> 33 34#include "base/intmath.hh" 35#include "base/statistics.hh" 36#include "debug/RubyCacheTrace.hh" 37#include "debug/RubySystem.hh" 38#include "mem/ruby/common/Address.hh" 39#include "mem/ruby/network/Network.hh" 40#include "mem/ruby/system/System.hh" 41#include "mem/simple_mem.hh" 42#include "sim/eventq.hh" 43#include "sim/simulate.hh" 44 45using namespace std; 46 47int RubySystem::m_random_seed; 48bool RubySystem::m_randomization; 49uint32_t RubySystem::m_block_size_bytes; 50uint32_t RubySystem::m_block_size_bits; 51uint32_t RubySystem::m_memory_size_bits; 52 53RubySystem::RubySystem(const Params *p) 54 : ClockedObject(p), m_access_backing_store(p->access_backing_store) 55{ 56 if (g_system_ptr != NULL) 57 fatal("Only one RubySystem object currently allowed.\n"); 58 59 m_random_seed = p->random_seed; 60 srandom(m_random_seed); 61 m_randomization = p->randomization; 62 63 m_block_size_bytes = p->block_size_bytes; 64 assert(isPowerOf2(m_block_size_bytes)); 65 m_block_size_bits = floorLog2(m_block_size_bytes); 66 m_memory_size_bits = p->memory_size_bits; 67 68 m_warmup_enabled = false; 69 m_cooldown_enabled = false; 70 71 // Setup the global variables used in Ruby 72 g_system_ptr = this; 73 74 // Resize to the size of different machine types 75 g_abs_controls.resize(MachineType_NUM); 76 77 // Collate the statistics before they are printed. 
78 Stats::registerDumpCallback(new RubyStatsCallback(this)); 79 // Create the profiler 80 m_profiler = new Profiler(p); 81 m_phys_mem = p->phys_mem; 82} 83 84void 85RubySystem::registerNetwork(Network* network_ptr) 86{ 87 m_network = network_ptr; 88} 89 90void 91RubySystem::registerAbstractController(AbstractController* cntrl) 92{ 93 m_abs_cntrl_vec.push_back(cntrl); 94 95 MachineID id = cntrl->getMachineID(); 96 g_abs_controls[id.getType()][id.getNum()] = cntrl; 97} 98 99RubySystem::~RubySystem() 100{ 101 delete m_network; 102 delete m_profiler; 103} 104 105void 106RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename, 107 uint64 uncompressed_trace_size) 108{ 109 // Create the checkpoint file for the memory 110 string thefile = Checkpoint::dir() + "/" + filename.c_str(); 111 112 int fd = creat(thefile.c_str(), 0664); 113 if (fd < 0) { 114 perror("creat"); 115 fatal("Can't open memory trace file '%s'\n", filename); 116 } 117 118 gzFile compressedMemory = gzdopen(fd, "wb"); 119 if (compressedMemory == NULL) 120 fatal("Insufficient memory to allocate compression state for %s\n", 121 filename); 122 123 if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) != 124 uncompressed_trace_size) { 125 fatal("Write failed on memory trace file '%s'\n", filename); 126 } 127 128 if (gzclose(compressedMemory)) { 129 fatal("Close failed on memory trace file '%s'\n", filename); 130 } 131 delete[] raw_data; 132} 133 134void 135RubySystem::serialize(std::ostream &os) 136{ 137 m_cooldown_enabled = true; 138 vector<Sequencer*> sequencer_map; 139 Sequencer* sequencer_ptr = NULL; 140 141 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 142 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 143 if (sequencer_ptr == NULL) { 144 sequencer_ptr = sequencer_map[cntrl]; 145 } 146 } 147 148 assert(sequencer_ptr != NULL); 149 150 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 151 if (sequencer_map[cntrl] == NULL) { 152 
sequencer_map[cntrl] = sequencer_ptr; 153 } 154 } 155 156 // Store the cache-block size, so we are able to restore on systems with a 157 // different cache-block size. CacheRecorder depends on the correct 158 // cache-block size upon unserializing. 159 uint64 block_size_bytes = getBlockSizeBytes(); 160 SERIALIZE_SCALAR(block_size_bytes); 161 162 DPRINTF(RubyCacheTrace, "Recording Cache Trace\n"); 163 // Create the CacheRecorder and record the cache trace 164 m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map, 165 block_size_bytes); 166 167 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 168 m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder); 169 } 170 171 DPRINTF(RubyCacheTrace, "Cache Trace Complete\n"); 172 // save the current tick value 173 Tick curtick_original = curTick(); 174 // save the event queue head 175 Event* eventq_head = eventq->replaceHead(NULL); 176 DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n", 177 curtick_original); 178 179 // Schedule an event to start cache cooldown 180 DPRINTF(RubyCacheTrace, "Starting cache flush\n"); 181 enqueueRubyEvent(curTick()); 182 simulate(); 183 DPRINTF(RubyCacheTrace, "Cache flush complete\n"); 184 185 // Restore eventq head 186 eventq_head = eventq->replaceHead(eventq_head); 187 // Restore curTick 188 setCurTick(curtick_original); 189 190 // Aggergate the trace entries together into a single array 191 uint8_t *raw_data = new uint8_t[4096]; 192 uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data, 193 4096); 194 string cache_trace_file = name() + ".cache.gz"; 195 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size); 196 197 SERIALIZE_SCALAR(cache_trace_file); 198 SERIALIZE_SCALAR(cache_trace_size); 199 200 m_cooldown_enabled = false; 201} 202 203void 204RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data, 205 uint64& uncompressed_trace_size) 206{ 207 // Read the trace file 208 gzFile compressedTrace; 209 210 
// trace file 211 int fd = open(filename.c_str(), O_RDONLY); 212 if (fd < 0) { 213 perror("open"); 214 fatal("Unable to open trace file %s", filename); 215 } 216 217 compressedTrace = gzdopen(fd, "rb"); 218 if (compressedTrace == NULL) { 219 fatal("Insufficient memory to allocate compression state for %s\n", 220 filename); 221 } 222 223 raw_data = new uint8_t[uncompressed_trace_size]; 224 if (gzread(compressedTrace, raw_data, uncompressed_trace_size) < 225 uncompressed_trace_size) { 226 fatal("Unable to read complete trace from file %s\n", filename); 227 } 228 229 if (gzclose(compressedTrace)) { 230 fatal("Failed to close cache trace file '%s'\n", filename); 231 } 232} 233 234void 235RubySystem::unserialize(Checkpoint *cp, const string §ion) 236{ 237 uint8_t *uncompressed_trace = NULL; 238 239 // This value should be set to the checkpoint-system's block-size. 240 // Optional, as checkpoints without it can be run if the 241 // checkpoint-system's block-size == current block-size. 242 uint64 block_size_bytes = getBlockSizeBytes(); 243 UNSERIALIZE_OPT_SCALAR(block_size_bytes); 244 245 string cache_trace_file; 246 uint64 cache_trace_size = 0; 247 248 UNSERIALIZE_SCALAR(cache_trace_file); 249 UNSERIALIZE_SCALAR(cache_trace_size); 250 cache_trace_file = cp->cptDir + "/" + cache_trace_file; 251 252 readCompressedTrace(cache_trace_file, uncompressed_trace, 253 cache_trace_size); 254 m_warmup_enabled = true; 255 256 vector<Sequencer*> sequencer_map; 257 Sequencer* t = NULL; 258 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 259 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 260 if (t == NULL) t = sequencer_map[cntrl]; 261 } 262 263 assert(t != NULL); 264 265 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 266 if (sequencer_map[cntrl] == NULL) { 267 sequencer_map[cntrl] = t; 268 } 269 } 270 271 m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, 272 sequencer_map, block_size_bytes); 273} 274 275void 
276RubySystem::startup() 277{ 278 279 // Ruby restores state from a checkpoint by resetting the clock to 0 and 280 // playing the requests that can possibly re-generate the cache state. 281 // The clock value is set to the actual checkpointed value once all the 282 // requests have been executed. 283 // 284 // This way of restoring state is pretty finicky. For example, if a 285 // Ruby component reads time before the state has been restored, it would 286 // cache this value and hence its clock would not be reset to 0, when 287 // Ruby resets the global clock. This can potentially result in a 288 // deadlock. 289 // 290 // The solution is that no Ruby component should read time before the 291 // simulation starts. And then one also needs to hope that the time 292 // Ruby finishes restoring the state is less than the time when the 293 // state was checkpointed. 294 295 if (m_warmup_enabled) { 296 // save the current tick value 297 Tick curtick_original = curTick(); 298 // save the event queue head 299 Event* eventq_head = eventq->replaceHead(NULL); 300 // set curTick to 0 and reset Ruby System's clock 301 setCurTick(0); 302 resetClock(); 303 304 // Schedule an event to start cache warmup 305 enqueueRubyEvent(curTick()); 306 simulate(); 307 308 delete m_cache_recorder; 309 m_cache_recorder = NULL; 310 m_warmup_enabled = false; 311 312 // Restore eventq head 313 eventq_head = eventq->replaceHead(eventq_head); 314 // Restore curTick and Ruby System's clock 315 setCurTick(curtick_original); 316 resetClock(); 317 } 318 319 resetStats(); 320} 321 322void 323RubySystem::RubyEvent::process() 324{ 325 if (ruby_system->m_warmup_enabled) { 326 ruby_system->m_cache_recorder->enqueueNextFetchRequest(); 327 } else if (ruby_system->m_cooldown_enabled) { 328 ruby_system->m_cache_recorder->enqueueNextFlushRequest(); 329 } 330} 331 332void 333RubySystem::resetStats() 334{ 335 g_ruby_start = curCycle(); 336} 337 338bool 339RubySystem::functionalRead(PacketPtr pkt) 340{ 341 Address 
address(pkt->getAddr()); 342 Address line_address(address); 343 line_address.makeLineAddress(); 344 345 AccessPermission access_perm = AccessPermission_NotPresent; 346 int num_controllers = m_abs_cntrl_vec.size(); 347 348 DPRINTF(RubySystem, "Functional Read request for %s\n",address); 349 350 unsigned int num_ro = 0; 351 unsigned int num_rw = 0; 352 unsigned int num_busy = 0; 353 unsigned int num_backing_store = 0; 354 unsigned int num_invalid = 0; 355 356 // In this loop we count the number of controllers that have the given 357 // address in read only, read write and busy states. 358 for (unsigned int i = 0; i < num_controllers; ++i) { 359 access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address); 360 if (access_perm == AccessPermission_Read_Only) 361 num_ro++; 362 else if (access_perm == AccessPermission_Read_Write) 363 num_rw++; 364 else if (access_perm == AccessPermission_Busy) 365 num_busy++; 366 else if (access_perm == AccessPermission_Backing_Store) 367 // See RubySlicc_Exports.sm for details, but Backing_Store is meant 368 // to represent blocks in memory *for Broadcast/Snooping protocols*, 369 // where memory has no idea whether it has an exclusive copy of data 370 // or not. 371 num_backing_store++; 372 else if (access_perm == AccessPermission_Invalid || 373 access_perm == AccessPermission_NotPresent) 374 num_invalid++; 375 } 376 assert(num_rw <= 1); 377 378 // This if case is meant to capture what happens in a Broadcast/Snoop 379 // protocol where the block does not exist in the cache hierarchy. You 380 // only want to read from the Backing_Store memory if there is no copy in 381 // the cache hierarchy, otherwise you want to try to read the RO or RW 382 // copies existing in the cache hierarchy (covered by the else statement). 383 // The reason is because the Backing_Store memory could easily be stale, if 384 // there are copies floating around the cache hierarchy, so you want to read 385 // it only if it's not in the cache hierarchy at all. 
386 if (num_invalid == (num_controllers - 1) && num_backing_store == 1) { 387 DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n"); 388 for (unsigned int i = 0; i < num_controllers; ++i) { 389 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 390 if (access_perm == AccessPermission_Backing_Store) { 391 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); 392 return true; 393 } 394 } 395 } else if (num_ro > 0 || num_rw == 1) { 396 // In Broadcast/Snoop protocols, this covers if you know the block 397 // exists somewhere in the caching hierarchy, then you want to read any 398 // valid RO or RW block. In directory protocols, same thing, you want 399 // to read any valid readable copy of the block. 400 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n", 401 num_busy, num_ro, num_rw); 402 // In this loop, we try to figure which controller has a read only or 403 // a read write copy of the given address. Any valid copy would suffice 404 // for a functional read. 405 for (unsigned int i = 0;i < num_controllers;++i) { 406 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 407 if (access_perm == AccessPermission_Read_Only || 408 access_perm == AccessPermission_Read_Write) { 409 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); 410 return true; 411 } 412 } 413 } 414 415 return false; 416} 417 418// The function searches through all the buffers that exist in different 419// cache, directory and memory controllers, and in the network components 420// and writes the data portion of those that hold the address specified 421// in the packet. 
422bool 423RubySystem::functionalWrite(PacketPtr pkt) 424{ 425 Address addr(pkt->getAddr()); 426 Address line_addr = line_address(addr); 427 AccessPermission access_perm = AccessPermission_NotPresent; 428 int num_controllers = m_abs_cntrl_vec.size(); 429 430 DPRINTF(RubySystem, "Functional Write request for %s\n",addr); 431 432 uint32_t M5_VAR_USED num_functional_writes = 0; 433 434 for (unsigned int i = 0; i < num_controllers;++i) { 435 num_functional_writes += 436 m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt); 437 438 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr); 439 if (access_perm != AccessPermission_Invalid && 440 access_perm != AccessPermission_NotPresent) { 441 num_functional_writes += 442 m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt); 443 } 444 } 445 446 num_functional_writes += m_network->functionalWrite(pkt); 447 DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes); 448 449 return true; 450} 451 452#ifdef CHECK_COHERENCE 453// This code will check for cases if the given cache block is exclusive in 454// one node and shared in another-- a coherence violation 455// 456// To use, the SLICC specification must call sequencer.checkCoherence(address) 457// when the controller changes to a state with new permissions. Do this 458// in setState. 
The SLICC spec must also define methods "isBlockShared" 459// and "isBlockExclusive" that are specific to that protocol 460// 461void 462RubySystem::checkGlobalCoherenceInvariant(const Address& addr) 463{ 464#if 0 465 NodeID exclusive = -1; 466 bool sharedDetected = false; 467 NodeID lastShared = -1; 468 469 for (int i = 0; i < m_chip_vector.size(); i++) { 470 if (m_chip_vector[i]->isBlockExclusive(addr)) { 471 if (exclusive != -1) { 472 // coherence violation 473 WARN_EXPR(exclusive); 474 WARN_EXPR(m_chip_vector[i]->getID()); 475 WARN_EXPR(addr); 476 WARN_EXPR(getTime()); 477 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips"); 478 } else if (sharedDetected) { 479 WARN_EXPR(lastShared); 480 WARN_EXPR(m_chip_vector[i]->getID()); 481 WARN_EXPR(addr); 482 WARN_EXPR(getTime()); 483 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 484 } else { 485 exclusive = m_chip_vector[i]->getID(); 486 } 487 } else if (m_chip_vector[i]->isBlockShared(addr)) { 488 sharedDetected = true; 489 lastShared = m_chip_vector[i]->getID(); 490 491 if (exclusive != -1) { 492 WARN_EXPR(lastShared); 493 WARN_EXPR(exclusive); 494 WARN_EXPR(addr); 495 WARN_EXPR(getTime()); 496 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 497 } 498 } 499 } 500#endif 501} 502#endif 503 504RubySystem * 505RubySystemParams::create() 506{ 507 return new RubySystem(this); 508} 509