// RubySystem.cc revision 10920
1/* 2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29#include <fcntl.h> 30#include <zlib.h> 31 32#include <cstdio> 33 34#include "base/intmath.hh" 35#include "base/statistics.hh" 36#include "debug/RubyCacheTrace.hh" 37#include "debug/RubySystem.hh" 38#include "mem/ruby/common/Address.hh" 39#include "mem/ruby/network/Network.hh" 40#include "mem/ruby/system/System.hh" 41#include "mem/simple_mem.hh" 42#include "sim/eventq.hh" 43#include "sim/simulate.hh" 44 45using namespace std; 46 47int RubySystem::m_random_seed; 48bool RubySystem::m_randomization; 49uint32_t RubySystem::m_block_size_bytes; 50uint32_t RubySystem::m_block_size_bits; 51uint32_t RubySystem::m_memory_size_bits; 52bool RubySystem::m_warmup_enabled = false; 53// To look forward to allowing multiple RubySystem instances, track the number 54// of RubySystems that need to be warmed up on checkpoint restore. 55unsigned RubySystem::m_systems_to_warmup = 0; 56bool RubySystem::m_cooldown_enabled = false; 57 58RubySystem::RubySystem(const Params *p) 59 : ClockedObject(p), m_access_backing_store(p->access_backing_store) 60{ 61 m_random_seed = p->random_seed; 62 srandom(m_random_seed); 63 m_randomization = p->randomization; 64 65 m_block_size_bytes = p->block_size_bytes; 66 assert(isPowerOf2(m_block_size_bytes)); 67 m_block_size_bits = floorLog2(m_block_size_bytes); 68 m_memory_size_bits = p->memory_size_bits; 69 70 // Resize to the size of different machine types 71 m_abstract_controls.resize(MachineType_NUM); 72 73 // Collate the statistics before they are printed. 
74 Stats::registerDumpCallback(new RubyStatsCallback(this)); 75 // Create the profiler 76 m_profiler = new Profiler(p, this); 77 m_phys_mem = p->phys_mem; 78} 79 80void 81RubySystem::registerNetwork(Network* network_ptr) 82{ 83 m_network = network_ptr; 84} 85 86void 87RubySystem::registerAbstractController(AbstractController* cntrl) 88{ 89 m_abs_cntrl_vec.push_back(cntrl); 90 91 MachineID id = cntrl->getMachineID(); 92 m_abstract_controls[id.getType()][id.getNum()] = cntrl; 93} 94 95RubySystem::~RubySystem() 96{ 97 delete m_network; 98 delete m_profiler; 99} 100 101void 102RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename, 103 uint64 uncompressed_trace_size) 104{ 105 // Create the checkpoint file for the memory 106 string thefile = CheckpointIn::dir() + "/" + filename.c_str(); 107 108 int fd = creat(thefile.c_str(), 0664); 109 if (fd < 0) { 110 perror("creat"); 111 fatal("Can't open memory trace file '%s'\n", filename); 112 } 113 114 gzFile compressedMemory = gzdopen(fd, "wb"); 115 if (compressedMemory == NULL) 116 fatal("Insufficient memory to allocate compression state for %s\n", 117 filename); 118 119 if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) != 120 uncompressed_trace_size) { 121 fatal("Write failed on memory trace file '%s'\n", filename); 122 } 123 124 if (gzclose(compressedMemory)) { 125 fatal("Close failed on memory trace file '%s'\n", filename); 126 } 127 delete[] raw_data; 128} 129 130void 131RubySystem::serializeOld(CheckpointOut &cp) 132{ 133 m_cooldown_enabled = true; 134 vector<Sequencer*> sequencer_map; 135 Sequencer* sequencer_ptr = NULL; 136 137 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 138 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 139 if (sequencer_ptr == NULL) { 140 sequencer_ptr = sequencer_map[cntrl]; 141 } 142 } 143 144 assert(sequencer_ptr != NULL); 145 146 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 147 if (sequencer_map[cntrl] == NULL) { 
148 sequencer_map[cntrl] = sequencer_ptr; 149 } 150 } 151 152 // Store the cache-block size, so we are able to restore on systems with a 153 // different cache-block size. CacheRecorder depends on the correct 154 // cache-block size upon unserializing. 155 uint64 block_size_bytes = getBlockSizeBytes(); 156 SERIALIZE_SCALAR(block_size_bytes); 157 158 DPRINTF(RubyCacheTrace, "Recording Cache Trace\n"); 159 // Create the CacheRecorder and record the cache trace 160 m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map, 161 block_size_bytes); 162 163 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 164 m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder); 165 } 166 167 DPRINTF(RubyCacheTrace, "Cache Trace Complete\n"); 168 // save the current tick value 169 Tick curtick_original = curTick(); 170 // save the event queue head 171 Event* eventq_head = eventq->replaceHead(NULL); 172 DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n", 173 curtick_original); 174 175 // Schedule an event to start cache cooldown 176 DPRINTF(RubyCacheTrace, "Starting cache flush\n"); 177 enqueueRubyEvent(curTick()); 178 simulate(); 179 DPRINTF(RubyCacheTrace, "Cache flush complete\n"); 180 181 // Restore eventq head 182 eventq_head = eventq->replaceHead(eventq_head); 183 // Restore curTick 184 setCurTick(curtick_original); 185 186 // Aggregate the trace entries together into a single array 187 uint8_t *raw_data = new uint8_t[4096]; 188 uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data, 189 4096); 190 string cache_trace_file = name() + ".cache.gz"; 191 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size); 192 193 SERIALIZE_SCALAR(cache_trace_file); 194 SERIALIZE_SCALAR(cache_trace_size); 195 196 m_cooldown_enabled = false; 197} 198 199void 200RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data, 201 uint64& uncompressed_trace_size) 202{ 203 // Read the trace file 204 gzFile compressedTrace; 205 
206 // trace file 207 int fd = open(filename.c_str(), O_RDONLY); 208 if (fd < 0) { 209 perror("open"); 210 fatal("Unable to open trace file %s", filename); 211 } 212 213 compressedTrace = gzdopen(fd, "rb"); 214 if (compressedTrace == NULL) { 215 fatal("Insufficient memory to allocate compression state for %s\n", 216 filename); 217 } 218 219 raw_data = new uint8_t[uncompressed_trace_size]; 220 if (gzread(compressedTrace, raw_data, uncompressed_trace_size) < 221 uncompressed_trace_size) { 222 fatal("Unable to read complete trace from file %s\n", filename); 223 } 224 225 if (gzclose(compressedTrace)) { 226 fatal("Failed to close cache trace file '%s'\n", filename); 227 } 228} 229 230void 231RubySystem::unserialize(CheckpointIn &cp) 232{ 233 uint8_t *uncompressed_trace = NULL; 234 235 // This value should be set to the checkpoint-system's block-size. 236 // Optional, as checkpoints without it can be run if the 237 // checkpoint-system's block-size == current block-size. 238 uint64 block_size_bytes = getBlockSizeBytes(); 239 UNSERIALIZE_OPT_SCALAR(block_size_bytes); 240 241 string cache_trace_file; 242 uint64 cache_trace_size = 0; 243 244 UNSERIALIZE_SCALAR(cache_trace_file); 245 UNSERIALIZE_SCALAR(cache_trace_size); 246 cache_trace_file = cp.cptDir + "/" + cache_trace_file; 247 248 readCompressedTrace(cache_trace_file, uncompressed_trace, 249 cache_trace_size); 250 m_warmup_enabled = true; 251 m_systems_to_warmup++; 252 253 vector<Sequencer*> sequencer_map; 254 Sequencer* t = NULL; 255 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 256 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 257 if (t == NULL) t = sequencer_map[cntrl]; 258 } 259 260 assert(t != NULL); 261 262 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 263 if (sequencer_map[cntrl] == NULL) { 264 sequencer_map[cntrl] = t; 265 } 266 } 267 268 m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, 269 sequencer_map, block_size_bytes); 270} 271 
272void 273RubySystem::startup() 274{ 275 276 // Ruby restores state from a checkpoint by resetting the clock to 0 and 277 // playing the requests that can possibly re-generate the cache state. 278 // The clock value is set to the actual checkpointed value once all the 279 // requests have been executed. 280 // 281 // This way of restoring state is pretty finicky. For example, if a 282 // Ruby component reads time before the state has been restored, it would 283 // cache this value and hence its clock would not be reset to 0, when 284 // Ruby resets the global clock. This can potentially result in a 285 // deadlock. 286 // 287 // The solution is that no Ruby component should read time before the 288 // simulation starts. And then one also needs to hope that the time 289 // Ruby finishes restoring the state is less than the time when the 290 // state was checkpointed. 291 292 if (m_warmup_enabled) { 293 // save the current tick value 294 Tick curtick_original = curTick(); 295 // save the event queue head 296 Event* eventq_head = eventq->replaceHead(NULL); 297 // set curTick to 0 and reset Ruby System's clock 298 setCurTick(0); 299 resetClock(); 300 301 // Schedule an event to start cache warmup 302 enqueueRubyEvent(curTick()); 303 simulate(); 304 305 delete m_cache_recorder; 306 m_cache_recorder = NULL; 307 m_systems_to_warmup--; 308 if (m_systems_to_warmup == 0) { 309 m_warmup_enabled = false; 310 } 311 312 // Restore eventq head 313 eventq_head = eventq->replaceHead(eventq_head); 314 // Restore curTick and Ruby System's clock 315 setCurTick(curtick_original); 316 resetClock(); 317 } 318 319 resetStats(); 320} 321 322void 323RubySystem::RubyEvent::process() 324{ 325 if (RubySystem::getWarmupEnabled()) { 326 m_ruby_system->m_cache_recorder->enqueueNextFetchRequest(); 327 } else if (RubySystem::getCooldownEnabled()) { 328 m_ruby_system->m_cache_recorder->enqueueNextFlushRequest(); 329 } 330} 331 332void 333RubySystem::resetStats() 334{ 335 m_start_cycle = curCycle(); 
336} 337 338bool 339RubySystem::functionalRead(PacketPtr pkt) 340{ 341 Address address(pkt->getAddr()); 342 Address line_address(address); 343 line_address.makeLineAddress(); 344 345 AccessPermission access_perm = AccessPermission_NotPresent; 346 int num_controllers = m_abs_cntrl_vec.size(); 347 348 DPRINTF(RubySystem, "Functional Read request for %s\n",address); 349 350 unsigned int num_ro = 0; 351 unsigned int num_rw = 0; 352 unsigned int num_busy = 0; 353 unsigned int num_backing_store = 0; 354 unsigned int num_invalid = 0; 355 356 // In this loop we count the number of controllers that have the given 357 // address in read only, read write and busy states. 358 for (unsigned int i = 0; i < num_controllers; ++i) { 359 access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address); 360 if (access_perm == AccessPermission_Read_Only) 361 num_ro++; 362 else if (access_perm == AccessPermission_Read_Write) 363 num_rw++; 364 else if (access_perm == AccessPermission_Busy) 365 num_busy++; 366 else if (access_perm == AccessPermission_Backing_Store) 367 // See RubySlicc_Exports.sm for details, but Backing_Store is meant 368 // to represent blocks in memory *for Broadcast/Snooping protocols*, 369 // where memory has no idea whether it has an exclusive copy of data 370 // or not. 371 num_backing_store++; 372 else if (access_perm == AccessPermission_Invalid || 373 access_perm == AccessPermission_NotPresent) 374 num_invalid++; 375 } 376 assert(num_rw <= 1); 377 378 // This if case is meant to capture what happens in a Broadcast/Snoop 379 // protocol where the block does not exist in the cache hierarchy. You 380 // only want to read from the Backing_Store memory if there is no copy in 381 // the cache hierarchy, otherwise you want to try to read the RO or RW 382 // copies existing in the cache hierarchy (covered by the else statement). 
383 // The reason is because the Backing_Store memory could easily be stale, if 384 // there are copies floating around the cache hierarchy, so you want to read 385 // it only if it's not in the cache hierarchy at all. 386 if (num_invalid == (num_controllers - 1) && num_backing_store == 1) { 387 DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n"); 388 for (unsigned int i = 0; i < num_controllers; ++i) { 389 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 390 if (access_perm == AccessPermission_Backing_Store) { 391 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); 392 return true; 393 } 394 } 395 } else if (num_ro > 0 || num_rw == 1) { 396 // In Broadcast/Snoop protocols, this covers if you know the block 397 // exists somewhere in the caching hierarchy, then you want to read any 398 // valid RO or RW block. In directory protocols, same thing, you want 399 // to read any valid readable copy of the block. 400 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n", 401 num_busy, num_ro, num_rw); 402 // In this loop, we try to figure which controller has a read only or 403 // a read write copy of the given address. Any valid copy would suffice 404 // for a functional read. 405 for (unsigned int i = 0;i < num_controllers;++i) { 406 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 407 if (access_perm == AccessPermission_Read_Only || 408 access_perm == AccessPermission_Read_Write) { 409 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); 410 return true; 411 } 412 } 413 } 414 415 return false; 416} 417 418// The function searches through all the buffers that exist in different 419// cache, directory and memory controllers, and in the network components 420// and writes the data portion of those that hold the address specified 421// in the packet. 
422bool 423RubySystem::functionalWrite(PacketPtr pkt) 424{ 425 Address addr(pkt->getAddr()); 426 Address line_addr = line_address(addr); 427 AccessPermission access_perm = AccessPermission_NotPresent; 428 int num_controllers = m_abs_cntrl_vec.size(); 429 430 DPRINTF(RubySystem, "Functional Write request for %s\n",addr); 431 432 uint32_t M5_VAR_USED num_functional_writes = 0; 433 434 for (unsigned int i = 0; i < num_controllers;++i) { 435 num_functional_writes += 436 m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt); 437 438 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr); 439 if (access_perm != AccessPermission_Invalid && 440 access_perm != AccessPermission_NotPresent) { 441 num_functional_writes += 442 m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt); 443 } 444 } 445 446 num_functional_writes += m_network->functionalWrite(pkt); 447 DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes); 448 449 return true; 450} 451 452#ifdef CHECK_COHERENCE 453// This code will check for cases if the given cache block is exclusive in 454// one node and shared in another-- a coherence violation 455// 456// To use, the SLICC specification must call sequencer.checkCoherence(address) 457// when the controller changes to a state with new permissions. Do this 458// in setState. 
The SLICC spec must also define methods "isBlockShared" 459// and "isBlockExclusive" that are specific to that protocol 460// 461void 462RubySystem::checkGlobalCoherenceInvariant(const Address& addr) 463{ 464#if 0 465 NodeID exclusive = -1; 466 bool sharedDetected = false; 467 NodeID lastShared = -1; 468 469 for (int i = 0; i < m_chip_vector.size(); i++) { 470 if (m_chip_vector[i]->isBlockExclusive(addr)) { 471 if (exclusive != -1) { 472 // coherence violation 473 WARN_EXPR(exclusive); 474 WARN_EXPR(m_chip_vector[i]->getID()); 475 WARN_EXPR(addr); 476 WARN_EXPR(getTime()); 477 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips"); 478 } else if (sharedDetected) { 479 WARN_EXPR(lastShared); 480 WARN_EXPR(m_chip_vector[i]->getID()); 481 WARN_EXPR(addr); 482 WARN_EXPR(getTime()); 483 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 484 } else { 485 exclusive = m_chip_vector[i]->getID(); 486 } 487 } else if (m_chip_vector[i]->isBlockShared(addr)) { 488 sharedDetected = true; 489 lastShared = m_chip_vector[i]->getID(); 490 491 if (exclusive != -1) { 492 WARN_EXPR(lastShared); 493 WARN_EXPR(exclusive); 494 WARN_EXPR(addr); 495 WARN_EXPR(getTime()); 496 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 497 } 498 } 499 } 500#endif 501} 502#endif 503 504RubySystem * 505RubySystemParams::create() 506{ 507 return new RubySystem(this); 508} 509