// RubySystem.cc revision 10524
1/* 2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 */ 28 29#include <fcntl.h> 30#include <zlib.h> 31 32#include <cstdio> 33 34#include "base/intmath.hh" 35#include "base/statistics.hh" 36#include "debug/RubyCacheTrace.hh" 37#include "debug/RubySystem.hh" 38#include "mem/ruby/common/Address.hh" 39#include "mem/ruby/network/Network.hh" 40#include "mem/ruby/system/System.hh" 41#include "sim/eventq.hh" 42#include "sim/simulate.hh" 43 44using namespace std; 45 46int RubySystem::m_random_seed; 47bool RubySystem::m_randomization; 48uint32_t RubySystem::m_block_size_bytes; 49uint32_t RubySystem::m_block_size_bits; 50uint32_t RubySystem::m_memory_size_bits; 51 52RubySystem::RubySystem(const Params *p) 53 : ClockedObject(p) 54{ 55 if (g_system_ptr != NULL) 56 fatal("Only one RubySystem object currently allowed.\n"); 57 58 m_random_seed = p->random_seed; 59 srandom(m_random_seed); 60 m_randomization = p->randomization; 61 62 m_block_size_bytes = p->block_size_bytes; 63 assert(isPowerOf2(m_block_size_bytes)); 64 m_block_size_bits = floorLog2(m_block_size_bytes); 65 m_memory_size_bits = p->memory_size_bits; 66 67 m_warmup_enabled = false; 68 m_cooldown_enabled = false; 69 70 // Setup the global variables used in Ruby 71 g_system_ptr = this; 72 73 // Resize to the size of different machine types 74 g_abs_controls.resize(MachineType_NUM); 75 76 // Collate the statistics before they are printed. 
77 Stats::registerDumpCallback(new RubyStatsCallback(this)); 78 // Create the profiler 79 m_profiler = new Profiler(p); 80} 81 82void 83RubySystem::registerNetwork(Network* network_ptr) 84{ 85 m_network = network_ptr; 86} 87 88void 89RubySystem::registerAbstractController(AbstractController* cntrl) 90{ 91 m_abs_cntrl_vec.push_back(cntrl); 92 93 MachineID id = cntrl->getMachineID(); 94 g_abs_controls[id.getType()][id.getNum()] = cntrl; 95} 96 97RubySystem::~RubySystem() 98{ 99 delete m_network; 100 delete m_profiler; 101} 102 103void 104RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename, 105 uint64 uncompressed_trace_size) 106{ 107 // Create the checkpoint file for the memory 108 string thefile = Checkpoint::dir() + "/" + filename.c_str(); 109 110 int fd = creat(thefile.c_str(), 0664); 111 if (fd < 0) { 112 perror("creat"); 113 fatal("Can't open memory trace file '%s'\n", filename); 114 } 115 116 gzFile compressedMemory = gzdopen(fd, "wb"); 117 if (compressedMemory == NULL) 118 fatal("Insufficient memory to allocate compression state for %s\n", 119 filename); 120 121 if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) != 122 uncompressed_trace_size) { 123 fatal("Write failed on memory trace file '%s'\n", filename); 124 } 125 126 if (gzclose(compressedMemory)) { 127 fatal("Close failed on memory trace file '%s'\n", filename); 128 } 129 delete[] raw_data; 130} 131 132void 133RubySystem::serialize(std::ostream &os) 134{ 135 m_cooldown_enabled = true; 136 vector<Sequencer*> sequencer_map; 137 Sequencer* sequencer_ptr = NULL; 138 139 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 140 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 141 if (sequencer_ptr == NULL) { 142 sequencer_ptr = sequencer_map[cntrl]; 143 } 144 } 145 146 assert(sequencer_ptr != NULL); 147 148 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 149 if (sequencer_map[cntrl] == NULL) { 150 sequencer_map[cntrl] = sequencer_ptr; 151 
} 152 } 153 154 // Store the cache-block size, so we are able to restore on systems with a 155 // different cache-block size. CacheRecorder depends on the correct 156 // cache-block size upon unserializing. 157 uint64 block_size_bytes = getBlockSizeBytes(); 158 SERIALIZE_SCALAR(block_size_bytes); 159 160 DPRINTF(RubyCacheTrace, "Recording Cache Trace\n"); 161 // Create the CacheRecorder and record the cache trace 162 m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map, 163 block_size_bytes); 164 165 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 166 m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder); 167 } 168 169 DPRINTF(RubyCacheTrace, "Cache Trace Complete\n"); 170 // save the current tick value 171 Tick curtick_original = curTick(); 172 // save the event queue head 173 Event* eventq_head = eventq->replaceHead(NULL); 174 DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n", 175 curtick_original); 176 177 // Schedule an event to start cache cooldown 178 DPRINTF(RubyCacheTrace, "Starting cache flush\n"); 179 enqueueRubyEvent(curTick()); 180 simulate(); 181 DPRINTF(RubyCacheTrace, "Cache flush complete\n"); 182 183 // Restore eventq head 184 eventq_head = eventq->replaceHead(eventq_head); 185 // Restore curTick 186 setCurTick(curtick_original); 187 188 // Aggergate the trace entries together into a single array 189 uint8_t *raw_data = new uint8_t[4096]; 190 uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data, 191 4096); 192 string cache_trace_file = name() + ".cache.gz"; 193 writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size); 194 195 SERIALIZE_SCALAR(cache_trace_file); 196 SERIALIZE_SCALAR(cache_trace_size); 197 198 m_cooldown_enabled = false; 199} 200 201void 202RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data, 203 uint64& uncompressed_trace_size) 204{ 205 // Read the trace file 206 gzFile compressedTrace; 207 208 // trace file 209 int fd = 
open(filename.c_str(), O_RDONLY); 210 if (fd < 0) { 211 perror("open"); 212 fatal("Unable to open trace file %s", filename); 213 } 214 215 compressedTrace = gzdopen(fd, "rb"); 216 if (compressedTrace == NULL) { 217 fatal("Insufficient memory to allocate compression state for %s\n", 218 filename); 219 } 220 221 raw_data = new uint8_t[uncompressed_trace_size]; 222 if (gzread(compressedTrace, raw_data, uncompressed_trace_size) < 223 uncompressed_trace_size) { 224 fatal("Unable to read complete trace from file %s\n", filename); 225 } 226 227 if (gzclose(compressedTrace)) { 228 fatal("Failed to close cache trace file '%s'\n", filename); 229 } 230} 231 232void 233RubySystem::unserialize(Checkpoint *cp, const string §ion) 234{ 235 uint8_t *uncompressed_trace = NULL; 236 237 // This value should be set to the checkpoint-system's block-size. 238 // Optional, as checkpoints without it can be run if the 239 // checkpoint-system's block-size == current block-size. 240 uint64 block_size_bytes = getBlockSizeBytes(); 241 UNSERIALIZE_OPT_SCALAR(block_size_bytes); 242 243 string cache_trace_file; 244 uint64 cache_trace_size = 0; 245 246 UNSERIALIZE_SCALAR(cache_trace_file); 247 UNSERIALIZE_SCALAR(cache_trace_size); 248 cache_trace_file = cp->cptDir + "/" + cache_trace_file; 249 250 readCompressedTrace(cache_trace_file, uncompressed_trace, 251 cache_trace_size); 252 m_warmup_enabled = true; 253 254 vector<Sequencer*> sequencer_map; 255 Sequencer* t = NULL; 256 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 257 sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer()); 258 if (t == NULL) t = sequencer_map[cntrl]; 259 } 260 261 assert(t != NULL); 262 263 for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { 264 if (sequencer_map[cntrl] == NULL) { 265 sequencer_map[cntrl] = t; 266 } 267 } 268 269 m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, 270 sequencer_map, block_size_bytes); 271} 272 273void 274RubySystem::startup() 275{ 
276 277 // Ruby restores state from a checkpoint by resetting the clock to 0 and 278 // playing the requests that can possibly re-generate the cache state. 279 // The clock value is set to the actual checkpointed value once all the 280 // requests have been executed. 281 // 282 // This way of restoring state is pretty finicky. For example, if a 283 // Ruby component reads time before the state has been restored, it would 284 // cache this value and hence its clock would not be reset to 0, when 285 // Ruby resets the global clock. This can potentially result in a 286 // deadlock. 287 // 288 // The solution is that no Ruby component should read time before the 289 // simulation starts. And then one also needs to hope that the time 290 // Ruby finishes restoring the state is less than the time when the 291 // state was checkpointed. 292 293 if (m_warmup_enabled) { 294 // save the current tick value 295 Tick curtick_original = curTick(); 296 // save the event queue head 297 Event* eventq_head = eventq->replaceHead(NULL); 298 // set curTick to 0 and reset Ruby System's clock 299 setCurTick(0); 300 resetClock(); 301 302 // Schedule an event to start cache warmup 303 enqueueRubyEvent(curTick()); 304 simulate(); 305 306 delete m_cache_recorder; 307 m_cache_recorder = NULL; 308 m_warmup_enabled = false; 309 310 // Restore eventq head 311 eventq_head = eventq->replaceHead(eventq_head); 312 // Restore curTick and Ruby System's clock 313 setCurTick(curtick_original); 314 resetClock(); 315 } 316 317 resetStats(); 318} 319 320void 321RubySystem::RubyEvent::process() 322{ 323 if (ruby_system->m_warmup_enabled) { 324 ruby_system->m_cache_recorder->enqueueNextFetchRequest(); 325 } else if (ruby_system->m_cooldown_enabled) { 326 ruby_system->m_cache_recorder->enqueueNextFlushRequest(); 327 } 328} 329 330void 331RubySystem::resetStats() 332{ 333 g_ruby_start = curCycle(); 334} 335 336bool 337RubySystem::functionalRead(PacketPtr pkt) 338{ 339 Address address(pkt->getAddr()); 340 
Address line_address(address); 341 line_address.makeLineAddress(); 342 343 AccessPermission access_perm = AccessPermission_NotPresent; 344 int num_controllers = m_abs_cntrl_vec.size(); 345 346 DPRINTF(RubySystem, "Functional Read request for %s\n",address); 347 348 unsigned int num_ro = 0; 349 unsigned int num_rw = 0; 350 unsigned int num_busy = 0; 351 unsigned int num_backing_store = 0; 352 unsigned int num_invalid = 0; 353 354 // In this loop we count the number of controllers that have the given 355 // address in read only, read write and busy states. 356 for (unsigned int i = 0; i < num_controllers; ++i) { 357 access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address); 358 if (access_perm == AccessPermission_Read_Only) 359 num_ro++; 360 else if (access_perm == AccessPermission_Read_Write) 361 num_rw++; 362 else if (access_perm == AccessPermission_Busy) 363 num_busy++; 364 else if (access_perm == AccessPermission_Backing_Store) 365 // See RubySlicc_Exports.sm for details, but Backing_Store is meant 366 // to represent blocks in memory *for Broadcast/Snooping protocols*, 367 // where memory has no idea whether it has an exclusive copy of data 368 // or not. 369 num_backing_store++; 370 else if (access_perm == AccessPermission_Invalid || 371 access_perm == AccessPermission_NotPresent) 372 num_invalid++; 373 } 374 assert(num_rw <= 1); 375 376 // This if case is meant to capture what happens in a Broadcast/Snoop 377 // protocol where the block does not exist in the cache hierarchy. You 378 // only want to read from the Backing_Store memory if there is no copy in 379 // the cache hierarchy, otherwise you want to try to read the RO or RW 380 // copies existing in the cache hierarchy (covered by the else statement). 381 // The reason is because the Backing_Store memory could easily be stale, if 382 // there are copies floating around the cache hierarchy, so you want to read 383 // it only if it's not in the cache hierarchy at all. 
384 if (num_invalid == (num_controllers - 1) && num_backing_store == 1) { 385 DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n"); 386 for (unsigned int i = 0; i < num_controllers; ++i) { 387 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 388 if (access_perm == AccessPermission_Backing_Store) { 389 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); 390 return true; 391 } 392 } 393 } else if (num_ro > 0 || num_rw == 1) { 394 // In Broadcast/Snoop protocols, this covers if you know the block 395 // exists somewhere in the caching hierarchy, then you want to read any 396 // valid RO or RW block. In directory protocols, same thing, you want 397 // to read any valid readable copy of the block. 398 DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n", 399 num_busy, num_ro, num_rw); 400 // In this loop, we try to figure which controller has a read only or 401 // a read write copy of the given address. Any valid copy would suffice 402 // for a functional read. 403 for (unsigned int i = 0;i < num_controllers;++i) { 404 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address); 405 if (access_perm == AccessPermission_Read_Only || 406 access_perm == AccessPermission_Read_Write) { 407 m_abs_cntrl_vec[i]->functionalRead(line_address, pkt); 408 return true; 409 } 410 } 411 } 412 413 return false; 414} 415 416// The function searches through all the buffers that exist in different 417// cache, directory and memory controllers, and in the network components 418// and writes the data portion of those that hold the address specified 419// in the packet. 
420bool 421RubySystem::functionalWrite(PacketPtr pkt) 422{ 423 Address addr(pkt->getAddr()); 424 Address line_addr = line_address(addr); 425 AccessPermission access_perm = AccessPermission_NotPresent; 426 int num_controllers = m_abs_cntrl_vec.size(); 427 428 DPRINTF(RubySystem, "Functional Write request for %s\n",addr); 429 430 uint32_t M5_VAR_USED num_functional_writes = 0; 431 432 for (unsigned int i = 0; i < num_controllers;++i) { 433 num_functional_writes += 434 m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt); 435 436 access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr); 437 if (access_perm != AccessPermission_Invalid && 438 access_perm != AccessPermission_NotPresent) { 439 num_functional_writes += 440 m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt); 441 } 442 } 443 444 num_functional_writes += m_network->functionalWrite(pkt); 445 DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes); 446 447 return true; 448} 449 450#ifdef CHECK_COHERENCE 451// This code will check for cases if the given cache block is exclusive in 452// one node and shared in another-- a coherence violation 453// 454// To use, the SLICC specification must call sequencer.checkCoherence(address) 455// when the controller changes to a state with new permissions. Do this 456// in setState. 
The SLICC spec must also define methods "isBlockShared" 457// and "isBlockExclusive" that are specific to that protocol 458// 459void 460RubySystem::checkGlobalCoherenceInvariant(const Address& addr) 461{ 462#if 0 463 NodeID exclusive = -1; 464 bool sharedDetected = false; 465 NodeID lastShared = -1; 466 467 for (int i = 0; i < m_chip_vector.size(); i++) { 468 if (m_chip_vector[i]->isBlockExclusive(addr)) { 469 if (exclusive != -1) { 470 // coherence violation 471 WARN_EXPR(exclusive); 472 WARN_EXPR(m_chip_vector[i]->getID()); 473 WARN_EXPR(addr); 474 WARN_EXPR(getTime()); 475 ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips"); 476 } else if (sharedDetected) { 477 WARN_EXPR(lastShared); 478 WARN_EXPR(m_chip_vector[i]->getID()); 479 WARN_EXPR(addr); 480 WARN_EXPR(getTime()); 481 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 482 } else { 483 exclusive = m_chip_vector[i]->getID(); 484 } 485 } else if (m_chip_vector[i]->isBlockShared(addr)) { 486 sharedDetected = true; 487 lastShared = m_chip_vector[i]->getID(); 488 489 if (exclusive != -1) { 490 WARN_EXPR(lastShared); 491 WARN_EXPR(exclusive); 492 WARN_EXPR(addr); 493 WARN_EXPR(getTime()); 494 ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared"); 495 } 496 } 497 } 498#endif 499} 500#endif 501 502RubySystem * 503RubySystemParams::create() 504{ 505 return new RubySystem(this); 506} 507