// RubySystem.cc, revision 10991
/*
 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <fcntl.h>
#include <zlib.h>

#include <cstdio>
#include <list>

#include "base/intmath.hh"
#include "base/statistics.hh"
#include "debug/RubyCacheTrace.hh"
#include "debug/RubySystem.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/system/System.hh"
#include "mem/simple_mem.hh"
#include "sim/eventq.hh"
#include "sim/simulate.hh"

using namespace std;

int RubySystem::m_random_seed;
bool RubySystem::m_randomization;
uint32_t RubySystem::m_block_size_bytes;
uint32_t RubySystem::m_block_size_bits;
uint32_t RubySystem::m_memory_size_bits;
bool RubySystem::m_warmup_enabled = false;
// Looking ahead to allowing multiple RubySystem instances, track the number
// of RubySystems that need to be warmed up on checkpoint restore.
unsigned RubySystem::m_systems_to_warmup = 0;
bool RubySystem::m_cooldown_enabled = false;

RubySystem::RubySystem(const Params *p)
    : ClockedObject(p), m_access_backing_store(p->access_backing_store),
      m_cache_recorder(NULL)
{
    m_random_seed = p->random_seed;
    srandom(m_random_seed);
    m_randomization = p->randomization;

    m_block_size_bytes = p->block_size_bytes;
    assert(isPowerOf2(m_block_size_bytes));
    m_block_size_bits = floorLog2(m_block_size_bytes);
    m_memory_size_bits = p->memory_size_bits;

    // Resize to the size of different machine types
    m_abstract_controls.resize(MachineType_NUM);

    // Collate the statistics before they are printed.
    Stats::registerDumpCallback(new RubyStatsCallback(this));
    // Create the profiler
    m_profiler = new Profiler(p, this);
    m_phys_mem = p->phys_mem;
}

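// The network and the abstract controllers register themselves with the
// RubySystem as they are constructed; the vectors populated here are later
// used for functional accesses and for cache trace recording.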
void
RubySystem::registerNetwork(Network* network_ptr)
{
    m_network = network_ptr;
}

void
RubySystem::registerAbstractController(AbstractController* cntrl)
{
    m_abs_cntrl_vec.push_back(cntrl);

    MachineID id = cntrl->getMachineID();
    m_abstract_controls[id.getType()][id.getNum()] = cntrl;
}

RubySystem::~RubySystem()
{
    delete m_network;
    delete m_profiler;
}

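// Build the sequencer map needed by the CacheRecorder: one entry per
// controller, with controllers that lack a sequencer falling back to the
// first sequencer found.  Then (re)create the recorder over the given trace.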
void
RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
                              uint64 cache_trace_size,
                              uint64 block_size_bytes)
{
    vector<Sequencer*> sequencer_map;
    Sequencer* sequencer_ptr = NULL;

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (sequencer_ptr == NULL) {
            sequencer_ptr = sequencer_map[cntrl];
        }
    }

    assert(sequencer_ptr != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = sequencer_ptr;
        }
    }

    // Remove the old CacheRecorder if it's still hanging about.
    if (m_cache_recorder != NULL) {
        delete m_cache_recorder;
    }

    // Create the CacheRecorder and record the cache trace
    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
                                         sequencer_map, block_size_bytes);
}

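// Flush all dirty cache data back to memory.  The current event queue is set
// aside, the flush (cooldown) requests are simulated with the tick frozen,
// and then the original events and tick are restored so the writeback
// appears to take no simulated time.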
void
RubySystem::memWriteback()
{
    m_cooldown_enabled = true;

    // Make the trace so we know what to write back.
    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
    makeCacheRecorder(NULL, 0, getBlockSizeBytes());
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
    }
    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");

    // save the current tick value
    Tick curtick_original = curTick();
    DPRINTF(RubyCacheTrace, "Recording current tick %ld\n", curtick_original);

    // Deschedule all prior events on the event queue, but record the tick they
    // were scheduled at so they can be restored correctly later.
    list<pair<Event*, Tick> > original_events;
    while (!eventq->empty()) {
        Event *curr_head = eventq->getHead();
        if (curr_head->isAutoDelete()) {
            DPRINTF(RubyCacheTrace, "Event %s auto-deletes when descheduled,"
                    " not recording\n", curr_head->name());
        } else {
            original_events.push_back(make_pair(curr_head, curr_head->when()));
        }
        eventq->deschedule(curr_head);
    }

    // Schedule an event to start cache cooldown
    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
    enqueueRubyEvent(curTick());
    simulate();
    DPRINTF(RubyCacheTrace, "Cache flush complete\n");

    // Deschedule any events left on the event queue.
    while (!eventq->empty()) {
        eventq->deschedule(eventq->getHead());
    }

    // Restore curTick
    setCurTick(curtick_original);

    // Restore all events that were originally on the event queue.  This is
    // done after setting curTick back to its original value so that events do
    // not seem to be scheduled in the past.
    while (!original_events.empty()) {
        pair<Event*, Tick> event = original_events.back();
        eventq->schedule(event.first, event.second);
        original_events.pop_back();
    }

    // No longer flushing back to memory.
    m_cooldown_enabled = false;

    // There are several issues with continuing simulation after calling
    // memWriteback() at the moment, that stem from taking events off the
    // queue, simulating again, and then putting them back on, whilst
    // pretending that no time has passed.  One is that some events will have
    // been deleted, so can't be put back.  Another is that any object
    // recording the tick something happens may end up storing a tick in the
    // future.  A simple warning here alerts the user that things may not work
    // as expected.
    warn_once("Ruby memory writeback is experimental.  Continuing simulation "
              "afterwards may not always work as intended.");

    // Keep the cache recorder around so that we can dump the trace if a
    // checkpoint is immediately taken.
}

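// Write the aggregated cache trace to a gzip-compressed file inside the
// checkpoint directory, then free the caller's buffer.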
void
RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
                                 uint64 uncompressed_trace_size)
{
    // Create the checkpoint file for the memory
    string thefile = CheckpointIn::dir() + "/" + filename.c_str();

    int fd = creat(thefile.c_str(), 0664);
    if (fd < 0) {
        perror("creat");
        fatal("Can't open memory trace file '%s'\n", filename);
    }

    gzFile compressedMemory = gzdopen(fd, "wb");
    if (compressedMemory == NULL)
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);

    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
        uncompressed_trace_size) {
        fatal("Write failed on memory trace file '%s'\n", filename);
    }

    if (gzclose(compressedMemory)) {
        fatal("Close failed on memory trace file '%s'\n", filename);
    }
    delete[] raw_data;
}

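// Serialize the RubySystem: record the cache-block size and dump the cache
// trace (built beforehand by memWriteback()) to <name>.cache.gz next to the
// checkpoint.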
void
RubySystem::serializeOld(CheckpointOut &cp)
{
    // Store the cache-block size, so we are able to restore on systems with a
    // different cache-block size. CacheRecorder depends on the correct
    // cache-block size upon unserializing.
    uint64 block_size_bytes = getBlockSizeBytes();
    SERIALIZE_SCALAR(block_size_bytes);

    // Check that there's a valid trace to use.  If not, then memory won't be
    // up-to-date and the simulation will probably fail when restoring from the
    // checkpoint.
    if (m_cache_recorder == NULL) {
        fatal("Call memWriteback() before serialize() to create ruby trace");
    }

    // Aggregate the trace entries together into a single array
    uint8_t *raw_data = new uint8_t[4096];
    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
                                                                 4096);
    string cache_trace_file = name() + ".cache.gz";
    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);

    SERIALIZE_SCALAR(cache_trace_file);
    SERIALIZE_SCALAR(cache_trace_size);

    // Now finished with the cache recorder.
    delete m_cache_recorder;
    m_cache_recorder = NULL;
}

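// Read uncompressed_trace_size bytes of cache trace from a gzip-compressed
// file into a newly allocated buffer that the caller takes ownership of.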
void
RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
                                uint64& uncompressed_trace_size)
{
    // Read the trace file
    gzFile compressedTrace;

    // trace file
    int fd = open(filename.c_str(), O_RDONLY);
    if (fd < 0) {
        perror("open");
        fatal("Unable to open trace file %s", filename);
    }

    compressedTrace = gzdopen(fd, "rb");
    if (compressedTrace == NULL) {
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);
    }

    raw_data = new uint8_t[uncompressed_trace_size];
    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
            uncompressed_trace_size) {
        fatal("Unable to read complete trace from file %s\n", filename);
    }

    if (gzclose(compressedTrace)) {
        fatal("Failed to close cache trace file '%s'\n", filename);
    }
}

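// Restore from a checkpoint: read the compressed cache trace written at
// serialization time and build a CacheRecorder so that startup() can replay
// it to warm up the caches.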
void
RubySystem::unserialize(CheckpointIn &cp)
{
    uint8_t *uncompressed_trace = NULL;

    // This value should be set to the checkpoint-system's block-size.
    // Optional, as checkpoints without it can be run if the
    // checkpoint-system's block-size == current block-size.
    uint64 block_size_bytes = getBlockSizeBytes();
    UNSERIALIZE_OPT_SCALAR(block_size_bytes);

    string cache_trace_file;
    uint64 cache_trace_size = 0;

    UNSERIALIZE_SCALAR(cache_trace_file);
    UNSERIALIZE_SCALAR(cache_trace_size);
    cache_trace_file = cp.cptDir + "/" + cache_trace_file;

    readCompressedTrace(cache_trace_file, uncompressed_trace,
                        cache_trace_size);
    m_warmup_enabled = true;
    m_systems_to_warmup++;

    // Create the cache recorder that will hang around until startup.
    makeCacheRecorder(uncompressed_trace, cache_trace_size, block_size_bytes);
}

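// If restoring from a checkpoint, replay the recorded cache trace at tick 0
// to warm up the caches, then restore the original tick and event queue head
// before normal simulation begins.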
void
RubySystem::startup()
{

    // Ruby restores state from a checkpoint by resetting the clock to 0 and
    // playing the requests that can possibly re-generate the cache state.
    // The clock value is set to the actual checkpointed value once all the
    // requests have been executed.
    //
    // This way of restoring state is pretty finicky. For example, if a
    // Ruby component reads time before the state has been restored, it would
    // cache this value and hence its clock would not be reset to 0 when
    // Ruby resets the global clock. This can potentially result in a
    // deadlock.
    //
    // The solution is that no Ruby component should read time before the
    // simulation starts. And then one also needs to hope that the time
    // Ruby finishes restoring the state is less than the time when the
    // state was checkpointed.

    if (m_warmup_enabled) {
        DPRINTF(RubyCacheTrace, "Starting ruby cache warmup\n");
        // save the current tick value
        Tick curtick_original = curTick();
        // save the event queue head
        Event* eventq_head = eventq->replaceHead(NULL);
        // set curTick to 0 and reset Ruby System's clock
        setCurTick(0);
        resetClock();

        // Schedule an event to start cache warmup
        enqueueRubyEvent(curTick());
        simulate();

        delete m_cache_recorder;
        m_cache_recorder = NULL;
        m_systems_to_warmup--;
        if (m_systems_to_warmup == 0) {
            m_warmup_enabled = false;
        }

        // Restore eventq head
        eventq_head = eventq->replaceHead(eventq_head);
        // Restore curTick and Ruby System's clock
        setCurTick(curtick_original);
        resetClock();
    }

    resetStats();
}

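// During warmup this event feeds the next fetch request from the cache
// recorder into the system; during cooldown it feeds the next flush request.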
void
RubySystem::RubyEvent::process()
{
    if (RubySystem::getWarmupEnabled()) {
        m_ruby_system->m_cache_recorder->enqueueNextFetchRequest();
    } else if (RubySystem::getCooldownEnabled()) {
        m_ruby_system->m_cache_recorder->enqueueNextFlushRequest();
    }
}

void
RubySystem::resetStats()
{
    m_start_cycle = curCycle();
}

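// The function looks up the line in every controller and counts how many
// hold it in read-only, read-write, busy, invalid, or Backing_Store state.
// It reads from the Backing_Store copy only when no cached copy exists
// anywhere; otherwise it reads from any valid RO or RW copy.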
bool
RubySystem::functionalRead(PacketPtr pkt)
{
    Address address(pkt->getAddr());
    Address line_address(address);
    line_address.makeLineAddress();

    AccessPermission access_perm = AccessPermission_NotPresent;
    int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Read request for %s\n", address);

    unsigned int num_ro = 0;
    unsigned int num_rw = 0;
    unsigned int num_busy = 0;
    unsigned int num_backing_store = 0;
    unsigned int num_invalid = 0;

    // In this loop we count the number of controllers that have the given
    // address in read-only, read-write, and busy states.
    for (unsigned int i = 0; i < num_controllers; ++i) {
        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
        if (access_perm == AccessPermission_Read_Only)
            num_ro++;
        else if (access_perm == AccessPermission_Read_Write)
            num_rw++;
        else if (access_perm == AccessPermission_Busy)
            num_busy++;
        else if (access_perm == AccessPermission_Backing_Store)
            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
            // to represent blocks in memory *for Broadcast/Snooping protocols*,
            // where memory has no idea whether it has an exclusive copy of data
            // or not.
            num_backing_store++;
        else if (access_perm == AccessPermission_Invalid ||
                 access_perm == AccessPermission_NotPresent)
            num_invalid++;
    }
    assert(num_rw <= 1);

    // This case captures what happens in a Broadcast/Snoop protocol where
    // the block does not exist in the cache hierarchy. You only want to read
    // from the Backing_Store memory if there is no copy in the cache
    // hierarchy; otherwise you want to try to read the RO or RW copies in
    // the cache hierarchy (covered by the else branch). The reason is that
    // the Backing_Store memory could easily be stale if there are copies
    // floating around the cache hierarchy, so you should read it only if the
    // block is not in the cache hierarchy at all.
    if (num_invalid == (num_controllers - 1) && num_backing_store == 1) {
        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
        for (unsigned int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Backing_Store) {
                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
                return true;
            }
        }
    } else if (num_ro > 0 || num_rw == 1) {
        // In Broadcast/Snoop protocols, this covers the case where the block
        // is known to exist somewhere in the caching hierarchy, so any valid
        // RO or RW copy can be read.  The same holds for directory protocols:
        // read any valid readable copy of the block.
        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
                num_busy, num_ro, num_rw);
        // In this loop, we figure out which controller has a read-only or
        // read-write copy of the given address. Any valid copy suffices for
        // a functional read.
        for (unsigned int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Read_Only ||
                access_perm == AccessPermission_Read_Write) {
                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
                return true;
            }
        }
    }

    return false;
}

// The function searches through all the buffers in the cache, directory,
// and memory controllers, and in the network components, and writes the
// data portion of those that hold the address specified in the packet.
bool
RubySystem::functionalWrite(PacketPtr pkt)
{
    Address addr(pkt->getAddr());
    Address line_addr = line_address(addr);
    AccessPermission access_perm = AccessPermission_NotPresent;
    int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Write request for %s\n", addr);

    uint32_t M5_VAR_USED num_functional_writes = 0;

    for (unsigned int i = 0; i < num_controllers; ++i) {
        num_functional_writes +=
            m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);

        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
        if (access_perm != AccessPermission_Invalid &&
            access_perm != AccessPermission_NotPresent) {
            num_functional_writes +=
                m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt);
        }
    }

    num_functional_writes += m_network->functionalWrite(pkt);
    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);

    return true;
}

#ifdef CHECK_COHERENCE
// This code checks for cases where the given cache block is exclusive in
// one node and shared in another -- a coherence violation.
//
// To use it, the SLICC specification must call
// sequencer.checkCoherence(address) when the controller changes to a state
// with new permissions.  Do this in setState.  The SLICC spec must also
// define the methods "isBlockShared" and "isBlockExclusive" that are
// specific to that protocol.
//
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif

RubySystem *
RubySystemParams::create()
{
    return new RubySystem(this);
}