RubySystem.cc revision 10706
1/*
2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <fcntl.h>
30#include <zlib.h>
31
32#include <cstdio>
33
34#include "base/intmath.hh"
35#include "base/statistics.hh"
36#include "debug/RubyCacheTrace.hh"
37#include "debug/RubySystem.hh"
38#include "mem/ruby/common/Address.hh"
39#include "mem/ruby/network/Network.hh"
40#include "mem/ruby/system/System.hh"
41#include "mem/simple_mem.hh"
42#include "sim/eventq.hh"
43#include "sim/simulate.hh"
44
45using namespace std;
46
47int RubySystem::m_random_seed;
48bool RubySystem::m_randomization;
49uint32_t RubySystem::m_block_size_bytes;
50uint32_t RubySystem::m_block_size_bits;
51uint32_t RubySystem::m_memory_size_bits;
52
53RubySystem::RubySystem(const Params *p)
54    : ClockedObject(p), m_access_backing_store(p->access_backing_store)
55{
56    if (g_system_ptr != NULL)
57        fatal("Only one RubySystem object currently allowed.\n");
58
59    m_random_seed = p->random_seed;
60    srandom(m_random_seed);
61    m_randomization = p->randomization;
62
63    m_block_size_bytes = p->block_size_bytes;
64    assert(isPowerOf2(m_block_size_bytes));
65    m_block_size_bits = floorLog2(m_block_size_bytes);
66    m_memory_size_bits = p->memory_size_bits;
67
68    m_warmup_enabled = false;
69    m_cooldown_enabled = false;
70
71    // Setup the global variables used in Ruby
72    g_system_ptr = this;
73
74    // Resize to the size of different machine types
75    g_abs_controls.resize(MachineType_NUM);
76
77    // Collate the statistics before they are printed.
78    Stats::registerDumpCallback(new RubyStatsCallback(this));
79    // Create the profiler
80    m_profiler = new Profiler(p);
81    m_phys_mem = p->phys_mem;
82}
83
84void
85RubySystem::registerNetwork(Network* network_ptr)
86{
87  m_network = network_ptr;
88}
89
90void
91RubySystem::registerAbstractController(AbstractController* cntrl)
92{
93  m_abs_cntrl_vec.push_back(cntrl);
94
95  MachineID id = cntrl->getMachineID();
96  g_abs_controls[id.getType()][id.getNum()] = cntrl;
97}
98
99RubySystem::~RubySystem()
100{
101    delete m_network;
102    delete m_profiler;
103}
104
105void
106RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
107                                 uint64 uncompressed_trace_size)
108{
109    // Create the checkpoint file for the memory
110    string thefile = Checkpoint::dir() + "/" + filename.c_str();
111
112    int fd = creat(thefile.c_str(), 0664);
113    if (fd < 0) {
114        perror("creat");
115        fatal("Can't open memory trace file '%s'\n", filename);
116    }
117
118    gzFile compressedMemory = gzdopen(fd, "wb");
119    if (compressedMemory == NULL)
120        fatal("Insufficient memory to allocate compression state for %s\n",
121              filename);
122
123    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
124        uncompressed_trace_size) {
125        fatal("Write failed on memory trace file '%s'\n", filename);
126    }
127
128    if (gzclose(compressedMemory)) {
129        fatal("Close failed on memory trace file '%s'\n", filename);
130    }
131    delete[] raw_data;
132}
133
134void
135RubySystem::serialize(std::ostream &os)
136{
137    m_cooldown_enabled = true;
138    vector<Sequencer*> sequencer_map;
139    Sequencer* sequencer_ptr = NULL;
140
141    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
142        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
143        if (sequencer_ptr == NULL) {
144            sequencer_ptr = sequencer_map[cntrl];
145        }
146    }
147
148    assert(sequencer_ptr != NULL);
149
150    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
151        if (sequencer_map[cntrl] == NULL) {
152            sequencer_map[cntrl] = sequencer_ptr;
153        }
154    }
155
156    // Store the cache-block size, so we are able to restore on systems with a
157    // different cache-block size. CacheRecorder depends on the correct
158    // cache-block size upon unserializing.
159    uint64 block_size_bytes = getBlockSizeBytes();
160    SERIALIZE_SCALAR(block_size_bytes);
161
162    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
163    // Create the CacheRecorder and record the cache trace
164    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map,
165                                         block_size_bytes);
166
167    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
168        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
169    }
170
171    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
172    // save the current tick value
173    Tick curtick_original = curTick();
174    // save the event queue head
175    Event* eventq_head = eventq->replaceHead(NULL);
176    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
177            curtick_original);
178
179    // Schedule an event to start cache cooldown
180    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
181    enqueueRubyEvent(curTick());
182    simulate();
183    DPRINTF(RubyCacheTrace, "Cache flush complete\n");
184
185    // Restore eventq head
186    eventq_head = eventq->replaceHead(eventq_head);
187    // Restore curTick
188    setCurTick(curtick_original);
189
190    // Aggergate the trace entries together into a single array
191    uint8_t *raw_data = new uint8_t[4096];
192    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
193                                                                 4096);
194    string cache_trace_file = name() + ".cache.gz";
195    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
196
197    SERIALIZE_SCALAR(cache_trace_file);
198    SERIALIZE_SCALAR(cache_trace_size);
199
200    m_cooldown_enabled = false;
201}
202
203void
204RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
205                                uint64& uncompressed_trace_size)
206{
207    // Read the trace file
208    gzFile compressedTrace;
209
210    // trace file
211    int fd = open(filename.c_str(), O_RDONLY);
212    if (fd < 0) {
213        perror("open");
214        fatal("Unable to open trace file %s", filename);
215    }
216
217    compressedTrace = gzdopen(fd, "rb");
218    if (compressedTrace == NULL) {
219        fatal("Insufficient memory to allocate compression state for %s\n",
220              filename);
221    }
222
223    raw_data = new uint8_t[uncompressed_trace_size];
224    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
225            uncompressed_trace_size) {
226        fatal("Unable to read complete trace from file %s\n", filename);
227    }
228
229    if (gzclose(compressedTrace)) {
230        fatal("Failed to close cache trace file '%s'\n", filename);
231    }
232}
233
234void
235RubySystem::unserialize(Checkpoint *cp, const string &section)
236{
237    uint8_t *uncompressed_trace = NULL;
238
239    // This value should be set to the checkpoint-system's block-size.
240    // Optional, as checkpoints without it can be run if the
241    // checkpoint-system's block-size == current block-size.
242    uint64 block_size_bytes = getBlockSizeBytes();
243    UNSERIALIZE_OPT_SCALAR(block_size_bytes);
244
245    string cache_trace_file;
246    uint64 cache_trace_size = 0;
247
248    UNSERIALIZE_SCALAR(cache_trace_file);
249    UNSERIALIZE_SCALAR(cache_trace_size);
250    cache_trace_file = cp->cptDir + "/" + cache_trace_file;
251
252    readCompressedTrace(cache_trace_file, uncompressed_trace,
253                        cache_trace_size);
254    m_warmup_enabled = true;
255
256    vector<Sequencer*> sequencer_map;
257    Sequencer* t = NULL;
258    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
259        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
260        if (t == NULL) t = sequencer_map[cntrl];
261    }
262
263    assert(t != NULL);
264
265    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
266        if (sequencer_map[cntrl] == NULL) {
267            sequencer_map[cntrl] = t;
268        }
269    }
270
271    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
272                                         sequencer_map, block_size_bytes);
273}
274
275void
276RubySystem::startup()
277{
278
279    // Ruby restores state from a checkpoint by resetting the clock to 0 and
280    // playing the requests that can possibly re-generate the cache state.
281    // The clock value is set to the actual checkpointed value once all the
282    // requests have been executed.
283    //
284    // This way of restoring state is pretty finicky. For example, if a
285    // Ruby component reads time before the state has been restored, it would
286    // cache this value and hence its clock would not be reset to 0, when
287    // Ruby resets the global clock. This can potentially result in a
288    // deadlock.
289    //
290    // The solution is that no Ruby component should read time before the
291    // simulation starts. And then one also needs to hope that the time
292    // Ruby finishes restoring the state is less than the time when the
293    // state was checkpointed.
294
295    if (m_warmup_enabled) {
296        // save the current tick value
297        Tick curtick_original = curTick();
298        // save the event queue head
299        Event* eventq_head = eventq->replaceHead(NULL);
300        // set curTick to 0 and reset Ruby System's clock
301        setCurTick(0);
302        resetClock();
303
304        // Schedule an event to start cache warmup
305        enqueueRubyEvent(curTick());
306        simulate();
307
308        delete m_cache_recorder;
309        m_cache_recorder = NULL;
310        m_warmup_enabled = false;
311
312        // Restore eventq head
313        eventq_head = eventq->replaceHead(eventq_head);
314        // Restore curTick and Ruby System's clock
315        setCurTick(curtick_original);
316        resetClock();
317    }
318
319    resetStats();
320}
321
322void
323RubySystem::RubyEvent::process()
324{
325    if (ruby_system->m_warmup_enabled) {
326        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
327    }  else if (ruby_system->m_cooldown_enabled) {
328        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
329    }
330}
331
332void
333RubySystem::resetStats()
334{
335    g_ruby_start = curCycle();
336}
337
338bool
339RubySystem::functionalRead(PacketPtr pkt)
340{
341    Address address(pkt->getAddr());
342    Address line_address(address);
343    line_address.makeLineAddress();
344
345    AccessPermission access_perm = AccessPermission_NotPresent;
346    int num_controllers = m_abs_cntrl_vec.size();
347
348    DPRINTF(RubySystem, "Functional Read request for %s\n",address);
349
350    unsigned int num_ro = 0;
351    unsigned int num_rw = 0;
352    unsigned int num_busy = 0;
353    unsigned int num_backing_store = 0;
354    unsigned int num_invalid = 0;
355
356    // In this loop we count the number of controllers that have the given
357    // address in read only, read write and busy states.
358    for (unsigned int i = 0; i < num_controllers; ++i) {
359        access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address);
360        if (access_perm == AccessPermission_Read_Only)
361            num_ro++;
362        else if (access_perm == AccessPermission_Read_Write)
363            num_rw++;
364        else if (access_perm == AccessPermission_Busy)
365            num_busy++;
366        else if (access_perm == AccessPermission_Backing_Store)
367            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
368            // to represent blocks in memory *for Broadcast/Snooping protocols*,
369            // where memory has no idea whether it has an exclusive copy of data
370            // or not.
371            num_backing_store++;
372        else if (access_perm == AccessPermission_Invalid ||
373                 access_perm == AccessPermission_NotPresent)
374            num_invalid++;
375    }
376    assert(num_rw <= 1);
377
378    // This if case is meant to capture what happens in a Broadcast/Snoop
379    // protocol where the block does not exist in the cache hierarchy. You
380    // only want to read from the Backing_Store memory if there is no copy in
381    // the cache hierarchy, otherwise you want to try to read the RO or RW
382    // copies existing in the cache hierarchy (covered by the else statement).
383    // The reason is because the Backing_Store memory could easily be stale, if
384    // there are copies floating around the cache hierarchy, so you want to read
385    // it only if it's not in the cache hierarchy at all.
386    if (num_invalid == (num_controllers - 1) && num_backing_store == 1) {
387        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
388        for (unsigned int i = 0; i < num_controllers; ++i) {
389            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
390            if (access_perm == AccessPermission_Backing_Store) {
391                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
392                return true;
393            }
394        }
395    } else if (num_ro > 0 || num_rw == 1) {
396        // In Broadcast/Snoop protocols, this covers if you know the block
397        // exists somewhere in the caching hierarchy, then you want to read any
398        // valid RO or RW block.  In directory protocols, same thing, you want
399        // to read any valid readable copy of the block.
400        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
401                num_busy, num_ro, num_rw);
402        // In this loop, we try to figure which controller has a read only or
403        // a read write copy of the given address. Any valid copy would suffice
404        // for a functional read.
405        for (unsigned int i = 0;i < num_controllers;++i) {
406            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
407            if (access_perm == AccessPermission_Read_Only ||
408                access_perm == AccessPermission_Read_Write) {
409                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
410                return true;
411            }
412        }
413    }
414
415    return false;
416}
417
418// The function searches through all the buffers that exist in different
419// cache, directory and memory controllers, and in the network components
420// and writes the data portion of those that hold the address specified
421// in the packet.
422bool
423RubySystem::functionalWrite(PacketPtr pkt)
424{
425    Address addr(pkt->getAddr());
426    Address line_addr = line_address(addr);
427    AccessPermission access_perm = AccessPermission_NotPresent;
428    int num_controllers = m_abs_cntrl_vec.size();
429
430    DPRINTF(RubySystem, "Functional Write request for %s\n",addr);
431
432    uint32_t M5_VAR_USED num_functional_writes = 0;
433
434    for (unsigned int i = 0; i < num_controllers;++i) {
435        num_functional_writes +=
436            m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);
437
438        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
439        if (access_perm != AccessPermission_Invalid &&
440            access_perm != AccessPermission_NotPresent) {
441            num_functional_writes +=
442                m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt);
443        }
444    }
445
446    num_functional_writes += m_network->functionalWrite(pkt);
447    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
448
449    return true;
450}
451
#ifdef CHECK_COHERENCE
// Coherence check: detects a cache block that is exclusive in one node while
// being shared (or exclusive) in another, which is a coherence violation.
//
// Intended use: the SLICC specification calls
// sequencer.checkCoherence(address) from setState whenever a controller
// moves to a state with new permissions, and defines the protocol-specific
// methods "isBlockShared" and "isBlockExclusive".
//
// NOTE: the whole body sits inside '#if 0', so even with CHECK_COHERENCE
// defined this function currently compiles to an empty body.
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif
503
504RubySystem *
505RubySystemParams::create()
506{
507    return new RubySystem(this);
508}
509