RubySystem.cc revision 10525
1/*
2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <fcntl.h>
30#include <zlib.h>
31
32#include <cstdio>
33
34#include "base/intmath.hh"
35#include "base/statistics.hh"
36#include "debug/RubyCacheTrace.hh"
37#include "debug/RubySystem.hh"
38#include "mem/ruby/common/Address.hh"
39#include "mem/ruby/network/Network.hh"
40#include "mem/ruby/system/System.hh"
41#include "sim/eventq.hh"
42#include "sim/simulate.hh"
43
44using namespace std;
45
46int RubySystem::m_random_seed;
47bool RubySystem::m_randomization;
48uint32_t RubySystem::m_block_size_bytes;
49uint32_t RubySystem::m_block_size_bits;
50uint32_t RubySystem::m_memory_size_bits;
51
52RubySystem::RubySystem(const Params *p)
53    : ClockedObject(p)
54{
55    if (g_system_ptr != NULL)
56        fatal("Only one RubySystem object currently allowed.\n");
57
58    m_random_seed = p->random_seed;
59    srandom(m_random_seed);
60    m_randomization = p->randomization;
61
62    m_block_size_bytes = p->block_size_bytes;
63    assert(isPowerOf2(m_block_size_bytes));
64    m_block_size_bits = floorLog2(m_block_size_bytes);
65    m_memory_size_bits = p->memory_size_bits;
66
67    m_warmup_enabled = false;
68    m_cooldown_enabled = false;
69
70    // Setup the global variables used in Ruby
71    g_system_ptr = this;
72
73    // Resize to the size of different machine types
74    g_abs_controls.resize(MachineType_NUM);
75
76    // Collate the statistics before they are printed.
77    Stats::registerDumpCallback(new RubyStatsCallback(this));
78    // Create the profiler
79    m_profiler = new Profiler(p);
80    m_phys_mem = p->phys_mem;
81}
82
83void
84RubySystem::registerNetwork(Network* network_ptr)
85{
86  m_network = network_ptr;
87}
88
89void
90RubySystem::registerAbstractController(AbstractController* cntrl)
91{
92  m_abs_cntrl_vec.push_back(cntrl);
93
94  MachineID id = cntrl->getMachineID();
95  g_abs_controls[id.getType()][id.getNum()] = cntrl;
96}
97
98RubySystem::~RubySystem()
99{
100    delete m_network;
101    delete m_profiler;
102}
103
104void
105RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
106                                 uint64 uncompressed_trace_size)
107{
108    // Create the checkpoint file for the memory
109    string thefile = Checkpoint::dir() + "/" + filename.c_str();
110
111    int fd = creat(thefile.c_str(), 0664);
112    if (fd < 0) {
113        perror("creat");
114        fatal("Can't open memory trace file '%s'\n", filename);
115    }
116
117    gzFile compressedMemory = gzdopen(fd, "wb");
118    if (compressedMemory == NULL)
119        fatal("Insufficient memory to allocate compression state for %s\n",
120              filename);
121
122    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
123        uncompressed_trace_size) {
124        fatal("Write failed on memory trace file '%s'\n", filename);
125    }
126
127    if (gzclose(compressedMemory)) {
128        fatal("Close failed on memory trace file '%s'\n", filename);
129    }
130    delete[] raw_data;
131}
132
133void
134RubySystem::serialize(std::ostream &os)
135{
136    m_cooldown_enabled = true;
137    vector<Sequencer*> sequencer_map;
138    Sequencer* sequencer_ptr = NULL;
139
140    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
141        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
142        if (sequencer_ptr == NULL) {
143            sequencer_ptr = sequencer_map[cntrl];
144        }
145    }
146
147    assert(sequencer_ptr != NULL);
148
149    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
150        if (sequencer_map[cntrl] == NULL) {
151            sequencer_map[cntrl] = sequencer_ptr;
152        }
153    }
154
155    // Store the cache-block size, so we are able to restore on systems with a
156    // different cache-block size. CacheRecorder depends on the correct
157    // cache-block size upon unserializing.
158    uint64 block_size_bytes = getBlockSizeBytes();
159    SERIALIZE_SCALAR(block_size_bytes);
160
161    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
162    // Create the CacheRecorder and record the cache trace
163    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map,
164                                         block_size_bytes);
165
166    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
167        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
168    }
169
170    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
171    // save the current tick value
172    Tick curtick_original = curTick();
173    // save the event queue head
174    Event* eventq_head = eventq->replaceHead(NULL);
175    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
176            curtick_original);
177
178    // Schedule an event to start cache cooldown
179    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
180    enqueueRubyEvent(curTick());
181    simulate();
182    DPRINTF(RubyCacheTrace, "Cache flush complete\n");
183
184    // Restore eventq head
185    eventq_head = eventq->replaceHead(eventq_head);
186    // Restore curTick
187    setCurTick(curtick_original);
188
189    // Aggergate the trace entries together into a single array
190    uint8_t *raw_data = new uint8_t[4096];
191    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
192                                                                 4096);
193    string cache_trace_file = name() + ".cache.gz";
194    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
195
196    SERIALIZE_SCALAR(cache_trace_file);
197    SERIALIZE_SCALAR(cache_trace_size);
198
199    m_cooldown_enabled = false;
200}
201
202void
203RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
204                                uint64& uncompressed_trace_size)
205{
206    // Read the trace file
207    gzFile compressedTrace;
208
209    // trace file
210    int fd = open(filename.c_str(), O_RDONLY);
211    if (fd < 0) {
212        perror("open");
213        fatal("Unable to open trace file %s", filename);
214    }
215
216    compressedTrace = gzdopen(fd, "rb");
217    if (compressedTrace == NULL) {
218        fatal("Insufficient memory to allocate compression state for %s\n",
219              filename);
220    }
221
222    raw_data = new uint8_t[uncompressed_trace_size];
223    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
224            uncompressed_trace_size) {
225        fatal("Unable to read complete trace from file %s\n", filename);
226    }
227
228    if (gzclose(compressedTrace)) {
229        fatal("Failed to close cache trace file '%s'\n", filename);
230    }
231}
232
233void
234RubySystem::unserialize(Checkpoint *cp, const string &section)
235{
236    uint8_t *uncompressed_trace = NULL;
237
238    // This value should be set to the checkpoint-system's block-size.
239    // Optional, as checkpoints without it can be run if the
240    // checkpoint-system's block-size == current block-size.
241    uint64 block_size_bytes = getBlockSizeBytes();
242    UNSERIALIZE_OPT_SCALAR(block_size_bytes);
243
244    string cache_trace_file;
245    uint64 cache_trace_size = 0;
246
247    UNSERIALIZE_SCALAR(cache_trace_file);
248    UNSERIALIZE_SCALAR(cache_trace_size);
249    cache_trace_file = cp->cptDir + "/" + cache_trace_file;
250
251    readCompressedTrace(cache_trace_file, uncompressed_trace,
252                        cache_trace_size);
253    m_warmup_enabled = true;
254
255    vector<Sequencer*> sequencer_map;
256    Sequencer* t = NULL;
257    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
258        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
259        if (t == NULL) t = sequencer_map[cntrl];
260    }
261
262    assert(t != NULL);
263
264    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
265        if (sequencer_map[cntrl] == NULL) {
266            sequencer_map[cntrl] = t;
267        }
268    }
269
270    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
271                                         sequencer_map, block_size_bytes);
272}
273
274void
275RubySystem::startup()
276{
277
278    // Ruby restores state from a checkpoint by resetting the clock to 0 and
279    // playing the requests that can possibly re-generate the cache state.
280    // The clock value is set to the actual checkpointed value once all the
281    // requests have been executed.
282    //
283    // This way of restoring state is pretty finicky. For example, if a
284    // Ruby component reads time before the state has been restored, it would
285    // cache this value and hence its clock would not be reset to 0, when
286    // Ruby resets the global clock. This can potentially result in a
287    // deadlock.
288    //
289    // The solution is that no Ruby component should read time before the
290    // simulation starts. And then one also needs to hope that the time
291    // Ruby finishes restoring the state is less than the time when the
292    // state was checkpointed.
293
294    if (m_warmup_enabled) {
295        // save the current tick value
296        Tick curtick_original = curTick();
297        // save the event queue head
298        Event* eventq_head = eventq->replaceHead(NULL);
299        // set curTick to 0 and reset Ruby System's clock
300        setCurTick(0);
301        resetClock();
302
303        // Schedule an event to start cache warmup
304        enqueueRubyEvent(curTick());
305        simulate();
306
307        delete m_cache_recorder;
308        m_cache_recorder = NULL;
309        m_warmup_enabled = false;
310
311        // Restore eventq head
312        eventq_head = eventq->replaceHead(eventq_head);
313        // Restore curTick and Ruby System's clock
314        setCurTick(curtick_original);
315        resetClock();
316    }
317
318    resetStats();
319}
320
321void
322RubySystem::RubyEvent::process()
323{
324    if (ruby_system->m_warmup_enabled) {
325        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
326    }  else if (ruby_system->m_cooldown_enabled) {
327        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
328    }
329}
330
331void
332RubySystem::resetStats()
333{
334    g_ruby_start = curCycle();
335}
336
337bool
338RubySystem::functionalRead(PacketPtr pkt)
339{
340    Address address(pkt->getAddr());
341    Address line_address(address);
342    line_address.makeLineAddress();
343
344    AccessPermission access_perm = AccessPermission_NotPresent;
345    int num_controllers = m_abs_cntrl_vec.size();
346
347    DPRINTF(RubySystem, "Functional Read request for %s\n",address);
348
349    unsigned int num_ro = 0;
350    unsigned int num_rw = 0;
351    unsigned int num_busy = 0;
352    unsigned int num_backing_store = 0;
353    unsigned int num_invalid = 0;
354
355    // In this loop we count the number of controllers that have the given
356    // address in read only, read write and busy states.
357    for (unsigned int i = 0; i < num_controllers; ++i) {
358        access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address);
359        if (access_perm == AccessPermission_Read_Only)
360            num_ro++;
361        else if (access_perm == AccessPermission_Read_Write)
362            num_rw++;
363        else if (access_perm == AccessPermission_Busy)
364            num_busy++;
365        else if (access_perm == AccessPermission_Backing_Store)
366            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
367            // to represent blocks in memory *for Broadcast/Snooping protocols*,
368            // where memory has no idea whether it has an exclusive copy of data
369            // or not.
370            num_backing_store++;
371        else if (access_perm == AccessPermission_Invalid ||
372                 access_perm == AccessPermission_NotPresent)
373            num_invalid++;
374    }
375    assert(num_rw <= 1);
376
377    // This if case is meant to capture what happens in a Broadcast/Snoop
378    // protocol where the block does not exist in the cache hierarchy. You
379    // only want to read from the Backing_Store memory if there is no copy in
380    // the cache hierarchy, otherwise you want to try to read the RO or RW
381    // copies existing in the cache hierarchy (covered by the else statement).
382    // The reason is because the Backing_Store memory could easily be stale, if
383    // there are copies floating around the cache hierarchy, so you want to read
384    // it only if it's not in the cache hierarchy at all.
385    if (num_invalid == (num_controllers - 1) && num_backing_store == 1) {
386        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
387        for (unsigned int i = 0; i < num_controllers; ++i) {
388            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
389            if (access_perm == AccessPermission_Backing_Store) {
390                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
391                return true;
392            }
393        }
394    } else if (num_ro > 0 || num_rw == 1) {
395        // In Broadcast/Snoop protocols, this covers if you know the block
396        // exists somewhere in the caching hierarchy, then you want to read any
397        // valid RO or RW block.  In directory protocols, same thing, you want
398        // to read any valid readable copy of the block.
399        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
400                num_busy, num_ro, num_rw);
401        // In this loop, we try to figure which controller has a read only or
402        // a read write copy of the given address. Any valid copy would suffice
403        // for a functional read.
404        for (unsigned int i = 0;i < num_controllers;++i) {
405            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
406            if (access_perm == AccessPermission_Read_Only ||
407                access_perm == AccessPermission_Read_Write) {
408                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
409                return true;
410            }
411        }
412    }
413
414    return false;
415}
416
417// The function searches through all the buffers that exist in different
418// cache, directory and memory controllers, and in the network components
419// and writes the data portion of those that hold the address specified
420// in the packet.
421bool
422RubySystem::functionalWrite(PacketPtr pkt)
423{
424    Address addr(pkt->getAddr());
425    Address line_addr = line_address(addr);
426    AccessPermission access_perm = AccessPermission_NotPresent;
427    int num_controllers = m_abs_cntrl_vec.size();
428
429    DPRINTF(RubySystem, "Functional Write request for %s\n",addr);
430
431    uint32_t M5_VAR_USED num_functional_writes = 0;
432
433    for (unsigned int i = 0; i < num_controllers;++i) {
434        num_functional_writes +=
435            m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);
436
437        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
438        if (access_perm != AccessPermission_Invalid &&
439            access_perm != AccessPermission_NotPresent) {
440            num_functional_writes +=
441                m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt);
442        }
443    }
444
445    num_functional_writes += m_network->functionalWrite(pkt);
446    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
447
448    return true;
449}
450
#ifdef CHECK_COHERENCE
// Checks whether the given cache block is exclusive in one node while it is
// exclusive or shared in another one -- a coherence violation.
//
// To use it, the SLICC specification must call
// sequencer.checkCoherence(address) whenever the controller changes to a
// state with new permissions; do this in setState. The SLICC spec must also
// define the protocol specific methods "isBlockShared" and
// "isBlockExclusive".
//
// The body is currently disabled with #if 0, so the function does nothing.
//
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    NodeID lastShared = -1;
    bool sharedDetected = false;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (!m_chip_vector[i]->isBlockExclusive(addr)) {
            // Not exclusive here; only a shared copy is of interest.
            if (!m_chip_vector[i]->isBlockShared(addr))
                continue;

            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
            continue;
        }

        // This chip holds the block exclusively.
        if (exclusive != -1) {
            // coherence violation
            WARN_EXPR(exclusive);
            WARN_EXPR(m_chip_vector[i]->getID());
            WARN_EXPR(addr);
            WARN_EXPR(getTime());
            ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
        } else if (sharedDetected) {
            WARN_EXPR(lastShared);
            WARN_EXPR(m_chip_vector[i]->getID());
            WARN_EXPR(addr);
            WARN_EXPR(getTime());
            ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
        } else {
            exclusive = m_chip_vector[i]->getID();
        }
    }
#endif
}
#endif
502
503RubySystem *
504RubySystemParams::create()
505{
506    return new RubySystem(this);
507}
508