RubySystem.cc revision 10524
1/*
2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <fcntl.h>
30#include <zlib.h>
31
32#include <cstdio>
33
34#include "base/intmath.hh"
35#include "base/statistics.hh"
36#include "debug/RubyCacheTrace.hh"
37#include "debug/RubySystem.hh"
38#include "mem/ruby/common/Address.hh"
39#include "mem/ruby/network/Network.hh"
40#include "mem/ruby/system/System.hh"
41#include "sim/eventq.hh"
42#include "sim/simulate.hh"
43
44using namespace std;
45
46int RubySystem::m_random_seed;
47bool RubySystem::m_randomization;
48uint32_t RubySystem::m_block_size_bytes;
49uint32_t RubySystem::m_block_size_bits;
50uint32_t RubySystem::m_memory_size_bits;
51
52RubySystem::RubySystem(const Params *p)
53    : ClockedObject(p)
54{
55    if (g_system_ptr != NULL)
56        fatal("Only one RubySystem object currently allowed.\n");
57
58    m_random_seed = p->random_seed;
59    srandom(m_random_seed);
60    m_randomization = p->randomization;
61
62    m_block_size_bytes = p->block_size_bytes;
63    assert(isPowerOf2(m_block_size_bytes));
64    m_block_size_bits = floorLog2(m_block_size_bytes);
65    m_memory_size_bits = p->memory_size_bits;
66
67    m_warmup_enabled = false;
68    m_cooldown_enabled = false;
69
70    // Setup the global variables used in Ruby
71    g_system_ptr = this;
72
73    // Resize to the size of different machine types
74    g_abs_controls.resize(MachineType_NUM);
75
76    // Collate the statistics before they are printed.
77    Stats::registerDumpCallback(new RubyStatsCallback(this));
78    // Create the profiler
79    m_profiler = new Profiler(p);
80}
81
82void
83RubySystem::registerNetwork(Network* network_ptr)
84{
85  m_network = network_ptr;
86}
87
88void
89RubySystem::registerAbstractController(AbstractController* cntrl)
90{
91  m_abs_cntrl_vec.push_back(cntrl);
92
93  MachineID id = cntrl->getMachineID();
94  g_abs_controls[id.getType()][id.getNum()] = cntrl;
95}
96
97RubySystem::~RubySystem()
98{
99    delete m_network;
100    delete m_profiler;
101}
102
103void
104RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
105                                 uint64 uncompressed_trace_size)
106{
107    // Create the checkpoint file for the memory
108    string thefile = Checkpoint::dir() + "/" + filename.c_str();
109
110    int fd = creat(thefile.c_str(), 0664);
111    if (fd < 0) {
112        perror("creat");
113        fatal("Can't open memory trace file '%s'\n", filename);
114    }
115
116    gzFile compressedMemory = gzdopen(fd, "wb");
117    if (compressedMemory == NULL)
118        fatal("Insufficient memory to allocate compression state for %s\n",
119              filename);
120
121    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
122        uncompressed_trace_size) {
123        fatal("Write failed on memory trace file '%s'\n", filename);
124    }
125
126    if (gzclose(compressedMemory)) {
127        fatal("Close failed on memory trace file '%s'\n", filename);
128    }
129    delete[] raw_data;
130}
131
132void
133RubySystem::serialize(std::ostream &os)
134{
135    m_cooldown_enabled = true;
136    vector<Sequencer*> sequencer_map;
137    Sequencer* sequencer_ptr = NULL;
138
139    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
140        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
141        if (sequencer_ptr == NULL) {
142            sequencer_ptr = sequencer_map[cntrl];
143        }
144    }
145
146    assert(sequencer_ptr != NULL);
147
148    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
149        if (sequencer_map[cntrl] == NULL) {
150            sequencer_map[cntrl] = sequencer_ptr;
151        }
152    }
153
154    // Store the cache-block size, so we are able to restore on systems with a
155    // different cache-block size. CacheRecorder depends on the correct
156    // cache-block size upon unserializing.
157    uint64 block_size_bytes = getBlockSizeBytes();
158    SERIALIZE_SCALAR(block_size_bytes);
159
160    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
161    // Create the CacheRecorder and record the cache trace
162    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map,
163                                         block_size_bytes);
164
165    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
166        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
167    }
168
169    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
170    // save the current tick value
171    Tick curtick_original = curTick();
172    // save the event queue head
173    Event* eventq_head = eventq->replaceHead(NULL);
174    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
175            curtick_original);
176
177    // Schedule an event to start cache cooldown
178    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
179    enqueueRubyEvent(curTick());
180    simulate();
181    DPRINTF(RubyCacheTrace, "Cache flush complete\n");
182
183    // Restore eventq head
184    eventq_head = eventq->replaceHead(eventq_head);
185    // Restore curTick
186    setCurTick(curtick_original);
187
188    // Aggergate the trace entries together into a single array
189    uint8_t *raw_data = new uint8_t[4096];
190    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
191                                                                 4096);
192    string cache_trace_file = name() + ".cache.gz";
193    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
194
195    SERIALIZE_SCALAR(cache_trace_file);
196    SERIALIZE_SCALAR(cache_trace_size);
197
198    m_cooldown_enabled = false;
199}
200
201void
202RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
203                                uint64& uncompressed_trace_size)
204{
205    // Read the trace file
206    gzFile compressedTrace;
207
208    // trace file
209    int fd = open(filename.c_str(), O_RDONLY);
210    if (fd < 0) {
211        perror("open");
212        fatal("Unable to open trace file %s", filename);
213    }
214
215    compressedTrace = gzdopen(fd, "rb");
216    if (compressedTrace == NULL) {
217        fatal("Insufficient memory to allocate compression state for %s\n",
218              filename);
219    }
220
221    raw_data = new uint8_t[uncompressed_trace_size];
222    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
223            uncompressed_trace_size) {
224        fatal("Unable to read complete trace from file %s\n", filename);
225    }
226
227    if (gzclose(compressedTrace)) {
228        fatal("Failed to close cache trace file '%s'\n", filename);
229    }
230}
231
232void
233RubySystem::unserialize(Checkpoint *cp, const string &section)
234{
235    uint8_t *uncompressed_trace = NULL;
236
237    // This value should be set to the checkpoint-system's block-size.
238    // Optional, as checkpoints without it can be run if the
239    // checkpoint-system's block-size == current block-size.
240    uint64 block_size_bytes = getBlockSizeBytes();
241    UNSERIALIZE_OPT_SCALAR(block_size_bytes);
242
243    string cache_trace_file;
244    uint64 cache_trace_size = 0;
245
246    UNSERIALIZE_SCALAR(cache_trace_file);
247    UNSERIALIZE_SCALAR(cache_trace_size);
248    cache_trace_file = cp->cptDir + "/" + cache_trace_file;
249
250    readCompressedTrace(cache_trace_file, uncompressed_trace,
251                        cache_trace_size);
252    m_warmup_enabled = true;
253
254    vector<Sequencer*> sequencer_map;
255    Sequencer* t = NULL;
256    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
257        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
258        if (t == NULL) t = sequencer_map[cntrl];
259    }
260
261    assert(t != NULL);
262
263    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
264        if (sequencer_map[cntrl] == NULL) {
265            sequencer_map[cntrl] = t;
266        }
267    }
268
269    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
270                                         sequencer_map, block_size_bytes);
271}
272
273void
274RubySystem::startup()
275{
276
277    // Ruby restores state from a checkpoint by resetting the clock to 0 and
278    // playing the requests that can possibly re-generate the cache state.
279    // The clock value is set to the actual checkpointed value once all the
280    // requests have been executed.
281    //
282    // This way of restoring state is pretty finicky. For example, if a
283    // Ruby component reads time before the state has been restored, it would
284    // cache this value and hence its clock would not be reset to 0, when
285    // Ruby resets the global clock. This can potentially result in a
286    // deadlock.
287    //
288    // The solution is that no Ruby component should read time before the
289    // simulation starts. And then one also needs to hope that the time
290    // Ruby finishes restoring the state is less than the time when the
291    // state was checkpointed.
292
293    if (m_warmup_enabled) {
294        // save the current tick value
295        Tick curtick_original = curTick();
296        // save the event queue head
297        Event* eventq_head = eventq->replaceHead(NULL);
298        // set curTick to 0 and reset Ruby System's clock
299        setCurTick(0);
300        resetClock();
301
302        // Schedule an event to start cache warmup
303        enqueueRubyEvent(curTick());
304        simulate();
305
306        delete m_cache_recorder;
307        m_cache_recorder = NULL;
308        m_warmup_enabled = false;
309
310        // Restore eventq head
311        eventq_head = eventq->replaceHead(eventq_head);
312        // Restore curTick and Ruby System's clock
313        setCurTick(curtick_original);
314        resetClock();
315    }
316
317    resetStats();
318}
319
320void
321RubySystem::RubyEvent::process()
322{
323    if (ruby_system->m_warmup_enabled) {
324        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
325    }  else if (ruby_system->m_cooldown_enabled) {
326        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
327    }
328}
329
330void
331RubySystem::resetStats()
332{
333    g_ruby_start = curCycle();
334}
335
336bool
337RubySystem::functionalRead(PacketPtr pkt)
338{
339    Address address(pkt->getAddr());
340    Address line_address(address);
341    line_address.makeLineAddress();
342
343    AccessPermission access_perm = AccessPermission_NotPresent;
344    int num_controllers = m_abs_cntrl_vec.size();
345
346    DPRINTF(RubySystem, "Functional Read request for %s\n",address);
347
348    unsigned int num_ro = 0;
349    unsigned int num_rw = 0;
350    unsigned int num_busy = 0;
351    unsigned int num_backing_store = 0;
352    unsigned int num_invalid = 0;
353
354    // In this loop we count the number of controllers that have the given
355    // address in read only, read write and busy states.
356    for (unsigned int i = 0; i < num_controllers; ++i) {
357        access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address);
358        if (access_perm == AccessPermission_Read_Only)
359            num_ro++;
360        else if (access_perm == AccessPermission_Read_Write)
361            num_rw++;
362        else if (access_perm == AccessPermission_Busy)
363            num_busy++;
364        else if (access_perm == AccessPermission_Backing_Store)
365            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
366            // to represent blocks in memory *for Broadcast/Snooping protocols*,
367            // where memory has no idea whether it has an exclusive copy of data
368            // or not.
369            num_backing_store++;
370        else if (access_perm == AccessPermission_Invalid ||
371                 access_perm == AccessPermission_NotPresent)
372            num_invalid++;
373    }
374    assert(num_rw <= 1);
375
376    // This if case is meant to capture what happens in a Broadcast/Snoop
377    // protocol where the block does not exist in the cache hierarchy. You
378    // only want to read from the Backing_Store memory if there is no copy in
379    // the cache hierarchy, otherwise you want to try to read the RO or RW
380    // copies existing in the cache hierarchy (covered by the else statement).
381    // The reason is because the Backing_Store memory could easily be stale, if
382    // there are copies floating around the cache hierarchy, so you want to read
383    // it only if it's not in the cache hierarchy at all.
384    if (num_invalid == (num_controllers - 1) && num_backing_store == 1) {
385        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
386        for (unsigned int i = 0; i < num_controllers; ++i) {
387            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
388            if (access_perm == AccessPermission_Backing_Store) {
389                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
390                return true;
391            }
392        }
393    } else if (num_ro > 0 || num_rw == 1) {
394        // In Broadcast/Snoop protocols, this covers if you know the block
395        // exists somewhere in the caching hierarchy, then you want to read any
396        // valid RO or RW block.  In directory protocols, same thing, you want
397        // to read any valid readable copy of the block.
398        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
399                num_busy, num_ro, num_rw);
400        // In this loop, we try to figure which controller has a read only or
401        // a read write copy of the given address. Any valid copy would suffice
402        // for a functional read.
403        for (unsigned int i = 0;i < num_controllers;++i) {
404            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
405            if (access_perm == AccessPermission_Read_Only ||
406                access_perm == AccessPermission_Read_Write) {
407                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
408                return true;
409            }
410        }
411    }
412
413    return false;
414}
415
416// The function searches through all the buffers that exist in different
417// cache, directory and memory controllers, and in the network components
418// and writes the data portion of those that hold the address specified
419// in the packet.
420bool
421RubySystem::functionalWrite(PacketPtr pkt)
422{
423    Address addr(pkt->getAddr());
424    Address line_addr = line_address(addr);
425    AccessPermission access_perm = AccessPermission_NotPresent;
426    int num_controllers = m_abs_cntrl_vec.size();
427
428    DPRINTF(RubySystem, "Functional Write request for %s\n",addr);
429
430    uint32_t M5_VAR_USED num_functional_writes = 0;
431
432    for (unsigned int i = 0; i < num_controllers;++i) {
433        num_functional_writes +=
434            m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);
435
436        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
437        if (access_perm != AccessPermission_Invalid &&
438            access_perm != AccessPermission_NotPresent) {
439            num_functional_writes +=
440                m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt);
441        }
442    }
443
444    num_functional_writes += m_network->functionalWrite(pkt);
445    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
446
447    return true;
448}
449
#ifdef CHECK_COHERENCE
// Coherence check: detects a cache block that is exclusive in one node
// while being exclusive or shared in another, which is a coherence
// violation.
//
// To use it, the SLICC specification must call
// sequencer.checkCoherence(address) whenever a controller changes to a state
// with new permissions (do this in setState). The SLICC spec must also
// define the protocol-specific methods "isBlockShared" and
// "isBlockExclusive".
//
// NOTE: the body below is wrapped in "#if 0", so even with CHECK_COHERENCE
// defined this function currently does nothing.
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif
501
502RubySystem *
503RubySystemParams::create()
504{
505    return new RubySystem(this);
506}
507