RubySystem.cc revision 10920
1/*
2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <fcntl.h>
30#include <zlib.h>
31
32#include <cstdio>
33
34#include "base/intmath.hh"
35#include "base/statistics.hh"
36#include "debug/RubyCacheTrace.hh"
37#include "debug/RubySystem.hh"
38#include "mem/ruby/common/Address.hh"
39#include "mem/ruby/network/Network.hh"
40#include "mem/ruby/system/System.hh"
41#include "mem/simple_mem.hh"
42#include "sim/eventq.hh"
43#include "sim/simulate.hh"
44
45using namespace std;
46
47int RubySystem::m_random_seed;
48bool RubySystem::m_randomization;
49uint32_t RubySystem::m_block_size_bytes;
50uint32_t RubySystem::m_block_size_bits;
51uint32_t RubySystem::m_memory_size_bits;
52bool RubySystem::m_warmup_enabled = false;
53// To look forward to allowing multiple RubySystem instances, track the number
54// of RubySystems that need to be warmed up on checkpoint restore.
55unsigned RubySystem::m_systems_to_warmup = 0;
56bool RubySystem::m_cooldown_enabled = false;
57
58RubySystem::RubySystem(const Params *p)
59    : ClockedObject(p), m_access_backing_store(p->access_backing_store)
60{
61    m_random_seed = p->random_seed;
62    srandom(m_random_seed);
63    m_randomization = p->randomization;
64
65    m_block_size_bytes = p->block_size_bytes;
66    assert(isPowerOf2(m_block_size_bytes));
67    m_block_size_bits = floorLog2(m_block_size_bytes);
68    m_memory_size_bits = p->memory_size_bits;
69
70    // Resize to the size of different machine types
71    m_abstract_controls.resize(MachineType_NUM);
72
73    // Collate the statistics before they are printed.
74    Stats::registerDumpCallback(new RubyStatsCallback(this));
75    // Create the profiler
76    m_profiler = new Profiler(p, this);
77    m_phys_mem = p->phys_mem;
78}
79
80void
81RubySystem::registerNetwork(Network* network_ptr)
82{
83    m_network = network_ptr;
84}
85
86void
87RubySystem::registerAbstractController(AbstractController* cntrl)
88{
89    m_abs_cntrl_vec.push_back(cntrl);
90
91    MachineID id = cntrl->getMachineID();
92    m_abstract_controls[id.getType()][id.getNum()] = cntrl;
93}
94
95RubySystem::~RubySystem()
96{
97    delete m_network;
98    delete m_profiler;
99}
100
101void
102RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
103                                 uint64 uncompressed_trace_size)
104{
105    // Create the checkpoint file for the memory
106    string thefile = CheckpointIn::dir() + "/" + filename.c_str();
107
108    int fd = creat(thefile.c_str(), 0664);
109    if (fd < 0) {
110        perror("creat");
111        fatal("Can't open memory trace file '%s'\n", filename);
112    }
113
114    gzFile compressedMemory = gzdopen(fd, "wb");
115    if (compressedMemory == NULL)
116        fatal("Insufficient memory to allocate compression state for %s\n",
117              filename);
118
119    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
120        uncompressed_trace_size) {
121        fatal("Write failed on memory trace file '%s'\n", filename);
122    }
123
124    if (gzclose(compressedMemory)) {
125        fatal("Close failed on memory trace file '%s'\n", filename);
126    }
127    delete[] raw_data;
128}
129
130void
131RubySystem::serializeOld(CheckpointOut &cp)
132{
133    m_cooldown_enabled = true;
134    vector<Sequencer*> sequencer_map;
135    Sequencer* sequencer_ptr = NULL;
136
137    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
138        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
139        if (sequencer_ptr == NULL) {
140            sequencer_ptr = sequencer_map[cntrl];
141        }
142    }
143
144    assert(sequencer_ptr != NULL);
145
146    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
147        if (sequencer_map[cntrl] == NULL) {
148            sequencer_map[cntrl] = sequencer_ptr;
149        }
150    }
151
152    // Store the cache-block size, so we are able to restore on systems with a
153    // different cache-block size. CacheRecorder depends on the correct
154    // cache-block size upon unserializing.
155    uint64 block_size_bytes = getBlockSizeBytes();
156    SERIALIZE_SCALAR(block_size_bytes);
157
158    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
159    // Create the CacheRecorder and record the cache trace
160    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map,
161                                         block_size_bytes);
162
163    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
164        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
165    }
166
167    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
168    // save the current tick value
169    Tick curtick_original = curTick();
170    // save the event queue head
171    Event* eventq_head = eventq->replaceHead(NULL);
172    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
173            curtick_original);
174
175    // Schedule an event to start cache cooldown
176    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
177    enqueueRubyEvent(curTick());
178    simulate();
179    DPRINTF(RubyCacheTrace, "Cache flush complete\n");
180
181    // Restore eventq head
182    eventq_head = eventq->replaceHead(eventq_head);
183    // Restore curTick
184    setCurTick(curtick_original);
185
186    // Aggregate the trace entries together into a single array
187    uint8_t *raw_data = new uint8_t[4096];
188    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
189                                                                 4096);
190    string cache_trace_file = name() + ".cache.gz";
191    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
192
193    SERIALIZE_SCALAR(cache_trace_file);
194    SERIALIZE_SCALAR(cache_trace_size);
195
196    m_cooldown_enabled = false;
197}
198
199void
200RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
201                                uint64& uncompressed_trace_size)
202{
203    // Read the trace file
204    gzFile compressedTrace;
205
206    // trace file
207    int fd = open(filename.c_str(), O_RDONLY);
208    if (fd < 0) {
209        perror("open");
210        fatal("Unable to open trace file %s", filename);
211    }
212
213    compressedTrace = gzdopen(fd, "rb");
214    if (compressedTrace == NULL) {
215        fatal("Insufficient memory to allocate compression state for %s\n",
216              filename);
217    }
218
219    raw_data = new uint8_t[uncompressed_trace_size];
220    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
221            uncompressed_trace_size) {
222        fatal("Unable to read complete trace from file %s\n", filename);
223    }
224
225    if (gzclose(compressedTrace)) {
226        fatal("Failed to close cache trace file '%s'\n", filename);
227    }
228}
229
230void
231RubySystem::unserialize(CheckpointIn &cp)
232{
233    uint8_t *uncompressed_trace = NULL;
234
235    // This value should be set to the checkpoint-system's block-size.
236    // Optional, as checkpoints without it can be run if the
237    // checkpoint-system's block-size == current block-size.
238    uint64 block_size_bytes = getBlockSizeBytes();
239    UNSERIALIZE_OPT_SCALAR(block_size_bytes);
240
241    string cache_trace_file;
242    uint64 cache_trace_size = 0;
243
244    UNSERIALIZE_SCALAR(cache_trace_file);
245    UNSERIALIZE_SCALAR(cache_trace_size);
246    cache_trace_file = cp.cptDir + "/" + cache_trace_file;
247
248    readCompressedTrace(cache_trace_file, uncompressed_trace,
249                        cache_trace_size);
250    m_warmup_enabled = true;
251    m_systems_to_warmup++;
252
253    vector<Sequencer*> sequencer_map;
254    Sequencer* t = NULL;
255    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
256        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
257        if (t == NULL) t = sequencer_map[cntrl];
258    }
259
260    assert(t != NULL);
261
262    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
263        if (sequencer_map[cntrl] == NULL) {
264            sequencer_map[cntrl] = t;
265        }
266    }
267
268    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
269                                         sequencer_map, block_size_bytes);
270}
271
272void
273RubySystem::startup()
274{
275
276    // Ruby restores state from a checkpoint by resetting the clock to 0 and
277    // playing the requests that can possibly re-generate the cache state.
278    // The clock value is set to the actual checkpointed value once all the
279    // requests have been executed.
280    //
281    // This way of restoring state is pretty finicky. For example, if a
282    // Ruby component reads time before the state has been restored, it would
283    // cache this value and hence its clock would not be reset to 0, when
284    // Ruby resets the global clock. This can potentially result in a
285    // deadlock.
286    //
287    // The solution is that no Ruby component should read time before the
288    // simulation starts. And then one also needs to hope that the time
289    // Ruby finishes restoring the state is less than the time when the
290    // state was checkpointed.
291
292    if (m_warmup_enabled) {
293        // save the current tick value
294        Tick curtick_original = curTick();
295        // save the event queue head
296        Event* eventq_head = eventq->replaceHead(NULL);
297        // set curTick to 0 and reset Ruby System's clock
298        setCurTick(0);
299        resetClock();
300
301        // Schedule an event to start cache warmup
302        enqueueRubyEvent(curTick());
303        simulate();
304
305        delete m_cache_recorder;
306        m_cache_recorder = NULL;
307        m_systems_to_warmup--;
308        if (m_systems_to_warmup == 0) {
309            m_warmup_enabled = false;
310        }
311
312        // Restore eventq head
313        eventq_head = eventq->replaceHead(eventq_head);
314        // Restore curTick and Ruby System's clock
315        setCurTick(curtick_original);
316        resetClock();
317    }
318
319    resetStats();
320}
321
322void
323RubySystem::RubyEvent::process()
324{
325    if (RubySystem::getWarmupEnabled()) {
326        m_ruby_system->m_cache_recorder->enqueueNextFetchRequest();
327    } else if (RubySystem::getCooldownEnabled()) {
328        m_ruby_system->m_cache_recorder->enqueueNextFlushRequest();
329    }
330}
331
332void
333RubySystem::resetStats()
334{
335    m_start_cycle = curCycle();
336}
337
338bool
339RubySystem::functionalRead(PacketPtr pkt)
340{
341    Address address(pkt->getAddr());
342    Address line_address(address);
343    line_address.makeLineAddress();
344
345    AccessPermission access_perm = AccessPermission_NotPresent;
346    int num_controllers = m_abs_cntrl_vec.size();
347
348    DPRINTF(RubySystem, "Functional Read request for %s\n",address);
349
350    unsigned int num_ro = 0;
351    unsigned int num_rw = 0;
352    unsigned int num_busy = 0;
353    unsigned int num_backing_store = 0;
354    unsigned int num_invalid = 0;
355
356    // In this loop we count the number of controllers that have the given
357    // address in read only, read write and busy states.
358    for (unsigned int i = 0; i < num_controllers; ++i) {
359        access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address);
360        if (access_perm == AccessPermission_Read_Only)
361            num_ro++;
362        else if (access_perm == AccessPermission_Read_Write)
363            num_rw++;
364        else if (access_perm == AccessPermission_Busy)
365            num_busy++;
366        else if (access_perm == AccessPermission_Backing_Store)
367            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
368            // to represent blocks in memory *for Broadcast/Snooping protocols*,
369            // where memory has no idea whether it has an exclusive copy of data
370            // or not.
371            num_backing_store++;
372        else if (access_perm == AccessPermission_Invalid ||
373                 access_perm == AccessPermission_NotPresent)
374            num_invalid++;
375    }
376    assert(num_rw <= 1);
377
378    // This if case is meant to capture what happens in a Broadcast/Snoop
379    // protocol where the block does not exist in the cache hierarchy. You
380    // only want to read from the Backing_Store memory if there is no copy in
381    // the cache hierarchy, otherwise you want to try to read the RO or RW
382    // copies existing in the cache hierarchy (covered by the else statement).
383    // The reason is because the Backing_Store memory could easily be stale, if
384    // there are copies floating around the cache hierarchy, so you want to read
385    // it only if it's not in the cache hierarchy at all.
386    if (num_invalid == (num_controllers - 1) && num_backing_store == 1) {
387        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
388        for (unsigned int i = 0; i < num_controllers; ++i) {
389            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
390            if (access_perm == AccessPermission_Backing_Store) {
391                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
392                return true;
393            }
394        }
395    } else if (num_ro > 0 || num_rw == 1) {
396        // In Broadcast/Snoop protocols, this covers if you know the block
397        // exists somewhere in the caching hierarchy, then you want to read any
398        // valid RO or RW block.  In directory protocols, same thing, you want
399        // to read any valid readable copy of the block.
400        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
401                num_busy, num_ro, num_rw);
402        // In this loop, we try to figure which controller has a read only or
403        // a read write copy of the given address. Any valid copy would suffice
404        // for a functional read.
405        for (unsigned int i = 0;i < num_controllers;++i) {
406            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
407            if (access_perm == AccessPermission_Read_Only ||
408                access_perm == AccessPermission_Read_Write) {
409                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
410                return true;
411            }
412        }
413    }
414
415    return false;
416}
417
418// The function searches through all the buffers that exist in different
419// cache, directory and memory controllers, and in the network components
420// and writes the data portion of those that hold the address specified
421// in the packet.
422bool
423RubySystem::functionalWrite(PacketPtr pkt)
424{
425    Address addr(pkt->getAddr());
426    Address line_addr = line_address(addr);
427    AccessPermission access_perm = AccessPermission_NotPresent;
428    int num_controllers = m_abs_cntrl_vec.size();
429
430    DPRINTF(RubySystem, "Functional Write request for %s\n",addr);
431
432    uint32_t M5_VAR_USED num_functional_writes = 0;
433
434    for (unsigned int i = 0; i < num_controllers;++i) {
435        num_functional_writes +=
436            m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);
437
438        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
439        if (access_perm != AccessPermission_Invalid &&
440            access_perm != AccessPermission_NotPresent) {
441            num_functional_writes +=
442                m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt);
443        }
444    }
445
446    num_functional_writes += m_network->functionalWrite(pkt);
447    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
448
449    return true;
450}
451
#ifdef CHECK_COHERENCE
// Checks whether the given cache block is exclusive in one node while being
// exclusive or shared in another -- a coherence violation.
//
// To use, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions.  Do this
// in setState.  The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol
//
// NOTE: the body below is disabled with "#if 0", so even when
// CHECK_COHERENCE is defined this function currently does nothing.
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    NodeID lastShared = -1;
    bool sharedDetected = false;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            // First exclusive owner and no sharers so far: just record it.
            if (exclusive == -1 && !sharedDetected) {
                exclusive = m_chip_vector[i]->getID();
                continue;
            }

            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            // A sharer appearing after an exclusive owner is also a
            // violation.
            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif
503
504RubySystem *
505RubySystemParams::create()
506{
507    return new RubySystem(this);
508}
509