RubySystem.cc revision 10917
/*
 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
28
29#include <fcntl.h>
30#include <zlib.h>
31
32#include <cstdio>
33
34#include "base/intmath.hh"
35#include "base/statistics.hh"
36#include "debug/RubyCacheTrace.hh"
37#include "debug/RubySystem.hh"
38#include "mem/ruby/common/Address.hh"
39#include "mem/ruby/network/Network.hh"
40#include "mem/ruby/system/System.hh"
41#include "mem/simple_mem.hh"
42#include "sim/eventq.hh"
43#include "sim/simulate.hh"
44
45using namespace std;
46
47int RubySystem::m_random_seed;
48bool RubySystem::m_randomization;
49uint32_t RubySystem::m_block_size_bytes;
50uint32_t RubySystem::m_block_size_bits;
51uint32_t RubySystem::m_memory_size_bits;
52bool RubySystem::m_warmup_enabled = false;
53// To look forward to allowing multiple RubySystem instances, track the number
54// of RubySystems that need to be warmed up on checkpoint restore.
55unsigned RubySystem::m_systems_to_warmup = 0;
56bool RubySystem::m_cooldown_enabled = false;
57
58RubySystem::RubySystem(const Params *p)
59    : ClockedObject(p), m_access_backing_store(p->access_backing_store)
60{
61    if (g_system_ptr != NULL)
62        fatal("Only one RubySystem object currently allowed.\n");
63
64    m_random_seed = p->random_seed;
65    srandom(m_random_seed);
66    m_randomization = p->randomization;
67
68    m_block_size_bytes = p->block_size_bytes;
69    assert(isPowerOf2(m_block_size_bytes));
70    m_block_size_bits = floorLog2(m_block_size_bytes);
71    m_memory_size_bits = p->memory_size_bits;
72
73    // Setup the global variables used in Ruby
74    g_system_ptr = this;
75
76    // Resize to the size of different machine types
77    g_abs_controls.resize(MachineType_NUM);
78
79    // Collate the statistics before they are printed.
80    Stats::registerDumpCallback(new RubyStatsCallback(this));
81    // Create the profiler
82    m_profiler = new Profiler(p);
83    m_phys_mem = p->phys_mem;
84}
85
86void
87RubySystem::registerNetwork(Network* network_ptr)
88{
89    m_network = network_ptr;
90}
91
92void
93RubySystem::registerAbstractController(AbstractController* cntrl)
94{
95    m_abs_cntrl_vec.push_back(cntrl);
96
97    MachineID id = cntrl->getMachineID();
98    g_abs_controls[id.getType()][id.getNum()] = cntrl;
99}
100
101RubySystem::~RubySystem()
102{
103    delete m_network;
104    delete m_profiler;
105}
106
107void
108RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
109                                 uint64 uncompressed_trace_size)
110{
111    // Create the checkpoint file for the memory
112    string thefile = CheckpointIn::dir() + "/" + filename.c_str();
113
114    int fd = creat(thefile.c_str(), 0664);
115    if (fd < 0) {
116        perror("creat");
117        fatal("Can't open memory trace file '%s'\n", filename);
118    }
119
120    gzFile compressedMemory = gzdopen(fd, "wb");
121    if (compressedMemory == NULL)
122        fatal("Insufficient memory to allocate compression state for %s\n",
123              filename);
124
125    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
126        uncompressed_trace_size) {
127        fatal("Write failed on memory trace file '%s'\n", filename);
128    }
129
130    if (gzclose(compressedMemory)) {
131        fatal("Close failed on memory trace file '%s'\n", filename);
132    }
133    delete[] raw_data;
134}
135
136void
137RubySystem::serializeOld(CheckpointOut &cp)
138{
139    m_cooldown_enabled = true;
140    vector<Sequencer*> sequencer_map;
141    Sequencer* sequencer_ptr = NULL;
142
143    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
144        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
145        if (sequencer_ptr == NULL) {
146            sequencer_ptr = sequencer_map[cntrl];
147        }
148    }
149
150    assert(sequencer_ptr != NULL);
151
152    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
153        if (sequencer_map[cntrl] == NULL) {
154            sequencer_map[cntrl] = sequencer_ptr;
155        }
156    }
157
158    // Store the cache-block size, so we are able to restore on systems with a
159    // different cache-block size. CacheRecorder depends on the correct
160    // cache-block size upon unserializing.
161    uint64 block_size_bytes = getBlockSizeBytes();
162    SERIALIZE_SCALAR(block_size_bytes);
163
164    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
165    // Create the CacheRecorder and record the cache trace
166    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map,
167                                         block_size_bytes);
168
169    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
170        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
171    }
172
173    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
174    // save the current tick value
175    Tick curtick_original = curTick();
176    // save the event queue head
177    Event* eventq_head = eventq->replaceHead(NULL);
178    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
179            curtick_original);
180
181    // Schedule an event to start cache cooldown
182    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
183    enqueueRubyEvent(curTick());
184    simulate();
185    DPRINTF(RubyCacheTrace, "Cache flush complete\n");
186
187    // Restore eventq head
188    eventq_head = eventq->replaceHead(eventq_head);
189    // Restore curTick
190    setCurTick(curtick_original);
191
192    // Aggregate the trace entries together into a single array
193    uint8_t *raw_data = new uint8_t[4096];
194    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
195                                                                 4096);
196    string cache_trace_file = name() + ".cache.gz";
197    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
198
199    SERIALIZE_SCALAR(cache_trace_file);
200    SERIALIZE_SCALAR(cache_trace_size);
201
202    m_cooldown_enabled = false;
203}
204
205void
206RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
207                                uint64& uncompressed_trace_size)
208{
209    // Read the trace file
210    gzFile compressedTrace;
211
212    // trace file
213    int fd = open(filename.c_str(), O_RDONLY);
214    if (fd < 0) {
215        perror("open");
216        fatal("Unable to open trace file %s", filename);
217    }
218
219    compressedTrace = gzdopen(fd, "rb");
220    if (compressedTrace == NULL) {
221        fatal("Insufficient memory to allocate compression state for %s\n",
222              filename);
223    }
224
225    raw_data = new uint8_t[uncompressed_trace_size];
226    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
227            uncompressed_trace_size) {
228        fatal("Unable to read complete trace from file %s\n", filename);
229    }
230
231    if (gzclose(compressedTrace)) {
232        fatal("Failed to close cache trace file '%s'\n", filename);
233    }
234}
235
236void
237RubySystem::unserialize(CheckpointIn &cp)
238{
239    uint8_t *uncompressed_trace = NULL;
240
241    // This value should be set to the checkpoint-system's block-size.
242    // Optional, as checkpoints without it can be run if the
243    // checkpoint-system's block-size == current block-size.
244    uint64 block_size_bytes = getBlockSizeBytes();
245    UNSERIALIZE_OPT_SCALAR(block_size_bytes);
246
247    string cache_trace_file;
248    uint64 cache_trace_size = 0;
249
250    UNSERIALIZE_SCALAR(cache_trace_file);
251    UNSERIALIZE_SCALAR(cache_trace_size);
252    cache_trace_file = cp.cptDir + "/" + cache_trace_file;
253
254    readCompressedTrace(cache_trace_file, uncompressed_trace,
255                        cache_trace_size);
256    m_warmup_enabled = true;
257    m_systems_to_warmup++;
258
259    vector<Sequencer*> sequencer_map;
260    Sequencer* t = NULL;
261    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
262        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
263        if (t == NULL) t = sequencer_map[cntrl];
264    }
265
266    assert(t != NULL);
267
268    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
269        if (sequencer_map[cntrl] == NULL) {
270            sequencer_map[cntrl] = t;
271        }
272    }
273
274    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
275                                         sequencer_map, block_size_bytes);
276}
277
278void
279RubySystem::startup()
280{
281
282    // Ruby restores state from a checkpoint by resetting the clock to 0 and
283    // playing the requests that can possibly re-generate the cache state.
284    // The clock value is set to the actual checkpointed value once all the
285    // requests have been executed.
286    //
287    // This way of restoring state is pretty finicky. For example, if a
288    // Ruby component reads time before the state has been restored, it would
289    // cache this value and hence its clock would not be reset to 0, when
290    // Ruby resets the global clock. This can potentially result in a
291    // deadlock.
292    //
293    // The solution is that no Ruby component should read time before the
294    // simulation starts. And then one also needs to hope that the time
295    // Ruby finishes restoring the state is less than the time when the
296    // state was checkpointed.
297
298    if (m_warmup_enabled) {
299        // save the current tick value
300        Tick curtick_original = curTick();
301        // save the event queue head
302        Event* eventq_head = eventq->replaceHead(NULL);
303        // set curTick to 0 and reset Ruby System's clock
304        setCurTick(0);
305        resetClock();
306
307        // Schedule an event to start cache warmup
308        enqueueRubyEvent(curTick());
309        simulate();
310
311        delete m_cache_recorder;
312        m_cache_recorder = NULL;
313        m_systems_to_warmup--;
314        if (m_systems_to_warmup == 0) {
315            m_warmup_enabled = false;
316        }
317
318        // Restore eventq head
319        eventq_head = eventq->replaceHead(eventq_head);
320        // Restore curTick and Ruby System's clock
321        setCurTick(curtick_original);
322        resetClock();
323    }
324
325    resetStats();
326}
327
328void
329RubySystem::RubyEvent::process()
330{
331    if (RubySystem::getWarmupEnabled()) {
332        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
333    } else if (RubySystem::getCooldownEnabled()) {
334        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
335    }
336}
337
338void
339RubySystem::resetStats()
340{
341    g_ruby_start = curCycle();
342}
343
344bool
345RubySystem::functionalRead(PacketPtr pkt)
346{
347    Address address(pkt->getAddr());
348    Address line_address(address);
349    line_address.makeLineAddress();
350
351    AccessPermission access_perm = AccessPermission_NotPresent;
352    int num_controllers = m_abs_cntrl_vec.size();
353
354    DPRINTF(RubySystem, "Functional Read request for %s\n",address);
355
356    unsigned int num_ro = 0;
357    unsigned int num_rw = 0;
358    unsigned int num_busy = 0;
359    unsigned int num_backing_store = 0;
360    unsigned int num_invalid = 0;
361
362    // In this loop we count the number of controllers that have the given
363    // address in read only, read write and busy states.
364    for (unsigned int i = 0; i < num_controllers; ++i) {
365        access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address);
366        if (access_perm == AccessPermission_Read_Only)
367            num_ro++;
368        else if (access_perm == AccessPermission_Read_Write)
369            num_rw++;
370        else if (access_perm == AccessPermission_Busy)
371            num_busy++;
372        else if (access_perm == AccessPermission_Backing_Store)
373            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
374            // to represent blocks in memory *for Broadcast/Snooping protocols*,
375            // where memory has no idea whether it has an exclusive copy of data
376            // or not.
377            num_backing_store++;
378        else if (access_perm == AccessPermission_Invalid ||
379                 access_perm == AccessPermission_NotPresent)
380            num_invalid++;
381    }
382    assert(num_rw <= 1);
383
384    // This if case is meant to capture what happens in a Broadcast/Snoop
385    // protocol where the block does not exist in the cache hierarchy. You
386    // only want to read from the Backing_Store memory if there is no copy in
387    // the cache hierarchy, otherwise you want to try to read the RO or RW
388    // copies existing in the cache hierarchy (covered by the else statement).
389    // The reason is because the Backing_Store memory could easily be stale, if
390    // there are copies floating around the cache hierarchy, so you want to read
391    // it only if it's not in the cache hierarchy at all.
392    if (num_invalid == (num_controllers - 1) && num_backing_store == 1) {
393        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
394        for (unsigned int i = 0; i < num_controllers; ++i) {
395            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
396            if (access_perm == AccessPermission_Backing_Store) {
397                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
398                return true;
399            }
400        }
401    } else if (num_ro > 0 || num_rw == 1) {
402        // In Broadcast/Snoop protocols, this covers if you know the block
403        // exists somewhere in the caching hierarchy, then you want to read any
404        // valid RO or RW block.  In directory protocols, same thing, you want
405        // to read any valid readable copy of the block.
406        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
407                num_busy, num_ro, num_rw);
408        // In this loop, we try to figure which controller has a read only or
409        // a read write copy of the given address. Any valid copy would suffice
410        // for a functional read.
411        for (unsigned int i = 0;i < num_controllers;++i) {
412            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
413            if (access_perm == AccessPermission_Read_Only ||
414                access_perm == AccessPermission_Read_Write) {
415                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
416                return true;
417            }
418        }
419    }
420
421    return false;
422}
423
424// The function searches through all the buffers that exist in different
425// cache, directory and memory controllers, and in the network components
426// and writes the data portion of those that hold the address specified
427// in the packet.
428bool
429RubySystem::functionalWrite(PacketPtr pkt)
430{
431    Address addr(pkt->getAddr());
432    Address line_addr = line_address(addr);
433    AccessPermission access_perm = AccessPermission_NotPresent;
434    int num_controllers = m_abs_cntrl_vec.size();
435
436    DPRINTF(RubySystem, "Functional Write request for %s\n",addr);
437
438    uint32_t M5_VAR_USED num_functional_writes = 0;
439
440    for (unsigned int i = 0; i < num_controllers;++i) {
441        num_functional_writes +=
442            m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);
443
444        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
445        if (access_perm != AccessPermission_Invalid &&
446            access_perm != AccessPermission_NotPresent) {
447            num_functional_writes +=
448                m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt);
449        }
450    }
451
452    num_functional_writes += m_network->functionalWrite(pkt);
453    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
454
455    return true;
456}
457
#ifdef CHECK_COHERENCE
// Checks whether the given cache block is exclusive in one node while being
// exclusive or shared in another -- a coherence violation.
//
// To use, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions.  Do this
// in setState.  The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol
//
// NOTE: the body below is compiled out with "#if 0", so even with
// CHECK_COHERENCE defined this function currently does nothing.
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    NodeID lastShared = -1;
    bool sharedDetected = false;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
            continue;
        }

        if (!m_chip_vector[i]->isBlockShared(addr))
            continue;

        sharedDetected = true;
        lastShared = m_chip_vector[i]->getID();

        if (exclusive != -1) {
            WARN_EXPR(lastShared);
            WARN_EXPR(exclusive);
            WARN_EXPR(addr);
            WARN_EXPR(getTime());
            ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
        }
    }
#endif
}
#endif
509
510RubySystem *
511RubySystemParams::create()
512{
513    return new RubySystem(this);
514}
515