// RubySystem.cc (revision 10163)
/*
 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <fcntl.h>
#include <zlib.h>

#include <cstdio>

#include "base/intmath.hh"
#include "base/statistics.hh"
#include "debug/RubyCacheTrace.hh"
#include "debug/RubySystem.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/system/System.hh"
#include "sim/eventq.hh"
#include "sim/simulate.hh"

using namespace std;

int RubySystem::m_random_seed;
bool RubySystem::m_randomization;
uint32_t RubySystem::m_block_size_bytes;
uint32_t RubySystem::m_block_size_bits;
uint64_t RubySystem::m_memory_size_bytes;
uint32_t RubySystem::m_memory_size_bits;

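// The constructor caches the block-size and memory-size parameters in the
// static members above, optionally allocates the backing MemoryVector, and
// registers this object as the single global RubySystem (g_system_ptr).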
RubySystem::RubySystem(const Params *p)
    : ClockedObject(p)
{
    if (g_system_ptr != NULL)
        fatal("Only one RubySystem object currently allowed.\n");

    m_random_seed = p->random_seed;
    srandom(m_random_seed);
    m_randomization = p->randomization;

    m_block_size_bytes = p->block_size_bytes;
    assert(isPowerOf2(m_block_size_bytes));
    m_block_size_bits = floorLog2(m_block_size_bytes);

    m_memory_size_bytes = p->mem_size;
    if (m_memory_size_bytes == 0) {
        m_memory_size_bits = 0;
    } else {
        m_memory_size_bits = ceilLog2(m_memory_size_bytes);
    }

    if (p->no_mem_vec) {
        m_mem_vec = NULL;
    } else {
        m_mem_vec = new MemoryVector;
        m_mem_vec->resize(m_memory_size_bytes);
    }

    m_warmup_enabled = false;
    m_cooldown_enabled = false;

    // Set up the global variables used in Ruby
    g_system_ptr = this;

    // Resize the controller table to hold one entry per machine type
    g_abs_controls.resize(MachineType_NUM);

    // Collate the statistics before they are printed.
    Stats::registerDumpCallback(new RubyStatsCallback(this));
    // Create the profiler
    m_profiler = new Profiler(p);
}

void
RubySystem::registerNetwork(Network* network_ptr)
{
    m_network = network_ptr;
}

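// Controllers register themselves here; each one is also entered into the
// global g_abs_controls table, indexed by machine type and machine number.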
void
RubySystem::registerAbstractController(AbstractController* cntrl)
{
    m_abs_cntrl_vec.push_back(cntrl);

    MachineID id = cntrl->getMachineID();
    g_abs_controls[id.getType()][id.getNum()] = cntrl;
}

void
RubySystem::registerSparseMemory(SparseMemory* s)
{
    m_sparse_memory_vector.push_back(s);
}

void
RubySystem::registerMemController(MemoryControl *mc)
{
    m_memory_controller_vec.push_back(mc);
}

RubySystem::~RubySystem()
{
    delete m_network;
    delete m_profiler;
    if (m_mem_vec)
        delete m_mem_vec;
}

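// Write uncompressed_trace_size bytes of raw_data into a gzip-compressed
// file in the checkpoint directory. Ownership of raw_data passes to this
// function, which frees the buffer once the write has completed.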
void
RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
                                 uint64 uncompressed_trace_size)
{
    // Create the checkpoint file for the memory
    string thefile = Checkpoint::dir() + "/" + filename;

    int fd = creat(thefile.c_str(), 0664);
    if (fd < 0) {
        perror("creat");
        fatal("Can't open memory trace file '%s'\n", filename);
    }

    gzFile compressedMemory = gzdopen(fd, "wb");
    if (compressedMemory == NULL)
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);

    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
        uncompressed_trace_size) {
        fatal("Write failed on memory trace file '%s'\n", filename);
    }

    if (gzclose(compressedMemory)) {
        fatal("Close failed on memory trace file '%s'\n", filename);
    }
    // raw_data was allocated with new[], so release it with delete[]
    delete [] raw_data;
}

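// Checkpointing: flush the caches by replaying flush requests through the
// sequencers (the "cooldown" phase), then write the memory image and the
// recorded cache trace as compressed files referenced from the checkpoint.
// The event queue and current tick are saved and restored around the flush
// so that the extra simulation does not perturb the checkpointed state.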
void
RubySystem::serialize(std::ostream &os)
{
    m_cooldown_enabled = true;

    vector<Sequencer*> sequencer_map;
    Sequencer* sequencer_ptr = NULL;
    int cntrl_id = -1;

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (sequencer_ptr == NULL) {
            sequencer_ptr = sequencer_map[cntrl];
            cntrl_id = cntrl;
        }
    }

    assert(sequencer_ptr != NULL);

    // Controllers that have no sequencer of their own fall back to the
    // first sequencer found above.
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = sequencer_ptr;
        }
    }

    // Store the cache-block size, so we are able to restore on systems with a
    // different cache-block size. CacheRecorder depends on the correct
    // cache-block size upon unserializing.
    uint64 block_size_bytes = getBlockSizeBytes();
    SERIALIZE_SCALAR(block_size_bytes);

    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
    // Create the CacheRecorder and record the cache trace
    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map,
                                         block_size_bytes);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
    }

    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
    // save the current tick value
    Tick curtick_original = curTick();
    // save the event queue head
    Event* eventq_head = eventq->replaceHead(NULL);
    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
            curtick_original);

    // Schedule an event to start cache cooldown
    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
    enqueueRubyEvent(curTick());
    simulate();
    DPRINTF(RubyCacheTrace, "Cache flush complete\n");

    // Restore eventq head
    eventq_head = eventq->replaceHead(eventq_head);
    // Restore curTick
    setCurTick(curtick_original);

    uint8_t *raw_data = NULL;

    if (m_mem_vec != NULL) {
        uint64 memory_trace_size = m_mem_vec->collatePages(raw_data);

        string memory_trace_file = name() + ".memory.gz";
        writeCompressedTrace(raw_data, memory_trace_file,
                             memory_trace_size);

        SERIALIZE_SCALAR(memory_trace_file);
        SERIALIZE_SCALAR(memory_trace_size);
    } else {
        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
                                                    m_cache_recorder);
        }
    }

    // Aggregate the trace entries together into a single array
    raw_data = new uint8_t[4096];
    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
                                                                 4096);
    string cache_trace_file = name() + ".cache.gz";
    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);

    SERIALIZE_SCALAR(cache_trace_file);
    SERIALIZE_SCALAR(cache_trace_size);

    m_cooldown_enabled = false;
}

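// Read uncompressed_trace_size bytes from a gzip-compressed trace file into
// a newly allocated buffer; the caller owns raw_data afterwards.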
void
RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
                                uint64& uncompressed_trace_size)
{
    // Open the compressed trace file
    gzFile compressedTrace;

    int fd = open(filename.c_str(), O_RDONLY);
    if (fd < 0) {
        perror("open");
        fatal("Unable to open trace file %s", filename);
    }

    compressedTrace = gzdopen(fd, "rb");
    if (compressedTrace == NULL) {
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);
    }

    raw_data = new uint8_t[uncompressed_trace_size];
    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
            uncompressed_trace_size) {
        fatal("Unable to read complete trace from file %s\n", filename);
    }

    if (gzclose(compressedTrace)) {
        fatal("Failed to close cache trace file '%s'\n", filename);
    }
}

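// Restore state from a checkpoint: repopulate the memory vector (if present)
// from the memory trace and build a CacheRecorder from the cache trace. The
// recorded requests are not replayed here; that happens in startup() once
// warmup is enabled.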
void
RubySystem::unserialize(Checkpoint *cp, const string &section)
{
    uint8_t *uncompressed_trace = NULL;

    // This value should be set to the checkpoint-system's block-size.
    // Optional, as checkpoints without it can be run if the
    // checkpoint-system's block-size == current block-size.
    uint64 block_size_bytes = getBlockSizeBytes();
    UNSERIALIZE_OPT_SCALAR(block_size_bytes);

    if (m_mem_vec != NULL) {
        string memory_trace_file;
        uint64 memory_trace_size = 0;

        UNSERIALIZE_SCALAR(memory_trace_file);
        UNSERIALIZE_SCALAR(memory_trace_size);
        memory_trace_file = cp->cptDir + "/" + memory_trace_file;

        readCompressedTrace(memory_trace_file, uncompressed_trace,
                            memory_trace_size);
        m_mem_vec->populatePages(uncompressed_trace);

        delete [] uncompressed_trace;
        uncompressed_trace = NULL;
    }

    string cache_trace_file;
    uint64 cache_trace_size = 0;

    UNSERIALIZE_SCALAR(cache_trace_file);
    UNSERIALIZE_SCALAR(cache_trace_size);
    cache_trace_file = cp->cptDir + "/" + cache_trace_file;

    readCompressedTrace(cache_trace_file, uncompressed_trace,
                        cache_trace_size);
    m_warmup_enabled = true;

    // As in serialize(), controllers without a sequencer of their own are
    // mapped to the first sequencer found.
    vector<Sequencer*> sequencer_map;
    Sequencer* t = NULL;
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (t == NULL) t = sequencer_map[cntrl];
    }

    assert(t != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = t;
        }
    }

    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
                                         sequencer_map, block_size_bytes);
}

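// If a cache trace was loaded by unserialize(), replay it here ("warmup")
// on a temporary time base starting at tick 0, then restore the original
// event queue and tick before normal simulation begins.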
void
RubySystem::startup()
{
    // Ruby restores state from a checkpoint by resetting the clock to 0 and
    // replaying the requests that can re-generate the cache state. The
    // clock is set back to the actual checkpointed value once all the
    // requests have been executed.
    //
    // This way of restoring state is fairly finicky. For example, if a Ruby
    // component reads the time before the state has been restored, it
    // caches that value and its clock is therefore not reset to 0 when Ruby
    // resets the global clock. This can potentially result in a deadlock.
    //
    // The solution is that no Ruby component should read the time before
    // the simulation starts. One also has to rely on Ruby finishing the
    // restore in less time than had elapsed when the state was
    // checkpointed.

    if (m_warmup_enabled) {
        // save the current tick value
        Tick curtick_original = curTick();
        // save the event queue head
        Event* eventq_head = eventq->replaceHead(NULL);
        // set curTick to 0 and reset Ruby System's clock
        setCurTick(0);
        resetClock();

        // Schedule an event to start cache warmup
        enqueueRubyEvent(curTick());
        simulate();

        delete m_cache_recorder;
        m_cache_recorder = NULL;
        m_warmup_enabled = false;

        // reset the DRAM controllers so that they are not waiting for
        // events on the old event queue
        for (int i = 0; i < m_memory_controller_vec.size(); ++i) {
            m_memory_controller_vec[i]->reset();
        }

        // Restore eventq head
        eventq_head = eventq->replaceHead(eventq_head);
        // Restore curTick and Ruby System's clock
        setCurTick(curtick_original);
        resetClock();
    }

    resetStats();
}

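// A RubyEvent drives the trace replay: during warmup it enqueues the next
// recorded fetch request, during cooldown the next flush request.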
void
RubySystem::RubyEvent::process()
{
    if (ruby_system->m_warmup_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
    } else if (ruby_system->m_cooldown_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
    }
}

void
RubySystem::resetStats()
{
    g_ruby_start = curCycle();
}

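// Functional read: scan every controller's access permission for the line.
// If the only cached state is the memory's Backing_Store, read from it;
// otherwise read from any Read_Only or Read_Write copy in the cache
// hierarchy. The read fails (returns false) if no readable copy exists,
// e.g. while the block is busy in transit.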
bool
RubySystem::functionalRead(PacketPtr pkt)
{
    Address address(pkt->getAddr());
    Address line_address(address);
    line_address.makeLineAddress();

    AccessPermission access_perm = AccessPermission_NotPresent;
    int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Read request for %s\n", address);

    unsigned int num_ro = 0;
    unsigned int num_rw = 0;
    unsigned int num_busy = 0;
    unsigned int num_backing_store = 0;
    unsigned int num_invalid = 0;

    // In this loop we count the number of controllers that have the given
    // address in read-only, read-write and busy states.
    for (unsigned int i = 0; i < num_controllers; ++i) {
        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
        if (access_perm == AccessPermission_Read_Only)
            num_ro++;
        else if (access_perm == AccessPermission_Read_Write)
            num_rw++;
        else if (access_perm == AccessPermission_Busy)
            num_busy++;
        else if (access_perm == AccessPermission_Backing_Store)
            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
            // to represent blocks in memory *for Broadcast/Snooping protocols*,
            // where memory has no idea whether it has an exclusive copy of data
            // or not.
            num_backing_store++;
        else if (access_perm == AccessPermission_Invalid ||
                 access_perm == AccessPermission_NotPresent)
            num_invalid++;
    }
    assert(num_rw <= 1);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    unsigned startByte = address.getAddress() - line_address.getAddress();

    // This case captures what happens in a Broadcast/Snoop protocol where
    // the block does not exist in the cache hierarchy. You only want to
    // read from the Backing_Store memory if there is no copy in the cache
    // hierarchy; otherwise you want to read the RO or RW copies in the
    // cache hierarchy (covered by the else branch). The reason is that the
    // Backing_Store memory could easily be stale if there are copies
    // floating around the cache hierarchy, so you want to read it only if
    // the block is not cached at all.
    if (num_invalid == (num_controllers - 1) &&
            num_backing_store == 1) {
        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
        for (unsigned int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Backing_Store) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    } else if (num_ro > 0 || num_rw == 1) {
        // In Broadcast/Snoop protocols, this covers the case where the block
        // is known to exist somewhere in the caching hierarchy, in which case
        // any valid RO or RW copy can be read. In directory protocols, same
        // thing: read any valid readable copy of the block.
        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
                num_busy, num_ro, num_rw);
        // In this loop, we try to figure out which controller has a read-only
        // or a read-write copy of the given address. Any valid copy would
        // suffice for a functional read.
        for (unsigned int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Read_Only ||
                access_perm == AccessPermission_Read_Write) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    }

    return false;
}

// The function searches through all the buffers that exist in different
// cache, directory and memory controllers, and in the network components,
// and writes the data portion of those that hold the address specified
// in the packet.
bool
RubySystem::functionalWrite(PacketPtr pkt)
{
    Address addr(pkt->getAddr());
    Address line_addr = line_address(addr);
    AccessPermission access_perm = AccessPermission_NotPresent;
    int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Write request for %s\n", addr);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    unsigned startByte = addr.getAddress() - line_addr.getAddress();

    uint32_t M5_VAR_USED num_functional_writes = 0;

    for (unsigned int i = 0; i < num_controllers; ++i) {
        num_functional_writes +=
            m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);

        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
        if (access_perm != AccessPermission_Invalid &&
            access_perm != AccessPermission_NotPresent) {

            num_functional_writes++;

            DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr);
            DPRINTF(RubySystem, "%s\n", block);
            for (unsigned j = 0; j < size_in_bytes; ++j) {
                block.setByte(j + startByte, data[j]);
            }
            DPRINTF(RubySystem, "%s\n", block);
        }
    }

    for (unsigned int i = 0; i < m_memory_controller_vec.size(); ++i) {
        num_functional_writes +=
            m_memory_controller_vec[i]->functionalWriteBuffers(pkt);
    }

    num_functional_writes += m_network->functionalWrite(pkt);
    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);

    return true;
}

#ifdef CHECK_COHERENCE
// This code checks whether the given cache block is exclusive in one node
// and shared in another -- a coherence violation.
//
// To use it, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions.  Do this
// in setState.  The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol.
//
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif

RubySystem *
RubySystemParams::create()
{
    return new RubySystem(this);
}