/*
 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <fcntl.h>
#include <zlib.h>

#include <cstdio>

#include "base/intmath.hh"
#include "base/output.hh"
#include "debug/RubyCacheTrace.hh"
#include "debug/RubySystem.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"
#include "sim/eventq.hh"
#include "sim/simulate.hh"

using namespace std;

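// These configuration values live in static members so that Ruby code
// throughout the simulator can query them via RubySystem's static
// accessors without needing a pointer to the (single) RubySystem object.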
int RubySystem::m_random_seed;
bool RubySystem::m_randomization;
int RubySystem::m_block_size_bytes;
int RubySystem::m_block_size_bits;
uint64 RubySystem::m_memory_size_bytes;
int RubySystem::m_memory_size_bits;

RubySystem::RubySystem(const Params *p)
    : ClockedObject(p)
{
    if (g_system_ptr != NULL)
        fatal("Only one RubySystem object currently allowed.\n");

    m_random_seed = p->random_seed;
    srandom(m_random_seed);
    m_randomization = p->randomization;

    m_block_size_bytes = p->block_size_bytes;
    assert(isPowerOf2(m_block_size_bytes));
    m_block_size_bits = floorLog2(m_block_size_bytes);
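    // For example, with the default 64-byte blocks, m_block_size_bits is
    // 6, and masking off the low 6 bits of an address yields the
    // line-aligned address used throughout Ruby.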

    m_memory_size_bytes = p->mem_size;
    if (m_memory_size_bytes == 0) {
        m_memory_size_bits = 0;
    } else {
        m_memory_size_bits = floorLog2(m_memory_size_bytes);
    }

    g_system_ptr = this;
    if (p->no_mem_vec) {
        m_mem_vec_ptr = NULL;
    } else {
        m_mem_vec_ptr = new MemoryVector;
        m_mem_vec_ptr->resize(m_memory_size_bytes);
    }

    // Print ruby configuration and stats at exit
    registerExitCallback(new RubyExitCallback(p->stats_filename, this));

    m_warmup_enabled = false;
    m_cooldown_enabled = false;
}

void
RubySystem::init()
{
    m_profiler_ptr->clearStats();
}

void
RubySystem::registerNetwork(Network* network_ptr)
{
    m_network_ptr = network_ptr;
}

void
RubySystem::registerProfiler(Profiler* profiler_ptr)
{
    m_profiler_ptr = profiler_ptr;
}

void
RubySystem::registerAbstractController(AbstractController* cntrl)
{
    m_abs_cntrl_vec.push_back(cntrl);
}

void
RubySystem::registerSparseMemory(SparseMemory* s)
{
    m_sparse_memory_vector.push_back(s);
}

void
RubySystem::registerMemController(MemoryControl *mc)
{
    m_memory_controller_vec.push_back(mc);
}

RubySystem::~RubySystem()
{
    delete m_network_ptr;
    delete m_profiler_ptr;
    if (m_mem_vec_ptr)
        delete m_mem_vec_ptr;
}

void
RubySystem::printStats(ostream& out)
{
    const time_t T = time(NULL);
    tm *localTime = localtime(&T);
    char buf[100];
    strftime(buf, 100, "%b/%d/%Y %H:%M:%S", localTime);

    out << "Real time: " << buf << endl;

    m_profiler_ptr->printStats(out);
    m_network_ptr->printStats(out);
}

void
RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
                                 uint64 uncompressed_trace_size)
{
    // Create the checkpoint file for the memory
    string thefile = Checkpoint::dir() + "/" + filename;

    int fd = creat(thefile.c_str(), 0664);
    if (fd < 0) {
        perror("creat");
        fatal("Can't open memory trace file '%s'\n", filename);
    }

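    // gzdopen associates the gzFile with the already-open descriptor, so
    // the gzclose() below closes fd as well.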
    gzFile compressedMemory = gzdopen(fd, "wb");
    if (compressedMemory == NULL)
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);

    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
        uncompressed_trace_size) {
        fatal("Write failed on memory trace file '%s'\n", filename);
    }

    if (gzclose(compressedMemory)) {
        fatal("Close failed on memory trace file '%s'\n", filename);
    }
    // raw_data was allocated with new[], so use the array form of delete
    delete [] raw_data;
}

void
RubySystem::serialize(std::ostream &os)
{
    m_cooldown_enabled = true;

    vector<Sequencer*> sequencer_map;
    Sequencer* sequencer_ptr = NULL;
    int cntrl_id = -1;

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (sequencer_ptr == NULL) {
            sequencer_ptr = sequencer_map[cntrl];
            cntrl_id = cntrl;
        }
    }

    assert(sequencer_ptr != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = sequencer_ptr;
        }
    }

    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
    // Create the CacheRecorder and record the cache trace
    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
    }

    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
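    // The flush below runs on an effectively empty event queue: replacing
    // the queue head with NULL hides all pending events, so the call to
    // simulate() only processes the cooldown events scheduled here. The
    // saved head and tick are restored once the flush is done.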
    // save the current tick value
    Tick curtick_original = curTick();
    // save the event queue head
    Event* eventq_head = eventq->replaceHead(NULL);
    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
            curtick_original);

    // Schedule an event to start cache cooldown
    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
    enqueueRubyEvent(curTick());
    simulate();
    DPRINTF(RubyCacheTrace, "Cache flush complete\n");

    // Restore eventq head
    eventq_head = eventq->replaceHead(eventq_head);
    // Restore curTick
    curTick(curtick_original);

    uint8_t *raw_data = NULL;

    if (m_mem_vec_ptr != NULL) {
        uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);

        string memory_trace_file = name() + ".memory.gz";
        writeCompressedTrace(raw_data, memory_trace_file,
                             memory_trace_size);

        SERIALIZE_SCALAR(memory_trace_file);
        SERIALIZE_SCALAR(memory_trace_size);

    } else {
        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
                                                    m_cache_recorder);
        }
    }

    // Aggregate the trace entries together into a single array
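    // The 4096 bytes allocated here are just an initial size; the recorder
    // takes &raw_data so that it can (presumably) reallocate the buffer if
    // the aggregated trace turns out to be larger.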
    raw_data = new uint8_t[4096];
    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
                                                                 4096);
    string cache_trace_file = name() + ".cache.gz";
    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);

    SERIALIZE_SCALAR(cache_trace_file);
    SERIALIZE_SCALAR(cache_trace_size);

    m_cooldown_enabled = false;
}

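// The caller supplies the uncompressed size, which was stored in the
// checkpoint alongside the file name, since a gzip stream does not
// reliably record the original length of its contents.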
void
RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
                                uint64& uncompressed_trace_size)
{
    // Read the trace file
    gzFile compressedTrace;

    int fd = open(filename.c_str(), O_RDONLY);
    if (fd < 0) {
        perror("open");
        fatal("Unable to open trace file %s", filename);
    }

    compressedTrace = gzdopen(fd, "rb");
    if (compressedTrace == NULL) {
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);
    }

    raw_data = new uint8_t[uncompressed_trace_size];
    // Compare as a signed value so that a -1 error return from gzread is
    // caught instead of silently becoming a huge unsigned value.
    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
            (int64_t)uncompressed_trace_size) {
        fatal("Unable to read complete trace from file %s\n", filename);
    }

    if (gzclose(compressedTrace)) {
        fatal("Failed to close cache trace file '%s'\n", filename);
    }
}

void
RubySystem::unserialize(Checkpoint *cp, const string &section)
{
    //
    // The main purpose for clearing stats in the unserialize process is so
    // that the profiler can correctly set its start time to the unserialized
    // value of curTick()
    //
    clearStats();
    uint8_t *uncompressed_trace = NULL;

    if (m_mem_vec_ptr != NULL) {
        string memory_trace_file;
        uint64 memory_trace_size = 0;

        UNSERIALIZE_SCALAR(memory_trace_file);
        UNSERIALIZE_SCALAR(memory_trace_size);
        memory_trace_file = cp->cptDir + "/" + memory_trace_file;

        readCompressedTrace(memory_trace_file, uncompressed_trace,
                            memory_trace_size);
        m_mem_vec_ptr->populatePages(uncompressed_trace);

        // the trace buffer was allocated with new[]
        delete [] uncompressed_trace;
        uncompressed_trace = NULL;
    }

    string cache_trace_file;
    uint64 cache_trace_size = 0;

    UNSERIALIZE_SCALAR(cache_trace_file);
    UNSERIALIZE_SCALAR(cache_trace_size);
    cache_trace_file = cp->cptDir + "/" + cache_trace_file;

    readCompressedTrace(cache_trace_file, uncompressed_trace,
                        cache_trace_size);
    m_warmup_enabled = true;

    vector<Sequencer*> sequencer_map;
    Sequencer* t = NULL;
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (t == NULL) t = sequencer_map[cntrl];
    }

    assert(t != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = t;
        }
    }

    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
                                         sequencer_map);
}

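// startup() replays the cache trace read in unserialize() to warm up the
// caches. The replay runs on a scratch timeline: curTick is forced to 0
// and the real event queue is hidden, so the warmup consumes no simulated
// time and no checkpointed events can fire while it runs.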
void
RubySystem::startup()
{
    if (m_warmup_enabled) {
        // save the current tick value
        Tick curtick_original = curTick();
        // save the event queue head
        Event* eventq_head = eventq->replaceHead(NULL);
        // set curTick to 0 and reset Ruby System's clock
        curTick(0);
        resetClock();

        // Schedule an event to start cache warmup
        enqueueRubyEvent(curTick());
        simulate();

        delete m_cache_recorder;
        m_cache_recorder = NULL;
        m_warmup_enabled = false;

        // reset DRAM so that it's not waiting for events on the old event
        // queue
        for (int i = 0; i < m_memory_controller_vec.size(); ++i) {
            m_memory_controller_vec[i]->reset();
        }

        // Restore eventq head
        eventq_head = eventq->replaceHead(eventq_head);
        // Restore curTick and Ruby System's clock
        curTick(curtick_original);
        resetClock();
    }
}

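// A RubyEvent drives one step of trace replay: during warmup it asks the
// CacheRecorder to issue the next fetch from the trace, and during
// cooldown the next flush. Each completed request is expected to schedule
// a further RubyEvent, so the simulate() calls above run until the trace
// has been drained.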
void
RubySystem::RubyEvent::process()
{
    if (ruby_system->m_warmup_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
    } else if (ruby_system->m_cooldown_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
    }
}

void
RubySystem::clearStats() const
{
    m_profiler_ptr->clearStats();
    m_network_ptr->clearStats();
}

bool
RubySystem::functionalRead(PacketPtr pkt)
{
    Address address(pkt->getAddr());
    Address line_address(address);
    line_address.makeLineAddress();

    AccessPermission access_perm = AccessPermission_NotPresent;
    int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Read request for %s\n", address);

    unsigned int num_ro = 0;
    unsigned int num_rw = 0;
    unsigned int num_busy = 0;
    unsigned int num_backing_store = 0;
    unsigned int num_invalid = 0;

    // In this loop we count the number of controllers that have the given
    // address in read only, read write and busy states.
    for (int i = 0; i < num_controllers; ++i) {
        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
        if (access_perm == AccessPermission_Read_Only)
            num_ro++;
        else if (access_perm == AccessPermission_Read_Write)
            num_rw++;
        else if (access_perm == AccessPermission_Busy)
            num_busy++;
        else if (access_perm == AccessPermission_Backing_Store)
            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
            // to represent blocks in memory *for Broadcast/Snooping protocols*,
            // where memory has no idea whether it has an exclusive copy of data
            // or not.
            num_backing_store++;
        else if (access_perm == AccessPermission_Invalid ||
                 access_perm == AccessPermission_NotPresent)
            num_invalid++;
    }
    assert(num_rw <= 1);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    unsigned startByte = address.getAddress() - line_address.getAddress();
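    // startByte is the packet's offset within the cache line. For example,
    // a 4-byte read of address 0x1234 with 64-byte lines falls in line
    // 0x1200, so startByte is 0x34 and bytes 0x34-0x37 are copied out.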

    // This if case is meant to capture what happens in a Broadcast/Snoop
    // protocol where the block does not exist in the cache hierarchy. You
    // only want to read from the Backing_Store memory if there is no copy in
    // the cache hierarchy, otherwise you want to try to read the RO or RW
    // copies existing in the cache hierarchy (covered by the else statement).
    // The reason is that the Backing_Store memory could easily be stale if
    // there are copies floating around the cache hierarchy, so you want to
    // read it only if the block is not in the cache hierarchy at all.
    if (num_invalid == (num_controllers - 1) &&
            num_backing_store == 1) {
        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
        for (int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Backing_Store) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                // use j for the byte index so that it does not shadow the
                // controller index i
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    } else {
        // In Broadcast/Snoop protocols, this covers if you know the block
        // exists somewhere in the caching hierarchy, then you want to read any
        // valid RO or RW block.  In directory protocols, same thing, you want
        // to read any valid readable copy of the block.
        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
                num_busy, num_ro, num_rw);
        // In this loop, we try to figure out which controller has a read only
        // or a read write copy of the given address. Any valid copy would
        // suffice for a functional read.
        for (int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Read_Only ||
                access_perm == AccessPermission_Read_Write) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    }
    return false;
}

bool
RubySystem::functionalWrite(PacketPtr pkt)
{
    Address addr(pkt->getAddr());
    Address line_addr = line_address(addr);
    AccessPermission access_perm = AccessPermission_NotPresent;
    int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Write request for %s\n", addr);

    unsigned int num_ro = 0;
    unsigned int num_rw = 0;
    unsigned int num_busy = 0;
    unsigned int num_backing_store = 0;
    unsigned int num_invalid = 0;

    // In this loop we count the number of controllers that have the given
    // address in read only, read write and busy states.
    for (int i = 0; i < num_controllers; ++i) {
        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
        if (access_perm == AccessPermission_Read_Only)
            num_ro++;
        else if (access_perm == AccessPermission_Read_Write)
            num_rw++;
        else if (access_perm == AccessPermission_Busy)
            num_busy++;
        else if (access_perm == AccessPermission_Backing_Store)
            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
            // to represent blocks in memory *for Broadcast/Snooping protocols*,
            // where memory has no idea whether it has an exclusive copy of data
            // or not.
            num_backing_store++;
        else if (access_perm == AccessPermission_Invalid ||
                 access_perm == AccessPermission_NotPresent)
            num_invalid++;
    }

    // If there is more than one read-write copy, there is a bug in the
    // coherence protocol. Otherwise, if all copies are in stable states,
    // i.e. num_busy == 0, we update all the copies. If at least one copy
    // is in a busy state, we check whether a read-write copy exists; if
    // so, we still let the access go through. Finally, if there is no
    // copy in the cache hierarchy at all, we write to the memory
    // (Backing_Store) instead of failing.

    DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
            num_busy, num_ro, num_rw);
    assert(num_rw <= 1);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    unsigned startByte = addr.getAddress() - line_addr.getAddress();

    if ((num_busy == 0 && num_ro > 0) || num_rw == 1 ||
        (num_invalid == (num_controllers - 1) && num_backing_store == 1)) {
        for (int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
            if (access_perm == AccessPermission_Read_Only ||
                access_perm == AccessPermission_Read_Write ||
                access_perm == AccessPermission_Maybe_Stale ||
                access_perm == AccessPermission_Backing_Store) {

                DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr);
                DPRINTF(RubySystem, "%s\n", block);
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    block.setByte(j + startByte, data[j]);
                }
                DPRINTF(RubySystem, "%s\n", block);
            }
        }
        return true;
    }
    return false;
}

#ifdef CHECK_COHERENCE
// This code checks for cases where a given cache block is exclusive in
// one node and shared in another -- a coherence violation.
//
// To use it, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions.  Do this
// in setState.  The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol.
//
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif

RubySystem *
RubySystemParams::create()
{
    return new RubySystem(this);
}

/**
 * virtual process function that is invoked when the callback
 * queue is executed.
 */
void
RubyExitCallback::process()
{
    std::ostream *os = simout.create(stats_filename);
    ruby_system->printStats(*os);
}