// RubySystem.cc revision 9302
1/*
2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <fcntl.h>
30#include <zlib.h>
31
32#include <cstdio>
33
34#include "base/intmath.hh"
35#include "base/output.hh"
36#include "debug/RubyCacheTrace.hh"
37#include "debug/RubySystem.hh"
38#include "mem/ruby/common/Address.hh"
39#include "mem/ruby/network/Network.hh"
40#include "mem/ruby/profiler/Profiler.hh"
41#include "mem/ruby/system/System.hh"
42#include "sim/eventq.hh"
43#include "sim/simulate.hh"
44
45using namespace std;
46
// Static configuration shared by every Ruby component.  These are set once
// in the RubySystem constructor (only a single RubySystem instance is
// permitted, see the fatal() check there).
int RubySystem::m_random_seed;
bool RubySystem::m_randomization;
int RubySystem::m_block_size_bytes;
int RubySystem::m_block_size_bits;        // log2 of m_block_size_bytes
uint64 RubySystem::m_memory_size_bytes;
int RubySystem::m_memory_size_bits;       // log2 of m_memory_size_bytes (0 when size is 0)
53
RubySystem::RubySystem(const Params *p)
    : ClockedObject(p)
{
    // Ruby components reach each other through the global g_system_ptr
    // singleton; a second instance would clobber the shared static
    // configuration initialized below.
    if (g_system_ptr != NULL)
        fatal("Only one RubySystem object currently allowed.\n");

    m_random_seed = p->random_seed;
    srandom(m_random_seed);
    m_randomization = p->randomization;

    // The cache line size must be a power of two so block offsets can be
    // derived with shifts/masks.
    m_block_size_bytes = p->block_size_bytes;
    assert(isPowerOf2(m_block_size_bytes));
    m_block_size_bits = floorLog2(m_block_size_bytes);

    m_memory_size_bytes = p->mem_size;
    if (m_memory_size_bytes == 0) {
        // Guard: floorLog2(0) is undefined, so special-case an empty memory.
        m_memory_size_bits = 0;
    } else {
        m_memory_size_bits = floorLog2(m_memory_size_bytes);
    }

    g_system_ptr = this;
    if (p->no_mem_vec) {
        // No flat backing-memory image; serialize() will fall back to
        // recording SparseMemory blocks instead.
        m_mem_vec_ptr = NULL;
    } else {
        m_mem_vec_ptr = new MemoryVector;
        m_mem_vec_ptr->resize(m_memory_size_bytes);
    }

    // Print ruby configuration and stats at exit
    registerExitCallback(new RubyExitCallback(p->stats_filename, this));

    // These flags drive RubyEvent::process(): warmup replays a cache trace
    // after checkpoint restore, cooldown flushes caches while checkpointing.
    m_warmup_enabled = false;
    m_cooldown_enabled = false;
}
89
// SimObject init hook: reset profiler statistics so measurement starts
// from a clean slate once construction is complete.
void
RubySystem::init()
{
    m_profiler_ptr->clearStats();
}
95
96void
97RubySystem::registerNetwork(Network* network_ptr)
98{
99  m_network_ptr = network_ptr;
100}
101
102void
103RubySystem::registerProfiler(Profiler* profiler_ptr)
104{
105  m_profiler_ptr = profiler_ptr;
106}
107
108void
109RubySystem::registerAbstractController(AbstractController* cntrl)
110{
111  m_abs_cntrl_vec.push_back(cntrl);
112}
113
// Registers a SparseMemory instance; serialize() records these blocks when
// the system runs without a flat memory image (no_mem_vec).
void
RubySystem::registerSparseMemory(SparseMemory* s)
{
    m_sparse_memory_vector.push_back(s);
}
119
120void
121RubySystem::registerMemController(MemoryControl *mc) {
122    m_memory_controller_vec.push_back(mc);
123}
124
125RubySystem::~RubySystem()
126{
127    delete m_network_ptr;
128    delete m_profiler_ptr;
129    if (m_mem_vec_ptr)
130        delete m_mem_vec_ptr;
131}
132
133void
134RubySystem::printStats(ostream& out)
135{
136    const time_t T = time(NULL);
137    tm *localTime = localtime(&T);
138    char buf[100];
139    strftime(buf, 100, "%b/%d/%Y %H:%M:%S", localTime);
140
141    out << "Real time: " << buf << endl;
142
143    m_profiler_ptr->printStats(out);
144    m_network_ptr->printStats(out);
145}
146
147void
148RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
149                                 uint64 uncompressed_trace_size)
150{
151    // Create the checkpoint file for the memory
152    string thefile = Checkpoint::dir() + "/" + filename.c_str();
153
154    int fd = creat(thefile.c_str(), 0664);
155    if (fd < 0) {
156        perror("creat");
157        fatal("Can't open memory trace file '%s'\n", filename);
158    }
159
160    gzFile compressedMemory = gzdopen(fd, "wb");
161    if (compressedMemory == NULL)
162        fatal("Insufficient memory to allocate compression state for %s\n",
163              filename);
164
165    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
166        uncompressed_trace_size) {
167        fatal("Write failed on memory trace file '%s'\n", filename);
168    }
169
170    if (gzclose(compressedMemory)) {
171        fatal("Close failed on memory trace file '%s'\n", filename);
172    }
173    delete raw_data;
174}
175
// Checkpoint the Ruby memory system: flush all caches (the "cooldown")
// on a scratch event queue, dump the memory image (or sparse blocks) and
// the recorded cache trace as compressed files, and serialize their names
// and sizes into the checkpoint stream.
void
RubySystem::serialize(std::ostream &os)
{
    // RubyEvent::process() consults this flag to issue flush requests.
    m_cooldown_enabled = true;

    // Build a per-controller sequencer table; controllers that lack their
    // own sequencer are mapped to the first valid one found.
    vector<Sequencer*> sequencer_map;
    Sequencer* sequencer_ptr = NULL;
    int cntrl_id = -1;


    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (sequencer_ptr == NULL) {
            sequencer_ptr = sequencer_map[cntrl];
            cntrl_id = cntrl;
        }
    }

    assert(sequencer_ptr != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = sequencer_ptr;
        }
    }

    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
    // Create the CacheRecorder and record the cache trace
    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
    }

    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
    // save the current tick value
    Tick curtick_original = curTick();
    // save the event queue head
    Event* eventq_head = eventq->replaceHead(NULL);
    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
            curtick_original);

    // Schedule an event to start cache cooldown
    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
    enqueueRubyEvent(curTick());
    // Run the flush on the emptied event queue so the main simulation's
    // pending events are untouched; head and tick are restored below.
    simulate();
    DPRINTF(RubyCacheTrace, "Cache flush complete\n");

    // Restore eventq head
    eventq_head = eventq->replaceHead(eventq_head);
    // Restore curTick
    curTick(curtick_original);

    uint8_t *raw_data = NULL;

    if (m_mem_vec_ptr != NULL) {
        // Flat memory image exists: dump it as a compressed trace.
        // writeCompressedTrace() takes ownership of raw_data.
        uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);

        string memory_trace_file = name() + ".memory.gz";
        writeCompressedTrace(raw_data, memory_trace_file,
                             memory_trace_size);

        SERIALIZE_SCALAR(memory_trace_file);
        SERIALIZE_SCALAR(memory_trace_size);

    } else {
        // No flat image (no_mem_vec): record sparse memory blocks into the
        // cache recorder so they are captured in the cache trace instead.
        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
                                                    m_cache_recorder);
        }
    }

    // Aggregate the trace entries together into a single array
    raw_data = new uint8_t[4096];
    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
                                                                 4096);
    string cache_trace_file = name() + ".cache.gz";
    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);

    SERIALIZE_SCALAR(cache_trace_file);
    SERIALIZE_SCALAR(cache_trace_size);

    m_cooldown_enabled = false;
}
260
261void
262RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
263                                uint64& uncompressed_trace_size)
264{
265    // Read the trace file
266    gzFile compressedTrace;
267
268    // trace file
269    int fd = open(filename.c_str(), O_RDONLY);
270    if (fd < 0) {
271        perror("open");
272        fatal("Unable to open trace file %s", filename);
273    }
274
275    compressedTrace = gzdopen(fd, "rb");
276    if (compressedTrace == NULL) {
277        fatal("Insufficient memory to allocate compression state for %s\n",
278              filename);
279    }
280
281    raw_data = new uint8_t[uncompressed_trace_size];
282    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
283            uncompressed_trace_size) {
284        fatal("Unable to read complete trace from file %s\n", filename);
285    }
286
287    if (gzclose(compressedTrace)) {
288        fatal("Failed to close cache trace file '%s'\n", filename);
289    }
290}
291
292void
293RubySystem::unserialize(Checkpoint *cp, const string &section)
294{
295    //
296    // The main purpose for clearing stats in the unserialize process is so
297    // that the profiler can correctly set its start time to the unserialized
298    // value of curTick()
299    //
300    clearStats();
301    uint8_t *uncompressed_trace = NULL;
302
303    if (m_mem_vec_ptr != NULL) {
304        string memory_trace_file;
305        uint64 memory_trace_size = 0;
306
307        UNSERIALIZE_SCALAR(memory_trace_file);
308        UNSERIALIZE_SCALAR(memory_trace_size);
309        memory_trace_file = cp->cptDir + "/" + memory_trace_file;
310
311        readCompressedTrace(memory_trace_file, uncompressed_trace,
312                            memory_trace_size);
313        m_mem_vec_ptr->populatePages(uncompressed_trace);
314
315        delete uncompressed_trace;
316        uncompressed_trace = NULL;
317    }
318
319    string cache_trace_file;
320    uint64 cache_trace_size = 0;
321
322    UNSERIALIZE_SCALAR(cache_trace_file);
323    UNSERIALIZE_SCALAR(cache_trace_size);
324    cache_trace_file = cp->cptDir + "/" + cache_trace_file;
325
326    readCompressedTrace(cache_trace_file, uncompressed_trace,
327                        cache_trace_size);
328    m_warmup_enabled = true;
329
330    vector<Sequencer*> sequencer_map;
331    Sequencer* t = NULL;
332    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
333        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
334        if (t == NULL) t = sequencer_map[cntrl];
335    }
336
337    assert(t != NULL);
338
339    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
340        if (sequencer_map[cntrl] == NULL) {
341            sequencer_map[cntrl] = t;
342        }
343    }
344
345    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
346                                         sequencer_map);
347}
348
// SimObject startup hook.  When restoring from a checkpoint, replay the
// recorded cache trace (set up by unserialize()) at tick 0 on the emptied
// event queue to warm the caches, then restore the real event queue and
// tick before simulation proper begins.
void
RubySystem::startup()
{
    if (m_warmup_enabled) {
        // save the current tick value
        Tick curtick_original = curTick();
        // save the event queue head
        Event* eventq_head = eventq->replaceHead(NULL);
        // set curTick to 0 and reset Ruby System's clock
        curTick(0);
        resetClock();

        // Schedule an event to start cache warmup
        enqueueRubyEvent(curTick());
        // Runs until the cache recorder exhausts its fetch requests.
        simulate();

        // Warmup complete: the trace has been consumed.
        delete m_cache_recorder;
        m_cache_recorder = NULL;
        m_warmup_enabled = false;

        // reset DRAM so that it's not waiting for events on the old event
        // queue
        for (int i = 0; i < m_memory_controller_vec.size(); ++i) {
            m_memory_controller_vec[i]->reset();
        }

        // Restore eventq head
        eventq_head = eventq->replaceHead(eventq_head);
        // Restore curTick and Ruby System's clock
        curTick(curtick_original);
        resetClock();
    }
}
382
383void
384RubySystem::RubyEvent::process()
385{
386    if (ruby_system->m_warmup_enabled) {
387        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
388    }  else if (ruby_system->m_cooldown_enabled) {
389        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
390    }
391}
392
// Reset profiler and network statistics.  Declared const even though it
// mutates state reachable through the member pointers (the pointers
// themselves are not modified).
void
RubySystem::clearStats() const
{
    m_profiler_ptr->clearStats();
    m_network_ptr->clearStats();
}
399
400bool
401RubySystem::functionalRead(PacketPtr pkt)
402{
403    Address address(pkt->getAddr());
404    Address line_address(address);
405    line_address.makeLineAddress();
406
407    AccessPermission access_perm = AccessPermission_NotPresent;
408    int num_controllers = m_abs_cntrl_vec.size();
409
410    DPRINTF(RubySystem, "Functional Read request for %s\n",address);
411
412    unsigned int num_ro = 0;
413    unsigned int num_rw = 0;
414    unsigned int num_busy = 0;
415    unsigned int num_backing_store = 0;
416    unsigned int num_invalid = 0;
417
418    // In this loop we count the number of controllers that have the given
419    // address in read only, read write and busy states.
420    for (unsigned int i = 0; i < num_controllers; ++i) {
421        access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address);
422        if (access_perm == AccessPermission_Read_Only)
423            num_ro++;
424        else if (access_perm == AccessPermission_Read_Write)
425            num_rw++;
426        else if (access_perm == AccessPermission_Busy)
427            num_busy++;
428        else if (access_perm == AccessPermission_Backing_Store)
429            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
430            // to represent blocks in memory *for Broadcast/Snooping protocols*,
431            // where memory has no idea whether it has an exclusive copy of data
432            // or not.
433            num_backing_store++;
434        else if (access_perm == AccessPermission_Invalid ||
435                 access_perm == AccessPermission_NotPresent)
436            num_invalid++;
437    }
438    assert(num_rw <= 1);
439
440    uint8_t *data = pkt->getPtr<uint8_t>(true);
441    unsigned int size_in_bytes = pkt->getSize();
442    unsigned startByte = address.getAddress() - line_address.getAddress();
443
444    // This if case is meant to capture what happens in a Broadcast/Snoop
445    // protocol where the block does not exist in the cache hierarchy. You
446    // only want to read from the Backing_Store memory if there is no copy in
447    // the cache hierarchy, otherwise you want to try to read the RO or RW
448    // copies existing in the cache hierarchy (covered by the else statement).
449    // The reason is because the Backing_Store memory could easily be stale, if
450    // there are copies floating around the cache hierarchy, so you want to read
451    // it only if it's not in the cache hierarchy at all.
452    if (num_invalid == (num_controllers - 1) &&
453            num_backing_store == 1) {
454        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
455        for (unsigned int i = 0; i < num_controllers; ++i) {
456            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
457            if (access_perm == AccessPermission_Backing_Store) {
458                DataBlock& block = m_abs_cntrl_vec[i]->
459                    getDataBlock(line_address);
460
461                DPRINTF(RubySystem, "reading from %s block %s\n",
462                        m_abs_cntrl_vec[i]->name(), block);
463                for (unsigned i = 0; i < size_in_bytes; ++i) {
464                    data[i] = block.getByte(i + startByte);
465                }
466                return true;
467            }
468        }
469    } else if (num_ro > 0 || num_rw == 1) {
470        // In Broadcast/Snoop protocols, this covers if you know the block
471        // exists somewhere in the caching hierarchy, then you want to read any
472        // valid RO or RW block.  In directory protocols, same thing, you want
473        // to read any valid readable copy of the block.
474        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
475                num_busy, num_ro, num_rw);
476        // In this loop, we try to figure which controller has a read only or
477        // a read write copy of the given address. Any valid copy would suffice
478        // for a functional read.
479        for (unsigned int i = 0;i < num_controllers;++i) {
480            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
481            if (access_perm == AccessPermission_Read_Only ||
482                access_perm == AccessPermission_Read_Write) {
483                DataBlock& block = m_abs_cntrl_vec[i]->
484                    getDataBlock(line_address);
485
486                DPRINTF(RubySystem, "reading from %s block %s\n",
487                        m_abs_cntrl_vec[i]->name(), block);
488                for (unsigned i = 0; i < size_in_bytes; ++i) {
489                    data[i] = block.getByte(i + startByte);
490                }
491                return true;
492            }
493        }
494    }
495
496    // Since we are here, this means that none of the controllers hold this
497    // address in a stable/base state. The function searches through all the
498    // buffers that exist in different cache, directory and memory
499    // controllers, and in the network components and reads the data portion
500    // of the first message that holds address specified in the packet.
501    for (unsigned int i = 0; i < num_controllers;++i) {
502        if (m_abs_cntrl_vec[i]->functionalReadBuffers(pkt)) {
503            return true;
504        }
505    }
506
507    for (unsigned int i = 0; i < m_memory_controller_vec.size(); ++i) {
508        if (m_memory_controller_vec[i]->functionalReadBuffers(pkt)) {
509            return true;
510        }
511    }
512
513    if (m_network_ptr->functionalRead(pkt)) {
514        return true;
515    }
516    return false;
517}
518
519// The function searches through all the buffers that exist in different
520// cache, directory and memory controllers, and in the network components
521// and writes the data portion of those that hold the address specified
522// in the packet.
523bool
524RubySystem::functionalWrite(PacketPtr pkt)
525{
526    Address addr(pkt->getAddr());
527    Address line_addr = line_address(addr);
528    AccessPermission access_perm = AccessPermission_NotPresent;
529    int num_controllers = m_abs_cntrl_vec.size();
530
531    DPRINTF(RubySystem, "Functional Write request for %s\n",addr);
532
533    uint8_t *data = pkt->getPtr<uint8_t>(true);
534    unsigned int size_in_bytes = pkt->getSize();
535    unsigned startByte = addr.getAddress() - line_addr.getAddress();
536
537    for (unsigned int i = 0; i < num_controllers;++i) {
538        m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);
539
540        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
541        if (access_perm != AccessPermission_Invalid &&
542            access_perm != AccessPermission_NotPresent) {
543
544            DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr);
545            DPRINTF(RubySystem, "%s\n",block);
546            for (unsigned i = 0; i < size_in_bytes; ++i) {
547              block.setByte(i + startByte, data[i]);
548            }
549            DPRINTF(RubySystem, "%s\n",block);
550        }
551    }
552
553    uint32_t M5_VAR_USED num_functional_writes = 0;
554    for (unsigned int i = 0; i < m_memory_controller_vec.size() ;++i) {
555        num_functional_writes +=
556            m_memory_controller_vec[i]->functionalWriteBuffers(pkt);
557    }
558
559    num_functional_writes += m_network_ptr->functionalWrite(pkt);
560    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
561
562    return true;
563}
564
565#ifdef CHECK_COHERENCE
566// This code will check for cases if the given cache block is exclusive in
567// one node and shared in another-- a coherence violation
568//
569// To use, the SLICC specification must call sequencer.checkCoherence(address)
570// when the controller changes to a state with new permissions.  Do this
571// in setState.  The SLICC spec must also define methods "isBlockShared"
572// and "isBlockExclusive" that are specific to that protocol
573//
// Debug-only global coherence check: would verify that no block is both
// exclusive in one node and shared or exclusive in another.  The body is
// compiled out (#if 0) and references the retired m_chip_vector API, so
// this is currently a no-op kept for reference.
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
615#endif
616
// Factory invoked by the Python-generated params object; constructs the
// (singleton) RubySystem from its configuration parameters.
RubySystem *
RubySystemParams::create()
{
    return new RubySystem(this);
}
622
623/**
624 * virtual process function that is invoked when the callback
625 * queue is executed.
626 */
627void
628RubyExitCallback::process()
629{
630    std::ostream *os = simout.create(stats_filename);
631    ruby_system->printStats(*os);
632}
633