RubySystem.cc revision 9350
/*
 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <fcntl.h>
#include <zlib.h>

#include <cstdio>

#include "base/intmath.hh"
#include "base/statistics.hh"
#include "debug/RubyCacheTrace.hh"
#include "debug/RubySystem.hh"
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"
#include "sim/eventq.hh"
#include "sim/simulate.hh"

using namespace std;

int RubySystem::m_random_seed;
bool RubySystem::m_randomization;
int RubySystem::m_block_size_bytes;
int RubySystem::m_block_size_bits;
uint64 RubySystem::m_memory_size_bytes;
int RubySystem::m_memory_size_bits;

RubySystem::RubySystem(const Params *p)
    : ClockedObject(p)
{
    if (g_system_ptr != NULL)
        fatal("Only one RubySystem object currently allowed.\n");

    m_random_seed = p->random_seed;
    srandom(m_random_seed);
    m_randomization = p->randomization;

    m_block_size_bytes = p->block_size_bytes;
    assert(isPowerOf2(m_block_size_bytes));
    m_block_size_bits = floorLog2(m_block_size_bytes);

    m_memory_size_bytes = p->mem_size;
    if (m_memory_size_bytes == 0) {
        m_memory_size_bits = 0;
    } else {
        m_memory_size_bits = floorLog2(m_memory_size_bytes);
    }

    g_system_ptr = this;
    if (p->no_mem_vec) {
        m_mem_vec_ptr = NULL;
    } else {
        m_mem_vec_ptr = new MemoryVector;
        m_mem_vec_ptr->resize(m_memory_size_bytes);
    }

    // Print ruby configuration and stats at exit and when asked for
    Stats::registerDumpCallback(new RubyDumpStatsCallback(p->stats_filename,
                                                          this));

    m_warmup_enabled = false;
    m_cooldown_enabled = false;
}

void
RubySystem::init()
{
    m_profiler_ptr->clearStats();
    m_network_ptr->clearStats();
}

void
RubySystem::registerNetwork(Network* network_ptr)
{
    m_network_ptr = network_ptr;
}

void
RubySystem::registerProfiler(Profiler* profiler_ptr)
{
    m_profiler_ptr = profiler_ptr;
}

void
RubySystem::registerAbstractController(AbstractController* cntrl)
{
    m_abs_cntrl_vec.push_back(cntrl);
}

void
RubySystem::registerSparseMemory(SparseMemory* s)
{
    m_sparse_memory_vector.push_back(s);
}

void
RubySystem::registerMemController(MemoryControl *mc)
{
    m_memory_controller_vec.push_back(mc);
}

RubySystem::~RubySystem()
{
    delete m_network_ptr;
    delete m_profiler_ptr;
    if (m_mem_vec_ptr)
        delete m_mem_vec_ptr;
}

void
RubySystem::printStats(ostream& out)
{
    const time_t T = time(NULL);
    tm *localTime = localtime(&T);
    char buf[100];
    strftime(buf, 100, "%b/%d/%Y %H:%M:%S", localTime);

    out << "Real time: " << buf << endl;

    m_profiler_ptr->printStats(out);
    m_network_ptr->printStats(out);
}

void
RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
                                 uint64 uncompressed_trace_size)
{
    // Create the checkpoint file for the memory
    string thefile = Checkpoint::dir() + "/" + filename.c_str();

    int fd = creat(thefile.c_str(), 0664);
    if (fd < 0) {
        perror("creat");
        fatal("Can't open memory trace file '%s'\n", filename);
    }

    gzFile compressedMemory = gzdopen(fd, "wb");
    if (compressedMemory == NULL)
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);

    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
        uncompressed_trace_size) {
        fatal("Write failed on memory trace file '%s'\n", filename);
    }

    if (gzclose(compressedMemory)) {
        fatal("Close failed on memory trace file '%s'\n", filename);
    }
    delete [] raw_data;
}

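// Checkpointing: record the current cache contents, flush dirty blocks back
// to memory by simulating on a temporarily emptied event queue, then write
// compressed images of memory and of the recorded cache trace into the
// checkpoint directory.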
void
RubySystem::serialize(std::ostream &os)
{
    m_cooldown_enabled = true;

    vector<Sequencer*> sequencer_map;
    Sequencer* sequencer_ptr = NULL;
    int cntrl_id = -1;

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (sequencer_ptr == NULL) {
            sequencer_ptr = sequencer_map[cntrl];
            cntrl_id = cntrl;
        }
    }

    assert(sequencer_ptr != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = sequencer_ptr;
        }
    }

    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
    // Create the CacheRecorder and record the cache trace
    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
    }

    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
    // save the current tick value
    Tick curtick_original = curTick();
    // save the event queue head
    Event* eventq_head = eventq->replaceHead(NULL);
    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
            curtick_original);

    // Schedule an event to start cache cooldown
    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
    enqueueRubyEvent(curTick());
    simulate();
    DPRINTF(RubyCacheTrace, "Cache flush complete\n");

    // Restore eventq head
    eventq_head = eventq->replaceHead(eventq_head);
    // Restore curTick
    curTick(curtick_original);

    uint8_t *raw_data = NULL;

    if (m_mem_vec_ptr != NULL) {
        uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);

        string memory_trace_file = name() + ".memory.gz";
        writeCompressedTrace(raw_data, memory_trace_file,
                             memory_trace_size);

        SERIALIZE_SCALAR(memory_trace_file);
        SERIALIZE_SCALAR(memory_trace_size);

    } else {
        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
                                                    m_cache_recorder);
        }
    }

    // Aggregate the trace entries together into a single array
    raw_data = new uint8_t[4096];
    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
                                                                 4096);
    string cache_trace_file = name() + ".cache.gz";
    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);

    SERIALIZE_SCALAR(cache_trace_file);
    SERIALIZE_SCALAR(cache_trace_size);

    m_cooldown_enabled = false;
}

void
RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
                                uint64& uncompressed_trace_size)
{
    // Read the trace file
    gzFile compressedTrace;

    // trace file
    int fd = open(filename.c_str(), O_RDONLY);
    if (fd < 0) {
        perror("open");
        fatal("Unable to open trace file %s", filename);
    }

    compressedTrace = gzdopen(fd, "rb");
    if (compressedTrace == NULL) {
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);
    }

    raw_data = new uint8_t[uncompressed_trace_size];
    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
            uncompressed_trace_size) {
        fatal("Unable to read complete trace from file %s\n", filename);
    }

    if (gzclose(compressedTrace)) {
        fatal("Failed to close cache trace file '%s'\n", filename);
    }
}

void
RubySystem::unserialize(Checkpoint *cp, const string &section)
{
    //
    // The main purpose for clearing stats in the unserialize process is so
    // that the profiler can correctly set its start time to the unserialized
    // value of curTick()
    //
    resetStats();
    uint8_t *uncompressed_trace = NULL;

    if (m_mem_vec_ptr != NULL) {
        string memory_trace_file;
        uint64 memory_trace_size = 0;

        UNSERIALIZE_SCALAR(memory_trace_file);
        UNSERIALIZE_SCALAR(memory_trace_size);
        memory_trace_file = cp->cptDir + "/" + memory_trace_file;

        readCompressedTrace(memory_trace_file, uncompressed_trace,
                            memory_trace_size);
        m_mem_vec_ptr->populatePages(uncompressed_trace);

        delete [] uncompressed_trace;
        uncompressed_trace = NULL;
    }

    string cache_trace_file;
    uint64 cache_trace_size = 0;

    UNSERIALIZE_SCALAR(cache_trace_file);
    UNSERIALIZE_SCALAR(cache_trace_size);
    cache_trace_file = cp->cptDir + "/" + cache_trace_file;

    readCompressedTrace(cache_trace_file, uncompressed_trace,
                        cache_trace_size);
    m_warmup_enabled = true;

    vector<Sequencer*> sequencer_map;
    Sequencer* t = NULL;
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
        if (t == NULL) t = sequencer_map[cntrl];
    }

    assert(t != NULL);

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = t;
        }
    }

    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
                                         sequencer_map);
}

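// If a checkpoint was restored, replay the recorded cache trace to warm up
// the caches: run the warmup at tick 0 on a temporarily emptied event queue,
// then restore the original tick and event queue before simulation resumes.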
void
RubySystem::startup()
{
    if (m_warmup_enabled) {
        // save the current tick value
        Tick curtick_original = curTick();
        // save the event queue head
        Event* eventq_head = eventq->replaceHead(NULL);
        // set curTick to 0 and reset Ruby System's clock
        curTick(0);
        resetClock();

        // Schedule an event to start cache warmup
        enqueueRubyEvent(curTick());
        simulate();

        delete m_cache_recorder;
        m_cache_recorder = NULL;
        m_warmup_enabled = false;

        // reset DRAM so that it's not waiting for events on the old event
        // queue
        for (int i = 0; i < m_memory_controller_vec.size(); ++i) {
            m_memory_controller_vec[i]->reset();
        }

        // Restore eventq head
        eventq_head = eventq->replaceHead(eventq_head);
        // Restore curTick and Ruby System's clock
        curTick(curtick_original);
        resetClock();
    }
}

void
RubySystem::RubyEvent::process()
{
    if (ruby_system->m_warmup_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
    } else if (ruby_system->m_cooldown_enabled) {
        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
    }
}

void
RubySystem::resetStats()
{
    m_profiler_ptr->clearStats();
    m_network_ptr->clearStats();
}

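// Services a functional read from the coherent memory system: read from the
// Backing_Store copy when no cache holds the block, otherwise read from any
// read-only or read-write copy in the cache hierarchy, and as a last resort
// search the in-flight messages buffered in the controllers, the memory
// controllers, and the network.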
bool
RubySystem::functionalRead(PacketPtr pkt)
{
    Address address(pkt->getAddr());
    Address line_address(address);
    line_address.makeLineAddress();

    AccessPermission access_perm = AccessPermission_NotPresent;
    int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Read request for %s\n", address);

    unsigned int num_ro = 0;
    unsigned int num_rw = 0;
    unsigned int num_busy = 0;
    unsigned int num_backing_store = 0;
    unsigned int num_invalid = 0;

    // In this loop we count the number of controllers that have the given
    // address in read only, read write and busy states.
    for (unsigned int i = 0; i < num_controllers; ++i) {
        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
        if (access_perm == AccessPermission_Read_Only)
            num_ro++;
        else if (access_perm == AccessPermission_Read_Write)
            num_rw++;
        else if (access_perm == AccessPermission_Busy)
            num_busy++;
        else if (access_perm == AccessPermission_Backing_Store)
            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
            // to represent blocks in memory *for Broadcast/Snooping protocols*,
            // where memory has no idea whether it has an exclusive copy of data
            // or not.
            num_backing_store++;
        else if (access_perm == AccessPermission_Invalid ||
                 access_perm == AccessPermission_NotPresent)
            num_invalid++;
    }
    assert(num_rw <= 1);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    unsigned startByte = address.getAddress() - line_address.getAddress();

    // This if case is meant to capture what happens in a Broadcast/Snoop
    // protocol where the block does not exist in the cache hierarchy. You
    // only want to read from the Backing_Store memory if there is no copy in
    // the cache hierarchy, otherwise you want to try to read the RO or RW
    // copies existing in the cache hierarchy (covered by the else statement).
    // The reason is because the Backing_Store memory could easily be stale, if
    // there are copies floating around the cache hierarchy, so you want to read
    // it only if it's not in the cache hierarchy at all.
    if (num_invalid == (num_controllers - 1) &&
            num_backing_store == 1) {
        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
        for (unsigned int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Backing_Store) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    } else if (num_ro > 0 || num_rw == 1) {
        // In Broadcast/Snoop protocols, this covers if you know the block
        // exists somewhere in the caching hierarchy, then you want to read any
        // valid RO or RW block.  In directory protocols, same thing, you want
        // to read any valid readable copy of the block.
        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
                num_busy, num_ro, num_rw);
        // In this loop, we try to figure out which controller has a read only
        // or a read write copy of the given address. Any valid copy would
        // suffice for a functional read.
        for (unsigned int i = 0; i < num_controllers; ++i) {
            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
            if (access_perm == AccessPermission_Read_Only ||
                access_perm == AccessPermission_Read_Write) {
                DataBlock& block = m_abs_cntrl_vec[i]->
                    getDataBlock(line_address);

                DPRINTF(RubySystem, "reading from %s block %s\n",
                        m_abs_cntrl_vec[i]->name(), block);
                for (unsigned j = 0; j < size_in_bytes; ++j) {
                    data[j] = block.getByte(j + startByte);
                }
                return true;
            }
        }
    }

    // Since we are here, this means that none of the controllers hold this
    // address in a stable/base state. The function searches through all the
    // buffers that exist in different cache, directory and memory
    // controllers, and in the network components, and reads the data portion
    // of the first message that holds the address specified in the packet.
    for (unsigned int i = 0; i < num_controllers; ++i) {
        if (m_abs_cntrl_vec[i]->functionalReadBuffers(pkt)) {
            return true;
        }
    }

    for (unsigned int i = 0; i < m_memory_controller_vec.size(); ++i) {
        if (m_memory_controller_vec[i]->functionalReadBuffers(pkt)) {
            return true;
        }
    }

    if (m_network_ptr->functionalRead(pkt)) {
        return true;
    }
    return false;
}

// The function searches through all the buffers that exist in different
// cache, directory and memory controllers, and in the network components
// and writes the data portion of those that hold the address specified
// in the packet.
bool
RubySystem::functionalWrite(PacketPtr pkt)
{
    Address addr(pkt->getAddr());
    Address line_addr = line_address(addr);
    AccessPermission access_perm = AccessPermission_NotPresent;
    int num_controllers = m_abs_cntrl_vec.size();

    DPRINTF(RubySystem, "Functional Write request for %s\n", addr);

    uint8_t *data = pkt->getPtr<uint8_t>(true);
    unsigned int size_in_bytes = pkt->getSize();
    unsigned startByte = addr.getAddress() - line_addr.getAddress();

    for (unsigned int i = 0; i < num_controllers; ++i) {
        m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);

        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
        if (access_perm != AccessPermission_Invalid &&
            access_perm != AccessPermission_NotPresent) {

            DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr);
            DPRINTF(RubySystem, "%s\n", block);
            for (unsigned j = 0; j < size_in_bytes; ++j) {
                block.setByte(j + startByte, data[j]);
            }
            DPRINTF(RubySystem, "%s\n", block);
        }
    }

    uint32_t M5_VAR_USED num_functional_writes = 0;
    for (unsigned int i = 0; i < m_memory_controller_vec.size(); ++i) {
        num_functional_writes +=
            m_memory_controller_vec[i]->functionalWriteBuffers(pkt);
    }

    num_functional_writes += m_network_ptr->functionalWrite(pkt);
    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);

    return true;
}

#ifdef CHECK_COHERENCE
// This code checks for cases where the given cache block is exclusive in
// one node and shared in another -- a coherence violation.
//
// To use, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions.  Do this
// in setState.  The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol
//
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif

RubySystem *
RubySystemParams::create()
{
    return new RubySystem(this);
}

/**
 * virtual process function that is invoked when the callback
 * queue is executed.
 */
void
RubyDumpStatsCallback::process()
{
    ruby_system->printStats(*os);
}