RubySystem.cc revision 9670
1/*
2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <fcntl.h>
30#include <zlib.h>
31
32#include <cstdio>
33
34#include "base/intmath.hh"
35#include "base/statistics.hh"
36#include "debug/RubyCacheTrace.hh"
37#include "debug/RubySystem.hh"
38#include "mem/ruby/common/Address.hh"
39#include "mem/ruby/network/Network.hh"
40#include "mem/ruby/profiler/Profiler.hh"
41#include "mem/ruby/system/System.hh"
42#include "sim/eventq.hh"
43#include "sim/simulate.hh"
44
45using namespace std;
46
47int RubySystem::m_random_seed;
48bool RubySystem::m_randomization;
49uint32_t RubySystem::m_block_size_bytes;
50uint32_t RubySystem::m_block_size_bits;
51uint64_t RubySystem::m_memory_size_bytes;
52uint32_t RubySystem::m_memory_size_bits;
53
54RubySystem::RubySystem(const Params *p)
55    : ClockedObject(p)
56{
57    if (g_system_ptr != NULL)
58        fatal("Only one RubySystem object currently allowed.\n");
59
60    m_random_seed = p->random_seed;
61    srandom(m_random_seed);
62    m_randomization = p->randomization;
63
64    m_block_size_bytes = p->block_size_bytes;
65    assert(isPowerOf2(m_block_size_bytes));
66    m_block_size_bits = floorLog2(m_block_size_bytes);
67
68    m_memory_size_bytes = p->mem_size;
69    if (m_memory_size_bytes == 0) {
70        m_memory_size_bits = 0;
71    } else {
72        m_memory_size_bits = ceilLog2(m_memory_size_bytes);
73    }
74
75    if (p->no_mem_vec) {
76        m_mem_vec_ptr = NULL;
77    } else {
78        m_mem_vec_ptr = new MemoryVector;
79        m_mem_vec_ptr->resize(m_memory_size_bytes);
80    }
81
82    // Print ruby configuration and stats at exit and when asked for
83    Stats::registerDumpCallback(new RubyDumpStatsCallback(p->stats_filename,
84                                                          this));
85
86    m_warmup_enabled = false;
87    m_cooldown_enabled = false;
88
89    // Setup the global variables used in Ruby
90    g_system_ptr = this;
91
92    // Resize to the size of different machine types
93    g_abs_controls.resize(MachineType_NUM);
94}
95
96void
97RubySystem::registerNetwork(Network* network_ptr)
98{
99  m_network_ptr = network_ptr;
100}
101
102void
103RubySystem::registerProfiler(Profiler* profiler_ptr)
104{
105  m_profiler_ptr = profiler_ptr;
106}
107
108void
109RubySystem::registerAbstractController(AbstractController* cntrl)
110{
111  m_abs_cntrl_vec.push_back(cntrl);
112
113  MachineID id = cntrl->getMachineID();
114  g_abs_controls[id.getType()][id.getNum()] = cntrl;
115}
116
117void
118RubySystem::registerSparseMemory(SparseMemory* s)
119{
120    m_sparse_memory_vector.push_back(s);
121}
122
123void
124RubySystem::registerMemController(MemoryControl *mc) {
125    m_memory_controller_vec.push_back(mc);
126}
127
128RubySystem::~RubySystem()
129{
130    delete m_network_ptr;
131    delete m_profiler_ptr;
132    if (m_mem_vec_ptr)
133        delete m_mem_vec_ptr;
134}
135
136void
137RubySystem::printStats(ostream& out)
138{
139    const time_t T = time(NULL);
140    tm *localTime = localtime(&T);
141    char buf[100];
142    strftime(buf, 100, "%b/%d/%Y %H:%M:%S", localTime);
143
144    out << "Real time: " << buf << endl;
145
146    m_profiler_ptr->printStats(out);
147    m_network_ptr->printStats(out);
148
149    for (uint32_t i = 0;i < g_abs_controls.size(); ++i) {
150        for (map<uint32_t, AbstractController *>::iterator it =
151                g_abs_controls[i].begin();
152             it != g_abs_controls[i].end(); ++it) {
153
154            ((*it).second)->printStats(out);
155        }
156    }
157}
158
159void
160RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
161                                 uint64 uncompressed_trace_size)
162{
163    // Create the checkpoint file for the memory
164    string thefile = Checkpoint::dir() + "/" + filename.c_str();
165
166    int fd = creat(thefile.c_str(), 0664);
167    if (fd < 0) {
168        perror("creat");
169        fatal("Can't open memory trace file '%s'\n", filename);
170    }
171
172    gzFile compressedMemory = gzdopen(fd, "wb");
173    if (compressedMemory == NULL)
174        fatal("Insufficient memory to allocate compression state for %s\n",
175              filename);
176
177    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
178        uncompressed_trace_size) {
179        fatal("Write failed on memory trace file '%s'\n", filename);
180    }
181
182    if (gzclose(compressedMemory)) {
183        fatal("Close failed on memory trace file '%s'\n", filename);
184    }
185    delete raw_data;
186}
187
188void
189RubySystem::serialize(std::ostream &os)
190{
191    m_cooldown_enabled = true;
192
193    vector<Sequencer*> sequencer_map;
194    Sequencer* sequencer_ptr = NULL;
195    int cntrl_id = -1;
196
197
198    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
199        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
200        if (sequencer_ptr == NULL) {
201            sequencer_ptr = sequencer_map[cntrl];
202            cntrl_id = cntrl;
203        }
204    }
205
206    assert(sequencer_ptr != NULL);
207
208    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
209        if (sequencer_map[cntrl] == NULL) {
210            sequencer_map[cntrl] = sequencer_ptr;
211        }
212    }
213
214    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
215    // Create the CacheRecorder and record the cache trace
216    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);
217
218    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
219        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
220    }
221
222    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
223    // save the current tick value
224    Tick curtick_original = curTick();
225    // save the event queue head
226    Event* eventq_head = eventq->replaceHead(NULL);
227    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
228            curtick_original);
229
230    // Schedule an event to start cache cooldown
231    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
232    enqueueRubyEvent(curTick());
233    simulate();
234    DPRINTF(RubyCacheTrace, "Cache flush complete\n");
235
236    // Restore eventq head
237    eventq_head = eventq->replaceHead(eventq_head);
238    // Restore curTick
239    setCurTick(curtick_original);
240
241    uint8_t *raw_data = NULL;
242
243    if (m_mem_vec_ptr != NULL) {
244        uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);
245
246        string memory_trace_file = name() + ".memory.gz";
247        writeCompressedTrace(raw_data, memory_trace_file,
248                             memory_trace_size);
249
250        SERIALIZE_SCALAR(memory_trace_file);
251        SERIALIZE_SCALAR(memory_trace_size);
252
253    } else {
254        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
255            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
256                                                    m_cache_recorder);
257        }
258    }
259
260    // Aggergate the trace entries together into a single array
261    raw_data = new uint8_t[4096];
262    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
263                                                                 4096);
264    string cache_trace_file = name() + ".cache.gz";
265    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
266
267    SERIALIZE_SCALAR(cache_trace_file);
268    SERIALIZE_SCALAR(cache_trace_size);
269
270    m_cooldown_enabled = false;
271}
272
273void
274RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
275                                uint64& uncompressed_trace_size)
276{
277    // Read the trace file
278    gzFile compressedTrace;
279
280    // trace file
281    int fd = open(filename.c_str(), O_RDONLY);
282    if (fd < 0) {
283        perror("open");
284        fatal("Unable to open trace file %s", filename);
285    }
286
287    compressedTrace = gzdopen(fd, "rb");
288    if (compressedTrace == NULL) {
289        fatal("Insufficient memory to allocate compression state for %s\n",
290              filename);
291    }
292
293    raw_data = new uint8_t[uncompressed_trace_size];
294    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
295            uncompressed_trace_size) {
296        fatal("Unable to read complete trace from file %s\n", filename);
297    }
298
299    if (gzclose(compressedTrace)) {
300        fatal("Failed to close cache trace file '%s'\n", filename);
301    }
302}
303
304void
305RubySystem::unserialize(Checkpoint *cp, const string &section)
306{
307    uint8_t *uncompressed_trace = NULL;
308
309    if (m_mem_vec_ptr != NULL) {
310        string memory_trace_file;
311        uint64 memory_trace_size = 0;
312
313        UNSERIALIZE_SCALAR(memory_trace_file);
314        UNSERIALIZE_SCALAR(memory_trace_size);
315        memory_trace_file = cp->cptDir + "/" + memory_trace_file;
316
317        readCompressedTrace(memory_trace_file, uncompressed_trace,
318                            memory_trace_size);
319        m_mem_vec_ptr->populatePages(uncompressed_trace);
320
321        delete [] uncompressed_trace;
322        uncompressed_trace = NULL;
323    }
324
325    string cache_trace_file;
326    uint64 cache_trace_size = 0;
327
328    UNSERIALIZE_SCALAR(cache_trace_file);
329    UNSERIALIZE_SCALAR(cache_trace_size);
330    cache_trace_file = cp->cptDir + "/" + cache_trace_file;
331
332    readCompressedTrace(cache_trace_file, uncompressed_trace,
333                        cache_trace_size);
334    m_warmup_enabled = true;
335
336    vector<Sequencer*> sequencer_map;
337    Sequencer* t = NULL;
338    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
339        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
340        if (t == NULL) t = sequencer_map[cntrl];
341    }
342
343    assert(t != NULL);
344
345    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
346        if (sequencer_map[cntrl] == NULL) {
347            sequencer_map[cntrl] = t;
348        }
349    }
350
351    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
352                                         sequencer_map);
353}
354
355void
356RubySystem::startup()
357{
358
359    // Ruby restores state from a checkpoint by resetting the clock to 0 and
360    // playing the requests that can possibly re-generate the cache state.
361    // The clock value is set to the actual checkpointed value once all the
362    // requests have been executed.
363    //
364    // This way of restoring state is pretty finicky. For example, if a
365    // Ruby component reads time before the state has been restored, it would
366    // cache this value and hence its clock would not be reset to 0, when
367    // Ruby resets the global clock. This can potentially result in a
368    // deadlock.
369    //
370    // The solution is that no Ruby component should read time before the
371    // simulation starts. And then one also needs to hope that the time
372    // Ruby finishes restoring the state is less than the time when the
373    // state was checkpointed.
374
375    if (m_warmup_enabled) {
376        // save the current tick value
377        Tick curtick_original = curTick();
378        // save the event queue head
379        Event* eventq_head = eventq->replaceHead(NULL);
380        // set curTick to 0 and reset Ruby System's clock
381        setCurTick(0);
382        resetClock();
383
384        // Schedule an event to start cache warmup
385        enqueueRubyEvent(curTick());
386        simulate();
387
388        delete m_cache_recorder;
389        m_cache_recorder = NULL;
390        m_warmup_enabled = false;
391
392        // reset DRAM so that it's not waiting for events on the old event
393        // queue
394        for (int i = 0; i < m_memory_controller_vec.size(); ++i) {
395            m_memory_controller_vec[i]->reset();
396        }
397
398        // Restore eventq head
399        eventq_head = eventq->replaceHead(eventq_head);
400        // Restore curTick and Ruby System's clock
401        setCurTick(curtick_original);
402        resetClock();
403    }
404
405    resetStats();
406}
407
408void
409RubySystem::RubyEvent::process()
410{
411    if (ruby_system->m_warmup_enabled) {
412        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
413    }  else if (ruby_system->m_cooldown_enabled) {
414        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
415    }
416}
417
418void
419RubySystem::resetStats()
420{
421    m_profiler_ptr->clearStats();
422    m_network_ptr->clearStats();
423    for (uint32_t cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
424        m_abs_cntrl_vec[cntrl]->clearStats();
425    }
426
427    g_ruby_start = curCycle();
428}
429
430bool
431RubySystem::functionalRead(PacketPtr pkt)
432{
433    Address address(pkt->getAddr());
434    Address line_address(address);
435    line_address.makeLineAddress();
436
437    AccessPermission access_perm = AccessPermission_NotPresent;
438    int num_controllers = m_abs_cntrl_vec.size();
439
440    DPRINTF(RubySystem, "Functional Read request for %s\n",address);
441
442    unsigned int num_ro = 0;
443    unsigned int num_rw = 0;
444    unsigned int num_busy = 0;
445    unsigned int num_backing_store = 0;
446    unsigned int num_invalid = 0;
447
448    // In this loop we count the number of controllers that have the given
449    // address in read only, read write and busy states.
450    for (unsigned int i = 0; i < num_controllers; ++i) {
451        access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address);
452        if (access_perm == AccessPermission_Read_Only)
453            num_ro++;
454        else if (access_perm == AccessPermission_Read_Write)
455            num_rw++;
456        else if (access_perm == AccessPermission_Busy)
457            num_busy++;
458        else if (access_perm == AccessPermission_Backing_Store)
459            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
460            // to represent blocks in memory *for Broadcast/Snooping protocols*,
461            // where memory has no idea whether it has an exclusive copy of data
462            // or not.
463            num_backing_store++;
464        else if (access_perm == AccessPermission_Invalid ||
465                 access_perm == AccessPermission_NotPresent)
466            num_invalid++;
467    }
468    assert(num_rw <= 1);
469
470    uint8_t *data = pkt->getPtr<uint8_t>(true);
471    unsigned int size_in_bytes = pkt->getSize();
472    unsigned startByte = address.getAddress() - line_address.getAddress();
473
474    // This if case is meant to capture what happens in a Broadcast/Snoop
475    // protocol where the block does not exist in the cache hierarchy. You
476    // only want to read from the Backing_Store memory if there is no copy in
477    // the cache hierarchy, otherwise you want to try to read the RO or RW
478    // copies existing in the cache hierarchy (covered by the else statement).
479    // The reason is because the Backing_Store memory could easily be stale, if
480    // there are copies floating around the cache hierarchy, so you want to read
481    // it only if it's not in the cache hierarchy at all.
482    if (num_invalid == (num_controllers - 1) &&
483            num_backing_store == 1) {
484        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
485        for (unsigned int i = 0; i < num_controllers; ++i) {
486            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
487            if (access_perm == AccessPermission_Backing_Store) {
488                DataBlock& block = m_abs_cntrl_vec[i]->
489                    getDataBlock(line_address);
490
491                DPRINTF(RubySystem, "reading from %s block %s\n",
492                        m_abs_cntrl_vec[i]->name(), block);
493                for (unsigned j = 0; j < size_in_bytes; ++j) {
494                    data[j] = block.getByte(j + startByte);
495                }
496                return true;
497            }
498        }
499    } else if (num_ro > 0 || num_rw == 1) {
500        // In Broadcast/Snoop protocols, this covers if you know the block
501        // exists somewhere in the caching hierarchy, then you want to read any
502        // valid RO or RW block.  In directory protocols, same thing, you want
503        // to read any valid readable copy of the block.
504        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
505                num_busy, num_ro, num_rw);
506        // In this loop, we try to figure which controller has a read only or
507        // a read write copy of the given address. Any valid copy would suffice
508        // for a functional read.
509        for (unsigned int i = 0;i < num_controllers;++i) {
510            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
511            if (access_perm == AccessPermission_Read_Only ||
512                access_perm == AccessPermission_Read_Write) {
513                DataBlock& block = m_abs_cntrl_vec[i]->
514                    getDataBlock(line_address);
515
516                DPRINTF(RubySystem, "reading from %s block %s\n",
517                        m_abs_cntrl_vec[i]->name(), block);
518                for (unsigned j = 0; j < size_in_bytes; ++j) {
519                    data[j] = block.getByte(j + startByte);
520                }
521                return true;
522            }
523        }
524    }
525
526    return false;
527}
528
529// The function searches through all the buffers that exist in different
530// cache, directory and memory controllers, and in the network components
531// and writes the data portion of those that hold the address specified
532// in the packet.
533bool
534RubySystem::functionalWrite(PacketPtr pkt)
535{
536    Address addr(pkt->getAddr());
537    Address line_addr = line_address(addr);
538    AccessPermission access_perm = AccessPermission_NotPresent;
539    int num_controllers = m_abs_cntrl_vec.size();
540
541    DPRINTF(RubySystem, "Functional Write request for %s\n",addr);
542
543    uint8_t *data = pkt->getPtr<uint8_t>(true);
544    unsigned int size_in_bytes = pkt->getSize();
545    unsigned startByte = addr.getAddress() - line_addr.getAddress();
546
547    uint32_t M5_VAR_USED num_functional_writes = 0;
548
549    for (unsigned int i = 0; i < num_controllers;++i) {
550        num_functional_writes +=
551            m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);
552
553        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
554        if (access_perm != AccessPermission_Invalid &&
555            access_perm != AccessPermission_NotPresent) {
556
557            num_functional_writes++;
558
559            DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr);
560            DPRINTF(RubySystem, "%s\n",block);
561            for (unsigned j = 0; j < size_in_bytes; ++j) {
562              block.setByte(j + startByte, data[j]);
563            }
564            DPRINTF(RubySystem, "%s\n",block);
565        }
566    }
567
568    for (unsigned int i = 0; i < m_memory_controller_vec.size() ;++i) {
569        num_functional_writes +=
570            m_memory_controller_vec[i]->functionalWriteBuffers(pkt);
571    }
572
573    num_functional_writes += m_network_ptr->functionalWrite(pkt);
574    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
575
576    return true;
577}
578
#ifdef CHECK_COHERENCE
// Coherence checker, only built when CHECK_COHERENCE is defined.
//
// The disabled code below looks for a cache block that is exclusive in one
// node while being exclusive or shared in another, which is a coherence
// violation.
//
// To use it, the SLICC specification must call
// sequencer.checkCoherence(address) whenever a controller changes to a
// state with new permissions; do this in setState. The SLICC spec must
// also define the protocol-specific methods "isBlockShared" and
// "isBlockExclusive".
//
// The whole body is currently compiled out with "#if 0", so the function
// does nothing even when CHECK_COHERENCE is defined.
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    bool sharedDetected = false;
    NodeID lastShared = -1;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // a second exclusive owner: coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                // exclusive here, but shared on an earlier chip
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            // shared here, but exclusive on an earlier chip
            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif
630
631RubySystem *
632RubySystemParams::create()
633{
634    return new RubySystem(this);
635}
636
637/**
638 * virtual process function that is invoked when the callback
639 * queue is executed.
640 */
641void
642RubyDumpStatsCallback::process()
643{
644    ruby_system->printStats(*os);
645}
646