RubySystem.cc revision 9572
1/*
2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <fcntl.h>
30#include <zlib.h>
31
32#include <cstdio>
33
34#include "base/intmath.hh"
35#include "base/statistics.hh"
36#include "debug/RubyCacheTrace.hh"
37#include "debug/RubySystem.hh"
38#include "mem/ruby/common/Address.hh"
39#include "mem/ruby/network/Network.hh"
40#include "mem/ruby/profiler/Profiler.hh"
41#include "mem/ruby/system/System.hh"
42#include "sim/eventq.hh"
43#include "sim/simulate.hh"
44
45using namespace std;
46
47int RubySystem::m_random_seed;
48bool RubySystem::m_randomization;
49uint32_t RubySystem::m_block_size_bytes;
50uint32_t RubySystem::m_block_size_bits;
51uint64_t RubySystem::m_memory_size_bytes;
52uint32_t RubySystem::m_memory_size_bits;
53
54RubySystem::RubySystem(const Params *p)
55    : ClockedObject(p)
56{
57    if (g_system_ptr != NULL)
58        fatal("Only one RubySystem object currently allowed.\n");
59
60    m_random_seed = p->random_seed;
61    srandom(m_random_seed);
62    m_randomization = p->randomization;
63
64    m_block_size_bytes = p->block_size_bytes;
65    assert(isPowerOf2(m_block_size_bytes));
66    m_block_size_bits = floorLog2(m_block_size_bytes);
67
68    m_memory_size_bytes = p->mem_size;
69    if (m_memory_size_bytes == 0) {
70        m_memory_size_bits = 0;
71    } else {
72        m_memory_size_bits = ceilLog2(m_memory_size_bytes);
73    }
74
75    if (p->no_mem_vec) {
76        m_mem_vec_ptr = NULL;
77    } else {
78        m_mem_vec_ptr = new MemoryVector;
79        m_mem_vec_ptr->resize(m_memory_size_bytes);
80    }
81
82    // Print ruby configuration and stats at exit and when asked for
83    Stats::registerDumpCallback(new RubyDumpStatsCallback(p->stats_filename,
84                                                          this));
85
86    m_warmup_enabled = false;
87    m_cooldown_enabled = false;
88
89    // Setup the global variables used in Ruby
90    g_system_ptr = this;
91
92    // Resize to the size of different machine types
93    g_abs_controls.resize(MachineType_NUM);
94}
95
96void
97RubySystem::init()
98{
99    m_profiler_ptr->clearStats();
100    m_network_ptr->clearStats();
101}
102
103void
104RubySystem::registerNetwork(Network* network_ptr)
105{
106  m_network_ptr = network_ptr;
107}
108
109void
110RubySystem::registerProfiler(Profiler* profiler_ptr)
111{
112  m_profiler_ptr = profiler_ptr;
113}
114
115void
116RubySystem::registerAbstractController(AbstractController* cntrl)
117{
118  m_abs_cntrl_vec.push_back(cntrl);
119
120  MachineID id = cntrl->getMachineID();
121  g_abs_controls[id.getType()][id.getNum()] = cntrl;
122}
123
124void
125RubySystem::registerSparseMemory(SparseMemory* s)
126{
127    m_sparse_memory_vector.push_back(s);
128}
129
130void
131RubySystem::registerMemController(MemoryControl *mc) {
132    m_memory_controller_vec.push_back(mc);
133}
134
135RubySystem::~RubySystem()
136{
137    delete m_network_ptr;
138    delete m_profiler_ptr;
139    if (m_mem_vec_ptr)
140        delete m_mem_vec_ptr;
141}
142
143void
144RubySystem::printStats(ostream& out)
145{
146    const time_t T = time(NULL);
147    tm *localTime = localtime(&T);
148    char buf[100];
149    strftime(buf, 100, "%b/%d/%Y %H:%M:%S", localTime);
150
151    out << "Real time: " << buf << endl;
152
153    m_profiler_ptr->printStats(out);
154    m_network_ptr->printStats(out);
155
156    for (uint32_t i = 0;i < g_abs_controls.size(); ++i) {
157        for (map<uint32_t, AbstractController *>::iterator it =
158                g_abs_controls[i].begin();
159             it != g_abs_controls[i].end(); ++it) {
160
161            ((*it).second)->printStats(out);
162        }
163    }
164}
165
166void
167RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
168                                 uint64 uncompressed_trace_size)
169{
170    // Create the checkpoint file for the memory
171    string thefile = Checkpoint::dir() + "/" + filename.c_str();
172
173    int fd = creat(thefile.c_str(), 0664);
174    if (fd < 0) {
175        perror("creat");
176        fatal("Can't open memory trace file '%s'\n", filename);
177    }
178
179    gzFile compressedMemory = gzdopen(fd, "wb");
180    if (compressedMemory == NULL)
181        fatal("Insufficient memory to allocate compression state for %s\n",
182              filename);
183
184    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
185        uncompressed_trace_size) {
186        fatal("Write failed on memory trace file '%s'\n", filename);
187    }
188
189    if (gzclose(compressedMemory)) {
190        fatal("Close failed on memory trace file '%s'\n", filename);
191    }
192    delete raw_data;
193}
194
195void
196RubySystem::serialize(std::ostream &os)
197{
198    m_cooldown_enabled = true;
199
200    vector<Sequencer*> sequencer_map;
201    Sequencer* sequencer_ptr = NULL;
202    int cntrl_id = -1;
203
204
205    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
206        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
207        if (sequencer_ptr == NULL) {
208            sequencer_ptr = sequencer_map[cntrl];
209            cntrl_id = cntrl;
210        }
211    }
212
213    assert(sequencer_ptr != NULL);
214
215    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
216        if (sequencer_map[cntrl] == NULL) {
217            sequencer_map[cntrl] = sequencer_ptr;
218        }
219    }
220
221    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
222    // Create the CacheRecorder and record the cache trace
223    m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);
224
225    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
226        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
227    }
228
229    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
230    // save the current tick value
231    Tick curtick_original = curTick();
232    // save the event queue head
233    Event* eventq_head = eventq->replaceHead(NULL);
234    DPRINTF(RubyCacheTrace, "Recording current tick %ld and event queue\n",
235            curtick_original);
236
237    // Schedule an event to start cache cooldown
238    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
239    enqueueRubyEvent(curTick());
240    simulate();
241    DPRINTF(RubyCacheTrace, "Cache flush complete\n");
242
243    // Restore eventq head
244    eventq_head = eventq->replaceHead(eventq_head);
245    // Restore curTick
246    setCurTick(curtick_original);
247
248    uint8_t *raw_data = NULL;
249
250    if (m_mem_vec_ptr != NULL) {
251        uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);
252
253        string memory_trace_file = name() + ".memory.gz";
254        writeCompressedTrace(raw_data, memory_trace_file,
255                             memory_trace_size);
256
257        SERIALIZE_SCALAR(memory_trace_file);
258        SERIALIZE_SCALAR(memory_trace_size);
259
260    } else {
261        for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
262            m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
263                                                    m_cache_recorder);
264        }
265    }
266
267    // Aggergate the trace entries together into a single array
268    raw_data = new uint8_t[4096];
269    uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
270                                                                 4096);
271    string cache_trace_file = name() + ".cache.gz";
272    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
273
274    SERIALIZE_SCALAR(cache_trace_file);
275    SERIALIZE_SCALAR(cache_trace_size);
276
277    m_cooldown_enabled = false;
278}
279
280void
281RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
282                                uint64& uncompressed_trace_size)
283{
284    // Read the trace file
285    gzFile compressedTrace;
286
287    // trace file
288    int fd = open(filename.c_str(), O_RDONLY);
289    if (fd < 0) {
290        perror("open");
291        fatal("Unable to open trace file %s", filename);
292    }
293
294    compressedTrace = gzdopen(fd, "rb");
295    if (compressedTrace == NULL) {
296        fatal("Insufficient memory to allocate compression state for %s\n",
297              filename);
298    }
299
300    raw_data = new uint8_t[uncompressed_trace_size];
301    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
302            uncompressed_trace_size) {
303        fatal("Unable to read complete trace from file %s\n", filename);
304    }
305
306    if (gzclose(compressedTrace)) {
307        fatal("Failed to close cache trace file '%s'\n", filename);
308    }
309}
310
311void
312RubySystem::unserialize(Checkpoint *cp, const string &section)
313{
314    //
315    // The main purpose for clearing stats in the unserialize process is so
316    // that the profiler can correctly set its start time to the unserialized
317    // value of curTick()
318    //
319    resetStats();
320    uint8_t *uncompressed_trace = NULL;
321
322    if (m_mem_vec_ptr != NULL) {
323        string memory_trace_file;
324        uint64 memory_trace_size = 0;
325
326        UNSERIALIZE_SCALAR(memory_trace_file);
327        UNSERIALIZE_SCALAR(memory_trace_size);
328        memory_trace_file = cp->cptDir + "/" + memory_trace_file;
329
330        readCompressedTrace(memory_trace_file, uncompressed_trace,
331                            memory_trace_size);
332        m_mem_vec_ptr->populatePages(uncompressed_trace);
333
334        delete uncompressed_trace;
335        uncompressed_trace = NULL;
336    }
337
338    string cache_trace_file;
339    uint64 cache_trace_size = 0;
340
341    UNSERIALIZE_SCALAR(cache_trace_file);
342    UNSERIALIZE_SCALAR(cache_trace_size);
343    cache_trace_file = cp->cptDir + "/" + cache_trace_file;
344
345    readCompressedTrace(cache_trace_file, uncompressed_trace,
346                        cache_trace_size);
347    m_warmup_enabled = true;
348
349    vector<Sequencer*> sequencer_map;
350    Sequencer* t = NULL;
351    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
352        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
353        if (t == NULL) t = sequencer_map[cntrl];
354    }
355
356    assert(t != NULL);
357
358    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
359        if (sequencer_map[cntrl] == NULL) {
360            sequencer_map[cntrl] = t;
361        }
362    }
363
364    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
365                                         sequencer_map);
366}
367
368void
369RubySystem::startup()
370{
371    if (m_warmup_enabled) {
372        // save the current tick value
373        Tick curtick_original = curTick();
374        // save the event queue head
375        Event* eventq_head = eventq->replaceHead(NULL);
376        // set curTick to 0 and reset Ruby System's clock
377        setCurTick(0);
378        resetClock();
379
380        // Schedule an event to start cache warmup
381        enqueueRubyEvent(curTick());
382        simulate();
383
384        delete m_cache_recorder;
385        m_cache_recorder = NULL;
386        m_warmup_enabled = false;
387
388        // reset DRAM so that it's not waiting for events on the old event
389        // queue
390        for (int i = 0; i < m_memory_controller_vec.size(); ++i) {
391            m_memory_controller_vec[i]->reset();
392        }
393
394        // Restore eventq head
395        eventq_head = eventq->replaceHead(eventq_head);
396        // Restore curTick and Ruby System's clock
397        setCurTick(curtick_original);
398        resetClock();
399    }
400}
401
402void
403RubySystem::RubyEvent::process()
404{
405    if (ruby_system->m_warmup_enabled) {
406        ruby_system->m_cache_recorder->enqueueNextFetchRequest();
407    }  else if (ruby_system->m_cooldown_enabled) {
408        ruby_system->m_cache_recorder->enqueueNextFlushRequest();
409    }
410}
411
412void
413RubySystem::resetStats()
414{
415    m_profiler_ptr->clearStats();
416    m_network_ptr->clearStats();
417    for (uint32_t cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
418        m_abs_cntrl_vec[cntrl]->clearStats();
419    }
420}
421
422bool
423RubySystem::functionalRead(PacketPtr pkt)
424{
425    Address address(pkt->getAddr());
426    Address line_address(address);
427    line_address.makeLineAddress();
428
429    AccessPermission access_perm = AccessPermission_NotPresent;
430    int num_controllers = m_abs_cntrl_vec.size();
431
432    DPRINTF(RubySystem, "Functional Read request for %s\n",address);
433
434    unsigned int num_ro = 0;
435    unsigned int num_rw = 0;
436    unsigned int num_busy = 0;
437    unsigned int num_backing_store = 0;
438    unsigned int num_invalid = 0;
439
440    // In this loop we count the number of controllers that have the given
441    // address in read only, read write and busy states.
442    for (unsigned int i = 0; i < num_controllers; ++i) {
443        access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address);
444        if (access_perm == AccessPermission_Read_Only)
445            num_ro++;
446        else if (access_perm == AccessPermission_Read_Write)
447            num_rw++;
448        else if (access_perm == AccessPermission_Busy)
449            num_busy++;
450        else if (access_perm == AccessPermission_Backing_Store)
451            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
452            // to represent blocks in memory *for Broadcast/Snooping protocols*,
453            // where memory has no idea whether it has an exclusive copy of data
454            // or not.
455            num_backing_store++;
456        else if (access_perm == AccessPermission_Invalid ||
457                 access_perm == AccessPermission_NotPresent)
458            num_invalid++;
459    }
460    assert(num_rw <= 1);
461
462    uint8_t *data = pkt->getPtr<uint8_t>(true);
463    unsigned int size_in_bytes = pkt->getSize();
464    unsigned startByte = address.getAddress() - line_address.getAddress();
465
466    // This if case is meant to capture what happens in a Broadcast/Snoop
467    // protocol where the block does not exist in the cache hierarchy. You
468    // only want to read from the Backing_Store memory if there is no copy in
469    // the cache hierarchy, otherwise you want to try to read the RO or RW
470    // copies existing in the cache hierarchy (covered by the else statement).
471    // The reason is because the Backing_Store memory could easily be stale, if
472    // there are copies floating around the cache hierarchy, so you want to read
473    // it only if it's not in the cache hierarchy at all.
474    if (num_invalid == (num_controllers - 1) &&
475            num_backing_store == 1) {
476        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
477        for (unsigned int i = 0; i < num_controllers; ++i) {
478            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
479            if (access_perm == AccessPermission_Backing_Store) {
480                DataBlock& block = m_abs_cntrl_vec[i]->
481                    getDataBlock(line_address);
482
483                DPRINTF(RubySystem, "reading from %s block %s\n",
484                        m_abs_cntrl_vec[i]->name(), block);
485                for (unsigned j = 0; j < size_in_bytes; ++j) {
486                    data[j] = block.getByte(j + startByte);
487                }
488                return true;
489            }
490        }
491    } else if (num_ro > 0 || num_rw == 1) {
492        // In Broadcast/Snoop protocols, this covers if you know the block
493        // exists somewhere in the caching hierarchy, then you want to read any
494        // valid RO or RW block.  In directory protocols, same thing, you want
495        // to read any valid readable copy of the block.
496        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
497                num_busy, num_ro, num_rw);
498        // In this loop, we try to figure which controller has a read only or
499        // a read write copy of the given address. Any valid copy would suffice
500        // for a functional read.
501        for (unsigned int i = 0;i < num_controllers;++i) {
502            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
503            if (access_perm == AccessPermission_Read_Only ||
504                access_perm == AccessPermission_Read_Write) {
505                DataBlock& block = m_abs_cntrl_vec[i]->
506                    getDataBlock(line_address);
507
508                DPRINTF(RubySystem, "reading from %s block %s\n",
509                        m_abs_cntrl_vec[i]->name(), block);
510                for (unsigned j = 0; j < size_in_bytes; ++j) {
511                    data[j] = block.getByte(j + startByte);
512                }
513                return true;
514            }
515        }
516    }
517
518    return false;
519}
520
521// The function searches through all the buffers that exist in different
522// cache, directory and memory controllers, and in the network components
523// and writes the data portion of those that hold the address specified
524// in the packet.
525bool
526RubySystem::functionalWrite(PacketPtr pkt)
527{
528    Address addr(pkt->getAddr());
529    Address line_addr = line_address(addr);
530    AccessPermission access_perm = AccessPermission_NotPresent;
531    int num_controllers = m_abs_cntrl_vec.size();
532
533    DPRINTF(RubySystem, "Functional Write request for %s\n",addr);
534
535    uint8_t *data = pkt->getPtr<uint8_t>(true);
536    unsigned int size_in_bytes = pkt->getSize();
537    unsigned startByte = addr.getAddress() - line_addr.getAddress();
538
539    uint32_t M5_VAR_USED num_functional_writes = 0;
540
541    for (unsigned int i = 0; i < num_controllers;++i) {
542        num_functional_writes +=
543            m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);
544
545        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
546        if (access_perm != AccessPermission_Invalid &&
547            access_perm != AccessPermission_NotPresent) {
548
549            num_functional_writes++;
550
551            DataBlock& block = m_abs_cntrl_vec[i]->getDataBlock(line_addr);
552            DPRINTF(RubySystem, "%s\n",block);
553            for (unsigned j = 0; j < size_in_bytes; ++j) {
554              block.setByte(j + startByte, data[j]);
555            }
556            DPRINTF(RubySystem, "%s\n",block);
557        }
558    }
559
560    for (unsigned int i = 0; i < m_memory_controller_vec.size() ;++i) {
561        num_functional_writes +=
562            m_memory_controller_vec[i]->functionalWriteBuffers(pkt);
563    }
564
565    num_functional_writes += m_network_ptr->functionalWrite(pkt);
566    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
567
568    return true;
569}
570
#ifdef CHECK_COHERENCE
// Checks whether the given cache block is exclusive in one node while being
// exclusive or shared in another -- a coherence violation.
//
// To use, the SLICC specification must call sequencer.checkCoherence(address)
// when the controller changes to a state with new permissions.  Do this
// in setState.  The SLICC spec must also define methods "isBlockShared"
// and "isBlockExclusive" that are specific to that protocol
//
// Note: the body below is compiled out with "#if 0", so even when
// CHECK_COHERENCE is defined this function currently does nothing.
//
void
RubySystem::checkGlobalCoherenceInvariant(const Address& addr)
{
#if 0
    NodeID exclusive = -1;
    NodeID lastShared = -1;
    bool sharedDetected = false;

    for (int i = 0; i < m_chip_vector.size(); i++) {
        if (m_chip_vector[i]->isBlockExclusive(addr)) {
            if (exclusive != -1) {
                // coherence violation
                WARN_EXPR(exclusive);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- 2 exclusive chips");
            } else if (sharedDetected) {
                WARN_EXPR(lastShared);
                WARN_EXPR(m_chip_vector[i]->getID());
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            } else {
                exclusive = m_chip_vector[i]->getID();
            }
        } else if (m_chip_vector[i]->isBlockShared(addr)) {
            sharedDetected = true;
            lastShared = m_chip_vector[i]->getID();

            if (exclusive != -1) {
                WARN_EXPR(lastShared);
                WARN_EXPR(exclusive);
                WARN_EXPR(addr);
                WARN_EXPR(getTime());
                ERROR_MSG("Coherence Violation Detected -- exclusive chip with >=1 shared");
            }
        }
    }
#endif
}
#endif
622
623RubySystem *
624RubySystemParams::create()
625{
626    return new RubySystem(this);
627}
628
629/**
630 * virtual process function that is invoked when the callback
631 * queue is executed.
632 */
633void
634RubyDumpStatsCallback::process()
635{
636    ruby_system->printStats(*os);
637}
638