1/*
2 * Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "mem/ruby/system/RubySystem.hh"
30
31#include <fcntl.h>
32#include <zlib.h>
33
34#include <cstdio>
35#include <list>
36
37#include "base/intmath.hh"
38#include "base/statistics.hh"
39#include "debug/RubyCacheTrace.hh"
40#include "debug/RubySystem.hh"
41#include "mem/ruby/common/Address.hh"
42#include "mem/ruby/network/Network.hh"
43#include "mem/simple_mem.hh"
44#include "sim/eventq.hh"
45#include "sim/simulate.hh"
46
47using namespace std;
48
49bool RubySystem::m_randomization;
50uint32_t RubySystem::m_block_size_bytes;
51uint32_t RubySystem::m_block_size_bits;
52uint32_t RubySystem::m_memory_size_bits;
53bool RubySystem::m_warmup_enabled = false;
54// To look forward to allowing multiple RubySystem instances, track the number
55// of RubySystems that need to be warmed up on checkpoint restore.
56unsigned RubySystem::m_systems_to_warmup = 0;
57bool RubySystem::m_cooldown_enabled = false;
58
59RubySystem::RubySystem(const Params *p)
60    : ClockedObject(p), m_access_backing_store(p->access_backing_store),
61      m_cache_recorder(NULL)
62{
63    m_randomization = p->randomization;
64
65    m_block_size_bytes = p->block_size_bytes;
66    assert(isPowerOf2(m_block_size_bytes));
67    m_block_size_bits = floorLog2(m_block_size_bytes);
68    m_memory_size_bits = p->memory_size_bits;
69
70    // Resize to the size of different machine types
71    m_abstract_controls.resize(MachineType_NUM);
72
73    // Collate the statistics before they are printed.
74    Stats::registerDumpCallback(new RubyStatsCallback(this));
75    // Create the profiler
76    m_profiler = new Profiler(p, this);
77    m_phys_mem = p->phys_mem;
78}
79
80void
81RubySystem::registerNetwork(Network* network_ptr)
82{
83    m_network = network_ptr;
84}
85
86void
87RubySystem::registerAbstractController(AbstractController* cntrl)
88{
89    m_abs_cntrl_vec.push_back(cntrl);
90
91    MachineID id = cntrl->getMachineID();
92    m_abstract_controls[id.getType()][id.getNum()] = cntrl;
93}
94
95RubySystem::~RubySystem()
96{
97    delete m_network;
98    delete m_profiler;
99}
100
101void
102RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
103                              uint64_t cache_trace_size,
104                              uint64_t block_size_bytes)
105{
106    vector<Sequencer*> sequencer_map;
107    Sequencer* sequencer_ptr = NULL;
108
109    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
110        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getCPUSequencer());
111        if (sequencer_ptr == NULL) {
112            sequencer_ptr = sequencer_map[cntrl];
113        }
114    }
115
116    assert(sequencer_ptr != NULL);
117
118    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
119        if (sequencer_map[cntrl] == NULL) {
120            sequencer_map[cntrl] = sequencer_ptr;
121        }
122    }
123
124    // Remove the old CacheRecorder if it's still hanging about.
125    if (m_cache_recorder != NULL) {
126        delete m_cache_recorder;
127    }
128
129    // Create the CacheRecorder and record the cache trace
130    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
131                                         sequencer_map, block_size_bytes);
132}
133
/**
 * Flush the Ruby caches back to memory.
 *
 * The function records a cache trace from every registered controller,
 * empties the event queue (remembering the events that can be restored),
 * runs a nested simulation in cooldown mode to perform the flush, and then
 * puts the original tick and the saved events back.  The cache recorder is
 * intentionally left alive afterwards.
 */
void
RubySystem::memWriteback()
{
    // processRubyEvent() issues flush requests while this flag is set
    // (and warmup is not enabled).
    m_cooldown_enabled = true;

    // Make the trace so we know what to write back.
    DPRINTF(RubyCacheTrace, "Recording Cache Trace\n");
    makeCacheRecorder(NULL, 0, getBlockSizeBytes());
    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
    }
    DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");

    // save the current tick value
    Tick curtick_original = curTick();
    DPRINTF(RubyCacheTrace, "Recording current tick %ld\n", curtick_original);

    // Deschedule all prior events on the event queue, but record the tick they
    // were scheduled at so they can be restored correctly later.
    list<pair<Event*, Tick> > original_events;
    while (!eventq->empty()) {
        Event *curr_head = eventq->getHead();
        if (curr_head->isAutoDelete()) {
            // Auto-delete events are not remembered, so they are not
            // rescheduled below.
            DPRINTF(RubyCacheTrace, "Event %s auto-deletes when descheduled,"
                    " not recording\n", curr_head->name());
        } else {
            original_events.push_back(make_pair(curr_head, curr_head->when()));
        }
        eventq->deschedule(curr_head);
    }

    // Schedule an event to start cache cooldown
    DPRINTF(RubyCacheTrace, "Starting cache flush\n");
    enqueueRubyEvent(curTick());
    // Nested simulation run that carries out the flush.
    simulate();
    DPRINTF(RubyCacheTrace, "Cache flush complete\n");

    // Deschedule any events left on the event queue.
    while (!eventq->empty()) {
        eventq->deschedule(eventq->getHead());
    }

    // Restore curTick
    setCurTick(curtick_original);

    // Restore all events that were originally on the event queue.  This is
    // done after setting curTick back to its original value so that events do
    // not seem to be scheduled in the past.  Note that the saved events are
    // rescheduled starting from the back of the list, i.e. in the reverse of
    // the order in which they were removed.
    while (!original_events.empty()) {
        pair<Event*, Tick> event = original_events.back();
        eventq->schedule(event.first, event.second);
        original_events.pop_back();
    }

    // No longer flushing back to memory.
    m_cooldown_enabled = false;

    // There are several issues with continuing simulation after calling
    // memWriteback() at the moment, that stem from taking events off the
    // queue, simulating again, and then putting them back on, whilst
    // pretending that no time has passed.  One is that some events will have
    // been deleted, so can't be put back.  Another is that any object
    // recording the tick something happens may end up storing a tick in the
    // future.  A simple warning here alerts the user that things may not work
    // as expected.
    warn_once("Ruby memory writeback is experimental.  Continuing simulation "
              "afterwards may not always work as intended.");

    // Keep the cache recorder around so that we can dump the trace if a
    // checkpoint is immediately taken.
}
205
206void
207RubySystem::writeCompressedTrace(uint8_t *raw_data, string filename,
208                                 uint64_t uncompressed_trace_size)
209{
210    // Create the checkpoint file for the memory
211    string thefile = CheckpointIn::dir() + "/" + filename.c_str();
212
213    int fd = creat(thefile.c_str(), 0664);
214    if (fd < 0) {
215        perror("creat");
216        fatal("Can't open memory trace file '%s'\n", filename);
217    }
218
219    gzFile compressedMemory = gzdopen(fd, "wb");
220    if (compressedMemory == NULL)
221        fatal("Insufficient memory to allocate compression state for %s\n",
222              filename);
223
224    if (gzwrite(compressedMemory, raw_data, uncompressed_trace_size) !=
225        uncompressed_trace_size) {
226        fatal("Write failed on memory trace file '%s'\n", filename);
227    }
228
229    if (gzclose(compressedMemory)) {
230        fatal("Close failed on memory trace file '%s'\n", filename);
231    }
232    delete[] raw_data;
233}
234
235void
236RubySystem::serialize(CheckpointOut &cp) const
237{
238    // Store the cache-block size, so we are able to restore on systems with a
239    // different cache-block size. CacheRecorder depends on the correct
240    // cache-block size upon unserializing.
241    uint64_t block_size_bytes = getBlockSizeBytes();
242    SERIALIZE_SCALAR(block_size_bytes);
243
244    // Check that there's a valid trace to use.  If not, then memory won't be
245    // up-to-date and the simulation will probably fail when restoring from the
246    // checkpoint.
247    if (m_cache_recorder == NULL) {
248        fatal("Call memWriteback() before serialize() to create ruby trace");
249    }
250
251    // Aggregate the trace entries together into a single array
252    uint8_t *raw_data = new uint8_t[4096];
253    uint64_t cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
254                                                                 4096);
255    string cache_trace_file = name() + ".cache.gz";
256    writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
257
258    SERIALIZE_SCALAR(cache_trace_file);
259    SERIALIZE_SCALAR(cache_trace_size);
260}
261
262void
263RubySystem::drainResume()
264{
265    // Delete the cache recorder if it was created in memWriteback()
266    // to checkpoint the current cache state.
267    if (m_cache_recorder) {
268        delete m_cache_recorder;
269        m_cache_recorder = NULL;
270    }
271}
272
273void
274RubySystem::readCompressedTrace(string filename, uint8_t *&raw_data,
275                                uint64_t &uncompressed_trace_size)
276{
277    // Read the trace file
278    gzFile compressedTrace;
279
280    // trace file
281    int fd = open(filename.c_str(), O_RDONLY);
282    if (fd < 0) {
283        perror("open");
284        fatal("Unable to open trace file %s", filename);
285    }
286
287    compressedTrace = gzdopen(fd, "rb");
288    if (compressedTrace == NULL) {
289        fatal("Insufficient memory to allocate compression state for %s\n",
290              filename);
291    }
292
293    raw_data = new uint8_t[uncompressed_trace_size];
294    if (gzread(compressedTrace, raw_data, uncompressed_trace_size) <
295            uncompressed_trace_size) {
296        fatal("Unable to read complete trace from file %s\n", filename);
297    }
298
299    if (gzclose(compressedTrace)) {
300        fatal("Failed to close cache trace file '%s'\n", filename);
301    }
302}
303
304void
305RubySystem::unserialize(CheckpointIn &cp)
306{
307    uint8_t *uncompressed_trace = NULL;
308
309    // This value should be set to the checkpoint-system's block-size.
310    // Optional, as checkpoints without it can be run if the
311    // checkpoint-system's block-size == current block-size.
312    uint64_t block_size_bytes = getBlockSizeBytes();
313    UNSERIALIZE_OPT_SCALAR(block_size_bytes);
314
315    string cache_trace_file;
316    uint64_t cache_trace_size = 0;
317
318    UNSERIALIZE_SCALAR(cache_trace_file);
319    UNSERIALIZE_SCALAR(cache_trace_size);
320    cache_trace_file = cp.cptDir + "/" + cache_trace_file;
321
322    readCompressedTrace(cache_trace_file, uncompressed_trace,
323                        cache_trace_size);
324    m_warmup_enabled = true;
325    m_systems_to_warmup++;
326
327    // Create the cache recorder that will hang around until startup.
328    makeCacheRecorder(uncompressed_trace, cache_trace_size, block_size_bytes);
329}
330
331void
332RubySystem::startup()
333{
334
335    // Ruby restores state from a checkpoint by resetting the clock to 0 and
336    // playing the requests that can possibly re-generate the cache state.
337    // The clock value is set to the actual checkpointed value once all the
338    // requests have been executed.
339    //
340    // This way of restoring state is pretty finicky. For example, if a
341    // Ruby component reads time before the state has been restored, it would
342    // cache this value and hence its clock would not be reset to 0, when
343    // Ruby resets the global clock. This can potentially result in a
344    // deadlock.
345    //
346    // The solution is that no Ruby component should read time before the
347    // simulation starts. And then one also needs to hope that the time
348    // Ruby finishes restoring the state is less than the time when the
349    // state was checkpointed.
350
351    if (m_warmup_enabled) {
352        DPRINTF(RubyCacheTrace, "Starting ruby cache warmup\n");
353        // save the current tick value
354        Tick curtick_original = curTick();
355        // save the event queue head
356        Event* eventq_head = eventq->replaceHead(NULL);
357        // set curTick to 0 and reset Ruby System's clock
358        setCurTick(0);
359        resetClock();
360
361        // Schedule an event to start cache warmup
362        enqueueRubyEvent(curTick());
363        simulate();
364
365        delete m_cache_recorder;
366        m_cache_recorder = NULL;
367        m_systems_to_warmup--;
368        if (m_systems_to_warmup == 0) {
369            m_warmup_enabled = false;
370        }
371
372        // Restore eventq head
373        eventq->replaceHead(eventq_head);
374        // Restore curTick and Ruby System's clock
375        setCurTick(curtick_original);
376        resetClock();
377    }
378
379    resetStats();
380}
381
382void
383RubySystem::processRubyEvent()
384{
385    if (getWarmupEnabled()) {
386        m_cache_recorder->enqueueNextFetchRequest();
387    } else if (getCooldownEnabled()) {
388        m_cache_recorder->enqueueNextFlushRequest();
389    }
390}
391
392void
393RubySystem::resetStats()
394{
395    m_start_cycle = curCycle();
396}
397
398bool
399RubySystem::functionalRead(PacketPtr pkt)
400{
401    Addr address(pkt->getAddr());
402    Addr line_address = makeLineAddress(address);
403
404    AccessPermission access_perm = AccessPermission_NotPresent;
405    int num_controllers = m_abs_cntrl_vec.size();
406
407    DPRINTF(RubySystem, "Functional Read request for %#x\n", address);
408
409    unsigned int num_ro = 0;
410    unsigned int num_rw = 0;
411    unsigned int num_busy = 0;
412    unsigned int num_backing_store = 0;
413    unsigned int num_invalid = 0;
414
415    // In this loop we count the number of controllers that have the given
416    // address in read only, read write and busy states.
417    for (unsigned int i = 0; i < num_controllers; ++i) {
418        access_perm = m_abs_cntrl_vec[i]-> getAccessPermission(line_address);
419        if (access_perm == AccessPermission_Read_Only)
420            num_ro++;
421        else if (access_perm == AccessPermission_Read_Write)
422            num_rw++;
423        else if (access_perm == AccessPermission_Busy)
424            num_busy++;
425        else if (access_perm == AccessPermission_Backing_Store)
426            // See RubySlicc_Exports.sm for details, but Backing_Store is meant
427            // to represent blocks in memory *for Broadcast/Snooping protocols*,
428            // where memory has no idea whether it has an exclusive copy of data
429            // or not.
430            num_backing_store++;
431        else if (access_perm == AccessPermission_Invalid ||
432                 access_perm == AccessPermission_NotPresent)
433            num_invalid++;
434    }
435
436    // This if case is meant to capture what happens in a Broadcast/Snoop
437    // protocol where the block does not exist in the cache hierarchy. You
438    // only want to read from the Backing_Store memory if there is no copy in
439    // the cache hierarchy, otherwise you want to try to read the RO or RW
440    // copies existing in the cache hierarchy (covered by the else statement).
441    // The reason is because the Backing_Store memory could easily be stale, if
442    // there are copies floating around the cache hierarchy, so you want to read
443    // it only if it's not in the cache hierarchy at all.
444    if (num_invalid == (num_controllers - 1) && num_backing_store == 1) {
445        DPRINTF(RubySystem, "only copy in Backing_Store memory, read from it\n");
446        for (unsigned int i = 0; i < num_controllers; ++i) {
447            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
448            if (access_perm == AccessPermission_Backing_Store) {
449                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
450                return true;
451            }
452        }
453    } else if (num_ro > 0 || num_rw >= 1) {
454        if (num_rw > 1) {
455            // We iterate over the vector of abstract controllers, and return
456            // the first copy found. If we have more than one cache with block
457            // in writable permission, the first one found would be returned.
458            warn("More than one Abstract Controller with RW permission for "
459                 "addr: %#x on cacheline: %#x.", address, line_address);
460        }
461        // In Broadcast/Snoop protocols, this covers if you know the block
462        // exists somewhere in the caching hierarchy, then you want to read any
463        // valid RO or RW block.  In directory protocols, same thing, you want
464        // to read any valid readable copy of the block.
465        DPRINTF(RubySystem, "num_busy = %d, num_ro = %d, num_rw = %d\n",
466                num_busy, num_ro, num_rw);
467        // In this loop, we try to figure which controller has a read only or
468        // a read write copy of the given address. Any valid copy would suffice
469        // for a functional read.
470        for (unsigned int i = 0;i < num_controllers;++i) {
471            access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_address);
472            if (access_perm == AccessPermission_Read_Only ||
473                access_perm == AccessPermission_Read_Write) {
474                m_abs_cntrl_vec[i]->functionalRead(line_address, pkt);
475                return true;
476            }
477        }
478    }
479
480    return false;
481}
482
483// The function searches through all the buffers that exist in different
484// cache, directory and memory controllers, and in the network components
485// and writes the data portion of those that hold the address specified
486// in the packet.
487bool
488RubySystem::functionalWrite(PacketPtr pkt)
489{
490    Addr addr(pkt->getAddr());
491    Addr line_addr = makeLineAddress(addr);
492    AccessPermission access_perm = AccessPermission_NotPresent;
493    int num_controllers = m_abs_cntrl_vec.size();
494
495    DPRINTF(RubySystem, "Functional Write request for %#x\n", addr);
496
497    uint32_t M5_VAR_USED num_functional_writes = 0;
498
499    for (unsigned int i = 0; i < num_controllers;++i) {
500        num_functional_writes +=
501            m_abs_cntrl_vec[i]->functionalWriteBuffers(pkt);
502
503        access_perm = m_abs_cntrl_vec[i]->getAccessPermission(line_addr);
504        if (access_perm != AccessPermission_Invalid &&
505            access_perm != AccessPermission_NotPresent) {
506            num_functional_writes +=
507                m_abs_cntrl_vec[i]->functionalWrite(line_addr, pkt);
508        }
509    }
510
511    num_functional_writes += m_network->functionalWrite(pkt);
512    DPRINTF(RubySystem, "Messages written = %u\n", num_functional_writes);
513
514    return true;
515}
516
517RubySystem *
518RubySystemParams::create()
519{
520    return new RubySystem(this);
521}
522