1/*
2 * Copyright (c) 2013 - 2015 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Radhika Jagtap
38 *          Andreas Hansson
39 *          Thomas Grass
40 */
41
42/**
43 * @file This file describes a trace component which is a cpu probe listener
44 * used to generate elastic cpu traces. It registers listeners to probe points
45 * in the fetch, rename, iew and commit stages of the O3CPU. It processes the
46 * dependency graph of the cpu execution and writes out a protobuf trace. It
47 * also generates a protobuf trace of the instruction fetch requests.
48 */
49
50#ifndef __CPU_O3_PROBE_ELASTIC_TRACE_HH__
51#define __CPU_O3_PROBE_ELASTIC_TRACE_HH__
52
53#include <set>
54#include <unordered_map>
55#include <utility>
56
57#include "cpu/o3/dyn_inst.hh"
58#include "cpu/o3/impl.hh"
59#include "mem/request.hh"
60#include "params/ElasticTrace.hh"
61#include "proto/inst_dep_record.pb.h"
62#include "proto/packet.pb.h"
63#include "proto/protoio.hh"
64#include "sim/eventq.hh"
65#include "sim/probe/probe.hh"
66
67/**
68 * The elastic trace is a type of probe listener and listens to probe points
69 * in multiple stages of the O3CPU. The notify method is called on a probe
70 * point typically when an instruction successfully progresses through that
71 * stage.
72 *
73 * As different listener methods mapped to the different probe points execute,
74 * relevant information about the instruction, e.g. timestamps and register
75 * accesses, are captured and stored in temporary data structures. When the
76 * instruction progresses through the commit stage, the timing as well as
77 * dependency information about the instruction is finalised and encapsulated in
78 * a struct called TraceInfo. TraceInfo objects are collected in a list instead
79 * of writing them out to the trace file one a time. This is required as the
80 * trace is processed in chunks to evaluate order dependencies and computational
81 * delay in case an instruction does not have any register dependencies. By this
82 * we achieve a simpler algorithm during replay because every record in the
83 * trace can be hooked onto a record in its past. The trace is written out as
84 * a protobuf format output file.
85 *
86 * The output trace can be read in and played back by the TraceCPU.
87 */
88class ElasticTrace : public ProbeListenerObject
89{
90
91  public:
92    typedef typename O3CPUImpl::DynInstPtr DynInstPtr;
93    typedef typename O3CPUImpl::DynInstConstPtr DynInstConstPtr;
94    typedef typename std::pair<InstSeqNum, PhysRegIndex> SeqNumRegPair;
95
96    /** Trace record types corresponding to instruction node types */
97    typedef ProtoMessage::InstDepRecord::RecordType RecordType;
98    typedef ProtoMessage::InstDepRecord Record;
99
100    /** Constructor */
101    ElasticTrace(const ElasticTraceParams *params);
102
103    /**
104     * Register the probe listeners that is the methods called on a probe point
105     * notify() call.
106     */
107    void regProbeListeners();
108
109    /** Register all listeners. */
110    void regEtraceListeners();
111
112    /** Returns the name of the trace probe listener. */
113    const std::string name() const;
114
115    /**
116     * Process any outstanding trace records, flush them out to the protobuf
117     * output streams and delete the streams at simulation exit.
118     */
119    void flushTraces();
120
121    /**
122     * Take the fields of the request class object that are relevant to create
123     * an instruction fetch request. It creates a protobuf message containing
124     * the request fields and writes it to instTraceStream.
125     *
126     * @param req pointer to the fetch request
127     */
128    void fetchReqTrace(const RequestPtr &req);
129
130    /**
131     * Populate the execute timestamp field in an InstExecInfo object for an
132     * instruction in flight.
133     *
134     * @param dyn_inst pointer to dynamic instruction in flight
135     */
136    void recordExecTick(const DynInstConstPtr& dyn_inst);
137
138    /**
139     * Populate the timestamp field in an InstExecInfo object for an
140     * instruction in flight when it is execution is complete and it is ready
141     * to commit.
142     *
143     * @param dyn_inst pointer to dynamic instruction in flight
144     */
145    void recordToCommTick(const DynInstConstPtr& dyn_inst);
146
147    /**
148     * Record a Read After Write physical register dependency if there has
149     * been a write to the source register and update the physical register
150     * map. For this look up the physRegDepMap with this instruction as the
151     * writer of its destination register. If the dependency falls outside the
152     * window it is assumed as already complete. Duplicate entries are avoided.
153     *
154     * @param dyn_inst pointer to dynamic instruction in flight
155     */
156    void updateRegDep(const DynInstConstPtr& dyn_inst);
157
158    /**
159     * When an instruction gets squashed the destination register mapped to it
160     * is freed up in the rename stage. Remove the register entry from the
161     * physRegDepMap as well to avoid dependencies on squashed instructions.
162     *
163     * @param inst_reg_pair pair of inst. sequence no. and the register
164     */
165    void removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair);
166
167    /**
168     * Add an instruction that is at the head of the ROB and is squashed only
169     * if it is a load and a request was sent for it.
170     *
171     * @param head_inst pointer to dynamic instruction to be squashed
172     */
173    void addSquashedInst(const DynInstConstPtr& head_inst);
174
175    /**
176     * Add an instruction that is at the head of the ROB and is committed.
177     *
178     * @param head_inst pointer to dynamic instruction to be committed
179     */
180    void addCommittedInst(const DynInstConstPtr& head_inst);
181
182    /** Register statistics for the elastic trace. */
183    void regStats();
184
185    /** Event to trigger registering this listener for all probe points. */
186    EventFunctionWrapper regEtraceListenersEvent;
187
188  private:
189    /**
190     * Used for checking the first window for processing and writing of
191     * dependency trace. At the start of the program there can be dependency-
192     * free instructions and such cases are handled differently.
193     */
194    bool firstWin;
195
196    /**
197     * @defgroup InstExecInfo Struct for storing information before an
198     * instruction reaches the commit stage, e.g. execute timestamp.
199     */
200    struct InstExecInfo
201    {
202        /**
203         * @ingroup InstExecInfo
204         * @{
205         */
206        /** Timestamp when instruction was first processed by execute stage */
207        Tick executeTick;
208        /**
209         * Timestamp when instruction execution is completed in execute stage
210         * and instruction is marked as ready to commit
211         */
212        Tick toCommitTick;
213        /**
214         * Set of instruction sequence numbers that this instruction depends on
215         * due to Read After Write data dependency based on physical register.
216         */
217        std::set<InstSeqNum> physRegDepSet;
218        /** @} */
219
220        /** Constructor */
221        InstExecInfo()
222          : executeTick(MaxTick),
223            toCommitTick(MaxTick)
224        { }
225    };
226
227    /**
228     * Temporary store of InstExecInfo objects. Later on when an instruction
229     * is processed for commit or retire, if it is chosen to be written to
230     * the output trace then this information is looked up using the instruction
231     * sequence number as the key. If it is not chosen then the entry for it in
232     * the store is cleared.
233     */
234    std::unordered_map<InstSeqNum, InstExecInfo*> tempStore;
235
236    /**
237     * The last cleared instruction sequence number used to free up the memory
238     * allocated in the temporary store.
239     */
240    InstSeqNum lastClearedSeqNum;
241
242    /**
243     * Map for recording the producer of a physical register to check Read
244     * After Write dependencies. The key is the renamed physical register and
245     * the value is the instruction sequence number of its last producer.
246     */
247    std::unordered_map<PhysRegIndex, InstSeqNum> physRegDepMap;
248
249    /**
250     * @defgroup TraceInfo Struct for a record in the instruction dependency
251     * trace. All information required to process and calculate the
252     * computational delay is stored in TraceInfo objects. The memory request
253     * fields for a load or store instruction are also included here. Note
254     * that the structure TraceInfo does not store pointers to children
255     * or parents. The dependency trace is maintained as an ordered collection
256     * of records for writing to the output trace and not as a tree data
257     * structure.
258     */
259    struct TraceInfo
260    {
261        /**
262         * @ingroup TraceInfo
263         * @{
264         */
265        /* Instruction sequence number. */
266        InstSeqNum instNum;
267        /** The type of trace record for the instruction node */
268        RecordType type;
269        /* Tick when instruction was in execute stage. */
270        Tick executeTick;
271        /* Tick when instruction was marked ready and sent to commit stage. */
272        Tick toCommitTick;
273        /* Tick when instruction was committed. */
274        Tick commitTick;
275        /* If instruction was committed, as against squashed. */
276        bool commit;
277        /* List of order dependencies. */
278        std::list<InstSeqNum> robDepList;
279        /* List of physical register RAW dependencies. */
280        std::list<InstSeqNum> physRegDepList;
281        /**
282         * Computational delay after the last dependent inst. completed.
283         * A value of -1 which means instruction has no dependencies.
284         */
285        int64_t compDelay;
286        /* Number of dependents. */
287        uint32_t numDepts;
288        /* The instruction PC for a load, store or non load/store. */
289        Addr pc;
290        /* Request flags in case of a load/store instruction */
291        Request::FlagsType reqFlags;
292        /* Request physical address in case of a load/store instruction */
293        Addr physAddr;
294        /* Request virtual address in case of a load/store instruction */
295        Addr virtAddr;
296        /* Address space id in case of a load/store instruction */
297        uint32_t asid;
298        /* Request size in case of a load/store instruction */
299        unsigned size;
300        /** Default Constructor */
301        TraceInfo()
302          : type(Record::INVALID)
303        { }
304        /** Is the record a load */
305        bool isLoad() const { return (type == Record::LOAD); }
306        /** Is the record a store */
307        bool isStore() const { return (type == Record::STORE); }
308        /** Is the record a fetch triggering an Icache request */
309        bool isComp() const { return (type == Record::COMP); }
310        /** Return string specifying the type of the node */
311        const std::string& typeToStr() const;
312        /** @} */
313
314        /**
315         * Get the execute tick of the instruction.
316         *
317         * @return Tick when instruction was executed
318         */
319        Tick getExecuteTick() const;
320    };
321
322    /**
323     * The instruction dependency trace containing TraceInfo objects. The
324     * container implemented is sequential as dependencies obey commit
325     * order (program order). For example, if B is dependent on A then B must
326     * be committed after A. Thus records are updated with dependency
327     * information and written to the trace in commit order. This ensures that
328     * when a graph is reconstructed from the  trace during replay, all the
329     * dependencies are stored in the graph before  the dependent itself is
330     * added. This facilitates creating a tree data structure during replay,
331     * i.e. adding children as records are read from the trace in an efficient
332     * manner.
333     */
334    std::vector<TraceInfo*> depTrace;
335
336    /**
337     * Map where the instruction sequence number is mapped to the pointer to
338     * the TraceInfo object.
339     */
340    std::unordered_map<InstSeqNum, TraceInfo*> traceInfoMap;
341
342    /** Typedef of iterator to the instruction dependency trace. */
343    typedef typename std::vector<TraceInfo*>::iterator depTraceItr;
344
345    /** Typedef of the reverse iterator to the instruction dependency trace. */
346    typedef typename std::reverse_iterator<depTraceItr> depTraceRevItr;
347
348    /**
349     * The maximum distance for a dependency and is set by a top level
350     * level parameter. It must be equal to or greater than the number of
351     * entries in the ROB. This variable is used as the length of the sliding
352     * window for processing the dependency trace.
353     */
354    uint32_t depWindowSize;
355
356    /** Protobuf output stream for data dependency trace */
357    ProtoOutputStream* dataTraceStream;
358
359    /** Protobuf output stream for instruction fetch trace. */
360    ProtoOutputStream* instTraceStream;
361
362    /** Number of instructions after which to enable tracing. */
363    const InstSeqNum startTraceInst;
364
365    /**
366     * Whther the elastic trace listener has been registered for all probes.
367     *
368     * When enabling tracing after a specified number of instructions have
369     * committed, check this to prevent re-registering the listener.
370     */
371    bool allProbesReg;
372
373    /** Whether to trace virtual addresses for memory requests. */
374    const bool traceVirtAddr;
375
376    /** Pointer to the O3CPU that is this listener's parent a.k.a. manager */
377    FullO3CPU<O3CPUImpl>* cpu;
378
379    /**
380     * Add a record to the dependency trace depTrace which is a sequential
381     * container. A record is inserted per committed instruction and in the same
382     * order as the order in which instructions are committed.
383     *
384     * @param head_inst     Pointer to the instruction which is head of the
385     *                      ROB and ready to commit
386     * @param exec_info_ptr Pointer to InstExecInfo for that instruction
387     * @param commit        True if instruction is committed, false if squashed
388     */
389    void addDepTraceRecord(const DynInstConstPtr& head_inst,
390                           InstExecInfo* exec_info_ptr, bool commit);
391
392    /**
393     * Clear entries in the temporary store of execution info objects to free
394     * allocated memory until the present instruction being added to the trace.
395     *
396     * @param head_inst pointer to dynamic instruction
397     */
398    void clearTempStoreUntil(const DynInstConstPtr& head_inst);
399
400    /**
401     * Calculate the computational delay between an instruction and a
402     * subsequent instruction that has an ROB (order) dependency on it
403     *
404     * @param past_record   Pointer to instruction
405     *
406     * @param new_record    Pointer to subsequent instruction having an ROB
407     *                      dependency on the instruction pointed to by
408     *                      past_record
409     */
410    void compDelayRob(TraceInfo* past_record, TraceInfo* new_record);
411
412    /**
413     * Calculate the computational delay between an instruction and a
414     * subsequent instruction that has a Physical Register (data) dependency on
415     * it.
416     *
417     * @param past_record   Pointer to instruction
418     *
419     * @param new_record    Pointer to subsequent instruction having a Physical
420     *                      Register dependency on the instruction pointed to
421     *                      by past_record
422     */
423    void compDelayPhysRegDep(TraceInfo* past_record, TraceInfo* new_record);
424
425    /**
426     * Write out given number of records to the trace starting with the first
427     * record in depTrace and iterating through the trace in sequence. A
428     * record is deleted after it is written.
429     *
430     * @param num_to_write Number of records to write to the trace
431     */
432    void writeDepTrace(uint32_t num_to_write);
433
434    /**
435     * Reverse iterate through the graph, search for a store-after-store or
436     * store-after-load dependency and update the new node's Rob dependency list.
437     *
438     * If a dependency is found, then call the assignRobDep() method that
439     * updates the store with the dependency information. This function is only
440     * called when a new store node is added to the trace.
441     *
442     * @param new_record    pointer to new store record
443     * @param find_load_not_store true for searching store-after-load and false
444     *                          for searching store-after-store dependency
445     */
446    void updateCommitOrderDep(TraceInfo* new_record, bool find_load_not_store);
447
448    /**
449     * Reverse iterate through the graph, search for an issue order dependency
450     * for a new node and update the new node's Rob dependency list.
451     *
452     * If a dependency is found, call the assignRobDep() method that updates
453     * the node with its dependency information. This function is called in
454     * case a new node to be added to the trace is dependency-free or its
455     * dependency got discarded because the dependency was outside the window.
456     *
457     * @param new_record    pointer to new record to be added to the trace
458     */
459    void updateIssueOrderDep(TraceInfo* new_record);
460
461    /**
462     * The new_record has an order dependency on a past_record, thus update the
463     * new record's Rob dependency list and increment the number of dependents
464     * of the past record.
465     *
466     * @param new_record    pointer to new record
467     * @param past_record   pointer to record that new_record has a rob
468     *                      dependency on
469     */
470    void assignRobDep(TraceInfo* past_record, TraceInfo* new_record);
471
472    /**
473     * Check if past record is a store sent earlier than the execute tick.
474     *
475     * @param past_record   pointer to past store
476     * @param execute_tick  tick with which to compare past store's commit tick
477     *
478     * @return true if past record is store sent earlier
479     */
480    bool hasStoreCommitted(TraceInfo* past_record, Tick execute_tick) const;
481
482    /**
483     * Check if past record is a load that completed earlier than the execute
484     * tick.
485     *
486     * @param past_record   pointer to past load
487     * @param execute_tick  tick with which to compare past load's complete
488     *                      tick
489     *
490     * @return true if past record is load completed earlier
491     */
492    bool hasLoadCompleted(TraceInfo* past_record, Tick execute_tick) const;
493
494    /**
495     * Check if past record is a load sent earlier than the execute tick.
496     *
497     * @param past_record   pointer to past load
498     * @param execute_tick  tick with which to compare past load's send tick
499     *
500     * @return true if past record is load sent earlier
501     */
502    bool hasLoadBeenSent(TraceInfo* past_record, Tick execute_tick) const;
503
504    /**
505     * Check if past record is a comp node that completed earlier than the
506     * execute tick.
507     *
508     * @param past_record   pointer to past comp node
509     * @param execute_tick  tick with which to compare past comp node's
510     *                      completion tick
511     *
512     * @return true if past record is comp completed earlier
513     */
514    bool hasCompCompleted(TraceInfo* past_record, Tick execute_tick) const;
515
516    /** Number of register dependencies recorded during tracing */
517    Stats::Scalar numRegDep;
518
519    /**
520     * Number of stores that got assigned a commit order dependency
521     * on a past load/store.
522     */
523    Stats::Scalar numOrderDepStores;
524
525    /**
526     * Number of load insts that got assigned an issue order dependency
527     * because they were dependency-free.
528     */
529    Stats::Scalar numIssueOrderDepLoads;
530
531    /**
532     * Number of store insts that got assigned an issue order dependency
533     * because they were dependency-free.
534     */
535    Stats::Scalar numIssueOrderDepStores;
536
537    /**
538     * Number of non load/store insts that got assigned an issue order
539     * dependency because they were dependency-free.
540     */
541    Stats::Scalar numIssueOrderDepOther;
542
543    /** Number of filtered nodes */
544    Stats::Scalar numFilteredNodes;
545
546    /** Maximum number of dependents on any instruction */
547    Stats::Scalar maxNumDependents;
548
549    /**
550     * Maximum size of the temporary store mostly useful as a check that it is
551     * not growing
552     */
553    Stats::Scalar maxTempStoreSize;
554
555    /**
556     * Maximum size of the map that holds the last writer to a physical
557     * register.
558     * */
559    Stats::Scalar maxPhysRegDepMapSize;
560
561};
562#endif//__CPU_O3_PROBE_ELASTIC_TRACE_HH__
563