trace_cpu.hh revision 11252:18bb597fc40c
13096SN/A/*
23096SN/A * Copyright (c) 2013 - 2015 ARM Limited
35520SN/A * All rights reserved
48844SAli.Saidi@ARM.com *
58844SAli.Saidi@ARM.com * The license below extends only to copyright in the software and shall
68428SN/A * not be construed as granting a license to any other intellectual
78983Snate@binkert.org * property including but not limited to intellectual property relating
88983Snate@binkert.org * to a hardware implementation of the functionality of the software
98983Snate@binkert.org * licensed hereunder.  You may use the software subject to the license
108983Snate@binkert.org * terms below provided that you ensure that this notice is replicated
118983Snate@binkert.org * unmodified and in its entirety in all distributions of the software,
128428SN/A * modified or unmodified, in source code or in binary form.
138835SAli.Saidi@ARM.com *
148844SAli.Saidi@ARM.com * Redistribution and use in source and binary forms, with or without
158844SAli.Saidi@ARM.com * modification, are permitted provided that the following conditions are
168721SN/A * met: redistributions of source code must retain the above copyright
178844SAli.Saidi@ARM.com * notice, this list of conditions and the following disclaimer;
188721SN/A * redistributions in binary form must reproduce the above copyright
198721SN/A * notice, this list of conditions and the following disclaimer in the
208844SAli.Saidi@ARM.com * documentation and/or other materials provided with the distribution;
218844SAli.Saidi@ARM.com * neither the name of the copyright holders nor the names of its
228844SAli.Saidi@ARM.com * contributors may be used to endorse or promote products derived from
238428SN/A * this software without specific prior written permission.
248428SN/A *
258428SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
268428SN/A * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
278844SAli.Saidi@ARM.com * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
288844SAli.Saidi@ARM.com * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
298428SN/A * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
308844SAli.Saidi@ARM.com * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
318844SAli.Saidi@ARM.com * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
328844SAli.Saidi@ARM.com * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
338428SN/A * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
348844SAli.Saidi@ARM.com * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
358844SAli.Saidi@ARM.com * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
368844SAli.Saidi@ARM.com *
378428SN/A * Authors: Radhika Jagtap
388844SAli.Saidi@ARM.com *          Andreas Hansson
398844SAli.Saidi@ARM.com *          Thomas Grass
408844SAli.Saidi@ARM.com */
418428SN/A
428844SAli.Saidi@ARM.com#ifndef __CPU_TRACE_TRACE_CPU_HH__
438428SN/A#define __CPU_TRACE_TRACE_CPU_HH__
448428SN/A
458428SN/A#include <array>
468428SN/A#include <cstdint>
478428SN/A#include <queue>
488428SN/A#include <set>
498428SN/A#include <unordered_map>
508428SN/A
518428SN/A#include "arch/registers.hh"
528428SN/A#include "base/statistics.hh"
538428SN/A#include "cpu/base.hh"
548428SN/A#include "debug/TraceCPUData.hh"
558428SN/A#include "debug/TraceCPUInst.hh"
568844SAli.Saidi@ARM.com#include "params/TraceCPU.hh"
578428SN/A#include "proto/inst_dep_record.pb.h"
588428SN/A#include "proto/packet.pb.h"
598844SAli.Saidi@ARM.com#include "proto/protoio.hh"
608844SAli.Saidi@ARM.com#include "sim/sim_events.hh"
618844SAli.Saidi@ARM.com
628844SAli.Saidi@ARM.com/**
638844SAli.Saidi@ARM.com * The trace cpu replays traces generated using the elastic trace probe
648428SN/A * attached to the O3 CPU model. The elastic trace is an execution trace with
658844SAli.Saidi@ARM.com * register data dependencies and ordering dependencies annotated to it. The
668844SAli.Saidi@ARM.com * trace cpu also replays a fixed timestamp fetch trace that is also generated
678844SAli.Saidi@ARM.com * by the elastic trace probe. This trace cpu model aims at achieving faster
688844SAli.Saidi@ARM.com * simulation compared to the detailed cpu model and good correlation when the
698844SAli.Saidi@ARM.com * same trace is used for playback on different memory sub-systems.
708844SAli.Saidi@ARM.com *
718844SAli.Saidi@ARM.com * The TraceCPU inherits from BaseCPU so some virtual methods need to be
728844SAli.Saidi@ARM.com * defined. It has two port subclasses inherited from MasterPort for
738844SAli.Saidi@ARM.com * instruction and data ports. It issues the memory requests deducing the
748844SAli.Saidi@ARM.com * timing from the trace and without performing real execution of micro-ops. As
758844SAli.Saidi@ARM.com * soon as the last dependency for an instruction is complete, its
768844SAli.Saidi@ARM.com * computational delay, also provided in the input trace is added. The
778844SAli.Saidi@ARM.com * dependency-free nodes are maintained in a list, called 'ReadyList', ordered
788844SAli.Saidi@ARM.com * by ready time. Instructions which depend on load stall until the responses
798844SAli.Saidi@ARM.com * for read requests are received thus achieving elastic replay. If the
808844SAli.Saidi@ARM.com * dependency is not found when adding a new node, it is assumed complete.
816291SN/A * Thus, if this node is found to be completely dependency-free its issue time
828844SAli.Saidi@ARM.com * is calculated and it is added to the ready list immediately. This is
838844SAli.Saidi@ARM.com * encapsulated in the subclass ElasticDataGen.
848844SAli.Saidi@ARM.com *
858844SAli.Saidi@ARM.com * If ready nodes are issued in an unconstrained way there can be more nodes
868844SAli.Saidi@ARM.com * outstanding which results in divergence in timing compared to the O3CPU.
878844SAli.Saidi@ARM.com * Therefore, the Trace CPU also models hardware resources. A sub-class to
888844SAli.Saidi@ARM.com * model hardware resources contains the maximum sizes of load buffer, store
898844SAli.Saidi@ARM.com * buffer and ROB. If resources are not available, the node is not issued. Such
908844SAli.Saidi@ARM.com * nodes that are pending issue are held in the 'depFreeQueue' structure.
916291SN/A *
926291SN/A * Modeling the ROB size in the Trace CPU as a resource limitation is arguably
936291SN/A * the most important parameter of all resources. The ROB occupancy is
948844SAli.Saidi@ARM.com * estimated using the newly added field 'robNum'. We need to use ROB number as
958844SAli.Saidi@ARM.com * sequence number is at times much higher due to squashing and trace replay is
968844SAli.Saidi@ARM.com * focused on correct path modeling.
978844SAli.Saidi@ARM.com *
 * A map called 'inFlightNodes' tracks not only the nodes that are in the
 * readyList but also load nodes that have been executed (and thus removed
 * from the readyList) but are not yet complete. The readyList determines
 * which node to execute next and when, while inFlightNodes is used for
 * resource modelling. The oldest ROB number is updated when any node
 * occupies the ROB or when an entry in the ROB is released. The ROB
 * occupancy is equal to the difference between the ROB number of the newly
 * dependency-free node and the oldest ROB number in flight.
1068844SAli.Saidi@ARM.com *
1078844SAli.Saidi@ARM.com * If no node depends on a non load/store node then there is no reason to
1088844SAli.Saidi@ARM.com * track it in the dependency graph. We filter out such nodes but count them
1098844SAli.Saidi@ARM.com * and add a weight field to the subsequent node that we do include in the
1108844SAli.Saidi@ARM.com * trace. The weight field is used to model ROB occupancy during replay.
1118844SAli.Saidi@ARM.com *
 * The depFreeQueue is chosen to be FIFO so that child nodes which are in
 * program order get pushed into it in that order and thus issued in program
 * order, like in the O3CPU. This is also why the dependents container was
 * changed from std::set to a sequential container, std::vector. We only
 * check the head of the depFreeQueue as nodes are issued in order and
 * blocking on the head models that better than looping over the entire
 * queue. An alternative choice would be to inspect the top N pending nodes
 * where N is the issue-width. This is left for the future as the timing
 * correlation looks good as it is.
1208844SAli.Saidi@ARM.com *
1218428SN/A * At the start of an execution event, first we attempt to issue such pending
1228428SN/A * nodes by checking if appropriate resources have become available. If yes, we
1238844SAli.Saidi@ARM.com * compute the execute tick with respect to the time then. Then we proceed to
1248844SAli.Saidi@ARM.com * complete nodes from the readyList.
1258844SAli.Saidi@ARM.com *
1268844SAli.Saidi@ARM.com * When a read response is received, sometimes a dependency on it that was
1278428SN/A * supposed to be released when it was issued is still not released. This
1288844SAli.Saidi@ARM.com * occurs because the dependent gets added to the graph after the read was
1298844SAli.Saidi@ARM.com * sent. So the check is made less strict and the dependency is marked complete
1308844SAli.Saidi@ARM.com * on read response instead of insisting that it should have been removed on
1318844SAli.Saidi@ARM.com * read sent.
1328844SAli.Saidi@ARM.com *
1338844SAli.Saidi@ARM.com * There is a check for requests spanning two cache lines as this condition
1348844SAli.Saidi@ARM.com * triggers an assert fail in the L1 cache. If it does then truncate the size
1358844SAli.Saidi@ARM.com * to access only until the end of that line and ignore the remainder.
1368844SAli.Saidi@ARM.com * Strictly-ordered requests are skipped and the dependencies on such requests
1378844SAli.Saidi@ARM.com * are handled by simply marking them complete immediately.
1388428SN/A *
1398844SAli.Saidi@ARM.com * The simulated seconds can be calculated as the difference between the
1408844SAli.Saidi@ARM.com * final_tick stat and the tickOffset stat. A CountedExitEvent that contains a
1418844SAli.Saidi@ARM.com * static int belonging to the Trace CPU class as a down counter is used to
1428844SAli.Saidi@ARM.com * implement multi Trace CPU simulation exit.
1438844SAli.Saidi@ARM.com */
1448844SAli.Saidi@ARM.com
1458844SAli.Saidi@ARM.comclass TraceCPU : public BaseCPU
1468844SAli.Saidi@ARM.com{
1478844SAli.Saidi@ARM.com
1488428SN/A  public:
1498428SN/A    TraceCPU(TraceCPUParams *params);
1508428SN/A    ~TraceCPU();
1518844SAli.Saidi@ARM.com
1528428SN/A    void init();
1538844SAli.Saidi@ARM.com
1548844SAli.Saidi@ARM.com    /**
1558844SAli.Saidi@ARM.com     * This is a pure virtual function in BaseCPU. As we don't know how many
1568844SAli.Saidi@ARM.com     * insts are in the trace but only know how how many micro-ops are we
1578844SAli.Saidi@ARM.com     * cannot count this stat.
1588844SAli.Saidi@ARM.com     *
1598844SAli.Saidi@ARM.com     * @return 0
1608844SAli.Saidi@ARM.com     */
1618844SAli.Saidi@ARM.com    Counter totalInsts() const
1628844SAli.Saidi@ARM.com    {
1638844SAli.Saidi@ARM.com        return 0;
1648844SAli.Saidi@ARM.com    }
1658844SAli.Saidi@ARM.com
1668844SAli.Saidi@ARM.com    /**
1678844SAli.Saidi@ARM.com     * Return totalOps as the number of committed micro-ops plus the
1688844SAli.Saidi@ARM.com     * speculatively issued loads that are modelled in the TraceCPU replay.
1698844SAli.Saidi@ARM.com     *
1708844SAli.Saidi@ARM.com     * @return number of micro-ops i.e. nodes in the elastic data generator
1718844SAli.Saidi@ARM.com     */
1728844SAli.Saidi@ARM.com    Counter totalOps() const
1738844SAli.Saidi@ARM.com    {
1748844SAli.Saidi@ARM.com        return dcacheGen.getMicroOpCount();
1758844SAli.Saidi@ARM.com    }
1768844SAli.Saidi@ARM.com
1778844SAli.Saidi@ARM.com    /* Pure virtual function in BaseCPU. Do nothing. */
1788844SAli.Saidi@ARM.com    void wakeup(ThreadID tid = 0)
1798844SAli.Saidi@ARM.com    {
1808844SAli.Saidi@ARM.com        return;
1818844SAli.Saidi@ARM.com    }
1828844SAli.Saidi@ARM.com
1838844SAli.Saidi@ARM.com    /*
1848428SN/A     * When resuming from checkpoint in FS mode, the TraceCPU takes over from
1858428SN/A     * the old cpu. This function overrides the takeOverFrom() function in the
1868241SN/A     * BaseCPU. It unbinds the ports of the old CPU and binds the ports of the
1878844SAli.Saidi@ARM.com     * TraceCPU.
1888844SAli.Saidi@ARM.com     */
1898844SAli.Saidi@ARM.com    void takeOverFrom(BaseCPU *oldCPU);
1908844SAli.Saidi@ARM.com
1918844SAli.Saidi@ARM.com    /**
1928844SAli.Saidi@ARM.com     * When instruction cache port receives a retry, schedule event
1938844SAli.Saidi@ARM.com     * icacheNextEvent.
1948844SAli.Saidi@ARM.com     */
1958844SAli.Saidi@ARM.com    void icacheRetryRecvd();
1968844SAli.Saidi@ARM.com
1978844SAli.Saidi@ARM.com    /**
1988844SAli.Saidi@ARM.com     * When data cache port receives a retry, schedule event
1998844SAli.Saidi@ARM.com     * dcacheNextEvent.
2008844SAli.Saidi@ARM.com     */
2018844SAli.Saidi@ARM.com    void dcacheRetryRecvd();
2028844SAli.Saidi@ARM.com
2038844SAli.Saidi@ARM.com    /**
2048844SAli.Saidi@ARM.com     * When data cache port receives a response, this calls the dcache
2058844SAli.Saidi@ARM.com     * generator method handle to complete the load writeback.
2068844SAli.Saidi@ARM.com     *
2078844SAli.Saidi@ARM.com     * @param pkt Pointer to packet received
2088844SAli.Saidi@ARM.com     */
2098844SAli.Saidi@ARM.com    void dcacheRecvTimingResp(PacketPtr pkt);
2108844SAli.Saidi@ARM.com
2118844SAli.Saidi@ARM.com    /**
2128844SAli.Saidi@ARM.com     * Schedule event dcacheNextEvent at the given tick
2138844SAli.Saidi@ARM.com     *
2148844SAli.Saidi@ARM.com     * @param when Tick at which to schedule event
2158844SAli.Saidi@ARM.com     */
2168844SAli.Saidi@ARM.com    void schedDcacheNextEvent(Tick when);
2178844SAli.Saidi@ARM.com
2188241SN/A  protected:
2198241SN/A
2208844SAli.Saidi@ARM.com    /**
2218844SAli.Saidi@ARM.com     * IcachePort class that interfaces with L1 Instruction Cache.
2228844SAli.Saidi@ARM.com     */
2238844SAli.Saidi@ARM.com    class IcachePort : public MasterPort
2248844SAli.Saidi@ARM.com    {
2258844SAli.Saidi@ARM.com      public:
2268844SAli.Saidi@ARM.com        /** Default constructor. */
2278428SN/A        IcachePort(TraceCPU* _cpu)
2288428SN/A            : MasterPort(_cpu->name() + ".icache_port", _cpu),
2298428SN/A                         owner(_cpu)
2308844SAli.Saidi@ARM.com        { }
2318428SN/A
2328844SAli.Saidi@ARM.com      public:
2338428SN/A        /**
2348844SAli.Saidi@ARM.com         * Receive the timing reponse and simply delete the packet since
2358464SN/A         * instruction fetch requests are issued as per the timing in the trace
2368844SAli.Saidi@ARM.com         * and responses are ignored.
2378844SAli.Saidi@ARM.com         *
2388428SN/A         * @param pkt Pointer to packet received
2398428SN/A         * @return true
2408428SN/A         */
2418428SN/A        bool recvTimingResp(PacketPtr pkt);
2428428SN/A
2438844SAli.Saidi@ARM.com        /**
2448844SAli.Saidi@ARM.com         * Required functionally but do nothing.
2458844SAli.Saidi@ARM.com         *
2468844SAli.Saidi@ARM.com         * @param pkt Pointer to packet received
2478844SAli.Saidi@ARM.com         */
2488844SAli.Saidi@ARM.com        void recvTimingSnoopReq(PacketPtr pkt) { }
2498844SAli.Saidi@ARM.com
2508844SAli.Saidi@ARM.com        /**
2518844SAli.Saidi@ARM.com         * Handle a retry signalled by the cache if instruction read failed in
2528428SN/A         * the first attempt.
2538844SAli.Saidi@ARM.com         */
2548844SAli.Saidi@ARM.com        void recvReqRetry();
2558844SAli.Saidi@ARM.com
2568844SAli.Saidi@ARM.com      private:
2578844SAli.Saidi@ARM.com        TraceCPU* owner;
2588844SAli.Saidi@ARM.com    };
2598844SAli.Saidi@ARM.com
2608428SN/A    /**
2618844SAli.Saidi@ARM.com     * DcachePort class that interfaces with L1 Data Cache.
2628844SAli.Saidi@ARM.com     */
2638844SAli.Saidi@ARM.com    class DcachePort : public MasterPort
2648844SAli.Saidi@ARM.com    {
2658844SAli.Saidi@ARM.com
2668844SAli.Saidi@ARM.com      public:
2678844SAli.Saidi@ARM.com        /** Default constructor. */
2688844SAli.Saidi@ARM.com        DcachePort(TraceCPU* _cpu)
2698844SAli.Saidi@ARM.com            : MasterPort(_cpu->name() + ".dcache_port", _cpu),
2708428SN/A                         owner(_cpu)
2718844SAli.Saidi@ARM.com        { }
2728844SAli.Saidi@ARM.com
2738428SN/A      public:
2748428SN/A
2758835SAli.Saidi@ARM.com        /**
2768844SAli.Saidi@ARM.com         * Receive the timing reponse and call dcacheRecvTimingResp() method
2778428SN/A         * of the dcacheGen to handle completing the load
2788844SAli.Saidi@ARM.com         *
2798844SAli.Saidi@ARM.com         * @param pkt Pointer to packet received
2808844SAli.Saidi@ARM.com         * @return true
2818844SAli.Saidi@ARM.com         */
2828428SN/A        bool recvTimingResp(PacketPtr pkt);
2838844SAli.Saidi@ARM.com
2848844SAli.Saidi@ARM.com        /**
2858844SAli.Saidi@ARM.com         * Required functionally but do nothing.
2868844SAli.Saidi@ARM.com         *
2878844SAli.Saidi@ARM.com         * @param pkt Pointer to packet received
2888844SAli.Saidi@ARM.com         */
2898844SAli.Saidi@ARM.com        void recvTimingSnoopReq(PacketPtr pkt)
2908844SAli.Saidi@ARM.com        { }
2918844SAli.Saidi@ARM.com
2928428SN/A        /**
2938428SN/A         * Required functionally but do nothing.
2948428SN/A         *
2958844SAli.Saidi@ARM.com         * @param pkt Pointer to packet received
2968835SAli.Saidi@ARM.com         */
2978835SAli.Saidi@ARM.com        void recvFunctionalSnoop(PacketPtr pkt)
2988428SN/A        { }
2998428SN/A
3008428SN/A        /**
3018428SN/A         * Handle a retry signalled by the cache if data access failed in the
3028428SN/A         * first attempt.
3038428SN/A         */
3048428SN/A        void recvReqRetry();
3058428SN/A
3068844SAli.Saidi@ARM.com        /**
3078428SN/A         * Required functionally.
3088844SAli.Saidi@ARM.com         *
3098844SAli.Saidi@ARM.com         * @return true since we have to snoop
3108844SAli.Saidi@ARM.com         */
3118844SAli.Saidi@ARM.com        bool isSnooping() const { return true; }
3128428SN/A
3138835SAli.Saidi@ARM.com      private:
3148428SN/A        TraceCPU* owner;
3158844SAli.Saidi@ARM.com    };
3168844SAli.Saidi@ARM.com
3178844SAli.Saidi@ARM.com    /** Port to connect to L1 instruction cache. */
3188844SAli.Saidi@ARM.com    IcachePort icachePort;
3198844SAli.Saidi@ARM.com
3208844SAli.Saidi@ARM.com    /** Port to connect to L1 data cache. */
3218428SN/A    DcachePort dcachePort;
3228428SN/A
3238428SN/A    /** Master id for instruction read requests. */
3248428SN/A    const MasterID instMasterID;
3258428SN/A
3268844SAli.Saidi@ARM.com    /** Master id for data read and write requests. */
3278844SAli.Saidi@ARM.com    const MasterID dataMasterID;
3288844SAli.Saidi@ARM.com
3298844SAli.Saidi@ARM.com    /** File names for input instruction and data traces. */
3308428SN/A    std::string instTraceFile, dataTraceFile;
3318844SAli.Saidi@ARM.com
3328844SAli.Saidi@ARM.com    /**
3338844SAli.Saidi@ARM.com     * Generator to read protobuf trace containing memory requests at fixed
3348844SAli.Saidi@ARM.com     * timestamps, perform flow control and issue memory requests. If L1 cache
3358844SAli.Saidi@ARM.com     * port sends packet succesfully, determine the tick to send the next
3368844SAli.Saidi@ARM.com     * packet else wait for retry from cache.
3378844SAli.Saidi@ARM.com     */
3388844SAli.Saidi@ARM.com    class FixedRetryGen
3398844SAli.Saidi@ARM.com    {
3408844SAli.Saidi@ARM.com
3418844SAli.Saidi@ARM.com      private:
3428844SAli.Saidi@ARM.com
3438844SAli.Saidi@ARM.com        /**
3448844SAli.Saidi@ARM.com         * This struct stores a line in the trace file.
3458844SAli.Saidi@ARM.com         */
3468844SAli.Saidi@ARM.com        struct TraceElement {
3478844SAli.Saidi@ARM.com
3488844SAli.Saidi@ARM.com            /** Specifies if the request is to be a read or a write */
3498844SAli.Saidi@ARM.com            MemCmd cmd;
3508844SAli.Saidi@ARM.com
3518844SAli.Saidi@ARM.com            /** The address for the request */
3528844SAli.Saidi@ARM.com            Addr addr;
3538844SAli.Saidi@ARM.com
3548844SAli.Saidi@ARM.com            /** The size of the access for the request */
3558844SAli.Saidi@ARM.com            Addr blocksize;
3568844SAli.Saidi@ARM.com
3578844SAli.Saidi@ARM.com            /** The time at which the request should be sent */
3588844SAli.Saidi@ARM.com            Tick tick;
3598844SAli.Saidi@ARM.com
3608844SAli.Saidi@ARM.com            /** Potential request flags to use */
3618844SAli.Saidi@ARM.com            Request::FlagsType flags;
3628844SAli.Saidi@ARM.com
3638844SAli.Saidi@ARM.com            /** Instruction PC */
3648428SN/A            Addr pc;
3658428SN/A
3668428SN/A            /**
3678428SN/A             * Check validity of this element.
3688983Snate@binkert.org             *
3698983Snate@binkert.org             * @return if this element is valid
3708428SN/A             */
3718428SN/A            bool isValid() const {
3728844SAli.Saidi@ARM.com                return cmd != MemCmd::InvalidCmd;
3738844SAli.Saidi@ARM.com            }
3748844SAli.Saidi@ARM.com
3758844SAli.Saidi@ARM.com            /**
3768844SAli.Saidi@ARM.com             * Make this element invalid.
3778844SAli.Saidi@ARM.com             */
3788844SAli.Saidi@ARM.com            void clear() {
3798844SAli.Saidi@ARM.com                cmd = MemCmd::InvalidCmd;
3808844SAli.Saidi@ARM.com            }
3818844SAli.Saidi@ARM.com        };
3828844SAli.Saidi@ARM.com
3838844SAli.Saidi@ARM.com        /**
3848844SAli.Saidi@ARM.com         * The InputStream encapsulates a trace file and the
3858844SAli.Saidi@ARM.com         * internal buffers and populates TraceElements based on
3868844SAli.Saidi@ARM.com         * the input.
3878844SAli.Saidi@ARM.com         */
3888844SAli.Saidi@ARM.com        class InputStream
3898844SAli.Saidi@ARM.com        {
3908844SAli.Saidi@ARM.com
3918844SAli.Saidi@ARM.com          private:
3928844SAli.Saidi@ARM.com
3938844SAli.Saidi@ARM.com            // Input file stream for the protobuf trace
3948844SAli.Saidi@ARM.com            ProtoInputStream trace;
3958844SAli.Saidi@ARM.com
3968428SN/A          public:
3978428SN/A
3988844SAli.Saidi@ARM.com            /**
3998844SAli.Saidi@ARM.com             * Create a trace input stream for a given file name.
4008844SAli.Saidi@ARM.com             *
4018844SAli.Saidi@ARM.com             * @param filename Path to the file to read from
4028428SN/A             */
4038844SAli.Saidi@ARM.com            InputStream(const std::string& filename);
4048844SAli.Saidi@ARM.com
4058844SAli.Saidi@ARM.com            /**
4068844SAli.Saidi@ARM.com             * Reset the stream such that it can be played once
4078844SAli.Saidi@ARM.com             * again.
4088835SAli.Saidi@ARM.com             */
4098835SAli.Saidi@ARM.com            void reset();
4108844SAli.Saidi@ARM.com
4118844SAli.Saidi@ARM.com            /**
4128844SAli.Saidi@ARM.com             * Attempt to read a trace element from the stream,
4138844SAli.Saidi@ARM.com             * and also notify the caller if the end of the file
4148844SAli.Saidi@ARM.com             * was reached.
4158844SAli.Saidi@ARM.com             *
4168835SAli.Saidi@ARM.com             * @param element Trace element to populate
4178835SAli.Saidi@ARM.com             * @return True if an element could be read successfully
4188844SAli.Saidi@ARM.com             */
4198844SAli.Saidi@ARM.com            bool read(TraceElement* element);
4208844SAli.Saidi@ARM.com        };
4218844SAli.Saidi@ARM.com
4228844SAli.Saidi@ARM.com        public:
4238844SAli.Saidi@ARM.com        /* Constructor */
4248844SAli.Saidi@ARM.com        FixedRetryGen(TraceCPU& _owner, const std::string& _name,
4258844SAli.Saidi@ARM.com                   MasterPort& _port, MasterID master_id,
4268844SAli.Saidi@ARM.com                   const std::string& trace_file)
4278844SAli.Saidi@ARM.com            : owner(_owner),
4288844SAli.Saidi@ARM.com              port(_port),
4298844SAli.Saidi@ARM.com              masterID(master_id),
4308844SAli.Saidi@ARM.com              trace(trace_file),
4318844SAli.Saidi@ARM.com              genName(owner.name() + ".fixedretry" + _name),
4328835SAli.Saidi@ARM.com              retryPkt(nullptr),
4338835SAli.Saidi@ARM.com              delta(0),
4348844SAli.Saidi@ARM.com              traceComplete(false)
4358844SAli.Saidi@ARM.com        {
4368844SAli.Saidi@ARM.com        }
4378844SAli.Saidi@ARM.com
4388844SAli.Saidi@ARM.com        /**
4398835SAli.Saidi@ARM.com         * Called from TraceCPU init(). Reads the first message from the
4408844SAli.Saidi@ARM.com         * input trace file and returns the send tick.
4418844SAli.Saidi@ARM.com         *
4428844SAli.Saidi@ARM.com         * @return Tick when first packet must be sent
4438844SAli.Saidi@ARM.com         */
4448844SAli.Saidi@ARM.com        Tick init();
4458844SAli.Saidi@ARM.com
4468428SN/A        /**
4478428SN/A         * This tries to send current or retry packet and returns true if
4488428SN/A         * successfull. It calls nextExecute() to read next message.
4498428SN/A         *
4508983Snate@binkert.org         * @return bool true if packet is sent successfully
4518983Snate@binkert.org         */
4528428SN/A        bool tryNext();
4538428SN/A
4548844SAli.Saidi@ARM.com        /** Returns name of the FixedRetryGen instance. */
4558844SAli.Saidi@ARM.com        const std::string& name() const { return genName; }
4568844SAli.Saidi@ARM.com
4578844SAli.Saidi@ARM.com        /**
4588844SAli.Saidi@ARM.com         * Creates a new request assigning the request parameters passed by the
4598844SAli.Saidi@ARM.com         * arguments. Calls the port's sendTimingReq() and returns true if
4608844SAli.Saidi@ARM.com         * the packet was sent succesfully. It is called by tryNext()
4618844SAli.Saidi@ARM.com         *
4628844SAli.Saidi@ARM.com         * @param addr address of request
4638844SAli.Saidi@ARM.com         * @param size size of request
4648844SAli.Saidi@ARM.com         * @param cmd if it is a read or write request
4658844SAli.Saidi@ARM.com         * @param flags associated request flags
4668844SAli.Saidi@ARM.com         * @param pc instruction PC that generated the request
4678844SAli.Saidi@ARM.com         *
4688844SAli.Saidi@ARM.com         * @return true if packet was sent successfully
4698844SAli.Saidi@ARM.com         */
4708844SAli.Saidi@ARM.com        bool send(Addr addr, unsigned size, const MemCmd& cmd,
4718844SAli.Saidi@ARM.com              Request::FlagsType flags, Addr pc);
4728844SAli.Saidi@ARM.com
4738844SAli.Saidi@ARM.com        /** Exit the FixedRetryGen. */
4748844SAli.Saidi@ARM.com        void exit();
4758844SAli.Saidi@ARM.com
4768844SAli.Saidi@ARM.com        /**
4778844SAli.Saidi@ARM.com         * Reads a line of the trace file. Returns the tick
4788844SAli.Saidi@ARM.com         * when the next request should be generated. If the end
4798844SAli.Saidi@ARM.com         * of the file has been reached, it returns false.
4808844SAli.Saidi@ARM.com         *
4818844SAli.Saidi@ARM.com         * @return bool false id end of file has been reached
4828844SAli.Saidi@ARM.com         */
4838844SAli.Saidi@ARM.com        bool nextExecute();
4848844SAli.Saidi@ARM.com
4858844SAli.Saidi@ARM.com        /**
4868428SN/A         * Returns the traceComplete variable which is set when end of the
4878428SN/A         * input trace file is reached.
4888844SAli.Saidi@ARM.com         *
4898428SN/A         * @return bool true if traceComplete is set, false otherwise.
4908844SAli.Saidi@ARM.com         */
4918844SAli.Saidi@ARM.com        bool isTraceComplete() { return traceComplete; }
4928428SN/A
4938844SAli.Saidi@ARM.com        int64_t tickDelta() { return delta; }
4948844SAli.Saidi@ARM.com
4958844SAli.Saidi@ARM.com        void regStats();
4968844SAli.Saidi@ARM.com
4978844SAli.Saidi@ARM.com      private:
4988835SAli.Saidi@ARM.com
4998835SAli.Saidi@ARM.com        /** Reference of the TraceCPU. */
5008835SAli.Saidi@ARM.com        TraceCPU& owner;
5018835SAli.Saidi@ARM.com
5028835SAli.Saidi@ARM.com        /** Reference of the port to be used to issue memory requests. */
5038835SAli.Saidi@ARM.com        MasterPort& port;
5048844SAli.Saidi@ARM.com
5058844SAli.Saidi@ARM.com        /** MasterID used for the requests being sent. */
5068844SAli.Saidi@ARM.com        const MasterID masterID;
5078844SAli.Saidi@ARM.com
5088844SAli.Saidi@ARM.com        /** Input stream used for reading the input trace file. */
5098844SAli.Saidi@ARM.com        InputStream trace;
5108844SAli.Saidi@ARM.com
5118844SAli.Saidi@ARM.com        /** String to store the name of the FixedRetryGen. */
5128844SAli.Saidi@ARM.com        std::string genName;
5138844SAli.Saidi@ARM.com
5148844SAli.Saidi@ARM.com        /** PacketPtr used to store the packet to retry. */
5158844SAli.Saidi@ARM.com        PacketPtr retryPkt;
5168844SAli.Saidi@ARM.com
5178844SAli.Saidi@ARM.com        /**
5188844SAli.Saidi@ARM.com         * Stores the difference in the send ticks of the current and last
5198844SAli.Saidi@ARM.com         * packets. Keeping this signed to check overflow to a negative value
5208844SAli.Saidi@ARM.com         * which will be caught by assert(delta > 0)
5218844SAli.Saidi@ARM.com         */
5228844SAli.Saidi@ARM.com        int64_t delta;
5238844SAli.Saidi@ARM.com
5248844SAli.Saidi@ARM.com        /**
5258844SAli.Saidi@ARM.com         * Set to true when end of trace is reached.
5268844SAli.Saidi@ARM.com         */
5278844SAli.Saidi@ARM.com        bool traceComplete;
5288844SAli.Saidi@ARM.com
5298844SAli.Saidi@ARM.com        /** Store an element read from the trace to send as the next packet. */
5308844SAli.Saidi@ARM.com        TraceElement currElement;
5318844SAli.Saidi@ARM.com
5328844SAli.Saidi@ARM.com        /** Stats for instruction accesses replayed. */
5338844SAli.Saidi@ARM.com        Stats::Scalar numSendAttempted;
5348844SAli.Saidi@ARM.com        Stats::Scalar numSendSucceeded;
5358844SAli.Saidi@ARM.com        Stats::Scalar numSendFailed;
5368844SAli.Saidi@ARM.com        Stats::Scalar numRetrySucceeded;
5378844SAli.Saidi@ARM.com        /** Last simulated tick by the FixedRetryGen */
5388835SAli.Saidi@ARM.com        Stats::Scalar instLastTick;
5398835SAli.Saidi@ARM.com
5408844SAli.Saidi@ARM.com    };
5418835SAli.Saidi@ARM.com
5428844SAli.Saidi@ARM.com    /**
5438835SAli.Saidi@ARM.com     * The elastic data memory request generator to read protobuf trace
5448844SAli.Saidi@ARM.com     * containing execution trace annotated with data and ordering
5458844SAli.Saidi@ARM.com     * dependencies. It deduces the time at which to send a load/store request
5468844SAli.Saidi@ARM.com     * by tracking the dependencies. It attempts to send a memory request for a
5478844SAli.Saidi@ARM.com     * load/store without performing real execution of micro-ops. If L1 cache
5488844SAli.Saidi@ARM.com     * port sends packet succesfully, the generator checks which instructions
5498844SAli.Saidi@ARM.com     * became dependency free as a result of this and schedules an event
5508844SAli.Saidi@ARM.com     * accordingly. If it fails to send the packet, it waits for a retry from
5518428SN/A     * the cache.
5528428SN/A     */
5538428SN/A    class ElasticDataGen
5548428SN/A    {
5558983Snate@binkert.org
5568983Snate@binkert.org      private:
5578428SN/A
5588428SN/A        /** Type of a node's instruction sequence number (GraphNode::seqNum). */
5598844SAli.Saidi@ARM.com        typedef uint64_t NodeSeqNum;
5608844SAli.Saidi@ARM.com
5618844SAli.Saidi@ARM.com        /** Type of a node's ROB occupancy number (GraphNode::robNum). */
5628844SAli.Saidi@ARM.com        typedef uint64_t NodeRobNum;
5638844SAli.Saidi@ARM.com
        /**
         * Shorthand for the record type of a trace record. GraphNode::type
         * has this type and is compared against Record::LOAD, Record::STORE
         * and Record::COMP.
         */
5648844SAli.Saidi@ARM.com        typedef ProtoMessage::InstDepRecord::RecordType RecordType;
        /** Shorthand for the ProtoMessage::InstDepRecord type. */
5658844SAli.Saidi@ARM.com        typedef ProtoMessage::InstDepRecord Record;
5668844SAli.Saidi@ARM.com
5678844SAli.Saidi@ARM.com        /**
5688844SAli.Saidi@ARM.com         * The class GraphNode stores an instruction in the trace file. The
5698844SAli.Saidi@ARM.com         * format of the trace file favours constructing a dependency graph of
5708844SAli.Saidi@ARM.com         * the execution and this struct is used to encapsulate the request
5718844SAli.Saidi@ARM.com         * data as well as pointers to its dependent GraphNodes.
5728844SAli.Saidi@ARM.com         */
5738844SAli.Saidi@ARM.com        class GraphNode {
5748844SAli.Saidi@ARM.com
5758844SAli.Saidi@ARM.com          public:
5768844SAli.Saidi@ARM.com            /**
5778844SAli.Saidi@ARM.com             * The maximum no. of ROB dependencies. There can be at most 2
5788844SAli.Saidi@ARM.com             * order dependencies which could exist for a store. For a load
5798844SAli.Saidi@ARM.com             * and comp node there can be at most one order dependency.
5808844SAli.Saidi@ARM.com             */
5818844SAli.Saidi@ARM.com            static const uint8_t maxRobDep = 2;
5828835SAli.Saidi@ARM.com
5838835SAli.Saidi@ARM.com            /** Typedef for the array containing the ROB dependencies */
5848844SAli.Saidi@ARM.com            typedef std::array<NodeSeqNum, maxRobDep> RobDepArray;
5858835SAli.Saidi@ARM.com
5868844SAli.Saidi@ARM.com            /** Typedef for the array containing the register dependencies */
5878835SAli.Saidi@ARM.com            typedef std::array<NodeSeqNum, TheISA::MaxInstSrcRegs> RegDepArray;
5888844SAli.Saidi@ARM.com
5898844SAli.Saidi@ARM.com            /** Instruction sequence number */
5908844SAli.Saidi@ARM.com            NodeSeqNum seqNum;
5918844SAli.Saidi@ARM.com
5928844SAli.Saidi@ARM.com            /** ROB occupancy number */
5938844SAli.Saidi@ARM.com            NodeRobNum robNum;
5948844SAli.Saidi@ARM.com
5958428SN/A           /** Type of the node corresponding to the instruction modelled by it */
5963096SN/A            RecordType type;
5973096SN/A
598            /** The address for the request if any */
599            Addr addr;
600
601            /** Size of request if any */
602            uint32_t size;
603
604            /** Request flags if any */
605            Request::Flags flags;
606
607            /** Instruction PC */
608            Addr pc;
609
610            /** Array of order (ROB) dependencies; holds up to maxRobDep entries. */
611            RobDepArray robDep;
612
613            /** Number of order dependencies */
614            uint8_t numRobDep;
615
616            /** Computational delay */
617            uint64_t compDelay;
618
619            /**
620             * Array of register dependencies (incoming) if any. Maximum number
621             * of source registers used to set maximum size of the array
622             */
623            RegDepArray regDep;
624
625            /** Number of register dependencies */
626            uint8_t numRegDep;
627
628            /**
629             * A vector of nodes dependent (outgoing) on this node. A
630             * sequential container is chosen because when dependents become
631             * free, they attempt to issue in program order.
632             */
633            std::vector<GraphNode *> dependents;
634
635            /** Is the node a load, i.e. is its type Record::LOAD */
636            bool isLoad() const { return (type == Record::LOAD); }
637
638            /** Is the node a store, i.e. is its type Record::STORE */
639            bool isStore() const { return (type == Record::STORE); }
640
641            /** Is the node a compute (non load/store) node, i.e. Record::COMP */
642            bool isComp() const { return (type == Record::COMP); }
643
644            /** Initialize register dependency array to all zeroes */
645            void clearRegDep();
646
647            /**
             * Initialize the order (ROB) dependency array.
             * NOTE(review): presumably zeroes robDep in the same way that
             * clearRegDep() zeroes regDep; the definition is not in this
             * header, so confirm.
             */
648            void clearRobDep();
649
650            /**
             * Remove completed instruction from register dependency array.
             *
             * @param reg_dep sequence number of the completed instruction
             * @return NOTE(review): presumably true if reg_dep was found and
             *         removed; confirm against the definition
             */
651            bool removeRegDep(NodeSeqNum reg_dep);
652
653            /**
             * Remove completed instruction from order dependency array.
             *
             * @param rob_dep sequence number of the completed instruction
             * @return NOTE(review): presumably true if rob_dep was found and
             *         removed; confirm against the definition
             */
654            bool removeRobDep(NodeSeqNum rob_dep);
655
656            /**
             * Check for all dependencies on completed inst.
             *
             * @param done_seq_num sequence number of the completed instruction
             * @return NOTE(review): meaning of the bool is not visible in
             *         this header; confirm against the definition
             */
657            bool removeDepOnInst(NodeSeqNum done_seq_num);
658
659            /**
             * Return true if node has a request which is strictly ordered,
             * i.e. Request::STRICT_ORDER is set in flags.
             */
660            bool isStrictlyOrdered() const {
661                return (flags.isSet(Request::STRICT_ORDER));
662            }
663            /**
664             * Write out element in trace-compatible format using debug flag
665             * TraceCPUData.
666             */
667            void writeElementAsTrace() const;
668
669            /** Return string specifying the type of the node */
670            std::string typeToStr() const;
671        };
672
673        /** Struct to store a ready-to-execute node and its execution tick. */
674        struct ReadyNode
675        {
676            /**
             * The sequence number of the ready node.
             * NOTE(review): presumably the key under which the node is held
             * in depGraph; confirm against the definitions.
             */
677            NodeSeqNum seqNum;
678
679            /**
             * The tick at which the ready node must be executed. Per the
             * addToSortedReadyList() documentation, readyList is kept in
             * ascending order of this value.
             */
680            Tick execTick;
681        };
682
683        /**
684         * The HardwareResource class models structures that hold the in-flight
685         * nodes. When a node becomes dependency free, first check if resources
686         * are available to issue it.
687         */
688        class HardwareResource
689        {
690          public:
691            /**
692             * Constructor that initializes the sizes of the structures.
693             *
694             * @param max_rob size of the Reorder Buffer
695             * @param max_stores size of Store Buffer
696             * @param max_loads size of Load Buffer
697             */
698            HardwareResource(uint16_t max_rob, uint16_t max_stores,
699                                uint16_t max_loads);
700
701            /**
702             * Occupy appropriate structures for an issued node.
703             *
704             * @param new_node pointer to the issued node
705             */
706            void occupy(const GraphNode* new_node);
707
708            /**
709             * Release appropriate structures for a completed node.
710             *
711             * @param done_node pointer to the completed node
712             */
713            void release(const GraphNode* done_node);
714
715            /** Release store buffer entry for a completed store */
716            void releaseStoreBuffer();
717
718            /**
719             * Check if structures required to issue a node are free.
720             *
721             * @param new_node pointer to the node ready to issue
722             * @return true if resources are available
723             */
724            bool isAvailable(const GraphNode* new_node) const;
725
726            /**
727             * Check if there are any outstanding requests, i.e. requests for
728             * which we are yet to receive a response.
729             *
730             * @return true if there is at least one read or write request
731             *      outstanding
732             */
733            bool awaitingResponse() const;
734
735            /** Print resource occupancy for debugging */
736            void printOccupancy();
737
738          private:
739            /**
740             * The size of the ROB used to throttle the max. number of in-flight
741             * nodes.
742             */
743            const uint16_t sizeROB;
744
745            /**
746             * The size of store buffer. This is used to throttle the max. number
747             * of in-flight stores.
748             */
749            const uint16_t sizeStoreBuffer;
750
751            /**
752             * The size of load buffer. This is used to throttle the max. number
753             * of in-flight loads.
754             */
755            const uint16_t sizeLoadBuffer;
756
757            /**
758             * A map from the sequence number to the ROB number of the in-
759             * flight nodes. This includes all nodes that are in the readyList
760             * plus the loads for which a request has been sent which are not
761             * present in the readyList. But such loads are not yet complete
762             * and thus occupy resources. We need to query the oldest in-flight
763             * node and since a map container keeps all its keys sorted using
764             * the less than criterion, the first element is the in-flight node
765             * with the least sequence number, i.e. the oldest in-flight node.
766             */
767            std::map<NodeSeqNum, NodeRobNum> inFlightNodes;
768
769            /** The ROB number of the oldest in-flight node */
770            NodeRobNum oldestInFlightRobNum;
771
772            /** Number of ready loads for which request may or may not be sent */
773            uint16_t numInFlightLoads;
774
775            /** Number of ready stores for which request may or may not be sent */
776            uint16_t numInFlightStores;
777        };
778
779        /**
780         * The InputStream encapsulates a trace file and the
781         * internal buffers and populates GraphNodes based on
782         * the input.
783         */
784        class InputStream
785        {
786
787          private:
788
789            /** Input file stream for the protobuf trace */
790            ProtoInputStream trace;
791
792            /** Count of committed ops read from trace plus the filtered ops */
793            uint64_t microOpCount;
794
795            /**
796             * The window size that is read from the header of the protobuf
797             * trace and used to process the dependency trace
798             */
799            uint32_t windowSize;
800          public:
801
802            /**
803             * Create a trace input stream for a given file name.
804             *
805             * @param filename Path to the file to read from
806             */
807            InputStream(const std::string& filename);
808
809            /**
810             * Reset the stream such that it can be played once
811             * again.
812             */
813            void reset();
814
815            /**
816             * Attempt to read a trace element from the stream,
817             * and also notify the caller if the end of the file
818             * was reached.
819             *
820             * @param element Trace element (GraphNode) to populate; its
821             *        dependency arrays are members, so no size is passed
822             * @return True if an element could be read successfully
823             */
824            bool read(GraphNode* element);
825
826            /** Get the window size stored in windowSize */
827            uint32_t getWindowSize() const { return windowSize; }
828
829            /** Get number of micro-ops modelled in the TraceCPU replay */
830            uint64_t getMicroOpCount() const { return microOpCount; }
831        };
832
833        public:
834        /**
         * Constructor. Binds the generator to its owner, port and master ID,
         * creates the trace input stream, and sizes the hardware resource
         * model. All status flags start out false and there is no packet
         * pending retry.
         *
         * @param _owner TraceCPU that owns this generator
         * @param _name suffix appended to "<owner name>.elastic" to form
         *        the name returned by name()
         * @param _port port used to issue memory requests
         * @param master_id MasterID used for the requests being sent
         * @param trace_file file name passed to the InputStream constructor
         * @param max_rob size of the Reorder Buffer
         * @param max_stores size of Store Buffer
         * @param max_loads size of Load Buffer
         */
835        ElasticDataGen(TraceCPU& _owner, const std::string& _name,
836                   MasterPort& _port, MasterID master_id,
837                   const std::string& trace_file, uint16_t max_rob,
838                   uint16_t max_stores, uint16_t max_loads)
839            : owner(_owner),
840              port(_port),
841              masterID(master_id),
842              trace(trace_file),
              // Reads the member 'owner', which is declared (and therefore
              // initialized) before genName.
843              genName(owner.name() + ".elastic" + _name),
844              retryPkt(nullptr),
845              traceComplete(false),
846              nextRead(false),
847              execComplete(false),
              // Reads the member 'trace', which is declared (and therefore
              // initialized) before windowSize.
848              windowSize(trace.getWindowSize()),
849              hwResource(max_rob, max_stores, max_loads)
850        {
851            DPRINTF(TraceCPUData, "Window size in the trace is %d.\n",
852                    windowSize);
853        }
854
855        /**
856         * Called from TraceCPU init(). Reads the first message from the
857         * input trace file and returns the send tick.
858         *
859         * @return Tick when first packet must be sent
860         */
861        Tick init();
862
863        /** Returns name of the ElasticDataGen instance. */
864        const std::string& name() const { return genName; }
865
866        /** Exit the ElasticDataGen. */
867        void exit();
868
869        /**
870         * Reads the next window of instructions from the trace file.
871         * NOTE(review): presumably the nodes read are added to depGraph; the
872         * definition is not in this header. No tick is returned, only a bool.
873         *
874         * @return bool false if end of file has been reached else true
875         */
876        bool readNextWindow();
877
878        /**
879         * Iterate over the dependencies of a new node and add the new node
880         * to the list of dependents of the parent node.
881         *
         * @tparam  T           type of the dependency array, rob or register
882         * @param   new_node    new node to add to the graph
883         * @param   dep_array   the dependency array of type rob or register,
884         *                      that is to be iterated, and may get modified
885         * @param   num_dep     the number of dependencies set in the array
886         *                      which may get modified during iteration
887         */
888        template<typename T> void addDepsOnParent(GraphNode *new_node,
889                                                    T& dep_array,
890                                                    uint8_t& num_dep);
891
892        /**
893         * This is the main execute function which consumes nodes from the
894         * sorted readyList. First attempt to issue the pending dependency-free
895         * nodes held in the depFreeQueue. Insert the ready-to-issue nodes into
896         * the readyList. Then iterate through the readyList and when a node
897         * has its execute tick equal to curTick(), execute it. If the node is
898         * a load or a store call executeMemReq() and if it is neither, simply
899         * mark it complete.
900         */
901        void execute();
902
903        /**
904         * Creates a new request for a load or store assigning the request
905         * parameters. Calls the port's sendTimingReq() and returns a packet
906         * if the send failed so that it can be saved for a retry.
907         *
908         * @param node_ptr pointer to the load or store node to be executed
909         *
910         * @return packet pointer if the request failed and nullptr if it was
911         *          sent successfully
912         */
913        PacketPtr executeMemReq(GraphNode* node_ptr);
914
915        /**
916         * Add a ready node to the readyList. When inserting, ensure the nodes
917         * are sorted in ascending order of their execute ticks.
918         *
919         * @param seq_num seq. num of ready node
920         * @param exec_tick the execute tick of the ready node
921         */
922        void addToSortedReadyList(NodeSeqNum seq_num, Tick exec_tick);
923
924        /** Print readyList for debugging using debug flag TraceCPUData. */
925        void printReadyList();
926
927        /**
928         * When a load writeback is received, that is when the load completes,
929         * release the dependents on it. This is called from the dcache port
930         * recvTimingResp().
         *
         * @param pkt packet handed over by the caller
931         */
932        void completeMemAccess(PacketPtr pkt);
933
934        /**
935         * Returns the execComplete variable which is set when the last
936         * node is executed.
937         *
938         * @return bool true if execComplete is set, false otherwise.
939         */
940        bool isExecComplete() const { return execComplete; }
941
942        /**
943         * Attempts to issue a node once the node's source dependencies are
944         * complete. If resources are available then add it to the readyList,
945         * otherwise the node is not issued and is stored in depFreeQueue
946         * until resources become available.
947         *
948         * @param node_ptr pointer to node to be issued
949         * @param first true if this is the first attempt to issue this node
950         * @return true if node was added to readyList
951         */
952        bool checkAndIssue(const GraphNode* node_ptr, bool first = true);
953
954        /**
         * Get number of micro-ops modelled in the TraceCPU replay, as
         * reported by the trace input stream.
         */
955        uint64_t getMicroOpCount() const { return trace.getMicroOpCount(); }
956
        /**
         * NOTE(review): presumably registers the Stats members declared in
         * this class; the definition is not in this header, so confirm.
         */
957        void regStats();
958
959      private:
960
961        /** Reference of the TraceCPU. */
962        TraceCPU& owner;
963
964        /** Reference of the port to be used to issue memory requests. */
965        MasterPort& port;
966
967        /** MasterID used for the requests being sent. */
968        const MasterID masterID;
969
970        /** Input stream used for reading the input trace file. */
971        InputStream trace;
972
973        /** String to store the name of the ElasticDataGen. */
974        std::string genName;
975
976        /** PacketPtr used to store the packet to retry. */
977        PacketPtr retryPkt;
978
979        /** Set to true when end of trace is reached. */
980        bool traceComplete;
981
982        /** Set to true when the next window of instructions needs to be read */
983        bool nextRead;
984
985        /** Set true when execution of trace is complete */
986        bool execComplete;
987
988        /**
989         * Window size within which to check for dependencies. Its value is
990         * made equal to the window size used to generate the trace which is
991         * recorded in the trace header. The dependency graph must be
992         * populated enough such that when a node completes, its potential
993         * child node must be found and the dependency removed before the
994         * completed node itself is removed. Thus as soon as the graph shrinks
995         * to become smaller than this window, we read in the next window.
996         */
997        const uint32_t windowSize;
998
999        /**
1000         * Hardware resources required to contain in-flight nodes and to
1001         * throttle issuing of new nodes when resources are not available.
1002         */
1003        HardwareResource hwResource;
1004
1005        /** Store the depGraph of GraphNodes, keyed by NodeSeqNum */
1006        std::unordered_map<NodeSeqNum, GraphNode*> depGraph;
1007
1008        /**
1009         * Queue of dependency-free nodes that are pending issue because
1010         * resources are not available. This is chosen to be FIFO so that
1011         * dependent nodes which become free in program order get pushed
1012         * into the queue in that order. Thus nodes are more likely to
1013         * issue in program order.
1014         */
1015        std::queue<const GraphNode*> depFreeQueue;
1016
1017        /**
         * List of nodes that are ready to execute. Per the
         * addToSortedReadyList() documentation it is kept in ascending order
         * of execute tick.
         */
1018        std::list<ReadyNode> readyList;
1019
1020        /** Stats for data memory accesses replayed. */
1021        Stats::Scalar maxDependents;
1022        Stats::Scalar maxReadyListSize;
1023        Stats::Scalar numSendAttempted;
1024        Stats::Scalar numSendSucceeded;
1025        Stats::Scalar numSendFailed;
1026        Stats::Scalar numRetrySucceeded;
1027        Stats::Scalar numSplitReqs;
1028        Stats::Scalar numSOLoads;
1029        Stats::Scalar numSOStores;
1030        /** Tick when ElasticDataGen completes execution */
1031        Stats::Scalar dataLastTick;
1032    };
1033
1034    /** Instance of FixedRetryGen to replay instruction read requests. */
1035    FixedRetryGen icacheGen;
1036
1037    /** Instance of ElasticDataGen to replay data read and write requests. */
1038    ElasticDataGen dcacheGen;
1039
1040    /**
1041     * This is the control flow that uses the functionality of the icacheGen to
1042     * replay the trace. It calls tryNext(). If it returns true then next event
1043     * is scheduled at curTick() plus delta. If it returns false then delta is
1044     * ignored and control is brought back via recvRetry().
1045     */
1046    void schedIcacheNext();
1047
1048    /**
1049     * This is the control flow that uses the functionality of the dcacheGen to
1050     * replay the trace. It calls execute(). It checks if execution is complete
1051     * and schedules an event to exit simulation accordingly.
1052     */
1053    void schedDcacheNext();
1054
1055    /** Event for the control flow method schedIcacheNext() */
1056    EventWrapper<TraceCPU, &TraceCPU::schedIcacheNext> icacheNextEvent;
1057
1058    /** Event for the control flow method schedDcacheNext() */
1059    EventWrapper<TraceCPU, &TraceCPU::schedDcacheNext> dcacheNextEvent;
1060
1061    /** This is called when either generator finishes executing from the trace */
1062    void checkAndSchedExitEvent();
1063
1064    /** Set to true when one of the generators finishes replaying its trace. */
1065    bool oneTraceComplete;
1066
1067    /**
1068     * This stores the tick of the first instruction fetch request
1069     * which is later used for dumping the tickOffset stat.
1070     */
1071    Tick firstFetchTick;
1072
1073    /**
1074     * Number of Trace CPUs in the system used as a shared variable and passed
1075     * to the CountedExitEvent event used for counting down exit events.  It is
1076     * incremented in the constructor call so that the total is arrived at
1077     * automatically.
1078     */
1079    static int numTraceCPUs;
1080
1081   /**
1082    * A CountedExitEvent which when serviced decrements the counter. A sim
1083    * exit event is scheduled when the counter equals zero, that is all
1084    * instances of Trace CPU have had their execCompleteEvent serviced.
1085    */
1086    CountedExitEvent *execCompleteEvent;
1087
    /**
     * NOTE(review): presumably counts how often dcacheNextEvent and
     * icacheNextEvent respectively were scheduled; the updates are not
     * visible in this header, so confirm.
     */
1088    Stats::Scalar numSchedDcacheEvent;
1089    Stats::Scalar numSchedIcacheEvent;
1090
1091    /** Stat for number of simulated micro-ops. */
1092    Stats::Scalar numOps;
1093    /** Stat for the CPI. This is really cycles per micro-op and not inst. */
1094    Stats::Formula cpi;
1095
1096    /**
1097     * The first execution tick is dumped as a stat so that the simulated
1098     * seconds for a trace replay can be calculated as a difference between the
1099     * final_tick stat and the tickOffset stat
1100     */
1101    Stats::Scalar tickOffset;
1102
1103  public:
1104
1105    /** Used to get a reference to the icache port (member icachePort). */
1106    MasterPort &getInstPort() { return icachePort; }
1107
1108    /** Used to get a reference to the dcache port (member dcachePort). */
1109    MasterPort &getDataPort() { return dcachePort; }
1110
    /**
     * NOTE(review): presumably registers the Stats members of TraceCPU; the
     * definition is not in this header, so confirm.
     */
1111    void regStats();
1112};
1113#endif // __CPU_TRACE_TRACE_CPU_HH__
1114