dist_iface.hh revision 11290:1640dd68b0a4
18225SN/A/*
211398SN/A * Copyright (c) 2015 ARM Limited
311398SN/A * All rights reserved
411398SN/A *
511398SN/A * The license below extends only to copyright in the software and shall
611398SN/A * not be construed as granting a license to any other intellectual
711398SN/A * property including but not limited to intellectual property relating
811398SN/A * to a hardware implementation of the functionality of the software
911398SN/A * licensed hereunder.  You may use the software subject to the license
1011398SN/A * terms below provided that you ensure that this notice is replicated
1111398SN/A * unmodified and in its entirety in all distributions of the software,
1211398SN/A * modified or unmodified, in source code or in binary form.
1311398SN/A *
1411398SN/A * Redistribution and use in source and binary forms, with or without
1511397SN/A * modification, are permitted provided that the following conditions are
1611397SN/A * met: redistributions of source code must retain the above copyright
1711397SN/A * notice, this list of conditions and the following disclaimer;
1811397SN/A * redistributions in binary form must reproduce the above copyright
1911397SN/A * notice, this list of conditions and the following disclaimer in the
2011397SN/A * documentation and/or other materials provided with the distribution;
2111397SN/A * neither the name of the copyright holders nor the names of its
2211397SN/A * contributors may be used to endorse or promote products derived from
2311397SN/A * this software without specific prior written permission.
2411397SN/A *
2511397SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2611397SN/A * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2711397SN/A * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2811397SN/A * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2911397SN/A * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
3011397SN/A * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
3111397SN/A * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
3211397SN/A * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
3311397SN/A * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
3411397SN/A * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
3511397SN/A * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3611397SN/A *
3711397SN/A * Authors: Gabor Dozsa
3811397SN/A */
3911397SN/A
4011397SN/A/* @file
4111397SN/A * The interface class for dist gem5 simulations.
4211398SN/A *
 * dist-gem5 is an extension to gem5 to enable parallel simulation of a
 * distributed system (e.g. simulation of a pool of machines
 * connected by Ethernet links). A dist gem5 run consists of separate gem5
 * processes running in parallel. Each gem5 process executes
 * the simulation of a component of the simulated distributed system.
 * (An example component can be a dist-core board with an Ethernet NIC.)
 * The DistIface class below provides services to transfer data and
 * control messages among the gem5 processes. The main such services are
 * as follows.
528225SN/A *
538225SN/A * 1. Send a data packet coming from a simulated Ethernet link. The packet
548225SN/A * will be transferred to (all) the target(s) gem5 processes. The send
558225SN/A * operation is always performed by the simulation thread, i.e. the gem5
568225SN/A * thread that is processing the event queue associated with the simulated
578225SN/A * Ethernet link.
588225SN/A *
 * 2. Spawn a receiver thread to process messages coming in from
 * other gem5 processes. Each simulated Ethernet link has its own
 * associated receiver thread. The receiver thread saves the incoming packet
 * and schedules an appropriate receive event in the event queue.
638225SN/A *
 * 3. Schedule a global barrier event periodically to keep the peer gem5
 * processes in sync. The basic idea is that no gem5 process can run ahead
 * of its peers by more than the simulated link transmission delay. This
 * ensures that a corresponding receive event can always be scheduled for
 * any message coming in from a peer gem5 process.
708225SN/A *
718225SN/A *
728225SN/A *
738225SN/A * This interface is an abstract class. It can work with various low level
748225SN/A * send/receive service implementations (e.g. TCP/IP, MPI,...). A TCP
758225SN/A * stream socket version is implemented in src/dev/net/tcp_iface.[hh,cc].
768225SN/A */
778225SN/A#ifndef __DEV_DIST_IFACE_HH__
788225SN/A#define __DEV_DIST_IFACE_HH__
798225SN/A
#include <array>
#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>

#include "dev/net/dist_packet.hh"
#include "dev/net/etherpkt.hh"
#include "sim/core.hh"
#include "sim/drain.hh"
#include "sim/global_event.hh"
#include "sim/serialize.hh"
928225SN/A
9310674SN/Aclass EventManager;
9410674SN/A
9510674SN/A/**
9610674SN/A * The interface class to talk to peer gem5 processes.
9710674SN/A */
9810674SN/Aclass DistIface : public Drainable, public Serializable
9910674SN/A{
10010674SN/A  public:
10110674SN/A    typedef DistHeaderPkt::Header Header;
10210674SN/A
10310674SN/A  protected:
10410674SN/A    typedef DistHeaderPkt::MsgType MsgType;
10510674SN/A    typedef DistHeaderPkt::ReqType ReqType;
10610674SN/A
10710674SN/A  private:
10810674SN/A    class SyncEvent;
10910674SN/A    /** @class Sync
11010674SN/A     * This class implements global sync operations among gem5 peer processes.
11110674SN/A     *
11210674SN/A     * @note This class is used as a singleton object (shared by all DistIface
11310674SN/A     * objects).
11410674SN/A     */
11510674SN/A    class Sync : public Serializable
11610674SN/A    {
11710674SN/A      protected:
11810674SN/A        /**
11910674SN/A         * The lock to protect access to the Sync object.
12010674SN/A         */
12110674SN/A        std::mutex lock;
12210674SN/A        /**
12310674SN/A         * Condition variable for the simulation thread to wait on
12410674SN/A         * until all receiver threads completes the current global
12510674SN/A         * synchronisation.
12610674SN/A         */
12710674SN/A        std::condition_variable cv;
12810674SN/A        /**
12910674SN/A         * Number of receiver threads that not yet completed the current global
13010674SN/A         * synchronisation.
13110674SN/A         */
13210674SN/A        unsigned waitNum;
13310674SN/A        /**
13410674SN/A         * Flag is set if exit is permitted upon sync completion
13510674SN/A         */
13610674SN/A        bool doExit;
13710674SN/A        /**
13810674SN/A         * Flag is set if taking a ckpt is permitted upon sync completion
13910674SN/A         */
14010674SN/A        bool doCkpt;
14110674SN/A        /**
14210674SN/A         * The repeat value for the next periodic sync
14310674SN/A         */
14410674SN/A        Tick nextRepeat;
14510674SN/A        /**
14610674SN/A         * Tick for the very first periodic sync
14710674SN/A         */
14810674SN/A        Tick firstAt;
14910674SN/A        /**
15010674SN/A         * Tick for the next periodic sync (if the event is not scheduled yet)
1518225SN/A         */
1528225SN/A        Tick nextAt;
1538225SN/A
1548225SN/A        friend class SyncEvent;
1558225SN/A
1568225SN/A      public:
1578225SN/A        /**
1588225SN/A         * Initialize periodic sync params.
15910674SN/A         *
16010674SN/A         * @param start Start tick for dist synchronisation
16110674SN/A         * @param repeat Frequency of dist synchronisation
16210674SN/A         *
16310674SN/A         */
16410674SN/A        void init(Tick start, Tick repeat);
16510674SN/A        /**
16610674SN/A         *  Core method to perform a full dist sync.
16710674SN/A         */
16810674SN/A        virtual void run(bool same_tick) = 0;
1698225SN/A        /**
1708225SN/A         * Callback when the receiver thread gets a sync ack message.
17110674SN/A         */
17211808Sandreas.sandberg@arm.com        virtual void progress(Tick send_tick,
17310674SN/A                              Tick next_repeat,
17410674SN/A                              ReqType do_ckpt,
17510674SN/A                              ReqType do_exit) = 0;
17610674SN/A
17710674SN/A        virtual void requestCkpt(ReqType req) = 0;
17810674SN/A        virtual void requestExit(ReqType req) = 0;
17910674SN/A
1808225SN/A        void drainComplete();
1818225SN/A
18210674SN/A        virtual void serialize(CheckpointOut &cp) const override = 0;
18310674SN/A        virtual void unserialize(CheckpointIn &cp) override = 0;
18410674SN/A    };
18510674SN/A
1868225SN/A    class SyncNode: public Sync
1878225SN/A    {
1888225SN/A      private:
1898225SN/A        /**
1908225SN/A         * Exit requested
19110674SN/A         */
19210674SN/A        ReqType needExit;
19310674SN/A        /**
19410674SN/A         * Ckpt requested
19510674SN/A         */
1968225SN/A        ReqType needCkpt;
19710674SN/A
19810674SN/A      public:
19910674SN/A
20010674SN/A        SyncNode();
20110674SN/A        ~SyncNode() {}
20210674SN/A        void run(bool same_tick) override;
2038225SN/A        void progress(Tick max_req_tick,
20410674SN/A                      Tick next_repeat,
20510674SN/A                      ReqType do_ckpt,
20610674SN/A                      ReqType do_exit) override;
20711402SN/A
20810674SN/A        void requestCkpt(ReqType req) override;
20911402SN/A        void requestExit(ReqType req) override;
21011402SN/A
21111402SN/A        void serialize(CheckpointOut &cp) const override;
21211402SN/A        void unserialize(CheckpointIn &cp) override;
21310674SN/A    };
21410674SN/A
21511402SN/A    class SyncSwitch: public Sync
2168225SN/A    {
2178225SN/A      private:
21810275SN/A        /**
2198225SN/A         * Counter for recording exit requests
22010674SN/A         */
22110674SN/A        unsigned numExitReq;
22210674SN/A        /**
22310674SN/A         * Counter for recording ckpt requests
22410674SN/A         */
22510674SN/A        unsigned numCkptReq;
22610674SN/A        /**
22710674SN/A         *  Number of connected simulated nodes
22810674SN/A         */
22910674SN/A        unsigned numNodes;
23010674SN/A
23110674SN/A      public:
23210674SN/A        SyncSwitch(int num_nodes);
23310674SN/A        ~SyncSwitch() {}
23410674SN/A
23510674SN/A        void run(bool same_tick) override;
23610674SN/A        void progress(Tick max_req_tick,
23710674SN/A                      Tick next_repeat,
23810674SN/A                      ReqType do_ckpt,
23910674SN/A                      ReqType do_exit) override;
24010674SN/A
24110674SN/A        void requestCkpt(ReqType) override {
24210674SN/A            panic("Switch requested checkpoint");
24310674SN/A        }
24410674SN/A        void requestExit(ReqType) override {
24510674SN/A            panic("Switch requested exit");
2468225SN/A        }
24710674SN/A
24810674SN/A        void serialize(CheckpointOut &cp) const override;
24910674SN/A        void unserialize(CheckpointIn &cp) override;
25010674SN/A    };
25110674SN/A
25210674SN/A    /**
2538225SN/A     * The global event to schedule periodic dist sync. It is used as a
25410674SN/A     * singleton object.
25510674SN/A     *
25610674SN/A     * The periodic synchronisation works as follows.
25710674SN/A     * 1. A SyncEvent is scheduled as a global event when startup() is
25810674SN/A     * called.
25910674SN/A     * 2. The process() method of the SyncEvent initiates a new barrier
26010674SN/A     * for each simulated Ethernet link.
2618225SN/A     * 3. Simulation thread(s) then waits until all receiver threads
26210674SN/A     * complete the ongoing barrier. The global sync event is done.
26310674SN/A     */
26411402SN/A    class SyncEvent : public GlobalSyncEvent
26511402SN/A    {
26610674SN/A      private:
26710674SN/A        /**
2688225SN/A         * Flag to set when the system is draining
26910674SN/A         */
2708225SN/A        bool _draining;
2718225SN/A      public:
27210674SN/A        /**
27310674SN/A         * Only the firstly instantiated DistIface object will
27411402SN/A         * call this constructor.
27511402SN/A         */
2768225SN/A        SyncEvent() : GlobalSyncEvent(Sim_Exit_Pri, 0), _draining(false) {}
2778225SN/A
2788225SN/A        ~SyncEvent() {}
2798225SN/A        /**
2808225SN/A         * Schedule the first periodic sync event.
2818225SN/A         */
2828225SN/A        void start();
2838225SN/A        /**
2848225SN/A         * This is a global event so process() will only be called by
2858225SN/A         * exactly one simulation thread. (See further comments in the .cc
2868225SN/A         * file.)
2878225SN/A         */
2888225SN/A        void process() override;
2898225SN/A
2908225SN/A        bool draining() const { return _draining; }
2918225SN/A        void draining(bool fl) { _draining = fl; }
2928225SN/A    };
2938225SN/A    /**
2948225SN/A     * Class to encapsulate information about data packets received.
2958225SN/A
2968225SN/A     * @note The main purpose of the class to take care of scheduling receive
2978225SN/A     * done events for the simulated network link and store incoming packets
2988225SN/A     * until they can be received by the simulated network link.
2998225SN/A     */
3008225SN/A    class RecvScheduler : public Serializable
3018225SN/A    {
3028225SN/A      private:
3038225SN/A        /**
3048225SN/A         * Received packet descriptor. This information is used by the receive
3058225SN/A         * thread to schedule receive events and by the simulation thread to
3068225SN/A         * process those events.
3078225SN/A         */
3088225SN/A        struct Desc : public Serializable
3098225SN/A        {
3108225SN/A            EthPacketPtr packet;
3118225SN/A            Tick sendTick;
3128225SN/A            Tick sendDelay;
3138225SN/A
3148225SN/A            Desc() : sendTick(0), sendDelay(0) {}
3158225SN/A            Desc(EthPacketPtr p, Tick s, Tick d) :
3168225SN/A                packet(p), sendTick(s), sendDelay(d) {}
3178225SN/A            Desc(const Desc &d) :
318                packet(d.packet), sendTick(d.sendTick), sendDelay(d.sendDelay) {}
319
320            void serialize(CheckpointOut &cp) const override;
321            void unserialize(CheckpointIn &cp) override;
322        };
323        /**
324         * The queue to store the receive descriptors.
325         */
326        std::queue<Desc> descQueue;
327        /**
328         * The tick when the most recent receive event was processed.
329         *
330         * @note This information is necessary to simulate possible receiver
331         * link contention when calculating the receive tick for the next
332         * incoming data packet (see the calcReceiveTick() method)
333         */
334        Tick prevRecvTick;
335        /**
336         * The receive done event for the simulated Ethernet link.
337         *
338         * @note This object is constructed by the simulated network link. We
339         * schedule this object for each incoming data packet.
340         */
341        Event *recvDone;
342        /**
343         * The link delay in ticks for the simulated Ethernet link.
344         *
345         * @note This value is used for calculating the receive ticks for
346         * incoming data packets.
347         */
348        Tick linkDelay;
349        /**
350         * The event manager associated with the simulated Ethernet link.
351         *
352         * @note It is used to access the event queue for scheduling receive
353         * done events for the link.
354         */
355        EventManager *eventManager;
356        /**
357         * Calculate the tick to schedule the next receive done event.
358         *
359         * @param send_tick The tick the packet was sent.
360         * @param send_delay The simulated delay at the sender side.
361         * @param prev_recv_tick Tick when the last receive event was
362         * processed.
363         *
364         * @note This method tries to take into account possible receiver link
365         * contention and adjust receive tick for the incoming packets
366         * accordingly.
367         */
368        Tick calcReceiveTick(Tick send_tick,
369                             Tick send_delay,
370                             Tick prev_recv_tick);
371
372        /**
373         * Flag to set if receive ticks for pending packets need to be
374         * recalculated due to changed link latencies at a resume
375         */
376        bool ckptRestore;
377
378      public:
379        /**
380         * Scheduler for the incoming data packets.
381         *
382         * @param em The event manager associated with the simulated Ethernet
383         * link.
384         */
385        RecvScheduler(EventManager *em) :
386            prevRecvTick(0), recvDone(nullptr), linkDelay(0),
387            eventManager(em), ckptRestore(false) {}
388
389        /**
390         *  Initialize network link parameters.
391         *
392         * @note This method is called from the receiver thread (see
393         * recvThreadFunc()).
394         */
395        void init(Event *recv_done, Tick link_delay);
396        /**
397         * Fetch the next packet that is to be received by the simulated network
398         * link.
399         *
400         * @note This method is called from the process() method of the receive
401         * done event associated with the network link.
402         */
403        EthPacketPtr popPacket();
404        /**
405         * Push a newly arrived packet into the desc queue.
406         */
407        void pushPacket(EthPacketPtr new_packet,
408                        Tick send_tick,
409                        Tick send_delay);
410
411        void serialize(CheckpointOut &cp) const override;
412        void unserialize(CheckpointIn &cp) override;
413        /**
414         * Adjust receive ticks for pending packets when restoring from a
415         * checkpoint
416         *
417         * @note Link speed and delay parameters may change at resume.
418         */
419        void resumeRecvTicks();
420    };
    /**
     * Tick to schedule the first dist sync event.
     * This is just an optimization: we do not need any dist sync
     * event until the simulated NIC is brought up by the OS.
     */
    Tick syncStart;
    /**
     * Frequency of dist sync events in ticks.
     */
    Tick syncRepeat;
    /**
     * Receiver thread pointer.
     * Each DistIface object must have exactly one receiver thread.
     */
    std::thread *recvThread;
    /**
     * Meta information about data packets received.
     */
    RecvScheduler recvScheduler;

  protected:
    /**
     * The rank of this process among the gem5 peers.
     */
    unsigned rank;
    /**
     * The number of gem5 processes comprising this dist simulation.
     */
    unsigned size;
    /**
     * Number of DistIface objects (i.e. dist links in this gem5 process)
     */
    static unsigned distIfaceNum;
    /**
     * Unique id for the dist link
     */
    unsigned distIfaceId;

    /**
     * NOTE(review): presumably true for the DistIface object that acts as
     * the master (see the static `master` pointer) -- confirm in the .cc
     * file.
     */
    bool isMaster;

  private:
    /**
     * Number of receiver threads (in this gem5 process)
     */
    static unsigned recvThreadsNum;
    /**
     * The singleton Sync object to perform dist synchronisation.
     */
    static Sync *sync;
    /**
     * The singleton SyncEvent object to schedule periodic dist sync.
     */
    static SyncEvent *syncEvent;
    /**
     * The very first DistIface object created becomes the master. We need
     * a master to co-ordinate the global synchronisation.
     */
    static DistIface *master;
479
  private:
    /**
     * Send out a data packet to the remote end.
     * @param header Meta info about the packet (which needs to be transferred
     * to the destination alongside the packet).
     * @param packet Pointer to the packet to send.
     */
    virtual void sendPacket(const Header &header, const EthPacketPtr &packet) = 0;
    /**
     * Send out a control command to the remote end.
     * @param header Meta info describing the command (e.g. sync request)
     */
    virtual void sendCmd(const Header &header) = 0;
    /**
     * Receive a header (i.e. meta info describing a data packet or a control
     * command) from the remote end.
     * @param header The meta info structure to store the incoming header.
     * @return NOTE(review): the meaning of the boolean result is defined by
     * the transport implementation and is not visible in this header.
     */
    virtual bool recvHeader(Header &header) = 0;
    /**
     * Receive a packet from the remote end.
     * @param header Meta info about the incoming packet (obtained by a
     * previous call to the recvHeader() method).
     * @param packet Pointer to packet received.
     */
    virtual void recvPacket(const Header &header, EthPacketPtr &packet) = 0;
    /**
     * Init hook for the underlying transport
     */
    virtual void initTransport() = 0;
    /**
     * Spawn the receiver thread.
     * @param recv_done The receive done event associated with the simulated
     * Ethernet link.
     * @param link_delay The link delay for the simulated Ethernet link.
     *
     * NOTE(review): recv_done is a pointer-to-const here but recvThreadFunc()
     * takes a non-const Event pointer, so the implementation has to drop the
     * const qualifier somewhere -- check the .cc file.
     */
    void spawnRecvThread(const Event *recv_done, Tick link_delay);
    /**
     * The function executed by a receiver thread.
     * @param recv_done The receive done event associated with the simulated
     * Ethernet link.
     * @param link_delay The link delay for the simulated Ethernet link.
     */
    void recvThreadFunc(Event *recv_done, Tick link_delay);
521
  public:

    /**
     * ctor
     * @param dist_rank Rank of this gem5 process within the dist run
     * @param dist_size Number of gem5 processes in the dist run (presumably
     * stored in `size`; confirm in the .cc file)
     * @param sync_start Start tick for dist synchronisation
     * @param sync_repeat Frequency for dist synchronisation
     * @param em The event manager associated with the simulated Ethernet link
     * @param is_switch NOTE(review): presumably selects the switch flavour of
     * the sync object (SyncSwitch vs. SyncNode) -- confirm in the .cc file
     * @param num_nodes NOTE(review): presumably the number of connected
     * simulated nodes, forwarded to SyncSwitch -- confirm in the .cc file
     */
    DistIface(unsigned dist_rank,
              unsigned dist_size,
              Tick sync_start,
              Tick sync_repeat,
              EventManager *em,
              bool is_switch,
              int num_nodes);

    virtual ~DistIface();
    /**
     * Send out an Ethernet packet.
     * @param pkt The Ethernet packet to send.
     * @param send_delay The delay in ticks for the send completion event.
     */
    void packetOut(EthPacketPtr pkt, Tick send_delay);
    /**
     * Fetch the packet scheduled to be received next by the simulated
     * network link.
     *
     * @note This method is called within the process() method of the link
     * receive done event. It also schedules the next receive event if the
     * receive queue is not empty.
     */
    EthPacketPtr packetIn() { return recvScheduler.popPacket(); }

    DrainState drain() override;
    void drainResume() override;
    /**
     * NOTE(review): presumably sets up the link with its receive done event
     * and link delay (the same pair is taken by spawnRecvThread()) --
     * confirm in the .cc file.
     * @param e The receive done event of the simulated Ethernet link.
     * @param link_delay The link delay for the simulated Ethernet link.
     */
    void init(const Event *e, Tick link_delay);
    /**
     * Startup hook. The SyncEvent documentation states that the periodic
     * sync event is scheduled when startup() is called.
     */
    void startup();

    void serialize(CheckpointOut &cp) const override;
    void unserialize(CheckpointIn &cp) override;
    /**
     * Initiate the exit from the simulation.
     * @param delay Delay param from the m5 exit command. If Delay is zero
     * then a collaborative exit is requested (i.e. all nodes have to call
     * this method before the distributed simulation can exit). If Delay is
     * not zero then exit is requested asap (and it will happen at the next
     * sync tick).
     * @return False if we are in distributed mode (i.e. exit can happen only
     * at sync), True otherwise.
     */
    static bool readyToExit(Tick delay);
    /**
     * Initiate taking a checkpoint
     * @param delay Delay param from the m5 checkpoint command. If Delay is
     * zero then a collaborative checkpoint is requested (i.e. all nodes have
     * to call this method before the checkpoint can be taken). If Delay is
     * not zero then a checkpoint is requested asap (and it will happen at the
     * next sync tick).
     * @param period NOTE(review): presumably the period param from the m5
     * checkpoint command -- its handling is not visible in this header.
     * @return False if we are in dist mode (i.e. the checkpoint can be taken
     * only at sync), True otherwise.
     */
    static bool readyToCkpt(Tick delay, Tick period);
    /**
     * Getter for the dist rank param.
     */
    static uint64_t rankParam();
    /**
     * Getter for the dist size param.
     */
    static uint64_t sizeParam();
593 };
594
595#endif
596