1/*
2 * Copyright (c) 2015-2016 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabor Dozsa
38 */
39
/** @file
41 * The interface class for dist gem5 simulations.
42 *
43 * dist-gem5 is an extension to gem5 to enable parallel simulation of a
44 * distributed system (e.g. simulation of a pool of machines
 * connected by Ethernet links). A dist gem5 run consists of separate gem5
46 * processes running in parallel. Each gem5 process executes
47 * the simulation of a component of the simulated distributed system.
48 * (An example component can be a dist-core board with an Ethernet NIC.)
49 * The DistIface class below provides services to transfer data and
50 * control messages among the gem5 processes. The main such services are
51 * as follows.
52 *
53 * 1. Send a data packet coming from a simulated Ethernet link. The packet
54 * will be transferred to (all) the target(s) gem5 processes. The send
55 * operation is always performed by the simulation thread, i.e. the gem5
56 * thread that is processing the event queue associated with the simulated
57 * Ethernet link.
58 *
 * 2. Spawn a receiver thread to process messages coming in from
 * other gem5 processes. Each simulated Ethernet link has its own
 * associated receiver thread. The receiver thread saves the incoming packet
 * and schedules an appropriate receive event in the event queue.
63 *
 * 3. Schedule a global barrier event periodically to keep the peer gem5
 * processes in sync. The basic idea is that no gem5 process can go ahead
 * further than the simulated link transmission delay, which ensures that a
 * corresponding receive event can always be scheduled for any message
 * coming in from a peer gem5 process.
70 *
71 *
72 *
73 * This interface is an abstract class. It can work with various low level
74 * send/receive service implementations (e.g. TCP/IP, MPI,...). A TCP
75 * stream socket version is implemented in src/dev/net/tcp_iface.[hh,cc].
76 */
77#ifndef __DEV_DIST_IFACE_HH__
78#define __DEV_DIST_IFACE_HH__
79
#include <array>
#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>

#include "base/logging.hh"
#include "dev/net/dist_packet.hh"
#include "dev/net/etherpkt.hh"
#include "sim/core.hh"
#include "sim/drain.hh"
#include "sim/global_event.hh"
#include "sim/serialize.hh"
93
94class EventManager;
95class System;
96class ThreadContext;
97
98/**
99 * The interface class to talk to peer gem5 processes.
100 */
101class DistIface : public Drainable, public Serializable
102{
  public:
    /** Short name for the message header type declared in DistHeaderPkt. */
    typedef DistHeaderPkt::Header Header;

  protected:
    /** Short name for the message type enumeration of DistHeaderPkt. */
    typedef DistHeaderPkt::MsgType MsgType;
    /** Short name for the request type of DistHeaderPkt (ckpt/exit/stop). */
    typedef DistHeaderPkt::ReqType ReqType;

  private:
    // Forward declaration: Sync names SyncEvent as a friend before the
    // SyncEvent class itself is defined further down.
    class SyncEvent;
112    /** @class Sync
113     * This class implements global sync operations among gem5 peer processes.
114     *
115     * @note This class is used as a singleton object (shared by all DistIface
116     * objects).
117     */
118    class Sync : public Serializable
119    {
120      protected:
121        /**
122         * The lock to protect access to the Sync object.
123         */
124        std::mutex lock;
125        /**
126         * Condition variable for the simulation thread to wait on
127         * until all receiver threads completes the current global
128         * synchronisation.
129         */
130        std::condition_variable cv;
131        /**
132         * Number of receiver threads that not yet completed the current global
133         * synchronisation.
134         */
135        unsigned waitNum;
136        /**
137         * Flag is set if exit is permitted upon sync completion
138         */
139        bool doExit;
140        /**
141         * Flag is set if taking a ckpt is permitted upon sync completion
142         */
143        bool doCkpt;
144        /**
145         * Flag is set if sync is to stop upon sync completion
146         */
147        bool doStopSync;
148        /**
149         * The repeat value for the next periodic sync
150         */
151        Tick nextRepeat;
152        /**
153         * Tick for the next periodic sync (if the event is not scheduled yet)
154         */
155        Tick nextAt;
156        /**
157         *  Flag is set if the sync is aborted (e.g. due to connection lost)
158         */
159        bool isAbort;
160
161        friend class SyncEvent;
162
163      public:
164        /**
165         * Initialize periodic sync params.
166         *
167         * @param start Start tick for dist synchronisation
168         * @param repeat Frequency of dist synchronisation
169         *
170         */
171        void init(Tick start, Tick repeat);
172        /**
173         *  Core method to perform a full dist sync.
174         *
175         * @return true if the sync completes, false if it gets aborted
176         */
177        virtual bool run(bool same_tick) = 0;
178        /**
179         * Callback when the receiver thread gets a sync ack message.
180         *
181         * @return false if the receiver thread needs to stop (e.g.
182         * simulation is to exit)
183         */
184        virtual bool progress(Tick send_tick,
185                              Tick next_repeat,
186                              ReqType do_ckpt,
187                              ReqType do_exit,
188                              ReqType do_stop_sync) = 0;
189        /**
190         * Abort processing an on-going sync event (in case of an error, e.g.
191         * lost connection to a peer gem5)
192         */
193        void abort();
194
195        virtual void requestCkpt(ReqType req) = 0;
196        virtual void requestExit(ReqType req) = 0;
197        virtual void requestStopSync(ReqType req) = 0;
198
199        void drainComplete();
200
201        virtual void serialize(CheckpointOut &cp) const override = 0;
202        virtual void unserialize(CheckpointIn &cp) override = 0;
203    };
204
205    class SyncNode: public Sync
206    {
207      private:
208        /**
209         * Exit requested
210         */
211        ReqType needExit;
212        /**
213         * Ckpt requested
214         */
215        ReqType needCkpt;
216        /**
217         * Sync stop requested
218         */
219        ReqType needStopSync;
220
221      public:
222
223        SyncNode();
224        ~SyncNode() {}
225        bool run(bool same_tick) override;
226        bool progress(Tick max_req_tick,
227                      Tick next_repeat,
228                      ReqType do_ckpt,
229                      ReqType do_exit,
230                      ReqType do_stop_sync) override;
231
232        void requestCkpt(ReqType req) override;
233        void requestExit(ReqType req) override;
234        void requestStopSync(ReqType req) override;
235
236        void serialize(CheckpointOut &cp) const override;
237        void unserialize(CheckpointIn &cp) override;
238    };
239
240    class SyncSwitch: public Sync
241    {
242      private:
243        /**
244         * Counter for recording exit requests
245         */
246        unsigned numExitReq;
247        /**
248         * Counter for recording ckpt requests
249         */
250        unsigned numCkptReq;
251        /**
252         * Counter for recording stop sync requests
253         */
254        unsigned numStopSyncReq;
255        /**
256         *  Number of connected simulated nodes
257         */
258        unsigned numNodes;
259
260      public:
261        SyncSwitch(int num_nodes);
262        ~SyncSwitch() {}
263
264        bool run(bool same_tick) override;
265        bool progress(Tick max_req_tick,
266                      Tick next_repeat,
267                      ReqType do_ckpt,
268                      ReqType do_exit,
269                      ReqType do_stop_sync) override;
270
271        void requestCkpt(ReqType) override {
272            panic("Switch requested checkpoint");
273        }
274        void requestExit(ReqType) override {
275            panic("Switch requested exit");
276        }
277        void requestStopSync(ReqType) override {
278            panic("Switch requested stop sync");
279        }
280
281        void serialize(CheckpointOut &cp) const override;
282        void unserialize(CheckpointIn &cp) override;
283    };
284
285    /**
286     * The global event to schedule periodic dist sync. It is used as a
287     * singleton object.
288     *
289     * The periodic synchronisation works as follows.
290     * 1. A SyncEvent is scheduled as a global event when startup() is
291     * called.
292     * 2. The process() method of the SyncEvent initiates a new barrier
293     * for each simulated Ethernet link.
294     * 3. Simulation thread(s) then waits until all receiver threads
295     * complete the ongoing barrier. The global sync event is done.
296     */
297    class SyncEvent : public GlobalSyncEvent
298    {
299      private:
300        /**
301         * Flag to set when the system is draining
302         */
303        bool _draining;
304      public:
305        /**
306         * Only the firstly instantiated DistIface object will
307         * call this constructor.
308         */
309        SyncEvent() : GlobalSyncEvent(Sim_Exit_Pri, 0), _draining(false) {}
310
311        ~SyncEvent() {}
312        /**
313         * Schedule the first periodic sync event.
314         */
315        void start();
316        /**
317         * This is a global event so process() will only be called by
318         * exactly one simulation thread. (See further comments in the .cc
319         * file.)
320         */
321        void process() override;
322
323        bool draining() const { return _draining; }
324        void draining(bool fl) { _draining = fl; }
325    };
326    /**
327     * Class to encapsulate information about data packets received.
328
329     * @note The main purpose of the class to take care of scheduling receive
330     * done events for the simulated network link and store incoming packets
331     * until they can be received by the simulated network link.
332     */
333    class RecvScheduler : public Serializable
334    {
335      private:
336        /**
337         * Received packet descriptor. This information is used by the receive
338         * thread to schedule receive events and by the simulation thread to
339         * process those events.
340         */
341        struct Desc : public Serializable
342        {
343            EthPacketPtr packet;
344            Tick sendTick;
345            Tick sendDelay;
346
347            Desc() : sendTick(0), sendDelay(0) {}
348            Desc(EthPacketPtr p, Tick s, Tick d) :
349                packet(p), sendTick(s), sendDelay(d) {}
350            Desc(const Desc &d) :
351                packet(d.packet), sendTick(d.sendTick), sendDelay(d.sendDelay) {}
352
353            void serialize(CheckpointOut &cp) const override;
354            void unserialize(CheckpointIn &cp) override;
355        };
356        /**
357         * The queue to store the receive descriptors.
358         */
359        std::queue<Desc> descQueue;
360        /**
361         * The tick when the most recent receive event was processed.
362         *
363         * @note This information is necessary to simulate possible receiver
364         * link contention when calculating the receive tick for the next
365         * incoming data packet (see the calcReceiveTick() method)
366         */
367        Tick prevRecvTick;
368        /**
369         * The receive done event for the simulated Ethernet link.
370         *
371         * @note This object is constructed by the simulated network link. We
372         * schedule this object for each incoming data packet.
373         */
374        Event *recvDone;
375        /**
376         * The link delay in ticks for the simulated Ethernet link.
377         *
378         * @note This value is used for calculating the receive ticks for
379         * incoming data packets.
380         */
381        Tick linkDelay;
382        /**
383         * The event manager associated with the simulated Ethernet link.
384         *
385         * @note It is used to access the event queue for scheduling receive
386         * done events for the link.
387         */
388        EventManager *eventManager;
389        /**
390         * Calculate the tick to schedule the next receive done event.
391         *
392         * @param send_tick The tick the packet was sent.
393         * @param send_delay The simulated delay at the sender side.
394         * @param prev_recv_tick Tick when the last receive event was
395         * processed.
396         *
397         * @note This method tries to take into account possible receiver link
398         * contention and adjust receive tick for the incoming packets
399         * accordingly.
400         */
401        Tick calcReceiveTick(Tick send_tick,
402                             Tick send_delay,
403                             Tick prev_recv_tick);
404
405        /**
406         * Flag to set if receive ticks for pending packets need to be
407         * recalculated due to changed link latencies at a resume
408         */
409        bool ckptRestore;
410
411      public:
412        /**
413         * Scheduler for the incoming data packets.
414         *
415         * @param em The event manager associated with the simulated Ethernet
416         * link.
417         */
418        RecvScheduler(EventManager *em) :
419            prevRecvTick(0), recvDone(nullptr), linkDelay(0),
420            eventManager(em), ckptRestore(false) {}
421
422        /**
423         *  Initialize network link parameters.
424         *
425         * @note This method is called from the receiver thread (see
426         * recvThreadFunc()).
427         */
428        void init(Event *recv_done, Tick link_delay);
429        /**
430         * Fetch the next packet that is to be received by the simulated network
431         * link.
432         *
433         * @note This method is called from the process() method of the receive
434         * done event associated with the network link.
435         */
436        EthPacketPtr popPacket();
437        /**
438         * Push a newly arrived packet into the desc queue.
439         */
440        void pushPacket(EthPacketPtr new_packet,
441                        Tick send_tick,
442                        Tick send_delay);
443
444        void serialize(CheckpointOut &cp) const override;
445        void unserialize(CheckpointIn &cp) override;
446        /**
447         * Adjust receive ticks for pending packets when restoring from a
448         * checkpoint
449         *
450         * @note Link speed and delay parameters may change at resume.
451         */
452        void resumeRecvTicks();
453    };
    /**
     * Tick to schedule the first dist sync event.
     * This is just an optimization: we do not need any dist sync
     * event until the simulated NIC is brought up by the OS.
     */
    Tick syncStart;
    /**
     * Repeat interval of dist sync events, in ticks.
     */
    Tick syncRepeat;
    /**
     * Receiver thread pointer.
     * Each DistIface object must have exactly one receiver thread.
     */
    std::thread *recvThread;
    /**
     * Meta information about data packets received.
     */
    RecvScheduler recvScheduler;
    /**
     * Use pseudoOp to start synchronization.
     */
    bool syncStartOnPseudoOp;

  protected:
    /**
     * The rank of this process among the gem5 peers.
     */
    unsigned rank;
    /**
     * The number of gem5 processes comprising this dist simulation.
     */
    unsigned size;
    /**
     * Number of DistIface objects (i.e. dist links in this gem5 process)
     */
    static unsigned distIfaceNum;
    /**
     * Unique id for the dist link
     */
    unsigned distIfaceId;

    /**
     * NOTE(review): presumably true when this object is the master DistIface
     * (see the static 'master' pointer below) -- confirm against the .cc.
     */
    bool isMaster;

  private:
    /**
     * Number of receiver threads (in this gem5 process)
     */
    static unsigned recvThreadsNum;
    /**
     * The singleton Sync object to perform dist synchronisation.
     */
    static Sync *sync;
    /**
     * The singleton SyncEvent object to schedule periodic dist sync.
     */
    static SyncEvent *syncEvent;
    /**
     * The very first DistIface object created becomes the master. We need
     * a master to co-ordinate the global synchronisation.
     */
    static DistIface *master;
    /**
     * System pointer used to wake up sleeping threads when stopping sync.
     */
    static System *sys;
    /**
     * Is this node a switch?
     */
     static bool isSwitch;
524
  private:
    // The pure virtual methods below form the low level transport interface
    // that a concrete implementation (e.g. the TCP stream socket version
    // mentioned in the file header) has to provide.

    /**
     * Send out a data packet to the remote end.
     * @param header Meta info about the packet (which needs to be transferred
     * to the destination alongside the packet).
     * @param packet Pointer to the packet to send.
     */
    virtual void sendPacket(const Header &header, const EthPacketPtr &packet) = 0;
    /**
     * Send out a control command to the remote end.
     * @param header Meta info describing the command (e.g. sync request)
     */
    virtual void sendCmd(const Header &header) = 0;
    /**
     * Receive a header (i.e. meta info describing a data packet or a control
     * command) from the remote end.
     * @param header The meta info structure to store the incoming header.
     * @return NOTE(review): the meaning of the boolean result is not visible
     * in this header -- confirm against the transport implementations.
     */
    virtual bool recvHeader(Header &header) = 0;
    /**
     * Receive a packet from the remote end.
     * @param header Meta info about the incoming packet (obtained by a previous
     * call to the recvHeader() method).
     * @param packet Pointer to packet received.
     */
    virtual void recvPacket(const Header &header, EthPacketPtr &packet) = 0;
    /**
     * Init hook for the underlying transport
     */
    virtual void initTransport() = 0;
    /**
     * Spawn the receiver thread.
     * @param recv_done The receive done event associated with the simulated
     * Ethernet link.
     * @param link_delay The link delay for the simulated Ethernet link.
     */
    void spawnRecvThread(const Event *recv_done, Tick link_delay);
    /**
     * The function executed by a receiver thread.
     * @param recv_done The receive done event associated with the simulated
     * Ethernet link (non-const here, unlike in spawnRecvThread()).
     * @param link_delay The link delay for the simulated Ethernet link.
     */
    void recvThreadFunc(Event *recv_done, Tick link_delay);
566
  public:

    /**
     * ctor
     * @param dist_rank Rank of this gem5 process within the dist run
     * @param dist_size NOTE(review): presumably the number of gem5 processes
     * in the dist run (cf. the 'size' member) -- confirm.
     * @param sync_start Start tick for dist synchronisation
     * @param sync_repeat Frequency for dist synchronisation
     * @param em The event manager associated with the simulated Ethernet link
     * @param use_pseudo_op NOTE(review): presumably selects starting the
     * synchronisation via a pseudo op (cf. 'syncStartOnPseudoOp') -- confirm.
     * @param is_switch NOTE(review): presumably tells whether this process
     * is a switch (cf. 'isSwitch') -- confirm.
     * @param num_nodes NOTE(review): presumably the number of connected
     * simulated nodes (cf. SyncSwitch) -- confirm.
     */
    DistIface(unsigned dist_rank,
              unsigned dist_size,
              Tick sync_start,
              Tick sync_repeat,
              EventManager *em,
              bool use_pseudo_op,
              bool is_switch,
              int num_nodes);

    virtual ~DistIface();
    /**
     * Send out an Ethernet packet.
     * @param pkt The Ethernet packet to send.
     * @param send_delay The delay in ticks for the send completion event.
     */
    void packetOut(EthPacketPtr pkt, Tick send_delay);
    /**
     * Fetch the packet scheduled to be received next by the simulated
     * network link.
     *
     * @note This method is called within the process() method of the link
     * receive done event. It also schedules the next receive event if the
     * receive queue is not empty.
     *
     * @return The packet handed back by the receive scheduler's popPacket().
     */
    EthPacketPtr packetIn() { return recvScheduler.popPacket(); }

    // Drainable interface.
    DrainState drain() override;
    void drainResume() override;
    /**
     * @param e NOTE(review): presumably the receive done event of the
     * simulated link (cf. spawnRecvThread()) -- confirm.
     * @param link_delay The link delay for the simulated Ethernet link.
     */
    void init(const Event *e, Tick link_delay);
    void startup();

    // Serializable interface.
    void serialize(CheckpointOut &cp) const override;
    void unserialize(CheckpointIn &cp) override;
    /**
     * Initiate the exit from the simulation.
     * @param delay Delay param from the m5 exit command. If Delay is zero
     * then a collaborative exit is requested (i.e. all nodes have to call
     * this method before the distributed simulation can exit). If Delay is
     * not zero then exit is requested asap (and it will happen at the next
     * sync tick).
     * @return False if we are in distributed mode (i.e. exit can happen only
     * at sync), True otherwise.
     */
    static bool readyToExit(Tick delay);
    /**
     * Initiate taking a checkpoint
     * @param delay Delay param from the m5 checkpoint command. If Delay is
     * zero then a collaborative checkpoint is requested (i.e. all nodes have
     * to call this method before the checkpoint can be taken). If Delay is
     * not zero then a checkpoint is requested asap (and it will happen at the
     * next sync tick).
     * @param period NOTE(review): presumably the period param from the m5
     * checkpoint command -- confirm against the .cc.
     * @return False if we are in dist mode (i.e. a checkpoint can be taken
     * only at sync), True otherwise.
     */
    static bool readyToCkpt(Tick delay, Tick period);
    /**
     * Getter for the dist rank param.
     */
    static uint64_t rankParam();
    /**
     * Getter for the dist size param.
     */
    static uint64_t sizeParam();
    /**
     * Trigger the master to start/stop synchronization.
     * @param tc NOTE(review): presumably the thread context that issued the
     * request -- confirm against the .cc.
     */
    static void toggleSync(ThreadContext *tc);
643 };
644
645#endif
646