dist_iface.hh revision 11757:78ef8daecd81
1/*
2 * Copyright (c) 2015-2016 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabor Dozsa
38 */
39
/** @file
 * The interface class for dist gem5 simulations.
 *
 * dist-gem5 is an extension to gem5 to enable parallel simulation of a
 * distributed system (e.g. simulation of a pool of machines
 * connected by Ethernet links). A dist gem5 run consists of separate gem5
 * processes running in parallel. Each gem5 process executes
 * the simulation of a component of the simulated distributed system.
 * (An example component can be a dist-core board with an Ethernet NIC.)
 * The DistIface class below provides services to transfer data and
 * control messages among the gem5 processes. The main such services are
 * as follows.
 *
 * 1. Send a data packet coming from a simulated Ethernet link. The packet
 * will be transferred to the target gem5 process(es). The send
 * operation is always performed by the simulation thread, i.e. the gem5
 * thread that is processing the event queue associated with the simulated
 * Ethernet link.
 *
 * 2. Spawn a receiver thread to process messages coming in from
 * other gem5 processes. Each simulated Ethernet link has its own
 * associated receiver thread. The receiver thread saves the incoming packet
 * and schedules an appropriate receive event in the event queue.
 *
 * 3. Schedule a global barrier event periodically to keep the peer gem5
 * processes in sync. The basic idea is that no gem5 process can go ahead
 * further than the simulated link transmission delay, which ensures that a
 * corresponding receive event can always be scheduled for any message
 * coming in from a peer gem5 process.
 *
 * This interface is an abstract class. It can work with various low level
 * send/receive service implementations (e.g. TCP/IP, MPI,...). A TCP
 * stream socket version is implemented in src/dev/net/tcp_iface.[hh,cc].
 */
77#ifndef __DEV_DIST_IFACE_HH__
78#define __DEV_DIST_IFACE_HH__
79
#include <array>
#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>

#include "dev/net/dist_packet.hh"
#include "dev/net/etherpkt.hh"
#include "sim/core.hh"
#include "sim/drain.hh"
#include "sim/global_event.hh"
#include "sim/serialize.hh"
92
// Forward declarations: this header only uses pointers to these types.
class EventManager;
class System;
class ThreadContext;
96
97/**
98 * The interface class to talk to peer gem5 processes.
99 */
100class DistIface : public Drainable, public Serializable
101{
102  public:
103    typedef DistHeaderPkt::Header Header;
104
105  protected:
106    typedef DistHeaderPkt::MsgType MsgType;
107    typedef DistHeaderPkt::ReqType ReqType;
108
109  private:
110    class SyncEvent;
111    /** @class Sync
112     * This class implements global sync operations among gem5 peer processes.
113     *
114     * @note This class is used as a singleton object (shared by all DistIface
115     * objects).
116     */
117    class Sync : public Serializable
118    {
119      protected:
120        /**
121         * The lock to protect access to the Sync object.
122         */
123        std::mutex lock;
124        /**
125         * Condition variable for the simulation thread to wait on
126         * until all receiver threads completes the current global
127         * synchronisation.
128         */
129        std::condition_variable cv;
130        /**
131         * Number of receiver threads that not yet completed the current global
132         * synchronisation.
133         */
134        unsigned waitNum;
135        /**
136         * Flag is set if exit is permitted upon sync completion
137         */
138        bool doExit;
139        /**
140         * Flag is set if taking a ckpt is permitted upon sync completion
141         */
142        bool doCkpt;
143        /**
144         * Flag is set if sync is to stop upon sync completion
145         */
146        bool doStopSync;
147        /**
148         * The repeat value for the next periodic sync
149         */
150        Tick nextRepeat;
151        /**
152         * Tick for the next periodic sync (if the event is not scheduled yet)
153         */
154        Tick nextAt;
155        /**
156         *  Flag is set if the sync is aborted (e.g. due to connection lost)
157         */
158        bool isAbort;
159
160        friend class SyncEvent;
161
162      public:
163        /**
164         * Initialize periodic sync params.
165         *
166         * @param start Start tick for dist synchronisation
167         * @param repeat Frequency of dist synchronisation
168         *
169         */
170        void init(Tick start, Tick repeat);
171        /**
172         *  Core method to perform a full dist sync.
173         *
174         * @return true if the sync completes, false if it gets aborted
175         */
176        virtual bool run(bool same_tick) = 0;
177        /**
178         * Callback when the receiver thread gets a sync ack message.
179         *
180         * @return false if the receiver thread needs to stop (e.g.
181         * simulation is to exit)
182         */
183        virtual bool progress(Tick send_tick,
184                              Tick next_repeat,
185                              ReqType do_ckpt,
186                              ReqType do_exit,
187                              ReqType do_stop_sync) = 0;
188        /**
189         * Abort processing an on-going sync event (in case of an error, e.g.
190         * lost connection to a peer gem5)
191         */
192        void abort();
193
194        virtual void requestCkpt(ReqType req) = 0;
195        virtual void requestExit(ReqType req) = 0;
196        virtual void requestStopSync(ReqType req) = 0;
197
198        void drainComplete();
199
200        virtual void serialize(CheckpointOut &cp) const override = 0;
201        virtual void unserialize(CheckpointIn &cp) override = 0;
202    };
203
204    class SyncNode: public Sync
205    {
206      private:
207        /**
208         * Exit requested
209         */
210        ReqType needExit;
211        /**
212         * Ckpt requested
213         */
214        ReqType needCkpt;
215        /**
216         * Sync stop requested
217         */
218        ReqType needStopSync;
219
220      public:
221
222        SyncNode();
223        ~SyncNode() {}
224        bool run(bool same_tick) override;
225        bool progress(Tick max_req_tick,
226                      Tick next_repeat,
227                      ReqType do_ckpt,
228                      ReqType do_exit,
229                      ReqType do_stop_sync) override;
230
231        void requestCkpt(ReqType req) override;
232        void requestExit(ReqType req) override;
233        void requestStopSync(ReqType req) override;
234
235        void serialize(CheckpointOut &cp) const override;
236        void unserialize(CheckpointIn &cp) override;
237    };
238
239    class SyncSwitch: public Sync
240    {
241      private:
242        /**
243         * Counter for recording exit requests
244         */
245        unsigned numExitReq;
246        /**
247         * Counter for recording ckpt requests
248         */
249        unsigned numCkptReq;
250        /**
251         * Counter for recording stop sync requests
252         */
253        unsigned numStopSyncReq;
254        /**
255         *  Number of connected simulated nodes
256         */
257        unsigned numNodes;
258
259      public:
260        SyncSwitch(int num_nodes);
261        ~SyncSwitch() {}
262
263        bool run(bool same_tick) override;
264        bool progress(Tick max_req_tick,
265                      Tick next_repeat,
266                      ReqType do_ckpt,
267                      ReqType do_exit,
268                      ReqType do_stop_sync) override;
269
270        void requestCkpt(ReqType) override {
271            panic("Switch requested checkpoint");
272        }
273        void requestExit(ReqType) override {
274            panic("Switch requested exit");
275        }
276        void requestStopSync(ReqType) override {
277            panic("Switch requested stop sync");
278        }
279
280        void serialize(CheckpointOut &cp) const override;
281        void unserialize(CheckpointIn &cp) override;
282    };
283
284    /**
285     * The global event to schedule periodic dist sync. It is used as a
286     * singleton object.
287     *
288     * The periodic synchronisation works as follows.
289     * 1. A SyncEvent is scheduled as a global event when startup() is
290     * called.
291     * 2. The process() method of the SyncEvent initiates a new barrier
292     * for each simulated Ethernet link.
293     * 3. Simulation thread(s) then waits until all receiver threads
294     * complete the ongoing barrier. The global sync event is done.
295     */
296    class SyncEvent : public GlobalSyncEvent
297    {
298      private:
299        /**
300         * Flag to set when the system is draining
301         */
302        bool _draining;
303      public:
304        /**
305         * Only the firstly instantiated DistIface object will
306         * call this constructor.
307         */
308        SyncEvent() : GlobalSyncEvent(Sim_Exit_Pri, 0), _draining(false) {}
309
310        ~SyncEvent() {}
311        /**
312         * Schedule the first periodic sync event.
313         */
314        void start();
315        /**
316         * This is a global event so process() will only be called by
317         * exactly one simulation thread. (See further comments in the .cc
318         * file.)
319         */
320        void process() override;
321
322        bool draining() const { return _draining; }
323        void draining(bool fl) { _draining = fl; }
324    };
325    /**
326     * Class to encapsulate information about data packets received.
327
328     * @note The main purpose of the class to take care of scheduling receive
329     * done events for the simulated network link and store incoming packets
330     * until they can be received by the simulated network link.
331     */
332    class RecvScheduler : public Serializable
333    {
334      private:
335        /**
336         * Received packet descriptor. This information is used by the receive
337         * thread to schedule receive events and by the simulation thread to
338         * process those events.
339         */
340        struct Desc : public Serializable
341        {
342            EthPacketPtr packet;
343            Tick sendTick;
344            Tick sendDelay;
345
346            Desc() : sendTick(0), sendDelay(0) {}
347            Desc(EthPacketPtr p, Tick s, Tick d) :
348                packet(p), sendTick(s), sendDelay(d) {}
349            Desc(const Desc &d) :
350                packet(d.packet), sendTick(d.sendTick), sendDelay(d.sendDelay) {}
351
352            void serialize(CheckpointOut &cp) const override;
353            void unserialize(CheckpointIn &cp) override;
354        };
355        /**
356         * The queue to store the receive descriptors.
357         */
358        std::queue<Desc> descQueue;
359        /**
360         * The tick when the most recent receive event was processed.
361         *
362         * @note This information is necessary to simulate possible receiver
363         * link contention when calculating the receive tick for the next
364         * incoming data packet (see the calcReceiveTick() method)
365         */
366        Tick prevRecvTick;
367        /**
368         * The receive done event for the simulated Ethernet link.
369         *
370         * @note This object is constructed by the simulated network link. We
371         * schedule this object for each incoming data packet.
372         */
373        Event *recvDone;
374        /**
375         * The link delay in ticks for the simulated Ethernet link.
376         *
377         * @note This value is used for calculating the receive ticks for
378         * incoming data packets.
379         */
380        Tick linkDelay;
381        /**
382         * The event manager associated with the simulated Ethernet link.
383         *
384         * @note It is used to access the event queue for scheduling receive
385         * done events for the link.
386         */
387        EventManager *eventManager;
388        /**
389         * Calculate the tick to schedule the next receive done event.
390         *
391         * @param send_tick The tick the packet was sent.
392         * @param send_delay The simulated delay at the sender side.
393         * @param prev_recv_tick Tick when the last receive event was
394         * processed.
395         *
396         * @note This method tries to take into account possible receiver link
397         * contention and adjust receive tick for the incoming packets
398         * accordingly.
399         */
400        Tick calcReceiveTick(Tick send_tick,
401                             Tick send_delay,
402                             Tick prev_recv_tick);
403
404        /**
405         * Flag to set if receive ticks for pending packets need to be
406         * recalculated due to changed link latencies at a resume
407         */
408        bool ckptRestore;
409
410      public:
411        /**
412         * Scheduler for the incoming data packets.
413         *
414         * @param em The event manager associated with the simulated Ethernet
415         * link.
416         */
417        RecvScheduler(EventManager *em) :
418            prevRecvTick(0), recvDone(nullptr), linkDelay(0),
419            eventManager(em), ckptRestore(false) {}
420
421        /**
422         *  Initialize network link parameters.
423         *
424         * @note This method is called from the receiver thread (see
425         * recvThreadFunc()).
426         */
427        void init(Event *recv_done, Tick link_delay);
428        /**
429         * Fetch the next packet that is to be received by the simulated network
430         * link.
431         *
432         * @note This method is called from the process() method of the receive
433         * done event associated with the network link.
434         */
435        EthPacketPtr popPacket();
436        /**
437         * Push a newly arrived packet into the desc queue.
438         */
439        void pushPacket(EthPacketPtr new_packet,
440                        Tick send_tick,
441                        Tick send_delay);
442
443        void serialize(CheckpointOut &cp) const override;
444        void unserialize(CheckpointIn &cp) override;
445        /**
446         * Adjust receive ticks for pending packets when restoring from a
447         * checkpoint
448         *
449         * @note Link speed and delay parameters may change at resume.
450         */
451        void resumeRecvTicks();
452    };
453    /**
454     * Tick to schedule the first dist sync event.
455     * This is just as optimization : we do not need any dist sync
456     * event until the simulated NIC is brought up by the OS.
457     */
458    Tick syncStart;
459    /**
460     * Frequency of dist sync events in ticks.
461     */
462    Tick syncRepeat;
463    /**
464     * Receiver thread pointer.
465     * Each DistIface object must have exactly one receiver thread.
466     */
467    std::thread *recvThread;
468    /**
469     * Meta information about data packets received.
470     */
471    RecvScheduler recvScheduler;
472    /**
473     * Use pseudoOp to start synchronization.
474     */
475    bool syncStartOnPseudoOp;
476
477  protected:
478    /**
479     * The rank of this process among the gem5 peers.
480     */
481    unsigned rank;
482    /**
483     * The number of gem5 processes comprising this dist simulation.
484     */
485    unsigned size;
486    /**
487     * Number of DistIface objects (i.e. dist links in this gem5 process)
488     */
489    static unsigned distIfaceNum;
490    /**
491     * Unique id for the dist link
492     */
493    unsigned distIfaceId;
494
495    bool isMaster;
496
497  private:
498    /**
499     * Number of receiver threads (in this gem5 process)
500     */
501    static unsigned recvThreadsNum;
502    /**
503     * The singleton Sync object to perform dist synchronisation.
504     */
505    static Sync *sync;
506    /**
507     * The singleton SyncEvent object to schedule periodic dist sync.
508     */
509    static SyncEvent *syncEvent;
510    /**
511     * The very first DistIface object created becomes the master. We need
512     * a master to co-ordinate the global synchronisation.
513     */
514    static DistIface *master;
515    /**
516     * System pointer used to wakeup sleeping threads when stopping sync.
517     */
518    static System *sys;
519    /**
520     * Is this node a switch?
521     */
522     static bool isSwitch;
523
524  private:
525    /**
526     * Send out a data packet to the remote end.
527     * @param header Meta info about the packet (which needs to be transferred
528     * to the destination alongside the packet).
529     * @param packet Pointer to the packet to send.
530     */
531    virtual void sendPacket(const Header &header, const EthPacketPtr &packet) = 0;
532    /**
533     * Send out a control command to the remote end.
534     * @param header Meta info describing the command (e.g. sync request)
535     */
536    virtual void sendCmd(const Header &header) = 0;
537    /**
538     * Receive a header (i.e. meta info describing a data packet or a control command)
539     * from the remote end.
540     * @param header The meta info structure to store the incoming header.
541     */
542    virtual bool recvHeader(Header &header) = 0;
543    /**
544     * Receive a packet from the remote end.
545     * @param header Meta info about the incoming packet (obtanied by a previous
546     * call to the recvHedaer() method).
547     * @param Pointer to packet received.
548     */
549    virtual void recvPacket(const Header &header, EthPacketPtr &packet) = 0;
550    /**
551     * Init hook for the underlaying transport
552     */
553    virtual void initTransport() = 0;
554    /**
555     * spawn the receiver thread.
556     * @param recv_done The receive done event associated with the simulated
557     * Ethernet link.
558     * @param link_delay The link delay for the simulated Ethernet link.
559     */
560    void spawnRecvThread(const Event *recv_done, Tick link_delay);
561    /**
562     * The function executed by a receiver thread.
563     */
564    void recvThreadFunc(Event *recv_done, Tick link_delay);
565
566  public:
567
568    /**
569     * ctor
570     * @param dist_rank Rank of this gem5 process within the dist run
571     * @param sync_start Start tick for dist synchronisation
572     * @param sync_repeat Frequency for dist synchronisation
573     * @param em The event manager associated with the simulated Ethernet link
574     */
575    DistIface(unsigned dist_rank,
576              unsigned dist_size,
577              Tick sync_start,
578              Tick sync_repeat,
579              EventManager *em,
580              bool use_pseudo_op,
581              bool is_switch,
582              int num_nodes);
583
584    virtual ~DistIface();
585    /**
586     * Send out an Ethernet packet.
587     * @param pkt The Ethernet packet to send.
588     * @param send_delay The delay in ticks for the send completion event.
589     */
590    void packetOut(EthPacketPtr pkt, Tick send_delay);
591    /**
592     * Fetch the packet scheduled to be received next by the simulated
593     * network link.
594     *
595     * @note This method is called within the process() method of the link
596     * receive done event. It also schedules the next receive event if the
597     * receive queue is not empty.
598     */
599    EthPacketPtr packetIn() { return recvScheduler.popPacket(); }
600
601    DrainState drain() override;
602    void drainResume() override;
603    void init(const Event *e, Tick link_delay);
604    void startup();
605
606    void serialize(CheckpointOut &cp) const override;
607    void unserialize(CheckpointIn &cp) override;
608    /**
609     * Initiate the exit from the simulation.
610     * @param delay Delay param from the m5 exit command. If Delay is zero
611     * then a collaborative exit is requested (i.e. all nodes have to call
612     * this method before the distributed simulation can exit). If Delay is
613     * not zero then exit is requested asap (and it will happen at the next
614     * sync tick).
615     * @return False if we are in distributed mode (i.e. exit can happen only
616     * at sync), True otherwise.
617     */
618    static bool readyToExit(Tick delay);
619    /**
620     * Initiate taking a checkpoint
621     * @param delay Delay param from the m5 checkpoint command. If Delay is
622     * zero then a collaborative checkpoint is requested (i.e. all nodes have
623     * to call this method before the checkpoint can be taken). If Delay is
624     * not zero then a checkpoint is requested asap (and it will happen at the
625     * next sync tick).
626     * @return False if we are in dist mode (i.e. exit can happen only at
627     * sync), True otherwise.
628     */
629    static bool readyToCkpt(Tick delay, Tick period);
630    /**
631     * Getter for the dist rank param.
632     */
633    static uint64_t rankParam();
634    /**
635     * Getter for the dist size param.
636     */
637    static uint64_t sizeParam();
638    /**
639     * Trigger the master to start/stop synchronization.
640     */
641    static void toggleSync(ThreadContext *tc);
642 };
643
644#endif
645