dist_iface.hh (11800:54436a1784dc) dist_iface.hh (12334:e0ab29a34764)
1/*
2 * Copyright (c) 2015-2016 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabor Dozsa
38 */
39
40/* @file
41 * The interface class for dist gem5 simulations.
42 *
43 * dist-gem5 is an extension to gem5 to enable parallel simulation of a
44 * distributed system (e.g. simulation of a pool of machines
45 * connected by Ethernet links). A dist gem5 run consists of seperate gem5
46 * processes running in parallel. Each gem5 process executes
47 * the simulation of a component of the simulated distributed system.
48 * (An example component can be a dist-core board with an Ethernet NIC.)
49 * The DistIface class below provides services to transfer data and
50 * control messages among the gem5 processes. The main such services are
51 * as follows.
52 *
53 * 1. Send a data packet coming from a simulated Ethernet link. The packet
54 * will be transferred to (all) the target(s) gem5 processes. The send
55 * operation is always performed by the simulation thread, i.e. the gem5
56 * thread that is processing the event queue associated with the simulated
57 * Ethernet link.
58 *
59 * 2. Spawn a receiver thread to process messages coming in from the
60 * from other gem5 processes. Each simulated Ethernet link has its own
61 * associated receiver thread. The receiver thread saves the incoming packet
62 * and schedule an appropriate receive event in the event queue.
63 *
64 * 3. Schedule a global barrier event periodically to keep the gem5
65 * processes in sync.
66 * Periodic barrier event to keep peer gem5 processes in sync. The basic idea
67 * is that no gem5 process can go ahead further than the simulated link
68 * transmission delay to ensure that a corresponding receive event can always
69 * be scheduled for any message coming in from a peer gem5 process.
70 *
71 *
72 *
73 * This interface is an abstract class. It can work with various low level
74 * send/receive service implementations (e.g. TCP/IP, MPI,...). A TCP
75 * stream socket version is implemented in src/dev/net/tcp_iface.[hh,cc].
76 */
77#ifndef __DEV_DIST_IFACE_HH__
78#define __DEV_DIST_IFACE_HH__
79
80#include <array>
81#include <mutex>
82#include <queue>
83#include <thread>
84#include <utility>
85
1/*
2 * Copyright (c) 2015-2016 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Gabor Dozsa
38 */
39
40/* @file
41 * The interface class for dist gem5 simulations.
42 *
43 * dist-gem5 is an extension to gem5 to enable parallel simulation of a
44 * distributed system (e.g. simulation of a pool of machines
45 * connected by Ethernet links). A dist gem5 run consists of seperate gem5
46 * processes running in parallel. Each gem5 process executes
47 * the simulation of a component of the simulated distributed system.
48 * (An example component can be a dist-core board with an Ethernet NIC.)
49 * The DistIface class below provides services to transfer data and
50 * control messages among the gem5 processes. The main such services are
51 * as follows.
52 *
53 * 1. Send a data packet coming from a simulated Ethernet link. The packet
54 * will be transferred to (all) the target(s) gem5 processes. The send
55 * operation is always performed by the simulation thread, i.e. the gem5
56 * thread that is processing the event queue associated with the simulated
57 * Ethernet link.
58 *
59 * 2. Spawn a receiver thread to process messages coming in from the
60 * from other gem5 processes. Each simulated Ethernet link has its own
61 * associated receiver thread. The receiver thread saves the incoming packet
62 * and schedule an appropriate receive event in the event queue.
63 *
64 * 3. Schedule a global barrier event periodically to keep the gem5
65 * processes in sync.
66 * Periodic barrier event to keep peer gem5 processes in sync. The basic idea
67 * is that no gem5 process can go ahead further than the simulated link
68 * transmission delay to ensure that a corresponding receive event can always
69 * be scheduled for any message coming in from a peer gem5 process.
70 *
71 *
72 *
73 * This interface is an abstract class. It can work with various low level
74 * send/receive service implementations (e.g. TCP/IP, MPI,...). A TCP
75 * stream socket version is implemented in src/dev/net/tcp_iface.[hh,cc].
76 */
77#ifndef __DEV_DIST_IFACE_HH__
78#define __DEV_DIST_IFACE_HH__
79
80#include <array>
81#include <mutex>
82#include <queue>
83#include <thread>
84#include <utility>
85
86#include "base/misc.hh"
86#include "base/logging.hh"
87#include "dev/net/dist_packet.hh"
88#include "dev/net/etherpkt.hh"
89#include "sim/core.hh"
90#include "sim/drain.hh"
91#include "sim/global_event.hh"
92#include "sim/serialize.hh"
93
94class EventManager;
95class System;
96class ThreadContext;
97
98/**
99 * The interface class to talk to peer gem5 processes.
100 */
101class DistIface : public Drainable, public Serializable
102{
103 public:
104 typedef DistHeaderPkt::Header Header;
105
106 protected:
107 typedef DistHeaderPkt::MsgType MsgType;
108 typedef DistHeaderPkt::ReqType ReqType;
109
110 private:
111 class SyncEvent;
112 /** @class Sync
113 * This class implements global sync operations among gem5 peer processes.
114 *
115 * @note This class is used as a singleton object (shared by all DistIface
116 * objects).
117 */
118 class Sync : public Serializable
119 {
120 protected:
121 /**
122 * The lock to protect access to the Sync object.
123 */
124 std::mutex lock;
125 /**
126 * Condition variable for the simulation thread to wait on
127 * until all receiver threads completes the current global
128 * synchronisation.
129 */
130 std::condition_variable cv;
131 /**
132 * Number of receiver threads that not yet completed the current global
133 * synchronisation.
134 */
135 unsigned waitNum;
136 /**
137 * Flag is set if exit is permitted upon sync completion
138 */
139 bool doExit;
140 /**
141 * Flag is set if taking a ckpt is permitted upon sync completion
142 */
143 bool doCkpt;
144 /**
145 * Flag is set if sync is to stop upon sync completion
146 */
147 bool doStopSync;
148 /**
149 * The repeat value for the next periodic sync
150 */
151 Tick nextRepeat;
152 /**
153 * Tick for the next periodic sync (if the event is not scheduled yet)
154 */
155 Tick nextAt;
156 /**
157 * Flag is set if the sync is aborted (e.g. due to connection lost)
158 */
159 bool isAbort;
160
161 friend class SyncEvent;
162
163 public:
164 /**
165 * Initialize periodic sync params.
166 *
167 * @param start Start tick for dist synchronisation
168 * @param repeat Frequency of dist synchronisation
169 *
170 */
171 void init(Tick start, Tick repeat);
172 /**
173 * Core method to perform a full dist sync.
174 *
175 * @return true if the sync completes, false if it gets aborted
176 */
177 virtual bool run(bool same_tick) = 0;
178 /**
179 * Callback when the receiver thread gets a sync ack message.
180 *
181 * @return false if the receiver thread needs to stop (e.g.
182 * simulation is to exit)
183 */
184 virtual bool progress(Tick send_tick,
185 Tick next_repeat,
186 ReqType do_ckpt,
187 ReqType do_exit,
188 ReqType do_stop_sync) = 0;
189 /**
190 * Abort processing an on-going sync event (in case of an error, e.g.
191 * lost connection to a peer gem5)
192 */
193 void abort();
194
195 virtual void requestCkpt(ReqType req) = 0;
196 virtual void requestExit(ReqType req) = 0;
197 virtual void requestStopSync(ReqType req) = 0;
198
199 void drainComplete();
200
201 virtual void serialize(CheckpointOut &cp) const override = 0;
202 virtual void unserialize(CheckpointIn &cp) override = 0;
203 };
204
205 class SyncNode: public Sync
206 {
207 private:
208 /**
209 * Exit requested
210 */
211 ReqType needExit;
212 /**
213 * Ckpt requested
214 */
215 ReqType needCkpt;
216 /**
217 * Sync stop requested
218 */
219 ReqType needStopSync;
220
221 public:
222
223 SyncNode();
224 ~SyncNode() {}
225 bool run(bool same_tick) override;
226 bool progress(Tick max_req_tick,
227 Tick next_repeat,
228 ReqType do_ckpt,
229 ReqType do_exit,
230 ReqType do_stop_sync) override;
231
232 void requestCkpt(ReqType req) override;
233 void requestExit(ReqType req) override;
234 void requestStopSync(ReqType req) override;
235
236 void serialize(CheckpointOut &cp) const override;
237 void unserialize(CheckpointIn &cp) override;
238 };
239
240 class SyncSwitch: public Sync
241 {
242 private:
243 /**
244 * Counter for recording exit requests
245 */
246 unsigned numExitReq;
247 /**
248 * Counter for recording ckpt requests
249 */
250 unsigned numCkptReq;
251 /**
252 * Counter for recording stop sync requests
253 */
254 unsigned numStopSyncReq;
255 /**
256 * Number of connected simulated nodes
257 */
258 unsigned numNodes;
259
260 public:
261 SyncSwitch(int num_nodes);
262 ~SyncSwitch() {}
263
264 bool run(bool same_tick) override;
265 bool progress(Tick max_req_tick,
266 Tick next_repeat,
267 ReqType do_ckpt,
268 ReqType do_exit,
269 ReqType do_stop_sync) override;
270
271 void requestCkpt(ReqType) override {
272 panic("Switch requested checkpoint");
273 }
274 void requestExit(ReqType) override {
275 panic("Switch requested exit");
276 }
277 void requestStopSync(ReqType) override {
278 panic("Switch requested stop sync");
279 }
280
281 void serialize(CheckpointOut &cp) const override;
282 void unserialize(CheckpointIn &cp) override;
283 };
284
285 /**
286 * The global event to schedule periodic dist sync. It is used as a
287 * singleton object.
288 *
289 * The periodic synchronisation works as follows.
290 * 1. A SyncEvent is scheduled as a global event when startup() is
291 * called.
292 * 2. The process() method of the SyncEvent initiates a new barrier
293 * for each simulated Ethernet link.
294 * 3. Simulation thread(s) then waits until all receiver threads
295 * complete the ongoing barrier. The global sync event is done.
296 */
297 class SyncEvent : public GlobalSyncEvent
298 {
299 private:
300 /**
301 * Flag to set when the system is draining
302 */
303 bool _draining;
304 public:
305 /**
306 * Only the firstly instantiated DistIface object will
307 * call this constructor.
308 */
309 SyncEvent() : GlobalSyncEvent(Sim_Exit_Pri, 0), _draining(false) {}
310
311 ~SyncEvent() {}
312 /**
313 * Schedule the first periodic sync event.
314 */
315 void start();
316 /**
317 * This is a global event so process() will only be called by
318 * exactly one simulation thread. (See further comments in the .cc
319 * file.)
320 */
321 void process() override;
322
323 bool draining() const { return _draining; }
324 void draining(bool fl) { _draining = fl; }
325 };
326 /**
327 * Class to encapsulate information about data packets received.
328
329 * @note The main purpose of the class to take care of scheduling receive
330 * done events for the simulated network link and store incoming packets
331 * until they can be received by the simulated network link.
332 */
333 class RecvScheduler : public Serializable
334 {
335 private:
336 /**
337 * Received packet descriptor. This information is used by the receive
338 * thread to schedule receive events and by the simulation thread to
339 * process those events.
340 */
341 struct Desc : public Serializable
342 {
343 EthPacketPtr packet;
344 Tick sendTick;
345 Tick sendDelay;
346
347 Desc() : sendTick(0), sendDelay(0) {}
348 Desc(EthPacketPtr p, Tick s, Tick d) :
349 packet(p), sendTick(s), sendDelay(d) {}
350 Desc(const Desc &d) :
351 packet(d.packet), sendTick(d.sendTick), sendDelay(d.sendDelay) {}
352
353 void serialize(CheckpointOut &cp) const override;
354 void unserialize(CheckpointIn &cp) override;
355 };
356 /**
357 * The queue to store the receive descriptors.
358 */
359 std::queue<Desc> descQueue;
360 /**
361 * The tick when the most recent receive event was processed.
362 *
363 * @note This information is necessary to simulate possible receiver
364 * link contention when calculating the receive tick for the next
365 * incoming data packet (see the calcReceiveTick() method)
366 */
367 Tick prevRecvTick;
368 /**
369 * The receive done event for the simulated Ethernet link.
370 *
371 * @note This object is constructed by the simulated network link. We
372 * schedule this object for each incoming data packet.
373 */
374 Event *recvDone;
375 /**
376 * The link delay in ticks for the simulated Ethernet link.
377 *
378 * @note This value is used for calculating the receive ticks for
379 * incoming data packets.
380 */
381 Tick linkDelay;
382 /**
383 * The event manager associated with the simulated Ethernet link.
384 *
385 * @note It is used to access the event queue for scheduling receive
386 * done events for the link.
387 */
388 EventManager *eventManager;
389 /**
390 * Calculate the tick to schedule the next receive done event.
391 *
392 * @param send_tick The tick the packet was sent.
393 * @param send_delay The simulated delay at the sender side.
394 * @param prev_recv_tick Tick when the last receive event was
395 * processed.
396 *
397 * @note This method tries to take into account possible receiver link
398 * contention and adjust receive tick for the incoming packets
399 * accordingly.
400 */
401 Tick calcReceiveTick(Tick send_tick,
402 Tick send_delay,
403 Tick prev_recv_tick);
404
405 /**
406 * Flag to set if receive ticks for pending packets need to be
407 * recalculated due to changed link latencies at a resume
408 */
409 bool ckptRestore;
410
411 public:
412 /**
413 * Scheduler for the incoming data packets.
414 *
415 * @param em The event manager associated with the simulated Ethernet
416 * link.
417 */
418 RecvScheduler(EventManager *em) :
419 prevRecvTick(0), recvDone(nullptr), linkDelay(0),
420 eventManager(em), ckptRestore(false) {}
421
422 /**
423 * Initialize network link parameters.
424 *
425 * @note This method is called from the receiver thread (see
426 * recvThreadFunc()).
427 */
428 void init(Event *recv_done, Tick link_delay);
429 /**
430 * Fetch the next packet that is to be received by the simulated network
431 * link.
432 *
433 * @note This method is called from the process() method of the receive
434 * done event associated with the network link.
435 */
436 EthPacketPtr popPacket();
437 /**
438 * Push a newly arrived packet into the desc queue.
439 */
440 void pushPacket(EthPacketPtr new_packet,
441 Tick send_tick,
442 Tick send_delay);
443
444 void serialize(CheckpointOut &cp) const override;
445 void unserialize(CheckpointIn &cp) override;
446 /**
447 * Adjust receive ticks for pending packets when restoring from a
448 * checkpoint
449 *
450 * @note Link speed and delay parameters may change at resume.
451 */
452 void resumeRecvTicks();
453 };
454 /**
455 * Tick to schedule the first dist sync event.
456 * This is just as optimization : we do not need any dist sync
457 * event until the simulated NIC is brought up by the OS.
458 */
459 Tick syncStart;
460 /**
461 * Frequency of dist sync events in ticks.
462 */
463 Tick syncRepeat;
464 /**
465 * Receiver thread pointer.
466 * Each DistIface object must have exactly one receiver thread.
467 */
468 std::thread *recvThread;
469 /**
470 * Meta information about data packets received.
471 */
472 RecvScheduler recvScheduler;
473 /**
474 * Use pseudoOp to start synchronization.
475 */
476 bool syncStartOnPseudoOp;
477
478 protected:
479 /**
480 * The rank of this process among the gem5 peers.
481 */
482 unsigned rank;
483 /**
484 * The number of gem5 processes comprising this dist simulation.
485 */
486 unsigned size;
487 /**
488 * Number of DistIface objects (i.e. dist links in this gem5 process)
489 */
490 static unsigned distIfaceNum;
491 /**
492 * Unique id for the dist link
493 */
494 unsigned distIfaceId;
495
496 bool isMaster;
497
498 private:
499 /**
500 * Number of receiver threads (in this gem5 process)
501 */
502 static unsigned recvThreadsNum;
503 /**
504 * The singleton Sync object to perform dist synchronisation.
505 */
506 static Sync *sync;
507 /**
508 * The singleton SyncEvent object to schedule periodic dist sync.
509 */
510 static SyncEvent *syncEvent;
511 /**
512 * The very first DistIface object created becomes the master. We need
513 * a master to co-ordinate the global synchronisation.
514 */
515 static DistIface *master;
516 /**
517 * System pointer used to wakeup sleeping threads when stopping sync.
518 */
519 static System *sys;
520 /**
521 * Is this node a switch?
522 */
523 static bool isSwitch;
524
525 private:
526 /**
527 * Send out a data packet to the remote end.
528 * @param header Meta info about the packet (which needs to be transferred
529 * to the destination alongside the packet).
530 * @param packet Pointer to the packet to send.
531 */
532 virtual void sendPacket(const Header &header, const EthPacketPtr &packet) = 0;
533 /**
534 * Send out a control command to the remote end.
535 * @param header Meta info describing the command (e.g. sync request)
536 */
537 virtual void sendCmd(const Header &header) = 0;
538 /**
539 * Receive a header (i.e. meta info describing a data packet or a control command)
540 * from the remote end.
541 * @param header The meta info structure to store the incoming header.
542 */
543 virtual bool recvHeader(Header &header) = 0;
544 /**
545 * Receive a packet from the remote end.
546 * @param header Meta info about the incoming packet (obtanied by a previous
547 * call to the recvHedaer() method).
548 * @param Pointer to packet received.
549 */
550 virtual void recvPacket(const Header &header, EthPacketPtr &packet) = 0;
551 /**
552 * Init hook for the underlaying transport
553 */
554 virtual void initTransport() = 0;
555 /**
556 * spawn the receiver thread.
557 * @param recv_done The receive done event associated with the simulated
558 * Ethernet link.
559 * @param link_delay The link delay for the simulated Ethernet link.
560 */
561 void spawnRecvThread(const Event *recv_done, Tick link_delay);
562 /**
563 * The function executed by a receiver thread.
564 */
565 void recvThreadFunc(Event *recv_done, Tick link_delay);
566
567 public:
568
569 /**
570 * ctor
571 * @param dist_rank Rank of this gem5 process within the dist run
572 * @param sync_start Start tick for dist synchronisation
573 * @param sync_repeat Frequency for dist synchronisation
574 * @param em The event manager associated with the simulated Ethernet link
575 */
576 DistIface(unsigned dist_rank,
577 unsigned dist_size,
578 Tick sync_start,
579 Tick sync_repeat,
580 EventManager *em,
581 bool use_pseudo_op,
582 bool is_switch,
583 int num_nodes);
584
585 virtual ~DistIface();
586 /**
587 * Send out an Ethernet packet.
588 * @param pkt The Ethernet packet to send.
589 * @param send_delay The delay in ticks for the send completion event.
590 */
591 void packetOut(EthPacketPtr pkt, Tick send_delay);
592 /**
593 * Fetch the packet scheduled to be received next by the simulated
594 * network link.
595 *
596 * @note This method is called within the process() method of the link
597 * receive done event. It also schedules the next receive event if the
598 * receive queue is not empty.
599 */
600 EthPacketPtr packetIn() { return recvScheduler.popPacket(); }
601
602 DrainState drain() override;
603 void drainResume() override;
604 void init(const Event *e, Tick link_delay);
605 void startup();
606
607 void serialize(CheckpointOut &cp) const override;
608 void unserialize(CheckpointIn &cp) override;
609 /**
610 * Initiate the exit from the simulation.
611 * @param delay Delay param from the m5 exit command. If Delay is zero
612 * then a collaborative exit is requested (i.e. all nodes have to call
613 * this method before the distributed simulation can exit). If Delay is
614 * not zero then exit is requested asap (and it will happen at the next
615 * sync tick).
616 * @return False if we are in distributed mode (i.e. exit can happen only
617 * at sync), True otherwise.
618 */
619 static bool readyToExit(Tick delay);
620 /**
621 * Initiate taking a checkpoint
622 * @param delay Delay param from the m5 checkpoint command. If Delay is
623 * zero then a collaborative checkpoint is requested (i.e. all nodes have
624 * to call this method before the checkpoint can be taken). If Delay is
625 * not zero then a checkpoint is requested asap (and it will happen at the
626 * next sync tick).
627 * @return False if we are in dist mode (i.e. exit can happen only at
628 * sync), True otherwise.
629 */
630 static bool readyToCkpt(Tick delay, Tick period);
631 /**
632 * Getter for the dist rank param.
633 */
634 static uint64_t rankParam();
635 /**
636 * Getter for the dist size param.
637 */
638 static uint64_t sizeParam();
639 /**
640 * Trigger the master to start/stop synchronization.
641 */
642 static void toggleSync(ThreadContext *tc);
643 };
644
645#endif
87#include "dev/net/dist_packet.hh"
88#include "dev/net/etherpkt.hh"
89#include "sim/core.hh"
90#include "sim/drain.hh"
91#include "sim/global_event.hh"
92#include "sim/serialize.hh"
93
94class EventManager;
95class System;
96class ThreadContext;
97
98/**
99 * The interface class to talk to peer gem5 processes.
100 */
101class DistIface : public Drainable, public Serializable
102{
103 public:
104 typedef DistHeaderPkt::Header Header;
105
106 protected:
107 typedef DistHeaderPkt::MsgType MsgType;
108 typedef DistHeaderPkt::ReqType ReqType;
109
110 private:
111 class SyncEvent;
112 /** @class Sync
113 * This class implements global sync operations among gem5 peer processes.
114 *
115 * @note This class is used as a singleton object (shared by all DistIface
116 * objects).
117 */
118 class Sync : public Serializable
119 {
120 protected:
121 /**
122 * The lock to protect access to the Sync object.
123 */
124 std::mutex lock;
125 /**
126 * Condition variable for the simulation thread to wait on
127 * until all receiver threads completes the current global
128 * synchronisation.
129 */
130 std::condition_variable cv;
131 /**
132 * Number of receiver threads that not yet completed the current global
133 * synchronisation.
134 */
135 unsigned waitNum;
136 /**
137 * Flag is set if exit is permitted upon sync completion
138 */
139 bool doExit;
140 /**
141 * Flag is set if taking a ckpt is permitted upon sync completion
142 */
143 bool doCkpt;
144 /**
145 * Flag is set if sync is to stop upon sync completion
146 */
147 bool doStopSync;
148 /**
149 * The repeat value for the next periodic sync
150 */
151 Tick nextRepeat;
152 /**
153 * Tick for the next periodic sync (if the event is not scheduled yet)
154 */
155 Tick nextAt;
156 /**
157 * Flag is set if the sync is aborted (e.g. due to connection lost)
158 */
159 bool isAbort;
160
161 friend class SyncEvent;
162
163 public:
164 /**
165 * Initialize periodic sync params.
166 *
167 * @param start Start tick for dist synchronisation
168 * @param repeat Frequency of dist synchronisation
169 *
170 */
171 void init(Tick start, Tick repeat);
172 /**
173 * Core method to perform a full dist sync.
174 *
175 * @return true if the sync completes, false if it gets aborted
176 */
177 virtual bool run(bool same_tick) = 0;
178 /**
179 * Callback when the receiver thread gets a sync ack message.
180 *
181 * @return false if the receiver thread needs to stop (e.g.
182 * simulation is to exit)
183 */
184 virtual bool progress(Tick send_tick,
185 Tick next_repeat,
186 ReqType do_ckpt,
187 ReqType do_exit,
188 ReqType do_stop_sync) = 0;
189 /**
190 * Abort processing an on-going sync event (in case of an error, e.g.
191 * lost connection to a peer gem5)
192 */
193 void abort();
194
195 virtual void requestCkpt(ReqType req) = 0;
196 virtual void requestExit(ReqType req) = 0;
197 virtual void requestStopSync(ReqType req) = 0;
198
199 void drainComplete();
200
201 virtual void serialize(CheckpointOut &cp) const override = 0;
202 virtual void unserialize(CheckpointIn &cp) override = 0;
203 };
204
205 class SyncNode: public Sync
206 {
207 private:
208 /**
209 * Exit requested
210 */
211 ReqType needExit;
212 /**
213 * Ckpt requested
214 */
215 ReqType needCkpt;
216 /**
217 * Sync stop requested
218 */
219 ReqType needStopSync;
220
221 public:
222
223 SyncNode();
224 ~SyncNode() {}
225 bool run(bool same_tick) override;
226 bool progress(Tick max_req_tick,
227 Tick next_repeat,
228 ReqType do_ckpt,
229 ReqType do_exit,
230 ReqType do_stop_sync) override;
231
232 void requestCkpt(ReqType req) override;
233 void requestExit(ReqType req) override;
234 void requestStopSync(ReqType req) override;
235
236 void serialize(CheckpointOut &cp) const override;
237 void unserialize(CheckpointIn &cp) override;
238 };
239
240 class SyncSwitch: public Sync
241 {
242 private:
243 /**
244 * Counter for recording exit requests
245 */
246 unsigned numExitReq;
247 /**
248 * Counter for recording ckpt requests
249 */
250 unsigned numCkptReq;
251 /**
252 * Counter for recording stop sync requests
253 */
254 unsigned numStopSyncReq;
255 /**
256 * Number of connected simulated nodes
257 */
258 unsigned numNodes;
259
260 public:
261 SyncSwitch(int num_nodes);
262 ~SyncSwitch() {}
263
264 bool run(bool same_tick) override;
265 bool progress(Tick max_req_tick,
266 Tick next_repeat,
267 ReqType do_ckpt,
268 ReqType do_exit,
269 ReqType do_stop_sync) override;
270
271 void requestCkpt(ReqType) override {
272 panic("Switch requested checkpoint");
273 }
274 void requestExit(ReqType) override {
275 panic("Switch requested exit");
276 }
277 void requestStopSync(ReqType) override {
278 panic("Switch requested stop sync");
279 }
280
281 void serialize(CheckpointOut &cp) const override;
282 void unserialize(CheckpointIn &cp) override;
283 };
284
285 /**
286 * The global event to schedule periodic dist sync. It is used as a
287 * singleton object.
288 *
289 * The periodic synchronisation works as follows.
290 * 1. A SyncEvent is scheduled as a global event when startup() is
291 * called.
292 * 2. The process() method of the SyncEvent initiates a new barrier
293 * for each simulated Ethernet link.
294 * 3. Simulation thread(s) then waits until all receiver threads
295 * complete the ongoing barrier. The global sync event is done.
296 */
297 class SyncEvent : public GlobalSyncEvent
298 {
299 private:
300 /**
301 * Flag to set when the system is draining
302 */
303 bool _draining;
304 public:
305 /**
306 * Only the firstly instantiated DistIface object will
307 * call this constructor.
308 */
309 SyncEvent() : GlobalSyncEvent(Sim_Exit_Pri, 0), _draining(false) {}
310
311 ~SyncEvent() {}
312 /**
313 * Schedule the first periodic sync event.
314 */
315 void start();
316 /**
317 * This is a global event so process() will only be called by
318 * exactly one simulation thread. (See further comments in the .cc
319 * file.)
320 */
321 void process() override;
322
323 bool draining() const { return _draining; }
324 void draining(bool fl) { _draining = fl; }
325 };
326 /**
327 * Class to encapsulate information about data packets received.
328
329 * @note The main purpose of the class to take care of scheduling receive
330 * done events for the simulated network link and store incoming packets
331 * until they can be received by the simulated network link.
332 */
333 class RecvScheduler : public Serializable
334 {
335 private:
336 /**
337 * Received packet descriptor. This information is used by the receive
338 * thread to schedule receive events and by the simulation thread to
339 * process those events.
340 */
341 struct Desc : public Serializable
342 {
343 EthPacketPtr packet;
344 Tick sendTick;
345 Tick sendDelay;
346
347 Desc() : sendTick(0), sendDelay(0) {}
348 Desc(EthPacketPtr p, Tick s, Tick d) :
349 packet(p), sendTick(s), sendDelay(d) {}
350 Desc(const Desc &d) :
351 packet(d.packet), sendTick(d.sendTick), sendDelay(d.sendDelay) {}
352
353 void serialize(CheckpointOut &cp) const override;
354 void unserialize(CheckpointIn &cp) override;
355 };
356 /**
357 * The queue to store the receive descriptors.
358 */
359 std::queue<Desc> descQueue;
360 /**
361 * The tick when the most recent receive event was processed.
362 *
363 * @note This information is necessary to simulate possible receiver
364 * link contention when calculating the receive tick for the next
365 * incoming data packet (see the calcReceiveTick() method)
366 */
367 Tick prevRecvTick;
368 /**
369 * The receive done event for the simulated Ethernet link.
370 *
371 * @note This object is constructed by the simulated network link. We
372 * schedule this object for each incoming data packet.
373 */
374 Event *recvDone;
375 /**
376 * The link delay in ticks for the simulated Ethernet link.
377 *
378 * @note This value is used for calculating the receive ticks for
379 * incoming data packets.
380 */
381 Tick linkDelay;
382 /**
383 * The event manager associated with the simulated Ethernet link.
384 *
385 * @note It is used to access the event queue for scheduling receive
386 * done events for the link.
387 */
388 EventManager *eventManager;
389 /**
390 * Calculate the tick to schedule the next receive done event.
391 *
392 * @param send_tick The tick the packet was sent.
393 * @param send_delay The simulated delay at the sender side.
394 * @param prev_recv_tick Tick when the last receive event was
395 * processed.
396 *
397 * @note This method tries to take into account possible receiver link
398 * contention and adjust receive tick for the incoming packets
399 * accordingly.
400 */
401 Tick calcReceiveTick(Tick send_tick,
402 Tick send_delay,
403 Tick prev_recv_tick);
404
405 /**
406 * Flag to set if receive ticks for pending packets need to be
407 * recalculated due to changed link latencies at a resume
408 */
409 bool ckptRestore;
410
411 public:
412 /**
413 * Scheduler for the incoming data packets.
414 *
415 * @param em The event manager associated with the simulated Ethernet
416 * link.
417 */
418 RecvScheduler(EventManager *em) :
419 prevRecvTick(0), recvDone(nullptr), linkDelay(0),
420 eventManager(em), ckptRestore(false) {}
421
422 /**
423 * Initialize network link parameters.
424 *
425 * @note This method is called from the receiver thread (see
426 * recvThreadFunc()).
427 */
428 void init(Event *recv_done, Tick link_delay);
429 /**
430 * Fetch the next packet that is to be received by the simulated network
431 * link.
432 *
433 * @note This method is called from the process() method of the receive
434 * done event associated with the network link.
435 */
436 EthPacketPtr popPacket();
437 /**
438 * Push a newly arrived packet into the desc queue.
439 */
440 void pushPacket(EthPacketPtr new_packet,
441 Tick send_tick,
442 Tick send_delay);
443
444 void serialize(CheckpointOut &cp) const override;
445 void unserialize(CheckpointIn &cp) override;
446 /**
447 * Adjust receive ticks for pending packets when restoring from a
448 * checkpoint
449 *
450 * @note Link speed and delay parameters may change at resume.
451 */
452 void resumeRecvTicks();
453 };
454 /**
455 * Tick to schedule the first dist sync event.
456 * This is just as optimization : we do not need any dist sync
457 * event until the simulated NIC is brought up by the OS.
458 */
459 Tick syncStart;
460 /**
461 * Frequency of dist sync events in ticks.
462 */
463 Tick syncRepeat;
464 /**
465 * Receiver thread pointer.
466 * Each DistIface object must have exactly one receiver thread.
467 */
468 std::thread *recvThread;
469 /**
470 * Meta information about data packets received.
471 */
472 RecvScheduler recvScheduler;
473 /**
474 * Use pseudoOp to start synchronization.
475 */
476 bool syncStartOnPseudoOp;
477
478 protected:
479 /**
480 * The rank of this process among the gem5 peers.
481 */
482 unsigned rank;
483 /**
484 * The number of gem5 processes comprising this dist simulation.
485 */
486 unsigned size;
487 /**
488 * Number of DistIface objects (i.e. dist links in this gem5 process)
489 */
490 static unsigned distIfaceNum;
491 /**
492 * Unique id for the dist link
493 */
494 unsigned distIfaceId;
495
496 bool isMaster;
497
498 private:
499 /**
500 * Number of receiver threads (in this gem5 process)
501 */
502 static unsigned recvThreadsNum;
503 /**
504 * The singleton Sync object to perform dist synchronisation.
505 */
506 static Sync *sync;
507 /**
508 * The singleton SyncEvent object to schedule periodic dist sync.
509 */
510 static SyncEvent *syncEvent;
511 /**
512 * The very first DistIface object created becomes the master. We need
513 * a master to co-ordinate the global synchronisation.
514 */
515 static DistIface *master;
516 /**
517 * System pointer used to wakeup sleeping threads when stopping sync.
518 */
519 static System *sys;
520 /**
521 * Is this node a switch?
522 */
523 static bool isSwitch;
524
525 private:
526 /**
527 * Send out a data packet to the remote end.
528 * @param header Meta info about the packet (which needs to be transferred
529 * to the destination alongside the packet).
530 * @param packet Pointer to the packet to send.
531 */
532 virtual void sendPacket(const Header &header, const EthPacketPtr &packet) = 0;
533 /**
534 * Send out a control command to the remote end.
535 * @param header Meta info describing the command (e.g. sync request)
536 */
537 virtual void sendCmd(const Header &header) = 0;
538 /**
539 * Receive a header (i.e. meta info describing a data packet or a control command)
540 * from the remote end.
541 * @param header The meta info structure to store the incoming header.
542 */
543 virtual bool recvHeader(Header &header) = 0;
544 /**
545 * Receive a packet from the remote end.
546 * @param header Meta info about the incoming packet (obtanied by a previous
547 * call to the recvHedaer() method).
548 * @param Pointer to packet received.
549 */
550 virtual void recvPacket(const Header &header, EthPacketPtr &packet) = 0;
551 /**
552 * Init hook for the underlaying transport
553 */
554 virtual void initTransport() = 0;
555 /**
556 * spawn the receiver thread.
557 * @param recv_done The receive done event associated with the simulated
558 * Ethernet link.
559 * @param link_delay The link delay for the simulated Ethernet link.
560 */
561 void spawnRecvThread(const Event *recv_done, Tick link_delay);
562 /**
563 * The function executed by a receiver thread.
564 */
565 void recvThreadFunc(Event *recv_done, Tick link_delay);
566
567 public:
568
569 /**
570 * ctor
571 * @param dist_rank Rank of this gem5 process within the dist run
572 * @param sync_start Start tick for dist synchronisation
573 * @param sync_repeat Frequency for dist synchronisation
574 * @param em The event manager associated with the simulated Ethernet link
575 */
576 DistIface(unsigned dist_rank,
577 unsigned dist_size,
578 Tick sync_start,
579 Tick sync_repeat,
580 EventManager *em,
581 bool use_pseudo_op,
582 bool is_switch,
583 int num_nodes);
584
585 virtual ~DistIface();
586 /**
587 * Send out an Ethernet packet.
588 * @param pkt The Ethernet packet to send.
589 * @param send_delay The delay in ticks for the send completion event.
590 */
591 void packetOut(EthPacketPtr pkt, Tick send_delay);
592 /**
593 * Fetch the packet scheduled to be received next by the simulated
594 * network link.
595 *
596 * @note This method is called within the process() method of the link
597 * receive done event. It also schedules the next receive event if the
598 * receive queue is not empty.
599 */
600 EthPacketPtr packetIn() { return recvScheduler.popPacket(); }
601
602 DrainState drain() override;
603 void drainResume() override;
604 void init(const Event *e, Tick link_delay);
605 void startup();
606
607 void serialize(CheckpointOut &cp) const override;
608 void unserialize(CheckpointIn &cp) override;
609 /**
610 * Initiate the exit from the simulation.
611 * @param delay Delay param from the m5 exit command. If Delay is zero
612 * then a collaborative exit is requested (i.e. all nodes have to call
613 * this method before the distributed simulation can exit). If Delay is
614 * not zero then exit is requested asap (and it will happen at the next
615 * sync tick).
616 * @return False if we are in distributed mode (i.e. exit can happen only
617 * at sync), True otherwise.
618 */
619 static bool readyToExit(Tick delay);
620 /**
621 * Initiate taking a checkpoint
622 * @param delay Delay param from the m5 checkpoint command. If Delay is
623 * zero then a collaborative checkpoint is requested (i.e. all nodes have
624 * to call this method before the checkpoint can be taken). If Delay is
625 * not zero then a checkpoint is requested asap (and it will happen at the
626 * next sync tick).
627 * @return False if we are in dist mode (i.e. exit can happen only at
628 * sync), True otherwise.
629 */
630 static bool readyToCkpt(Tick delay, Tick period);
631 /**
632 * Getter for the dist rank param.
633 */
634 static uint64_t rankParam();
635 /**
636 * Getter for the dist size param.
637 */
638 static uint64_t sizeParam();
639 /**
640 * Trigger the master to start/stop synchronization.
641 */
642 static void toggleSync(ThreadContext *tc);
643 };
644
645#endif