1/* 2 * Copyright (c) 2015-2016 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Gabor Dozsa 38 */ 39 40/* @file 41 * The interface class for dist gem5 simulations. 42 * 43 * dist-gem5 is an extension to gem5 to enable parallel simulation of a 44 * distributed system (e.g. simulation of a pool of machines 45 * connected by Ethernet links). A dist gem5 run consists of seperate gem5 46 * processes running in parallel. Each gem5 process executes 47 * the simulation of a component of the simulated distributed system. 48 * (An example component can be a dist-core board with an Ethernet NIC.) 49 * The DistIface class below provides services to transfer data and 50 * control messages among the gem5 processes. The main such services are 51 * as follows. 52 * 53 * 1. Send a data packet coming from a simulated Ethernet link. The packet 54 * will be transferred to (all) the target(s) gem5 processes. The send 55 * operation is always performed by the simulation thread, i.e. the gem5 56 * thread that is processing the event queue associated with the simulated 57 * Ethernet link. 58 * 59 * 2. Spawn a receiver thread to process messages coming in from the 60 * from other gem5 processes. Each simulated Ethernet link has its own 61 * associated receiver thread. The receiver thread saves the incoming packet 62 * and schedule an appropriate receive event in the event queue. 63 * 64 * 3. Schedule a global barrier event periodically to keep the gem5 65 * processes in sync. 66 * Periodic barrier event to keep peer gem5 processes in sync. The basic idea 67 * is that no gem5 process can go ahead further than the simulated link 68 * transmission delay to ensure that a corresponding receive event can always 69 * be scheduled for any message coming in from a peer gem5 process. 70 * 71 * 72 * 73 * This interface is an abstract class. It can work with various low level 74 * send/receive service implementations (e.g. TCP/IP, MPI,...). A TCP 75 * stream socket version is implemented in src/dev/net/tcp_iface.[hh,cc]. 76 */ 77#ifndef __DEV_DIST_IFACE_HH__ 78#define __DEV_DIST_IFACE_HH__ 79 80#include <array> 81#include <mutex> 82#include <queue> 83#include <thread> 84#include <utility> 85 86#include "base/logging.hh" 87#include "dev/net/dist_packet.hh" 88#include "dev/net/etherpkt.hh" 89#include "sim/core.hh" 90#include "sim/drain.hh" 91#include "sim/global_event.hh" 92#include "sim/serialize.hh" 93 94class EventManager; 95class System; 96class ThreadContext; 97 98/** 99 * The interface class to talk to peer gem5 processes. 100 */ 101class DistIface : public Drainable, public Serializable 102{ 103 public: 104 typedef DistHeaderPkt::Header Header; 105 106 protected: 107 typedef DistHeaderPkt::MsgType MsgType; 108 typedef DistHeaderPkt::ReqType ReqType; 109 110 private: 111 class SyncEvent; 112 /** @class Sync 113 * This class implements global sync operations among gem5 peer processes. 114 * 115 * @note This class is used as a singleton object (shared by all DistIface 116 * objects). 117 */ 118 class Sync : public Serializable 119 { 120 protected: 121 /** 122 * The lock to protect access to the Sync object. 123 */ 124 std::mutex lock; 125 /** 126 * Condition variable for the simulation thread to wait on 127 * until all receiver threads completes the current global 128 * synchronisation. 129 */ 130 std::condition_variable cv; 131 /** 132 * Number of receiver threads that not yet completed the current global 133 * synchronisation. 134 */ 135 unsigned waitNum; 136 /** 137 * Flag is set if exit is permitted upon sync completion 138 */ 139 bool doExit; 140 /** 141 * Flag is set if taking a ckpt is permitted upon sync completion 142 */ 143 bool doCkpt; 144 /** 145 * Flag is set if sync is to stop upon sync completion 146 */ 147 bool doStopSync; 148 /** 149 * The repeat value for the next periodic sync 150 */ 151 Tick nextRepeat; 152 /** 153 * Tick for the next periodic sync (if the event is not scheduled yet) 154 */ 155 Tick nextAt; 156 /** 157 * Flag is set if the sync is aborted (e.g. due to connection lost) 158 */ 159 bool isAbort; 160 161 friend class SyncEvent; 162 163 public: 164 /** 165 * Initialize periodic sync params. 166 * 167 * @param start Start tick for dist synchronisation 168 * @param repeat Frequency of dist synchronisation 169 * 170 */ 171 void init(Tick start, Tick repeat); 172 /** 173 * Core method to perform a full dist sync. 174 * 175 * @return true if the sync completes, false if it gets aborted 176 */ 177 virtual bool run(bool same_tick) = 0; 178 /** 179 * Callback when the receiver thread gets a sync ack message. 180 * 181 * @return false if the receiver thread needs to stop (e.g. 182 * simulation is to exit) 183 */ 184 virtual bool progress(Tick send_tick, 185 Tick next_repeat, 186 ReqType do_ckpt, 187 ReqType do_exit, 188 ReqType do_stop_sync) = 0; 189 /** 190 * Abort processing an on-going sync event (in case of an error, e.g. 191 * lost connection to a peer gem5) 192 */ 193 void abort(); 194 195 virtual void requestCkpt(ReqType req) = 0; 196 virtual void requestExit(ReqType req) = 0; 197 virtual void requestStopSync(ReqType req) = 0; 198 199 void drainComplete(); 200 201 virtual void serialize(CheckpointOut &cp) const override = 0; 202 virtual void unserialize(CheckpointIn &cp) override = 0; 203 }; 204 205 class SyncNode: public Sync 206 { 207 private: 208 /** 209 * Exit requested 210 */ 211 ReqType needExit; 212 /** 213 * Ckpt requested 214 */ 215 ReqType needCkpt; 216 /** 217 * Sync stop requested 218 */ 219 ReqType needStopSync; 220 221 public: 222 223 SyncNode(); 224 ~SyncNode() {} 225 bool run(bool same_tick) override; 226 bool progress(Tick max_req_tick, 227 Tick next_repeat, 228 ReqType do_ckpt, 229 ReqType do_exit, 230 ReqType do_stop_sync) override; 231 232 void requestCkpt(ReqType req) override; 233 void requestExit(ReqType req) override; 234 void requestStopSync(ReqType req) override; 235 236 void serialize(CheckpointOut &cp) const override; 237 void unserialize(CheckpointIn &cp) override; 238 }; 239 240 class SyncSwitch: public Sync 241 { 242 private: 243 /** 244 * Counter for recording exit requests 245 */ 246 unsigned numExitReq; 247 /** 248 * Counter for recording ckpt requests 249 */ 250 unsigned numCkptReq; 251 /** 252 * Counter for recording stop sync requests 253 */ 254 unsigned numStopSyncReq; 255 /** 256 * Number of connected simulated nodes 257 */ 258 unsigned numNodes; 259 260 public: 261 SyncSwitch(int num_nodes); 262 ~SyncSwitch() {} 263 264 bool run(bool same_tick) override; 265 bool progress(Tick max_req_tick, 266 Tick next_repeat, 267 ReqType do_ckpt, 268 ReqType do_exit, 269 ReqType do_stop_sync) override; 270 271 void requestCkpt(ReqType) override { 272 panic("Switch requested checkpoint"); 273 } 274 void requestExit(ReqType) override { 275 panic("Switch requested exit"); 276 } 277 void requestStopSync(ReqType) override { 278 panic("Switch requested stop sync"); 279 } 280 281 void serialize(CheckpointOut &cp) const override; 282 void unserialize(CheckpointIn &cp) override; 283 }; 284 285 /** 286 * The global event to schedule periodic dist sync. It is used as a 287 * singleton object. 288 * 289 * The periodic synchronisation works as follows. 290 * 1. A SyncEvent is scheduled as a global event when startup() is 291 * called. 292 * 2. The process() method of the SyncEvent initiates a new barrier 293 * for each simulated Ethernet link. 294 * 3. Simulation thread(s) then waits until all receiver threads 295 * complete the ongoing barrier. The global sync event is done. 296 */ 297 class SyncEvent : public GlobalSyncEvent 298 { 299 private: 300 /** 301 * Flag to set when the system is draining 302 */ 303 bool _draining; 304 public: 305 /** 306 * Only the firstly instantiated DistIface object will 307 * call this constructor. 308 */ 309 SyncEvent() : GlobalSyncEvent(Sim_Exit_Pri, 0), _draining(false) {} 310 311 ~SyncEvent() {} 312 /** 313 * Schedule the first periodic sync event. 314 */ 315 void start(); 316 /** 317 * This is a global event so process() will only be called by 318 * exactly one simulation thread. (See further comments in the .cc 319 * file.) 320 */ 321 void process() override; 322 323 bool draining() const { return _draining; } 324 void draining(bool fl) { _draining = fl; } 325 }; 326 /** 327 * Class to encapsulate information about data packets received. 328 329 * @note The main purpose of the class to take care of scheduling receive 330 * done events for the simulated network link and store incoming packets 331 * until they can be received by the simulated network link. 332 */ 333 class RecvScheduler : public Serializable 334 { 335 private: 336 /** 337 * Received packet descriptor. This information is used by the receive 338 * thread to schedule receive events and by the simulation thread to 339 * process those events. 340 */ 341 struct Desc : public Serializable 342 { 343 EthPacketPtr packet; 344 Tick sendTick; 345 Tick sendDelay; 346 347 Desc() : sendTick(0), sendDelay(0) {} 348 Desc(EthPacketPtr p, Tick s, Tick d) : 349 packet(p), sendTick(s), sendDelay(d) {} 350 Desc(const Desc &d) : 351 packet(d.packet), sendTick(d.sendTick), sendDelay(d.sendDelay) {} 352 353 void serialize(CheckpointOut &cp) const override; 354 void unserialize(CheckpointIn &cp) override; 355 }; 356 /** 357 * The queue to store the receive descriptors. 358 */ 359 std::queue<Desc> descQueue; 360 /** 361 * The tick when the most recent receive event was processed. 362 * 363 * @note This information is necessary to simulate possible receiver 364 * link contention when calculating the receive tick for the next 365 * incoming data packet (see the calcReceiveTick() method) 366 */ 367 Tick prevRecvTick; 368 /** 369 * The receive done event for the simulated Ethernet link. 370 * 371 * @note This object is constructed by the simulated network link. We 372 * schedule this object for each incoming data packet. 373 */ 374 Event *recvDone; 375 /** 376 * The link delay in ticks for the simulated Ethernet link. 377 * 378 * @note This value is used for calculating the receive ticks for 379 * incoming data packets. 380 */ 381 Tick linkDelay; 382 /** 383 * The event manager associated with the simulated Ethernet link. 384 * 385 * @note It is used to access the event queue for scheduling receive 386 * done events for the link. 387 */ 388 EventManager *eventManager; 389 /** 390 * Calculate the tick to schedule the next receive done event. 391 * 392 * @param send_tick The tick the packet was sent. 393 * @param send_delay The simulated delay at the sender side. 394 * @param prev_recv_tick Tick when the last receive event was 395 * processed. 396 * 397 * @note This method tries to take into account possible receiver link 398 * contention and adjust receive tick for the incoming packets 399 * accordingly. 400 */ 401 Tick calcReceiveTick(Tick send_tick, 402 Tick send_delay, 403 Tick prev_recv_tick); 404 405 /** 406 * Flag to set if receive ticks for pending packets need to be 407 * recalculated due to changed link latencies at a resume 408 */ 409 bool ckptRestore; 410 411 public: 412 /** 413 * Scheduler for the incoming data packets. 414 * 415 * @param em The event manager associated with the simulated Ethernet 416 * link. 417 */ 418 RecvScheduler(EventManager *em) : 419 prevRecvTick(0), recvDone(nullptr), linkDelay(0), 420 eventManager(em), ckptRestore(false) {} 421 422 /** 423 * Initialize network link parameters. 424 * 425 * @note This method is called from the receiver thread (see 426 * recvThreadFunc()). 427 */ 428 void init(Event *recv_done, Tick link_delay); 429 /** 430 * Fetch the next packet that is to be received by the simulated network 431 * link. 432 * 433 * @note This method is called from the process() method of the receive 434 * done event associated with the network link. 435 */ 436 EthPacketPtr popPacket(); 437 /** 438 * Push a newly arrived packet into the desc queue. 439 */ 440 void pushPacket(EthPacketPtr new_packet, 441 Tick send_tick, 442 Tick send_delay); 443 444 void serialize(CheckpointOut &cp) const override; 445 void unserialize(CheckpointIn &cp) override; 446 /** 447 * Adjust receive ticks for pending packets when restoring from a 448 * checkpoint 449 * 450 * @note Link speed and delay parameters may change at resume. 451 */ 452 void resumeRecvTicks(); 453 }; 454 /** 455 * Tick to schedule the first dist sync event. 456 * This is just as optimization : we do not need any dist sync 457 * event until the simulated NIC is brought up by the OS. 458 */ 459 Tick syncStart; 460 /** 461 * Frequency of dist sync events in ticks. 462 */ 463 Tick syncRepeat; 464 /** 465 * Receiver thread pointer. 466 * Each DistIface object must have exactly one receiver thread. 467 */ 468 std::thread *recvThread; 469 /** 470 * Meta information about data packets received. 471 */ 472 RecvScheduler recvScheduler; 473 /** 474 * Use pseudoOp to start synchronization. 475 */ 476 bool syncStartOnPseudoOp; 477 478 protected: 479 /** 480 * The rank of this process among the gem5 peers. 481 */ 482 unsigned rank; 483 /** 484 * The number of gem5 processes comprising this dist simulation. 485 */ 486 unsigned size; 487 /** 488 * Number of DistIface objects (i.e. dist links in this gem5 process) 489 */ 490 static unsigned distIfaceNum; 491 /** 492 * Unique id for the dist link 493 */ 494 unsigned distIfaceId; 495 496 bool isMaster; 497 498 private: 499 /** 500 * Number of receiver threads (in this gem5 process) 501 */ 502 static unsigned recvThreadsNum; 503 /** 504 * The singleton Sync object to perform dist synchronisation. 505 */ 506 static Sync *sync; 507 /** 508 * The singleton SyncEvent object to schedule periodic dist sync. 509 */ 510 static SyncEvent *syncEvent; 511 /** 512 * The very first DistIface object created becomes the master. We need 513 * a master to co-ordinate the global synchronisation. 514 */ 515 static DistIface *master; 516 /** 517 * System pointer used to wakeup sleeping threads when stopping sync. 518 */ 519 static System *sys; 520 /** 521 * Is this node a switch? 522 */ 523 static bool isSwitch; 524 525 private: 526 /** 527 * Send out a data packet to the remote end. 528 * @param header Meta info about the packet (which needs to be transferred 529 * to the destination alongside the packet). 530 * @param packet Pointer to the packet to send. 531 */ 532 virtual void sendPacket(const Header &header, const EthPacketPtr &packet) = 0; 533 /** 534 * Send out a control command to the remote end. 535 * @param header Meta info describing the command (e.g. sync request) 536 */ 537 virtual void sendCmd(const Header &header) = 0; 538 /** 539 * Receive a header (i.e. meta info describing a data packet or a control command) 540 * from the remote end. 541 * @param header The meta info structure to store the incoming header. 542 */ 543 virtual bool recvHeader(Header &header) = 0; 544 /** 545 * Receive a packet from the remote end. 546 * @param header Meta info about the incoming packet (obtanied by a previous 547 * call to the recvHedaer() method). 548 * @param Pointer to packet received. 549 */ 550 virtual void recvPacket(const Header &header, EthPacketPtr &packet) = 0; 551 /** 552 * Init hook for the underlaying transport 553 */ 554 virtual void initTransport() = 0; 555 /** 556 * spawn the receiver thread. 557 * @param recv_done The receive done event associated with the simulated 558 * Ethernet link. 559 * @param link_delay The link delay for the simulated Ethernet link. 560 */ 561 void spawnRecvThread(const Event *recv_done, Tick link_delay); 562 /** 563 * The function executed by a receiver thread. 564 */ 565 void recvThreadFunc(Event *recv_done, Tick link_delay); 566 567 public: 568 569 /** 570 * ctor 571 * @param dist_rank Rank of this gem5 process within the dist run 572 * @param sync_start Start tick for dist synchronisation 573 * @param sync_repeat Frequency for dist synchronisation 574 * @param em The event manager associated with the simulated Ethernet link 575 */ 576 DistIface(unsigned dist_rank, 577 unsigned dist_size, 578 Tick sync_start, 579 Tick sync_repeat, 580 EventManager *em, 581 bool use_pseudo_op, 582 bool is_switch, 583 int num_nodes); 584 585 virtual ~DistIface(); 586 /** 587 * Send out an Ethernet packet. 588 * @param pkt The Ethernet packet to send. 589 * @param send_delay The delay in ticks for the send completion event. 590 */ 591 void packetOut(EthPacketPtr pkt, Tick send_delay); 592 /** 593 * Fetch the packet scheduled to be received next by the simulated 594 * network link. 595 * 596 * @note This method is called within the process() method of the link 597 * receive done event. It also schedules the next receive event if the 598 * receive queue is not empty. 599 */ 600 EthPacketPtr packetIn() { return recvScheduler.popPacket(); } 601 602 DrainState drain() override; 603 void drainResume() override; 604 void init(const Event *e, Tick link_delay); 605 void startup(); 606 607 void serialize(CheckpointOut &cp) const override; 608 void unserialize(CheckpointIn &cp) override; 609 /** 610 * Initiate the exit from the simulation. 611 * @param delay Delay param from the m5 exit command. If Delay is zero 612 * then a collaborative exit is requested (i.e. all nodes have to call 613 * this method before the distributed simulation can exit). If Delay is 614 * not zero then exit is requested asap (and it will happen at the next 615 * sync tick). 616 * @return False if we are in distributed mode (i.e. exit can happen only 617 * at sync), True otherwise. 618 */ 619 static bool readyToExit(Tick delay); 620 /** 621 * Initiate taking a checkpoint 622 * @param delay Delay param from the m5 checkpoint command. If Delay is 623 * zero then a collaborative checkpoint is requested (i.e. all nodes have 624 * to call this method before the checkpoint can be taken). If Delay is 625 * not zero then a checkpoint is requested asap (and it will happen at the 626 * next sync tick). 627 * @return False if we are in dist mode (i.e. exit can happen only at 628 * sync), True otherwise. 629 */ 630 static bool readyToCkpt(Tick delay, Tick period); 631 /** 632 * Getter for the dist rank param. 633 */ 634 static uint64_t rankParam(); 635 /** 636 * Getter for the dist size param. 637 */ 638 static uint64_t sizeParam(); 639 /** 640 * Trigger the master to start/stop synchronization. 641 */ 642 static void toggleSync(ThreadContext *tc); 643 }; 644 645#endif 646