dist_iface.hh revision 11757:78ef8daecd81
1/* 2 * Copyright (c) 2015-2016 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Gabor Dozsa 38 */ 39 40/* @file 41 * The interface class for dist gem5 simulations. 42 * 43 * dist-gem5 is an extension to gem5 to enable parallel simulation of a 44 * distributed system (e.g. simulation of a pool of machines 45 * connected by Ethernet links). A dist gem5 run consists of seperate gem5 46 * processes running in parallel. Each gem5 process executes 47 * the simulation of a component of the simulated distributed system. 48 * (An example component can be a dist-core board with an Ethernet NIC.) 49 * The DistIface class below provides services to transfer data and 50 * control messages among the gem5 processes. The main such services are 51 * as follows. 52 * 53 * 1. Send a data packet coming from a simulated Ethernet link. The packet 54 * will be transferred to (all) the target(s) gem5 processes. The send 55 * operation is always performed by the simulation thread, i.e. the gem5 56 * thread that is processing the event queue associated with the simulated 57 * Ethernet link. 58 * 59 * 2. Spawn a receiver thread to process messages coming in from the 60 * from other gem5 processes. Each simulated Ethernet link has its own 61 * associated receiver thread. The receiver thread saves the incoming packet 62 * and schedule an appropriate receive event in the event queue. 63 * 64 * 3. Schedule a global barrier event periodically to keep the gem5 65 * processes in sync. 66 * Periodic barrier event to keep peer gem5 processes in sync. The basic idea 67 * is that no gem5 process can go ahead further than the simulated link 68 * transmission delay to ensure that a corresponding receive event can always 69 * be scheduled for any message coming in from a peer gem5 process. 70 * 71 * 72 * 73 * This interface is an abstract class. It can work with various low level 74 * send/receive service implementations (e.g. TCP/IP, MPI,...). A TCP 75 * stream socket version is implemented in src/dev/net/tcp_iface.[hh,cc]. 76 */ 77#ifndef __DEV_DIST_IFACE_HH__ 78#define __DEV_DIST_IFACE_HH__ 79 80#include <array> 81#include <mutex> 82#include <queue> 83#include <thread> 84#include <utility> 85 86#include "dev/net/dist_packet.hh" 87#include "dev/net/etherpkt.hh" 88#include "sim/core.hh" 89#include "sim/drain.hh" 90#include "sim/global_event.hh" 91#include "sim/serialize.hh" 92 93class EventManager; 94class System; 95class ThreadContext; 96 97/** 98 * The interface class to talk to peer gem5 processes. 99 */ 100class DistIface : public Drainable, public Serializable 101{ 102 public: 103 typedef DistHeaderPkt::Header Header; 104 105 protected: 106 typedef DistHeaderPkt::MsgType MsgType; 107 typedef DistHeaderPkt::ReqType ReqType; 108 109 private: 110 class SyncEvent; 111 /** @class Sync 112 * This class implements global sync operations among gem5 peer processes. 113 * 114 * @note This class is used as a singleton object (shared by all DistIface 115 * objects). 116 */ 117 class Sync : public Serializable 118 { 119 protected: 120 /** 121 * The lock to protect access to the Sync object. 122 */ 123 std::mutex lock; 124 /** 125 * Condition variable for the simulation thread to wait on 126 * until all receiver threads completes the current global 127 * synchronisation. 128 */ 129 std::condition_variable cv; 130 /** 131 * Number of receiver threads that not yet completed the current global 132 * synchronisation. 133 */ 134 unsigned waitNum; 135 /** 136 * Flag is set if exit is permitted upon sync completion 137 */ 138 bool doExit; 139 /** 140 * Flag is set if taking a ckpt is permitted upon sync completion 141 */ 142 bool doCkpt; 143 /** 144 * Flag is set if sync is to stop upon sync completion 145 */ 146 bool doStopSync; 147 /** 148 * The repeat value for the next periodic sync 149 */ 150 Tick nextRepeat; 151 /** 152 * Tick for the next periodic sync (if the event is not scheduled yet) 153 */ 154 Tick nextAt; 155 /** 156 * Flag is set if the sync is aborted (e.g. due to connection lost) 157 */ 158 bool isAbort; 159 160 friend class SyncEvent; 161 162 public: 163 /** 164 * Initialize periodic sync params. 165 * 166 * @param start Start tick for dist synchronisation 167 * @param repeat Frequency of dist synchronisation 168 * 169 */ 170 void init(Tick start, Tick repeat); 171 /** 172 * Core method to perform a full dist sync. 173 * 174 * @return true if the sync completes, false if it gets aborted 175 */ 176 virtual bool run(bool same_tick) = 0; 177 /** 178 * Callback when the receiver thread gets a sync ack message. 179 * 180 * @return false if the receiver thread needs to stop (e.g. 181 * simulation is to exit) 182 */ 183 virtual bool progress(Tick send_tick, 184 Tick next_repeat, 185 ReqType do_ckpt, 186 ReqType do_exit, 187 ReqType do_stop_sync) = 0; 188 /** 189 * Abort processing an on-going sync event (in case of an error, e.g. 190 * lost connection to a peer gem5) 191 */ 192 void abort(); 193 194 virtual void requestCkpt(ReqType req) = 0; 195 virtual void requestExit(ReqType req) = 0; 196 virtual void requestStopSync(ReqType req) = 0; 197 198 void drainComplete(); 199 200 virtual void serialize(CheckpointOut &cp) const override = 0; 201 virtual void unserialize(CheckpointIn &cp) override = 0; 202 }; 203 204 class SyncNode: public Sync 205 { 206 private: 207 /** 208 * Exit requested 209 */ 210 ReqType needExit; 211 /** 212 * Ckpt requested 213 */ 214 ReqType needCkpt; 215 /** 216 * Sync stop requested 217 */ 218 ReqType needStopSync; 219 220 public: 221 222 SyncNode(); 223 ~SyncNode() {} 224 bool run(bool same_tick) override; 225 bool progress(Tick max_req_tick, 226 Tick next_repeat, 227 ReqType do_ckpt, 228 ReqType do_exit, 229 ReqType do_stop_sync) override; 230 231 void requestCkpt(ReqType req) override; 232 void requestExit(ReqType req) override; 233 void requestStopSync(ReqType req) override; 234 235 void serialize(CheckpointOut &cp) const override; 236 void unserialize(CheckpointIn &cp) override; 237 }; 238 239 class SyncSwitch: public Sync 240 { 241 private: 242 /** 243 * Counter for recording exit requests 244 */ 245 unsigned numExitReq; 246 /** 247 * Counter for recording ckpt requests 248 */ 249 unsigned numCkptReq; 250 /** 251 * Counter for recording stop sync requests 252 */ 253 unsigned numStopSyncReq; 254 /** 255 * Number of connected simulated nodes 256 */ 257 unsigned numNodes; 258 259 public: 260 SyncSwitch(int num_nodes); 261 ~SyncSwitch() {} 262 263 bool run(bool same_tick) override; 264 bool progress(Tick max_req_tick, 265 Tick next_repeat, 266 ReqType do_ckpt, 267 ReqType do_exit, 268 ReqType do_stop_sync) override; 269 270 void requestCkpt(ReqType) override { 271 panic("Switch requested checkpoint"); 272 } 273 void requestExit(ReqType) override { 274 panic("Switch requested exit"); 275 } 276 void requestStopSync(ReqType) override { 277 panic("Switch requested stop sync"); 278 } 279 280 void serialize(CheckpointOut &cp) const override; 281 void unserialize(CheckpointIn &cp) override; 282 }; 283 284 /** 285 * The global event to schedule periodic dist sync. It is used as a 286 * singleton object. 287 * 288 * The periodic synchronisation works as follows. 289 * 1. A SyncEvent is scheduled as a global event when startup() is 290 * called. 291 * 2. The process() method of the SyncEvent initiates a new barrier 292 * for each simulated Ethernet link. 293 * 3. Simulation thread(s) then waits until all receiver threads 294 * complete the ongoing barrier. The global sync event is done. 295 */ 296 class SyncEvent : public GlobalSyncEvent 297 { 298 private: 299 /** 300 * Flag to set when the system is draining 301 */ 302 bool _draining; 303 public: 304 /** 305 * Only the firstly instantiated DistIface object will 306 * call this constructor. 307 */ 308 SyncEvent() : GlobalSyncEvent(Sim_Exit_Pri, 0), _draining(false) {} 309 310 ~SyncEvent() {} 311 /** 312 * Schedule the first periodic sync event. 313 */ 314 void start(); 315 /** 316 * This is a global event so process() will only be called by 317 * exactly one simulation thread. (See further comments in the .cc 318 * file.) 319 */ 320 void process() override; 321 322 bool draining() const { return _draining; } 323 void draining(bool fl) { _draining = fl; } 324 }; 325 /** 326 * Class to encapsulate information about data packets received. 327 328 * @note The main purpose of the class to take care of scheduling receive 329 * done events for the simulated network link and store incoming packets 330 * until they can be received by the simulated network link. 331 */ 332 class RecvScheduler : public Serializable 333 { 334 private: 335 /** 336 * Received packet descriptor. This information is used by the receive 337 * thread to schedule receive events and by the simulation thread to 338 * process those events. 339 */ 340 struct Desc : public Serializable 341 { 342 EthPacketPtr packet; 343 Tick sendTick; 344 Tick sendDelay; 345 346 Desc() : sendTick(0), sendDelay(0) {} 347 Desc(EthPacketPtr p, Tick s, Tick d) : 348 packet(p), sendTick(s), sendDelay(d) {} 349 Desc(const Desc &d) : 350 packet(d.packet), sendTick(d.sendTick), sendDelay(d.sendDelay) {} 351 352 void serialize(CheckpointOut &cp) const override; 353 void unserialize(CheckpointIn &cp) override; 354 }; 355 /** 356 * The queue to store the receive descriptors. 357 */ 358 std::queue<Desc> descQueue; 359 /** 360 * The tick when the most recent receive event was processed. 361 * 362 * @note This information is necessary to simulate possible receiver 363 * link contention when calculating the receive tick for the next 364 * incoming data packet (see the calcReceiveTick() method) 365 */ 366 Tick prevRecvTick; 367 /** 368 * The receive done event for the simulated Ethernet link. 369 * 370 * @note This object is constructed by the simulated network link. We 371 * schedule this object for each incoming data packet. 372 */ 373 Event *recvDone; 374 /** 375 * The link delay in ticks for the simulated Ethernet link. 376 * 377 * @note This value is used for calculating the receive ticks for 378 * incoming data packets. 379 */ 380 Tick linkDelay; 381 /** 382 * The event manager associated with the simulated Ethernet link. 383 * 384 * @note It is used to access the event queue for scheduling receive 385 * done events for the link. 386 */ 387 EventManager *eventManager; 388 /** 389 * Calculate the tick to schedule the next receive done event. 390 * 391 * @param send_tick The tick the packet was sent. 392 * @param send_delay The simulated delay at the sender side. 393 * @param prev_recv_tick Tick when the last receive event was 394 * processed. 395 * 396 * @note This method tries to take into account possible receiver link 397 * contention and adjust receive tick for the incoming packets 398 * accordingly. 399 */ 400 Tick calcReceiveTick(Tick send_tick, 401 Tick send_delay, 402 Tick prev_recv_tick); 403 404 /** 405 * Flag to set if receive ticks for pending packets need to be 406 * recalculated due to changed link latencies at a resume 407 */ 408 bool ckptRestore; 409 410 public: 411 /** 412 * Scheduler for the incoming data packets. 413 * 414 * @param em The event manager associated with the simulated Ethernet 415 * link. 416 */ 417 RecvScheduler(EventManager *em) : 418 prevRecvTick(0), recvDone(nullptr), linkDelay(0), 419 eventManager(em), ckptRestore(false) {} 420 421 /** 422 * Initialize network link parameters. 423 * 424 * @note This method is called from the receiver thread (see 425 * recvThreadFunc()). 426 */ 427 void init(Event *recv_done, Tick link_delay); 428 /** 429 * Fetch the next packet that is to be received by the simulated network 430 * link. 431 * 432 * @note This method is called from the process() method of the receive 433 * done event associated with the network link. 434 */ 435 EthPacketPtr popPacket(); 436 /** 437 * Push a newly arrived packet into the desc queue. 438 */ 439 void pushPacket(EthPacketPtr new_packet, 440 Tick send_tick, 441 Tick send_delay); 442 443 void serialize(CheckpointOut &cp) const override; 444 void unserialize(CheckpointIn &cp) override; 445 /** 446 * Adjust receive ticks for pending packets when restoring from a 447 * checkpoint 448 * 449 * @note Link speed and delay parameters may change at resume. 450 */ 451 void resumeRecvTicks(); 452 }; 453 /** 454 * Tick to schedule the first dist sync event. 455 * This is just as optimization : we do not need any dist sync 456 * event until the simulated NIC is brought up by the OS. 457 */ 458 Tick syncStart; 459 /** 460 * Frequency of dist sync events in ticks. 461 */ 462 Tick syncRepeat; 463 /** 464 * Receiver thread pointer. 465 * Each DistIface object must have exactly one receiver thread. 466 */ 467 std::thread *recvThread; 468 /** 469 * Meta information about data packets received. 470 */ 471 RecvScheduler recvScheduler; 472 /** 473 * Use pseudoOp to start synchronization. 474 */ 475 bool syncStartOnPseudoOp; 476 477 protected: 478 /** 479 * The rank of this process among the gem5 peers. 480 */ 481 unsigned rank; 482 /** 483 * The number of gem5 processes comprising this dist simulation. 484 */ 485 unsigned size; 486 /** 487 * Number of DistIface objects (i.e. dist links in this gem5 process) 488 */ 489 static unsigned distIfaceNum; 490 /** 491 * Unique id for the dist link 492 */ 493 unsigned distIfaceId; 494 495 bool isMaster; 496 497 private: 498 /** 499 * Number of receiver threads (in this gem5 process) 500 */ 501 static unsigned recvThreadsNum; 502 /** 503 * The singleton Sync object to perform dist synchronisation. 504 */ 505 static Sync *sync; 506 /** 507 * The singleton SyncEvent object to schedule periodic dist sync. 508 */ 509 static SyncEvent *syncEvent; 510 /** 511 * The very first DistIface object created becomes the master. We need 512 * a master to co-ordinate the global synchronisation. 513 */ 514 static DistIface *master; 515 /** 516 * System pointer used to wakeup sleeping threads when stopping sync. 517 */ 518 static System *sys; 519 /** 520 * Is this node a switch? 521 */ 522 static bool isSwitch; 523 524 private: 525 /** 526 * Send out a data packet to the remote end. 527 * @param header Meta info about the packet (which needs to be transferred 528 * to the destination alongside the packet). 529 * @param packet Pointer to the packet to send. 530 */ 531 virtual void sendPacket(const Header &header, const EthPacketPtr &packet) = 0; 532 /** 533 * Send out a control command to the remote end. 534 * @param header Meta info describing the command (e.g. sync request) 535 */ 536 virtual void sendCmd(const Header &header) = 0; 537 /** 538 * Receive a header (i.e. meta info describing a data packet or a control command) 539 * from the remote end. 540 * @param header The meta info structure to store the incoming header. 541 */ 542 virtual bool recvHeader(Header &header) = 0; 543 /** 544 * Receive a packet from the remote end. 545 * @param header Meta info about the incoming packet (obtanied by a previous 546 * call to the recvHedaer() method). 547 * @param Pointer to packet received. 548 */ 549 virtual void recvPacket(const Header &header, EthPacketPtr &packet) = 0; 550 /** 551 * Init hook for the underlaying transport 552 */ 553 virtual void initTransport() = 0; 554 /** 555 * spawn the receiver thread. 556 * @param recv_done The receive done event associated with the simulated 557 * Ethernet link. 558 * @param link_delay The link delay for the simulated Ethernet link. 559 */ 560 void spawnRecvThread(const Event *recv_done, Tick link_delay); 561 /** 562 * The function executed by a receiver thread. 563 */ 564 void recvThreadFunc(Event *recv_done, Tick link_delay); 565 566 public: 567 568 /** 569 * ctor 570 * @param dist_rank Rank of this gem5 process within the dist run 571 * @param sync_start Start tick for dist synchronisation 572 * @param sync_repeat Frequency for dist synchronisation 573 * @param em The event manager associated with the simulated Ethernet link 574 */ 575 DistIface(unsigned dist_rank, 576 unsigned dist_size, 577 Tick sync_start, 578 Tick sync_repeat, 579 EventManager *em, 580 bool use_pseudo_op, 581 bool is_switch, 582 int num_nodes); 583 584 virtual ~DistIface(); 585 /** 586 * Send out an Ethernet packet. 587 * @param pkt The Ethernet packet to send. 588 * @param send_delay The delay in ticks for the send completion event. 589 */ 590 void packetOut(EthPacketPtr pkt, Tick send_delay); 591 /** 592 * Fetch the packet scheduled to be received next by the simulated 593 * network link. 594 * 595 * @note This method is called within the process() method of the link 596 * receive done event. It also schedules the next receive event if the 597 * receive queue is not empty. 598 */ 599 EthPacketPtr packetIn() { return recvScheduler.popPacket(); } 600 601 DrainState drain() override; 602 void drainResume() override; 603 void init(const Event *e, Tick link_delay); 604 void startup(); 605 606 void serialize(CheckpointOut &cp) const override; 607 void unserialize(CheckpointIn &cp) override; 608 /** 609 * Initiate the exit from the simulation. 610 * @param delay Delay param from the m5 exit command. If Delay is zero 611 * then a collaborative exit is requested (i.e. all nodes have to call 612 * this method before the distributed simulation can exit). If Delay is 613 * not zero then exit is requested asap (and it will happen at the next 614 * sync tick). 615 * @return False if we are in distributed mode (i.e. exit can happen only 616 * at sync), True otherwise. 617 */ 618 static bool readyToExit(Tick delay); 619 /** 620 * Initiate taking a checkpoint 621 * @param delay Delay param from the m5 checkpoint command. If Delay is 622 * zero then a collaborative checkpoint is requested (i.e. all nodes have 623 * to call this method before the checkpoint can be taken). If Delay is 624 * not zero then a checkpoint is requested asap (and it will happen at the 625 * next sync tick). 626 * @return False if we are in dist mode (i.e. exit can happen only at 627 * sync), True otherwise. 628 */ 629 static bool readyToCkpt(Tick delay, Tick period); 630 /** 631 * Getter for the dist rank param. 632 */ 633 static uint64_t rankParam(); 634 /** 635 * Getter for the dist size param. 636 */ 637 static uint64_t sizeParam(); 638 /** 639 * Trigger the master to start/stop synchronization. 640 */ 641 static void toggleSync(ThreadContext *tc); 642 }; 643 644#endif 645