dist_iface.hh revision 11703:08b78e0a3717
1/* 2 * Copyright (c) 2015 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Gabor Dozsa 38 */ 39 40/* @file 41 * The interface class for dist gem5 simulations. 42 * 43 * dist-gem5 is an extension to gem5 to enable parallel simulation of a 44 * distributed system (e.g. simulation of a pool of machines 45 * connected by Ethernet links). A dist gem5 run consists of seperate gem5 46 * processes running in parallel. Each gem5 process executes 47 * the simulation of a component of the simulated distributed system. 48 * (An example component can be a dist-core board with an Ethernet NIC.) 49 * The DistIface class below provides services to transfer data and 50 * control messages among the gem5 processes. The main such services are 51 * as follows. 52 * 53 * 1. Send a data packet coming from a simulated Ethernet link. The packet 54 * will be transferred to (all) the target(s) gem5 processes. The send 55 * operation is always performed by the simulation thread, i.e. the gem5 56 * thread that is processing the event queue associated with the simulated 57 * Ethernet link. 58 * 59 * 2. Spawn a receiver thread to process messages coming in from the 60 * from other gem5 processes. Each simulated Ethernet link has its own 61 * associated receiver thread. The receiver thread saves the incoming packet 62 * and schedule an appropriate receive event in the event queue. 63 * 64 * 3. Schedule a global barrier event periodically to keep the gem5 65 * processes in sync. 66 * Periodic barrier event to keep peer gem5 processes in sync. The basic idea 67 * is that no gem5 process can go ahead further than the simulated link 68 * transmission delay to ensure that a corresponding receive event can always 69 * be scheduled for any message coming in from a peer gem5 process. 70 * 71 * 72 * 73 * This interface is an abstract class. It can work with various low level 74 * send/receive service implementations (e.g. TCP/IP, MPI,...). A TCP 75 * stream socket version is implemented in src/dev/net/tcp_iface.[hh,cc]. 76 */ 77#ifndef __DEV_DIST_IFACE_HH__ 78#define __DEV_DIST_IFACE_HH__ 79 80#include <array> 81#include <mutex> 82#include <queue> 83#include <thread> 84#include <utility> 85 86#include "dev/net/dist_packet.hh" 87#include "dev/net/etherpkt.hh" 88#include "sim/core.hh" 89#include "sim/drain.hh" 90#include "sim/global_event.hh" 91#include "sim/serialize.hh" 92 93class EventManager; 94class System; 95class ThreadContext; 96 97/** 98 * The interface class to talk to peer gem5 processes. 99 */ 100class DistIface : public Drainable, public Serializable 101{ 102 public: 103 typedef DistHeaderPkt::Header Header; 104 105 protected: 106 typedef DistHeaderPkt::MsgType MsgType; 107 typedef DistHeaderPkt::ReqType ReqType; 108 109 private: 110 class SyncEvent; 111 /** @class Sync 112 * This class implements global sync operations among gem5 peer processes. 113 * 114 * @note This class is used as a singleton object (shared by all DistIface 115 * objects). 116 */ 117 class Sync : public Serializable 118 { 119 protected: 120 /** 121 * The lock to protect access to the Sync object. 122 */ 123 std::mutex lock; 124 /** 125 * Condition variable for the simulation thread to wait on 126 * until all receiver threads completes the current global 127 * synchronisation. 128 */ 129 std::condition_variable cv; 130 /** 131 * Number of receiver threads that not yet completed the current global 132 * synchronisation. 133 */ 134 unsigned waitNum; 135 /** 136 * Flag is set if exit is permitted upon sync completion 137 */ 138 bool doExit; 139 /** 140 * Flag is set if taking a ckpt is permitted upon sync completion 141 */ 142 bool doCkpt; 143 /** 144 * Flag is set if sync is to stop upon sync completion 145 */ 146 bool doStopSync; 147 /** 148 * The repeat value for the next periodic sync 149 */ 150 Tick nextRepeat; 151 /** 152 * Tick for the next periodic sync (if the event is not scheduled yet) 153 */ 154 Tick nextAt; 155 156 friend class SyncEvent; 157 158 public: 159 /** 160 * Initialize periodic sync params. 161 * 162 * @param start Start tick for dist synchronisation 163 * @param repeat Frequency of dist synchronisation 164 * 165 */ 166 void init(Tick start, Tick repeat); 167 /** 168 * Core method to perform a full dist sync. 169 */ 170 virtual void run(bool same_tick) = 0; 171 /** 172 * Callback when the receiver thread gets a sync ack message. 173 */ 174 virtual void progress(Tick send_tick, 175 Tick next_repeat, 176 ReqType do_ckpt, 177 ReqType do_exit, 178 ReqType do_stop_sync) = 0; 179 180 virtual void requestCkpt(ReqType req) = 0; 181 virtual void requestExit(ReqType req) = 0; 182 virtual void requestStopSync(ReqType req) = 0; 183 184 void drainComplete(); 185 186 virtual void serialize(CheckpointOut &cp) const override = 0; 187 virtual void unserialize(CheckpointIn &cp) override = 0; 188 }; 189 190 class SyncNode: public Sync 191 { 192 private: 193 /** 194 * Exit requested 195 */ 196 ReqType needExit; 197 /** 198 * Ckpt requested 199 */ 200 ReqType needCkpt; 201 /** 202 * Sync stop requested 203 */ 204 ReqType needStopSync; 205 206 public: 207 208 SyncNode(); 209 ~SyncNode() {} 210 void run(bool same_tick) override; 211 void progress(Tick max_req_tick, 212 Tick next_repeat, 213 ReqType do_ckpt, 214 ReqType do_exit, 215 ReqType do_stop_sync) override; 216 217 void requestCkpt(ReqType req) override; 218 void requestExit(ReqType req) override; 219 void requestStopSync(ReqType req) override; 220 221 void serialize(CheckpointOut &cp) const override; 222 void unserialize(CheckpointIn &cp) override; 223 }; 224 225 class SyncSwitch: public Sync 226 { 227 private: 228 /** 229 * Counter for recording exit requests 230 */ 231 unsigned numExitReq; 232 /** 233 * Counter for recording ckpt requests 234 */ 235 unsigned numCkptReq; 236 /** 237 * Counter for recording stop sync requests 238 */ 239 unsigned numStopSyncReq; 240 /** 241 * Number of connected simulated nodes 242 */ 243 unsigned numNodes; 244 245 public: 246 SyncSwitch(int num_nodes); 247 ~SyncSwitch() {} 248 249 void run(bool same_tick) override; 250 void progress(Tick max_req_tick, 251 Tick next_repeat, 252 ReqType do_ckpt, 253 ReqType do_exit, 254 ReqType do_stop_sync) override; 255 256 void requestCkpt(ReqType) override { 257 panic("Switch requested checkpoint"); 258 } 259 void requestExit(ReqType) override { 260 panic("Switch requested exit"); 261 } 262 void requestStopSync(ReqType) override { 263 panic("Switch requested stop sync"); 264 } 265 266 void serialize(CheckpointOut &cp) const override; 267 void unserialize(CheckpointIn &cp) override; 268 }; 269 270 /** 271 * The global event to schedule periodic dist sync. It is used as a 272 * singleton object. 273 * 274 * The periodic synchronisation works as follows. 275 * 1. A SyncEvent is scheduled as a global event when startup() is 276 * called. 277 * 2. The process() method of the SyncEvent initiates a new barrier 278 * for each simulated Ethernet link. 279 * 3. Simulation thread(s) then waits until all receiver threads 280 * complete the ongoing barrier. The global sync event is done. 281 */ 282 class SyncEvent : public GlobalSyncEvent 283 { 284 private: 285 /** 286 * Flag to set when the system is draining 287 */ 288 bool _draining; 289 public: 290 /** 291 * Only the firstly instantiated DistIface object will 292 * call this constructor. 293 */ 294 SyncEvent() : GlobalSyncEvent(Sim_Exit_Pri, 0), _draining(false) {} 295 296 ~SyncEvent() {} 297 /** 298 * Schedule the first periodic sync event. 299 */ 300 void start(); 301 /** 302 * This is a global event so process() will only be called by 303 * exactly one simulation thread. (See further comments in the .cc 304 * file.) 305 */ 306 void process() override; 307 308 bool draining() const { return _draining; } 309 void draining(bool fl) { _draining = fl; } 310 }; 311 /** 312 * Class to encapsulate information about data packets received. 313 314 * @note The main purpose of the class to take care of scheduling receive 315 * done events for the simulated network link and store incoming packets 316 * until they can be received by the simulated network link. 317 */ 318 class RecvScheduler : public Serializable 319 { 320 private: 321 /** 322 * Received packet descriptor. This information is used by the receive 323 * thread to schedule receive events and by the simulation thread to 324 * process those events. 325 */ 326 struct Desc : public Serializable 327 { 328 EthPacketPtr packet; 329 Tick sendTick; 330 Tick sendDelay; 331 332 Desc() : sendTick(0), sendDelay(0) {} 333 Desc(EthPacketPtr p, Tick s, Tick d) : 334 packet(p), sendTick(s), sendDelay(d) {} 335 Desc(const Desc &d) : 336 packet(d.packet), sendTick(d.sendTick), sendDelay(d.sendDelay) {} 337 338 void serialize(CheckpointOut &cp) const override; 339 void unserialize(CheckpointIn &cp) override; 340 }; 341 /** 342 * The queue to store the receive descriptors. 343 */ 344 std::queue<Desc> descQueue; 345 /** 346 * The tick when the most recent receive event was processed. 347 * 348 * @note This information is necessary to simulate possible receiver 349 * link contention when calculating the receive tick for the next 350 * incoming data packet (see the calcReceiveTick() method) 351 */ 352 Tick prevRecvTick; 353 /** 354 * The receive done event for the simulated Ethernet link. 355 * 356 * @note This object is constructed by the simulated network link. We 357 * schedule this object for each incoming data packet. 358 */ 359 Event *recvDone; 360 /** 361 * The link delay in ticks for the simulated Ethernet link. 362 * 363 * @note This value is used for calculating the receive ticks for 364 * incoming data packets. 365 */ 366 Tick linkDelay; 367 /** 368 * The event manager associated with the simulated Ethernet link. 369 * 370 * @note It is used to access the event queue for scheduling receive 371 * done events for the link. 372 */ 373 EventManager *eventManager; 374 /** 375 * Calculate the tick to schedule the next receive done event. 376 * 377 * @param send_tick The tick the packet was sent. 378 * @param send_delay The simulated delay at the sender side. 379 * @param prev_recv_tick Tick when the last receive event was 380 * processed. 381 * 382 * @note This method tries to take into account possible receiver link 383 * contention and adjust receive tick for the incoming packets 384 * accordingly. 385 */ 386 Tick calcReceiveTick(Tick send_tick, 387 Tick send_delay, 388 Tick prev_recv_tick); 389 390 /** 391 * Flag to set if receive ticks for pending packets need to be 392 * recalculated due to changed link latencies at a resume 393 */ 394 bool ckptRestore; 395 396 public: 397 /** 398 * Scheduler for the incoming data packets. 399 * 400 * @param em The event manager associated with the simulated Ethernet 401 * link. 402 */ 403 RecvScheduler(EventManager *em) : 404 prevRecvTick(0), recvDone(nullptr), linkDelay(0), 405 eventManager(em), ckptRestore(false) {} 406 407 /** 408 * Initialize network link parameters. 409 * 410 * @note This method is called from the receiver thread (see 411 * recvThreadFunc()). 412 */ 413 void init(Event *recv_done, Tick link_delay); 414 /** 415 * Fetch the next packet that is to be received by the simulated network 416 * link. 417 * 418 * @note This method is called from the process() method of the receive 419 * done event associated with the network link. 420 */ 421 EthPacketPtr popPacket(); 422 /** 423 * Push a newly arrived packet into the desc queue. 424 */ 425 void pushPacket(EthPacketPtr new_packet, 426 Tick send_tick, 427 Tick send_delay); 428 429 void serialize(CheckpointOut &cp) const override; 430 void unserialize(CheckpointIn &cp) override; 431 /** 432 * Adjust receive ticks for pending packets when restoring from a 433 * checkpoint 434 * 435 * @note Link speed and delay parameters may change at resume. 436 */ 437 void resumeRecvTicks(); 438 }; 439 /** 440 * Tick to schedule the first dist sync event. 441 * This is just as optimization : we do not need any dist sync 442 * event until the simulated NIC is brought up by the OS. 443 */ 444 Tick syncStart; 445 /** 446 * Frequency of dist sync events in ticks. 447 */ 448 Tick syncRepeat; 449 /** 450 * Receiver thread pointer. 451 * Each DistIface object must have exactly one receiver thread. 452 */ 453 std::thread *recvThread; 454 /** 455 * Meta information about data packets received. 456 */ 457 RecvScheduler recvScheduler; 458 /** 459 * Use pseudoOp to start synchronization. 460 */ 461 bool syncStartOnPseudoOp; 462 463 protected: 464 /** 465 * The rank of this process among the gem5 peers. 466 */ 467 unsigned rank; 468 /** 469 * The number of gem5 processes comprising this dist simulation. 470 */ 471 unsigned size; 472 /** 473 * Number of DistIface objects (i.e. dist links in this gem5 process) 474 */ 475 static unsigned distIfaceNum; 476 /** 477 * Unique id for the dist link 478 */ 479 unsigned distIfaceId; 480 481 bool isMaster; 482 483 private: 484 /** 485 * Number of receiver threads (in this gem5 process) 486 */ 487 static unsigned recvThreadsNum; 488 /** 489 * The singleton Sync object to perform dist synchronisation. 490 */ 491 static Sync *sync; 492 /** 493 * The singleton SyncEvent object to schedule periodic dist sync. 494 */ 495 static SyncEvent *syncEvent; 496 /** 497 * The very first DistIface object created becomes the master. We need 498 * a master to co-ordinate the global synchronisation. 499 */ 500 static DistIface *master; 501 /** 502 * System pointer used to wakeup sleeping threads when stopping sync. 503 */ 504 static System *sys; 505 /** 506 * Is this node a switch? 507 */ 508 static bool isSwitch; 509 510 private: 511 /** 512 * Send out a data packet to the remote end. 513 * @param header Meta info about the packet (which needs to be transferred 514 * to the destination alongside the packet). 515 * @param packet Pointer to the packet to send. 516 */ 517 virtual void sendPacket(const Header &header, const EthPacketPtr &packet) = 0; 518 /** 519 * Send out a control command to the remote end. 520 * @param header Meta info describing the command (e.g. sync request) 521 */ 522 virtual void sendCmd(const Header &header) = 0; 523 /** 524 * Receive a header (i.e. meta info describing a data packet or a control command) 525 * from the remote end. 526 * @param header The meta info structure to store the incoming header. 527 */ 528 virtual bool recvHeader(Header &header) = 0; 529 /** 530 * Receive a packet from the remote end. 531 * @param header Meta info about the incoming packet (obtanied by a previous 532 * call to the recvHedaer() method). 533 * @param Pointer to packet received. 534 */ 535 virtual void recvPacket(const Header &header, EthPacketPtr &packet) = 0; 536 /** 537 * Init hook for the underlaying transport 538 */ 539 virtual void initTransport() = 0; 540 /** 541 * spawn the receiver thread. 542 * @param recv_done The receive done event associated with the simulated 543 * Ethernet link. 544 * @param link_delay The link delay for the simulated Ethernet link. 545 */ 546 void spawnRecvThread(const Event *recv_done, Tick link_delay); 547 /** 548 * The function executed by a receiver thread. 549 */ 550 void recvThreadFunc(Event *recv_done, Tick link_delay); 551 552 public: 553 554 /** 555 * ctor 556 * @param dist_rank Rank of this gem5 process within the dist run 557 * @param sync_start Start tick for dist synchronisation 558 * @param sync_repeat Frequency for dist synchronisation 559 * @param em The event manager associated with the simulated Ethernet link 560 */ 561 DistIface(unsigned dist_rank, 562 unsigned dist_size, 563 Tick sync_start, 564 Tick sync_repeat, 565 EventManager *em, 566 bool use_pseudo_op, 567 bool is_switch, 568 int num_nodes); 569 570 virtual ~DistIface(); 571 /** 572 * Send out an Ethernet packet. 573 * @param pkt The Ethernet packet to send. 574 * @param send_delay The delay in ticks for the send completion event. 575 */ 576 void packetOut(EthPacketPtr pkt, Tick send_delay); 577 /** 578 * Fetch the packet scheduled to be received next by the simulated 579 * network link. 580 * 581 * @note This method is called within the process() method of the link 582 * receive done event. It also schedules the next receive event if the 583 * receive queue is not empty. 584 */ 585 EthPacketPtr packetIn() { return recvScheduler.popPacket(); } 586 587 DrainState drain() override; 588 void drainResume() override; 589 void init(const Event *e, Tick link_delay); 590 void startup(); 591 592 void serialize(CheckpointOut &cp) const override; 593 void unserialize(CheckpointIn &cp) override; 594 /** 595 * Initiate the exit from the simulation. 596 * @param delay Delay param from the m5 exit command. If Delay is zero 597 * then a collaborative exit is requested (i.e. all nodes have to call 598 * this method before the distributed simulation can exit). If Delay is 599 * not zero then exit is requested asap (and it will happen at the next 600 * sync tick). 601 * @return False if we are in distributed mode (i.e. exit can happen only 602 * at sync), True otherwise. 603 */ 604 static bool readyToExit(Tick delay); 605 /** 606 * Initiate taking a checkpoint 607 * @param delay Delay param from the m5 checkpoint command. If Delay is 608 * zero then a collaborative checkpoint is requested (i.e. all nodes have 609 * to call this method before the checkpoint can be taken). If Delay is 610 * not zero then a checkpoint is requested asap (and it will happen at the 611 * next sync tick). 612 * @return False if we are in dist mode (i.e. exit can happen only at 613 * sync), True otherwise. 614 */ 615 static bool readyToCkpt(Tick delay, Tick period); 616 /** 617 * Getter for the dist rank param. 618 */ 619 static uint64_t rankParam(); 620 /** 621 * Getter for the dist size param. 622 */ 623 static uint64_t sizeParam(); 624 /** 625 * Trigger the master to start/stop synchronization. 626 */ 627 static void toggleSync(ThreadContext *tc); 628 }; 629 630#endif 631