// dist_iface.hh revision 11290:1640dd68b0a4
1/* 2 * Copyright (c) 2015 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Gabor Dozsa 38 */ 39 40/* @file 41 * The interface class for dist gem5 simulations. 42 * 43 * dist-gem5 is an extension to gem5 to enable parallel simulation of a 44 * distributed system (e.g. simulation of a pool of machines 45 * connected by Ethernet links). A dist gem5 run consists of seperate gem5 46 * processes running in parallel. Each gem5 process executes 47 * the simulation of a component of the simulated distributed system. 48 * (An example component can be a dist-core board with an Ethernet NIC.) 49 * The DistIface class below provides services to transfer data and 50 * control messages among the gem5 processes. The main such services are 51 * as follows. 52 * 53 * 1. Send a data packet coming from a simulated Ethernet link. The packet 54 * will be transferred to (all) the target(s) gem5 processes. The send 55 * operation is always performed by the simulation thread, i.e. the gem5 56 * thread that is processing the event queue associated with the simulated 57 * Ethernet link. 58 * 59 * 2. Spawn a receiver thread to process messages coming in from the 60 * from other gem5 processes. Each simulated Ethernet link has its own 61 * associated receiver thread. The receiver thread saves the incoming packet 62 * and schedule an appropriate receive event in the event queue. 63 * 64 * 3. Schedule a global barrier event periodically to keep the gem5 65 * processes in sync. 
66 * Periodic barrier event to keep peer gem5 processes in sync. The basic idea 67 * is that no gem5 process can go ahead further than the simulated link 68 * transmission delay to ensure that a corresponding receive event can always 69 * be scheduled for any message coming in from a peer gem5 process. 70 * 71 * 72 * 73 * This interface is an abstract class. It can work with various low level 74 * send/receive service implementations (e.g. TCP/IP, MPI,...). A TCP 75 * stream socket version is implemented in src/dev/net/tcp_iface.[hh,cc]. 76 */ 77#ifndef __DEV_DIST_IFACE_HH__ 78#define __DEV_DIST_IFACE_HH__ 79 80#include <array> 81#include <mutex> 82#include <queue> 83#include <thread> 84#include <utility> 85 86#include "dev/net/dist_packet.hh" 87#include "dev/net/etherpkt.hh" 88#include "sim/core.hh" 89#include "sim/drain.hh" 90#include "sim/global_event.hh" 91#include "sim/serialize.hh" 92 93class EventManager; 94 95/** 96 * The interface class to talk to peer gem5 processes. 97 */ 98class DistIface : public Drainable, public Serializable 99{ 100 public: 101 typedef DistHeaderPkt::Header Header; 102 103 protected: 104 typedef DistHeaderPkt::MsgType MsgType; 105 typedef DistHeaderPkt::ReqType ReqType; 106 107 private: 108 class SyncEvent; 109 /** @class Sync 110 * This class implements global sync operations among gem5 peer processes. 111 * 112 * @note This class is used as a singleton object (shared by all DistIface 113 * objects). 114 */ 115 class Sync : public Serializable 116 { 117 protected: 118 /** 119 * The lock to protect access to the Sync object. 120 */ 121 std::mutex lock; 122 /** 123 * Condition variable for the simulation thread to wait on 124 * until all receiver threads completes the current global 125 * synchronisation. 126 */ 127 std::condition_variable cv; 128 /** 129 * Number of receiver threads that not yet completed the current global 130 * synchronisation. 
131 */ 132 unsigned waitNum; 133 /** 134 * Flag is set if exit is permitted upon sync completion 135 */ 136 bool doExit; 137 /** 138 * Flag is set if taking a ckpt is permitted upon sync completion 139 */ 140 bool doCkpt; 141 /** 142 * The repeat value for the next periodic sync 143 */ 144 Tick nextRepeat; 145 /** 146 * Tick for the very first periodic sync 147 */ 148 Tick firstAt; 149 /** 150 * Tick for the next periodic sync (if the event is not scheduled yet) 151 */ 152 Tick nextAt; 153 154 friend class SyncEvent; 155 156 public: 157 /** 158 * Initialize periodic sync params. 159 * 160 * @param start Start tick for dist synchronisation 161 * @param repeat Frequency of dist synchronisation 162 * 163 */ 164 void init(Tick start, Tick repeat); 165 /** 166 * Core method to perform a full dist sync. 167 */ 168 virtual void run(bool same_tick) = 0; 169 /** 170 * Callback when the receiver thread gets a sync ack message. 171 */ 172 virtual void progress(Tick send_tick, 173 Tick next_repeat, 174 ReqType do_ckpt, 175 ReqType do_exit) = 0; 176 177 virtual void requestCkpt(ReqType req) = 0; 178 virtual void requestExit(ReqType req) = 0; 179 180 void drainComplete(); 181 182 virtual void serialize(CheckpointOut &cp) const override = 0; 183 virtual void unserialize(CheckpointIn &cp) override = 0; 184 }; 185 186 class SyncNode: public Sync 187 { 188 private: 189 /** 190 * Exit requested 191 */ 192 ReqType needExit; 193 /** 194 * Ckpt requested 195 */ 196 ReqType needCkpt; 197 198 public: 199 200 SyncNode(); 201 ~SyncNode() {} 202 void run(bool same_tick) override; 203 void progress(Tick max_req_tick, 204 Tick next_repeat, 205 ReqType do_ckpt, 206 ReqType do_exit) override; 207 208 void requestCkpt(ReqType req) override; 209 void requestExit(ReqType req) override; 210 211 void serialize(CheckpointOut &cp) const override; 212 void unserialize(CheckpointIn &cp) override; 213 }; 214 215 class SyncSwitch: public Sync 216 { 217 private: 218 /** 219 * Counter for recording exit 
requests 220 */ 221 unsigned numExitReq; 222 /** 223 * Counter for recording ckpt requests 224 */ 225 unsigned numCkptReq; 226 /** 227 * Number of connected simulated nodes 228 */ 229 unsigned numNodes; 230 231 public: 232 SyncSwitch(int num_nodes); 233 ~SyncSwitch() {} 234 235 void run(bool same_tick) override; 236 void progress(Tick max_req_tick, 237 Tick next_repeat, 238 ReqType do_ckpt, 239 ReqType do_exit) override; 240 241 void requestCkpt(ReqType) override { 242 panic("Switch requested checkpoint"); 243 } 244 void requestExit(ReqType) override { 245 panic("Switch requested exit"); 246 } 247 248 void serialize(CheckpointOut &cp) const override; 249 void unserialize(CheckpointIn &cp) override; 250 }; 251 252 /** 253 * The global event to schedule periodic dist sync. It is used as a 254 * singleton object. 255 * 256 * The periodic synchronisation works as follows. 257 * 1. A SyncEvent is scheduled as a global event when startup() is 258 * called. 259 * 2. The process() method of the SyncEvent initiates a new barrier 260 * for each simulated Ethernet link. 261 * 3. Simulation thread(s) then waits until all receiver threads 262 * complete the ongoing barrier. The global sync event is done. 263 */ 264 class SyncEvent : public GlobalSyncEvent 265 { 266 private: 267 /** 268 * Flag to set when the system is draining 269 */ 270 bool _draining; 271 public: 272 /** 273 * Only the firstly instantiated DistIface object will 274 * call this constructor. 275 */ 276 SyncEvent() : GlobalSyncEvent(Sim_Exit_Pri, 0), _draining(false) {} 277 278 ~SyncEvent() {} 279 /** 280 * Schedule the first periodic sync event. 281 */ 282 void start(); 283 /** 284 * This is a global event so process() will only be called by 285 * exactly one simulation thread. (See further comments in the .cc 286 * file.) 
287 */ 288 void process() override; 289 290 bool draining() const { return _draining; } 291 void draining(bool fl) { _draining = fl; } 292 }; 293 /** 294 * Class to encapsulate information about data packets received. 295 296 * @note The main purpose of the class to take care of scheduling receive 297 * done events for the simulated network link and store incoming packets 298 * until they can be received by the simulated network link. 299 */ 300 class RecvScheduler : public Serializable 301 { 302 private: 303 /** 304 * Received packet descriptor. This information is used by the receive 305 * thread to schedule receive events and by the simulation thread to 306 * process those events. 307 */ 308 struct Desc : public Serializable 309 { 310 EthPacketPtr packet; 311 Tick sendTick; 312 Tick sendDelay; 313 314 Desc() : sendTick(0), sendDelay(0) {} 315 Desc(EthPacketPtr p, Tick s, Tick d) : 316 packet(p), sendTick(s), sendDelay(d) {} 317 Desc(const Desc &d) : 318 packet(d.packet), sendTick(d.sendTick), sendDelay(d.sendDelay) {} 319 320 void serialize(CheckpointOut &cp) const override; 321 void unserialize(CheckpointIn &cp) override; 322 }; 323 /** 324 * The queue to store the receive descriptors. 325 */ 326 std::queue<Desc> descQueue; 327 /** 328 * The tick when the most recent receive event was processed. 329 * 330 * @note This information is necessary to simulate possible receiver 331 * link contention when calculating the receive tick for the next 332 * incoming data packet (see the calcReceiveTick() method) 333 */ 334 Tick prevRecvTick; 335 /** 336 * The receive done event for the simulated Ethernet link. 337 * 338 * @note This object is constructed by the simulated network link. We 339 * schedule this object for each incoming data packet. 340 */ 341 Event *recvDone; 342 /** 343 * The link delay in ticks for the simulated Ethernet link. 344 * 345 * @note This value is used for calculating the receive ticks for 346 * incoming data packets. 
347 */ 348 Tick linkDelay; 349 /** 350 * The event manager associated with the simulated Ethernet link. 351 * 352 * @note It is used to access the event queue for scheduling receive 353 * done events for the link. 354 */ 355 EventManager *eventManager; 356 /** 357 * Calculate the tick to schedule the next receive done event. 358 * 359 * @param send_tick The tick the packet was sent. 360 * @param send_delay The simulated delay at the sender side. 361 * @param prev_recv_tick Tick when the last receive event was 362 * processed. 363 * 364 * @note This method tries to take into account possible receiver link 365 * contention and adjust receive tick for the incoming packets 366 * accordingly. 367 */ 368 Tick calcReceiveTick(Tick send_tick, 369 Tick send_delay, 370 Tick prev_recv_tick); 371 372 /** 373 * Flag to set if receive ticks for pending packets need to be 374 * recalculated due to changed link latencies at a resume 375 */ 376 bool ckptRestore; 377 378 public: 379 /** 380 * Scheduler for the incoming data packets. 381 * 382 * @param em The event manager associated with the simulated Ethernet 383 * link. 384 */ 385 RecvScheduler(EventManager *em) : 386 prevRecvTick(0), recvDone(nullptr), linkDelay(0), 387 eventManager(em), ckptRestore(false) {} 388 389 /** 390 * Initialize network link parameters. 391 * 392 * @note This method is called from the receiver thread (see 393 * recvThreadFunc()). 394 */ 395 void init(Event *recv_done, Tick link_delay); 396 /** 397 * Fetch the next packet that is to be received by the simulated network 398 * link. 399 * 400 * @note This method is called from the process() method of the receive 401 * done event associated with the network link. 402 */ 403 EthPacketPtr popPacket(); 404 /** 405 * Push a newly arrived packet into the desc queue. 
406 */ 407 void pushPacket(EthPacketPtr new_packet, 408 Tick send_tick, 409 Tick send_delay); 410 411 void serialize(CheckpointOut &cp) const override; 412 void unserialize(CheckpointIn &cp) override; 413 /** 414 * Adjust receive ticks for pending packets when restoring from a 415 * checkpoint 416 * 417 * @note Link speed and delay parameters may change at resume. 418 */ 419 void resumeRecvTicks(); 420 }; 421 /** 422 * Tick to schedule the first dist sync event. 423 * This is just as optimization : we do not need any dist sync 424 * event until the simulated NIC is brought up by the OS. 425 */ 426 Tick syncStart; 427 /** 428 * Frequency of dist sync events in ticks. 429 */ 430 Tick syncRepeat; 431 /** 432 * Receiver thread pointer. 433 * Each DistIface object must have exactly one receiver thread. 434 */ 435 std::thread *recvThread; 436 /** 437 * Meta information about data packets received. 438 */ 439 RecvScheduler recvScheduler; 440 441 protected: 442 /** 443 * The rank of this process among the gem5 peers. 444 */ 445 unsigned rank; 446 /** 447 * The number of gem5 processes comprising this dist simulation. 448 */ 449 unsigned size; 450 /** 451 * Number of DistIface objects (i.e. dist links in this gem5 process) 452 */ 453 static unsigned distIfaceNum; 454 /** 455 * Unique id for the dist link 456 */ 457 unsigned distIfaceId; 458 459 bool isMaster; 460 461 private: 462 /** 463 * Number of receiver threads (in this gem5 process) 464 */ 465 static unsigned recvThreadsNum; 466 /** 467 * The singleton Sync object to perform dist synchronisation. 468 */ 469 static Sync *sync; 470 /** 471 * The singleton SyncEvent object to schedule periodic dist sync. 472 */ 473 static SyncEvent *syncEvent; 474 /** 475 * The very first DistIface object created becomes the master. We need 476 * a master to co-ordinate the global synchronisation. 477 */ 478 static DistIface *master; 479 480 private: 481 /** 482 * Send out a data packet to the remote end. 
483 * @param header Meta info about the packet (which needs to be transferred 484 * to the destination alongside the packet). 485 * @param packet Pointer to the packet to send. 486 */ 487 virtual void sendPacket(const Header &header, const EthPacketPtr &packet) = 0; 488 /** 489 * Send out a control command to the remote end. 490 * @param header Meta info describing the command (e.g. sync request) 491 */ 492 virtual void sendCmd(const Header &header) = 0; 493 /** 494 * Receive a header (i.e. meta info describing a data packet or a control command) 495 * from the remote end. 496 * @param header The meta info structure to store the incoming header. 497 */ 498 virtual bool recvHeader(Header &header) = 0; 499 /** 500 * Receive a packet from the remote end. 501 * @param header Meta info about the incoming packet (obtanied by a previous 502 * call to the recvHedaer() method). 503 * @param Pointer to packet received. 504 */ 505 virtual void recvPacket(const Header &header, EthPacketPtr &packet) = 0; 506 /** 507 * Init hook for the underlaying transport 508 */ 509 virtual void initTransport() = 0; 510 /** 511 * spawn the receiver thread. 512 * @param recv_done The receive done event associated with the simulated 513 * Ethernet link. 514 * @param link_delay The link delay for the simulated Ethernet link. 515 */ 516 void spawnRecvThread(const Event *recv_done, Tick link_delay); 517 /** 518 * The function executed by a receiver thread. 
519 */ 520 void recvThreadFunc(Event *recv_done, Tick link_delay); 521 522 public: 523 524 /** 525 * ctor 526 * @param dist_rank Rank of this gem5 process within the dist run 527 * @param sync_start Start tick for dist synchronisation 528 * @param sync_repeat Frequency for dist synchronisation 529 * @param em The event manager associated with the simulated Ethernet link 530 */ 531 DistIface(unsigned dist_rank, 532 unsigned dist_size, 533 Tick sync_start, 534 Tick sync_repeat, 535 EventManager *em, 536 bool is_switch, 537 int num_nodes); 538 539 virtual ~DistIface(); 540 /** 541 * Send out an Ethernet packet. 542 * @param pkt The Ethernet packet to send. 543 * @param send_delay The delay in ticks for the send completion event. 544 */ 545 void packetOut(EthPacketPtr pkt, Tick send_delay); 546 /** 547 * Fetch the packet scheduled to be received next by the simulated 548 * network link. 549 * 550 * @note This method is called within the process() method of the link 551 * receive done event. It also schedules the next receive event if the 552 * receive queue is not empty. 553 */ 554 EthPacketPtr packetIn() { return recvScheduler.popPacket(); } 555 556 DrainState drain() override; 557 void drainResume() override; 558 void init(const Event *e, Tick link_delay); 559 void startup(); 560 561 void serialize(CheckpointOut &cp) const override; 562 void unserialize(CheckpointIn &cp) override; 563 /** 564 * Initiate the exit from the simulation. 565 * @param delay Delay param from the m5 exit command. If Delay is zero 566 * then a collaborative exit is requested (i.e. all nodes have to call 567 * this method before the distributed simulation can exit). If Delay is 568 * not zero then exit is requested asap (and it will happen at the next 569 * sync tick). 570 * @return False if we are in distributed mode (i.e. exit can happen only 571 * at sync), True otherwise. 
572 */ 573 static bool readyToExit(Tick delay); 574 /** 575 * Initiate taking a checkpoint 576 * @param delay Delay param from the m5 checkpoint command. If Delay is 577 * zero then a collaborative checkpoint is requested (i.e. all nodes have 578 * to call this method before the checkpoint can be taken). If Delay is 579 * not zero then a checkpoint is requested asap (and it will happen at the 580 * next sync tick). 581 * @return False if we are in dist mode (i.e. exit can happen only at 582 * sync), True otherwise. 583 */ 584 static bool readyToCkpt(Tick delay, Tick period); 585 /** 586 * Getter for the dist rank param. 587 */ 588 static uint64_t rankParam(); 589 /** 590 * Getter for the dist size param. 591 */ 592 static uint64_t sizeParam(); 593 }; 594 595#endif 596