dist_iface.hh revision 11290:1640dd68b0a4
18225SN/A/* 211398SN/A * Copyright (c) 2015 ARM Limited 311398SN/A * All rights reserved 411398SN/A * 511398SN/A * The license below extends only to copyright in the software and shall 611398SN/A * not be construed as granting a license to any other intellectual 711398SN/A * property including but not limited to intellectual property relating 811398SN/A * to a hardware implementation of the functionality of the software 911398SN/A * licensed hereunder. You may use the software subject to the license 1011398SN/A * terms below provided that you ensure that this notice is replicated 1111398SN/A * unmodified and in its entirety in all distributions of the software, 1211398SN/A * modified or unmodified, in source code or in binary form. 1311398SN/A * 1411398SN/A * Redistribution and use in source and binary forms, with or without 1511397SN/A * modification, are permitted provided that the following conditions are 1611397SN/A * met: redistributions of source code must retain the above copyright 1711397SN/A * notice, this list of conditions and the following disclaimer; 1811397SN/A * redistributions in binary form must reproduce the above copyright 1911397SN/A * notice, this list of conditions and the following disclaimer in the 2011397SN/A * documentation and/or other materials provided with the distribution; 2111397SN/A * neither the name of the copyright holders nor the names of its 2211397SN/A * contributors may be used to endorse or promote products derived from 2311397SN/A * this software without specific prior written permission. 2411397SN/A * 2511397SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2611397SN/A * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2711397SN/A * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 2811397SN/A * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 2911397SN/A * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 3011397SN/A * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 3111397SN/A * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 3211397SN/A * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 3311397SN/A * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 3411397SN/A * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 3511397SN/A * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3611397SN/A * 3711397SN/A * Authors: Gabor Dozsa 3811397SN/A */ 3911397SN/A 4011397SN/A/* @file 4111397SN/A * The interface class for dist gem5 simulations. 4211398SN/A * 4311397SN/A * dist-gem5 is an extension to gem5 to enable parallel simulation of a 448225SN/A * distributed system (e.g. simulation of a pool of machines 458225SN/A * connected by Ethernet links). A dist gem5 run consists of seperate gem5 468225SN/A * processes running in parallel. Each gem5 process executes 478225SN/A * the simulation of a component of the simulated distributed system. 488225SN/A * (An example component can be a dist-core board with an Ethernet NIC.) 498225SN/A * The DistIface class below provides services to transfer data and 508225SN/A * control messages among the gem5 processes. The main such services are 518225SN/A * as follows. 528225SN/A * 538225SN/A * 1. Send a data packet coming from a simulated Ethernet link. The packet 548225SN/A * will be transferred to (all) the target(s) gem5 processes. The send 558225SN/A * operation is always performed by the simulation thread, i.e. the gem5 568225SN/A * thread that is processing the event queue associated with the simulated 578225SN/A * Ethernet link. 588225SN/A * 598225SN/A * 2. Spawn a receiver thread to process messages coming in from the 608225SN/A * from other gem5 processes. Each simulated Ethernet link has its own 618225SN/A * associated receiver thread. The receiver thread saves the incoming packet 628225SN/A * and schedule an appropriate receive event in the event queue. 638225SN/A * 648225SN/A * 3. Schedule a global barrier event periodically to keep the gem5 658225SN/A * processes in sync. 668225SN/A * Periodic barrier event to keep peer gem5 processes in sync. The basic idea 678225SN/A * is that no gem5 process can go ahead further than the simulated link 688225SN/A * transmission delay to ensure that a corresponding receive event can always 698225SN/A * be scheduled for any message coming in from a peer gem5 process. 708225SN/A * 718225SN/A * 728225SN/A * 738225SN/A * This interface is an abstract class. It can work with various low level 748225SN/A * send/receive service implementations (e.g. TCP/IP, MPI,...). A TCP 758225SN/A * stream socket version is implemented in src/dev/net/tcp_iface.[hh,cc]. 768225SN/A */ 778225SN/A#ifndef __DEV_DIST_IFACE_HH__ 788225SN/A#define __DEV_DIST_IFACE_HH__ 798225SN/A 808225SN/A#include <array> 818225SN/A#include <mutex> 828225SN/A#include <queue> 838225SN/A#include <thread> 848225SN/A#include <utility> 858225SN/A 868225SN/A#include "dev/net/dist_packet.hh" 878225SN/A#include "dev/net/etherpkt.hh" 888225SN/A#include "sim/core.hh" 898225SN/A#include "sim/drain.hh" 908225SN/A#include "sim/global_event.hh" 918225SN/A#include "sim/serialize.hh" 928225SN/A 9310674SN/Aclass EventManager; 9410674SN/A 9510674SN/A/** 9610674SN/A * The interface class to talk to peer gem5 processes. 9710674SN/A */ 9810674SN/Aclass DistIface : public Drainable, public Serializable 9910674SN/A{ 10010674SN/A public: 10110674SN/A typedef DistHeaderPkt::Header Header; 10210674SN/A 10310674SN/A protected: 10410674SN/A typedef DistHeaderPkt::MsgType MsgType; 10510674SN/A typedef DistHeaderPkt::ReqType ReqType; 10610674SN/A 10710674SN/A private: 10810674SN/A class SyncEvent; 10910674SN/A /** @class Sync 11010674SN/A * This class implements global sync operations among gem5 peer processes. 11110674SN/A * 11210674SN/A * @note This class is used as a singleton object (shared by all DistIface 11310674SN/A * objects). 11410674SN/A */ 11510674SN/A class Sync : public Serializable 11610674SN/A { 11710674SN/A protected: 11810674SN/A /** 11910674SN/A * The lock to protect access to the Sync object. 12010674SN/A */ 12110674SN/A std::mutex lock; 12210674SN/A /** 12310674SN/A * Condition variable for the simulation thread to wait on 12410674SN/A * until all receiver threads completes the current global 12510674SN/A * synchronisation. 12610674SN/A */ 12710674SN/A std::condition_variable cv; 12810674SN/A /** 12910674SN/A * Number of receiver threads that not yet completed the current global 13010674SN/A * synchronisation. 13110674SN/A */ 13210674SN/A unsigned waitNum; 13310674SN/A /** 13410674SN/A * Flag is set if exit is permitted upon sync completion 13510674SN/A */ 13610674SN/A bool doExit; 13710674SN/A /** 13810674SN/A * Flag is set if taking a ckpt is permitted upon sync completion 13910674SN/A */ 14010674SN/A bool doCkpt; 14110674SN/A /** 14210674SN/A * The repeat value for the next periodic sync 14310674SN/A */ 14410674SN/A Tick nextRepeat; 14510674SN/A /** 14610674SN/A * Tick for the very first periodic sync 14710674SN/A */ 14810674SN/A Tick firstAt; 14910674SN/A /** 15010674SN/A * Tick for the next periodic sync (if the event is not scheduled yet) 1518225SN/A */ 1528225SN/A Tick nextAt; 1538225SN/A 1548225SN/A friend class SyncEvent; 1558225SN/A 1568225SN/A public: 1578225SN/A /** 1588225SN/A * Initialize periodic sync params. 15910674SN/A * 16010674SN/A * @param start Start tick for dist synchronisation 16110674SN/A * @param repeat Frequency of dist synchronisation 16210674SN/A * 16310674SN/A */ 16410674SN/A void init(Tick start, Tick repeat); 16510674SN/A /** 16610674SN/A * Core method to perform a full dist sync. 16710674SN/A */ 16810674SN/A virtual void run(bool same_tick) = 0; 1698225SN/A /** 1708225SN/A * Callback when the receiver thread gets a sync ack message. 17110674SN/A */ 17211808Sandreas.sandberg@arm.com virtual void progress(Tick send_tick, 17310674SN/A Tick next_repeat, 17410674SN/A ReqType do_ckpt, 17510674SN/A ReqType do_exit) = 0; 17610674SN/A 17710674SN/A virtual void requestCkpt(ReqType req) = 0; 17810674SN/A virtual void requestExit(ReqType req) = 0; 17910674SN/A 1808225SN/A void drainComplete(); 1818225SN/A 18210674SN/A virtual void serialize(CheckpointOut &cp) const override = 0; 18310674SN/A virtual void unserialize(CheckpointIn &cp) override = 0; 18410674SN/A }; 18510674SN/A 1868225SN/A class SyncNode: public Sync 1878225SN/A { 1888225SN/A private: 1898225SN/A /** 1908225SN/A * Exit requested 19110674SN/A */ 19210674SN/A ReqType needExit; 19310674SN/A /** 19410674SN/A * Ckpt requested 19510674SN/A */ 1968225SN/A ReqType needCkpt; 19710674SN/A 19810674SN/A public: 19910674SN/A 20010674SN/A SyncNode(); 20110674SN/A ~SyncNode() {} 20210674SN/A void run(bool same_tick) override; 2038225SN/A void progress(Tick max_req_tick, 20410674SN/A Tick next_repeat, 20510674SN/A ReqType do_ckpt, 20610674SN/A ReqType do_exit) override; 20711402SN/A 20810674SN/A void requestCkpt(ReqType req) override; 20911402SN/A void requestExit(ReqType req) override; 21011402SN/A 21111402SN/A void serialize(CheckpointOut &cp) const override; 21211402SN/A void unserialize(CheckpointIn &cp) override; 21310674SN/A }; 21410674SN/A 21511402SN/A class SyncSwitch: public Sync 2168225SN/A { 2178225SN/A private: 21810275SN/A /** 2198225SN/A * Counter for recording exit requests 22010674SN/A */ 22110674SN/A unsigned numExitReq; 22210674SN/A /** 22310674SN/A * Counter for recording ckpt requests 22410674SN/A */ 22510674SN/A unsigned numCkptReq; 22610674SN/A /** 22710674SN/A * Number of connected simulated nodes 22810674SN/A */ 22910674SN/A unsigned numNodes; 23010674SN/A 23110674SN/A public: 23210674SN/A SyncSwitch(int num_nodes); 23310674SN/A ~SyncSwitch() {} 23410674SN/A 23510674SN/A void run(bool same_tick) override; 23610674SN/A void progress(Tick max_req_tick, 23710674SN/A Tick next_repeat, 23810674SN/A ReqType do_ckpt, 23910674SN/A ReqType do_exit) override; 24010674SN/A 24110674SN/A void requestCkpt(ReqType) override { 24210674SN/A panic("Switch requested checkpoint"); 24310674SN/A } 24410674SN/A void requestExit(ReqType) override { 24510674SN/A panic("Switch requested exit"); 2468225SN/A } 24710674SN/A 24810674SN/A void serialize(CheckpointOut &cp) const override; 24910674SN/A void unserialize(CheckpointIn &cp) override; 25010674SN/A }; 25110674SN/A 25210674SN/A /** 2538225SN/A * The global event to schedule periodic dist sync. It is used as a 25410674SN/A * singleton object. 25510674SN/A * 25610674SN/A * The periodic synchronisation works as follows. 25710674SN/A * 1. A SyncEvent is scheduled as a global event when startup() is 25810674SN/A * called. 25910674SN/A * 2. The process() method of the SyncEvent initiates a new barrier 26010674SN/A * for each simulated Ethernet link. 2618225SN/A * 3. Simulation thread(s) then waits until all receiver threads 26210674SN/A * complete the ongoing barrier. The global sync event is done. 26310674SN/A */ 26411402SN/A class SyncEvent : public GlobalSyncEvent 26511402SN/A { 26610674SN/A private: 26710674SN/A /** 2688225SN/A * Flag to set when the system is draining 26910674SN/A */ 2708225SN/A bool _draining; 2718225SN/A public: 27210674SN/A /** 27310674SN/A * Only the firstly instantiated DistIface object will 27411402SN/A * call this constructor. 27511402SN/A */ 2768225SN/A SyncEvent() : GlobalSyncEvent(Sim_Exit_Pri, 0), _draining(false) {} 2778225SN/A 2788225SN/A ~SyncEvent() {} 2798225SN/A /** 2808225SN/A * Schedule the first periodic sync event. 2818225SN/A */ 2828225SN/A void start(); 2838225SN/A /** 2848225SN/A * This is a global event so process() will only be called by 2858225SN/A * exactly one simulation thread. (See further comments in the .cc 2868225SN/A * file.) 2878225SN/A */ 2888225SN/A void process() override; 2898225SN/A 2908225SN/A bool draining() const { return _draining; } 2918225SN/A void draining(bool fl) { _draining = fl; } 2928225SN/A }; 2938225SN/A /** 2948225SN/A * Class to encapsulate information about data packets received. 2958225SN/A 2968225SN/A * @note The main purpose of the class to take care of scheduling receive 2978225SN/A * done events for the simulated network link and store incoming packets 2988225SN/A * until they can be received by the simulated network link. 2998225SN/A */ 3008225SN/A class RecvScheduler : public Serializable 3018225SN/A { 3028225SN/A private: 3038225SN/A /** 3048225SN/A * Received packet descriptor. This information is used by the receive 3058225SN/A * thread to schedule receive events and by the simulation thread to 3068225SN/A * process those events. 3078225SN/A */ 3088225SN/A struct Desc : public Serializable 3098225SN/A { 3108225SN/A EthPacketPtr packet; 3118225SN/A Tick sendTick; 3128225SN/A Tick sendDelay; 3138225SN/A 3148225SN/A Desc() : sendTick(0), sendDelay(0) {} 3158225SN/A Desc(EthPacketPtr p, Tick s, Tick d) : 3168225SN/A packet(p), sendTick(s), sendDelay(d) {} 3178225SN/A Desc(const Desc &d) : 318 packet(d.packet), sendTick(d.sendTick), sendDelay(d.sendDelay) {} 319 320 void serialize(CheckpointOut &cp) const override; 321 void unserialize(CheckpointIn &cp) override; 322 }; 323 /** 324 * The queue to store the receive descriptors. 325 */ 326 std::queue<Desc> descQueue; 327 /** 328 * The tick when the most recent receive event was processed. 329 * 330 * @note This information is necessary to simulate possible receiver 331 * link contention when calculating the receive tick for the next 332 * incoming data packet (see the calcReceiveTick() method) 333 */ 334 Tick prevRecvTick; 335 /** 336 * The receive done event for the simulated Ethernet link. 337 * 338 * @note This object is constructed by the simulated network link. We 339 * schedule this object for each incoming data packet. 340 */ 341 Event *recvDone; 342 /** 343 * The link delay in ticks for the simulated Ethernet link. 344 * 345 * @note This value is used for calculating the receive ticks for 346 * incoming data packets. 347 */ 348 Tick linkDelay; 349 /** 350 * The event manager associated with the simulated Ethernet link. 351 * 352 * @note It is used to access the event queue for scheduling receive 353 * done events for the link. 354 */ 355 EventManager *eventManager; 356 /** 357 * Calculate the tick to schedule the next receive done event. 358 * 359 * @param send_tick The tick the packet was sent. 360 * @param send_delay The simulated delay at the sender side. 361 * @param prev_recv_tick Tick when the last receive event was 362 * processed. 363 * 364 * @note This method tries to take into account possible receiver link 365 * contention and adjust receive tick for the incoming packets 366 * accordingly. 367 */ 368 Tick calcReceiveTick(Tick send_tick, 369 Tick send_delay, 370 Tick prev_recv_tick); 371 372 /** 373 * Flag to set if receive ticks for pending packets need to be 374 * recalculated due to changed link latencies at a resume 375 */ 376 bool ckptRestore; 377 378 public: 379 /** 380 * Scheduler for the incoming data packets. 381 * 382 * @param em The event manager associated with the simulated Ethernet 383 * link. 384 */ 385 RecvScheduler(EventManager *em) : 386 prevRecvTick(0), recvDone(nullptr), linkDelay(0), 387 eventManager(em), ckptRestore(false) {} 388 389 /** 390 * Initialize network link parameters. 391 * 392 * @note This method is called from the receiver thread (see 393 * recvThreadFunc()). 394 */ 395 void init(Event *recv_done, Tick link_delay); 396 /** 397 * Fetch the next packet that is to be received by the simulated network 398 * link. 399 * 400 * @note This method is called from the process() method of the receive 401 * done event associated with the network link. 402 */ 403 EthPacketPtr popPacket(); 404 /** 405 * Push a newly arrived packet into the desc queue. 406 */ 407 void pushPacket(EthPacketPtr new_packet, 408 Tick send_tick, 409 Tick send_delay); 410 411 void serialize(CheckpointOut &cp) const override; 412 void unserialize(CheckpointIn &cp) override; 413 /** 414 * Adjust receive ticks for pending packets when restoring from a 415 * checkpoint 416 * 417 * @note Link speed and delay parameters may change at resume. 418 */ 419 void resumeRecvTicks(); 420 }; 421 /** 422 * Tick to schedule the first dist sync event. 423 * This is just as optimization : we do not need any dist sync 424 * event until the simulated NIC is brought up by the OS. 425 */ 426 Tick syncStart; 427 /** 428 * Frequency of dist sync events in ticks. 429 */ 430 Tick syncRepeat; 431 /** 432 * Receiver thread pointer. 433 * Each DistIface object must have exactly one receiver thread. 434 */ 435 std::thread *recvThread; 436 /** 437 * Meta information about data packets received. 438 */ 439 RecvScheduler recvScheduler; 440 441 protected: 442 /** 443 * The rank of this process among the gem5 peers. 444 */ 445 unsigned rank; 446 /** 447 * The number of gem5 processes comprising this dist simulation. 448 */ 449 unsigned size; 450 /** 451 * Number of DistIface objects (i.e. dist links in this gem5 process) 452 */ 453 static unsigned distIfaceNum; 454 /** 455 * Unique id for the dist link 456 */ 457 unsigned distIfaceId; 458 459 bool isMaster; 460 461 private: 462 /** 463 * Number of receiver threads (in this gem5 process) 464 */ 465 static unsigned recvThreadsNum; 466 /** 467 * The singleton Sync object to perform dist synchronisation. 468 */ 469 static Sync *sync; 470 /** 471 * The singleton SyncEvent object to schedule periodic dist sync. 472 */ 473 static SyncEvent *syncEvent; 474 /** 475 * The very first DistIface object created becomes the master. We need 476 * a master to co-ordinate the global synchronisation. 477 */ 478 static DistIface *master; 479 480 private: 481 /** 482 * Send out a data packet to the remote end. 483 * @param header Meta info about the packet (which needs to be transferred 484 * to the destination alongside the packet). 485 * @param packet Pointer to the packet to send. 486 */ 487 virtual void sendPacket(const Header &header, const EthPacketPtr &packet) = 0; 488 /** 489 * Send out a control command to the remote end. 490 * @param header Meta info describing the command (e.g. sync request) 491 */ 492 virtual void sendCmd(const Header &header) = 0; 493 /** 494 * Receive a header (i.e. meta info describing a data packet or a control command) 495 * from the remote end. 496 * @param header The meta info structure to store the incoming header. 497 */ 498 virtual bool recvHeader(Header &header) = 0; 499 /** 500 * Receive a packet from the remote end. 501 * @param header Meta info about the incoming packet (obtanied by a previous 502 * call to the recvHedaer() method). 503 * @param Pointer to packet received. 504 */ 505 virtual void recvPacket(const Header &header, EthPacketPtr &packet) = 0; 506 /** 507 * Init hook for the underlaying transport 508 */ 509 virtual void initTransport() = 0; 510 /** 511 * spawn the receiver thread. 512 * @param recv_done The receive done event associated with the simulated 513 * Ethernet link. 514 * @param link_delay The link delay for the simulated Ethernet link. 515 */ 516 void spawnRecvThread(const Event *recv_done, Tick link_delay); 517 /** 518 * The function executed by a receiver thread. 519 */ 520 void recvThreadFunc(Event *recv_done, Tick link_delay); 521 522 public: 523 524 /** 525 * ctor 526 * @param dist_rank Rank of this gem5 process within the dist run 527 * @param sync_start Start tick for dist synchronisation 528 * @param sync_repeat Frequency for dist synchronisation 529 * @param em The event manager associated with the simulated Ethernet link 530 */ 531 DistIface(unsigned dist_rank, 532 unsigned dist_size, 533 Tick sync_start, 534 Tick sync_repeat, 535 EventManager *em, 536 bool is_switch, 537 int num_nodes); 538 539 virtual ~DistIface(); 540 /** 541 * Send out an Ethernet packet. 542 * @param pkt The Ethernet packet to send. 543 * @param send_delay The delay in ticks for the send completion event. 544 */ 545 void packetOut(EthPacketPtr pkt, Tick send_delay); 546 /** 547 * Fetch the packet scheduled to be received next by the simulated 548 * network link. 549 * 550 * @note This method is called within the process() method of the link 551 * receive done event. It also schedules the next receive event if the 552 * receive queue is not empty. 553 */ 554 EthPacketPtr packetIn() { return recvScheduler.popPacket(); } 555 556 DrainState drain() override; 557 void drainResume() override; 558 void init(const Event *e, Tick link_delay); 559 void startup(); 560 561 void serialize(CheckpointOut &cp) const override; 562 void unserialize(CheckpointIn &cp) override; 563 /** 564 * Initiate the exit from the simulation. 565 * @param delay Delay param from the m5 exit command. If Delay is zero 566 * then a collaborative exit is requested (i.e. all nodes have to call 567 * this method before the distributed simulation can exit). If Delay is 568 * not zero then exit is requested asap (and it will happen at the next 569 * sync tick). 570 * @return False if we are in distributed mode (i.e. exit can happen only 571 * at sync), True otherwise. 572 */ 573 static bool readyToExit(Tick delay); 574 /** 575 * Initiate taking a checkpoint 576 * @param delay Delay param from the m5 checkpoint command. If Delay is 577 * zero then a collaborative checkpoint is requested (i.e. all nodes have 578 * to call this method before the checkpoint can be taken). If Delay is 579 * not zero then a checkpoint is requested asap (and it will happen at the 580 * next sync tick). 581 * @return False if we are in dist mode (i.e. exit can happen only at 582 * sync), True otherwise. 583 */ 584 static bool readyToCkpt(Tick delay, Tick period); 585 /** 586 * Getter for the dist rank param. 587 */ 588 static uint64_t rankParam(); 589 /** 590 * Getter for the dist size param. 591 */ 592 static uint64_t sizeParam(); 593 }; 594 595#endif 596