110923SN/A/*
210923SN/A * Copyright (c) 2015 ARM Limited
310923SN/A * All rights reserved
410923SN/A *
510923SN/A * The license below extends only to copyright in the software and shall
610923SN/A * not be construed as granting a license to any other intellectual
710923SN/A * property including but not limited to intellectual property relating
810923SN/A * to a hardware implementation of the functionality of the software
910923SN/A * licensed hereunder.  You may use the software subject to the license
1010923SN/A * terms below provided that you ensure that this notice is replicated
1110923SN/A * unmodified and in its entirety in all distributions of the software,
1210923SN/A * modified or unmodified, in source code or in binary form.
1310923SN/A *
1410923SN/A * Redistribution and use in source and binary forms, with or without
1510923SN/A * modification, are permitted provided that the following conditions are
1610923SN/A * met: redistributions of source code must retain the above copyright
1710923SN/A * notice, this list of conditions and the following disclaimer;
1810923SN/A * redistributions in binary form must reproduce the above copyright
1910923SN/A * notice, this list of conditions and the following disclaimer in the
2010923SN/A * documentation and/or other materials provided with the distribution;
2110923SN/A * neither the name of the copyright holders nor the names of its
2210923SN/A * contributors may be used to endorse or promote products derived from
2310923SN/A * this software without specific prior written permission.
2410923SN/A *
2510923SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2610923SN/A * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2710923SN/A * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2810923SN/A * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2910923SN/A * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
3010923SN/A * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
3110923SN/A * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
3210923SN/A * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
3310923SN/A * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
3410923SN/A * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
3510923SN/A * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3610923SN/A *
3710923SN/A * Authors: Gabor Dozsa
3811290Sgabor.dozsa@arm.com *          Mohammad Alian
3910923SN/A */
4010923SN/A
4110923SN/A/* @file
4211290Sgabor.dozsa@arm.com * TCP stream socket based interface class implementation for dist-gem5 runs.
4310923SN/A */
4410923SN/A
4511263Sandreas.sandberg@arm.com#include "dev/net/tcp_iface.hh"
4610923SN/A
4710923SN/A#include <arpa/inet.h>
4810923SN/A#include <netdb.h>
4911290Sgabor.dozsa@arm.com#include <netinet/tcp.h>
5010923SN/A#include <sys/socket.h>
5110923SN/A#include <sys/types.h>
5210923SN/A#include <unistd.h>
5310923SN/A
5410923SN/A#include <cerrno>
5510923SN/A#include <cstring>
5611290Sgabor.dozsa@arm.com#include <vector>
5710923SN/A
5810923SN/A#include "base/types.hh"
5911290Sgabor.dozsa@arm.com#include "debug/DistEthernet.hh"
6011290Sgabor.dozsa@arm.com#include "debug/DistEthernetCmd.hh"
6111290Sgabor.dozsa@arm.com#include "sim/sim_exit.hh"
6211290Sgabor.dozsa@arm.com
6311290Sgabor.dozsa@arm.com#if defined(__FreeBSD__)
6411290Sgabor.dozsa@arm.com#include <netinet/in.h>
6511290Sgabor.dozsa@arm.com
6611290Sgabor.dozsa@arm.com#endif
6710923SN/A
// MSG_NOSIGNAL does not exist on OS X
6910923SN/A#if defined(__APPLE__) || defined(__MACH__)
7010923SN/A#ifndef MSG_NOSIGNAL
7110923SN/A#define MSG_NOSIGNAL SO_NOSIGPIPE
7210923SN/A#endif
7310923SN/A#endif
7410923SN/A
7510923SN/Ausing namespace std;
7610923SN/A
7711290Sgabor.dozsa@arm.comstd::vector<std::pair<TCPIface::NodeInfo, int> > TCPIface::nodes;
7810923SN/Avector<int> TCPIface::sockRegistry;
7911290Sgabor.dozsa@arm.comint TCPIface::fdStatic = -1;
8011290Sgabor.dozsa@arm.combool TCPIface::anyListening = false;
8110923SN/A
8210923SN/ATCPIface::TCPIface(string server_name, unsigned server_port,
8311290Sgabor.dozsa@arm.com                   unsigned dist_rank, unsigned dist_size,
8411290Sgabor.dozsa@arm.com                   Tick sync_start, Tick sync_repeat,
8511703Smichael.lebeane@amd.com                   EventManager *em, bool use_pseudo_op, bool is_switch,
8611703Smichael.lebeane@amd.com                   int num_nodes) :
8711703Smichael.lebeane@amd.com    DistIface(dist_rank, dist_size, sync_start, sync_repeat, em, use_pseudo_op,
8811290Sgabor.dozsa@arm.com              is_switch, num_nodes), serverName(server_name),
8911290Sgabor.dozsa@arm.com    serverPort(server_port), isSwitch(is_switch), listening(false)
9011290Sgabor.dozsa@arm.com{
9111290Sgabor.dozsa@arm.com    if (is_switch && isMaster) {
9211290Sgabor.dozsa@arm.com        while (!listen(serverPort)) {
9311290Sgabor.dozsa@arm.com            DPRINTF(DistEthernet, "TCPIface(listen): Can't bind port %d\n",
9411290Sgabor.dozsa@arm.com                    serverPort);
9511290Sgabor.dozsa@arm.com            serverPort++;
9611290Sgabor.dozsa@arm.com        }
9711290Sgabor.dozsa@arm.com        inform("tcp_iface listening on port %d", serverPort);
9811290Sgabor.dozsa@arm.com        // Now accept the first connection requests from each compute node and
9911290Sgabor.dozsa@arm.com        // store the node info. The compute nodes will then wait for ack
10011290Sgabor.dozsa@arm.com        // messages. Ack messages will be sent by initTransport() in the
10111290Sgabor.dozsa@arm.com        // appropriate order to make sure that every compute node is always
10211290Sgabor.dozsa@arm.com        // connected to the same switch port.
10311290Sgabor.dozsa@arm.com        NodeInfo ni;
10411290Sgabor.dozsa@arm.com        for (int i = 0; i < size; i++) {
10511290Sgabor.dozsa@arm.com            accept();
10611290Sgabor.dozsa@arm.com            DPRINTF(DistEthernet, "First connection, waiting for link info\n");
10711290Sgabor.dozsa@arm.com            if (!recvTCP(sock, &ni, sizeof(ni)))
10811290Sgabor.dozsa@arm.com                panic("Failed to receive link info");
10911290Sgabor.dozsa@arm.com            nodes.push_back(make_pair(ni, sock));
11011290Sgabor.dozsa@arm.com        }
11111290Sgabor.dozsa@arm.com    }
11211290Sgabor.dozsa@arm.com}
11311290Sgabor.dozsa@arm.com
11411290Sgabor.dozsa@arm.combool
11511290Sgabor.dozsa@arm.comTCPIface::listen(int port)
11611290Sgabor.dozsa@arm.com{
11711290Sgabor.dozsa@arm.com    if (listening)
11811290Sgabor.dozsa@arm.com        panic("Socket already listening!");
11911290Sgabor.dozsa@arm.com
12011290Sgabor.dozsa@arm.com    struct sockaddr_in sockaddr;
12111290Sgabor.dozsa@arm.com    int ret;
12211290Sgabor.dozsa@arm.com
12311290Sgabor.dozsa@arm.com    fdStatic = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
12411290Sgabor.dozsa@arm.com    panic_if(fdStatic < 0, "socket() failed: %s", strerror(errno));
12511290Sgabor.dozsa@arm.com
12611290Sgabor.dozsa@arm.com    sockaddr.sin_family = PF_INET;
12711290Sgabor.dozsa@arm.com    sockaddr.sin_addr.s_addr = INADDR_ANY;
12811290Sgabor.dozsa@arm.com    sockaddr.sin_port = htons(port);
12911290Sgabor.dozsa@arm.com    // finally clear sin_zero
13011290Sgabor.dozsa@arm.com    memset(&sockaddr.sin_zero, 0, sizeof(sockaddr.sin_zero));
13111290Sgabor.dozsa@arm.com    ret = ::bind(fdStatic, (struct sockaddr *)&sockaddr, sizeof (sockaddr));
13211290Sgabor.dozsa@arm.com
13311290Sgabor.dozsa@arm.com    if (ret != 0) {
13411290Sgabor.dozsa@arm.com        if (ret == -1 && errno != EADDRINUSE)
13511290Sgabor.dozsa@arm.com            panic("ListenSocket(listen): bind() failed!");
13611290Sgabor.dozsa@arm.com        return false;
13711290Sgabor.dozsa@arm.com    }
13811290Sgabor.dozsa@arm.com
13911290Sgabor.dozsa@arm.com    if (::listen(fdStatic, 24) == -1) {
14011290Sgabor.dozsa@arm.com        if (errno != EADDRINUSE)
14111290Sgabor.dozsa@arm.com            panic("ListenSocket(listen): listen() failed!");
14211290Sgabor.dozsa@arm.com
14311290Sgabor.dozsa@arm.com        return false;
14411290Sgabor.dozsa@arm.com    }
14511290Sgabor.dozsa@arm.com
14611290Sgabor.dozsa@arm.com    listening = true;
14711290Sgabor.dozsa@arm.com    anyListening = true;
14811290Sgabor.dozsa@arm.com    return true;
14911290Sgabor.dozsa@arm.com}
15011290Sgabor.dozsa@arm.com
15111290Sgabor.dozsa@arm.comvoid
15211290Sgabor.dozsa@arm.comTCPIface::establishConnection()
15311290Sgabor.dozsa@arm.com{
15411290Sgabor.dozsa@arm.com    static unsigned cur_rank = 0;
15511290Sgabor.dozsa@arm.com    static unsigned cur_id = 0;
15611290Sgabor.dozsa@arm.com    NodeInfo ni;
15711290Sgabor.dozsa@arm.com
15811290Sgabor.dozsa@arm.com    if (isSwitch) {
15911290Sgabor.dozsa@arm.com        if (cur_id == 0) { // first connection accepted in the ctor already
16011290Sgabor.dozsa@arm.com            auto const &iface0 =
16111290Sgabor.dozsa@arm.com                find_if(nodes.begin(), nodes.end(),
16211290Sgabor.dozsa@arm.com                        [](const pair<NodeInfo, int> &cn) -> bool {
16311290Sgabor.dozsa@arm.com                            return cn.first.rank == cur_rank;
16411290Sgabor.dozsa@arm.com                        });
16511290Sgabor.dozsa@arm.com            assert(iface0 != nodes.end());
16611290Sgabor.dozsa@arm.com            assert(iface0->first.distIfaceId == 0);
16711290Sgabor.dozsa@arm.com            sock = iface0->second;
16811290Sgabor.dozsa@arm.com            ni = iface0->first;
16911290Sgabor.dozsa@arm.com        } else { // additional connections from the same compute node
17011290Sgabor.dozsa@arm.com            accept();
17111290Sgabor.dozsa@arm.com            DPRINTF(DistEthernet, "Next connection, waiting for link info\n");
17211290Sgabor.dozsa@arm.com            if (!recvTCP(sock, &ni, sizeof(ni)))
17311290Sgabor.dozsa@arm.com                panic("Failed to receive link info");
17411290Sgabor.dozsa@arm.com            assert(ni.rank == cur_rank);
17511290Sgabor.dozsa@arm.com            assert(ni.distIfaceId == cur_id);
17611290Sgabor.dozsa@arm.com        }
17711290Sgabor.dozsa@arm.com        inform("Link okay  (iface:%d -> (node:%d, iface:%d))",
17811290Sgabor.dozsa@arm.com               distIfaceId, ni.rank, ni.distIfaceId);
17911290Sgabor.dozsa@arm.com        if (ni.distIfaceId < ni.distIfaceNum - 1) {
18011290Sgabor.dozsa@arm.com            cur_id++;
18111290Sgabor.dozsa@arm.com        } else {
18211290Sgabor.dozsa@arm.com            cur_rank++;
18311290Sgabor.dozsa@arm.com            cur_id = 0;
18411290Sgabor.dozsa@arm.com        }
18511290Sgabor.dozsa@arm.com        // send ack
18611290Sgabor.dozsa@arm.com        ni.distIfaceId = distIfaceId;
18711290Sgabor.dozsa@arm.com        ni.distIfaceNum = distIfaceNum;
18811290Sgabor.dozsa@arm.com        sendTCP(sock, &ni, sizeof(ni));
18911290Sgabor.dozsa@arm.com    } else { // this is not a switch
19011290Sgabor.dozsa@arm.com        connect();
19111290Sgabor.dozsa@arm.com        // send link info
19211290Sgabor.dozsa@arm.com        ni.rank = rank;
19311290Sgabor.dozsa@arm.com        ni.distIfaceId = distIfaceId;
19411290Sgabor.dozsa@arm.com        ni.distIfaceNum = distIfaceNum;
19511290Sgabor.dozsa@arm.com        sendTCP(sock, &ni, sizeof(ni));
19611290Sgabor.dozsa@arm.com        DPRINTF(DistEthernet, "Connected, waiting for ack (distIfaceId:%d\n",
19711290Sgabor.dozsa@arm.com                distIfaceId);
19811290Sgabor.dozsa@arm.com        if (!recvTCP(sock, &ni, sizeof(ni)))
19911290Sgabor.dozsa@arm.com            panic("Failed to receive ack");
20011290Sgabor.dozsa@arm.com        assert(ni.rank == rank);
20111290Sgabor.dozsa@arm.com        inform("Link okay  (iface:%d -> switch iface:%d)", distIfaceId,
20211290Sgabor.dozsa@arm.com               ni.distIfaceId);
20311290Sgabor.dozsa@arm.com    }
20411290Sgabor.dozsa@arm.com    sockRegistry.push_back(sock);
20511290Sgabor.dozsa@arm.com}
20611290Sgabor.dozsa@arm.com
20711290Sgabor.dozsa@arm.comvoid
20811290Sgabor.dozsa@arm.comTCPIface::accept()
20911290Sgabor.dozsa@arm.com{
21011290Sgabor.dozsa@arm.com    struct sockaddr_in sockaddr;
21111290Sgabor.dozsa@arm.com    socklen_t slen = sizeof (sockaddr);
21211290Sgabor.dozsa@arm.com    sock = ::accept(fdStatic, (struct sockaddr *)&sockaddr, &slen);
21311290Sgabor.dozsa@arm.com    if (sock != -1) {
21411290Sgabor.dozsa@arm.com        int i = 1;
21511290Sgabor.dozsa@arm.com        if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *)&i,
21611290Sgabor.dozsa@arm.com                         sizeof(i)) < 0)
21711290Sgabor.dozsa@arm.com            warn("ListenSocket(accept): setsockopt() TCP_NODELAY failed!");
21811290Sgabor.dozsa@arm.com    }
21911290Sgabor.dozsa@arm.com}
22011290Sgabor.dozsa@arm.com
/**
 * Create the data socket and connect it to serverName:serverPort.
 *
 * TCP_NODELAY is requested on the socket (a failure only triggers a
 * warning). Failing to create the socket, to resolve the server name or to
 * connect is fatal. Only the first address returned by the resolver is
 * tried.
 */
void
TCPIface::connect()
{
    const string port_str = to_string(serverPort);

    sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    panic_if(sock < 0, "socket() failed: %s", strerror(errno));

    int fl = 1;
    if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
                   reinterpret_cast<char *>(&fl), sizeof(fl)) < 0) {
        warn("ConnectSocket(connect): setsockopt() TCP_NODELAY failed!");
    }

    struct addrinfo addr_hint;
    memset(&addr_hint, 0, sizeof(addr_hint));
    addr_hint.ai_family = AF_INET;
    addr_hint.ai_socktype = SOCK_STREAM;
    addr_hint.ai_protocol = IPPROTO_TCP;

    struct addrinfo *addr_results = nullptr;
    int ret = getaddrinfo(serverName.c_str(), port_str.c_str(),
                          &addr_hint, &addr_results);
    // getaddrinfo() reports failure with a non-zero EAI_* code (whose sign
    // is platform dependent) and does not necessarily set errno, so the
    // code has to be decoded with gai_strerror().
    panic_if(ret != 0, "getaddrinfo() failed: %s", gai_strerror(ret));

    DPRINTF(DistEthernet, "Connecting to %s:%s\n",
            serverName.c_str(), port_str.c_str());

    ret = ::connect(sock, addr_results->ai_addr, addr_results->ai_addrlen);
    panic_if(ret < 0, "connect() failed: %s", strerror(errno));

    freeaddrinfo(addr_results);
}
25410923SN/A
25510923SN/ATCPIface::~TCPIface()
25610923SN/A{
25710923SN/A    int M5_VAR_USED ret;
25810923SN/A
25910923SN/A    ret = close(sock);
26010923SN/A    assert(ret == 0);
26110923SN/A}
26210923SN/A
26310923SN/Avoid
26411290Sgabor.dozsa@arm.comTCPIface::sendTCP(int sock, const void *buf, unsigned length)
26510923SN/A{
26610923SN/A    ssize_t ret;
26710923SN/A
26810923SN/A    ret = ::send(sock, buf, length, MSG_NOSIGNAL);
26911290Sgabor.dozsa@arm.com    if (ret < 0) {
27011290Sgabor.dozsa@arm.com        if (errno == ECONNRESET || errno == EPIPE) {
27111622Smichael.lebeane@amd.com            exitSimLoop("Message server closed connection, simulation "
27211622Smichael.lebeane@amd.com                        "is exiting");
27311290Sgabor.dozsa@arm.com        } else {
27411290Sgabor.dozsa@arm.com            panic("send() failed: %s", strerror(errno));
27511290Sgabor.dozsa@arm.com        }
27611290Sgabor.dozsa@arm.com    }
27710923SN/A    panic_if(ret != length, "send() failed");
27810923SN/A}
27910923SN/A
28010923SN/Abool
28110923SN/ATCPIface::recvTCP(int sock, void *buf, unsigned length)
28210923SN/A{
28310923SN/A    ssize_t ret;
28410923SN/A
28510923SN/A    ret = ::recv(sock, buf, length,  MSG_WAITALL );
28610923SN/A    if (ret < 0) {
28710923SN/A        if (errno == ECONNRESET || errno == EPIPE)
28810923SN/A            inform("recv(): %s", strerror(errno));
28910923SN/A        else if (ret < 0)
29010923SN/A            panic("recv() failed: %s", strerror(errno));
29110923SN/A    } else if (ret == 0) {
29210923SN/A        inform("recv(): Connection closed");
29310923SN/A    } else if (ret != length)
29410923SN/A        panic("recv() failed");
29510923SN/A
29610923SN/A    return (ret == length);
29710923SN/A}
29810923SN/A
29910923SN/Avoid
30011290Sgabor.dozsa@arm.comTCPIface::sendPacket(const Header &header, const EthPacketPtr &packet)
30110923SN/A{
30211290Sgabor.dozsa@arm.com    sendTCP(sock, &header, sizeof(header));
30311290Sgabor.dozsa@arm.com    sendTCP(sock, packet->data, packet->length);
30410923SN/A}
30510923SN/A
30611290Sgabor.dozsa@arm.comvoid
30711290Sgabor.dozsa@arm.comTCPIface::sendCmd(const Header &header)
30811290Sgabor.dozsa@arm.com{
30911290Sgabor.dozsa@arm.com    DPRINTF(DistEthernetCmd, "TCPIface::sendCmd() type: %d\n",
31011290Sgabor.dozsa@arm.com            static_cast<int>(header.msgType));
31111290Sgabor.dozsa@arm.com    // Global commands (i.e. sync request) are always sent by the master
31211290Sgabor.dozsa@arm.com    // DistIface. The transfer method is simply implemented as point-to-point
31311290Sgabor.dozsa@arm.com    // messages for now
31411290Sgabor.dozsa@arm.com    for (auto s: sockRegistry)
31511290Sgabor.dozsa@arm.com        sendTCP(s, (void*)&header, sizeof(header));
31611290Sgabor.dozsa@arm.com}
31711290Sgabor.dozsa@arm.com
31811290Sgabor.dozsa@arm.combool
31911290Sgabor.dozsa@arm.comTCPIface::recvHeader(Header &header)
32011290Sgabor.dozsa@arm.com{
32111290Sgabor.dozsa@arm.com    bool ret = recvTCP(sock, &header, sizeof(header));
32211290Sgabor.dozsa@arm.com    DPRINTF(DistEthernetCmd, "TCPIface::recvHeader() type: %d ret: %d\n",
32311290Sgabor.dozsa@arm.com            static_cast<int>(header.msgType), ret);
32411290Sgabor.dozsa@arm.com    return ret;
32511290Sgabor.dozsa@arm.com}
32611290Sgabor.dozsa@arm.com
32711290Sgabor.dozsa@arm.comvoid
32811290Sgabor.dozsa@arm.comTCPIface::recvPacket(const Header &header, EthPacketPtr &packet)
32911290Sgabor.dozsa@arm.com{
33011290Sgabor.dozsa@arm.com    packet = make_shared<EthPacketData>(header.dataPacketLength);
33111290Sgabor.dozsa@arm.com    bool ret = recvTCP(sock, packet->data, header.dataPacketLength);
33211290Sgabor.dozsa@arm.com    panic_if(!ret, "Error while reading socket");
33311701Smichael.lebeane@amd.com    packet->simLength = header.simLength;
33411290Sgabor.dozsa@arm.com    packet->length = header.dataPacketLength;
33511290Sgabor.dozsa@arm.com}
33611290Sgabor.dozsa@arm.com
33711290Sgabor.dozsa@arm.comvoid
33811290Sgabor.dozsa@arm.comTCPIface::initTransport()
33911290Sgabor.dozsa@arm.com{
34011290Sgabor.dozsa@arm.com    // We cannot setup the conections in the constructor because the number
34111290Sgabor.dozsa@arm.com    // of dist interfaces (per process) is unknown until the (simobject) init
34211290Sgabor.dozsa@arm.com    // phase. That information is necessary for global connection ordering.
34311290Sgabor.dozsa@arm.com    establishConnection();
34411290Sgabor.dozsa@arm.com}
345