tcp_iface.cc revision 11701
110923SN/A/*
210923SN/A * Copyright (c) 2015 ARM Limited
310923SN/A * All rights reserved
410923SN/A *
510923SN/A * The license below extends only to copyright in the software and shall
610923SN/A * not be construed as granting a license to any other intellectual
710923SN/A * property including but not limited to intellectual property relating
810923SN/A * to a hardware implementation of the functionality of the software
910923SN/A * licensed hereunder.  You may use the software subject to the license
1010923SN/A * terms below provided that you ensure that this notice is replicated
1110923SN/A * unmodified and in its entirety in all distributions of the software,
1210923SN/A * modified or unmodified, in source code or in binary form.
1310923SN/A *
1410923SN/A * Redistribution and use in source and binary forms, with or without
1510923SN/A * modification, are permitted provided that the following conditions are
1610923SN/A * met: redistributions of source code must retain the above copyright
1710923SN/A * notice, this list of conditions and the following disclaimer;
1810923SN/A * redistributions in binary form must reproduce the above copyright
1910923SN/A * notice, this list of conditions and the following disclaimer in the
2010923SN/A * documentation and/or other materials provided with the distribution;
2110923SN/A * neither the name of the copyright holders nor the names of its
2210923SN/A * contributors may be used to endorse or promote products derived from
2310923SN/A * this software without specific prior written permission.
2410923SN/A *
2510923SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2610923SN/A * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2710923SN/A * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2810923SN/A * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2910923SN/A * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
3010923SN/A * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
3110923SN/A * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
3210923SN/A * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
3310923SN/A * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
3410923SN/A * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
3510923SN/A * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3610923SN/A *
3710923SN/A * Authors: Gabor Dozsa
3811290Sgabor.dozsa@arm.com *          Mohammad Alian
3910923SN/A */
4010923SN/A
4110923SN/A/* @file
4211290Sgabor.dozsa@arm.com * TCP stream socket based interface class implementation for dist-gem5 runs.
4310923SN/A */
4410923SN/A
4511263Sandreas.sandberg@arm.com#include "dev/net/tcp_iface.hh"
4610923SN/A
4710923SN/A#include <arpa/inet.h>
4810923SN/A#include <netdb.h>
4911290Sgabor.dozsa@arm.com#include <netinet/tcp.h>
5010923SN/A#include <sys/socket.h>
5110923SN/A#include <sys/types.h>
5210923SN/A#include <unistd.h>
5310923SN/A
5410923SN/A#include <cerrno>
5510923SN/A#include <cstring>
5611290Sgabor.dozsa@arm.com#include <vector>
5710923SN/A
5810923SN/A#include "base/types.hh"
5911290Sgabor.dozsa@arm.com#include "debug/DistEthernet.hh"
6011290Sgabor.dozsa@arm.com#include "debug/DistEthernetCmd.hh"
6111290Sgabor.dozsa@arm.com#include "sim/sim_exit.hh"
6211290Sgabor.dozsa@arm.com
6311290Sgabor.dozsa@arm.com#if defined(__FreeBSD__)
6411290Sgabor.dozsa@arm.com#include <netinet/in.h>
6511290Sgabor.dozsa@arm.com
6611290Sgabor.dozsa@arm.com#endif
6710923SN/A
// MSG_NOSIGNAL does not exist on OS X
6910923SN/A#if defined(__APPLE__) || defined(__MACH__)
7010923SN/A#ifndef MSG_NOSIGNAL
7110923SN/A#define MSG_NOSIGNAL SO_NOSIGPIPE
7210923SN/A#endif
7310923SN/A#endif
7410923SN/A
7510923SN/Ausing namespace std;
7610923SN/A
7711290Sgabor.dozsa@arm.comstd::vector<std::pair<TCPIface::NodeInfo, int> > TCPIface::nodes;
7810923SN/Avector<int> TCPIface::sockRegistry;
7911290Sgabor.dozsa@arm.comint TCPIface::fdStatic = -1;
8011290Sgabor.dozsa@arm.combool TCPIface::anyListening = false;
8110923SN/A
8210923SN/ATCPIface::TCPIface(string server_name, unsigned server_port,
8311290Sgabor.dozsa@arm.com                   unsigned dist_rank, unsigned dist_size,
8411290Sgabor.dozsa@arm.com                   Tick sync_start, Tick sync_repeat,
8511290Sgabor.dozsa@arm.com                   EventManager *em, bool is_switch, int num_nodes) :
8611290Sgabor.dozsa@arm.com    DistIface(dist_rank, dist_size, sync_start, sync_repeat, em,
8711290Sgabor.dozsa@arm.com              is_switch, num_nodes), serverName(server_name),
8811290Sgabor.dozsa@arm.com    serverPort(server_port), isSwitch(is_switch), listening(false)
8911290Sgabor.dozsa@arm.com{
9011290Sgabor.dozsa@arm.com    if (is_switch && isMaster) {
9111290Sgabor.dozsa@arm.com        while (!listen(serverPort)) {
9211290Sgabor.dozsa@arm.com            DPRINTF(DistEthernet, "TCPIface(listen): Can't bind port %d\n",
9311290Sgabor.dozsa@arm.com                    serverPort);
9411290Sgabor.dozsa@arm.com            serverPort++;
9511290Sgabor.dozsa@arm.com        }
9611290Sgabor.dozsa@arm.com        inform("tcp_iface listening on port %d", serverPort);
9711290Sgabor.dozsa@arm.com        // Now accept the first connection requests from each compute node and
9811290Sgabor.dozsa@arm.com        // store the node info. The compute nodes will then wait for ack
9911290Sgabor.dozsa@arm.com        // messages. Ack messages will be sent by initTransport() in the
10011290Sgabor.dozsa@arm.com        // appropriate order to make sure that every compute node is always
10111290Sgabor.dozsa@arm.com        // connected to the same switch port.
10211290Sgabor.dozsa@arm.com        NodeInfo ni;
10311290Sgabor.dozsa@arm.com        for (int i = 0; i < size; i++) {
10411290Sgabor.dozsa@arm.com            accept();
10511290Sgabor.dozsa@arm.com            DPRINTF(DistEthernet, "First connection, waiting for link info\n");
10611290Sgabor.dozsa@arm.com            if (!recvTCP(sock, &ni, sizeof(ni)))
10711290Sgabor.dozsa@arm.com                panic("Failed to receive link info");
10811290Sgabor.dozsa@arm.com            nodes.push_back(make_pair(ni, sock));
10911290Sgabor.dozsa@arm.com        }
11011290Sgabor.dozsa@arm.com    }
11111290Sgabor.dozsa@arm.com}
11211290Sgabor.dozsa@arm.com
11311290Sgabor.dozsa@arm.combool
11411290Sgabor.dozsa@arm.comTCPIface::listen(int port)
11511290Sgabor.dozsa@arm.com{
11611290Sgabor.dozsa@arm.com    if (listening)
11711290Sgabor.dozsa@arm.com        panic("Socket already listening!");
11811290Sgabor.dozsa@arm.com
11911290Sgabor.dozsa@arm.com    struct sockaddr_in sockaddr;
12011290Sgabor.dozsa@arm.com    int ret;
12111290Sgabor.dozsa@arm.com
12211290Sgabor.dozsa@arm.com    fdStatic = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
12311290Sgabor.dozsa@arm.com    panic_if(fdStatic < 0, "socket() failed: %s", strerror(errno));
12411290Sgabor.dozsa@arm.com
12511290Sgabor.dozsa@arm.com    sockaddr.sin_family = PF_INET;
12611290Sgabor.dozsa@arm.com    sockaddr.sin_addr.s_addr = INADDR_ANY;
12711290Sgabor.dozsa@arm.com    sockaddr.sin_port = htons(port);
12811290Sgabor.dozsa@arm.com    // finally clear sin_zero
12911290Sgabor.dozsa@arm.com    memset(&sockaddr.sin_zero, 0, sizeof(sockaddr.sin_zero));
13011290Sgabor.dozsa@arm.com    ret = ::bind(fdStatic, (struct sockaddr *)&sockaddr, sizeof (sockaddr));
13111290Sgabor.dozsa@arm.com
13211290Sgabor.dozsa@arm.com    if (ret != 0) {
13311290Sgabor.dozsa@arm.com        if (ret == -1 && errno != EADDRINUSE)
13411290Sgabor.dozsa@arm.com            panic("ListenSocket(listen): bind() failed!");
13511290Sgabor.dozsa@arm.com        return false;
13611290Sgabor.dozsa@arm.com    }
13711290Sgabor.dozsa@arm.com
13811290Sgabor.dozsa@arm.com    if (::listen(fdStatic, 24) == -1) {
13911290Sgabor.dozsa@arm.com        if (errno != EADDRINUSE)
14011290Sgabor.dozsa@arm.com            panic("ListenSocket(listen): listen() failed!");
14111290Sgabor.dozsa@arm.com
14211290Sgabor.dozsa@arm.com        return false;
14311290Sgabor.dozsa@arm.com    }
14411290Sgabor.dozsa@arm.com
14511290Sgabor.dozsa@arm.com    listening = true;
14611290Sgabor.dozsa@arm.com    anyListening = true;
14711290Sgabor.dozsa@arm.com    return true;
14811290Sgabor.dozsa@arm.com}
14911290Sgabor.dozsa@arm.com
15011290Sgabor.dozsa@arm.comvoid
15111290Sgabor.dozsa@arm.comTCPIface::establishConnection()
15211290Sgabor.dozsa@arm.com{
15311290Sgabor.dozsa@arm.com    static unsigned cur_rank = 0;
15411290Sgabor.dozsa@arm.com    static unsigned cur_id = 0;
15511290Sgabor.dozsa@arm.com    NodeInfo ni;
15611290Sgabor.dozsa@arm.com
15711290Sgabor.dozsa@arm.com    if (isSwitch) {
15811290Sgabor.dozsa@arm.com        if (cur_id == 0) { // first connection accepted in the ctor already
15911290Sgabor.dozsa@arm.com            auto const &iface0 =
16011290Sgabor.dozsa@arm.com                find_if(nodes.begin(), nodes.end(),
16111290Sgabor.dozsa@arm.com                        [](const pair<NodeInfo, int> &cn) -> bool {
16211290Sgabor.dozsa@arm.com                            return cn.first.rank == cur_rank;
16311290Sgabor.dozsa@arm.com                        });
16411290Sgabor.dozsa@arm.com            assert(iface0 != nodes.end());
16511290Sgabor.dozsa@arm.com            assert(iface0->first.distIfaceId == 0);
16611290Sgabor.dozsa@arm.com            sock = iface0->second;
16711290Sgabor.dozsa@arm.com            ni = iface0->first;
16811290Sgabor.dozsa@arm.com        } else { // additional connections from the same compute node
16911290Sgabor.dozsa@arm.com            accept();
17011290Sgabor.dozsa@arm.com            DPRINTF(DistEthernet, "Next connection, waiting for link info\n");
17111290Sgabor.dozsa@arm.com            if (!recvTCP(sock, &ni, sizeof(ni)))
17211290Sgabor.dozsa@arm.com                panic("Failed to receive link info");
17311290Sgabor.dozsa@arm.com            assert(ni.rank == cur_rank);
17411290Sgabor.dozsa@arm.com            assert(ni.distIfaceId == cur_id);
17511290Sgabor.dozsa@arm.com        }
17611290Sgabor.dozsa@arm.com        inform("Link okay  (iface:%d -> (node:%d, iface:%d))",
17711290Sgabor.dozsa@arm.com               distIfaceId, ni.rank, ni.distIfaceId);
17811290Sgabor.dozsa@arm.com        if (ni.distIfaceId < ni.distIfaceNum - 1) {
17911290Sgabor.dozsa@arm.com            cur_id++;
18011290Sgabor.dozsa@arm.com        } else {
18111290Sgabor.dozsa@arm.com            cur_rank++;
18211290Sgabor.dozsa@arm.com            cur_id = 0;
18311290Sgabor.dozsa@arm.com        }
18411290Sgabor.dozsa@arm.com        // send ack
18511290Sgabor.dozsa@arm.com        ni.distIfaceId = distIfaceId;
18611290Sgabor.dozsa@arm.com        ni.distIfaceNum = distIfaceNum;
18711290Sgabor.dozsa@arm.com        sendTCP(sock, &ni, sizeof(ni));
18811290Sgabor.dozsa@arm.com    } else { // this is not a switch
18911290Sgabor.dozsa@arm.com        connect();
19011290Sgabor.dozsa@arm.com        // send link info
19111290Sgabor.dozsa@arm.com        ni.rank = rank;
19211290Sgabor.dozsa@arm.com        ni.distIfaceId = distIfaceId;
19311290Sgabor.dozsa@arm.com        ni.distIfaceNum = distIfaceNum;
19411290Sgabor.dozsa@arm.com        sendTCP(sock, &ni, sizeof(ni));
19511290Sgabor.dozsa@arm.com        DPRINTF(DistEthernet, "Connected, waiting for ack (distIfaceId:%d\n",
19611290Sgabor.dozsa@arm.com                distIfaceId);
19711290Sgabor.dozsa@arm.com        if (!recvTCP(sock, &ni, sizeof(ni)))
19811290Sgabor.dozsa@arm.com            panic("Failed to receive ack");
19911290Sgabor.dozsa@arm.com        assert(ni.rank == rank);
20011290Sgabor.dozsa@arm.com        inform("Link okay  (iface:%d -> switch iface:%d)", distIfaceId,
20111290Sgabor.dozsa@arm.com               ni.distIfaceId);
20211290Sgabor.dozsa@arm.com    }
20311290Sgabor.dozsa@arm.com    sockRegistry.push_back(sock);
20411290Sgabor.dozsa@arm.com}
20511290Sgabor.dozsa@arm.com
20611290Sgabor.dozsa@arm.comvoid
20711290Sgabor.dozsa@arm.comTCPIface::accept()
20811290Sgabor.dozsa@arm.com{
20911290Sgabor.dozsa@arm.com    struct sockaddr_in sockaddr;
21011290Sgabor.dozsa@arm.com    socklen_t slen = sizeof (sockaddr);
21111290Sgabor.dozsa@arm.com    sock = ::accept(fdStatic, (struct sockaddr *)&sockaddr, &slen);
21211290Sgabor.dozsa@arm.com    if (sock != -1) {
21311290Sgabor.dozsa@arm.com        int i = 1;
21411290Sgabor.dozsa@arm.com        if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *)&i,
21511290Sgabor.dozsa@arm.com                         sizeof(i)) < 0)
21611290Sgabor.dozsa@arm.com            warn("ListenSocket(accept): setsockopt() TCP_NODELAY failed!");
21711290Sgabor.dozsa@arm.com    }
21811290Sgabor.dozsa@arm.com}
21911290Sgabor.dozsa@arm.com
/**
 * Create the socket of this interface and connect it to
 * serverName:serverPort. TCP_NODELAY is requested on the socket; failing
 * to set it only produces a warning.
 *
 * Panics if the socket cannot be created, the server name cannot be
 * resolved or the connection attempt fails.
 */
void
TCPIface::connect()
{
    const string port_str = to_string(serverPort);

    sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    panic_if(sock < 0, "socket() failed: %s", strerror(errno));

    int fl = 1;
    if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *)&fl, sizeof(fl)) < 0)
        warn("ConnectSocket(connect): setsockopt() TCP_NODELAY failed!");

    struct addrinfo addr_hint;
    memset(&addr_hint, 0, sizeof(addr_hint));
    addr_hint.ai_family = AF_INET;
    addr_hint.ai_socktype = SOCK_STREAM;
    addr_hint.ai_protocol = IPPROTO_TCP;

    struct addrinfo *addr_results = nullptr;
    int ret = getaddrinfo(serverName.c_str(), port_str.c_str(),
                          &addr_hint, &addr_results);
    // getaddrinfo() reports failure with a non-zero EAI_* code (whose sign
    // is platform dependent) and does not set errno in general, so test
    // against zero and decode the code with gai_strerror().
    panic_if(ret != 0, "getaddrinfo() failed: %s", gai_strerror(ret));

    DPRINTF(DistEthernet, "Connecting to %s:%s\n",
            serverName.c_str(), port_str.c_str());

    ret = ::connect(sock, (struct sockaddr *)(addr_results->ai_addr),
                    addr_results->ai_addrlen);
    // Keep the connect() error around: the address list is released before
    // the result is checked so that it is freed on the failure path, too.
    const int connect_errno = errno;
    freeaddrinfo(addr_results);
    panic_if(ret < 0, "connect() failed: %s", strerror(connect_errno));
}
25310923SN/A
25410923SN/ATCPIface::~TCPIface()
25510923SN/A{
25610923SN/A    int M5_VAR_USED ret;
25710923SN/A
25810923SN/A    ret = close(sock);
25910923SN/A    assert(ret == 0);
26010923SN/A}
26110923SN/A
26210923SN/Avoid
26311290Sgabor.dozsa@arm.comTCPIface::sendTCP(int sock, const void *buf, unsigned length)
26410923SN/A{
26510923SN/A    ssize_t ret;
26610923SN/A
26710923SN/A    ret = ::send(sock, buf, length, MSG_NOSIGNAL);
26811290Sgabor.dozsa@arm.com    if (ret < 0) {
26911290Sgabor.dozsa@arm.com        if (errno == ECONNRESET || errno == EPIPE) {
27011622Smichael.lebeane@amd.com            exitSimLoop("Message server closed connection, simulation "
27111622Smichael.lebeane@amd.com                        "is exiting");
27211290Sgabor.dozsa@arm.com        } else {
27311290Sgabor.dozsa@arm.com            panic("send() failed: %s", strerror(errno));
27411290Sgabor.dozsa@arm.com        }
27511290Sgabor.dozsa@arm.com    }
27610923SN/A    panic_if(ret != length, "send() failed");
27710923SN/A}
27810923SN/A
27910923SN/Abool
28010923SN/ATCPIface::recvTCP(int sock, void *buf, unsigned length)
28110923SN/A{
28210923SN/A    ssize_t ret;
28310923SN/A
28410923SN/A    ret = ::recv(sock, buf, length,  MSG_WAITALL );
28510923SN/A    if (ret < 0) {
28610923SN/A        if (errno == ECONNRESET || errno == EPIPE)
28710923SN/A            inform("recv(): %s", strerror(errno));
28810923SN/A        else if (ret < 0)
28910923SN/A            panic("recv() failed: %s", strerror(errno));
29010923SN/A    } else if (ret == 0) {
29110923SN/A        inform("recv(): Connection closed");
29210923SN/A    } else if (ret != length)
29310923SN/A        panic("recv() failed");
29410923SN/A
29510923SN/A    return (ret == length);
29610923SN/A}
29710923SN/A
29810923SN/Avoid
29911290Sgabor.dozsa@arm.comTCPIface::sendPacket(const Header &header, const EthPacketPtr &packet)
30010923SN/A{
30111290Sgabor.dozsa@arm.com    sendTCP(sock, &header, sizeof(header));
30211290Sgabor.dozsa@arm.com    sendTCP(sock, packet->data, packet->length);
30310923SN/A}
30410923SN/A
30511290Sgabor.dozsa@arm.comvoid
30611290Sgabor.dozsa@arm.comTCPIface::sendCmd(const Header &header)
30711290Sgabor.dozsa@arm.com{
30811290Sgabor.dozsa@arm.com    DPRINTF(DistEthernetCmd, "TCPIface::sendCmd() type: %d\n",
30911290Sgabor.dozsa@arm.com            static_cast<int>(header.msgType));
31011290Sgabor.dozsa@arm.com    // Global commands (i.e. sync request) are always sent by the master
31111290Sgabor.dozsa@arm.com    // DistIface. The transfer method is simply implemented as point-to-point
31211290Sgabor.dozsa@arm.com    // messages for now
31311290Sgabor.dozsa@arm.com    for (auto s: sockRegistry)
31411290Sgabor.dozsa@arm.com        sendTCP(s, (void*)&header, sizeof(header));
31511290Sgabor.dozsa@arm.com}
31611290Sgabor.dozsa@arm.com
31711290Sgabor.dozsa@arm.combool
31811290Sgabor.dozsa@arm.comTCPIface::recvHeader(Header &header)
31911290Sgabor.dozsa@arm.com{
32011290Sgabor.dozsa@arm.com    bool ret = recvTCP(sock, &header, sizeof(header));
32111290Sgabor.dozsa@arm.com    DPRINTF(DistEthernetCmd, "TCPIface::recvHeader() type: %d ret: %d\n",
32211290Sgabor.dozsa@arm.com            static_cast<int>(header.msgType), ret);
32311290Sgabor.dozsa@arm.com    return ret;
32411290Sgabor.dozsa@arm.com}
32511290Sgabor.dozsa@arm.com
32611290Sgabor.dozsa@arm.comvoid
32711290Sgabor.dozsa@arm.comTCPIface::recvPacket(const Header &header, EthPacketPtr &packet)
32811290Sgabor.dozsa@arm.com{
32911290Sgabor.dozsa@arm.com    packet = make_shared<EthPacketData>(header.dataPacketLength);
33011290Sgabor.dozsa@arm.com    bool ret = recvTCP(sock, packet->data, header.dataPacketLength);
33111290Sgabor.dozsa@arm.com    panic_if(!ret, "Error while reading socket");
33211701Smichael.lebeane@amd.com    packet->simLength = header.simLength;
33311290Sgabor.dozsa@arm.com    packet->length = header.dataPacketLength;
33411290Sgabor.dozsa@arm.com}
33511290Sgabor.dozsa@arm.com
33611290Sgabor.dozsa@arm.comvoid
33711290Sgabor.dozsa@arm.comTCPIface::initTransport()
33811290Sgabor.dozsa@arm.com{
33911290Sgabor.dozsa@arm.com    // We cannot setup the conections in the constructor because the number
34011290Sgabor.dozsa@arm.com    // of dist interfaces (per process) is unknown until the (simobject) init
34111290Sgabor.dozsa@arm.com    // phase. That information is necessary for global connection ordering.
34211290Sgabor.dozsa@arm.com    establishConnection();
34311290Sgabor.dozsa@arm.com}
344