tcp_iface.cc revision 11622
15086Sgblack@eecs.umich.edu/*
25086Sgblack@eecs.umich.edu * Copyright (c) 2015 ARM Limited
35086Sgblack@eecs.umich.edu * All rights reserved
45086Sgblack@eecs.umich.edu *
57087Snate@binkert.org * The license below extends only to copyright in the software and shall
67087Snate@binkert.org * not be construed as granting a license to any other intellectual
77087Snate@binkert.org * property including but not limited to intellectual property relating
87087Snate@binkert.org * to a hardware implementation of the functionality of the software
97087Snate@binkert.org * licensed hereunder.  You may use the software subject to the license
107087Snate@binkert.org * terms below provided that you ensure that this notice is replicated
117087Snate@binkert.org * unmodified and in its entirety in all distributions of the software,
127087Snate@binkert.org * modified or unmodified, in source code or in binary form.
135086Sgblack@eecs.umich.edu *
147087Snate@binkert.org * Redistribution and use in source and binary forms, with or without
157087Snate@binkert.org * modification, are permitted provided that the following conditions are
167087Snate@binkert.org * met: redistributions of source code must retain the above copyright
177087Snate@binkert.org * notice, this list of conditions and the following disclaimer;
187087Snate@binkert.org * redistributions in binary form must reproduce the above copyright
197087Snate@binkert.org * notice, this list of conditions and the following disclaimer in the
207087Snate@binkert.org * documentation and/or other materials provided with the distribution;
217087Snate@binkert.org * neither the name of the copyright holders nor the names of its
225086Sgblack@eecs.umich.edu * contributors may be used to endorse or promote products derived from
237087Snate@binkert.org * this software without specific prior written permission.
245086Sgblack@eecs.umich.edu *
255086Sgblack@eecs.umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
265086Sgblack@eecs.umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
275086Sgblack@eecs.umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
285086Sgblack@eecs.umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
295086Sgblack@eecs.umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
305086Sgblack@eecs.umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
315086Sgblack@eecs.umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
325086Sgblack@eecs.umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
335086Sgblack@eecs.umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
345086Sgblack@eecs.umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
355086Sgblack@eecs.umich.edu * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
365086Sgblack@eecs.umich.edu *
375086Sgblack@eecs.umich.edu * Authors: Gabor Dozsa
385086Sgblack@eecs.umich.edu *          Mohammad Alian
395086Sgblack@eecs.umich.edu */
405647Sgblack@eecs.umich.edu
415647Sgblack@eecs.umich.edu/* @file
425647Sgblack@eecs.umich.edu * TCP stream socket based interface class implementation for dist-gem5 runs.
435647Sgblack@eecs.umich.edu */
445647Sgblack@eecs.umich.edu
457629Sgblack@eecs.umich.edu#include "dev/net/tcp_iface.hh"
467629Sgblack@eecs.umich.edu
477629Sgblack@eecs.umich.edu#include <arpa/inet.h>
485086Sgblack@eecs.umich.edu#include <netdb.h>
495135Sgblack@eecs.umich.edu#include <netinet/tcp.h>
505647Sgblack@eecs.umich.edu#include <sys/socket.h>
515234Sgblack@eecs.umich.edu#include <sys/types.h>
525086Sgblack@eecs.umich.edu#include <unistd.h>
535086Sgblack@eecs.umich.edu
545086Sgblack@eecs.umich.edu#include <cerrno>
557707Sgblack@eecs.umich.edu#include <cstring>
567707Sgblack@eecs.umich.edu#include <vector>
577707Sgblack@eecs.umich.edu
585086Sgblack@eecs.umich.edu#include "base/types.hh"
595086Sgblack@eecs.umich.edu#include "debug/DistEthernet.hh"
605086Sgblack@eecs.umich.edu#include "debug/DistEthernetCmd.hh"
615086Sgblack@eecs.umich.edu#include "sim/sim_exit.hh"
625086Sgblack@eecs.umich.edu
635086Sgblack@eecs.umich.edu#if defined(__FreeBSD__)
645086Sgblack@eecs.umich.edu#include <netinet/in.h>
655135Sgblack@eecs.umich.edu
665135Sgblack@eecs.umich.edu#endif
675135Sgblack@eecs.umich.edu
// MSG_NOSIGNAL does not exist on OS X
696048Sgblack@eecs.umich.edu#if defined(__APPLE__) || defined(__MACH__)
706048Sgblack@eecs.umich.edu#ifndef MSG_NOSIGNAL
716048Sgblack@eecs.umich.edu#define MSG_NOSIGNAL SO_NOSIGPIPE
726048Sgblack@eecs.umich.edu#endif
736048Sgblack@eecs.umich.edu#endif
746048Sgblack@eecs.umich.edu
757720Sgblack@eecs.umich.eduusing namespace std;
767720Sgblack@eecs.umich.edu
777720Sgblack@eecs.umich.edustd::vector<std::pair<TCPIface::NodeInfo, int> > TCPIface::nodes;
787720Sgblack@eecs.umich.eduvector<int> TCPIface::sockRegistry;
795135Sgblack@eecs.umich.eduint TCPIface::fdStatic = -1;
805135Sgblack@eecs.umich.edubool TCPIface::anyListening = false;
815135Sgblack@eecs.umich.edu
825135Sgblack@eecs.umich.eduTCPIface::TCPIface(string server_name, unsigned server_port,
835135Sgblack@eecs.umich.edu                   unsigned dist_rank, unsigned dist_size,
845135Sgblack@eecs.umich.edu                   Tick sync_start, Tick sync_repeat,
855135Sgblack@eecs.umich.edu                   EventManager *em, bool is_switch, int num_nodes) :
865135Sgblack@eecs.umich.edu    DistIface(dist_rank, dist_size, sync_start, sync_repeat, em,
875135Sgblack@eecs.umich.edu              is_switch, num_nodes), serverName(server_name),
885135Sgblack@eecs.umich.edu    serverPort(server_port), isSwitch(is_switch), listening(false)
895135Sgblack@eecs.umich.edu{
905135Sgblack@eecs.umich.edu    if (is_switch && isMaster) {
915135Sgblack@eecs.umich.edu        while (!listen(serverPort)) {
925135Sgblack@eecs.umich.edu            DPRINTF(DistEthernet, "TCPIface(listen): Can't bind port %d\n",
935135Sgblack@eecs.umich.edu                    serverPort);
945135Sgblack@eecs.umich.edu            serverPort++;
955135Sgblack@eecs.umich.edu        }
965264Sgblack@eecs.umich.edu        inform("tcp_iface listening on port %d", serverPort);
975135Sgblack@eecs.umich.edu        // Now accept the first connection requests from each compute node and
985135Sgblack@eecs.umich.edu        // store the node info. The compute nodes will then wait for ack
995135Sgblack@eecs.umich.edu        // messages. Ack messages will be sent by initTransport() in the
1005135Sgblack@eecs.umich.edu        // appropriate order to make sure that every compute node is always
1015141Sgblack@eecs.umich.edu        // connected to the same switch port.
1025141Sgblack@eecs.umich.edu        NodeInfo ni;
1035141Sgblack@eecs.umich.edu        for (int i = 0; i < size; i++) {
1045141Sgblack@eecs.umich.edu            accept();
1055141Sgblack@eecs.umich.edu            DPRINTF(DistEthernet, "First connection, waiting for link info\n");
1065141Sgblack@eecs.umich.edu            if (!recvTCP(sock, &ni, sizeof(ni)))
1075141Sgblack@eecs.umich.edu                panic("Failed to receive link info");
1085141Sgblack@eecs.umich.edu            nodes.push_back(make_pair(ni, sock));
1095141Sgblack@eecs.umich.edu        }
1105182Sgblack@eecs.umich.edu    }
1115141Sgblack@eecs.umich.edu}
1125141Sgblack@eecs.umich.edu
1135141Sgblack@eecs.umich.edubool
1145141Sgblack@eecs.umich.eduTCPIface::listen(int port)
1155141Sgblack@eecs.umich.edu{
1165141Sgblack@eecs.umich.edu    if (listening)
1175135Sgblack@eecs.umich.edu        panic("Socket already listening!");
1185141Sgblack@eecs.umich.edu
1195141Sgblack@eecs.umich.edu    struct sockaddr_in sockaddr;
1205141Sgblack@eecs.umich.edu    int ret;
1215141Sgblack@eecs.umich.edu
1225141Sgblack@eecs.umich.edu    fdStatic = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
1235141Sgblack@eecs.umich.edu    panic_if(fdStatic < 0, "socket() failed: %s", strerror(errno));
1245141Sgblack@eecs.umich.edu
1255141Sgblack@eecs.umich.edu    sockaddr.sin_family = PF_INET;
1265141Sgblack@eecs.umich.edu    sockaddr.sin_addr.s_addr = INADDR_ANY;
1275141Sgblack@eecs.umich.edu    sockaddr.sin_port = htons(port);
1285141Sgblack@eecs.umich.edu    // finally clear sin_zero
1295141Sgblack@eecs.umich.edu    memset(&sockaddr.sin_zero, 0, sizeof(sockaddr.sin_zero));
1305135Sgblack@eecs.umich.edu    ret = ::bind(fdStatic, (struct sockaddr *)&sockaddr, sizeof (sockaddr));
1315141Sgblack@eecs.umich.edu
1325141Sgblack@eecs.umich.edu    if (ret != 0) {
1335135Sgblack@eecs.umich.edu        if (ret == -1 && errno != EADDRINUSE)
1345141Sgblack@eecs.umich.edu            panic("ListenSocket(listen): bind() failed!");
1355141Sgblack@eecs.umich.edu        return false;
1365141Sgblack@eecs.umich.edu    }
1375141Sgblack@eecs.umich.edu
1385135Sgblack@eecs.umich.edu    if (::listen(fdStatic, 24) == -1) {
1395141Sgblack@eecs.umich.edu        if (errno != EADDRINUSE)
1405141Sgblack@eecs.umich.edu            panic("ListenSocket(listen): listen() failed!");
1415141Sgblack@eecs.umich.edu
1425141Sgblack@eecs.umich.edu        return false;
1435141Sgblack@eecs.umich.edu    }
1445141Sgblack@eecs.umich.edu
1455141Sgblack@eecs.umich.edu    listening = true;
1465141Sgblack@eecs.umich.edu    anyListening = true;
1475141Sgblack@eecs.umich.edu    return true;
1485141Sgblack@eecs.umich.edu}
1495141Sgblack@eecs.umich.edu
1505141Sgblack@eecs.umich.eduvoid
1515264Sgblack@eecs.umich.eduTCPIface::establishConnection()
1525141Sgblack@eecs.umich.edu{
1535141Sgblack@eecs.umich.edu    static unsigned cur_rank = 0;
1545141Sgblack@eecs.umich.edu    static unsigned cur_id = 0;
1555141Sgblack@eecs.umich.edu    NodeInfo ni;
1565141Sgblack@eecs.umich.edu
1575141Sgblack@eecs.umich.edu    if (isSwitch) {
1585141Sgblack@eecs.umich.edu        if (cur_id == 0) { // first connection accepted in the ctor already
1595141Sgblack@eecs.umich.edu            auto const &iface0 =
1605141Sgblack@eecs.umich.edu                find_if(nodes.begin(), nodes.end(),
1615141Sgblack@eecs.umich.edu                        [](const pair<NodeInfo, int> &cn) -> bool {
1625141Sgblack@eecs.umich.edu                            return cn.first.rank == cur_rank;
1635141Sgblack@eecs.umich.edu                        });
1645141Sgblack@eecs.umich.edu            assert(iface0 != nodes.end());
1655141Sgblack@eecs.umich.edu            assert(iface0->first.distIfaceId == 0);
1665141Sgblack@eecs.umich.edu            sock = iface0->second;
1675141Sgblack@eecs.umich.edu            ni = iface0->first;
1685141Sgblack@eecs.umich.edu        } else { // additional connections from the same compute node
1695135Sgblack@eecs.umich.edu            accept();
1705135Sgblack@eecs.umich.edu            DPRINTF(DistEthernet, "Next connection, waiting for link info\n");
1715135Sgblack@eecs.umich.edu            if (!recvTCP(sock, &ni, sizeof(ni)))
1725360Sgblack@eecs.umich.edu                panic("Failed to receive link info");
1735360Sgblack@eecs.umich.edu            assert(ni.rank == cur_rank);
1745360Sgblack@eecs.umich.edu            assert(ni.distIfaceId == cur_id);
1755360Sgblack@eecs.umich.edu        }
1765360Sgblack@eecs.umich.edu        inform("Link okay  (iface:%d -> (node:%d, iface:%d))",
1775360Sgblack@eecs.umich.edu               distIfaceId, ni.rank, ni.distIfaceId);
1785647Sgblack@eecs.umich.edu        if (ni.distIfaceId < ni.distIfaceNum - 1) {
1795647Sgblack@eecs.umich.edu            cur_id++;
1805647Sgblack@eecs.umich.edu        } else {
1815360Sgblack@eecs.umich.edu            cur_rank++;
1825647Sgblack@eecs.umich.edu            cur_id = 0;
1835647Sgblack@eecs.umich.edu        }
1845647Sgblack@eecs.umich.edu        // send ack
1855648Sgblack@eecs.umich.edu        ni.distIfaceId = distIfaceId;
1865648Sgblack@eecs.umich.edu        ni.distIfaceNum = distIfaceNum;
1875360Sgblack@eecs.umich.edu        sendTCP(sock, &ni, sizeof(ni));
1885141Sgblack@eecs.umich.edu    } else { // this is not a switch
1895141Sgblack@eecs.umich.edu        connect();
1905141Sgblack@eecs.umich.edu        // send link info
1915141Sgblack@eecs.umich.edu        ni.rank = rank;
1925141Sgblack@eecs.umich.edu        ni.distIfaceId = distIfaceId;
1935141Sgblack@eecs.umich.edu        ni.distIfaceNum = distIfaceNum;
1945135Sgblack@eecs.umich.edu        sendTCP(sock, &ni, sizeof(ni));
1955135Sgblack@eecs.umich.edu        DPRINTF(DistEthernet, "Connected, waiting for ack (distIfaceId:%d\n",
1965135Sgblack@eecs.umich.edu                distIfaceId);
1975135Sgblack@eecs.umich.edu        if (!recvTCP(sock, &ni, sizeof(ni)))
1985135Sgblack@eecs.umich.edu            panic("Failed to receive ack");
1995135Sgblack@eecs.umich.edu        assert(ni.rank == rank);
2006042Sgblack@eecs.umich.edu        inform("Link okay  (iface:%d -> switch iface:%d)", distIfaceId,
2015135Sgblack@eecs.umich.edu               ni.distIfaceId);
2025135Sgblack@eecs.umich.edu    }
2035135Sgblack@eecs.umich.edu    sockRegistry.push_back(sock);
2045135Sgblack@eecs.umich.edu}
2055135Sgblack@eecs.umich.edu
2065135Sgblack@eecs.umich.eduvoid
2076042Sgblack@eecs.umich.eduTCPIface::accept()
2085135Sgblack@eecs.umich.edu{
2096042Sgblack@eecs.umich.edu    struct sockaddr_in sockaddr;
2106042Sgblack@eecs.umich.edu    socklen_t slen = sizeof (sockaddr);
2116042Sgblack@eecs.umich.edu    sock = ::accept(fdStatic, (struct sockaddr *)&sockaddr, &slen);
2125135Sgblack@eecs.umich.edu    if (sock != -1) {
2135135Sgblack@eecs.umich.edu        int i = 1;
2146329Sgblack@eecs.umich.edu        if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *)&i,
2156329Sgblack@eecs.umich.edu                         sizeof(i)) < 0)
2166329Sgblack@eecs.umich.edu            warn("ListenSocket(accept): setsockopt() TCP_NODELAY failed!");
2176329Sgblack@eecs.umich.edu    }
2186329Sgblack@eecs.umich.edu}
2196329Sgblack@eecs.umich.edu
/**
 * Open a TCP connection to serverName:serverPort and store the descriptor
 * in `sock`.
 *
 * Panics if the socket cannot be created, if the server address cannot be
 * resolved or if the connection attempt fails. A failure to enable
 * TCP_NODELAY only produces a warning.
 */
void
TCPIface::connect()
{
    const std::string port_str = std::to_string(serverPort);

    sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    panic_if(sock < 0, "socket() failed: %s", strerror(errno));

    const int enable = 1;
    if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &enable,
                   sizeof(enable)) < 0)
        warn("ConnectSocket(connect): setsockopt() TCP_NODELAY failed!");

    struct addrinfo addr_hint;
    memset(&addr_hint, 0, sizeof(addr_hint));
    addr_hint.ai_family = AF_INET;
    addr_hint.ai_socktype = SOCK_STREAM;
    addr_hint.ai_protocol = IPPROTO_TCP;

    // getaddrinfo() reports failure through a non-zero return value (its
    // sign is platform dependent) and that value, not errno, identifies
    // the error; gai_strerror() turns it into text.
    struct addrinfo *addr_results = nullptr;
    const int gai_ret = getaddrinfo(serverName.c_str(), port_str.c_str(),
                                    &addr_hint, &addr_results);
    panic_if(gai_ret != 0, "getaddrinfo() failed: %s",
             gai_strerror(gai_ret));

    DPRINTF(DistEthernet, "Connecting to %s:%s\n",
            serverName.c_str(), port_str.c_str());

    const int ret = ::connect(sock, addr_results->ai_addr,
                              addr_results->ai_addrlen);
    panic_if(ret < 0, "connect() failed: %s", strerror(errno));

    freeaddrinfo(addr_results);
}
253
254TCPIface::~TCPIface()
255{
256    int M5_VAR_USED ret;
257
258    ret = close(sock);
259    assert(ret == 0);
260}
261
262void
263TCPIface::sendTCP(int sock, const void *buf, unsigned length)
264{
265    ssize_t ret;
266
267    ret = ::send(sock, buf, length, MSG_NOSIGNAL);
268    if (ret < 0) {
269        if (errno == ECONNRESET || errno == EPIPE) {
270            exitSimLoop("Message server closed connection, simulation "
271                        "is exiting");
272        } else {
273            panic("send() failed: %s", strerror(errno));
274        }
275    }
276    panic_if(ret != length, "send() failed");
277}
278
279bool
280TCPIface::recvTCP(int sock, void *buf, unsigned length)
281{
282    ssize_t ret;
283
284    ret = ::recv(sock, buf, length,  MSG_WAITALL );
285    if (ret < 0) {
286        if (errno == ECONNRESET || errno == EPIPE)
287            inform("recv(): %s", strerror(errno));
288        else if (ret < 0)
289            panic("recv() failed: %s", strerror(errno));
290    } else if (ret == 0) {
291        inform("recv(): Connection closed");
292    } else if (ret != length)
293        panic("recv() failed");
294
295    return (ret == length);
296}
297
298void
299TCPIface::sendPacket(const Header &header, const EthPacketPtr &packet)
300{
301    sendTCP(sock, &header, sizeof(header));
302    sendTCP(sock, packet->data, packet->length);
303}
304
305void
306TCPIface::sendCmd(const Header &header)
307{
308    DPRINTF(DistEthernetCmd, "TCPIface::sendCmd() type: %d\n",
309            static_cast<int>(header.msgType));
310    // Global commands (i.e. sync request) are always sent by the master
311    // DistIface. The transfer method is simply implemented as point-to-point
312    // messages for now
313    for (auto s: sockRegistry)
314        sendTCP(s, (void*)&header, sizeof(header));
315}
316
317bool
318TCPIface::recvHeader(Header &header)
319{
320    bool ret = recvTCP(sock, &header, sizeof(header));
321    DPRINTF(DistEthernetCmd, "TCPIface::recvHeader() type: %d ret: %d\n",
322            static_cast<int>(header.msgType), ret);
323    return ret;
324}
325
326void
327TCPIface::recvPacket(const Header &header, EthPacketPtr &packet)
328{
329    packet = make_shared<EthPacketData>(header.dataPacketLength);
330    bool ret = recvTCP(sock, packet->data, header.dataPacketLength);
331    panic_if(!ret, "Error while reading socket");
332    packet->length = header.dataPacketLength;
333}
334
335void
336TCPIface::initTransport()
337{
338    // We cannot setup the conections in the constructor because the number
339    // of dist interfaces (per process) is unknown until the (simobject) init
340    // phase. That information is necessary for global connection ordering.
341    establishConnection();
342}
343