tcp_iface.cc revision 11290
111723Sar4jc@virginia.edu/*
211963Sar4jc@virginia.edu * Copyright (c) 2015 ARM Limited
311963Sar4jc@virginia.edu * All rights reserved
411963Sar4jc@virginia.edu *
511963Sar4jc@virginia.edu * The license below extends only to copyright in the software and shall
611963Sar4jc@virginia.edu * not be construed as granting a license to any other intellectual
711963Sar4jc@virginia.edu * property including but not limited to intellectual property relating
811963Sar4jc@virginia.edu * to a hardware implementation of the functionality of the software
911963Sar4jc@virginia.edu * licensed hereunder.  You may use the software subject to the license
1011963Sar4jc@virginia.edu * terms below provided that you ensure that this notice is replicated
1111963Sar4jc@virginia.edu * unmodified and in its entirety in all distributions of the software,
1211963Sar4jc@virginia.edu * modified or unmodified, in source code or in binary form.
1311963Sar4jc@virginia.edu *
1411963Sar4jc@virginia.edu * Redistribution and use in source and binary forms, with or without
1511963Sar4jc@virginia.edu * modification, are permitted provided that the following conditions are
1611963Sar4jc@virginia.edu * met: redistributions of source code must retain the above copyright
1711723Sar4jc@virginia.edu * notice, this list of conditions and the following disclaimer;
1811723Sar4jc@virginia.edu * redistributions in binary form must reproduce the above copyright
1911723Sar4jc@virginia.edu * notice, this list of conditions and the following disclaimer in the
2011723Sar4jc@virginia.edu * documentation and/or other materials provided with the distribution;
2111723Sar4jc@virginia.edu * neither the name of the copyright holders nor the names of its
2211723Sar4jc@virginia.edu * contributors may be used to endorse or promote products derived from
2311723Sar4jc@virginia.edu * this software without specific prior written permission.
2411723Sar4jc@virginia.edu *
2511723Sar4jc@virginia.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2611723Sar4jc@virginia.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2711723Sar4jc@virginia.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
2811723Sar4jc@virginia.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
2911723Sar4jc@virginia.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
3011723Sar4jc@virginia.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
3111723Sar4jc@virginia.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
3211723Sar4jc@virginia.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
3311723Sar4jc@virginia.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
3411723Sar4jc@virginia.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
3511723Sar4jc@virginia.edu * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3611723Sar4jc@virginia.edu *
3711723Sar4jc@virginia.edu * Authors: Gabor Dozsa
3811723Sar4jc@virginia.edu *          Mohammad Alian
3911723Sar4jc@virginia.edu */
4011723Sar4jc@virginia.edu
4111723Sar4jc@virginia.edu/* @file
4211723Sar4jc@virginia.edu * TCP stream socket based interface class implementation for dist-gem5 runs.
4311723Sar4jc@virginia.edu */
4411963Sar4jc@virginia.edu
4511963Sar4jc@virginia.edu#include "dev/net/tcp_iface.hh"
4611963Sar4jc@virginia.edu
4711963Sar4jc@virginia.edu#include <arpa/inet.h>
4811723Sar4jc@virginia.edu#include <netdb.h>
4911723Sar4jc@virginia.edu#include <netinet/tcp.h>
5011963Sar4jc@virginia.edu#include <sys/socket.h>
5111963Sar4jc@virginia.edu#include <sys/types.h>
5211963Sar4jc@virginia.edu#include <unistd.h>
5311963Sar4jc@virginia.edu
5411963Sar4jc@virginia.edu#include <cerrno>
5511963Sar4jc@virginia.edu#include <cstring>
5611963Sar4jc@virginia.edu#include <vector>
5711963Sar4jc@virginia.edu
5811963Sar4jc@virginia.edu#include "base/types.hh"
5911963Sar4jc@virginia.edu#include "debug/DistEthernet.hh"
6011963Sar4jc@virginia.edu#include "debug/DistEthernetCmd.hh"
6111963Sar4jc@virginia.edu#include "sim/sim_exit.hh"
6211963Sar4jc@virginia.edu
6311963Sar4jc@virginia.edu#if defined(__FreeBSD__)
6411963Sar4jc@virginia.edu#include <netinet/in.h>
6511963Sar4jc@virginia.edu
6611963Sar4jc@virginia.edu#endif
6711963Sar4jc@virginia.edu
// MSG_NOSIGNAL does not exist on OS X
6911963Sar4jc@virginia.edu#if defined(__APPLE__) || defined(__MACH__)
7011963Sar4jc@virginia.edu#ifndef MSG_NOSIGNAL
7111963Sar4jc@virginia.edu#define MSG_NOSIGNAL SO_NOSIGPIPE
7211963Sar4jc@virginia.edu#endif
7311963Sar4jc@virginia.edu#endif
7411963Sar4jc@virginia.edu
7511963Sar4jc@virginia.eduusing namespace std;
7611963Sar4jc@virginia.edu
7711963Sar4jc@virginia.edustd::vector<std::pair<TCPIface::NodeInfo, int> > TCPIface::nodes;
7811963Sar4jc@virginia.eduvector<int> TCPIface::sockRegistry;
7911963Sar4jc@virginia.eduint TCPIface::fdStatic = -1;
8011963Sar4jc@virginia.edubool TCPIface::anyListening = false;
8111963Sar4jc@virginia.edu
8211963Sar4jc@virginia.eduTCPIface::TCPIface(string server_name, unsigned server_port,
8311963Sar4jc@virginia.edu                   unsigned dist_rank, unsigned dist_size,
8411963Sar4jc@virginia.edu                   Tick sync_start, Tick sync_repeat,
8511963Sar4jc@virginia.edu                   EventManager *em, bool is_switch, int num_nodes) :
8611963Sar4jc@virginia.edu    DistIface(dist_rank, dist_size, sync_start, sync_repeat, em,
8711963Sar4jc@virginia.edu              is_switch, num_nodes), serverName(server_name),
8811963Sar4jc@virginia.edu    serverPort(server_port), isSwitch(is_switch), listening(false)
8911963Sar4jc@virginia.edu{
9011963Sar4jc@virginia.edu    if (is_switch && isMaster) {
9111963Sar4jc@virginia.edu        while (!listen(serverPort)) {
9211963Sar4jc@virginia.edu            DPRINTF(DistEthernet, "TCPIface(listen): Can't bind port %d\n",
9311963Sar4jc@virginia.edu                    serverPort);
9411963Sar4jc@virginia.edu            serverPort++;
9511963Sar4jc@virginia.edu        }
9611963Sar4jc@virginia.edu        inform("tcp_iface listening on port %d", serverPort);
9711963Sar4jc@virginia.edu        // Now accept the first connection requests from each compute node and
9811963Sar4jc@virginia.edu        // store the node info. The compute nodes will then wait for ack
9911963Sar4jc@virginia.edu        // messages. Ack messages will be sent by initTransport() in the
10011963Sar4jc@virginia.edu        // appropriate order to make sure that every compute node is always
10111963Sar4jc@virginia.edu        // connected to the same switch port.
10211963Sar4jc@virginia.edu        NodeInfo ni;
10311963Sar4jc@virginia.edu        for (int i = 0; i < size; i++) {
10411963Sar4jc@virginia.edu            accept();
10511963Sar4jc@virginia.edu            DPRINTF(DistEthernet, "First connection, waiting for link info\n");
10611963Sar4jc@virginia.edu            if (!recvTCP(sock, &ni, sizeof(ni)))
10711963Sar4jc@virginia.edu                panic("Failed to receive link info");
10811963Sar4jc@virginia.edu            nodes.push_back(make_pair(ni, sock));
10911963Sar4jc@virginia.edu        }
11011963Sar4jc@virginia.edu    }
11111963Sar4jc@virginia.edu}
11211963Sar4jc@virginia.edu
11311963Sar4jc@virginia.edubool
11411963Sar4jc@virginia.eduTCPIface::listen(int port)
11511963Sar4jc@virginia.edu{
11611963Sar4jc@virginia.edu    if (listening)
11711963Sar4jc@virginia.edu        panic("Socket already listening!");
11811963Sar4jc@virginia.edu
11911963Sar4jc@virginia.edu    struct sockaddr_in sockaddr;
12011963Sar4jc@virginia.edu    int ret;
12111963Sar4jc@virginia.edu
12211963Sar4jc@virginia.edu    fdStatic = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
12311963Sar4jc@virginia.edu    panic_if(fdStatic < 0, "socket() failed: %s", strerror(errno));
12411963Sar4jc@virginia.edu
12511963Sar4jc@virginia.edu    sockaddr.sin_family = PF_INET;
12611963Sar4jc@virginia.edu    sockaddr.sin_addr.s_addr = INADDR_ANY;
12711963Sar4jc@virginia.edu    sockaddr.sin_port = htons(port);
12811963Sar4jc@virginia.edu    // finally clear sin_zero
12911963Sar4jc@virginia.edu    memset(&sockaddr.sin_zero, 0, sizeof(sockaddr.sin_zero));
13011963Sar4jc@virginia.edu    ret = ::bind(fdStatic, (struct sockaddr *)&sockaddr, sizeof (sockaddr));
13111963Sar4jc@virginia.edu
13211963Sar4jc@virginia.edu    if (ret != 0) {
13311963Sar4jc@virginia.edu        if (ret == -1 && errno != EADDRINUSE)
13411963Sar4jc@virginia.edu            panic("ListenSocket(listen): bind() failed!");
13511963Sar4jc@virginia.edu        return false;
13611963Sar4jc@virginia.edu    }
13711963Sar4jc@virginia.edu
13811963Sar4jc@virginia.edu    if (::listen(fdStatic, 24) == -1) {
13911963Sar4jc@virginia.edu        if (errno != EADDRINUSE)
14011963Sar4jc@virginia.edu            panic("ListenSocket(listen): listen() failed!");
14111963Sar4jc@virginia.edu
14211963Sar4jc@virginia.edu        return false;
14311963Sar4jc@virginia.edu    }
14411963Sar4jc@virginia.edu
14511963Sar4jc@virginia.edu    listening = true;
14611963Sar4jc@virginia.edu    anyListening = true;
14711723Sar4jc@virginia.edu    return true;
14811723Sar4jc@virginia.edu}
14911723Sar4jc@virginia.edu
15011723Sar4jc@virginia.eduvoid
15112449Sgabeblack@google.comTCPIface::establishConnection()
15212449Sgabeblack@google.com{
15311723Sar4jc@virginia.edu    static unsigned cur_rank = 0;
15411723Sar4jc@virginia.edu    static unsigned cur_id = 0;
15511723Sar4jc@virginia.edu    NodeInfo ni;
15611963Sar4jc@virginia.edu
15711963Sar4jc@virginia.edu    if (isSwitch) {
15811723Sar4jc@virginia.edu        if (cur_id == 0) { // first connection accepted in the ctor already
15912455Sgabeblack@google.com            auto const &iface0 =
16012455Sgabeblack@google.com                find_if(nodes.begin(), nodes.end(),
16111723Sar4jc@virginia.edu                        [](const pair<NodeInfo, int> &cn) -> bool {
16211723Sar4jc@virginia.edu                            return cn.first.rank == cur_rank;
16311723Sar4jc@virginia.edu                        });
16411963Sar4jc@virginia.edu            assert(iface0 != nodes.end());
16511723Sar4jc@virginia.edu            assert(iface0->first.distIfaceId == 0);
16611963Sar4jc@virginia.edu            sock = iface0->second;
16711963Sar4jc@virginia.edu            ni = iface0->first;
16811963Sar4jc@virginia.edu        } else { // additional connections from the same compute node
16911963Sar4jc@virginia.edu            accept();
17011963Sar4jc@virginia.edu            DPRINTF(DistEthernet, "Next connection, waiting for link info\n");
17111963Sar4jc@virginia.edu            if (!recvTCP(sock, &ni, sizeof(ni)))
17211963Sar4jc@virginia.edu                panic("Failed to receive link info");
17311963Sar4jc@virginia.edu            assert(ni.rank == cur_rank);
17411963Sar4jc@virginia.edu            assert(ni.distIfaceId == cur_id);
17511963Sar4jc@virginia.edu        }
17611963Sar4jc@virginia.edu        inform("Link okay  (iface:%d -> (node:%d, iface:%d))",
17711963Sar4jc@virginia.edu               distIfaceId, ni.rank, ni.distIfaceId);
17811963Sar4jc@virginia.edu        if (ni.distIfaceId < ni.distIfaceNum - 1) {
17911723Sar4jc@virginia.edu            cur_id++;
18011723Sar4jc@virginia.edu        } else {
18111723Sar4jc@virginia.edu            cur_rank++;
18211963Sar4jc@virginia.edu            cur_id = 0;
18311723Sar4jc@virginia.edu        }
18411963Sar4jc@virginia.edu        // send ack
18511963Sar4jc@virginia.edu        ni.distIfaceId = distIfaceId;
18611963Sar4jc@virginia.edu        ni.distIfaceNum = distIfaceNum;
18711963Sar4jc@virginia.edu        sendTCP(sock, &ni, sizeof(ni));
18811963Sar4jc@virginia.edu    } else { // this is not a switch
18911963Sar4jc@virginia.edu        connect();
19011963Sar4jc@virginia.edu        // send link info
19111963Sar4jc@virginia.edu        ni.rank = rank;
19211963Sar4jc@virginia.edu        ni.distIfaceId = distIfaceId;
19311963Sar4jc@virginia.edu        ni.distIfaceNum = distIfaceNum;
19411963Sar4jc@virginia.edu        sendTCP(sock, &ni, sizeof(ni));
19511963Sar4jc@virginia.edu        DPRINTF(DistEthernet, "Connected, waiting for ack (distIfaceId:%d\n",
19611963Sar4jc@virginia.edu                distIfaceId);
19711723Sar4jc@virginia.edu        if (!recvTCP(sock, &ni, sizeof(ni)))
19811963Sar4jc@virginia.edu            panic("Failed to receive ack");
19912449Sgabeblack@google.com        assert(ni.rank == rank);
20012449Sgabeblack@google.com        inform("Link okay  (iface:%d -> switch iface:%d)", distIfaceId,
20112449Sgabeblack@google.com               ni.distIfaceId);
20212031Sgabeblack@google.com    }
20311963Sar4jc@virginia.edu    sockRegistry.push_back(sock);
204}
205
206void
207TCPIface::accept()
208{
209    struct sockaddr_in sockaddr;
210    socklen_t slen = sizeof (sockaddr);
211    sock = ::accept(fdStatic, (struct sockaddr *)&sockaddr, &slen);
212    if (sock != -1) {
213        int i = 1;
214        if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *)&i,
215                         sizeof(i)) < 0)
216            warn("ListenSocket(accept): setsockopt() TCP_NODELAY failed!");
217    }
218}
219
/**
 * Create a TCP socket (with TCP_NODELAY requested), resolve
 * serverName:serverPort and connect to the first address returned.
 * The connected descriptor is stored in `sock`.
 *
 * Panics if the socket cannot be created, the name cannot be resolved or
 * the connection attempt fails.
 */
void
TCPIface::connect()
{
    const std::string port_str = std::to_string(serverPort);

    sock = ::socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    panic_if(sock < 0, "socket() failed: %s", strerror(errno));

    int fl = 1;
    if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
                   reinterpret_cast<char *>(&fl), sizeof(fl)) < 0)
        warn("ConnectSocket(connect): setsockopt() TCP_NODELAY failed!");

    struct addrinfo addr_hint;
    std::memset(&addr_hint, 0, sizeof(addr_hint));
    addr_hint.ai_family = AF_INET;
    addr_hint.ai_socktype = SOCK_STREAM;
    addr_hint.ai_protocol = IPPROTO_TCP;

    struct addrinfo *addr_results = nullptr;
    int ret = getaddrinfo(serverName.c_str(), port_str.c_str(),
                          &addr_hint, &addr_results);
    // getaddrinfo() signals failure with any non-zero EAI_* code (the sign
    // of these codes is platform dependent) and does not report through
    // errno, so the message has to come from gai_strerror().
    panic_if(ret != 0, "getaddrinfo() failed: %s", gai_strerror(ret));

    DPRINTF(DistEthernet, "Connecting to %s:%s\n",
            serverName.c_str(), port_str.c_str());

    ret = ::connect(sock, addr_results->ai_addr, addr_results->ai_addrlen);
    // Save errno: freeaddrinfo() below must not be able to clobber the
    // reason reported for a failed connect().
    const int connect_errno = errno;

    freeaddrinfo(addr_results);

    panic_if(ret < 0, "connect() failed: %s", strerror(connect_errno));
}
253
254TCPIface::~TCPIface()
255{
256    int M5_VAR_USED ret;
257
258    ret = close(sock);
259    assert(ret == 0);
260}
261
262void
263TCPIface::sendTCP(int sock, const void *buf, unsigned length)
264{
265    ssize_t ret;
266
267    ret = ::send(sock, buf, length, MSG_NOSIGNAL);
268    if (ret < 0) {
269        if (errno == ECONNRESET || errno == EPIPE) {
270            inform("send(): %s", strerror(errno));
271            exit_message("info", 0, "Message server closed connection, "
272                         "simulation is exiting");
273        } else {
274            panic("send() failed: %s", strerror(errno));
275        }
276    }
277    panic_if(ret != length, "send() failed");
278}
279
280bool
281TCPIface::recvTCP(int sock, void *buf, unsigned length)
282{
283    ssize_t ret;
284
285    ret = ::recv(sock, buf, length,  MSG_WAITALL );
286    if (ret < 0) {
287        if (errno == ECONNRESET || errno == EPIPE)
288            inform("recv(): %s", strerror(errno));
289        else if (ret < 0)
290            panic("recv() failed: %s", strerror(errno));
291    } else if (ret == 0) {
292        inform("recv(): Connection closed");
293    } else if (ret != length)
294        panic("recv() failed");
295
296    return (ret == length);
297}
298
299void
300TCPIface::sendPacket(const Header &header, const EthPacketPtr &packet)
301{
302    sendTCP(sock, &header, sizeof(header));
303    sendTCP(sock, packet->data, packet->length);
304}
305
306void
307TCPIface::sendCmd(const Header &header)
308{
309    DPRINTF(DistEthernetCmd, "TCPIface::sendCmd() type: %d\n",
310            static_cast<int>(header.msgType));
311    // Global commands (i.e. sync request) are always sent by the master
312    // DistIface. The transfer method is simply implemented as point-to-point
313    // messages for now
314    for (auto s: sockRegistry)
315        sendTCP(s, (void*)&header, sizeof(header));
316}
317
318bool
319TCPIface::recvHeader(Header &header)
320{
321    bool ret = recvTCP(sock, &header, sizeof(header));
322    DPRINTF(DistEthernetCmd, "TCPIface::recvHeader() type: %d ret: %d\n",
323            static_cast<int>(header.msgType), ret);
324    return ret;
325}
326
327void
328TCPIface::recvPacket(const Header &header, EthPacketPtr &packet)
329{
330    packet = make_shared<EthPacketData>(header.dataPacketLength);
331    bool ret = recvTCP(sock, packet->data, header.dataPacketLength);
332    panic_if(!ret, "Error while reading socket");
333    packet->length = header.dataPacketLength;
334}
335
336void
337TCPIface::initTransport()
338{
339    // We cannot setup the conections in the constructor because the number
340    // of dist interfaces (per process) is unknown until the (simobject) init
341    // phase. That information is necessary for global connection ordering.
342    establishConnection();
343}
344