/*
 * Copyright (c) 2015 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Gabor Dozsa
 *          Mohammad Alian
 */

/* @file
 * TCP stream socket based interface class implementation for dist-gem5 runs.
 */

#include "dev/net/tcp_iface.hh"

#include <arpa/inet.h>
#include <netdb.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "base/types.hh"
#include "debug/DistEthernet.hh"
#include "debug/DistEthernetCmd.hh"
#include "sim/sim_exit.hh"

#if defined(__FreeBSD__)
#include <netinet/in.h>

#endif

// MSG_NOSIGNAL does not exists on OS X
#if defined(__APPLE__) || defined(__MACH__)
#ifndef MSG_NOSIGNAL
#define MSG_NOSIGNAL SO_NOSIGPIPE
#endif
#endif

using namespace std;

7711290Sgabor.dozsa@arm.comstd::vector<std::pair<TCPIface::NodeInfo, int> > TCPIface::nodes; 7810923SN/Avector<int> TCPIface::sockRegistry; 7911290Sgabor.dozsa@arm.comint TCPIface::fdStatic = -1; 8011290Sgabor.dozsa@arm.combool TCPIface::anyListening = false; 8110923SN/A 8210923SN/ATCPIface::TCPIface(string server_name, unsigned server_port, 8311290Sgabor.dozsa@arm.com unsigned dist_rank, unsigned dist_size, 8411290Sgabor.dozsa@arm.com Tick sync_start, Tick sync_repeat, 8511703Smichael.lebeane@amd.com EventManager *em, bool use_pseudo_op, bool is_switch, 8611703Smichael.lebeane@amd.com int num_nodes) : 8711703Smichael.lebeane@amd.com DistIface(dist_rank, dist_size, sync_start, sync_repeat, em, use_pseudo_op, 8811290Sgabor.dozsa@arm.com is_switch, num_nodes), serverName(server_name), 8911290Sgabor.dozsa@arm.com serverPort(server_port), isSwitch(is_switch), listening(false) 9011290Sgabor.dozsa@arm.com{ 9111290Sgabor.dozsa@arm.com if (is_switch && isMaster) { 9211290Sgabor.dozsa@arm.com while (!listen(serverPort)) { 9311290Sgabor.dozsa@arm.com DPRINTF(DistEthernet, "TCPIface(listen): Can't bind port %d\n", 9411290Sgabor.dozsa@arm.com serverPort); 9511290Sgabor.dozsa@arm.com serverPort++; 9611290Sgabor.dozsa@arm.com } 9711290Sgabor.dozsa@arm.com inform("tcp_iface listening on port %d", serverPort); 9811290Sgabor.dozsa@arm.com // Now accept the first connection requests from each compute node and 9911290Sgabor.dozsa@arm.com // store the node info. The compute nodes will then wait for ack 10011290Sgabor.dozsa@arm.com // messages. Ack messages will be sent by initTransport() in the 10111290Sgabor.dozsa@arm.com // appropriate order to make sure that every compute node is always 10211290Sgabor.dozsa@arm.com // connected to the same switch port. 
10311290Sgabor.dozsa@arm.com NodeInfo ni; 10411290Sgabor.dozsa@arm.com for (int i = 0; i < size; i++) { 10511290Sgabor.dozsa@arm.com accept(); 10611290Sgabor.dozsa@arm.com DPRINTF(DistEthernet, "First connection, waiting for link info\n"); 10711290Sgabor.dozsa@arm.com if (!recvTCP(sock, &ni, sizeof(ni))) 10811290Sgabor.dozsa@arm.com panic("Failed to receive link info"); 10911290Sgabor.dozsa@arm.com nodes.push_back(make_pair(ni, sock)); 11011290Sgabor.dozsa@arm.com } 11111290Sgabor.dozsa@arm.com } 11211290Sgabor.dozsa@arm.com} 11311290Sgabor.dozsa@arm.com 11411290Sgabor.dozsa@arm.combool 11511290Sgabor.dozsa@arm.comTCPIface::listen(int port) 11611290Sgabor.dozsa@arm.com{ 11711290Sgabor.dozsa@arm.com if (listening) 11811290Sgabor.dozsa@arm.com panic("Socket already listening!"); 11911290Sgabor.dozsa@arm.com 12011290Sgabor.dozsa@arm.com struct sockaddr_in sockaddr; 12111290Sgabor.dozsa@arm.com int ret; 12211290Sgabor.dozsa@arm.com 12311290Sgabor.dozsa@arm.com fdStatic = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); 12411290Sgabor.dozsa@arm.com panic_if(fdStatic < 0, "socket() failed: %s", strerror(errno)); 12511290Sgabor.dozsa@arm.com 12611290Sgabor.dozsa@arm.com sockaddr.sin_family = PF_INET; 12711290Sgabor.dozsa@arm.com sockaddr.sin_addr.s_addr = INADDR_ANY; 12811290Sgabor.dozsa@arm.com sockaddr.sin_port = htons(port); 12911290Sgabor.dozsa@arm.com // finally clear sin_zero 13011290Sgabor.dozsa@arm.com memset(&sockaddr.sin_zero, 0, sizeof(sockaddr.sin_zero)); 13111290Sgabor.dozsa@arm.com ret = ::bind(fdStatic, (struct sockaddr *)&sockaddr, sizeof (sockaddr)); 13211290Sgabor.dozsa@arm.com 13311290Sgabor.dozsa@arm.com if (ret != 0) { 13411290Sgabor.dozsa@arm.com if (ret == -1 && errno != EADDRINUSE) 13511290Sgabor.dozsa@arm.com panic("ListenSocket(listen): bind() failed!"); 13611290Sgabor.dozsa@arm.com return false; 13711290Sgabor.dozsa@arm.com } 13811290Sgabor.dozsa@arm.com 13911290Sgabor.dozsa@arm.com if (::listen(fdStatic, 24) == -1) { 14011290Sgabor.dozsa@arm.com if 
(errno != EADDRINUSE) 14111290Sgabor.dozsa@arm.com panic("ListenSocket(listen): listen() failed!"); 14211290Sgabor.dozsa@arm.com 14311290Sgabor.dozsa@arm.com return false; 14411290Sgabor.dozsa@arm.com } 14511290Sgabor.dozsa@arm.com 14611290Sgabor.dozsa@arm.com listening = true; 14711290Sgabor.dozsa@arm.com anyListening = true; 14811290Sgabor.dozsa@arm.com return true; 14911290Sgabor.dozsa@arm.com} 15011290Sgabor.dozsa@arm.com 15111290Sgabor.dozsa@arm.comvoid 15211290Sgabor.dozsa@arm.comTCPIface::establishConnection() 15311290Sgabor.dozsa@arm.com{ 15411290Sgabor.dozsa@arm.com static unsigned cur_rank = 0; 15511290Sgabor.dozsa@arm.com static unsigned cur_id = 0; 15611290Sgabor.dozsa@arm.com NodeInfo ni; 15711290Sgabor.dozsa@arm.com 15811290Sgabor.dozsa@arm.com if (isSwitch) { 15911290Sgabor.dozsa@arm.com if (cur_id == 0) { // first connection accepted in the ctor already 16011290Sgabor.dozsa@arm.com auto const &iface0 = 16111290Sgabor.dozsa@arm.com find_if(nodes.begin(), nodes.end(), 16211290Sgabor.dozsa@arm.com [](const pair<NodeInfo, int> &cn) -> bool { 16311290Sgabor.dozsa@arm.com return cn.first.rank == cur_rank; 16411290Sgabor.dozsa@arm.com }); 16511290Sgabor.dozsa@arm.com assert(iface0 != nodes.end()); 16611290Sgabor.dozsa@arm.com assert(iface0->first.distIfaceId == 0); 16711290Sgabor.dozsa@arm.com sock = iface0->second; 16811290Sgabor.dozsa@arm.com ni = iface0->first; 16911290Sgabor.dozsa@arm.com } else { // additional connections from the same compute node 17011290Sgabor.dozsa@arm.com accept(); 17111290Sgabor.dozsa@arm.com DPRINTF(DistEthernet, "Next connection, waiting for link info\n"); 17211290Sgabor.dozsa@arm.com if (!recvTCP(sock, &ni, sizeof(ni))) 17311290Sgabor.dozsa@arm.com panic("Failed to receive link info"); 17411290Sgabor.dozsa@arm.com assert(ni.rank == cur_rank); 17511290Sgabor.dozsa@arm.com assert(ni.distIfaceId == cur_id); 17611290Sgabor.dozsa@arm.com } 17711290Sgabor.dozsa@arm.com inform("Link okay (iface:%d -> (node:%d, iface:%d))", 
17811290Sgabor.dozsa@arm.com distIfaceId, ni.rank, ni.distIfaceId); 17911290Sgabor.dozsa@arm.com if (ni.distIfaceId < ni.distIfaceNum - 1) { 18011290Sgabor.dozsa@arm.com cur_id++; 18111290Sgabor.dozsa@arm.com } else { 18211290Sgabor.dozsa@arm.com cur_rank++; 18311290Sgabor.dozsa@arm.com cur_id = 0; 18411290Sgabor.dozsa@arm.com } 18511290Sgabor.dozsa@arm.com // send ack 18611290Sgabor.dozsa@arm.com ni.distIfaceId = distIfaceId; 18711290Sgabor.dozsa@arm.com ni.distIfaceNum = distIfaceNum; 18811290Sgabor.dozsa@arm.com sendTCP(sock, &ni, sizeof(ni)); 18911290Sgabor.dozsa@arm.com } else { // this is not a switch 19011290Sgabor.dozsa@arm.com connect(); 19111290Sgabor.dozsa@arm.com // send link info 19211290Sgabor.dozsa@arm.com ni.rank = rank; 19311290Sgabor.dozsa@arm.com ni.distIfaceId = distIfaceId; 19411290Sgabor.dozsa@arm.com ni.distIfaceNum = distIfaceNum; 19511290Sgabor.dozsa@arm.com sendTCP(sock, &ni, sizeof(ni)); 19611290Sgabor.dozsa@arm.com DPRINTF(DistEthernet, "Connected, waiting for ack (distIfaceId:%d\n", 19711290Sgabor.dozsa@arm.com distIfaceId); 19811290Sgabor.dozsa@arm.com if (!recvTCP(sock, &ni, sizeof(ni))) 19911290Sgabor.dozsa@arm.com panic("Failed to receive ack"); 20011290Sgabor.dozsa@arm.com assert(ni.rank == rank); 20111290Sgabor.dozsa@arm.com inform("Link okay (iface:%d -> switch iface:%d)", distIfaceId, 20211290Sgabor.dozsa@arm.com ni.distIfaceId); 20311290Sgabor.dozsa@arm.com } 20411290Sgabor.dozsa@arm.com sockRegistry.push_back(sock); 20511290Sgabor.dozsa@arm.com} 20611290Sgabor.dozsa@arm.com 20711290Sgabor.dozsa@arm.comvoid 20811290Sgabor.dozsa@arm.comTCPIface::accept() 20911290Sgabor.dozsa@arm.com{ 21011290Sgabor.dozsa@arm.com struct sockaddr_in sockaddr; 21111290Sgabor.dozsa@arm.com socklen_t slen = sizeof (sockaddr); 21211290Sgabor.dozsa@arm.com sock = ::accept(fdStatic, (struct sockaddr *)&sockaddr, &slen); 21311290Sgabor.dozsa@arm.com if (sock != -1) { 21411290Sgabor.dozsa@arm.com int i = 1; 21511290Sgabor.dozsa@arm.com if 
(setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *)&i, 21611290Sgabor.dozsa@arm.com sizeof(i)) < 0) 21711290Sgabor.dozsa@arm.com warn("ListenSocket(accept): setsockopt() TCP_NODELAY failed!"); 21811290Sgabor.dozsa@arm.com } 21911290Sgabor.dozsa@arm.com} 22011290Sgabor.dozsa@arm.com 22111290Sgabor.dozsa@arm.comvoid 22211290Sgabor.dozsa@arm.comTCPIface::connect() 22310923SN/A{ 22410923SN/A struct addrinfo addr_hint, *addr_results; 22511290Sgabor.dozsa@arm.com int ret; 22610923SN/A 22711290Sgabor.dozsa@arm.com string port_str = to_string(serverPort); 22810923SN/A 22911290Sgabor.dozsa@arm.com sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); 23011290Sgabor.dozsa@arm.com panic_if(sock < 0, "socket() failed: %s", strerror(errno)); 23110923SN/A 23211290Sgabor.dozsa@arm.com int fl = 1; 23311290Sgabor.dozsa@arm.com if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *)&fl, sizeof(fl)) < 0) 23411290Sgabor.dozsa@arm.com warn("ConnectSocket(connect): setsockopt() TCP_NODELAY failed!"); 23510923SN/A 23611290Sgabor.dozsa@arm.com bzero(&addr_hint, sizeof(addr_hint)); 23711290Sgabor.dozsa@arm.com addr_hint.ai_family = AF_INET; 23811290Sgabor.dozsa@arm.com addr_hint.ai_socktype = SOCK_STREAM; 23911290Sgabor.dozsa@arm.com addr_hint.ai_protocol = IPPROTO_TCP; 24010923SN/A 24111290Sgabor.dozsa@arm.com ret = getaddrinfo(serverName.c_str(), port_str.c_str(), 24211290Sgabor.dozsa@arm.com &addr_hint, &addr_results); 24311290Sgabor.dozsa@arm.com panic_if(ret < 0, "getaddrinf() failed: %s", strerror(errno)); 24410923SN/A 24511290Sgabor.dozsa@arm.com DPRINTF(DistEthernet, "Connecting to %s:%s\n", 24611290Sgabor.dozsa@arm.com serverName.c_str(), port_str.c_str()); 24710923SN/A 24811290Sgabor.dozsa@arm.com ret = ::connect(sock, (struct sockaddr *)(addr_results->ai_addr), 24911290Sgabor.dozsa@arm.com addr_results->ai_addrlen); 25011290Sgabor.dozsa@arm.com panic_if(ret < 0, "connect() failed: %s", strerror(errno)); 25111290Sgabor.dozsa@arm.com 25211290Sgabor.dozsa@arm.com 
freeaddrinfo(addr_results); 25310923SN/A} 25410923SN/A 25510923SN/ATCPIface::~TCPIface() 25610923SN/A{ 25710923SN/A int M5_VAR_USED ret; 25810923SN/A 25910923SN/A ret = close(sock); 26010923SN/A assert(ret == 0); 26110923SN/A} 26210923SN/A 26310923SN/Avoid 26411290Sgabor.dozsa@arm.comTCPIface::sendTCP(int sock, const void *buf, unsigned length) 26510923SN/A{ 26610923SN/A ssize_t ret; 26710923SN/A 26810923SN/A ret = ::send(sock, buf, length, MSG_NOSIGNAL); 26911290Sgabor.dozsa@arm.com if (ret < 0) { 27011290Sgabor.dozsa@arm.com if (errno == ECONNRESET || errno == EPIPE) { 27111622Smichael.lebeane@amd.com exitSimLoop("Message server closed connection, simulation " 27211622Smichael.lebeane@amd.com "is exiting"); 27311290Sgabor.dozsa@arm.com } else { 27411290Sgabor.dozsa@arm.com panic("send() failed: %s", strerror(errno)); 27511290Sgabor.dozsa@arm.com } 27611290Sgabor.dozsa@arm.com } 27710923SN/A panic_if(ret != length, "send() failed"); 27810923SN/A} 27910923SN/A 28010923SN/Abool 28110923SN/ATCPIface::recvTCP(int sock, void *buf, unsigned length) 28210923SN/A{ 28310923SN/A ssize_t ret; 28410923SN/A 28510923SN/A ret = ::recv(sock, buf, length, MSG_WAITALL ); 28610923SN/A if (ret < 0) { 28710923SN/A if (errno == ECONNRESET || errno == EPIPE) 28810923SN/A inform("recv(): %s", strerror(errno)); 28910923SN/A else if (ret < 0) 29010923SN/A panic("recv() failed: %s", strerror(errno)); 29110923SN/A } else if (ret == 0) { 29210923SN/A inform("recv(): Connection closed"); 29310923SN/A } else if (ret != length) 29410923SN/A panic("recv() failed"); 29510923SN/A 29610923SN/A return (ret == length); 29710923SN/A} 29810923SN/A 29910923SN/Avoid 30011290Sgabor.dozsa@arm.comTCPIface::sendPacket(const Header &header, const EthPacketPtr &packet) 30110923SN/A{ 30211290Sgabor.dozsa@arm.com sendTCP(sock, &header, sizeof(header)); 30311290Sgabor.dozsa@arm.com sendTCP(sock, packet->data, packet->length); 30410923SN/A} 30510923SN/A 30611290Sgabor.dozsa@arm.comvoid 
30711290Sgabor.dozsa@arm.comTCPIface::sendCmd(const Header &header) 30811290Sgabor.dozsa@arm.com{ 30911290Sgabor.dozsa@arm.com DPRINTF(DistEthernetCmd, "TCPIface::sendCmd() type: %d\n", 31011290Sgabor.dozsa@arm.com static_cast<int>(header.msgType)); 31111290Sgabor.dozsa@arm.com // Global commands (i.e. sync request) are always sent by the master 31211290Sgabor.dozsa@arm.com // DistIface. The transfer method is simply implemented as point-to-point 31311290Sgabor.dozsa@arm.com // messages for now 31411290Sgabor.dozsa@arm.com for (auto s: sockRegistry) 31511290Sgabor.dozsa@arm.com sendTCP(s, (void*)&header, sizeof(header)); 31611290Sgabor.dozsa@arm.com} 31711290Sgabor.dozsa@arm.com 31811290Sgabor.dozsa@arm.combool 31911290Sgabor.dozsa@arm.comTCPIface::recvHeader(Header &header) 32011290Sgabor.dozsa@arm.com{ 32111290Sgabor.dozsa@arm.com bool ret = recvTCP(sock, &header, sizeof(header)); 32211290Sgabor.dozsa@arm.com DPRINTF(DistEthernetCmd, "TCPIface::recvHeader() type: %d ret: %d\n", 32311290Sgabor.dozsa@arm.com static_cast<int>(header.msgType), ret); 32411290Sgabor.dozsa@arm.com return ret; 32511290Sgabor.dozsa@arm.com} 32611290Sgabor.dozsa@arm.com 32711290Sgabor.dozsa@arm.comvoid 32811290Sgabor.dozsa@arm.comTCPIface::recvPacket(const Header &header, EthPacketPtr &packet) 32911290Sgabor.dozsa@arm.com{ 33011290Sgabor.dozsa@arm.com packet = make_shared<EthPacketData>(header.dataPacketLength); 33111290Sgabor.dozsa@arm.com bool ret = recvTCP(sock, packet->data, header.dataPacketLength); 33211290Sgabor.dozsa@arm.com panic_if(!ret, "Error while reading socket"); 33311701Smichael.lebeane@amd.com packet->simLength = header.simLength; 33411290Sgabor.dozsa@arm.com packet->length = header.dataPacketLength; 33511290Sgabor.dozsa@arm.com} 33611290Sgabor.dozsa@arm.com 33711290Sgabor.dozsa@arm.comvoid 33811290Sgabor.dozsa@arm.comTCPIface::initTransport() 33911290Sgabor.dozsa@arm.com{ 34011290Sgabor.dozsa@arm.com // We cannot setup the conections in the constructor because the number 
34111290Sgabor.dozsa@arm.com // of dist interfaces (per process) is unknown until the (simobject) init 34211290Sgabor.dozsa@arm.com // phase. That information is necessary for global connection ordering. 34311290Sgabor.dozsa@arm.com establishConnection(); 34411290Sgabor.dozsa@arm.com} 345