// tcp_iface.cc revision 11290
111723Sar4jc@virginia.edu/* 211963Sar4jc@virginia.edu * Copyright (c) 2015 ARM Limited 311963Sar4jc@virginia.edu * All rights reserved 411963Sar4jc@virginia.edu * 511963Sar4jc@virginia.edu * The license below extends only to copyright in the software and shall 611963Sar4jc@virginia.edu * not be construed as granting a license to any other intellectual 711963Sar4jc@virginia.edu * property including but not limited to intellectual property relating 811963Sar4jc@virginia.edu * to a hardware implementation of the functionality of the software 911963Sar4jc@virginia.edu * licensed hereunder. You may use the software subject to the license 1011963Sar4jc@virginia.edu * terms below provided that you ensure that this notice is replicated 1111963Sar4jc@virginia.edu * unmodified and in its entirety in all distributions of the software, 1211963Sar4jc@virginia.edu * modified or unmodified, in source code or in binary form. 1311963Sar4jc@virginia.edu * 1411963Sar4jc@virginia.edu * Redistribution and use in source and binary forms, with or without 1511963Sar4jc@virginia.edu * modification, are permitted provided that the following conditions are 1611963Sar4jc@virginia.edu * met: redistributions of source code must retain the above copyright 1711723Sar4jc@virginia.edu * notice, this list of conditions and the following disclaimer; 1811723Sar4jc@virginia.edu * redistributions in binary form must reproduce the above copyright 1911723Sar4jc@virginia.edu * notice, this list of conditions and the following disclaimer in the 2011723Sar4jc@virginia.edu * documentation and/or other materials provided with the distribution; 2111723Sar4jc@virginia.edu * neither the name of the copyright holders nor the names of its 2211723Sar4jc@virginia.edu * contributors may be used to endorse or promote products derived from 2311723Sar4jc@virginia.edu * this software without specific prior written permission. 
2411723Sar4jc@virginia.edu * 2511723Sar4jc@virginia.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2611723Sar4jc@virginia.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2711723Sar4jc@virginia.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 2811723Sar4jc@virginia.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 2911723Sar4jc@virginia.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 3011723Sar4jc@virginia.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 3111723Sar4jc@virginia.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 3211723Sar4jc@virginia.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 3311723Sar4jc@virginia.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 3411723Sar4jc@virginia.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 3511723Sar4jc@virginia.edu * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3611723Sar4jc@virginia.edu * 3711723Sar4jc@virginia.edu * Authors: Gabor Dozsa 3811723Sar4jc@virginia.edu * Mohammad Alian 3911723Sar4jc@virginia.edu */ 4011723Sar4jc@virginia.edu 4111723Sar4jc@virginia.edu/* @file 4211723Sar4jc@virginia.edu * TCP stream socket based interface class implementation for dist-gem5 runs. 
4311723Sar4jc@virginia.edu */ 4411963Sar4jc@virginia.edu 4511963Sar4jc@virginia.edu#include "dev/net/tcp_iface.hh" 4611963Sar4jc@virginia.edu 4711963Sar4jc@virginia.edu#include <arpa/inet.h> 4811723Sar4jc@virginia.edu#include <netdb.h> 4911723Sar4jc@virginia.edu#include <netinet/tcp.h> 5011963Sar4jc@virginia.edu#include <sys/socket.h> 5111963Sar4jc@virginia.edu#include <sys/types.h> 5211963Sar4jc@virginia.edu#include <unistd.h> 5311963Sar4jc@virginia.edu 5411963Sar4jc@virginia.edu#include <cerrno> 5511963Sar4jc@virginia.edu#include <cstring> 5611963Sar4jc@virginia.edu#include <vector> 5711963Sar4jc@virginia.edu 5811963Sar4jc@virginia.edu#include "base/types.hh" 5911963Sar4jc@virginia.edu#include "debug/DistEthernet.hh" 6011963Sar4jc@virginia.edu#include "debug/DistEthernetCmd.hh" 6111963Sar4jc@virginia.edu#include "sim/sim_exit.hh" 6211963Sar4jc@virginia.edu 6311963Sar4jc@virginia.edu#if defined(__FreeBSD__) 6411963Sar4jc@virginia.edu#include <netinet/in.h> 6511963Sar4jc@virginia.edu 6611963Sar4jc@virginia.edu#endif 6711963Sar4jc@virginia.edu 6811963Sar4jc@virginia.edu// MSG_NOSIGNAL does not exists on OS X 6911963Sar4jc@virginia.edu#if defined(__APPLE__) || defined(__MACH__) 7011963Sar4jc@virginia.edu#ifndef MSG_NOSIGNAL 7111963Sar4jc@virginia.edu#define MSG_NOSIGNAL SO_NOSIGPIPE 7211963Sar4jc@virginia.edu#endif 7311963Sar4jc@virginia.edu#endif 7411963Sar4jc@virginia.edu 7511963Sar4jc@virginia.eduusing namespace std; 7611963Sar4jc@virginia.edu 7711963Sar4jc@virginia.edustd::vector<std::pair<TCPIface::NodeInfo, int> > TCPIface::nodes; 7811963Sar4jc@virginia.eduvector<int> TCPIface::sockRegistry; 7911963Sar4jc@virginia.eduint TCPIface::fdStatic = -1; 8011963Sar4jc@virginia.edubool TCPIface::anyListening = false; 8111963Sar4jc@virginia.edu 8211963Sar4jc@virginia.eduTCPIface::TCPIface(string server_name, unsigned server_port, 8311963Sar4jc@virginia.edu unsigned dist_rank, unsigned dist_size, 8411963Sar4jc@virginia.edu Tick sync_start, Tick sync_repeat, 
8511963Sar4jc@virginia.edu EventManager *em, bool is_switch, int num_nodes) : 8611963Sar4jc@virginia.edu DistIface(dist_rank, dist_size, sync_start, sync_repeat, em, 8711963Sar4jc@virginia.edu is_switch, num_nodes), serverName(server_name), 8811963Sar4jc@virginia.edu serverPort(server_port), isSwitch(is_switch), listening(false) 8911963Sar4jc@virginia.edu{ 9011963Sar4jc@virginia.edu if (is_switch && isMaster) { 9111963Sar4jc@virginia.edu while (!listen(serverPort)) { 9211963Sar4jc@virginia.edu DPRINTF(DistEthernet, "TCPIface(listen): Can't bind port %d\n", 9311963Sar4jc@virginia.edu serverPort); 9411963Sar4jc@virginia.edu serverPort++; 9511963Sar4jc@virginia.edu } 9611963Sar4jc@virginia.edu inform("tcp_iface listening on port %d", serverPort); 9711963Sar4jc@virginia.edu // Now accept the first connection requests from each compute node and 9811963Sar4jc@virginia.edu // store the node info. The compute nodes will then wait for ack 9911963Sar4jc@virginia.edu // messages. Ack messages will be sent by initTransport() in the 10011963Sar4jc@virginia.edu // appropriate order to make sure that every compute node is always 10111963Sar4jc@virginia.edu // connected to the same switch port. 
10211963Sar4jc@virginia.edu NodeInfo ni; 10311963Sar4jc@virginia.edu for (int i = 0; i < size; i++) { 10411963Sar4jc@virginia.edu accept(); 10511963Sar4jc@virginia.edu DPRINTF(DistEthernet, "First connection, waiting for link info\n"); 10611963Sar4jc@virginia.edu if (!recvTCP(sock, &ni, sizeof(ni))) 10711963Sar4jc@virginia.edu panic("Failed to receive link info"); 10811963Sar4jc@virginia.edu nodes.push_back(make_pair(ni, sock)); 10911963Sar4jc@virginia.edu } 11011963Sar4jc@virginia.edu } 11111963Sar4jc@virginia.edu} 11211963Sar4jc@virginia.edu 11311963Sar4jc@virginia.edubool 11411963Sar4jc@virginia.eduTCPIface::listen(int port) 11511963Sar4jc@virginia.edu{ 11611963Sar4jc@virginia.edu if (listening) 11711963Sar4jc@virginia.edu panic("Socket already listening!"); 11811963Sar4jc@virginia.edu 11911963Sar4jc@virginia.edu struct sockaddr_in sockaddr; 12011963Sar4jc@virginia.edu int ret; 12111963Sar4jc@virginia.edu 12211963Sar4jc@virginia.edu fdStatic = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); 12311963Sar4jc@virginia.edu panic_if(fdStatic < 0, "socket() failed: %s", strerror(errno)); 12411963Sar4jc@virginia.edu 12511963Sar4jc@virginia.edu sockaddr.sin_family = PF_INET; 12611963Sar4jc@virginia.edu sockaddr.sin_addr.s_addr = INADDR_ANY; 12711963Sar4jc@virginia.edu sockaddr.sin_port = htons(port); 12811963Sar4jc@virginia.edu // finally clear sin_zero 12911963Sar4jc@virginia.edu memset(&sockaddr.sin_zero, 0, sizeof(sockaddr.sin_zero)); 13011963Sar4jc@virginia.edu ret = ::bind(fdStatic, (struct sockaddr *)&sockaddr, sizeof (sockaddr)); 13111963Sar4jc@virginia.edu 13211963Sar4jc@virginia.edu if (ret != 0) { 13311963Sar4jc@virginia.edu if (ret == -1 && errno != EADDRINUSE) 13411963Sar4jc@virginia.edu panic("ListenSocket(listen): bind() failed!"); 13511963Sar4jc@virginia.edu return false; 13611963Sar4jc@virginia.edu } 13711963Sar4jc@virginia.edu 13811963Sar4jc@virginia.edu if (::listen(fdStatic, 24) == -1) { 13911963Sar4jc@virginia.edu if (errno != EADDRINUSE) 
14011963Sar4jc@virginia.edu panic("ListenSocket(listen): listen() failed!"); 14111963Sar4jc@virginia.edu 14211963Sar4jc@virginia.edu return false; 14311963Sar4jc@virginia.edu } 14411963Sar4jc@virginia.edu 14511963Sar4jc@virginia.edu listening = true; 14611963Sar4jc@virginia.edu anyListening = true; 14711723Sar4jc@virginia.edu return true; 14811723Sar4jc@virginia.edu} 14911723Sar4jc@virginia.edu 15011723Sar4jc@virginia.eduvoid 15112449Sgabeblack@google.comTCPIface::establishConnection() 15212449Sgabeblack@google.com{ 15311723Sar4jc@virginia.edu static unsigned cur_rank = 0; 15411723Sar4jc@virginia.edu static unsigned cur_id = 0; 15511723Sar4jc@virginia.edu NodeInfo ni; 15611963Sar4jc@virginia.edu 15711963Sar4jc@virginia.edu if (isSwitch) { 15811723Sar4jc@virginia.edu if (cur_id == 0) { // first connection accepted in the ctor already 15912455Sgabeblack@google.com auto const &iface0 = 16012455Sgabeblack@google.com find_if(nodes.begin(), nodes.end(), 16111723Sar4jc@virginia.edu [](const pair<NodeInfo, int> &cn) -> bool { 16211723Sar4jc@virginia.edu return cn.first.rank == cur_rank; 16311723Sar4jc@virginia.edu }); 16411963Sar4jc@virginia.edu assert(iface0 != nodes.end()); 16511723Sar4jc@virginia.edu assert(iface0->first.distIfaceId == 0); 16611963Sar4jc@virginia.edu sock = iface0->second; 16711963Sar4jc@virginia.edu ni = iface0->first; 16811963Sar4jc@virginia.edu } else { // additional connections from the same compute node 16911963Sar4jc@virginia.edu accept(); 17011963Sar4jc@virginia.edu DPRINTF(DistEthernet, "Next connection, waiting for link info\n"); 17111963Sar4jc@virginia.edu if (!recvTCP(sock, &ni, sizeof(ni))) 17211963Sar4jc@virginia.edu panic("Failed to receive link info"); 17311963Sar4jc@virginia.edu assert(ni.rank == cur_rank); 17411963Sar4jc@virginia.edu assert(ni.distIfaceId == cur_id); 17511963Sar4jc@virginia.edu } 17611963Sar4jc@virginia.edu inform("Link okay (iface:%d -> (node:%d, iface:%d))", 17711963Sar4jc@virginia.edu distIfaceId, ni.rank, 
ni.distIfaceId); 17811963Sar4jc@virginia.edu if (ni.distIfaceId < ni.distIfaceNum - 1) { 17911723Sar4jc@virginia.edu cur_id++; 18011723Sar4jc@virginia.edu } else { 18111723Sar4jc@virginia.edu cur_rank++; 18211963Sar4jc@virginia.edu cur_id = 0; 18311723Sar4jc@virginia.edu } 18411963Sar4jc@virginia.edu // send ack 18511963Sar4jc@virginia.edu ni.distIfaceId = distIfaceId; 18611963Sar4jc@virginia.edu ni.distIfaceNum = distIfaceNum; 18711963Sar4jc@virginia.edu sendTCP(sock, &ni, sizeof(ni)); 18811963Sar4jc@virginia.edu } else { // this is not a switch 18911963Sar4jc@virginia.edu connect(); 19011963Sar4jc@virginia.edu // send link info 19111963Sar4jc@virginia.edu ni.rank = rank; 19211963Sar4jc@virginia.edu ni.distIfaceId = distIfaceId; 19311963Sar4jc@virginia.edu ni.distIfaceNum = distIfaceNum; 19411963Sar4jc@virginia.edu sendTCP(sock, &ni, sizeof(ni)); 19511963Sar4jc@virginia.edu DPRINTF(DistEthernet, "Connected, waiting for ack (distIfaceId:%d\n", 19611963Sar4jc@virginia.edu distIfaceId); 19711723Sar4jc@virginia.edu if (!recvTCP(sock, &ni, sizeof(ni))) 19811963Sar4jc@virginia.edu panic("Failed to receive ack"); 19912449Sgabeblack@google.com assert(ni.rank == rank); 20012449Sgabeblack@google.com inform("Link okay (iface:%d -> switch iface:%d)", distIfaceId, 20112449Sgabeblack@google.com ni.distIfaceId); 20212031Sgabeblack@google.com } 20311963Sar4jc@virginia.edu sockRegistry.push_back(sock); 204} 205 206void 207TCPIface::accept() 208{ 209 struct sockaddr_in sockaddr; 210 socklen_t slen = sizeof (sockaddr); 211 sock = ::accept(fdStatic, (struct sockaddr *)&sockaddr, &slen); 212 if (sock != -1) { 213 int i = 1; 214 if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *)&i, 215 sizeof(i)) < 0) 216 warn("ListenSocket(accept): setsockopt() TCP_NODELAY failed!"); 217 } 218} 219 220void 221TCPIface::connect() 222{ 223 struct addrinfo addr_hint, *addr_results; 224 int ret; 225 226 string port_str = to_string(serverPort); 227 228 sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); 
229 panic_if(sock < 0, "socket() failed: %s", strerror(errno)); 230 231 int fl = 1; 232 if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *)&fl, sizeof(fl)) < 0) 233 warn("ConnectSocket(connect): setsockopt() TCP_NODELAY failed!"); 234 235 bzero(&addr_hint, sizeof(addr_hint)); 236 addr_hint.ai_family = AF_INET; 237 addr_hint.ai_socktype = SOCK_STREAM; 238 addr_hint.ai_protocol = IPPROTO_TCP; 239 240 ret = getaddrinfo(serverName.c_str(), port_str.c_str(), 241 &addr_hint, &addr_results); 242 panic_if(ret < 0, "getaddrinf() failed: %s", strerror(errno)); 243 244 DPRINTF(DistEthernet, "Connecting to %s:%s\n", 245 serverName.c_str(), port_str.c_str()); 246 247 ret = ::connect(sock, (struct sockaddr *)(addr_results->ai_addr), 248 addr_results->ai_addrlen); 249 panic_if(ret < 0, "connect() failed: %s", strerror(errno)); 250 251 freeaddrinfo(addr_results); 252} 253 254TCPIface::~TCPIface() 255{ 256 int M5_VAR_USED ret; 257 258 ret = close(sock); 259 assert(ret == 0); 260} 261 262void 263TCPIface::sendTCP(int sock, const void *buf, unsigned length) 264{ 265 ssize_t ret; 266 267 ret = ::send(sock, buf, length, MSG_NOSIGNAL); 268 if (ret < 0) { 269 if (errno == ECONNRESET || errno == EPIPE) { 270 inform("send(): %s", strerror(errno)); 271 exit_message("info", 0, "Message server closed connection, " 272 "simulation is exiting"); 273 } else { 274 panic("send() failed: %s", strerror(errno)); 275 } 276 } 277 panic_if(ret != length, "send() failed"); 278} 279 280bool 281TCPIface::recvTCP(int sock, void *buf, unsigned length) 282{ 283 ssize_t ret; 284 285 ret = ::recv(sock, buf, length, MSG_WAITALL ); 286 if (ret < 0) { 287 if (errno == ECONNRESET || errno == EPIPE) 288 inform("recv(): %s", strerror(errno)); 289 else if (ret < 0) 290 panic("recv() failed: %s", strerror(errno)); 291 } else if (ret == 0) { 292 inform("recv(): Connection closed"); 293 } else if (ret != length) 294 panic("recv() failed"); 295 296 return (ret == length); 297} 298 299void 300TCPIface::sendPacket(const 
Header &header, const EthPacketPtr &packet) 301{ 302 sendTCP(sock, &header, sizeof(header)); 303 sendTCP(sock, packet->data, packet->length); 304} 305 306void 307TCPIface::sendCmd(const Header &header) 308{ 309 DPRINTF(DistEthernetCmd, "TCPIface::sendCmd() type: %d\n", 310 static_cast<int>(header.msgType)); 311 // Global commands (i.e. sync request) are always sent by the master 312 // DistIface. The transfer method is simply implemented as point-to-point 313 // messages for now 314 for (auto s: sockRegistry) 315 sendTCP(s, (void*)&header, sizeof(header)); 316} 317 318bool 319TCPIface::recvHeader(Header &header) 320{ 321 bool ret = recvTCP(sock, &header, sizeof(header)); 322 DPRINTF(DistEthernetCmd, "TCPIface::recvHeader() type: %d ret: %d\n", 323 static_cast<int>(header.msgType), ret); 324 return ret; 325} 326 327void 328TCPIface::recvPacket(const Header &header, EthPacketPtr &packet) 329{ 330 packet = make_shared<EthPacketData>(header.dataPacketLength); 331 bool ret = recvTCP(sock, packet->data, header.dataPacketLength); 332 panic_if(!ret, "Error while reading socket"); 333 packet->length = header.dataPacketLength; 334} 335 336void 337TCPIface::initTransport() 338{ 339 // We cannot setup the conections in the constructor because the number 340 // of dist interfaces (per process) is unknown until the (simobject) init 341 // phase. That information is necessary for global connection ordering. 342 establishConnection(); 343} 344