Skip to content
Advertisement

Linux Socket timeout works on WSL, but not on Ubuntu

I try to run a TCP client without a server. The idea is simply to periodically try to connect. For this, the client tries to connect to port 1500 on localhost.

Piece of code:

    // Create socket
    if ((create_socket=socket (AF_INET, SOCK_STREAM, PF_UNSPEC)) > 0)
      printf ("Socket created\n");
    address.sin_family = AF_INET;
    address.sin_port = htons (1500);
    inet_aton (argv[1], &address.sin_addr);

    // Connect to server
    // NOTE(review): the return value of connect() is discarded here, so a
    // connect that already failed is not detected before select() runs.
    connect ( create_socket,
                    (struct sockaddr *) &address,
                    sizeof (address));

    // Wait up to 2 seconds for the socket to become writable.
    FD_ZERO(&fdset);
    FD_SET(create_socket, &fdset);
    tv.tv_sec = 2;             /* 2 seconds timeout */
    tv.tv_usec = 0;

    rv = select(create_socket + 1, NULL, &fdset, NULL, &tv);
    if (rv == 1)
    {
        // Socket reported writable: read the pending socket error.
        int so_error;
        socklen_t len = sizeof so_error;

        getsockopt(create_socket, SOL_SOCKET, SO_ERROR, &so_error, &len);

        if (so_error == 0)
        {
          printf ("Connection with server (%s) established \n",
          inet_ntoa (address.sin_addr));
        }
        else
        {
          printf("Error on connect: unsuccessfull\n");
          close (create_socket);
          continue;
        }
    }
    else if (rv == 0)
    {
      // select() timed out without the socket becoming writable.
      printf("Timeout on connect\n");
      close (create_socket);
      continue;
    }
    else
    {
      // select() itself failed.
      printf("Error on connect\n");
      close (create_socket);
      continue;
    }

I’ve set it up in Ubuntu 18.04 on WSL. There, the code waits on select for the defined timeout of 2 seconds and returns appropriate return values. (0 on timeout, 1 on connect). The return value of connect is -1 on WSL and VMware. In Ubuntu 18 (VMware) there is no pause in that line. In any case, even without any server listening on that port, I get immediately a return value of 1.

Why is there this difference?

There is a similar behavior later on in that code:

    // Limit each recv() call to 2 seconds.
    tv.tv_sec = 2;
    tv.tv_usec = 0;
    if (setsockopt(create_socket, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof tv) < 0)
    {
      printf("Error on setsockopt SO_RCVTIMEO");
      exit(EXIT_FAILURE);
    }
    // INNER LOOP: Receive data
    do
    {
      // Leave one byte free so the received data can be NUL-terminated.
      size = recv(create_socket, buffer, BUF-1, 0);
      if( size > 0)
      {
        buffer[size] = '\0';
        printf ("Message received: %s\n", buffer);
      }
      else if (size == -1)
      {
      // on VMware, errno is 107 (ENOTCONN on Linux) if there is no server, but coming to that line was not intended
        printf ("Timeout\n");
      }
      else //
      {
        printf("Server offline\n");
        // GO BACK TO OUTER LOOP and reconnect
        break;
      }

Here, in WSL the recv takes up to 2 seconds, while waiting for any incoming data. (But only if the aforementioned block (connect, select) indicates a valid connection) In VMware I directly get the feedback. (even without connection)

Does it simply work on WSL by chance?

The argument contains the server IP and is 127.0.0.1. lsof shows no connection.


Update 2020-11-18

Here’s the full code as requested by Bodo

#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <unistd.h>

#define BUF 1024
using namespace std;

int main (int argc, char **argv) {
  int create_socket;
  char *buffer =  (char*)malloc(BUF);
  struct sockaddr_in address;
  int size;
  int rv;
  struct timeval tv;
  fd_set fdset;

  // HERE STARTS THE OUTER LOOP - Connect and restart connection
  do
  {
    // Create socket
    if ((create_socket=socket (AF_INET, SOCK_STREAM, PF_UNSPEC)) > 0)
      printf ("Socket createdn");
    address.sin_family = AF_INET;
    address.sin_port = htons (15000);
    inet_aton ("127.0.0.1", &address.sin_addr);

    // Connect to server
    int flags = fcntl(create_socket, F_GETFL, 0);
    if (flags == -1) return false;
    rv = connect ( create_socket,
                    (struct sockaddr *) &address,
                    sizeof (address));

    printf ("Connect. rv = %in", rv);

    if (rv == -1)
    {
      switch (errno)
      {
        case ECONNREFUSED:  printf ("errno = %i (ECONNREFUSED)n", errno); break;
        default:  printf ("errno = %i (ECONNREFUSED)n", errno); break;
      }

    }

    FD_ZERO(&fdset);
    FD_SET(create_socket, &fdset);

    tv.tv_sec = 2;
    tv.tv_usec = 0;
    rv = select(create_socket + 1, NULL, &fdset, NULL, &tv);
    if (rv == 1)
    {
        int so_error;
        socklen_t len = sizeof so_error;

        getsockopt(create_socket, SOL_SOCKET, SO_ERROR, &so_error, &len);

        if (so_error == 0)
        {
         printf ("Connection with server (%s) established n",
          inet_ntoa (address.sin_addr));
        }
        else
        {
          printf("Error on connect: unsuccessfulln");
          close (create_socket);
          continue;
        }
    }
    else if (rv == 0)
    {
      printf("Timeout on connectn");
      close (create_socket);
      continue;
    }
    else
    {
      printf("Error on connectn");
      close (create_socket);
      continue;
    }

    if (setsockopt(create_socket, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof tv) < 0)
    {
      printf("Error on setsockopt SO_RCVTIMEO");
      exit(EXIT_FAILURE);
    }
    // INNER LOOP: Receive data
    do
    {
      size = recv(create_socket, buffer, BUF-1, 0);
      if( size > 0)
      {
        buffer[size] = '';
        printf ("Data received: %sn", buffer);
      }
      else if (size == -1)
      {
        printf ("Timeoutn");
      }
      else //
      {
        printf("Server offlinen");
        // GO BACK TO OUTER LOOP and reconnect
        break;
      }
    } while (strcmp (buffer, "quitn") != 0);
    close (create_socket);
  } while (strcmp (buffer, "quitn") != 0);
  return EXIT_SUCCESS;
}

In WSL the output is

Socket created
Connect. rv = -1
errno = 111 (ECONNREFUSED)

then nothing for 2 seconds afterwards

Timeout on connect
Socket created
Connect. rv = -1
errno = 111 (ECONNREFUSED)

and again nothing for 2 seconds …

Output in VMware

Socket created
Connect. rv = -1
errno = 111 (ECONNREFUSED)
Connection with server (127.0.0.1) established 
Timeout
Timeout
Timeout
Timeout

Where no timeout is fulfilled.

The idea of timeout has been to try to connect on a regular basis, but not as fast as possible.

Advertisement

Answer

Obviously there is something wrong when errno = 111 (ECONNREFUSED) is followed by Connection with server (127.0.0.1) established.

When connect returns -1 and errno is NOT EINPROGRESS you should not use select and getsockopt(...SO_ERROR...). According to https://man7.org/linux/man-pages/man2/connect.2.html, this is only documented for EINPROGRESS.

Both on real Linux and WSL you get errno = 111 (ECONNREFUSED) after a failed connect. I consider the timeout in WSL wrong as the error (connection refused) was already reported, so it does not make sense to wait for a result. But as the behavior is not specified, it may be implementation dependent.

If you want to have a delay before the next connection attempt, you should not use select but for example sleep followed by repeating the loop.

I suggest something like this:

    rv = connect ( create_socket,
                    (struct sockaddr *) &address,
                    sizeof (address));

    printf ("Connect. rv = %i\n", rv);

    if (rv == -1)
    {
      // Save errno before printing: printf may modify it, and the value is
      // needed again for the EINPROGRESS check below.
      const int connect_errno = errno;
      switch (connect_errno)
      {
        case ECONNREFUSED:  printf ("errno = %i (ECONNREFUSED) %s\n", connect_errno, strerror(connect_errno)); break;
        default:  printf ("errno = %i (other) %s\n", connect_errno, strerror(connect_errno)); break;
      }
      if(connect_errno != EINPROGRESS)
      {
        sleep(10); // choose a suitable delay before next connection attempt
        continue;
      }
    }

User contributions licensed under: CC BY-SA
7 People found this is helpful
Advertisement