Expansion of our checks for connection-loss errors

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Expansion of our checks for connection-loss errors
Дата
Msg-id 2621622.1602184554@sss.pgh.pa.us
обсуждение исходный текст
Ответы Re: Expansion of our checks for connection-loss errors  (Kyotaro Horiguchi <horikyota.ntt@gmail.com>)
Re: Expansion of our checks for connection-loss errors  (Fujii Masao <masao.fujii@oss.nttdata.com>)
Список pgsql-hackers
Over in the thread at [1], we've tentatively determined that the
reason buildfarm member lorikeet is currently failing is that its
network stack returns ECONNABORTED for (some?) connection failures,
whereas our code is only expecting ECONNRESET.  Fujii Masao therefore
proposes that we treat ECONNABORTED the same as ECONNRESET.  I think
this is a good idea, but after a bit of research I feel it does not
go far enough.  I find these POSIX-standard errnos that also seem
likely candidates to be returned for a hard loss of connection:

    ECONNABORTED
    EHOSTUNREACH
    ENETDOWN
    ENETUNREACH

All of these have been in POSIX since SUSv2, so it seems unlikely
that we need to #ifdef any of them.  (It is in any case pretty silly
that we have #ifdefs around a very small minority of our references
to ECONNRESET :-(.)

There are some other related errnos, such as ECONNREFUSED, that
don't seem like they'd be returned for a failure of a pre-existing
connection, so we don't need to include them in such tests.

Accordingly, I propose the attached patch (an expansion of
Fujii-san's) that causes us to test for all five errnos anyplace
we had been checking for ECONNRESET.  I felt that this was getting to
the point where we'd better centralize the knowledge of what to check,
so the patch does that, via an inline function and an admittedly hacky
macro.  I also upgraded some places such as strerror.c to have full
support for these symbols.

All of the machines I have (even as far back as HPUX 10.20) also
define ENETRESET and EHOSTDOWN.  However, those symbols do not appear
in SUSv2.  ENETRESET was added at some later point, but EHOSTDOWN is
still not in POSIX.  For the moment I've left these second-tier
symbols out of the patch, but there's a case for adding them.  I'm
not sure whether there'd be any point in trying to #ifdef them.

BTW, I took out the conditional defines of some of these errnos in
libpq's win32.h; AFAICS that's been dead code ever since we added
#define's for them to win32_port.h.  Am I missing something?

This seems like a bug fix to me, so I'm inclined to back-patch.

            regards, tom lane

[1] https://www.postgresql.org/message-id/flat/E1kPc9v-0005L4-2l%40gemulon.postgresql.org

diff --git a/src/backend/port/win32/socket.c b/src/backend/port/win32/socket.c
index 6fbd1ed6fb..0f28c07ed1 100644
--- a/src/backend/port/win32/socket.c
+++ b/src/backend/port/win32/socket.c
@@ -123,10 +123,14 @@ TranslateSocketError(void)
         case WSAEHOSTUNREACH:
         case WSAEHOSTDOWN:
         case WSAHOST_NOT_FOUND:
+            errno = EHOSTUNREACH;
+            break;
         case WSAENETDOWN:
-        case WSAENETUNREACH:
         case WSAENETRESET:
-            errno = EHOSTUNREACH;
+            errno = ENETDOWN;
+            break;
+        case WSAENETUNREACH:
+            errno = ENETUNREACH;
             break;
         case WSAENOTCONN:
         case WSAESHUTDOWN:
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index d0b368530e..8937f223a8 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -712,9 +712,7 @@ errcode_for_socket_access(void)
     {
             /* Loss of connection */
         case EPIPE:
-#ifdef ECONNRESET
-        case ECONNRESET:
-#endif
+        case ALL_CONNECTION_LOSS_ERRNOS:
             edata->sqlerrcode = ERRCODE_CONNECTION_FAILURE;
             break;

diff --git a/src/bin/pg_dump/parallel.c b/src/bin/pg_dump/parallel.c
index f0587f41e4..b8ab939179 100644
--- a/src/bin/pg_dump/parallel.c
+++ b/src/bin/pg_dump/parallel.c
@@ -1825,7 +1825,7 @@ piperead(int s, char *buf, int len)
 {
     int            ret = recv(s, buf, len, 0);

-    if (ret < 0 && WSAGetLastError() == WSAECONNRESET)
+    if (ret < 0 && errno_is_connection_loss(WSAGetLastError()))
     {
         /* EOF on the pipe! */
         ret = 0;
diff --git a/src/include/port.h b/src/include/port.h
index 84bf2c363f..ffa21e782a 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -99,6 +99,30 @@ extern void pgfnames_cleanup(char **filenames);
 )
 #endif

+/* Test for all errnos that report loss of an established TCP connection */
+static inline bool
+errno_is_connection_loss(int e)
+{
+    if (e == ECONNRESET ||
+        e == ECONNABORTED ||
+        e == EHOSTUNREACH ||
+        e == ENETDOWN ||
+        e == ENETUNREACH)
+        return true;
+    return false;
+}
+
+/*
+ * To test for connection-loss errnos in a switch statement, write
+ * "case ALL_CONNECTION_LOSS_ERRNOS:".
+ */
+#define ALL_CONNECTION_LOSS_ERRNOS \
+    ECONNRESET: \
+    case ECONNABORTED: \
+    case EHOSTUNREACH: \
+    case ENETDOWN: \
+    case ENETUNREACH
+
 /* Portable locale initialization (in exec.c) */
 extern void set_pglocale_pgservice(const char *argv0, const char *app);

diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h
index 8b6576b23d..7ce8c87e8d 100644
--- a/src/include/port/win32_port.h
+++ b/src/include/port/win32_port.h
@@ -351,6 +351,10 @@ extern int    pgwin32_safestat(const char *path, struct stat *buf);
 #define EADDRNOTAVAIL WSAEADDRNOTAVAIL
 #undef EHOSTUNREACH
 #define EHOSTUNREACH WSAEHOSTUNREACH
+#undef ENETDOWN
+#define ENETDOWN WSAENETDOWN
+#undef ENETUNREACH
+#define ENETUNREACH WSAENETUNREACH
 #undef ENOTCONN
 #define ENOTCONN WSAENOTCONN

diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c
index ff840b7730..ecc3b4ba0b 100644
--- a/src/interfaces/libpq/fe-misc.c
+++ b/src/interfaces/libpq/fe-misc.c
@@ -679,11 +679,9 @@ retry3:
         if (SOCK_ERRNO == EWOULDBLOCK)
             return someread;
 #endif
-        /* We might get ECONNRESET here if using TCP and backend died */
-#ifdef ECONNRESET
-        if (SOCK_ERRNO == ECONNRESET)
+        /* We might get ECONNRESET etc here if using TCP and backend died */
+        if (errno_is_connection_loss(SOCK_ERRNO))
             goto definitelyFailed;
-#endif
         /* pqsecure_read set the error message for us */
         return -1;
     }
@@ -769,11 +767,9 @@ retry4:
         if (SOCK_ERRNO == EWOULDBLOCK)
             return 0;
 #endif
-        /* We might get ECONNRESET here if using TCP and backend died */
-#ifdef ECONNRESET
-        if (SOCK_ERRNO == ECONNRESET)
+        /* We might get ECONNRESET etc here if using TCP and backend died */
+        if (errno_is_connection_loss(SOCK_ERRNO))
             goto definitelyFailed;
-#endif
         /* pqsecure_read set the error message for us */
         return -1;
     }
diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c
index d609a38bbe..3c89c34f80 100644
--- a/src/interfaces/libpq/fe-secure-openssl.c
+++ b/src/interfaces/libpq/fe-secure-openssl.c
@@ -204,7 +204,7 @@ rloop:
             {
                 result_errno = SOCK_ERRNO;
                 if (result_errno == EPIPE ||
-                    result_errno == ECONNRESET)
+                    errno_is_connection_loss(result_errno))
                     printfPQExpBuffer(&conn->errorMessage,
                                       libpq_gettext("server closed the connection unexpectedly\n"
                                                     "\tThis probably means the server terminated abnormally\n"
@@ -311,7 +311,8 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
             if (n < 0)
             {
                 result_errno = SOCK_ERRNO;
-                if (result_errno == EPIPE || result_errno == ECONNRESET)
+                if (result_errno == EPIPE ||
+                    errno_is_connection_loss(result_errno))
                     printfPQExpBuffer(&conn->errorMessage,
                                       libpq_gettext("server closed the connection unexpectedly\n"
                                                     "\tThis probably means the server terminated abnormally\n"
diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c
index 3311fd7a5b..f13651b544 100644
--- a/src/interfaces/libpq/fe-secure.c
+++ b/src/interfaces/libpq/fe-secure.c
@@ -261,14 +261,12 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
                 /* no error message, caller is expected to retry */
                 break;

-#ifdef ECONNRESET
-            case ECONNRESET:
+            case ALL_CONNECTION_LOSS_ERRNOS:
                 printfPQExpBuffer(&conn->errorMessage,
                                   libpq_gettext("server closed the connection unexpectedly\n"
                                                 "\tThis probably means the server terminated abnormally\n"
                                                 "\tbefore or while processing the request.\n"));
                 break;
-#endif

             default:
                 printfPQExpBuffer(&conn->errorMessage,
@@ -374,11 +372,9 @@ retry_masked:
                 /* Set flag for EPIPE */
                 REMEMBER_EPIPE(spinfo, true);

-#ifdef ECONNRESET
                 /* FALL THRU */

-            case ECONNRESET:
-#endif
+            case ALL_CONNECTION_LOSS_ERRNOS:
                 printfPQExpBuffer(&conn->errorMessage,
                                   libpq_gettext("server closed the connection unexpectedly\n"
                                                 "\tThis probably means the server terminated abnormally\n"
diff --git a/src/interfaces/libpq/win32.h b/src/interfaces/libpq/win32.h
index c42d7abfe3..eba4c2a358 100644
--- a/src/interfaces/libpq/win32.h
+++ b/src/interfaces/libpq/win32.h
@@ -16,15 +16,6 @@
 #undef EAGAIN                    /* doesn't apply on sockets */
 #undef EINTR
 #define EINTR WSAEINTR
-#ifndef EWOULDBLOCK
-#define EWOULDBLOCK WSAEWOULDBLOCK
-#endif
-#ifndef ECONNRESET
-#define ECONNRESET WSAECONNRESET
-#endif
-#ifndef EINPROGRESS
-#define EINPROGRESS WSAEINPROGRESS
-#endif

 /*
  * support for handling Windows Socket errors
diff --git a/src/port/strerror.c b/src/port/strerror.c
index 375edb0f5a..c179c9ae60 100644
--- a/src/port/strerror.c
+++ b/src/port/strerror.c
@@ -146,16 +146,12 @@ get_errno_symbol(int errnum)
             return "EBUSY";
         case ECHILD:
             return "ECHILD";
-#ifdef ECONNABORTED
         case ECONNABORTED:
             return "ECONNABORTED";
-#endif
         case ECONNREFUSED:
             return "ECONNREFUSED";
-#ifdef ECONNRESET
         case ECONNRESET:
             return "ECONNRESET";
-#endif
         case EDEADLK:
             return "EDEADLK";
         case EDOM:
@@ -166,10 +162,8 @@ get_errno_symbol(int errnum)
             return "EFAULT";
         case EFBIG:
             return "EFBIG";
-#ifdef EHOSTUNREACH
         case EHOSTUNREACH:
             return "EHOSTUNREACH";
-#endif
         case EIDRM:
             return "EIDRM";
         case EINPROGRESS:
@@ -198,6 +192,10 @@ get_errno_symbol(int errnum)
             return "EMSGSIZE";
         case ENAMETOOLONG:
             return "ENAMETOOLONG";
+        case ENETDOWN:
+            return "ENETDOWN";
+        case ENETUNREACH:
+            return "ENETUNREACH";
         case ENFILE:
             return "ENFILE";
         case ENOBUFS:

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Bruce Momjian
Дата:
Сообщение: Re: Wrong example in the bloom documentation
Следующее
От: Tom Lane
Дата:
Сообщение: Re: Wrong example in the bloom documentation