Re: backtrace_on_internal_error

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: backtrace_on_internal_error
Дата
Msg-id 1761445.1702163640@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: backtrace_on_internal_error  (Andres Freund <andres@anarazel.de>)
Ответы Re: backtrace_on_internal_error  (Jelte Fennema-Nio <postgres@jeltef.nl>)
Список pgsql-hackers
Andres Freund <andres@anarazel.de> writes:
> On 2023-12-09 12:41:30 -0500, Tom Lane wrote:
>> On further reflection I realized that you're right so far as the SSL
>> code path goes, because SSL_write() can involve physical reads as well
>> as writes, so at least in principle it's possible that we'd see EOF
>> reported this way from that function.

> Heh. I'll just claim that's what I was thinking about.
> I'd perhaps add a comment explaining why it's plausible that we'd see that
> that in the write case.

Done in v3 attached.

>> I also realized that we have more or less the same problem at the
>> caller level, allowing a similar failure for non-SSL connections.

> If we were treating it as EOF, we'd not "queue" an error message, no? Normally
> recv() returns 0 in that case, so we'd just return, right?

Duh, right, so more like this version.

I'm not actually sure that the fe-secure.c part of v3-0002 is
necessary, because it's guarding plain recv(2) which really shouldn't
return -1 without setting errno.  Still, it's a pretty harmless
addition.

            regards, tom lane

diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c
index 6b8125695a..22e3dc5a81 100644
--- a/src/backend/libpq/be-secure-openssl.c
+++ b/src/backend/libpq/be-secure-openssl.c
@@ -460,6 +460,7 @@ aloop:
      * per-thread error queue following another call to an OpenSSL I/O
      * routine.
      */
+    errno = 0;
     ERR_clear_error();
     r = SSL_accept(port->ssl);
     if (r <= 0)
@@ -496,7 +497,7 @@ aloop:
                                          WAIT_EVENT_SSL_OPEN_SERVER);
                 goto aloop;
             case SSL_ERROR_SYSCALL:
-                if (r < 0)
+                if (r < 0 && errno != 0)
                     ereport(COMMERROR,
                             (errcode_for_socket_access(),
                              errmsg("could not accept SSL connection: %m")));
@@ -732,7 +733,7 @@ be_tls_read(Port *port, void *ptr, size_t len, int *waitfor)
             break;
         case SSL_ERROR_SYSCALL:
             /* leave it to caller to ereport the value of errno */
-            if (n != -1)
+            if (n != -1 || errno == 0)
             {
                 errno = ECONNRESET;
                 n = -1;
@@ -790,8 +791,14 @@ be_tls_write(Port *port, void *ptr, size_t len, int *waitfor)
             n = -1;
             break;
         case SSL_ERROR_SYSCALL:
-            /* leave it to caller to ereport the value of errno */
-            if (n != -1)
+
+            /*
+             * Leave it to caller to ereport the value of errno.  However, if
+             * errno is still zero then assume it's a read EOF situation, and
+             * report ECONNRESET.  (This seems possible because SSL_write can
+             * also do reads.)
+             */
+            if (n != -1 || errno == 0)
             {
                 errno = ECONNRESET;
                 n = -1;
diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c
index e669bdbf1d..2b221e7d15 100644
--- a/src/interfaces/libpq/fe-secure-openssl.c
+++ b/src/interfaces/libpq/fe-secure-openssl.c
@@ -200,7 +200,7 @@ rloop:
              */
             goto rloop;
         case SSL_ERROR_SYSCALL:
-            if (n < 0)
+            if (n < 0 && SOCK_ERRNO != 0)
             {
                 result_errno = SOCK_ERRNO;
                 if (result_errno == EPIPE ||
@@ -301,7 +301,13 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
             n = 0;
             break;
         case SSL_ERROR_SYSCALL:
-            if (n < 0)
+
+            /*
+             * If errno is still zero then assume it's a read EOF situation,
+             * and report EOF.  (This seems possible because SSL_write can
+             * also do reads.)
+             */
+            if (n < 0 && SOCK_ERRNO != 0)
             {
                 result_errno = SOCK_ERRNO;
                 if (result_errno == EPIPE || result_errno == ECONNRESET)
@@ -1510,11 +1516,12 @@ open_client_SSL(PGconn *conn)
                      * was using the system CA pool. For other errors, log
                      * them using the normal SYSCALL logging.
                      */
-                    if (!save_errno && vcode == X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY &&
+                    if (save_errno == 0 &&
+                        vcode == X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY &&
                         strcmp(conn->sslrootcert, "system") == 0)
                         libpq_append_conn_error(conn, "SSL error: certificate verify failed: %s",
                                                 X509_verify_cert_error_string(vcode));
-                    else if (r == -1)
+                    else if (r == -1 && save_errno != 0)
                         libpq_append_conn_error(conn, "SSL SYSCALL error: %s",
                                                 SOCK_STRERROR(save_errno, sebuf, sizeof(sebuf)));
                     else
diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c
index 2802efc63f..0084a9bf13 100644
--- a/src/backend/libpq/pqcomm.c
+++ b/src/backend/libpq/pqcomm.c
@@ -936,6 +936,8 @@ pq_recvbuf(void)
     {
         int            r;

+        errno = 0;
+
         r = secure_read(MyProcPort, PqRecvBuffer + PqRecvLength,
                         PQ_RECV_BUFFER_SIZE - PqRecvLength);

@@ -948,10 +950,13 @@ pq_recvbuf(void)
              * Careful: an ereport() that tries to write to the client would
              * cause recursion to here, leading to stack overflow and core
              * dump!  This message must go *only* to the postmaster log.
+             *
+             * If errno is zero, assume it's EOF and let the caller complain.
              */
-            ereport(COMMERROR,
-                    (errcode_for_socket_access(),
-                     errmsg("could not receive data from client: %m")));
+            if (errno != 0)
+                ereport(COMMERROR,
+                        (errcode_for_socket_access(),
+                         errmsg("could not receive data from client: %m")));
             return EOF;
         }
         if (r == 0)
@@ -1028,6 +1033,8 @@ pq_getbyte_if_available(unsigned char *c)
     /* Put the socket into non-blocking mode */
     socket_set_nonblocking(true);

+    errno = 0;
+
     r = secure_read(MyProcPort, c, 1);
     if (r < 0)
     {
@@ -1044,10 +1051,13 @@ pq_getbyte_if_available(unsigned char *c)
              * Careful: an ereport() that tries to write to the client would
              * cause recursion to here, leading to stack overflow and core
              * dump!  This message must go *only* to the postmaster log.
+             *
+             * If errno is zero, assume it's EOF and let the caller complain.
              */
-            ereport(COMMERROR,
-                    (errcode_for_socket_access(),
-                     errmsg("could not receive data from client: %m")));
+            if (errno != 0)
+                ereport(COMMERROR,
+                        (errcode_for_socket_access(),
+                         errmsg("could not receive data from client: %m")));
             r = EOF;
         }
     }
diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c
index bd72a87bbb..b2430362a9 100644
--- a/src/interfaces/libpq/fe-secure.c
+++ b/src/interfaces/libpq/fe-secure.c
@@ -211,6 +211,8 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
     int            result_errno = 0;
     char        sebuf[PG_STRERROR_R_BUFLEN];

+    SOCK_ERRNO_SET(0);
+
     n = recv(conn->sock, ptr, len, 0);

     if (n < 0)
@@ -237,6 +239,11 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
                                         "\tbefore or while processing the request.");
                 break;

+            case 0:
+                /* If errno didn't get set, treat it as regular EOF */
+                n = 0;
+                break;
+
             default:
                 libpq_append_conn_error(conn, "could not receive data from server: %s",
                                         SOCK_STRERROR(result_errno,

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Peter Geoghegan
Дата:
Сообщение: Recording whether Heap2/PRUNE records are from VACUUM or from opportunistic pruning (Was: Show various offset arrays for heap WAL records)
Следующее
От: Junwang Zhao
Дата:
Сообщение: Re: Make COPY format extendable: Extract COPY TO format implementations