libpq should not look up all host addresses at once

Поиск
Список
Период
Сортировка
Whilst fooling with the patch for CVE-2018-10915, I got annoyed by
the fact that connectDBStart() does the DNS lookups for all supplied
hostnames at once, and fails if any of them are bad.  It was reasonable
to do the lookup there when we only allowed one hostname, but now that
"host" can be a list, this is really pretty stupid.  The whole point
of allowing multiple hostnames is redundancy and avoiding a single
point of failure; but the way this is written, *each* of your servers'
DNS servers is a single point of failure all by itself.  If any one of
them is down, you don't connect.  Plus, in the normal case where you
successfully connect to something before the very last host in the list,
the extra DNS lookups are wasted --- and DNS lookups aren't that cheap,
since they typically involve a network round trip.

So I think what this code should do is (1) look up each hostname as it
needs it, not all at once, and (2) proceed on to the next hostname
if it gets a DNS lookup failure, not fail the whole connection attempt
immediately.  As attached.

I'm tempted to call this a back-patchable bug fix, because the existing
behavior basically negates the entire value of the multi-hostname feature
once you consider the possibility of DNS server failures.  But given the
lack of field complaints, maybe that's an overreaction.

I'll put this in the September fest for review.

            regards, tom lane

diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index 986f74f..8200b39 100644
*** a/src/interfaces/libpq/fe-connect.c
--- b/src/interfaces/libpq/fe-connect.c
*************** static PGconn *makeEmptyPGconn(void);
*** 360,366 ****
  static bool fillPGconn(PGconn *conn, PQconninfoOption *connOptions);
  static void freePGconn(PGconn *conn);
  static void closePGconn(PGconn *conn);
! static void release_all_addrinfo(PGconn *conn);
  static void sendTerminateConn(PGconn *conn);
  static PQconninfoOption *conninfo_init(PQExpBuffer errorMessage);
  static PQconninfoOption *parse_connection_string(const char *conninfo,
--- 360,366 ----
  static bool fillPGconn(PGconn *conn, PQconninfoOption *connOptions);
  static void freePGconn(PGconn *conn);
  static void closePGconn(PGconn *conn);
! static void release_conn_addrinfo(PGconn *conn);
  static void sendTerminateConn(PGconn *conn);
  static PQconninfoOption *conninfo_init(PQExpBuffer errorMessage);
  static PQconninfoOption *parse_connection_string(const char *conninfo,
*************** setKeepalivesWin32(PGconn *conn)
*** 1742,1751 ****
  static int
  connectDBStart(PGconn *conn)
  {
-     char        portstr[MAXPGPATH];
-     int            ret;
-     int            i;
-
      if (!conn)
          return 0;

--- 1742,1747 ----
*************** connectDBStart(PGconn *conn)
*** 1765,1865 ****
      resetPQExpBuffer(&conn->errorMessage);

      /*
-      * Look up socket addresses for each possible host using
-      * pg_getaddrinfo_all.
-      */
-     for (i = 0; i < conn->nconnhost; ++i)
-     {
-         pg_conn_host *ch = &conn->connhost[i];
-         struct addrinfo hint;
-         int            thisport;
-
-         /* Initialize hint structure */
-         MemSet(&hint, 0, sizeof(hint));
-         hint.ai_socktype = SOCK_STREAM;
-         hint.ai_family = AF_UNSPEC;
-
-         /* Figure out the port number we're going to use. */
-         if (ch->port == NULL || ch->port[0] == '\0')
-             thisport = DEF_PGPORT;
-         else
-         {
-             thisport = atoi(ch->port);
-             if (thisport < 1 || thisport > 65535)
-             {
-                 appendPQExpBuffer(&conn->errorMessage,
-                                   libpq_gettext("invalid port number: \"%s\"\n"),
-                                   ch->port);
-                 conn->options_valid = false;
-                 goto connect_errReturn;
-             }
-         }
-         snprintf(portstr, sizeof(portstr), "%d", thisport);
-
-         /* Use pg_getaddrinfo_all() to resolve the address */
-         ret = 1;
-         switch (ch->type)
-         {
-             case CHT_HOST_NAME:
-                 ret = pg_getaddrinfo_all(ch->host, portstr, &hint, &ch->addrlist);
-                 if (ret || !ch->addrlist)
-                     appendPQExpBuffer(&conn->errorMessage,
-                                       libpq_gettext("could not translate host name \"%s\" to address: %s\n"),
-                                       ch->host, gai_strerror(ret));
-                 break;
-
-             case CHT_HOST_ADDRESS:
-                 hint.ai_flags = AI_NUMERICHOST;
-                 ret = pg_getaddrinfo_all(ch->hostaddr, portstr, &hint, &ch->addrlist);
-                 if (ret || !ch->addrlist)
-                     appendPQExpBuffer(&conn->errorMessage,
-                                       libpq_gettext("could not parse network address \"%s\": %s\n"),
-                                       ch->hostaddr, gai_strerror(ret));
-                 break;
-
-             case CHT_UNIX_SOCKET:
- #ifdef HAVE_UNIX_SOCKETS
-                 hint.ai_family = AF_UNIX;
-                 UNIXSOCK_PATH(portstr, thisport, ch->host);
-                 if (strlen(portstr) >= UNIXSOCK_PATH_BUFLEN)
-                 {
-                     appendPQExpBuffer(&conn->errorMessage,
-                                       libpq_gettext("Unix-domain socket path \"%s\" is too long (maximum %d
bytes)\n"),
-                                       portstr,
-                                       (int) (UNIXSOCK_PATH_BUFLEN - 1));
-                     conn->options_valid = false;
-                     goto connect_errReturn;
-                 }
-
-                 /*
-                  * NULL hostname tells pg_getaddrinfo_all to parse the service
-                  * name as a Unix-domain socket path.
-                  */
-                 ret = pg_getaddrinfo_all(NULL, portstr, &hint, &ch->addrlist);
-                 if (ret || !ch->addrlist)
-                     appendPQExpBuffer(&conn->errorMessage,
-                                       libpq_gettext("could not translate Unix-domain socket path \"%s\" to address:
%s\n"),
-                                       portstr, gai_strerror(ret));
-                 break;
- #else
-                 Assert(false);
-                 conn->options_valid = false;
-                 goto connect_errReturn;
- #endif
-         }
-         if (ret || !ch->addrlist)
-         {
-             if (ch->addrlist)
-             {
-                 pg_freeaddrinfo_all(hint.ai_family, ch->addrlist);
-                 ch->addrlist = NULL;
-             }
-             conn->options_valid = false;
-             goto connect_errReturn;
-         }
-     }
-
-     /*
       * Set up to try to connect to the first host.  (Setting whichhost = -1 is
       * a bit of a cheat, but PQconnectPoll will advance it to 0 before
       * anything else looks at it.)
--- 1761,1766 ----
*************** keep_going:                        /* We will come back to
*** 2148,2153 ****
--- 2049,2060 ----
      /* Time to advance to next connhost[] entry? */
      if (conn->try_next_host)
      {
+         pg_conn_host *ch;
+         struct addrinfo hint;
+         int            thisport;
+         int            ret;
+         char        portstr[MAXPGPATH];
+
          if (conn->whichhost + 1 >= conn->nconnhost)
          {
              /*
*************** keep_going:                        /* We will come back to
*** 2157,2166 ****
              goto error_return;
          }
          conn->whichhost++;
!         conn->addr_cur = conn->connhost[conn->whichhost].addrlist;
!         /* If no addresses for this host, just try the next one */
!         if (conn->addr_cur == NULL)
!             goto keep_going;
          reset_connection_state_machine = true;
          conn->try_next_host = false;
      }
--- 2064,2163 ----
              goto error_return;
          }
          conn->whichhost++;
!
!         /* Drop any address info for previous host */
!         release_conn_addrinfo(conn);
!
!         /*
!          * Look up info for the new host.  On failure, log the problem in
!          * conn->errorMessage, then loop around to try the next host.  (Note
!          * we don't clear try_next_host until we've succeeded.)
!          */
!         ch = &conn->connhost[conn->whichhost];
!
!         /* Initialize hint structure */
!         MemSet(&hint, 0, sizeof(hint));
!         hint.ai_socktype = SOCK_STREAM;
!         conn->addrlist_family = hint.ai_family = AF_UNSPEC;
!
!         /* Figure out the port number we're going to use. */
!         if (ch->port == NULL || ch->port[0] == '\0')
!             thisport = DEF_PGPORT;
!         else
!         {
!             thisport = atoi(ch->port);
!             if (thisport < 1 || thisport > 65535)
!             {
!                 appendPQExpBuffer(&conn->errorMessage,
!                                   libpq_gettext("invalid port number: \"%s\"\n"),
!                                   ch->port);
!                 goto keep_going;
!             }
!         }
!         snprintf(portstr, sizeof(portstr), "%d", thisport);
!
!         /* Use pg_getaddrinfo_all() to resolve the address */
!         switch (ch->type)
!         {
!             case CHT_HOST_NAME:
!                 ret = pg_getaddrinfo_all(ch->host, portstr, &hint,
!                                          &conn->addrlist);
!                 if (ret || !conn->addrlist)
!                 {
!                     appendPQExpBuffer(&conn->errorMessage,
!                                       libpq_gettext("could not translate host name \"%s\" to address: %s\n"),
!                                       ch->host, gai_strerror(ret));
!                     goto keep_going;
!                 }
!                 break;
!
!             case CHT_HOST_ADDRESS:
!                 hint.ai_flags = AI_NUMERICHOST;
!                 ret = pg_getaddrinfo_all(ch->hostaddr, portstr, &hint,
!                                          &conn->addrlist);
!                 if (ret || !conn->addrlist)
!                 {
!                     appendPQExpBuffer(&conn->errorMessage,
!                                       libpq_gettext("could not parse network address \"%s\": %s\n"),
!                                       ch->hostaddr, gai_strerror(ret));
!                     goto keep_going;
!                 }
!                 break;
!
!             case CHT_UNIX_SOCKET:
! #ifdef HAVE_UNIX_SOCKETS
!                 conn->addrlist_family = hint.ai_family = AF_UNIX;
!                 UNIXSOCK_PATH(portstr, thisport, ch->host);
!                 if (strlen(portstr) >= UNIXSOCK_PATH_BUFLEN)
!                 {
!                     appendPQExpBuffer(&conn->errorMessage,
!                                       libpq_gettext("Unix-domain socket path \"%s\" is too long (maximum %d
bytes)\n"),
!                                       portstr,
!                                       (int) (UNIXSOCK_PATH_BUFLEN - 1));
!                     goto keep_going;
!                 }
!
!                 /*
!                  * NULL hostname tells pg_getaddrinfo_all to parse the service
!                  * name as a Unix-domain socket path.
!                  */
!                 ret = pg_getaddrinfo_all(NULL, portstr, &hint,
!                                          &conn->addrlist);
!                 if (ret || !conn->addrlist)
!                 {
!                     appendPQExpBuffer(&conn->errorMessage,
!                                       libpq_gettext("could not translate Unix-domain socket path \"%s\" to address:
%s\n"),
!                                       portstr, gai_strerror(ret));
!                     goto keep_going;
!                 }
! #else
!                 Assert(false);
! #endif
!                 break;
!         }
!
!         /* OK, scan this addrlist for a working server address */
!         conn->addr_cur = conn->addrlist;
          reset_connection_state_machine = true;
          conn->try_next_host = false;
      }
*************** keep_going:                        /* We will come back to
*** 3125,3132 ****
                      return PGRES_POLLING_READING;
                  }

!                 /* We can release the address lists now. */
!                 release_all_addrinfo(conn);

                  /* We are open for business! */
                  conn->status = CONNECTION_OK;
--- 3122,3129 ----
                      return PGRES_POLLING_READING;
                  }

!                 /* We can release the address list now. */
!                 release_conn_addrinfo(conn);

                  /* We are open for business! */
                  conn->status = CONNECTION_OK;
*************** keep_going:                        /* We will come back to
*** 3188,3195 ****
                  return PGRES_POLLING_READING;
              }

!             /* We can release the address lists now. */
!             release_all_addrinfo(conn);

              /* We are open for business! */
              conn->status = CONNECTION_OK;
--- 3185,3192 ----
                  return PGRES_POLLING_READING;
              }

!             /* We can release the address list now. */
!             release_conn_addrinfo(conn);

              /* We are open for business! */
              conn->status = CONNECTION_OK;
*************** keep_going:                        /* We will come back to
*** 3218,3223 ****
--- 3215,3223 ----
                      goto keep_going;
                  }

+                 /* We can release the address list now. */
+                 release_conn_addrinfo(conn);
+
                  /* We are open for business! */
                  conn->status = CONNECTION_OK;
                  return PGRES_POLLING_OK;
*************** keep_going:                        /* We will come back to
*** 3291,3299 ****
                      PQclear(res);
                      termPQExpBuffer(&savedMessage);

-                     /* We can release the address lists now. */
-                     release_all_addrinfo(conn);
-
                      /*
                       * Finish reading any remaining messages before being
                       * considered as ready.
--- 3291,3296 ----
*************** freePGconn(PGconn *conn)
*** 3630,3661 ****
  }

  /*
!  * release_all_addrinfo
!  *     - free addrinfo of all hostconn elements.
   */
-
  static void
! release_all_addrinfo(PGconn *conn)
  {
!     if (conn->connhost != NULL)
      {
!         int            i;
!
!         for (i = 0; i < conn->nconnhost; ++i)
!         {
!             int            family = AF_UNSPEC;
!
! #ifdef HAVE_UNIX_SOCKETS
!             if (conn->connhost[i].type == CHT_UNIX_SOCKET)
!                 family = AF_UNIX;
! #endif
!
!             pg_freeaddrinfo_all(family,
!                                 conn->connhost[i].addrlist);
!             conn->connhost[i].addrlist = NULL;
!         }
      }
-     conn->addr_cur = NULL;
  }

  /*
--- 3627,3644 ----
  }

  /*
!  * release_conn_addrinfo
!  *     - Free any addrinfo list in the PGconn.
   */
  static void
! release_conn_addrinfo(PGconn *conn)
  {
!     if (conn->addrlist)
      {
!         pg_freeaddrinfo_all(conn->addrlist_family, conn->addrlist);
!         conn->addrlist = NULL;
!         conn->addr_cur = NULL;    /* for safety */
      }
  }

  /*
*************** closePGconn(PGconn *conn)
*** 3714,3720 ****
      conn->xactStatus = PQTRANS_IDLE;
      pqClearAsyncResult(conn);    /* deallocate result */
      resetPQExpBuffer(&conn->errorMessage);
!     release_all_addrinfo(conn);

      /* Reset all state obtained from server, too */
      pqDropServerData(conn);
--- 3697,3703 ----
      conn->xactStatus = PQTRANS_IDLE;
      pqClearAsyncResult(conn);    /* deallocate result */
      resetPQExpBuffer(&conn->errorMessage);
!     release_conn_addrinfo(conn);

      /* Reset all state obtained from server, too */
      pqDropServerData(conn);
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index bc60373..fb04c8c 100644
*** a/src/interfaces/libpq/libpq-int.h
--- b/src/interfaces/libpq/libpq-int.h
*************** typedef struct pg_conn_host
*** 312,318 ****
      char       *password;        /* password for this host, read from the
                                   * password file; NULL if not sought or not
                                   * found in password file. */
-     struct addrinfo *addrlist;    /* list of possible backend addresses */
  } pg_conn_host;

  /*
--- 312,317 ----
*************** struct pg_conn
*** 412,418 ****
      /* Transient state needed while establishing connection */
      bool        try_next_addr;    /* time to advance to next address/host? */
      bool        try_next_host;    /* time to advance to next connhost[]? */
!     struct addrinfo *addr_cur;    /* backend address currently being tried */
      PGSetenvStatusType setenv_state;    /* for 2.0 protocol only */
      const PQEnvironmentOption *next_eo;
      bool        send_appname;    /* okay to send application_name? */
--- 411,419 ----
      /* Transient state needed while establishing connection */
      bool        try_next_addr;    /* time to advance to next address/host? */
      bool        try_next_host;    /* time to advance to next connhost[]? */
!     struct addrinfo *addrlist;    /* list of addresses for current connhost */
!     struct addrinfo *addr_cur;    /* the one currently being tried */
!     int            addrlist_family;    /* needed to know how to free addrlist */
      PGSetenvStatusType setenv_state;    /* for 2.0 protocol only */
      const PQEnvironmentOption *next_eo;
      bool        send_appname;    /* okay to send application_name? */

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Emre Hasegeli
Дата:
Сообщение: Re: Optimizer misses big in 10.4 with BRIN index
Следующее
От: Marina Polyakova
Дата:
Сообщение: Re: [HACKERS] WIP Patch: Pgbench Serialization and deadlock errors