Hi,
Recently we encountered a case where clients connected to a PostgreSQL server through the psqlODBC driver got stuck (hung) when the server shut down unexpectedly (hard power-off or LAN cable removed). The clients hung forever. I analyzed the code and found a potential issue with the retry-count logic in the function SOCK_wait_for_ready() (in socket.c).
The no_timeout variable is initialized to TRUE and is not modified if retry_count is a positive number. That means that if a positive retry_count is passed, it is treated as “no timeout”. If I understand it correctly, it should work the other way around: if retry_count is a positive number, the socket should wait with a timeout and not indefinitely.
I have changed the default initialization of no_timeout to FALSE. The patch is attached. Please review it and let me know your comments.
I have copied the function below for quick reference (socket.c, SOCK_wait_for_ready()):
/*
 * Wait until sock->socket is ready for writing (output is true) or for
 * reading (output is false), using poll() when HAVE_POLL is defined and
 * select() otherwise.
 *
 * retry_count selects the waiting mode and, when a timeout is used, is the
 * timeout in seconds:
 *   == 0  no_timeout becomes FALSE, so the wait uses a timeout of
 *         retry_count (i.e. zero) seconds;
 *    < 0  no_timeout is TRUE, the wait has no timeout;
 *    > 0  no_timeout keeps its initial TRUE (the USE_SSL branch can only
 *         set it to TRUE again), so the wait has no timeout either.
 *
 * Returns the poll()/select() result. When that result is 0 and the
 * absolute value of retry_count exceeds MAX_RETRY_COUNT, a read or write
 * timeout error is recorded on sock and -1 is returned instead.
 *
 * NOTE(review): as written, no positive retry_count ever produces a timed
 * wait; the only path that clears no_timeout is retry_count == 0.
 */
528 static int SOCK_wait_for_ready(SocketClass *sock, BOOL output, int retry_count)
529 {
530 int ret, gerrno;
531 #ifdef HAVE_POLL
532 struct pollfd fds;
533 #else
534 fd_set fds, except_fds;
535 struct timeval tm;
536 #endif /* HAVE_POLL */
/* Default is "wait without timeout"; only retry_count == 0 changes it below. */
537 BOOL no_timeout = TRUE;
538
539 if (0 == retry_count)
540 no_timeout = FALSE;
541 else if (0 > retry_count)
542 no_timeout = TRUE;
543 #ifdef USE_SSL
/* Positive retry_count on a socket without an SSL object: still no timeout. */
544 else if (sock->ssl == NULL)
545 no_timeout = TRUE;
546 #endif /* USE_SSL */
/* Repeat the wait for as long as it fails with EINTR. */
547 do {
548 #ifdef HAVE_POLL
549 fds.fd = sock->socket;
550 fds.events = output ? POLLOUT : POLLIN;
551 fds.revents = 0;
/* poll() takes milliseconds; a negative timeout means wait indefinitely. */
552 ret = poll(&fds, 1, no_timeout ? -1 : retry_count * 1000);
553 mylog("!!! poll ret=%d revents=%x\n", ret, fds.revents);
554 #else
/* The fd sets (and tm, when used) are rebuilt on every iteration of the loop. */
555 FD_ZERO(&fds);
556 FD_ZERO(&except_fds);
557 FD_SET(sock->socket, &fds);
558 FD_SET(sock->socket, &except_fds);
559 if (!no_timeout)
560 {
561 tm.tv_sec = retry_count;
562 tm.tv_usec = 0;
563 }
/* fds is passed as the write set for output and as the read set otherwise; a NULL timeout means no timeout. */
564 ret = select((int) sock->socket + 1, output ? NULL : &fds, output ? &fds : NULL, &except_fds, no_timeout ? NULL : &tm);
565 #endif /* HAVE_POLL */
/* Capture the socket error code right after the call so the loop condition tests this call's error. */
566 gerrno = SOCK_ERRNO;
567 } while (ret < 0 && EINTR == gerrno);
/* Compare the magnitude of retry_count against MAX_RETRY_COUNT. */
568 if (retry_count < 0)
569 retry_count *= -1;
/* A 0 result is reported as a timeout error only when retry_count exceeds MAX_RETRY_COUNT. */
570 if (0 == ret && retry_count > MAX_RETRY_COUNT)
571 {
572 ret = -1;
573 SOCK_set_error(sock, output ? SOCKET_WRITE_TIMEOUT : SOCKET_READ_TIMEOUT, "SOCK_wait_for_ready timeout");
574 }
575 return ret;
576 }
--
Regards,
Prakash I.