Re: OOM in spgist insert

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: OOM in spgist insert
Дата
Msg-id 1577069.1620926983@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: OOM in spgist insert  (Alvaro Herrera <alvherre@alvh.no-ip.org>)
Ответы Re: OOM in spgist insert  (Tom Lane <tgl@sss.pgh.pa.us>)
Re: OOM in spgist insert  (Alvaro Herrera <alvherre@alvh.no-ip.org>)
Список pgsql-hackers
Alvaro Herrera <alvherre@alvh.no-ip.org> writes:
> (Looking again, the nbtpage.c hunk might have been made obsolete by
> c34787f91058 and other commits).

OK.  Here's a revision that adopts your idea, except that I left
out the nbtpage.c change since you aren't sure of that part.

I added a macro that allows spgdoinsert to Assert that it's not
called in a context where the infinite-loop-due-to-InterruptPending
risk would arise.  This is a little bit fragile, because it'd be
easy for ill-considered changes to ProcessInterrupts to break it,
but it's better than nothing.

            regards, tom lane

diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index 4d380c99f0..1c14fc7f76 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -1905,13 +1905,14 @@ spgSplitNodeAction(Relation index, SpGistState *state,
  * Insert one item into the index.
  *
  * Returns true on success, false if we failed to complete the insertion
- * because of conflict with a concurrent insert.  In the latter case,
- * caller should re-call spgdoinsert() with the same args.
+ * (typically because of conflict with a concurrent insert).  In the latter
+ * case, caller should re-call spgdoinsert() with the same args.
  */
 bool
 spgdoinsert(Relation index, SpGistState *state,
             ItemPointer heapPtr, Datum *datums, bool *isnulls)
 {
+    bool        result = true;
     TupleDesc    leafDescriptor = state->leafTupDesc;
     bool        isnull = isnulls[spgKeyColumn];
     int            level = 0;
@@ -2019,9 +2020,17 @@ spgdoinsert(Relation index, SpGistState *state,
         /*
          * Bail out if query cancel is pending.  We must have this somewhere
          * in the loop since a broken opclass could produce an infinite
-         * picksplit loop.
+         * picksplit loop.  Moreover, because it's typically the case that
+         * we're holding buffer lock(s), ProcessInterrupts() won't be able to
+         * throw an error now.  Hence, if we see that the interrupt condition
+         * remains true, break out of the loop and deal with the case below.
          */
         CHECK_FOR_INTERRUPTS();
+        if (INTERRUPTS_PENDING_CONDITION())
+        {
+            result = false;
+            break;
+        }

         if (current.blkno == InvalidBlockNumber)
         {
@@ -2140,10 +2149,15 @@ spgdoinsert(Relation index, SpGistState *state,
              * spgAddNode and spgSplitTuple cases will loop back to here to
              * complete the insertion operation.  Just in case the choose
              * function is broken and produces add or split requests
-             * repeatedly, check for query cancel.
+             * repeatedly, check for query cancel (see comments above).
              */
     process_inner_tuple:
             CHECK_FOR_INTERRUPTS();
+            if (INTERRUPTS_PENDING_CONDITION())
+            {
+                result = false;
+                break;
+            }

             innerTuple = (SpGistInnerTuple) PageGetItem(current.page,
                                                         PageGetItemId(current.page, current.offnum));
@@ -2267,5 +2281,21 @@ spgdoinsert(Relation index, SpGistState *state,
         UnlockReleaseBuffer(parent.buffer);
     }

-    return true;
+    /*
+     * We do not support being called while some outer function is holding a
+     * buffer lock (or any other reason to postpone query cancels).  If that
+     * were the case, telling the caller to retry would create an infinite
+     * loop.
+     */
+    Assert(INTERRUPTS_CAN_BE_PROCESSED());
+
+    /*
+     * Finally, check for interrupts again.  If there was a query cancel,
+     * ProcessInterrupts() will be able to throw the error now.  If it was
+     * some other kind of interrupt that can just be cleared, return false to
+     * tell our caller to retry.
+     */
+    CHECK_FOR_INTERRUPTS();
+
+    return result;
 }
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 6200699ddd..dd2ade7bb6 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -554,7 +554,7 @@ ProcessClientWriteInterrupt(bool blocked)
         {
             /*
              * Don't mess with whereToSendOutput if ProcessInterrupts wouldn't
-             * do anything.
+             * service ProcDiePending.
              */
             if (InterruptHoldoffCount == 0 && CritSectionCount == 0)
             {
@@ -3118,6 +3118,12 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
  * If an interrupt condition is pending, and it's safe to service it,
  * then clear the flag and accept the interrupt.  Called only when
  * InterruptPending is true.
+ *
+ * Note: if INTERRUPTS_CAN_BE_PROCESSED() is true, then ProcessInterrupts
+ * is guaranteed to clear the InterruptPending flag before returning.
+ * (This is not the same as guaranteeing that it's still clear when we
+ * return; another interrupt could have arrived.  But we promise that
+ * any pre-existing one will have been serviced.)
  */
 void
 ProcessInterrupts(void)
@@ -3248,7 +3254,11 @@ ProcessInterrupts(void)
     {
         /*
          * Re-arm InterruptPending so that we process the cancel request as
-         * soon as we're done reading the message.
+         * soon as we're done reading the message.  (XXX this is seriously
+         * ugly: it complicates INTERRUPTS_CAN_BE_PROCESSED(), and it means we
+         * can't use that macro directly as the initial test in this function,
+         * meaning that this code also creates opportunities for other bugs to
+         * appear.)
          */
         InterruptPending = true;
     }
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 95202d37af..645766a4ab 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -57,6 +57,11 @@
  * allowing die interrupts: HOLD_CANCEL_INTERRUPTS() and
  * RESUME_CANCEL_INTERRUPTS().
  *
+ * Also, INTERRUPTS_PENDING_CONDITION() can be checked to see whether an
+ * interrupt needs to be serviced, without trying to do so immediately.
+ * Some callers are also interested in INTERRUPTS_CAN_BE_PROCESSED(),
+ * which tells whether ProcessInterrupts is sure to clear the interrupt.
+ *
  * Special mechanisms are used to let an interrupt be accepted when we are
  * waiting for a lock or when we are waiting for command input (but, of
  * course, only if the interrupt holdoff counter is zero).  See the
@@ -97,24 +102,27 @@ extern PGDLLIMPORT volatile uint32 CritSectionCount;
 /* in tcop/postgres.c */
 extern void ProcessInterrupts(void);

+/* Test whether an interrupt is pending */
 #ifndef WIN32
+#define INTERRUPTS_PENDING_CONDITION() \
+    (unlikely(InterruptPending))
+#else
+#define INTERRUPTS_PENDING_CONDITION() \
+    (unlikely(UNBLOCKED_SIGNAL_QUEUE()) ? pgwin32_dispatch_queued_signals() : 0,  \
+     unlikely(InterruptPending))
+#endif

+/* Service interrupt if one is pending */
 #define CHECK_FOR_INTERRUPTS() \
 do { \
-    if (unlikely(InterruptPending)) \
-        ProcessInterrupts(); \
-} while(0)
-#else                            /* WIN32 */
-
-#define CHECK_FOR_INTERRUPTS() \
-do { \
-    if (unlikely(UNBLOCKED_SIGNAL_QUEUE())) \
-        pgwin32_dispatch_queued_signals(); \
-    if (unlikely(InterruptPending)) \
+    if (INTERRUPTS_PENDING_CONDITION()) \
         ProcessInterrupts(); \
 } while(0)
-#endif                            /* WIN32 */

+/* Is ProcessInterrupts() guaranteed to clear InterruptPending? */
+#define INTERRUPTS_CAN_BE_PROCESSED() \
+    (InterruptHoldoffCount == 0 && CritSectionCount == 0 && \
+     QueryCancelHoldoffCount == 0)

 #define HOLD_INTERRUPTS()  (InterruptHoldoffCount++)


В списке pgsql-hackers по дате отправления:

Предыдущее
От: Tom Lane
Дата:
Сообщение: Re: compute_query_id and pg_stat_statements
Следующее
От: Bruce Momjian
Дата:
Сообщение: Re: compute_query_id and pg_stat_statements