Re: MultiXact\SLRU buffers configuration

Поиск
Список
Период
Сортировка
От Kyotaro Horiguchi
Тема Re: MultiXact\SLRU buffers configuration
Дата
Msg-id 20200515.090333.24867479329066911.horikyota.ntt@gmail.com
обсуждение исходный текст
Ответ на Re: MultiXact\SLRU buffers configuration  ("Andrey M. Borodin" <x4mmm@yandex-team.ru>)
Ответы Re: MultiXact\SLRU buffers configuration  ("Andrey M. Borodin" <x4mmm@yandex-team.ru>)
Re: MultiXact\SLRU buffers configuration  (Daniel Gustafsson <daniel@yesql.se>)
Список pgsql-hackers
At Thu, 14 May 2020 11:44:01 +0500, "Andrey M. Borodin" <x4mmm@yandex-team.ru> wrote in 
> > GetMultiXactIdMembers believes that 4 is successfully done if 2
> > returned valid offset, but actually that is not obvious.
> > 
> > If we add a single giant lock just to isolate ,say,
> > GetMultiXactIdMember and RecordNewMultiXact, it reduces concurrency
> > unnecessarily.  Perhaps we need finer-grained locking-key for standby
> > that works similary to buffer lock on primary, that doesn't cause
> > confilicts between irrelevant mxids.
> > 
> We can just replay members before offsets. If offset is already there - members are there too.
> But I'd be happy if we could mitigate those 1000us too - with a hint about last maixd state in a shared MX state, for
example.

Generally in such cases, condition variables would work.  In the
attached PoC, the reader side gets no penalty in the "likely" code
path.  The writer side always calls ConditionVariableBroadcast but the
waiter list is empty in almost all cases.  But I couldn't cause the
situation where the sleep 1000u is reached.

> Actually, if we read empty mxid array instead of something that is replayed just yet - it's not a problem of
inconsistency,because transaction in this mxid could not commit before we started. ISTM.
 
> So instead of fix, we, probably, can just add a comment. If this reasoning is correct.

The step 4 of the reader side reads the members of the target mxid. It
is already written if the offset of the *next* mxid is filled-in.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index e2aa5c9ce4..9db8f6cddd 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -82,6 +82,7 @@
 #include "lib/ilist.h"
 #include "miscadmin.h"
 #include "pg_trace.h"
+#include "pgstat.h"
 #include "postmaster/autovacuum.h"
 #include "storage/lmgr.h"
 #include "storage/pmsignal.h"
@@ -233,6 +234,7 @@ typedef struct MultiXactStateData
     /* support for members anti-wraparound measures */
     MultiXactOffset offsetStopLimit;    /* known if oldestOffsetKnown */
 
+    ConditionVariable nextoff_cv;
     /*
      * Per-backend data starts here.  We have two arrays stored in the area
      * immediately following the MultiXactStateData struct. Each is indexed by
@@ -873,6 +875,14 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
     /* Exchange our lock */
     LWLockRelease(MultiXactOffsetControlLock);
 
+    /*
+     *  Let everybody know the offset of this mxid is recorded now. The waiters
+     *  are waiting for the offset of the mxid next of the target to know the
+     *  number of members of the target mxid, so we don't need to wait for
+     *  members of this mxid are recorded.
+     */
+    ConditionVariableBroadcast(&MultiXactState->nextoff_cv);
+
     LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
 
     prev_pageno = -1;
@@ -1367,9 +1377,19 @@ retry:
         if (nextMXOffset == 0)
         {
             /* Corner case 2: next multixact is still being filled in */
+
+            /*
+             * The recorder of the next mxid is just before writing the offset.
+             * Wait for the offset to be written.
+             */
+            ConditionVariablePrepareToSleep(&MultiXactState->nextoff_cv);
+
             LWLockRelease(MultiXactOffsetControlLock);
             CHECK_FOR_INTERRUPTS();
-            pg_usleep(1000L);
+
+            ConditionVariableSleep(&MultiXactState->nextoff_cv,
+                                   WAIT_EVENT_WAIT_NEXT_MXMEMBERS);
+            ConditionVariableCancelSleep();
             goto retry;
         }
 
@@ -1847,6 +1867,7 @@ MultiXactShmemInit(void)
 
         /* Make sure we zero out the per-backend state */
         MemSet(MultiXactState, 0, SHARED_MULTIXACT_STATE_SIZE);
+        ConditionVariableInit(&MultiXactState->nextoff_cv);
     }
     else
         Assert(found);
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 0ecd29a1d9..1ac6b37188 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3879,6 +3879,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
         case WAIT_EVENT_SYNC_REP:
             event_name = "SyncRep";
             break;
+        case WAIT_EVENT_WAIT_NEXT_MXMEMBERS:
+            event_name = "Mact/WaitNextXactMembers";
+            break;
             /* no default case, so that compiler will warn */
     }
 
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index ae9a39573c..e79bba0bef 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -886,7 +886,8 @@ typedef enum
     WAIT_EVENT_REPLICATION_ORIGIN_DROP,
     WAIT_EVENT_REPLICATION_SLOT_DROP,
     WAIT_EVENT_SAFE_SNAPSHOT,
-    WAIT_EVENT_SYNC_REP
+    WAIT_EVENT_SYNC_REP,
+    WAIT_EVENT_WAIT_NEXT_MXMEMBERS
 } WaitEventIPC;
 
 /* ----------

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Alvaro Herrera
Дата:
Сообщение: Re: Add A Glossary
Следующее
От: Andy Fan
Дата:
Сообщение: Re: Add "-Wimplicit-fallthrough" to default flags