Re: Multixact and prepared transactions

Поиск
Список
Период
Сортировка
От Heikki Linnakangas
Тема Re: Multixact and prepared transactions
Дата
Msg-id 4B028857.7030505@enterprisedb.com
обсуждение исходный текст
Ответ на Multixact and prepared transactions  (Heikki Linnakangas <hlinnaka@iki.fi>)
Ответы Re: Multixact and prepared transactions  (Heikki Linnakangas <heikki.linnakangas@enterprisedb.com>)
Список pgsql-bugs
Heikki Linnakangas wrote:
> We seem to have neglected prepared transactions in the logic that tracks
> the oldest visible multixact. OldestMemberMXactId doesn't contain
> entries for prepared transactions, so the UPDATE incorrectly considers
> the multixact as an old obsolete one.
>
> A straightforward fix is to enlarge OldestMemberMXactId to make room for
> max_prepared_transactions extra entries, and at prepare transfer the
> value of the current backend to one of those slots.

So here's a patch doing that:

- OldestMemberMXactId is enlarged to make room for prepared transactions
in addition to real backends (enlarged from MaxBackends to
MaxBackends+max_prepared_xacts). OldestVisibleMXactId is enlarged as
well for simplicity, even though the slots for prepared xacts are never
used.

- OldestMemberMXactId of a backend is copied to the slot of the prepared
xact at prepare time, and also stored in the 2pc state file so that it
can be recovered at database startup.

- The arrays are indexed by backend ID, but dummy PGPROCs representing
prepared transactions don't have backend ids. So I invented "dummy
backend IDs", which are unique numbers assigned to all dummy procs,
similar to backend ids for real procs. The dummy backend IDs start from
MaxBackends+1, so we can use it directly as an index into the enlarged
OldestMemberMXactId array.

I considered assigning dummy procs real backend IDs, in
PGPROC->backendId, but backend IDs are used to index the
SISeg->procState array, for example, and there are no entries for prepared
transactions there. While I don't see anything that would break right
now, it seems dangerous.


BTW, I noticed that in deadlock.c, we reserve many working arrays of
size MaxBackends. But prepared transactions can hold locks too, and
therefore can be visited by the deadlock checker. Shouldn't we reserve
space in the arrays for prepared xacts as well? You'll be hard-pressed
to hit that in practice, given that MaxBackends includes room for
autovacuum launcher and worker too, and you'd need to have all backends
involved in the deadlock. But still.

--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 6f86961..1301d85 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -51,6 +51,8 @@
 #include "access/multixact.h"
 #include "access/slru.h"
 #include "access/transam.h"
+#include "access/twophase.h"
+#include "access/twophase_rmgr.h"
 #include "access/xact.h"
 #include "miscadmin.h"
 #include "pg_trace.h"
@@ -117,9 +119,12 @@ typedef struct MultiXactStateData

     /*
      * Per-backend data starts here.  We have two arrays stored in the area
-     * immediately following the MultiXactStateData struct. Each is indexed by
-     * BackendId.  (Note: valid BackendIds run from 1 to MaxBackends; element
-     * zero of each array is never used.)
+     * immediately following the MultiXactStateData struct. Each is indexed
+     * by BackendId.
+     *
+     * In both arrays, there's a slot for all normal backends (1..MaxBackends)
+     * followed by a slot for all max_prepared_xacts prepared transactions.
+     * Valid BackendIds start from 1; element zero of each array is never used.
      *
      * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current
      * transaction(s) could possibly be a member of, or InvalidMultiXactId
@@ -152,6 +157,12 @@ typedef struct MultiXactStateData
     MultiXactId perBackendXactIds[1];    /* VARIABLE LENGTH ARRAY */
 } MultiXactStateData;

+/*
+ * Last element of OldestMemberMXactId and OldestVisibleMXactId arrays.
+ * Valid elements are (1..MaxOldestSlot); element 0 is never used.
+ */
+#define MaxOldestSlot    (MaxBackends + max_prepared_xacts)
+
 /* Pointers to the state data in shared memory */
 static MultiXactStateData *MultiXactState;
 static MultiXactId *OldestMemberMXactId;
@@ -539,7 +550,7 @@ MultiXactIdSetOldestVisible(void)
         if (oldestMXact < FirstMultiXactId)
             oldestMXact = FirstMultiXactId;

-        for (i = 1; i <= MaxBackends; i++)
+        for (i = 1; i <= MaxOldestSlot; i++)
         {
             MultiXactId thisoldest = OldestMemberMXactId[i];

@@ -1276,6 +1287,119 @@ AtEOXact_MultiXact(void)
 }

 /*
+ * AtPrepare_MultiXact
+ *        Save multixact state at 2PC transaction prepare.
+ *
+ * In this phase, we only store our OldestMemberMXactId value in the two-phase
+ * state file.
+ */
+void
+AtPrepare_MultiXact(void)
+{
+    MultiXactId myOldestMember = OldestMemberMXactId[MyBackendId];
+
+    if (MultiXactIdIsValid(myOldestMember))
+        RegisterTwoPhaseRecord(TWOPHASE_RM_MULTIXACT_ID, 0,
+                               &myOldestMember, sizeof(MultiXactId));
+}
+
+/*
+ * PostPrepare_MultiXact
+ *        Clean up after successful PREPARE TRANSACTION
+ */
+void
+PostPrepare_MultiXact(TransactionId xid)
+{
+    MultiXactId myOldestMember;
+
+    /*
+     * Transfer our OldestMemberMXactId value to the slot reserved for the
+     * prepared transaction.
+     */
+    myOldestMember = OldestMemberMXactId[MyBackendId];
+    if (MultiXactIdIsValid(myOldestMember))
+    {
+        BackendId dummyBackendId = TwoPhaseGetDummyBackendId(xid);
+
+        /*
+         * Even though storing MultiXactId is atomic, acquire lock to make sure
+         * others see both changes, not just the reset of the slot of the
+         * current backend. Using a volatile pointer might suffice, but this
+         * isn't a hot spot.
+         */
+        LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
+
+        OldestMemberMXactId[dummyBackendId] = myOldestMember;
+        OldestMemberMXactId[MyBackendId] = InvalidMultiXactId;
+
+        LWLockRelease(MultiXactGenLock);
+    }
+
+    /*
+     * We don't need to transfer OldestVisibleMXactId value, because the
+     * transaction is not going to be looking at any more multixacts once
+     * it's prepared.
+     *
+     * We assume that storing a MultiXactId is atomic and so we need not take
+     * MultiXactGenLock to do this.
+     */
+    OldestVisibleMXactId[MyBackendId] = InvalidMultiXactId;
+
+    /*
+     * Discard the local MultiXactId cache like in AtEOXact_MultiXact
+     */
+    MXactContext = NULL;
+    MXactCache = NULL;
+}
+
+/*
+ * multixact_twophase_recover
+ *        recover the state of a prepared transaction at startup.
+ */
+void
+multixact_twophase_recover(TransactionId xid, uint16 info,
+                           void *recdata, uint32 len)
+{
+    BackendId    dummyBackendId = TwoPhaseGetDummyBackendId(xid);
+    MultiXactId oldestMember;
+
+    /*
+     * Get the oldest member MultiXactId from the state file record, and set it in
+     * the OldestMemberMXactId slot reserved for this prepared transaction.
+     */
+    Assert(len == sizeof(MultiXactId));
+    oldestMember = *((MultiXactId *)recdata);
+
+    OldestMemberMXactId[dummyBackendId] = oldestMember;
+}
+
+/*
+ * multixact_twophase_postcommit
+ *        similar to AtEOXact_MultiXact but for COMMIT PREPARED
+ */
+void
+multixact_twophase_postcommit(TransactionId xid, uint16 info,
+                              void *recdata, uint32 len)
+{
+    BackendId    dummyBackendId = TwoPhaseGetDummyBackendId(xid);
+
+    Assert(len == sizeof(MultiXactId));
+
+    OldestMemberMXactId[dummyBackendId] = InvalidMultiXactId;
+}
+
+/*
+ * multixact_twophase_postabort
+ *        this is actually just the same as the COMMIT case.
+ */
+void
+multixact_twophase_postabort(TransactionId xid, uint16 info,
+                        void *recdata, uint32 len)
+{
+    multixact_twophase_postcommit(xid, info, recdata, len);
+}
+
+/*
  * Initialization of shared memory for MultiXact.  We use two SLRU areas,
  * thus double memory.    Also, reserve space for the shared MultiXactState
  * struct and the per-backend MultiXactId arrays (two of those, too).
@@ -1287,7 +1411,7 @@ MultiXactShmemSize(void)

 #define SHARED_MULTIXACT_STATE_SIZE \
     add_size(sizeof(MultiXactStateData), \
-             mul_size(sizeof(MultiXactId) * 2, MaxBackends))
+             mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))

     size = SHARED_MULTIXACT_STATE_SIZE;
     size = add_size(size, SimpleLruShmemSize(NUM_MXACTOFFSET_BUFFERS, 0));
@@ -1329,10 +1453,10 @@ MultiXactShmemInit(void)

     /*
      * Set up array pointers.  Note that perBackendXactIds[0] is wasted space
-     * since we only use indexes 1..MaxBackends in each array.
+     * since we only use indexes 1..MaxOldestSlot in each array.
      */
     OldestMemberMXactId = MultiXactState->perBackendXactIds;
-    OldestVisibleMXactId = OldestMemberMXactId + MaxBackends;
+    OldestVisibleMXactId = OldestMemberMXactId + MaxOldestSlot;
 }

 /*
@@ -1702,7 +1826,7 @@ TruncateMultiXact(void)
         nextMXact = FirstMultiXactId;

     oldestMXact = nextMXact;
-    for (i = 1; i <= MaxBackends; i++)
+    for (i = 1; i <= MaxOldestSlot; i++)
     {
         MultiXactId thisoldest;

diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 195c90c..bafa006 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -109,6 +109,7 @@ int            max_prepared_xacts = 0;
 typedef struct GlobalTransactionData
 {
     PGPROC        proc;            /* dummy proc */
+    BackendId    dummyBackendId;    /* similar to backend id for backends */
     TimestampTz prepared_at;    /* time of preparation */
     XLogRecPtr    prepare_lsn;    /* XLOG offset of prepare record */
     Oid            owner;            /* ID of user that executed the xact */
@@ -200,6 +201,20 @@ TwoPhaseShmemInit(void)
         {
             gxacts[i].proc.links.next = (SHM_QUEUE *) TwoPhaseState->freeGXacts;
             TwoPhaseState->freeGXacts = &gxacts[i];
+
+            /*
+             * Assign a unique ID for each dummy proc, so that the range of
+             * dummy backend IDs immediately follows the range of normal
+             * backend IDs. We don't dare to assign a real backend ID to
+             * dummy procs, because prepared transactions don't take part in
+             * cache invalidation like a real backend ID would imply, but
+             * having a unique ID for them is nevertheless handy. This
+             * arrangement allows you to allocate an array of size
+             * (MaxBackends + max_prepared_xacts + 1), and have a slot for
+             * every backend and prepared transaction. Currently multixact.c
+             * uses that technique.
+             */
+            gxacts[i].dummyBackendId = MaxBackends + 1 + i;
         }
     }
     else
@@ -649,6 +664,22 @@ pg_prepared_xact(PG_FUNCTION_ARGS)

 /*
  * TwoPhaseGetDummyProc
+ *        Get the dummy backend ID for prepared transaction specified by XID
+ *
+ * Dummy backend IDs are similar to real backend IDs of real backends.
+ * They start at MaxBackends + 1, and are unique across all currently active
+ * real backends and prepared transactions.
+ */
+BackendId
+TwoPhaseGetDummyBackendId(TransactionId xid)
+{
+    PGPROC *proc = TwoPhaseGetDummyProc(xid);
+
+    return ((GlobalTransaction) proc)->dummyBackendId;
+}
+
+/*
+ * TwoPhaseGetDummyProc
  *        Get the PGPROC that represents a prepared transaction specified by XID
  */
 PGPROC *
diff --git a/src/backend/access/transam/twophase_rmgr.c b/src/backend/access/transam/twophase_rmgr.c
index 4ff9549..f019868 100644
--- a/src/backend/access/transam/twophase_rmgr.c
+++ b/src/backend/access/transam/twophase_rmgr.c
@@ -14,6 +14,7 @@
  */
 #include "postgres.h"

+#include "access/multixact.h"
 #include "access/twophase_rmgr.h"
 #include "commands/async.h"
 #include "pgstat.h"
@@ -27,7 +28,8 @@ const TwoPhaseCallback twophase_recover_callbacks[TWOPHASE_RM_MAX_ID + 1] =
     lock_twophase_recover,        /* Lock */
     NULL,                        /* Inval */
     NULL,                        /* notify/listen */
-    NULL                        /* pgstat */
+    NULL,                        /* pgstat */
+    multixact_twophase_recover    /* MultiXact */
 };

 const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] =
@@ -36,7 +38,8 @@ const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] =
     lock_twophase_postcommit,    /* Lock */
     inval_twophase_postcommit,    /* Inval */
     notify_twophase_postcommit, /* notify/listen */
-    pgstat_twophase_postcommit    /* pgstat */
+    pgstat_twophase_postcommit,    /* pgstat */
+    multixact_twophase_postcommit /* MultiXact */
 };

 const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] =
@@ -45,5 +48,6 @@ const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] =
     lock_twophase_postabort,    /* Lock */
     NULL,                        /* Inval */
     NULL,                        /* notify/listen */
-    pgstat_twophase_postabort    /* pgstat */
+    pgstat_twophase_postabort,    /* pgstat */
+    multixact_twophase_postabort /* MultiXact */
 };
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index c3606e0..777bd17 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -1856,6 +1856,7 @@ PrepareTransaction(void)
     AtPrepare_Inval();
     AtPrepare_Locks();
     AtPrepare_PgStat();
+    AtPrepare_MultiXact();

     /*
      * Here is where we really truly prepare.
@@ -1909,7 +1910,7 @@ PrepareTransaction(void)

     PostPrepare_smgr();

-    AtEOXact_MultiXact();
+    PostPrepare_MultiXact(xid);

     PostPrepare_Locks(xid);

diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 7ffc15a..72425bf 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -52,6 +52,8 @@ extern void MultiXactIdSetOldestMember(void);
 extern int    GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids);

 extern void AtEOXact_MultiXact(void);
+extern void AtPrepare_MultiXact(void);
+extern void PostPrepare_MultiXact(TransactionId xid);

 extern Size MultiXactShmemSize(void);
 extern void MultiXactShmemInit(void);
@@ -67,6 +69,13 @@ extern void MultiXactSetNextMXact(MultiXactId nextMulti,
 extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
                           MultiXactOffset minMultiOffset);

+extern void multixact_twophase_recover(TransactionId xid, uint16 info,
+                           void *recdata, uint32 len);
+extern void multixact_twophase_postcommit(TransactionId xid, uint16 info,
+                              void *recdata, uint32 len);
+extern void multixact_twophase_postabort(TransactionId xid, uint16 info,
+                             void *recdata, uint32 len);
+
 extern void multixact_redo(XLogRecPtr lsn, XLogRecord *record);
 extern void multixact_desc(StringInfo buf, uint8 xl_info, char *rec);

diff --git a/src/include/access/twophase.h b/src/include/access/twophase.h
index 64dba0c..a3a1492 100644
--- a/src/include/access/twophase.h
+++ b/src/include/access/twophase.h
@@ -15,6 +15,7 @@
 #define TWOPHASE_H

 #include "access/xlogdefs.h"
+#include "storage/backendid.h"
 #include "storage/proc.h"
 #include "utils/timestamp.h"

@@ -31,6 +32,7 @@ extern Size TwoPhaseShmemSize(void);
 extern void TwoPhaseShmemInit(void);

 extern PGPROC *TwoPhaseGetDummyProc(TransactionId xid);
+extern BackendId TwoPhaseGetDummyBackendId(TransactionId xid);

 extern GlobalTransaction MarkAsPreparing(TransactionId xid, const char *gid,
                 TimestampTz prepared_at,
diff --git a/src/include/access/twophase_rmgr.h b/src/include/access/twophase_rmgr.h
index 20c2b2f..86acc8e 100644
--- a/src/include/access/twophase_rmgr.h
+++ b/src/include/access/twophase_rmgr.h
@@ -26,7 +26,8 @@ typedef uint8 TwoPhaseRmgrId;
 #define TWOPHASE_RM_INVAL_ID        2
 #define TWOPHASE_RM_NOTIFY_ID        3
 #define TWOPHASE_RM_PGSTAT_ID        4
-#define TWOPHASE_RM_MAX_ID            TWOPHASE_RM_PGSTAT_ID
+#define TWOPHASE_RM_MULTIXACT_ID    5
+#define TWOPHASE_RM_MAX_ID            TWOPHASE_RM_MULTIXACT_ID

 extern const TwoPhaseCallback twophase_recover_callbacks[];
 extern const TwoPhaseCallback twophase_postcommit_callbacks[];

В списке pgsql-bugs по дате отправления:

Предыдущее
От: Tom Lane
Дата:
Сообщение: Re: BUG #5193: should "select * from ab limitkkk" cause a syntax error?
Следующее
От: "goldenhawking"
Дата:
Сообщение: BUG #5194: fire IDispatch Error #3015 when run PutCollect() in ADO