Re: [BUGS] Bug #613: Sequence values fall back to previously

Поиск
Список
Период
Сортировка
Искать

Re: [BUGS] Bug #613: Sequence values fall back to previously

От:
Bruce Momjian <pgman@candle.pha.pa.us>
Дата:

Re: [BUGS] Bug #613: Sequence values fall back to previously

От:
Bruce Momjian <pgman@candle.pha.pa.us>
Дата:

Re: [BUGS] Bug #613: Sequence values fall back to previously chec

От:
Tom Lane <tgl@sss.pgh.pa.us>
Дата:
Attached is a patch against current CVS that fixes both of the known
problems with sequences: failure to flush XLOG after a transaction
that only does "SELECT nextval()", and failure to force a new WAL
record to be written on the first nextval after a checkpoint.
(The latter uses Vadim's idea of looking at the sequence page LSN.)
I haven't tested it really extensively, but it seems to cure the
reported problems.

Some notes:

1. I found what I believe is another bug in the sequence logic:
		fetch = log = fetch - log + SEQ_LOG_VALS;
should be
		fetch = log = fetch + SEQ_LOG_VALS;
I can't see any reason to reduce the number of values prefetched
by the number formerly prefetched.  Also, if the sequence's "cache"
setting is large (more than SEQ_LOG_VALS), the original code could
easily fail to fetch as many values as it was supposed to cache,
let alone additional ones to be prefetched and logged.

2. I renamed XLogCtl->RedoRecPtr to SavedRedoRecPtr, and renamed
the associated routines to SetSavedRedoRecPtr/GetSavedRedoRecPtr,
in hopes of reducing confusion.

3. I believe it'd now be possible to remove SavedRedoRecPtr and
SetSavedRedoRecPtr/GetSavedRedoRecPtr entirely, in favor of letting
the postmaster fetch the updated pointer with GetRedoRecPtr just
like a backend would.  This would be cleaner and less code ... but
someone might object that it introduces a risk of postmaster hangup,
if some backend crashes whilst holding info_lck.  I consider that
risk minuscule given the short intervals in which info_lck is held,
but it can't be denied that the risk is not zero.  Thoughts?

Comments?  Unless I hear objections I will patch this in current
and the 7.2 branch.  (If we agree to remove SavedRedoRecPtr,
though, I don't think we should back-patch that change.)

			regards, tom lane

*** src/backend/access/transam/xact.c.orig	Tue Mar 12 07:56:31 2002
--- src/backend/access/transam/xact.c	Thu Mar 14 20:00:50 2002
***************
*** 546,577 ****
  	xid = GetCurrentTransactionId();
  
  	/*
! 	 * We needn't write anything in xlog or clog if the transaction was
! 	 * read-only, which we check by testing if it made any xlog entries.
  	 */
! 	if (MyLastRecPtr.xrecoff != 0)
  	{
- 		XLogRecData rdata;
- 		xl_xact_commit xlrec;
  		XLogRecPtr	recptr;
  
  		BufmgrCommit();
  
- 		xlrec.xtime = time(NULL);
- 		rdata.buffer = InvalidBuffer;
- 		rdata.data = (char *) (&xlrec);
- 		rdata.len = SizeOfXactCommit;
- 		rdata.next = NULL;
- 
  		START_CRIT_SECTION();
  
! 		/*
! 		 * SHOULD SAVE ARRAY OF RELFILENODE-s TO DROP
! 		 */
! 		recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, &rdata);
  
  		/*
! 		 * Sleep before commit! So we can flush more than one commit
  		 * records per single fsync.  (The idea is some other backend may
  		 * do the XLogFlush while we're sleeping.  This needs work still,
  		 * because on most Unixen, the minimum select() delay is 10msec or
--- 546,593 ----
  	xid = GetCurrentTransactionId();
  
  	/*
! 	 * We only need to log the commit in xlog and clog if the transaction made
! 	 * any transaction-controlled XLOG entries.  (Otherwise, its XID appears
! 	 * nowhere in permanent storage, so no one will ever care if it
! 	 * committed.)  However, we must flush XLOG to disk if we made any XLOG
! 	 * entries, whether in or out of transaction control.  For example, if we
! 	 * reported a nextval() result to the client, this ensures that any XLOG
! 	 * record generated by nextval will hit the disk before we report the
! 	 * transaction committed.
  	 */
! 	if (MyXactMadeXLogEntry)
  	{
  		XLogRecPtr	recptr;
  
  		BufmgrCommit();
  
  		START_CRIT_SECTION();
  
! 		if (MyLastRecPtr.xrecoff != 0)
! 		{
! 			/* Need to emit a commit record */
! 			XLogRecData rdata;
! 			xl_xact_commit xlrec;
! 
! 			xlrec.xtime = time(NULL);
! 			rdata.buffer = InvalidBuffer;
! 			rdata.data = (char *) (&xlrec);
! 			rdata.len = SizeOfXactCommit;
! 			rdata.next = NULL;
! 
! 			/*
! 			 * XXX SHOULD SAVE ARRAY OF RELFILENODE-s TO DROP
! 			 */
! 			recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, &rdata);
! 		}
! 		else
! 		{
! 			/* Just flush through last record written by me */
! 			recptr = ProcLastRecEnd;
! 		}
  
  		/*
! 		 * Sleep before flush! So we can flush more than one commit
  		 * records per single fsync.  (The idea is some other backend may
  		 * do the XLogFlush while we're sleeping.  This needs work still,
  		 * because on most Unixen, the minimum select() delay is 10msec or
***************
*** 593,607 ****
  
  		XLogFlush(recptr);
  
! 		/* Break the chain of back-links in the XLOG records I output */
! 		MyLastRecPtr.xrecoff = 0;
! 
! 		/* Mark the transaction committed in clog */
! 		TransactionIdCommit(xid);
  
  		END_CRIT_SECTION();
  	}
  
  	/* Show myself as out of the transaction in PROC array */
  	MyProc->logRec.xrecoff = 0;
  
--- 609,625 ----
  
  		XLogFlush(recptr);
  
! 		/* Mark the transaction committed in clog, if needed */
! 		if (MyLastRecPtr.xrecoff != 0)
! 			TransactionIdCommit(xid);
  
  		END_CRIT_SECTION();
  	}
  
+ 	/* Break the chain of back-links in the XLOG records I output */
+ 	MyLastRecPtr.xrecoff = 0;
+ 	MyXactMadeXLogEntry = false;
+ 
  	/* Show myself as out of the transaction in PROC array */
  	MyProc->logRec.xrecoff = 0;
  
***************
*** 689,696 ****
  	TransactionId xid = GetCurrentTransactionId();
  
  	/*
! 	 * We needn't write anything in xlog or clog if the transaction was
! 	 * read-only, which we check by testing if it made any xlog entries.
  	 *
  	 * Extra check here is to catch case that we aborted partway through
  	 * RecordTransactionCommit ...
--- 707,717 ----
  	TransactionId xid = GetCurrentTransactionId();
  
  	/*
! 	 * We only need to log the abort in xlog and clog if the transaction made
! 	 * any transaction-controlled XLOG entries.  (Otherwise, its XID appears
! 	 * nowhere in permanent storage, so no one will ever care if it
! 	 * committed.)  We do not flush XLOG to disk in any case, since the
! 	 * default assumption after a crash would be that we aborted, anyway.
  	 *
  	 * Extra check here is to catch case that we aborted partway through
  	 * RecordTransactionCommit ...
***************
*** 714,724 ****
  		 */
  		recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, &rdata);
  
- 		/*
- 		 * There's no need for XLogFlush here, since the default
- 		 * assumption would be that we aborted, anyway.
- 		 */
- 
  		/* Mark the transaction aborted in clog */
  		TransactionIdAbort(xid);
  
--- 735,740 ----
***************
*** 727,732 ****
--- 743,750 ----
  
  	/* Break the chain of back-links in the XLOG records I output */
  	MyLastRecPtr.xrecoff = 0;
+ 	MyXactMadeXLogEntry = false;
+ 
  	/* Show myself as out of the transaction in PROC array */
  	MyProc->logRec.xrecoff = 0;
  
*** src/backend/access/transam/xlog.c.orig	Tue Mar 12 07:56:31 2002
--- src/backend/access/transam/xlog.c	Thu Mar 14 20:29:51 2002
***************
*** 131,157 ****
  
  /*
   * MyLastRecPtr points to the start of the last XLOG record inserted by the
!  * current transaction.  If MyLastRecPtr.xrecoff == 0, then we are not in
!  * a transaction or the transaction has not yet made any loggable changes.
   *
   * Note that XLOG records inserted outside transaction control are not
!  * reflected into MyLastRecPtr.
   */
  XLogRecPtr	MyLastRecPtr = {0, 0};
  
  /*
   * ProcLastRecPtr points to the start of the last XLOG record inserted by the
   * current backend.  It is updated for all inserts, transaction-controlled
!  * or not.
   */
  static XLogRecPtr ProcLastRecPtr = {0, 0};
  
  /*
   * RedoRecPtr is this backend's local copy of the REDO record pointer
   * (which is almost but not quite the same as a pointer to the most recent
   * CHECKPOINT record).	We update this from the shared-memory copy,
   * XLogCtl->Insert.RedoRecPtr, whenever we can safely do so (ie, when we
!  * hold the Insert lock).  See XLogInsert for details.
   */
  static XLogRecPtr RedoRecPtr;
  
--- 131,166 ----
  
  /*
   * MyLastRecPtr points to the start of the last XLOG record inserted by the
!  * current transaction.  If MyLastRecPtr.xrecoff == 0, then the current
!  * xact hasn't yet inserted any transaction-controlled XLOG records.
   *
   * Note that XLOG records inserted outside transaction control are not
!  * reflected into MyLastRecPtr.  They do, however, cause MyXactMadeXLogEntry
!  * to be set true.  The latter can be used to test whether the current xact
!  * made any loggable changes (including out-of-xact changes, such as
!  * sequence updates).
   */
  XLogRecPtr	MyLastRecPtr = {0, 0};
  
+ bool		MyXactMadeXLogEntry = false;
+ 
  /*
   * ProcLastRecPtr points to the start of the last XLOG record inserted by the
   * current backend.  It is updated for all inserts, transaction-controlled
!  * or not.  ProcLastRecEnd is similar but points to end+1 of last record.
   */
  static XLogRecPtr ProcLastRecPtr = {0, 0};
  
+ XLogRecPtr	ProcLastRecEnd = {0, 0};
+ 
  /*
   * RedoRecPtr is this backend's local copy of the REDO record pointer
   * (which is almost but not quite the same as a pointer to the most recent
   * CHECKPOINT record).	We update this from the shared-memory copy,
   * XLogCtl->Insert.RedoRecPtr, whenever we can safely do so (ie, when we
!  * hold the Insert lock).  See XLogInsert for details.  We are also allowed
!  * to update from XLogCtl->Insert.RedoRecPtr if we hold the info_lck;
!  * see GetRedoRecPtr.
   */
  static XLogRecPtr RedoRecPtr;
  
***************
*** 272,278 ****
  	StartUpID	ThisStartUpID;
  
  	/* This value is not protected by *any* lock... */
! 	XLogRecPtr	RedoRecPtr;		/* see SetRedoRecPtr/GetRedoRecPtr */
  
  	slock_t		info_lck;		/* locks shared LogwrtRqst/LogwrtResult */
  } XLogCtlData;
--- 281,288 ----
  	StartUpID	ThisStartUpID;
  
  	/* This value is not protected by *any* lock... */
! 	/* see SetSavedRedoRecPtr/GetSavedRedoRecPtr */
! 	XLogRecPtr	SavedRedoRecPtr;
  
  	slock_t		info_lck;		/* locks shared LogwrtRqst/LogwrtResult */
  } XLogCtlData;
***************
*** 777,782 ****
--- 787,793 ----
  		MyLastRecPtr = RecPtr;
  	ProcLastRecPtr = RecPtr;
  	Insert->PrevRecord = RecPtr;
+ 	MyXactMadeXLogEntry = true;
  
  	Insert->currpos += SizeOfXLogRecord;
  	freespace -= SizeOfXLogRecord;
***************
*** 855,860 ****
--- 866,873 ----
  		SpinLockRelease_NoHoldoff(&xlogctl->info_lck);
  	}
  
+ 	ProcLastRecEnd = RecPtr;
+ 
  	END_CRIT_SECTION();
  
  	return (RecPtr);
***************
*** 2538,2544 ****
  
  	ThisStartUpID = checkPoint.ThisStartUpID;
  	RedoRecPtr = XLogCtl->Insert.RedoRecPtr =
! 		XLogCtl->RedoRecPtr = checkPoint.redo;
  
  	if (XLByteLT(RecPtr, checkPoint.redo))
  		elog(PANIC, "invalid redo in checkpoint record");
--- 2551,2557 ----
  
  	ThisStartUpID = checkPoint.ThisStartUpID;
  	RedoRecPtr = XLogCtl->Insert.RedoRecPtr =
! 		XLogCtl->SavedRedoRecPtr = checkPoint.redo;
  
  	if (XLByteLT(RecPtr, checkPoint.redo))
  		elog(PANIC, "invalid redo in checkpoint record");
***************
*** 2824,2855 ****
  SetThisStartUpID(void)
  {
  	ThisStartUpID = XLogCtl->ThisStartUpID;
! 	RedoRecPtr = XLogCtl->RedoRecPtr;
  }
  
  /*
   * CheckPoint process called by postmaster saves copy of new RedoRecPtr
!  * in shmem (using SetRedoRecPtr).	When checkpointer completes, postmaster
!  * calls GetRedoRecPtr to update its own copy of RedoRecPtr, so that
!  * subsequently-spawned backends will start out with a reasonably up-to-date
!  * local RedoRecPtr.  Since these operations are not protected by any lock
!  * and copying an XLogRecPtr isn't atomic, it's unsafe to use either of these
!  * routines at other times!
!  *
!  * Note: once spawned, a backend must update its local RedoRecPtr from
!  * XLogCtl->Insert.RedoRecPtr while holding the insert lock.  This is
!  * done in XLogInsert().
   */
  void
! SetRedoRecPtr(void)
  {
! 	XLogCtl->RedoRecPtr = RedoRecPtr;
  }
  
  void
  GetRedoRecPtr(void)
  {
! 	RedoRecPtr = XLogCtl->RedoRecPtr;
  }
  
  /*
--- 2837,2883 ----
  SetThisStartUpID(void)
  {
  	ThisStartUpID = XLogCtl->ThisStartUpID;
! 	RedoRecPtr = XLogCtl->SavedRedoRecPtr;
  }
  
  /*
   * CheckPoint process called by postmaster saves copy of new RedoRecPtr
!  * in shmem (using SetSavedRedoRecPtr).  When checkpointer completes,
!  * postmaster calls GetSavedRedoRecPtr to update its own copy of RedoRecPtr,
!  * so that subsequently-spawned backends will start out with a reasonably
!  * up-to-date local RedoRecPtr.  Since these operations are not protected by
!  * any lock and copying an XLogRecPtr isn't atomic, it's unsafe to use either
!  * of these routines at other times!
   */
  void
! SetSavedRedoRecPtr(void)
  {
! 	XLogCtl->SavedRedoRecPtr = RedoRecPtr;
  }
  
  void
+ GetSavedRedoRecPtr(void)
+ {
+ 	RedoRecPtr = XLogCtl->SavedRedoRecPtr;
+ }
+ 
+ /*
+  * Once spawned, a backend may update its local RedoRecPtr from
+  * XLogCtl->Insert.RedoRecPtr; it must hold the insert lock or info_lck
+  * to do so.  This is done in XLogInsert() or GetRedoRecPtr().
+  */
+ XLogRecPtr
  GetRedoRecPtr(void)
  {
! 	/* use volatile pointer to prevent code rearrangement */
! 	volatile XLogCtlData *xlogctl = XLogCtl;
! 
! 	SpinLockAcquire_NoHoldoff(&xlogctl->info_lck);
! 	Assert(XLByteLE(RedoRecPtr, xlogctl->Insert.RedoRecPtr));
! 	RedoRecPtr = xlogctl->Insert.RedoRecPtr;
! 	SpinLockRelease_NoHoldoff(&xlogctl->info_lck);
! 
! 	return RedoRecPtr;
  }
  
  /*
***************
*** 2862,2867 ****
--- 2890,2896 ----
  
  	/* suppress in-transaction check in CreateCheckPoint */
  	MyLastRecPtr.xrecoff = 0;
+ 	MyXactMadeXLogEntry = false;
  
  	CritSectionCount++;
  	CreateDummyCaches();
***************
*** 2886,2892 ****
  	uint32		_logId;
  	uint32		_logSeg;
  
! 	if (MyLastRecPtr.xrecoff != 0)
  		elog(ERROR, "CreateCheckPoint: cannot be called inside transaction block");
  
  	/*
--- 2915,2921 ----
  	uint32		_logId;
  	uint32		_logSeg;
  
! 	if (MyXactMadeXLogEntry)
  		elog(ERROR, "CreateCheckPoint: cannot be called inside transaction block");
  
  	/*
***************
*** 2972,2980 ****
  
  	/*
  	 * Here we update the shared RedoRecPtr for future XLogInsert calls;
! 	 * this must be done while holding the insert lock.
  	 */
! 	RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
  
  	/*
  	 * Get UNDO record ptr - this is oldest of PROC->logRec values. We do
--- 3001,3016 ----
  
  	/*
  	 * Here we update the shared RedoRecPtr for future XLogInsert calls;
! 	 * this must be done while holding the insert lock AND the info_lck.
  	 */
! 	{
! 		/* use volatile pointer to prevent code rearrangement */
! 		volatile XLogCtlData *xlogctl = XLogCtl;
! 
! 		SpinLockAcquire_NoHoldoff(&xlogctl->info_lck);
! 		RedoRecPtr = xlogctl->Insert.RedoRecPtr = checkPoint.redo;
! 		SpinLockRelease_NoHoldoff(&xlogctl->info_lck);
! 	}
  
  	/*
  	 * Get UNDO record ptr - this is oldest of PROC->logRec values. We do
*** src/backend/bootstrap/bootstrap.c.orig	Tue Mar 12 07:56:32 2002
--- src/backend/bootstrap/bootstrap.c	Thu Mar 14 20:19:51 2002
***************
*** 386,392 ****
  				InitDummyProcess();		/* needed to get LWLocks */
  			CreateDummyCaches();
  			CreateCheckPoint(false);
! 			SetRedoRecPtr();
  			proc_exit(0);		/* done */
  
  		case BS_XLOG_STARTUP:
--- 386,392 ----
  				InitDummyProcess();		/* needed to get LWLocks */
  			CreateDummyCaches();
  			CreateCheckPoint(false);
! 			SetSavedRedoRecPtr(); /* pass redo ptr back to postmaster */
  			proc_exit(0);		/* done */
  
  		case BS_XLOG_STARTUP:
*** src/backend/commands/sequence.c.orig	Tue Mar 12 07:56:35 2002
--- src/backend/commands/sequence.c	Thu Mar 14 22:06:22 2002
***************
*** 286,291 ****
--- 286,292 ----
  	char	   *seqname = get_seq_name(seqin);
  	SeqTable	elm;
  	Buffer		buf;
+ 	Page		page;
  	Form_pg_sequence seq;
  	int64		incby,
  				maxv,
***************
*** 316,321 ****
--- 317,323 ----
  
  	seq = read_info("nextval", elm, &buf);		/* lock page' buffer and
  												 * read tuple */
+ 	page = BufferGetPage(buf);
  
  	last = next = result = seq->last_value;
  	incby = seq->increment_by;
***************
*** 331,341 ****
  		log--;
  	}
  
  	if (log < fetch)
  	{
! 		fetch = log = fetch - log + SEQ_LOG_VALS;
  		logit = true;
  	}
  
  	while (fetch)				/* try to fetch cache [+ log ] numbers */
  	{
--- 333,365 ----
  		log--;
  	}
  
+ 	/*
+ 	 * Decide whether we should emit a WAL log record.  If so, force up
+ 	 * the fetch count to grab SEQ_LOG_VALS more values than we actually
+ 	 * need to cache.  (These will then be usable without logging.)
+ 	 *
+ 	 * If this is the first nextval after a checkpoint, we must force
+ 	 * a new WAL record to be written anyway, else replay starting from the
+ 	 * checkpoint would fail to advance the sequence past the logged
+ 	 * values.  In this case we may as well fetch extra values.
+ 	 */
  	if (log < fetch)
  	{
! 		/* forced log to satisfy local demand for values */
! 		fetch = log = fetch + SEQ_LOG_VALS;
  		logit = true;
  	}
+ 	else
+ 	{
+ 		XLogRecPtr	redoptr = GetRedoRecPtr();
+ 
+ 		if (XLByteLE(PageGetLSN(page), redoptr))
+ 		{
+ 			/* last update of seq was before checkpoint */
+ 			fetch = log = fetch + SEQ_LOG_VALS;
+ 			logit = true;
+ 		}
+ 	}
  
  	while (fetch)				/* try to fetch cache [+ log ] numbers */
  	{
***************
*** 386,391 ****
--- 410,418 ----
  		}
  	}
  
+ 	log -= fetch;				/* adjust for any unfetched numbers */
+ 	Assert(log >= 0);
+ 
  	/* save info in local cache */
  	elm->last = result;			/* last returned number */
  	elm->cached = last;			/* last fetched number */
***************
*** 396,402 ****
  		xl_seq_rec	xlrec;
  		XLogRecPtr	recptr;
  		XLogRecData rdata[2];
- 		Page		page = BufferGetPage(buf);
  
  		xlrec.node = elm->rel->rd_node;
  		rdata[0].buffer = InvalidBuffer;
--- 423,428 ----
***************
*** 417,431 ****
  
  		PageSetLSN(page, recptr);
  		PageSetSUI(page, ThisStartUpID);
- 
- 		if (fetch)				/* not all numbers were fetched */
- 			log -= fetch;
  	}
  
  	/* update on-disk data */
  	seq->last_value = last;		/* last fetched number */
  	seq->is_called = true;
- 	Assert(log >= 0);
  	seq->log_cnt = log;			/* how much is logged */
  	END_CRIT_SECTION();
  
--- 443,453 ----
*** src/backend/postmaster/postmaster.c.orig	Tue Mar 12 07:57:01 2002
--- src/backend/postmaster/postmaster.c	Thu Mar 14 20:20:01 2002
***************
*** 1683,1689 ****
  			{
  				checkpointed = time(NULL);
  				/* Update RedoRecPtr for future child backends */
! 				GetRedoRecPtr();
  			}
  		}
  		else
--- 1683,1689 ----
  			{
  				checkpointed = time(NULL);
  				/* Update RedoRecPtr for future child backends */
! 				GetSavedRedoRecPtr();
  			}
  		}
  		else
*** src/include/access/xlog.h.orig	Fri Nov 16 12:17:19 2001
--- src/include/access/xlog.h	Thu Mar 14 20:20:51 2002
***************
*** 178,183 ****
--- 178,185 ----
  extern StartUpID ThisStartUpID; /* current SUI */
  extern bool InRecovery;
  extern XLogRecPtr MyLastRecPtr;
+ extern bool MyXactMadeXLogEntry;
+ extern XLogRecPtr ProcLastRecEnd;
  
  /* these variables are GUC parameters related to XLOG */
  extern int	CheckPointSegments;
***************
*** 205,212 ****
  extern void CreateCheckPoint(bool shutdown);
  extern void SetThisStartUpID(void);
  extern void XLogPutNextOid(Oid nextOid);
! extern void SetRedoRecPtr(void);
! extern void GetRedoRecPtr(void);
  
  /* in storage/ipc/sinval.c, but don't want to declare in sinval.h because
   * we'd have to include xlog.h into that ...
--- 207,215 ----
  extern void CreateCheckPoint(bool shutdown);
  extern void SetThisStartUpID(void);
  extern void XLogPutNextOid(Oid nextOid);
! extern void SetSavedRedoRecPtr(void);
! extern void GetSavedRedoRecPtr(void);
! extern XLogRecPtr GetRedoRecPtr(void);
  
  /* in storage/ipc/sinval.c, but don't want to declare in sinval.h because
   * we'd have to include xlog.h into that ...
FAQ