Re: Avoiding unnecessary reads in recovery

Поиск
Список
Период
Сортировка
От Heikki Linnakangas
Тема Re: Avoiding unnecessary reads in recovery
Дата
Msg-id 462F6D9D.8030503@enterprisedb.com
обсуждение исходный текст
Ответ на Re: Avoiding unnecessary reads in recovery  (Gregory Stark <stark@enterprisedb.com>)
Список pgsql-hackers
Gregory Stark wrote:
> So in short I think with your patch this piece of code no longer has a role.
> Either your patch kicks in and we never even look at the damaged page at all,
> or we should be treating it as corrupt data and just check zero_damaged_pages
> alone and not do anything special in recovery.

Good point. Adjusted patch attached. I added some comments as well.

--
   Heikki Linnakangas
   EnterpriseDB   http://www.enterprisedb.com
Index: src/backend/access/transam/xlogutils.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/transam/xlogutils.c,v
retrieving revision 1.49
diff -c -r1.49 xlogutils.c
*** src/backend/access/transam/xlogutils.c    5 Jan 2007 22:19:24 -0000    1.49
--- src/backend/access/transam/xlogutils.c    25 Apr 2007 14:27:05 -0000
***************
*** 226,232 ****
      if (blkno < lastblock)
      {
          /* page exists in file */
!         buffer = ReadBuffer(reln, blkno);
      }
      else
      {
--- 226,235 ----
      if (blkno < lastblock)
      {
          /* page exists in file */
!         if (init)
!             buffer = ZapBuffer(reln, blkno);
!         else
!             buffer = ReadBuffer(reln, blkno);
      }
      else
      {
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.216
diff -c -r1.216 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c    30 Mar 2007 18:34:55 -0000    1.216
--- src/backend/storage/buffer/bufmgr.c    25 Apr 2007 14:36:15 -0000
***************
*** 17,22 ****
--- 17,26 ----
   *        and pin it so that no one can destroy it while this process
   *        is using it.
   *
+  * ZapBuffer() -- like ReadBuffer, but destroys the contents of the
+  *        page. Used in recovery when the page is completely overwritten
+  *        from WAL.
+  *
   * ReleaseBuffer() -- unpin a buffer
   *
   * MarkBufferDirty() -- mark a pinned buffer's contents as "dirty".
***************
*** 97,102 ****
--- 101,107 ----
  static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
                    int set_flag_bits);
  static void buffer_write_error_callback(void *arg);
+ static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum, bool alloc_only);
  static volatile BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
              bool *foundPtr);
  static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
***************
*** 121,126 ****
--- 126,155 ----
  Buffer
  ReadBuffer(Relation reln, BlockNumber blockNum)
  {
+     return ReadBuffer_common(reln, blockNum, false);
+ }
+
+ /*
+  * ZapBuffer -- like ReadBuffer, but doesn't read the contents of the page
+  *        from disk. The caller is expected to completely rewrite the page,
+  *        regardless of the current contents. This should only be used in
+  *        recovery where there's no concurrent readers that might see the
+  *        contents of the page before the caller rewrites it.
+  */
+ Buffer
+ ZapBuffer(Relation reln, BlockNumber blockNum)
+ {
+     Assert(InRecovery);
+
+     return ReadBuffer_common(reln, blockNum, true);
+ }
+
+ /*
+  * ReadBuffer_common -- common logic of ReadBuffer and ZapBuffer.
+  */
+ static Buffer
+ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool alloc_only)
+ {
      volatile BufferDesc *bufHdr;
      Block        bufBlock;
      bool        found;
***************
*** 253,269 ****
      }
      else
      {
          smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
          /* check for garbage data */
          if (!PageHeaderIsValid((PageHeader) bufBlock))
          {
              /*
!              * During WAL recovery, the first access to any data page should
!              * overwrite the whole page from the WAL; so a clobbered page
!              * header is not reason to fail.  Hence, when InRecovery we may
!              * always act as though zero_damaged_pages is ON.
               */
!             if (zero_damaged_pages || InRecovery)
              {
                  ereport(WARNING,
                          (errcode(ERRCODE_DATA_CORRUPTED),
--- 282,304 ----
      }
      else
      {
+         /*
+          * Read in the page, unless the caller intends to overwrite it
+          * and just wants us to allocate a buffer.
+          */
+         if (!alloc_only)
+         {
          smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);
          /* check for garbage data */
          if (!PageHeaderIsValid((PageHeader) bufBlock))
          {
              /*
!              * We used to ignore corrupt pages in WAL recovery, but
!              * that was only ever safe if full_page_writes was enabled.
!              * Now the caller sets alloc_only to false if he intends to
!              * overwrite the whole page, which is already checked above.
               */
!             if (zero_damaged_pages)
              {
                  ereport(WARNING,
                          (errcode(ERRCODE_DATA_CORRUPTED),
***************
*** 277,282 ****
--- 312,318 ----
                   errmsg("invalid page header in block %u of relation \"%s\"",
                          blockNum, RelationGetRelationName(reln))));
          }
+         }
      }

      if (isLocalBuf)
Index: src/include/storage/bufmgr.h
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/storage/bufmgr.h,v
retrieving revision 1.102
diff -c -r1.102 bufmgr.h
*** src/include/storage/bufmgr.h    5 Jan 2007 22:19:57 -0000    1.102
--- src/include/storage/bufmgr.h    25 Apr 2007 14:14:28 -0000
***************
*** 111,116 ****
--- 111,117 ----
   * prototypes for functions in bufmgr.c
   */
  extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
+ extern Buffer ZapBuffer(Relation reln, BlockNumber blockNum);
  extern void ReleaseBuffer(Buffer buffer);
  extern void UnlockReleaseBuffer(Buffer buffer);
  extern void MarkBufferDirty(Buffer buffer);

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Andrew Dunstan
Дата:
Сообщение: Re: ECPG failure on BF member Vaquita (Windows Vista)
Следующее
От: Dave Page
Дата:
Сообщение: Re: ECPG failure on BF member Vaquita (Windows Vista)