Re: [HACKERS] Poor memory context performance in large hash joins

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: [HACKERS] Poor memory context performance in large hash joins
Дата
Msg-id 32354.1487977458@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: [HACKERS] Poor memory context performance in large hash joins  (Jeff Janes <jeff.janes@gmail.com>)
Ответы Re: [HACKERS] Poor memory context performance in large hash joins  (Andres Freund <andres@anarazel.de>)
Список pgsql-hackers
Jeff Janes <jeff.janes@gmail.com> writes:
> On Fri, Feb 24, 2017 at 10:59 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> Uh, what?  In a doubly-linked list, you can remove an element in O(1)
>> time, you don't need any searching.

> Currently it is walking the chain to identify which block holds the chunk
> in the first place, not just to get the pointer to the previous block.  But
> I see that that could be fixed by pointer arithmetic once there is a reason
> to fix it. Which there isn't a reason to as long as you need to walk the
> chain to get the prev pointer anyway.  Like this:?
> targetblock = (AllocBlock) (((char*)chunk) - ALLOC_BLOCKHDRSZ);

Right.  We're just turning around the existing address calculation.  It'd
be a good idea to provide some cross-check that we found a sane-looking
block header, but that's not that hard --- we know what ought to be in
aset, freeptr, and endptr for a single-chunk block's header, and that
seems like enough of a crosscheck to me.

Concretely, something like the attached.  This passes regression tests
but I've not pushed on it any harder than that.

One argument against this is that it adds some nonzero amount of overhead
to block management; but considering that we are calling malloc or realloc
or free alongside any one of these manipulations, that overhead should be
pretty negligible.  I'm a bit more worried about increasing the alloc
block header size by 8 bytes, but that would only really matter with a
very small allocChunkLimit.

            regards, tom lane

diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index 4dfc3ec..c250bae 100644
*** a/src/backend/utils/mmgr/aset.c
--- b/src/backend/utils/mmgr/aset.c
*************** typedef AllocSetContext *AllocSet;
*** 201,207 ****
  typedef struct AllocBlockData
  {
      AllocSet    aset;            /* aset that owns this block */
!     AllocBlock    next;            /* next block in aset's blocks list */
      char       *freeptr;        /* start of free space in this block */
      char       *endptr;            /* end of space in this block */
  }    AllocBlockData;
--- 201,208 ----
  typedef struct AllocBlockData
  {
      AllocSet    aset;            /* aset that owns this block */
!     AllocBlock    prev;            /* prev block in aset's blocks list, if any */
!     AllocBlock    next;            /* next block in aset's blocks list, if any */
      char       *freeptr;        /* start of free space in this block */
      char       *endptr;            /* end of space in this block */
  }    AllocBlockData;
*************** AllocSetContextCreate(MemoryContext pare
*** 522,528 ****
--- 523,532 ----
          block->aset = set;
          block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ;
          block->endptr = ((char *) block) + blksize;
+         block->prev = NULL;
          block->next = set->blocks;
+         if (block->next)
+             block->next->prev = block;
          set->blocks = block;
          /* Mark block as not to be released at reset time */
          set->keeper = block;
*************** AllocSetReset(MemoryContext context)
*** 604,609 ****
--- 608,614 ----
              VALGRIND_MAKE_MEM_NOACCESS(datastart, block->freeptr - datastart);
  #endif
              block->freeptr = datastart;
+             block->prev = NULL;
              block->next = NULL;
          }
          else
*************** AllocSetAlloc(MemoryContext context, Siz
*** 710,725 ****
  #endif

          /*
!          * Stick the new block underneath the active allocation block, so that
!          * we don't lose the use of the space remaining therein.
           */
          if (set->blocks != NULL)
          {
              block->next = set->blocks->next;
              set->blocks->next = block;
          }
          else
          {
              block->next = NULL;
              set->blocks = block;
          }
--- 715,734 ----
  #endif

          /*
!          * Stick the new block underneath the active allocation block, if any,
!          * so that we don't lose the use of the space remaining therein.
           */
          if (set->blocks != NULL)
          {
+             block->prev = set->blocks;
              block->next = set->blocks->next;
+             if (block->next)
+                 block->next->prev = block;
              set->blocks->next = block;
          }
          else
          {
+             block->prev = NULL;
              block->next = NULL;
              set->blocks = block;
          }
*************** AllocSetAlloc(MemoryContext context, Siz
*** 900,906 ****
--- 909,918 ----
          VALGRIND_MAKE_MEM_NOACCESS(block->freeptr,
                                     blksize - ALLOC_BLOCKHDRSZ);

+         block->prev = NULL;
          block->next = set->blocks;
+         if (block->next)
+             block->next->prev = block;
          set->blocks = block;
      }

*************** AllocSetFree(MemoryContext context, void
*** 960,988 ****
      {
          /*
           * Big chunks are certain to have been allocated as single-chunk
!          * blocks.  Find the containing block and return it to malloc().
           */
!         AllocBlock    block = set->blocks;
!         AllocBlock    prevblock = NULL;

!         while (block != NULL)
!         {
!             if (chunk == (AllocChunk) (((char *) block) + ALLOC_BLOCKHDRSZ))
!                 break;
!             prevblock = block;
!             block = block->next;
!         }
!         if (block == NULL)
              elog(ERROR, "could not find block containing chunk %p", chunk);
-         /* let's just make sure chunk is the only one in the block */
-         Assert(block->freeptr == ((char *) block) +
-                (chunk->size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ));

          /* OK, remove block from aset's list and free it */
!         if (prevblock == NULL)
              set->blocks = block->next;
          else
!             prevblock->next = block->next;
  #ifdef CLOBBER_FREED_MEMORY
          wipe_mem(block, block->freeptr - ((char *) block));
  #endif
--- 972,999 ----
      {
          /*
           * Big chunks are certain to have been allocated as single-chunk
!          * blocks.  Just unlink that block and return it to malloc().
           */
!         AllocBlock    block = (AllocBlock) (((char *) chunk) - ALLOC_BLOCKHDRSZ);

!         /*
!          * Try to verify that we have a sane block pointer: it should
!          * reference the correct aset, and freeptr and endptr should point
!          * just past the chunk.
!          */
!         if (block->aset != set ||
!             block->freeptr != block->endptr ||
!             block->freeptr != ((char *) block) +
!             (chunk->size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ))
              elog(ERROR, "could not find block containing chunk %p", chunk);

          /* OK, remove block from aset's list and free it */
!         if (block->prev == NULL)
              set->blocks = block->next;
          else
!             block->prev->next = block->next;
!         if (block->next != NULL)
!             block->next->prev = block->prev;
  #ifdef CLOBBER_FREED_MEMORY
          wipe_mem(block, block->freeptr - ((char *) block));
  #endif
*************** AllocSetRealloc(MemoryContext context, v
*** 1088,1114 ****
      if (oldsize > set->allocChunkLimit)
      {
          /*
!          * The chunk must have been allocated as a single-chunk block.  Find
!          * the containing block and use realloc() to make it bigger with
!          * minimum space wastage.
           */
!         AllocBlock    block = set->blocks;
!         AllocBlock    prevblock = NULL;
          Size        chksize;
          Size        blksize;

!         while (block != NULL)
!         {
!             if (chunk == (AllocChunk) (((char *) block) + ALLOC_BLOCKHDRSZ))
!                 break;
!             prevblock = block;
!             block = block->next;
!         }
!         if (block == NULL)
              elog(ERROR, "could not find block containing chunk %p", chunk);
-         /* let's just make sure chunk is the only one in the block */
-         Assert(block->freeptr == ((char *) block) +
-                (chunk->size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ));

          /* Do the realloc */
          chksize = MAXALIGN(size);
--- 1099,1122 ----
      if (oldsize > set->allocChunkLimit)
      {
          /*
!          * The chunk must have been allocated as a single-chunk block.  Use
!          * realloc() to make the containing block bigger with minimum space
!          * wastage.
           */
!         AllocBlock    block = (AllocBlock) (((char *) chunk) - ALLOC_BLOCKHDRSZ);
          Size        chksize;
          Size        blksize;

!         /*
!          * Try to verify that we have a sane block pointer: it should
!          * reference the correct aset, and freeptr and endptr should point
!          * just past the chunk.
!          */
!         if (block->aset != set ||
!             block->freeptr != block->endptr ||
!             block->freeptr != ((char *) block) +
!             (chunk->size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ))
              elog(ERROR, "could not find block containing chunk %p", chunk);

          /* Do the realloc */
          chksize = MAXALIGN(size);
*************** AllocSetRealloc(MemoryContext context, v
*** 1121,1130 ****
          /* Update pointers since block has likely been moved */
          chunk = (AllocChunk) (((char *) block) + ALLOC_BLOCKHDRSZ);
          pointer = AllocChunkGetPointer(chunk);
!         if (prevblock == NULL)
              set->blocks = block;
          else
!             prevblock->next = block;
          chunk->size = chksize;

  #ifdef MEMORY_CONTEXT_CHECKING
--- 1129,1140 ----
          /* Update pointers since block has likely been moved */
          chunk = (AllocChunk) (((char *) block) + ALLOC_BLOCKHDRSZ);
          pointer = AllocChunkGetPointer(chunk);
!         if (block->prev == NULL)
              set->blocks = block;
          else
!             block->prev->next = block;
!         if (block->next != NULL)
!             block->next->prev = block;
          chunk->size = chksize;

  #ifdef MEMORY_CONTEXT_CHECKING
*************** AllocSetCheck(MemoryContext context)
*** 1315,1323 ****
  {
      AllocSet    set = (AllocSet) context;
      char       *name = set->header.name;
      AllocBlock    block;

!     for (block = set->blocks; block != NULL; block = block->next)
      {
          char       *bpoz = ((char *) block) + ALLOC_BLOCKHDRSZ;
          long        blk_used = block->freeptr - bpoz;
--- 1325,1336 ----
  {
      AllocSet    set = (AllocSet) context;
      char       *name = set->header.name;
+     AllocBlock    prevblock;
      AllocBlock    block;

!     for (prevblock = NULL, block = set->blocks;
!          block != NULL;
!          prevblock = block, block = block->next)
      {
          char       *bpoz = ((char *) block) + ALLOC_BLOCKHDRSZ;
          long        blk_used = block->freeptr - bpoz;
*************** AllocSetCheck(MemoryContext context)
*** 1335,1340 ****
--- 1348,1363 ----
          }

          /*
+          * Check block header fields
+          */
+         if (block->aset != set ||
+             block->prev != prevblock ||
+             block->freeptr < bpoz ||
+             block->freeptr > block->endptr)
+             elog(WARNING, "problem in alloc set %s: corrupt header in block %p",
+                  name, block);
+
+         /*
           * Chunk walker
           */
          while (bpoz < block->freeptr)

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Andrew Dunstan
Дата:
Сообщение: Re: [HACKERS] btree_gin and btree_gist for enums
Следующее
От: Erik Rijkers
Дата:
Сообщение: Re: [HACKERS] Logical replication existing data copy