Обсуждение: BUG #19438: segfault with temp_file_limit inside cursor

Поиск
Список
Период
Сортировка

BUG #19438: segfault with temp_file_limit inside cursor

От
PG Bug reporting form
Дата:
The following bug has been logged on the website:

Bug reference:      19438
Logged by:          Dmitriy Kuzmin
Email address:      kuzmin.db4@gmail.com
PostgreSQL version: 14.22
Operating system:   Rocky Linux 8.10 (Green Obsidian)
Description:

Greetings

I experimented with setting temp_file_limit within a cursor and discovered a
segmentation fault under certain circumstances.
The issue exist in the current minors of 14 and 15 (14.22 and 15.17), but I
was unable to reproduce it in version 16 or higher.

To reproduce, simply run the following code.

begin;
declare cur1 cursor for select c, c c2  from generate_series(0, 1000000)
x(c) order by c;
\o /dev/null
fetch all from cur1;
set temp_file_limit TO '1MB';
fetch backward all from cur1;
rollback ;

Logs:
2026-03-25 16:24:58.264 MSK [3321241] ERROR:  temporary file size exceeds
temp_file_limit (1024kB)
2026-03-25 16:24:58.264 MSK [3321241] STATEMENT:  fetch backward all from
cur1;
2026-03-25 16:24:58.338 MSK [3320934] LOG:  server process (PID 3321241) was
terminated by signal 11: Segmentation fault
2026-03-25 16:24:58.338 MSK [3320934] DETAIL:  Failed process was running:
rollback ;
2026-03-25 16:24:58.338 MSK [3320934] LOG:  terminating any other active
server processes

Backtrace on pastebin(postgresql 14.22): https://pastebin.com/2srPbzhN
Backtrace(postgresql 14.22)

[New LWP 3320966]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
Core was generated by `postgres: postgres postgres [local]'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  pfree (pointer=0x2d81538) at mcxt.c:1202
1202            context->methods->free_p(context, pointer);
#0  pfree (pointer=0x2d81538) at mcxt.c:1202
        context = 0x0
#1  0x000000000095399f in tuplestore_end (state=0x2d81318) at
tuplestore.c:462
        i = 0
#2  0x0000000000946920 in PortalDrop (portal=0x2ccf7f8,
isTopCommit=<optimized out>) at portalmem.c:585
        oldcontext = 0x2c6c930
        __func__ = "PortalDrop"
#3  0x0000000000946a50 in CreatePortal (name=name@entry=0xaa970d "",
allowDup=allowDup@entry=true, dupSilent=dupSilent@entry=true) at
portalmem.c:193
        portal = 0x2ccf7f8
        __func__ = "CreatePortal"
#4  0x0000000000801116 in exec_simple_query (query_string=0x2c6ca48
"rollback ;") at postgres.c:1124
        snapshot_set = false
        per_parsetree_context = 0x0
        plantree_list = 0x2c6d7d0
        parsetree = 0x2c6d450
        commandTag = CMDTAG_ROLLBACK
        qc = {commandTag = CMDTAG_UNKNOWN, nprocessed = 1064392740122972416}
        querytree_list = <optimized out>
        portal = <optimized out>
        receiver = <optimized out>
        format = 0
        parsetree_item__state = {l = 0x2c6d480, i = 0}
        dest = DestRemote
        oldcontext = 0x2d22810
        parsetree_list = 0x2c6d480
        parsetree_item = <optimized out>
        save_log_statement_stats = false
        was_logged = false
        use_implicit_block = false
        msec_str =

"Z\000\000\000\000\000\000\000Q\000\000\000\000\000\000\000\370\227\311\002\000\000\000\000\267\360\222\000\000\000\000"
        __func__ = "exec_simple_query"
#5  0x0000000000802a6d in PostgresMain (argc=argc@entry=1,
argv=argv@entry=0x7ffcd5351a90, dbname=<optimized out>, username=<optimized
out>) at postgres.c:4571
        query_string = 0x2c6ca48 "rollback ;"
        firstchar = <optimized out>
        input_message = {data = 0x2c6ca48 "rollback ;", len = 11, maxlen =
1024, cursor = 11}
        local_sigjmp_buf = {{__jmpbuf = {140723885512688,
7291976700258799160, 46766072, 0, 3, 582, -7292484179921744328,
7291977799450480184}, __mask_was_saved = 1, __saved_mask = {__val =
{4194304, 140723885518811, 0, 0, 140723885513328, 140321394626256,
1064392740122972416, 206158430232, 9872339, 206158430240, 140723885513248,
140723885513056, 1064392740122972416, 46728512, 0, 11180896}}}}
        send_ready_for_query = false
        idle_in_transaction_timeout_enabled = false
        idle_session_timeout_enabled = false
        __func__ = "PostgresMain"
#6  0x00000000007816ca in BackendRun (port=<optimized out>, port=<optimized
out>) at postmaster.c:4543
        av = {0x972bd4 "postgres", 0x0}
        ac = 1
        av = <optimized out>
        ac = <optimized out>
#7  BackendStartup (port=<optimized out>) at postmaster.c:4265
        bn = <optimized out>
        pid = <optimized out>
        bn = <optimized out>
        pid = <optimized out>
        __func__ = "BackendStartup"
        __errno_location = <optimized out>
        __errno_location = <optimized out>
        save_errno = <optimized out>
        __errno_location = <optimized out>
        __errno_location = <optimized out>
#8  ServerLoop () at postmaster.c:1752
        port = <optimized out>
        i = <optimized out>
        rmask = {fds_bits = {256, 0 <repeats 15 times>}}
        selres = <optimized out>
        now = <optimized out>
        readmask = {fds_bits = {960, 0 <repeats 15 times>}}
        nSockets = <optimized out>
        last_lockfile_recheck_time = 1774444257
        last_touch_time = 1774444257
        __func__ = "ServerLoop"
#9  0x0000000000782539 in PostmasterMain (argc=argc@entry=3,
argv=argv@entry=0x2c65120) at postmaster.c:1424
        opt = <optimized out>
        status = <optimized out>
        userDoption = <optimized out>
        listen_addr_saved = true
        i = <optimized out>
        output_config_variable = <optimized out>
        __func__ = "PostmasterMain"
#10 0x0000000000500bde in main (argc=3, argv=0x2c65120) at main.c:211
No locals.





Re: BUG #19438: segfault with temp_file_limit inside cursor

От
Tom Lane
Дата:
PG Bug reporting form <noreply@postgresql.org> writes:
> I experimented with setting temp_file_limit within a cursor and discovered a
> segmentation fault under certain circumstances.
> The issue exist in the current minors of 14 and 15 (14.22 and 15.17), but I
> was unable to reproduce it in version 16 or higher.

> To reproduce, simply run the following code.

> begin;
> declare cur1 cursor for select c, c c2  from generate_series(0, 1000000)
> x(c) order by c;
> \o /dev/null
> fetch all from cur1;
> set temp_file_limit TO '1MB';
> fetch backward all from cur1;
> rollback ;

Many thanks for the report!  I confirm your results that this fails
in v14 and v15 but not later branches.  However, I'm quite mystified
why v16 and v17 don't fail.  The attached patch fixes it in v15,
and I think we need to apply it to all branches.

What is happening is that the last FETCH is trying to fill the
holdStore of the Portal holding the FETCH execution, and we soon run
out of work_mem and start dumping the tuples into a temp file.  While
doing that, we run up against the temp_file_limit and fd.c throws an
error.  This leaves the Portal's holdStore in a corrupted state, as a
result of the oversight described and fixed in the attached patch:
we've already deleted some tuples from its in-memory array, but the
tuplestore's state doesn't reflect that.  Then during transaction
abort we must clean up the tuplestore (since it's part of a long-lived
data structure), and tuplestore_end therefore tries to delete all the
tuples in the in-memory array.  Double free.  Kaboom.

At least, that's what happens in v15 and (probably) all prior branches
for a long way back.  v18 and later fortuitously avoid the failure
because they got rid of tuplestore_end's retail tuple deletion loop
in favor of a memory context deletion (cf 590b045c3).  v16 and v17
*should* fail, but somehow they don't, and I don't understand why not.
I bisected it and determined that the failures stop with

c6e0fe1f2a08505544c410f613839664eea9eb21 is the first new commit
commit c6e0fe1f2a08505544c410f613839664eea9eb21
Author: David Rowley <drowley@postgresql.org>
Date:   Mon Aug 29 17:15:00 2022 +1200

    Improve performance of and reduce overheads of memory management

which makes no sense whatsoever.  Somehow, we are not crashing on a
double free with the new memory chunk header infrastructure.  David,
have you any idea why not?

Even though no failure manifests with this example in v16+, we are
clearly at risk by leaving corrupted tuplestore state behind,
so I think the attached has to go into all branches.

            regards, tom lane

diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c
index f605ece721e..f12e8d23a9c 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -1221,6 +1221,17 @@ dumptuples(Tuplestorestate *state)
         if (i >= state->memtupcount)
             break;
         WRITETUP(state, state->memtuples[i]);
+
+        /*
+         * Increase memtupdeleted to track the fact that we just deleted that
+         * tuple.  Think not to remove this on the grounds that we'll reset
+         * memtupdeleted to zero below.  We might not get there, if some later
+         * WRITETUP fails (e.g. due to overrunning temp_file_limit).  If so,
+         * we'd error out leaving an effectively-corrupt tuplestore, which
+         * would be quite bad if it's a persistent data structure such as a
+         * Portal's holdStore.
+         */
+        state->memtupdeleted++;
     }
     state->memtupdeleted = 0;
     state->memtupcount = 0;

Re: BUG #19438: segfault with temp_file_limit inside cursor

От
Tom Lane
Дата:
I wrote:
> Somehow, we are not crashing on a
> double free with the new memory chunk header infrastructure.

In fact, we are not.  AllocSetFree does not change the "hdrmask" field
of a freed chunk.  So if we try to free it again, we end up right back
at AllocSetFree, and the outcome is there's no detected problem but
the corresponding freelist is now corrupt because the chunk got linked
into it twice.  In this example that doesn't cause any visible
misbehavior, because we'll free the holdStore's context before doing
very much more with it (and AllocSetCheck won't notice this type of
corruption).  Other cases could lead to very hard-to-diagnose problems
that manifest somewhere far removed from the actual bug.

In MEMORY_CONTEXT_CHECKING builds, we can cheaply detect double frees
by using the existing behavior that requested_size is set to
InvalidAllocSize during AllocSetFree.  Another plausible idea is to
change a freed chunk's MemoryContextMethodID to something invalid,
which'd permit detection of double frees even in
non-MEMORY_CONTEXT_CHECKING builds.

I made draft patches showing how to do it both ways.  (Both patches
pass check-world and are able to detect the bug in v17.)  The
methodid-change way seems like the better alternative to me,
but it is more invasive and does add a cycle or two when freeing or
reusing a chunk.

The other mcxt modules need to be looked at too, but I thought
I'd try to get agreement on the solution approach before going
further.

            regards, tom lane

diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index 161c2e2d3df..ebf237bf575 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -1175,6 +1175,10 @@ AllocSetFree(void *pointer)
         link = GetFreeListLink(chunk);
 
 #ifdef MEMORY_CONTEXT_CHECKING
+        /* Test for previously-freed chunk */
+        if (unlikely(chunk->requested_size == InvalidAllocSize))
+            elog(WARNING, "detected double pfree in %s %p",
+                 set->header.name, chunk);
         /* Test for someone scribbling on unused space in chunk */
         if (chunk->requested_size < GetChunkSizeFromFreeListIdx(fidx))
             if (!sentinel_ok(pointer, chunk->requested_size))
diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index 161c2e2d3df..82ce25133ff 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -765,7 +765,7 @@ AllocSetAllocLarge(MemoryContext context, Size size, int flags)

     chunk = (MemoryChunk *) (((char *) block) + ALLOC_BLOCKHDRSZ);

-    /* mark the MemoryChunk as externally managed */
+    /* mark the MemoryChunk as externally managed and valid */
     MemoryChunkSetHdrMaskExternal(chunk, MCTX_ASET_ID);

 #ifdef MEMORY_CONTEXT_CHECKING
@@ -826,7 +826,7 @@ AllocSetAllocChunkFromBlock(MemoryContext context, AllocBlock block,
     block->freeptr += (chunk_size + ALLOC_CHUNKHDRSZ);
     Assert(block->freeptr <= block->endptr);

-    /* store the free list index in the value field */
+    /* store the free list index in the value field, and mark chunk valid */
     MemoryChunkSetHdrMask(chunk, block, fidx, MCTX_ASET_ID);

 #ifdef MEMORY_CONTEXT_CHECKING
@@ -910,8 +910,8 @@ AllocSetAllocFromNewBlock(MemoryContext context, Size size, int flags,
         block->freeptr += (availchunk + ALLOC_CHUNKHDRSZ);
         availspace -= (availchunk + ALLOC_CHUNKHDRSZ);

-        /* store the freelist index in the value field */
-        MemoryChunkSetHdrMask(chunk, block, a_fidx, MCTX_ASET_ID);
+        /* store the freelist index in the value field, but mark chunk free */
+        MemoryChunkSetHdrMask(chunk, block, a_fidx, MCTX_UNUSED_CHUNK_ID);
 #ifdef MEMORY_CONTEXT_CHECKING
         chunk->requested_size = InvalidAllocSize;    /* mark it free */
 #endif
@@ -1058,6 +1058,9 @@ AllocSetAlloc(MemoryContext context, Size size, int flags)
         set->freelist[fidx] = link->next;
         VALGRIND_MAKE_MEM_NOACCESS(link, sizeof(AllocFreeListLink));

+        /* mark chunk valid */
+        MemoryChunkSetMethodID(chunk, MCTX_ASET_ID);
+
 #ifdef MEMORY_CONTEXT_CHECKING
         chunk->requested_size = size;
         /* set mark to catch clobber of "unused" space */
@@ -1185,6 +1188,10 @@ AllocSetFree(void *pointer)
 #ifdef CLOBBER_FREED_MEMORY
         wipe_mem(pointer, GetChunkSizeFromFreeListIdx(fidx));
 #endif
+
+        /* mark chunk free */
+        MemoryChunkSetMethodID(chunk, MCTX_UNUSED_CHUNK_ID);
+
         /* push this chunk onto the top of the free list */
         VALGRIND_MAKE_MEM_DEFINED(link, sizeof(AllocFreeListLink));
         link->next = set->freelist[fidx];
diff --git a/src/include/utils/memutils_internal.h b/src/include/utils/memutils_internal.h
index 475e91b336b..9fa877332ee 100644
--- a/src/include/utils/memutils_internal.h
+++ b/src/include/utils/memutils_internal.h
@@ -138,6 +138,9 @@ typedef enum MemoryContextMethodID
     MCTX_15_RESERVED_WIPEDMEM_ID    /* 1111 occurs in wipe_mem'd memory */
 } MemoryContextMethodID;

+/* This MemoryContextMethodID is recommended to put in pfree'd chunks */
+#define MCTX_UNUSED_CHUNK_ID    MCTX_0_RESERVED_UNUSEDMEM_ID
+
 /*
  * The number of bits that 8-byte memory chunk headers can use to encode the
  * MemoryContextMethodID.
diff --git a/src/include/utils/memutils_memorychunk.h b/src/include/utils/memutils_memorychunk.h
index bda9912182d..b876940d5e0 100644
--- a/src/include/utils/memutils_memorychunk.h
+++ b/src/include/utils/memutils_memorychunk.h
@@ -58,6 +58,9 @@
  * MemoryChunkSetHdrMaskExternal:
  *        Used to set up an externally managed MemoryChunk.
  *
+ * MemoryChunkSetMethodID:
+ *        Used to change a MemoryChunk's MethodID while preserving other fields.
+ *
  * MemoryChunkIsExternal:
  *        Determine if the given MemoryChunk is externally managed, i.e.
  *        MemoryChunkSetHdrMaskExternal() was called on the chunk.
@@ -196,6 +199,21 @@ MemoryChunkSetHdrMaskExternal(MemoryChunk *chunk,
         methodid;
 }

+/*
+ * MemoryChunkSetMethodID:
+ *        Set the methodid without changing any other chunk header fields.
+ *        This is typically used while marking a chunk as free or not-free.
+ */
+static inline void
+MemoryChunkSetMethodID(MemoryChunk *chunk,
+                       MemoryContextMethodID methodid)
+{
+    Assert((int) methodid <= MEMORY_CONTEXT_METHODID_MASK);
+
+    chunk->hdrmask = (chunk->hdrmask & ~MEMORY_CONTEXT_METHODID_MASK) |
+        methodid;
+}
+
 /*
  * MemoryChunkIsExternal
  *        Return true if 'chunk' is marked as external.

Re: BUG #19438: segfault with temp_file_limit inside cursor

От
David Rowley
Дата:
On Sat, 28 Mar 2026 at 06:41, Tom Lane <tgl@sss.pgh.pa.us> wrote:
> In MEMORY_CONTEXT_CHECKING builds, we can cheaply detect double frees
> by using the existing behavior that requested_size is set to
> InvalidAllocSize during AllocSetFree.  Another plausible idea is to
> change a freed chunk's MemoryContextMethodID to something invalid,
> which'd permit detection of double frees even in
> non-MEMORY_CONTEXT_CHECKING builds.
>
> I made draft patches showing how to do it both ways.  (Both patches
> pass check-world and are able to detect the bug in v17.)  The
> methodid-change way seems like the better alternative to me,
> but it is more invasive and does add a cycle or two when freeing or
> reusing a chunk.

I do think it's quite nice that we can detect the double free in
production builds by switching the MemoryContextMethodID to an unused
one. However, I did spend quite a bit of time making all that code as
fast as possible. For example, storing the freelist index in the chunk
header rather than the size, just to save the (pretty cheap)
AllocSetFreeIndex() call during pfree to get the freelist index from
the chunk size. That sort of thing was done because I could measure a
speedup from doing it.

For the switching MemoryContextMethodID patch, I applied the memory
context benchmarking patch I used when writing that code to test out
the overhead in a tight palloc/pfree loop (attached). I can see an
overhead of a little over 6.5%.

select run,pg_allocate_memory_test(8,512,1024::bigint*1024*1024,'aset')
as seconds from generate_Series(1,3) run;

master
 run | seconds
-----+----------
   1 | 0.823345
   2 | 0.834834
   3 | 0.835506

patched
 run | seconds
-----+----------
   1 | 0.887794
   2 | 0.884866
   3 |  0.88592

I would rather see us using the requested_size method in
MEMORY_CONTEXT_CHECKING enabled builds.

Thanks for working on the patches.

David

Вложения

Re: BUG #19438: segfault with temp_file_limit inside cursor

От
Tom Lane
Дата:
David Rowley <dgrowleyml@gmail.com> writes:
> For the switching MemoryContextMethodID patch, I applied the memory
> context benchmarking patch I used when writing that code to test out
> the overhead in a tight palloc/pfree loop (attached). I can see an
> overhead of a little over 6.5%.

Hm.  I got an overhead of about 2% on an Apple M4, which might be
argued to be acceptable, but 12% on an aging x86_64 platform.
Realistically, given that we failed to notice this omission at
all for more than three years, it's hard to argue that testing
for it in non-debug builds is worth any overhead.

Here's a fleshed-out version of the requested_size method.
I noted that AllocSetRealloc needs a defense too, and then
extended the patch to generation.c and slab.c.  bump.c
doesn't have an issue, and I don't think alignedalloc.c
needs its own defense either: it can rely on the underlying
context type.

            regards, tom lane

diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index 161c2e2d3df..bea9e47ee4f 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -1175,6 +1175,10 @@ AllocSetFree(void *pointer)
         link = GetFreeListLink(chunk);

 #ifdef MEMORY_CONTEXT_CHECKING
+        /* Test for previously-freed chunk */
+        if (unlikely(chunk->requested_size == InvalidAllocSize))
+            elog(WARNING, "detected double pfree in %s %p",
+                 set->header.name, chunk);
         /* Test for someone scribbling on unused space in chunk */
         if (chunk->requested_size < GetChunkSizeFromFreeListIdx(fidx))
             if (!sentinel_ok(pointer, chunk->requested_size))
@@ -1373,6 +1377,10 @@ AllocSetRealloc(void *pointer, Size size, int flags)
     oldchksize = GetChunkSizeFromFreeListIdx(fidx);

 #ifdef MEMORY_CONTEXT_CHECKING
+    /* Test for previously-freed chunk */
+    if (unlikely(chunk->requested_size == InvalidAllocSize))
+        elog(WARNING, "detected realloc of freed chunk in %s %p",
+             set->header.name, chunk);
     /* Test for someone scribbling on unused space in chunk */
     if (chunk->requested_size < oldchksize)
         if (!sentinel_ok(pointer, chunk->requested_size))
diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c
index 9077ed299b4..fe9d087c85e 100644
--- a/src/backend/utils/mmgr/generation.c
+++ b/src/backend/utils/mmgr/generation.c
@@ -762,6 +762,10 @@ GenerationFree(void *pointer)
     }

 #ifdef MEMORY_CONTEXT_CHECKING
+    /* Test for previously-freed chunk */
+    if (unlikely(chunk->requested_size == InvalidAllocSize))
+        elog(WARNING, "detected double pfree in %s %p",
+             ((MemoryContext) block->context)->name, chunk);
     /* Test for someone scribbling on unused space in chunk */
     Assert(chunk->requested_size < chunksize);
     if (!sentinel_ok(pointer, chunk->requested_size))
@@ -867,6 +871,10 @@ GenerationRealloc(void *pointer, Size size, int flags)
     set = block->context;

 #ifdef MEMORY_CONTEXT_CHECKING
+    /* Test for previously-freed chunk */
+    if (unlikely(chunk->requested_size == InvalidAllocSize))
+        elog(WARNING, "detected realloc of freed chunk in %s %p",
+             ((MemoryContext) set)->name, chunk);
     /* Test for someone scribbling on unused space in chunk */
     Assert(chunk->requested_size < oldsize);
     if (!sentinel_ok(pointer, chunk->requested_size))
diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c
index bd00bab18fe..0d1e6285c29 100644
--- a/src/backend/utils/mmgr/slab.c
+++ b/src/backend/utils/mmgr/slab.c
@@ -539,6 +539,7 @@ SlabAllocSetupNewChunk(MemoryContext context, SlabBlock *block,
     MemoryChunkSetHdrMask(chunk, block, MAXALIGN(slab->chunkSize), MCTX_SLAB_ID);

 #ifdef MEMORY_CONTEXT_CHECKING
+    chunk->requested_size = size;
     /* slab mark to catch clobber of "unused" space */
     Assert(slab->chunkSize < (slab->fullChunkSize - Slab_CHUNKHDRSZ));
     set_sentinel(MemoryChunkGetPointer(chunk), size);
@@ -748,11 +749,17 @@ SlabFree(void *pointer)
     slab = block->slab;

 #ifdef MEMORY_CONTEXT_CHECKING
+    /* Test for previously-freed chunk */
+    if (unlikely(chunk->requested_size == InvalidAllocSize))
+        elog(WARNING, "detected double pfree in %s %p",
+             slab->header.name, chunk);
     /* Test for someone scribbling on unused space in chunk */
     Assert(slab->chunkSize < (slab->fullChunkSize - Slab_CHUNKHDRSZ));
     if (!sentinel_ok(pointer, slab->chunkSize))
         elog(WARNING, "detected write past chunk end in %s %p",
              slab->header.name, chunk);
+    /* Reset requested_size to InvalidAllocSize in free chunks */
+    chunk->requested_size = InvalidAllocSize;
 #endif

     /* push this chunk onto the head of the block's free list */

Re: BUG #19438: segfault with temp_file_limit inside cursor

От
Tom Lane
Дата:
I wrote:
> ... I don't think alignedalloc.c
> needs its own defense either: it can rely on the underlying
> context type.

I started to wonder if an explicit test in AlignedAllocFree
could be useful anyway to make such problems a bit less obscure.
However, when I tried

    p = palloc_aligned(...);
    pfree(p);
    pfree(p);

I got

ERROR:  pfree called with invalid pointer 0x1f286b0 (header 0x7f7f7f7f7f7f7f7f)

That is, we'll never get to AlignedAllocFree because the underlying
context would have wipe_mem'd the aligned chunk's header during the
first pfree.  The only case in which such a test could be helpful is
in a build with MEMORY_CONTEXT_CHECKING but not CLOBBER_FREED_MEMORY.
While I suppose some people might build that way, it's got to be such
a tiny minority as to not be worth worrying about.

            regards, tom lane



Re: BUG #19438: segfault with temp_file_limit inside cursor

От
David Rowley
Дата:
On Mon, 30 Mar 2026 at 05:25, Tom Lane <tgl@sss.pgh.pa.us> wrote:
> I started to wonder if an explicit test in AlignedAllocFree
> could be useful anyway to make such problems a bit less obscure.
> However, when I tried
>
>         p = palloc_aligned(...);
>         pfree(p);
>         pfree(p);
>
> I got
>
> ERROR:  pfree called with invalid pointer 0x1f286b0 (header 0x7f7f7f7f7f7f7f7f)
>
> That is, we'll never get to AlignedAllocFree because the underlying
> context would have wipe_mem'd the aligned chunk's header during the
> first pfree.  The only case in which such a test could be helpful is
> in a build with MEMORY_CONTEXT_CHECKING but not CLOBBER_FREED_MEMORY.
> While I suppose some people might build that way, it's got to be such
> a tiny minority as to not be worth worrying about.

I think you might have trouble trying to get the MemoryContext.name
for the elog warning anyway. That's only accessible from the unaligned
allocation and whatever method that context type uses to backlink the
owning context from the chunk pointer. Given that, it very much seems
not worthwhile as I imagine that means adding some callback function
to MemoryContextMethods!

David



Re: BUG #19438: segfault with temp_file_limit inside cursor

От
David Rowley
Дата:
On Mon, 30 Mar 2026 at 04:32, Tom Lane <tgl@sss.pgh.pa.us> wrote:
> Here's a fleshed-out version of the requested_size method.
> I noted that AllocSetRealloc needs a defense too, and then
> extended the patch to generation.c and slab.c.  bump.c
> doesn't have an issue, and I don't think alignedalloc.c
> needs its own defense either: it can rely on the underlying
> context type.

Thanks for writing that up.

I looked at the code and tested.  The only thing that I noted was
GenerationFree(), where we do:

/* Test for previously-freed chunk */
if (unlikely(chunk->requested_size == InvalidAllocSize))
    elog(WARNING, "detected double pfree in %s %p",
         ((MemoryContext) block->context)->name, chunk);
/* Test for someone scribbling on unused space in chunk */
Assert(chunk->requested_size < chunksize);

I expect you've likely thought of this, but if we do spit out the
warning there, then the Assert is definitely going to fail, as
InvalidAllocSize is defined as SIZE_MAX. I don't know if that means
it's worth deviating from the similar WARNINGs you've added and making
that one an ERROR. There's certainly no guarantee with the other
context that we'll not crash sometime very soon after issuing the
warning anyway, so maybe it's fine.

David



Re: BUG #19438: segfault with temp_file_limit inside cursor

От
Tom Lane
Дата:
David Rowley <dgrowleyml@gmail.com> writes:
> I looked at the code and tested.  The only thing that I noted was
> GenerationFree(), where we do:

> /* Test for previously-freed chunk */
> if (unlikely(chunk->requested_size == InvalidAllocSize))
>     elog(WARNING, "detected double pfree in %s %p",
>          ((MemoryContext) block->context)->name, chunk);
> /* Test for someone scribbling on unused space in chunk */
> Assert(chunk->requested_size < chunksize);

> I expect you've likely thought of this, but if we do spit out the
> warning there, then the Assert is definitely going to fail, as
> InvalidAllocSize is defined as SIZE_MAX.

Yeah, I saw that after sending the patch.  Not only would that
Assert fail, but without it, code below would go nuts too.

> I don't know if that means
> it's worth deviating from the similar WARNINGs you've added and making
> that one an ERROR. There's certainly no guarantee with the other
> context that we'll not crash sometime very soon after issuing the
> warning anyway, so maybe it's fine.

Seems like a reasonable answer.  What do you think of making the
double-free cases ERRORs across the board?  If we don't error out,
there will likely be cascading problems in all the mcxt types not
just this one.

            regards, tom lane



Re: BUG #19438: segfault with temp_file_limit inside cursor

От
David Rowley
Дата:
On Mon, 30 Mar 2026 at 12:51, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>
> David Rowley <dgrowleyml@gmail.com> writes:
> > I don't know if that means
> > it's worth deviating from the similar WARNINGs you've added and making
> > that one an ERROR. There's certainly no guarantee with the other
> > context that we'll not crash sometime very soon after issuing the
> > warning anyway, so maybe it's fine.
>
> Seems like a reasonable answer.  What do you think of making the
> double-free cases ERRORs across the board?  If we don't error out,
> there will likely be cascading problems in all the mcxt types not
> just this one.

I think it's a good idea. It might slightly increase the chances that
we get a report about an issue. I suppose the logic in deciding which
elevel to make it could be applied about equally to the sentinel byte
check as well. Maybe that should also be an error for the same reason.

David



Re: BUG #19438: segfault with temp_file_limit inside cursor

От
Tom Lane
Дата:
David Rowley <dgrowleyml@gmail.com> writes:
> On Mon, 30 Mar 2026 at 12:51, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> Seems like a reasonable answer.  What do you think of making the
>> double-free cases ERRORs across the board?  If we don't error out,
>> there will likely be cascading problems in all the mcxt types not
>> just this one.

> I think it's a good idea. It might slightly increase the chances that
> we get a report about an issue. I suppose the logic in deciding which
> elevel to make it could be applied about equally to the sentinel byte
> check as well. Maybe that should also be an error for the same reason.

I thought about that, but it's been a WARNING for a long time and I'm
hesitant to change that.  We've seen many cases where scribbling one
or two bytes past the end of the requested size doesn't actually cause
fatal problems, because that was padding or unused space anyway.
Double frees are in a different category: if we let one happen,
it's pretty much guaranteed to cause hard-to-decipher problems down
the road.  (The fact that that didn't happen in the particular case
reported here doesn't mean it's usually okay.)

            regards, tom lane



Re: BUG #19438: segfault with temp_file_limit inside cursor

От
David Rowley
Дата:
On Mon, 30 Mar 2026 at 13:34, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>
> David Rowley <dgrowleyml@gmail.com> writes:
> > On Mon, 30 Mar 2026 at 12:51, Tom Lane <tgl@sss.pgh.pa.us> wrote:
> >> Seems like a reasonable answer.  What do you think of making the
> >> double-free cases ERRORs across the board?  If we don't error out,
> >> there will likely be cascading problems in all the mcxt types not
> >> just this one.
>
> > I think it's a good idea. It might slightly increase the chances that
> > we get a report about an issue. I suppose the logic in deciding which
> > elevel to make it could be applied about equally to the sentinel byte
> > check as well. Maybe that should also be an error for the same reason.
>
> I thought about that, but it's been a WARNING for a long time and I'm
> hesitant to change that.  We've seen many cases where scribbling one
> or two bytes past the end of the requested size doesn't actually cause
> fatal problems, because that was padding or unused space anyway.
> Double frees are in a different category: if we let one happen,
> it's pretty much guaranteed to cause hard-to-decipher problems down
> the road.  (The fact that that didn't happen in the particular case
> reported here doesn't mean it's usually okay.)

Fair. Maybe worth a short comment in the code to explain why we don't
use the same elevel then? Just considering someone stumbling upon the
variation in the future and reporting or asking why, and us having to
dig up the reason why in the archives to answer them.

Maybe something like this?

/*
* Test for someone scribbling on unused space in chunk.  Small
* overwrites are less likely to cause issues than a double-free, so
* warn for this instead of erroring.
*/

David



Re: BUG #19438: segfault with temp_file_limit inside cursor

От
Tom Lane
Дата:
David Rowley <dgrowleyml@gmail.com> writes:
> Fair. Maybe worth a short comment in the code to explain why we don't
> use the same elevel then? Just considering someone stumbling upon the
> variation in the future and reporting or asking why, and us having to
> dig up the reason why in the archives to answer them.
> Maybe something like this?

Works for me.  I'm done for the day but will make it so tomorrow.

            regards, tom lane



Re: BUG #19438: segfault with temp_file_limit inside cursor

От
Tom Lane
Дата:
I wrote:
> Works for me.  I'm done for the day but will make it so tomorrow.

Pushed that, though in the event I wrote a rather longer apologia
for why we use ERROR in one complaint and WARNING in the other.

Getting back to the original bug ... I went through tuplestore.c
and found two other places where foreseeable failures in subroutines
could leave the tuplestore in an inconsistent state.  I think we
need to fix them all, per the attached draft against HEAD.  The
back branches might have more/different bugs, didn't look yet.

            regards, tom lane

diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c
index caad7cad0b4..f9e2d95186a 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -708,10 +708,10 @@ grow_memtuples(Tuplestorestate *state)

     /* OK, do it */
     FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
-    state->memtupsize = newmemtupsize;
     state->memtuples = (void **)
         repalloc_huge(state->memtuples,
-                      state->memtupsize * sizeof(void *));
+                      newmemtupsize * sizeof(void *));
+    state->memtupsize = newmemtupsize;
     USEMEM(state, GetMemoryChunkSpace(state->memtuples));
     if (LACKMEM(state))
         elog(ERROR, "unexpected out-of-memory situation in tuplestore");
@@ -1306,7 +1306,19 @@ dumptuples(Tuplestorestate *state)
         if (i >= state->memtupcount)
             break;
         WRITETUP(state, state->memtuples[i]);
+
+        /*
+         * Increase memtupdeleted to track the fact that we just deleted that
+         * tuple.  Think not to remove this on the grounds that we'll reset
+         * memtupdeleted to zero below.  We might not reach that if some later
+         * WRITETUP fails (e.g. due to overrunning temp_file_limit).  If so,
+         * we'd error out leaving an effectively-corrupt tuplestore, which
+         * would be quite bad if it's a persistent data structure such as a
+         * Portal's holdStore.
+         */
+        state->memtupdeleted++;
     }
+    /* Now we can reset memtupdeleted along with memtupcount */
     state->memtupdeleted = 0;
     state->memtupcount = 0;
 }
@@ -1496,8 +1508,10 @@ tuplestore_trim(Tuplestorestate *state)
         FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
         pfree(state->memtuples[i]);
         state->memtuples[i] = NULL;
+        /* As in dumptuples(), increment memtupdeleted synchronously */
+        state->memtupdeleted++;
     }
-    state->memtupdeleted = nremove;
+    Assert(state->memtupdeleted == nremove);

     /* mark tuplestore as truncated (used for Assert crosschecks only) */
     state->truncated = true;