Обсуждение: bug: ANALYZE progress report with inheritance tables

Поиск
Список
Период
Сортировка

bug: ANALYZE progress report with inheritance tables

От
Justin Pryzby
Дата:
pg_stat_progress_analyze was added in v13 (a166d408e).

For tables with inheritance children, do_analyze_rel() and
acquire_sample_rows() are called twice.  The first time through,
pgstat_progress_start_command() has memset() the progress array to zero.

But the 2nd time, ANALYZE_BLOCKS_DONE is already set from the previous
call, and BLOCKS_TOTAL can be set to some lower value (and in any case a
value unrelated to the pre-existing value of BLOCKS_DONE).  So the
progress report briefly shows a bogus combination of values and, with
these assertions, fails regression tests in master and v13, unless
BLOCKS_DONE is first zeroed.

| Core was generated by `postgres: pryzbyj regression [local] VACUUM                                   '.
| ...
| #5  0x0000559a1c9fbbcc in ExceptionalCondition (conditionName=conditionName@entry=0x559a1cb68068
"a[PROGRESS_ANALYZE_BLOCKS_DONE]<= a[PROGRESS_ANALYZE_BLOCKS_TOTAL]", 
 
| ...
| #16 0x0000563165cc7cfe in exec_simple_query (query_string=query_string@entry=0x563167cad0c8 "VACUUM ANALYZE stxdinh,
stxdinh1,stxdinh2;") at ../src/backend/tcop/postgres.c:1237
 
| ...
| (gdb) p MyBEEntry->st_progress_param[1]
| $1 = 5
| (gdb) p MyBEEntry->st_progress_param[2]
| $2 = 9

BTW, I found this bug as well as the COPY progress bug I reported [0]
while testing the CREATE INDEX progress bug reported by Ilya.  It seems
like the progress infrastructure should have some checks added.

[0] https://www.postgresql.org/message-id/flat/20230119054703.GB13860@telsasoft.com

diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index c86e690980e..96710b84558 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -1145,6 +1145,12 @@ acquire_sample_rows(Relation onerel, int elevel,
     TableScanDesc scan;
     BlockNumber nblocks;
     BlockNumber blksdone = 0;
+    int64        progress_vals[2] = {0};
+    int const    progress_inds[2] = {
+        PROGRESS_ANALYZE_BLOCKS_DONE,
+        PROGRESS_ANALYZE_BLOCKS_TOTAL
+    };
+
 #ifdef USE_PREFETCH
     int            prefetch_maximum = 0;    /* blocks to prefetch if enabled */
     BlockSamplerData prefetch_bs;
@@ -1169,8 +1175,8 @@ acquire_sample_rows(Relation onerel, int elevel,
 #endif
 
     /* Report sampling block numbers */
-    pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_TOTAL,
-                                 nblocks);
+    progress_vals[1] = nblocks;
+    pgstat_progress_update_multi_param(2, progress_inds, progress_vals);
 
     /* Prepare for sampling rows */
     reservoir_init_selection_state(&rstate, targrows);
diff --git a/src/backend/utils/activity/backend_progress.c b/src/backend/utils/activity/backend_progress.c
index d96af812b19..05593fb13cb 100644
--- a/src/backend/utils/activity/backend_progress.c
+++ b/src/backend/utils/activity/backend_progress.c
@@ -10,6 +10,7 @@
  */
 #include "postgres.h"
 
+#include "commands/progress.h"
 #include "port/atomics.h"        /* for memory barriers */
 #include "utils/backend_progress.h"
 #include "utils/backend_status.h"
@@ -37,6 +38,83 @@ pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
     PGSTAT_END_WRITE_ACTIVITY(beentry);
 }
 
+/*
+ * Check for consistency of progress data (current < total).
+ *
+ * Check during pgstat_progress_updates_*() rather than only from
+ * pgstat_progress_end_command() to catch issues with uninitialized/stale data
+ * from previous progress commands.
+ *
+ * If a command fails due to interrupt or error, the values may be less than
+ * the expected final value.
+ */
+static void
+pgstat_progress_asserts(void)
+{
+    volatile PgBackendStatus *beentry = MyBEEntry;
+    volatile int64             *a = beentry->st_progress_param;
+
+    switch (beentry->st_progress_command)
+    {
+    case PROGRESS_COMMAND_VACUUM:
+        Assert(a[PROGRESS_VACUUM_HEAP_BLKS_SCANNED] <=
+                a[PROGRESS_VACUUM_TOTAL_HEAP_BLKS]);
+        Assert(a[PROGRESS_VACUUM_HEAP_BLKS_VACUUMED] <=
+                a[PROGRESS_VACUUM_TOTAL_HEAP_BLKS]);
+        Assert(a[PROGRESS_VACUUM_NUM_DEAD_TUPLES] <=
+                a[PROGRESS_VACUUM_MAX_DEAD_TUPLES]);
+        break;
+
+    case PROGRESS_COMMAND_ANALYZE:
+        Assert(a[PROGRESS_ANALYZE_BLOCKS_DONE] <=
+                a[PROGRESS_ANALYZE_BLOCKS_TOTAL]);
+        Assert(a[PROGRESS_ANALYZE_EXT_STATS_COMPUTED] <=
+                a[PROGRESS_ANALYZE_EXT_STATS_TOTAL]);
+        Assert(a[PROGRESS_ANALYZE_CHILD_TABLES_DONE] <=
+                a[PROGRESS_ANALYZE_CHILD_TABLES_TOTAL]);
+        break;
+
+    case PROGRESS_COMMAND_CLUSTER:
+        Assert(a[PROGRESS_CLUSTER_HEAP_BLKS_SCANNED] <=
+                a[PROGRESS_CLUSTER_TOTAL_HEAP_BLKS]);
+        /* fall through because CLUSTER rebuilds indexes */
+    case PROGRESS_COMMAND_CREATE_INDEX:
+        Assert(a[PROGRESS_CREATEIDX_TUPLES_DONE] <=
+                a[PROGRESS_CREATEIDX_TUPLES_TOTAL]);
+        Assert(a[PROGRESS_CREATEIDX_PARTITIONS_DONE] <=
+                a[PROGRESS_CREATEIDX_PARTITIONS_TOTAL]);
+        break;
+
+    case PROGRESS_COMMAND_BASEBACKUP:
+        /* progress reporting is optional for these */
+        if (a[PROGRESS_BASEBACKUP_BACKUP_TOTAL] >= 0)
+        {
+            Assert(a[PROGRESS_BASEBACKUP_BACKUP_STREAMED] <=
+                    a[PROGRESS_BASEBACKUP_BACKUP_TOTAL]);
+            Assert(a[PROGRESS_BASEBACKUP_TBLSPC_STREAMED] <=
+                    a[PROGRESS_BASEBACKUP_TBLSPC_TOTAL]);
+        }
+        break;
+
+#if 0
+    case PROGRESS_COMMAND_COPY:
+// This currently fails file_fdw tests, since pgstat_prorgress evidently fails
+// to support simultaneous copy commands, as happens during JOIN.
+        /* bytes progress is not available in all cases */
+        if (a[PROGRESS_COPY_BYTES_TOTAL] > 0)
+            // Assert(a[PROGRESS_COPY_BYTES_PROCESSED] <= a[PROGRESS_COPY_BYTES_TOTAL]);
+            if (a[PROGRESS_COPY_BYTES_PROCESSED] > a[PROGRESS_COPY_BYTES_TOTAL])
+                elog(WARNING, "PROGRESS_COPY_BYTES_PROCESSED %ld %ld",
+                            a[PROGRESS_COPY_BYTES_PROCESSED],
+                            a[PROGRESS_COPY_BYTES_TOTAL]);
+#endif
+        break;
+
+    case PROGRESS_COMMAND_INVALID:
+        break; /* Do nothing */
+    }
+}
+
 /*-----------
  * pgstat_progress_update_param() -
  *
@@ -56,6 +134,8 @@ pgstat_progress_update_param(int index, int64 val)
     PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
     beentry->st_progress_param[index] = val;
     PGSTAT_END_WRITE_ACTIVITY(beentry);
+
+    pgstat_progress_asserts();
 }
 
 /*-----------
@@ -85,6 +165,8 @@ pgstat_progress_update_multi_param(int nparam, const int *index,
     }
 
     PGSTAT_END_WRITE_ACTIVITY(beentry);
+
+    pgstat_progress_asserts();
 }
 
 /*-----------



Re: bug: ANALYZE progress report with inheritance tables

От
Daniel Gustafsson
Дата:
> On 22 Jan 2023, at 17:23, Justin Pryzby <pryzby@telsasoft.com> wrote:

> diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
> index c86e690980e..96710b84558 100644
> ...

This CF entry fails to build in the CFBot since the patch isn't attached to the
email, and the CFBot can't extract inline patches from the mail body.  Can you
please submit this as an attached (and rebased) patch to make sure we get
automated testing on it?

--
Daniel Gustafsson




Re: bug: ANALYZE progress report with inheritance tables

От
Heikki Linnakangas
Дата:
On 22/01/2023 18:23, Justin Pryzby wrote:
> pg_stat_progress_analyze was added in v13 (a166d408e).
> 
> For tables with inheritance children, do_analyze_rel() and
> acquire_sample_rows() are called twice.  The first time through,
> pgstat_progress_start_command() has memset() the progress array to zero.
> 
> But the 2nd time, ANALYZE_BLOCKS_DONE is already set from the previous
> call, and BLOCKS_TOTAL can be set to some lower value (and in any case a
> value unrelated to the pre-existing value of BLOCKS_DONE).  So the
> progress report briefly shows a bogus combination of values and, with
> these assertions, fails regression tests in master and v13, unless
> BLOCKS_DONE is first zeroed.

Good catch!

I think the counts need do be reset even earlier, in 
acquire_inherited_sample_rows(), at the same time that we update 
PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID. See attached patch. 
Otherwise, there's a brief moment where we have already updated the 
child table ID, but the PROGRESS_ANALYZE_BLOCKS_TOTAL 
PROGRESS_ANALYZE_BLOCKS_DONE still show the counts from the previous 
child table. And if it's a foreign table, the FDW's sampling function 
might not update the progress report at all, in which case the old 
values will be displayed until the table is fully processed.

I appreciate the assertions you added, that made it easy to reproduce 
the problem. I'm inclined to not commit that though. It seems like a 
modularity violation for the code in backend_progress.c to have such 
intimate knowledge of what the different counters mean.

-- 
Heikki Linnakangas
Neon (https://neon.tech)

Вложения

Re: bug: ANALYZE progress report with inheritance tables

От
Heikki Linnakangas
Дата:
On 28/09/2023 19:06, Heikki Linnakangas wrote:
> On 22/01/2023 18:23, Justin Pryzby wrote:
>> pg_stat_progress_analyze was added in v13 (a166d408e).
>>
>> For tables with inheritance children, do_analyze_rel() and
>> acquire_sample_rows() are called twice.  The first time through,
>> pgstat_progress_start_command() has memset() the progress array to zero.
>>
>> But the 2nd time, ANALYZE_BLOCKS_DONE is already set from the previous
>> call, and BLOCKS_TOTAL can be set to some lower value (and in any case a
>> value unrelated to the pre-existing value of BLOCKS_DONE).  So the
>> progress report briefly shows a bogus combination of values and, with
>> these assertions, fails regression tests in master and v13, unless
>> BLOCKS_DONE is first zeroed.
> 
> Good catch!
> 
> I think the counts need do be reset even earlier, in
> acquire_inherited_sample_rows(), at the same time that we update
> PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID. See attached patch.
> Otherwise, there's a brief moment where we have already updated the
> child table ID, but the PROGRESS_ANALYZE_BLOCKS_TOTAL
> PROGRESS_ANALYZE_BLOCKS_DONE still show the counts from the previous
> child table. And if it's a foreign table, the FDW's sampling function
> might not update the progress report at all, in which case the old
> values will be displayed until the table is fully processed.

Committed and backported. Thank you!

-- 
Heikki Linnakangas
Neon (https://neon.tech)