More aggressive vacuuming of temporary tables

Поиск
Список
Период
Сортировка
От Tom Lane
Тема More aggressive vacuuming of temporary tables
Дата
Msg-id 3490536.1598629609@sss.pgh.pa.us
обсуждение исходный текст
Ответы Re: More aggressive vacuuming of temporary tables  (Michael Paquier <michael@paquier.xyz>)
Re: More aggressive vacuuming of temporary tables  (Andres Freund <andres@anarazel.de>)
Список pgsql-hackers
It strikes me that when we are vacuuming a temporary table (which
necessarily will be one of our own session), we don't really need
to care what the global xmin horizon is.  If we're not inside a
user transaction block, then there are no tuples in the table that
could be in-doubt anymore.  Neither are there any snapshots in our
session that could see any dead tuples.  Nor do we give a fig what
other sessions might think of those tuples.  So we could just set
the xmin cutoff as aggressively as possible, which is to say
equal to the nextXid counter.  While vacuuming a temp table is
perhaps not something people do very often, I think when they do
do it they would like us to clean out all the dead tuples, not just
some.

Hence I propose 0001 attached.  80% of it is just API additions to allow
passing down the isTopLevel flag so that we can do the right thing in
the CLUSTER case; the important change is in vacuum_set_xid_limits.
(For ease of review, I didn't reindent the existing code in
vacuum_set_xid_limits, but that would be something to do before commit.)

The reason I got interested in this is that yesterday while fooling
with bug #16595, I was annoyed about our poor code test coverage in
access/gin/.  The 0002 patch attached brings coverage for ginvacuum.c
up from 59% to 93%; but as things stand, a long delay has to be inserted
between the DELETE and VACUUM steps, else the VACUUM won't remove the
dead tuples because of concurrent transactions, and we get no coverage
improvement.  Since the table in question is a temp table, that seems
pretty silly.

Thoughts?  Am I missing something important here?

            regards, tom lane

diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index a0da444af0..beafb60ac2 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -470,6 +470,7 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
                           params->freeze_table_age,
                           params->multixact_freeze_min_age,
                           params->multixact_freeze_table_age,
+                          true, /* we must be a top-level command */
                           &OldestXmin, &FreezeLimit, &xidFullScanLimit,
                           &MultiXactCutoff, &mxactFullScanLimit);

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 04d12a7ece..0d647e912c 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -67,10 +67,13 @@ typedef struct
 } RelToCluster;


-static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
+static void rebuild_relation(Relation OldHeap, Oid indexOid,
+                             bool isTopLevel, bool verbose);
 static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
-                            bool verbose, bool *pSwapToastByContent,
-                            TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
+                            bool isTopLevel, bool verbose,
+                            bool *pSwapToastByContent,
+                            TransactionId *pFreezeXid,
+                            MultiXactId *pCutoffMulti);
 static List *get_tables_to_cluster(MemoryContext cluster_context);


@@ -170,7 +173,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
         table_close(rel, NoLock);

         /* Do the job. */
-        cluster_rel(tableOid, indexOid, stmt->options);
+        cluster_rel(tableOid, indexOid, stmt->options, isTopLevel);
     }
     else
     {
@@ -219,7 +222,8 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
             PushActiveSnapshot(GetTransactionSnapshot());
             /* Do the job. */
             cluster_rel(rvtc->tableOid, rvtc->indexOid,
-                        stmt->options | CLUOPT_RECHECK);
+                        stmt->options | CLUOPT_RECHECK,
+                        isTopLevel);
             PopActiveSnapshot();
             CommitTransactionCommand();
         }
@@ -250,7 +254,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
  * and error messages should refer to the operation as VACUUM not CLUSTER.
  */
 void
-cluster_rel(Oid tableOid, Oid indexOid, int options)
+cluster_rel(Oid tableOid, Oid indexOid, int options, bool isTopLevel)
 {
     Relation    OldHeap;
     bool        verbose = ((options & CLUOPT_VERBOSE) != 0);
@@ -400,7 +404,7 @@ cluster_rel(Oid tableOid, Oid indexOid, int options)
     TransferPredicateLocksToHeapRelation(OldHeap);

     /* rebuild_relation does all the dirty work */
-    rebuild_relation(OldHeap, indexOid, verbose);
+    rebuild_relation(OldHeap, indexOid, isTopLevel, verbose);

     /* NB: rebuild_relation does table_close() on OldHeap */

@@ -545,11 +549,12 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
  *
  * OldHeap: table to rebuild --- must be opened and exclusive-locked!
  * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
+ * isTopLevel: should be passed down from ProcessUtility.
  *
  * NB: this routine closes OldHeap at the right time; caller should not.
  */
 static void
-rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
+rebuild_relation(Relation OldHeap, Oid indexOid, bool isTopLevel, bool verbose)
 {
     Oid            tableOid = RelationGetRelid(OldHeap);
     Oid            tableSpace = OldHeap->rd_rel->reltablespace;
@@ -577,7 +582,7 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
                                AccessExclusiveLock);

     /* Copy the heap data into the new table in the desired order */
-    copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
+    copy_table_data(OIDNewHeap, tableOid, indexOid, isTopLevel, verbose,
                     &swap_toast_by_content, &frozenXid, &cutoffMulti);

     /*
@@ -728,7 +733,8 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
  */
 static void
-copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
+copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
+                bool isTopLevel, bool verbose,
                 bool *pSwapToastByContent, TransactionId *pFreezeXid,
                 MultiXactId *pCutoffMulti)
 {
@@ -826,7 +832,7 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
      * Since we're going to rewrite the whole table anyway, there's no reason
      * not to be aggressive about this.
      */
-    vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0,
+    vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, isTopLevel,
                           &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff,
                           NULL);

diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 23eb605d4c..62b51b078d 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -907,6 +907,9 @@ get_all_vacuum_rels(int options)
 /*
  * vacuum_set_xid_limits() -- compute oldestXmin and freeze cutoff points
  *
+ * Input parameters are the target relation, applicable freeze age settings,
+ * and isTopLevel which should be passed down from ProcessUtility.
+ *
  * The output parameters are:
  * - oldestXmin is the cutoff value used to distinguish whether tuples are
  *     DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
@@ -931,6 +934,7 @@ vacuum_set_xid_limits(Relation rel,
                       int freeze_table_age,
                       int multixact_freeze_min_age,
                       int multixact_freeze_table_age,
+                      bool isTopLevel,
                       TransactionId *oldestXmin,
                       TransactionId *freezeLimit,
                       TransactionId *xidFullScanLimit,
@@ -946,7 +950,24 @@ vacuum_set_xid_limits(Relation rel,
     MultiXactId mxactLimit;
     MultiXactId safeMxactLimit;

+    if (RELATION_IS_LOCAL(rel) && !IsInTransactionBlock(isTopLevel))
+    {
+        /*
+         * If we are processing a temp relation (which by prior checks must be
+         * one belonging to our session), and we are not inside any
+         * transaction block, then there can be no tuples in the rel that are
+         * either in-doubt or dead but possibly still interesting to some
+         * snapshot our session holds.  We don't care whether other sessions
+         * could still care about such tuples, either.  So we can aggressively
+         * set the cutoff xmin to be the nextXid.
+         */
+        *oldestXmin = XidFromFullTransactionId(ReadNextFullTransactionId());
+    }
+    else
+    {
     /*
+     * Otherwise, calculate the cutoff xmin normally.
+     *
      * We can always ignore processes running lazy vacuum.  This is because we
      * use these values only for deciding which tuples we must keep in the
      * tables.  Since lazy vacuum doesn't write its XID anywhere (usually no
@@ -974,6 +995,7 @@ vacuum_set_xid_limits(Relation rel,
             *oldestXmin = limit_xmin;
         }
     }
+    }

     Assert(TransactionIdIsNormal(*oldestXmin));

@@ -1907,7 +1929,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
             cluster_options |= CLUOPT_VERBOSE;

         /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
-        cluster_rel(relid, InvalidOid, cluster_options);
+        cluster_rel(relid, InvalidOid, cluster_options, true);
     }
     else
         table_relation_vacuum(onerel, params, vac_strategy);
diff --git a/src/include/commands/cluster.h b/src/include/commands/cluster.h
index e05884781b..1eb144204b 100644
--- a/src/include/commands/cluster.h
+++ b/src/include/commands/cluster.h
@@ -19,7 +19,8 @@


 extern void cluster(ClusterStmt *stmt, bool isTopLevel);
-extern void cluster_rel(Oid tableOid, Oid indexOid, int options);
+extern void cluster_rel(Oid tableOid, Oid indexOid, int options,
+                        bool isTopLevel);
 extern void check_index_is_clusterable(Relation OldHeap, Oid indexOid,
                                        bool recheck, LOCKMODE lockmode);
 extern void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal);
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index a4cd721400..d9475c9989 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -267,6 +267,7 @@ extern void vacuum_set_xid_limits(Relation rel,
                                   int freeze_min_age, int freeze_table_age,
                                   int multixact_freeze_min_age,
                                   int multixact_freeze_table_age,
+                                  bool isTopLevel,
                                   TransactionId *oldestXmin,
                                   TransactionId *freezeLimit,
                                   TransactionId *xidFullScanLimit,
diff --git a/src/test/regress/expected/gin.out b/src/test/regress/expected/gin.out
index b335466fc4..6402e89c7f 100644
--- a/src/test/regress/expected/gin.out
+++ b/src/test/regress/expected/gin.out
@@ -264,6 +264,27 @@ select count(*) from t_gin_test_tbl where j @> '{}'::int[];
  20006
 (1 row)

+-- test vacuuming of posting trees
+delete from t_gin_test_tbl where j @> array[2];
+vacuum t_gin_test_tbl;
+select count(*) from t_gin_test_tbl where j @> array[50];
+ count
+-------
+     0
+(1 row)
+
+select count(*) from t_gin_test_tbl where j @> array[2];
+ count
+-------
+     0
+(1 row)
+
+select count(*) from t_gin_test_tbl where j @> '{}'::int[];
+ count
+-------
+     6
+(1 row)
+
 reset enable_seqscan;
 reset enable_bitmapscan;
 drop table t_gin_test_tbl;
diff --git a/src/test/regress/sql/gin.sql b/src/test/regress/sql/gin.sql
index efb8ef3e96..5194afcc1f 100644
--- a/src/test/regress/sql/gin.sql
+++ b/src/test/regress/sql/gin.sql
@@ -159,6 +159,14 @@ explain (costs off)
 select count(*) from t_gin_test_tbl where j @> '{}'::int[];
 select count(*) from t_gin_test_tbl where j @> '{}'::int[];

+-- test vacuuming of posting trees
+delete from t_gin_test_tbl where j @> array[2];
+vacuum t_gin_test_tbl;
+
+select count(*) from t_gin_test_tbl where j @> array[50];
+select count(*) from t_gin_test_tbl where j @> array[2];
+select count(*) from t_gin_test_tbl where j @> '{}'::int[];
+
 reset enable_seqscan;
 reset enable_bitmapscan;


В списке pgsql-hackers по дате отправления:

Предыдущее
От: Robert Haas
Дата:
Сообщение: Re: Hybrid Hash/Nested Loop joins and caching results from subplans
Следующее
От: Tom Lane
Дата:
Сообщение: Re: Deprecating postfix and factorial operators in PostgreSQL 13