BUG #19086: pg_dump --data-only selects and do not uses index definitions for the dumped tables.

Поиск
Список
Период
Сортировка
От PG Bug reporting form
Тема BUG #19086: pg_dump --data-only selects and do not uses index definitions for the dumped tables.
Дата
Msg-id 19086-871ff11017d03dc5@postgresql.org
обсуждение исходный текст
Ответы Re: BUG #19086: pg_dump --data-only selects and do not uses index definitions for the dumped tables.
Список pgsql-bugs
The following bug has been logged on the website:

Bug reference:      19086
Logged by:          Andrew Bille
Email address:      andrewbille@gmail.com
PostgreSQL version: 18.0
Operating system:   Debian 12
Description:

In case of system indexes corruption the collecting of index definitions can
take a really long time.

Synthetic example:

DO $$
DECLARE
    i INTEGER;
    j INTEGER;
BEGIN
    FOR i IN 1..15000 LOOP
        EXECUTE 'CREATE TABLE tab'  i  ' as SELECT 1 as f';
        FOR j IN 1..5 LOOP
            EXECUTE 'CREATE index idx_tab'  i  '_'  j   ' ON tab'  i  '(f)';
        END LOOP;
    END LOOP;
END;
$$;

If
ignore_system_indexes = on

time pg_dump --data-only test > test.sql

real    62m44,582s
user    0m0,576s
sys     0m0,259s

of which
LOG:  duration: 3474423.683 ms  statement: SELECT t.tableoid, t.oid,
i.indrelid, t.relname AS indexname, t.relpages, t.reltuples,
t.relallvisible, 0 AS relallfrozen, pg_catalog.pg_get_indexdef(i.indexrelid)
AS indexdef, i.indkey, i.indisclustered, c.contype, c.conname,
c.condeferrable, c.condeferred, c.tableoid AS contableoid, c.oid AS conoid,
pg_catalog.pg_get_constraintdef(c.oid, false) AS condef, CASE WHEN
i.indexprs IS NOT NULL THEN (SELECT pg_catalog.array_agg(attname ORDER BY
attnum)  FROM pg_catalog.pg_attribute   WHERE attrelid = i.indexrelid) ELSE
NULL END AS indattnames, (SELECT spcname FROM pg_catalog.pg_tablespace s
WHERE s.oid = t.reltablespace) AS tablespace, t.reloptions AS indreloptions,
i.indisreplident, inh.inhparent AS parentidx, i.indnkeyatts AS indnkeyatts,
i.indnatts AS indnatts, (SELECT pg_catalog.array_agg(attnum ORDER BY attnum)
FROM pg_catalog.pg_attribute   WHERE attrelid = i.indexrelid AND
attstattarget >= 0) AS indstatcols, (SELECT
pg_catalog.array_agg(attstattarget ORDER BY attnum)   FROM
pg_catalog.pg_attribute   WHERE attrelid = i.indexrelid AND
attstattarget >= 0) AS indstatvals, i.indnullsnotdistinct, NULL AS conperiod
FROM

unnest('{16385,16393,16401,16409,16417,16425,16433,16441,16449,16457,16465,16473,16481,16489,16497,16505,16513,16521,16529,16537,16545,16553,16561,16569,16577,16585,16593,16601,16609,16617,16625,16633,16641,16649,16657,16665,16673,16681,16689,16697,16705,16713,16721,16729,16737,16745,16753,16761,16769,16777,16785,16793,16801,16809,16817,16825,16833,16841,16849,16857,16865,16873,16881,16889,16897,16905,16913,16921,16929,16937,16945,16953,16961,16969,16977,16985,16993,17001,17009,17017,17025,17033,17041,17049,17057,17065,17073,17081,17089,17097,17105,17113,17121,17129,17137,17145,17153,17161,17169,17177,17185,17193,17201,17209,17217
... 36361,136369,136377}'::pg_catalog.oid[]) AS src(tbloid)
        JOIN pg_catalog.pg_index i ON (src.tbloid = i.indrelid) JOIN
pg_catalog.pg_class t ON (t.oid = i.indexrelid) JOIN pg_catalog.pg_class t2
ON (t2.oid = i.indrelid) LEFT JOIN pg_catalog.pg_constraint c ON (i.indrelid
= c.conrelid AND i.indexrelid = c.conindid AND c.contype IN ('p','u','x'))
LEFT JOIN pg_catalog.pg_inherits inh ON (inh.inhrelid = indexrelid) WHERE
(i.indisvalid OR t2.relkind = 'p') AND i.indisready ORDER BY i.indrelid,
indexname

With a simple patch (passes tests)

diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index a1976fae607..471e62da735 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -95,7 +95,7 @@ static IndxInfo *findIndexByOid(Oid oid);
  *       Collect information about all potentially dumpable objects
  */
 TableInfo *
-getSchemaData(Archive *fout, int *numTablesPtr)
+getSchemaData(Archive *fout, int *numTablesPtr, bool dataOnly)
 {
        TableInfo  *tblinfo;
        ExtensionInfo *extinfo;
@@ -211,11 +211,14 @@ getSchemaData(Archive *fout, int *numTablesPtr)
        pg_log_info("reading partitioning data");
        getPartitioningInfo(fout);

-       pg_log_info("reading indexes");
-       getIndexes(fout, tblinfo, numTables);
+       if (!dataOnly)
+       {
+               pg_log_info("reading indexes");
+               getIndexes(fout, tblinfo, numTables);

-       pg_log_info("flagging indexes in partitioned tables");
-       flagInhIndexes(fout, tblinfo, numTables);
+               pg_log_info("flagging indexes in partitioned tables");
+               flagInhIndexes(fout, tblinfo, numTables);
+       }

        pg_log_info("reading extended statistics");
        getExtendedStatistics(fout);
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 641bece12c7..ef8bd786371 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -1090,7 +1090,7 @@ main(int argc, char **argv)
         * Now scan the database and create DumpableObject structs for all
the
         * objects we intend to dump.
         */
-       tblinfo = getSchemaData(fout, &numTables);
+       tblinfo = getSchemaData(fout, &numTables, data_only);

        if (dopt.dumpData)
        {
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index fa6d1a510f7..83e097404a3 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -750,7 +750,7 @@ typedef struct _SubRelInfo
  *     common utility functions
  */

-extern TableInfo *getSchemaData(Archive *fout, int *numTablesPtr);
+extern TableInfo *getSchemaData(Archive *fout, int *numTablesPtr, bool
dataOnly);

 extern void AssignDumpId(DumpableObject *dobj);
 extern void recordAdditionalCatalogID(CatalogId catId, DumpableObject
*dobj);


we have:

time pg_dump --data-only test > test.sql

real    7m45,533s
user    0m0,726s
sys     0m0,634s

In the real case of system indexes corruption ... dump can take enourmous
amount of time.


В списке pgsql-bugs по дате отправления: