Re: Proposal: Conflict log history table for Logical Replication

Поиск
Список
Период
Сортировка
От Peter Smith
Тема Re: Proposal: Conflict log history table for Logical Replication
Дата
Msg-id CAHut+PsyXRzHcYxJ38T-5o98mKF=CrWd0LpZrQiZz8_95We1UQ@mail.gmail.com
обсуждение исходный текст
Ответ на Re: Proposal: Conflict log history table for Logical Replication  (vignesh C <vignesh21@gmail.com>)
Список pgsql-hackers
Hi Dilip.

Some review comments for patch v19-0001.

======
GENERAL

0. Apply warnings due to whitespace

$ git apply ../patches_misc/v19-0001-Add-configurable-conflict-log-table-for-Logical-.patch
../patches_misc/v19-0001-Add-configurable-conflict-log-table-for-Logical-.patch:821:
trailing whitespace.
 * Internally, we use these for bitwise comparisons (IsSet), but the string
warning: 1 line adds whitespace errors.

======
src/backend/catalog/namespace.c

CheckSetNamespace:

1.
Do you also need to update the function comment? Currently, it
mentions all the other schemas but not the PG_CONFLICT schema.

======
src/backend/commands/subscriptioncmds.c

parse_subscription_options:

2.
+ char *val;
+ ConflictLogDest dest;
+
+ if (IsSet(opts->specified_opts, SUBOPT_CONFLICT_LOG_DEST))
+ errorConflictingDefElem(defel, pstate);
+
+ val = defGetString(defel);
+ dest = GetLogDestination(val);
+ opts->logdest = dest;

Is there any purpose for the variable 'dest'?

SUGGESTION
opts->logdest = GetLogDestination(val);

~~~

CreateSubscription:

3.
+ /*
+ * If logging to a table is required, physically create the logging
+ * relation and store its OID in the catalog.
+ */
+ if (opts.logdest == CONFLICT_LOG_DEST_TABLE ||
+ opts.logdest == CONFLICT_LOG_DEST_ALL)

Those enum values are bits. This condition ought to use bitwise logic.

SUGGESTION
if (IsSet(opts.logdest, CONFLICT_LOG_DEST_TABLE))

~~~

4.
+ /* Destination is 'log'; no table is needed. */
+ values[Anum_pg_subscription_subconflictlogrelid - 1] =
+ ObjectIdGetDatum(InvalidOid);

This comment does not need to say "Destination is 'log'". And, one day
in the future, there might be more types, and then this comment would
be wrong.

SUGGESTION
/* No conflict log table is needed. */

~~~

5.
+ /*
+ * If logging to a table is required, physically create the logging
+ * relation and store its OID in the catalog.
+ */
+ if (opts.logdest == CONFLICT_LOG_DEST_TABLE ||
+ opts.logdest == CONFLICT_LOG_DEST_ALL)
+ {
+ Oid     logrelid;
+
+ /* Store the Oid returned from creation. */
+ logrelid = create_conflict_log_table(subid, stmt->subname);
+ values[Anum_pg_subscription_subconflictlogrelid - 1] =
+ ObjectIdGetDatum(logrelid);
+ }
+ else
+ {
+ /* Destination is 'log'; no table is needed. */
+ values[Anum_pg_subscription_subconflictlogrelid - 1] =
+ ObjectIdGetDatum(InvalidOid);
+ }

In fact, isn't it simpler to remove that 'else' entirely, leaving just
one values[] assignment?

SUGGESTION:
Oid logrelid = InvalidOid;
if (...)
{
 logrelid = create_conflict_log_table(...);
}
values[...] = ObjectIdGetDatum(logrelid);

~~~

create_conflict_log_table:

6.
+/*
+ * Create a structured conflict log table for a subscription.
+ *
+ * The table is created within the dedicated 'pg_conflict' namespace, which
+ * is system-managed.  The table name is generated automatically using the
+ * subscription's OID (e.g., "pg_conflict_<subid>") to ensure uniqueness
+ * within the cluster and to avoid collisions during subscription renames.
+ */
+static Oid
+create_conflict_log_table(Oid subid, char *subname)

IMO, the sentence "The table is created..." is ambiguous. e.g. What is
it trying to say is "system-managed" -- the table, or the namespace?

~~~

7.
+ relid = heap_create_with_catalog(relname,
+ PG_CONFLICT_NAMESPACE,
+ 0,
+ InvalidOid,
+ InvalidOid,
+ InvalidOid,
+ GetUserId(),
+ HEAP_TABLE_AM_OID,
+ tupdesc,
+ NIL,
+ RELKIND_RELATION,
+ RELPERSISTENCE_PERMANENT,
+ false,
+ false,
+ ONCOMMIT_NOOP,
+ (Datum) 0,
+ false,
+ true,
+ false,
+ InvalidOid,
+ NULL);

I saw this is similar to other heap_create_with_catalog() calls in the
PG source, but all the same, IMO, it's impossible to understand the
intent of all those true/false/InvalidOid params. Can you give a brief
comment per param line to say what they are for?

SUGGESTION
+ relid = heap_create_with_catalog(relname,
+ PG_CONFLICT_NAMESPACE,
+ 0, /* tablespace */
+ InvalidOid, /* relid */
+ InvalidOid, /* reltypeid */
+ InvalidOid, /* reloftypeid */
+ GetUserId(),
+ HEAP_TABLE_AM_OID,
+ tupdesc,
+ NIL,
+ RELKIND_RELATION,
+ RELPERSISTENCE_PERMANENT,
+ false, /* shared_relation */
+ false, /* mapped_relation */
+ ONCOMMIT_NOOP,
+ (Datum) 0, /* reloptions */
+ false, /* use_user_acl */
+ true, /* allow_system_table_mods */
+ false, /* is_internal */
+ InvalidOid, /* relrewrite */
+ NULL); /* typaddress */

e.g. Now that we can see better what those parameters are for, I
wonder why 'is_internal' is being passed as false?

~~~

GetLogDestination:

8.
+/*
+ * GetLogDestination
+ *
+ * Convert string to enum by comparing against standardized labels.
+ */
+ConflictLogDest
+GetLogDestination(const char *dest)
+{
+ /* Empty string or NULL defaults to LOG. */
+ if (dest == NULL || dest[0] == '\0' || pg_strcasecmp(dest, "log") == 0)
+ return CONFLICT_LOG_DEST_LOG;
+
+ if (pg_strcasecmp(dest,
+   ConflictLogDestNames[CONFLICT_LOG_DEST_TABLE]) == 0)
+ return CONFLICT_LOG_DEST_TABLE;
+
+ if (pg_strcasecmp(dest, ConflictLogDestNames[CONFLICT_LOG_DEST_ALL]) == 0)
+ return CONFLICT_LOG_DEST_ALL;
+
+ /* Unrecognized string. */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized conflict_log_destination value: \"%s\"", dest),
+ errhint("Valid values are \"log\", \"table\", and \"all\".")));
+}

I felt this function has a weird mixture of name lookups from
ConflictLogDestNames[] alongside hardwired strings for "log", "table",
and "all".

If you are going to hardwire any of them, you might as well hardwire
all of them -- IMO it's more readable too.

And, if there is any concern about those hardwired strings being
correct, then just introduce some asserts to ensure everything is
sane.

SUGGESTION
Assert(pg_strcasecmp("log", ConflictLogDestNames[CONFLICT_LOG_DEST_LOG]) == 0);
Assert(pg_strcasecmp("table",
ConflictLogDestNames[CONFLICT_LOG_DEST_TABLE]) == 0);
Assert(pg_strcasecmp("all", ConflictLogDestNames[CONFLICT_LOG_DEST_ALL]) == 0);
...
if (pg_strcasecmp(dest, "table") == 0)
 return CONFLICT_LOG_DEST_TABLE;

if (pg_strcasecmp(dest, "all") == 0)
 return CONFLICT_LOG_DEST_ALL;

~~~

IsConflictLogTable:

9.
+/*
+ * Check if the specified relation is used as a conflict log table by any
+ * subscription.
+ */
+bool
+IsConflictLogTable(Oid relid)
+{
+ Relation        rel;
+ TableScanDesc   scan;
+ HeapTuple       tup;
+ bool            is_clt = false;
+
+ rel = table_open(SubscriptionRelationId, AccessShareLock);
+ scan = table_beginscan_catalog(rel, 0, NULL);
+
+ while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))

This seemed like excessive logic to me -- why do we need to scan
through the subscriptions?

Doesn't the very fact that the table lives in the 'pg_conflict'
namespace mean that it *must* (by definition) be a conflict log table?
Therefore, isn't the namespace of this relid the only thing you need
to discover to know if it is a CLT or not?

======
src/include/catalog/pg_namespace.dat

10.
+{ oid => '1382', oid_symbol => 'PG_CONFLICT_NAMESPACE',
+  descr => 'reserved schema for subscription-specific conflict tables',
+  nspname => 'pg_conflict', nspacl => '_null_' },

Maybe better to use the same terminology as everywhere else:
/conflict tables/conflict log tables/

======
src/include/replication/conflict.h

11.
+typedef enum ConflictLogDest
+{
+ /* Log conflicts to the server logs */
+ CONFLICT_LOG_DEST_LOG   = 1 << 0,   /* 0x01 */
+
+ /* Log conflicts to an internally managed table */
+ CONFLICT_LOG_DEST_TABLE = 1 << 1,   /* 0x02 */
+
+ /* Convenience flag for all supported destinations */
+ CONFLICT_LOG_DEST_ALL   = (CONFLICT_LOG_DEST_LOG | CONFLICT_LOG_DEST_TABLE)
+} ConflictLogDest;

To emphasise that these enums are really bitmasks, I felt it would be
helpful to name them like:

CONFLICT_LOG_DEST_BIT_LOG
CONFLICT_LOG_DEST_BIT_TABLE
CONFLICT_LOG_DEST_BM_ALL

~~~

12.
+/* Define the count using the array size */
+#define MAX_CONFLICT_ATTR_NUM (sizeof(ConflictLogSchema) /
sizeof(ConflictLogSchema[0]))

Is that comment useful? The code is saying the same thing quite clearly.

Anyway, would it be better to write it as:
#define MAX_CONFLICT_ATTR_NUM lengthof(ConflictLogSchema)

======
src/test/regress/sql/subscription.sql

13.
+-- verify the physical table exists and its OID matches subconflictlogrelid
+SELECT count(*)
+FROM pg_class c
+JOIN pg_subscription s ON c.relname = 'pg_conflict_' || s.oid
+WHERE s.subname = 'regress_conflict_test1' AND c.oid = s.subconflictlogrelid;

Somewhere nearby this test, should you also verify that the CLT was
created within the 'pg_conflict' namespace?

~~~

14.
+-- ensure drop table not allowed and DROP SUBSCRIPTION reaps the table

IIUC, this was the comment that introduces the next group of "reaping"
tests, so this should also be a bit group comment like the one for the
state transitions.

~~~

15.
+-- PUBLICATION: Verify internal tables are not publishable
+-- pg_relation_is_publishable should return false for internal
conflict log tables
+SELECT pg_relation_is_publishable(subconflictlogrelid)
+FROM pg_subscription WHERE subname = 'regress_conflict_test1';

This test case seemed buried among some of the other "reaping" tests.
It should be moved outside of this group.

~~~

16.
+DROP SCHEMA clt;

There is no such schema. This 'clt' looks like a hangover from old
patch tests, so it should be removed.

======
Kind Regards,
Peter Smith.
Fujitsu Australia



В списке pgsql-hackers по дате отправления: