diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index ca074da..fff1fdd 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -6,6 +6,62 @@ * This module is a pg_clog-like system that stores the commit timestamp * for each transaction. * + * When track_commit_timestamp is enabled, this module will keep track of + * commit timestamp for each transaction. It also provides API to for + * optionally storing nodeid (origin) of each transaction. The main purpose of + * this functionality is to help with conflict detection and resolution for + * replication systems. + * + * The following example shows how to use the API provided by this module, to + * handle UPDATE conflicts coming from replication stream: + * void + * update_tuple(Relation relation, HeapTuple remote_tuple, + * TimestampTz remote_commit_ts, CommitTsNodeId remote_node_id) + * { + * bool exists; + * HeapTupleData local_tuple; + * + * // Find existing tuple with same PK/unique index combination. + * exists = find_local_tuple(relation, remote_tuple, &local_tuple); + * + * // The tuple was found. + * if (exists) + * { + * TransactionId xmin; + * TimestampTz local_commit_ts; + * CommitTsNodeId local_node_id; + * + * xmin = HeapTupleHeaderGetXmin(local_tuple.t_data); + * TransactionIdGetCommitTsData(xmin, &local_commit_ts, &nodeid); + * + * // New tuple is coming from different node than the locally saved + * // tuple and the remote commit timestamp is older than local commit + * // timestamp, this is UPDATE/UPDATE conflict (node being UPDATEd on + * // different nodes at the same time. + * if (remote_id != local_node_id && remote_commit_ts <= local_commit_ts) + * { + * if (remote_commit_ts < local_commit_ts) + * return; // Keep the local tuple. + * + * // Handle the conflict in a consistent manner. + * } + * else + * { + * // The new tuple either comes from same node as old tuple and/or + * // is has newer commit timestamp than the local tuple, apply the + * // UPDATE. + * } + * } + * else + * { + * // Tuple not found (possibly UPDATE/DELETE conflict), handle it + * // in a consistent manner. + * } + * } + * + * See default_node_id and CommitTsSetDefaultNodeId for explanation of how to + * set nodeid when applying transactions. + * * XLOG interactions: this module generates an XLOG record whenever a new * CommitTs page is initialized to zeroes. Also, one XLOG record is * generated for setting of values when the caller requests it; this allows @@ -49,6 +105,15 @@ */ /* + * CommitTimestampEntry + * + * Record containing information about the transaction commit timestamp and + * the nodeid. + * + * The nodeid provides IO efficient way for replication systems to store + * information about origin of the transaction. Currently the nodeid is opaque + * value meaning of which is defined by the replication system itself. + * * We need 8+4 bytes per xact. Note that enlarging this struct might mean * the largest possible file name is more than 5 chars long; see * SlruScanDirectory. @@ -93,6 +158,21 @@ CommitTimestampShared *commitTsShared; /* GUC variable */ bool track_commit_timestamp; +/* + * The default_node_id will be written to commit timestamp record for + * every transaction committed by the current backend. + * + * The idea behind having default_node_id is that replication system + * will have connection (backend running) for specific source of replicated + * data to which it assigned the nodeid for identification purposes. + * + * The default_node_id setting stays in effect for duration of a session + * (unless changed again), so the replication system does not have to set + * the nodeid individually for each transaction. + * + * The public access to the default_node_id is provided by + * CommitTsSetDefaultNodeId and CommitTsGetDefaultNodeId interfaces. + */ static CommitTsNodeId default_node_id = InvalidCommitTsNodeId; static void SetXidCommitTsInPage(TransactionId xid, int nsubxids, @@ -112,7 +192,7 @@ static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids, /* * CommitTsSetDefaultNodeId * - * Set default nodeid for current backend. + * Set default nodeid for current session. */ void CommitTsSetDefaultNodeId(CommitTsNodeId nodeid) @@ -123,7 +203,7 @@ CommitTsSetDefaultNodeId(CommitTsNodeId nodeid) /* * CommitTsGetDefaultNodeId * - * Set default nodeid for current backend. + * Get current default nodeid. */ CommitTsNodeId CommitTsGetDefaultNodeId(void)