Index: doc/src/sgml/datatype.sgml
===================================================================
RCS file: /projects/cvsroot/pgsql/doc/src/sgml/datatype.sgml,v
retrieving revision 1.210
diff -p -u -c -r1.210 datatype.sgml
*** doc/src/sgml/datatype.sgml 13 Oct 2007 23:06:26 -0000 1.210
--- doc/src/sgml/datatype.sgml 16 Oct 2007 15:00:21 -0000
*************** SELECT * FROM test;
*** 3232,3237 ****
--- 3232,3300 ----
+
+ Transaction Snapshot Type
+
+
+ txid_snapshot
+
+
+
+ The data type txid_snapshot stores info about what
+ transaction ids are visible in a particular moment of time.
+ Components are described in
+ .
+
+
+
+ Snapshot components
+
+
+
+ Name
+ Query Function
+ Description
+
+
+
+
+
+ xmin
+ txid_snapshot_xmin()
+ Smallest txid that may be active. Below it all txids are visible.
+
+
+
+ xmax
+ txid_snapshot_xmax()
+ Next unassigned txid. Above it all txids are unassigned, thus invisible.
+
+
+
+ xip_list
+ txid_snapshot_xip()
+ Active txids at the time of snapshot. All of them are between xmin and xmax.
+ A txid that is xmin <= txid < xmax and not in this list is visible.
+
+
+
+
+
+
+
+
+ Snapshot's textual representation is [xmin]:[xmax]:[xip_list]
+ for example 10:20:10,14,15 means
+ xmin=10 xmax=20 xip_list=10,14,15.
+
+
+
+ Functions for getting and querying transaction ids and snapshots
+ are described in
+ .
+
+
+
UUID Type
Index: doc/src/sgml/func.sgml
===================================================================
RCS file: /projects/cvsroot/pgsql/doc/src/sgml/func.sgml,v
retrieving revision 1.401
diff -p -u -c -r1.401 func.sgml
*** doc/src/sgml/func.sgml 13 Oct 2007 23:06:26 -0000 1.401
--- doc/src/sgml/func.sgml 16 Oct 2007 15:00:22 -0000
*************** SELECT pg_type_is_visible('myschema.widg
*** 12049,12054 ****
--- 12049,12060 ----
as well.
+
+
+
+ Transaction ID and snapshot Functions
+
+
txid_current
*************** SELECT pg_type_is_visible('myschema.widg
*** 12126,12131 ****
--- 12132,12389 ----
export a 64-bit format that is extended with an epoch> counter
so that it will not wrap around for the life of an installation.
+
+ describes the txid_snapshot
+ type in more detail.
+
+
+
+ Queue example
+
+ The main use of the functions comes from the fact that user can
+ query txids that were committed between 2 snapshots. As this is
+ slightly tricky, it is described here in details on the example
+ of simple queue table.
+
+
+ Let there be queue data table, where several users can do INSERTs
+ in parallel, always attaching current transaction ID with event row:
+
+ CREATE TABLE events (
+ ev_id bigserial,
+ ev_txid int8 not null default txid_current(),
+ ev_data text
+ );
+ CREATE INDEX ev_txid_idx ON events (ev_txid);
+
+
+
+ Then let there be table for snapshots, into which a separate
+ process inserts a row with current snapshot after each 5 seconds
+ (for example). Lets call it 'ticks' table:
+
+ CREATE TABLE ticks (
+ tick_id serial PRIMARY KEY,
+ tick_snapshot txid_snapshopt NOT NULL DEFAULT txid_current_snapshot()
+ );
+
+
+
+ Now if someone wants to read events from the queue table, then at first
+ he needs to get 2 rows with snapshots from ticks
+ table, then query for txids that were committed between those
+ 2 snapshots on events table.
+
+
+ Because the txids and snapshots are tied to PostgreSQL internal
+ MVCC mechanism, the reader can be certain that the txid range
+ queried stays constant.
+
+
+
+
+ How to select txids that are between snapshots
+
+ The simplest query how to fetch events between snapshots is following:
+
+ SELECT ev_data FROM events
+ WHERE ev_txid >= txid_snapshot_xmin(:snap1)
+ AND ev_txid < txid_snapshot_xmax(:snap2)
+ AND NOT txid_visible_in_snapshot(ev_txid, :snap1)
+ AND txid_visible_in_snapshot(ev_txid, :snap2);
+
+
+ But it will have problems if there are long transactions running. That means
+ the snap1.xmin will stay at the position of running transaction and the
+ range will get very large.
+
+
+ This can be fixed by fetching only [snap1.xmax..snap2.xmax]
+ by range and fetching possible txids below snap1.xmax explicitly:
+
+
+ SELECT ev_data FROM events
+ WHERE ((ev_txid >= txid_snapshot_xmax(:snap1) AND ev_txid < txid_snapshot_xmax(:snap2))
+ OR
+ (ev_txid IN (SELECT * FROM txid_snapshot_xip(:snap1))))
+ AND NOT txid_visible_in_snapshot(ev_txid, :snap1)
+ AND txid_visible_in_snapshot(ev_txid, :snap2);
+
+
+ But that is also slightly inefficient as long transactions can be open during
+ several snapshots. So it would be good to pick out exact transactions that
+ were open at the time of snap1 and committed before snap2. That can be done
+ with following query:
+
+
+ SELECT id1 FROM txid_snapshot_xip(:snap1) id1
+ LEFT JOIN txid_snapshot_xip(:snap2) id2 ON (id1 = id2)
+ WHERE id2 IS NULL
+
+
+ As txids returned by last query are certainly interesting, their visiblity
+ does not need additional checks. That means the final query can be in form:
+
+
+ SELECT ev_data FROM events
+ WHERE ((ev_txid >= txid_snapshot_xmax(:snap1)
+ AND ev_txid < txid_snapshot_xmax(:snap2)
+ AND NOT txid_visible_in_snapshot(ev_txid, :snap1)
+ AND txid_visible_in_snapshot(ev_txid, :snap2)
+ ) OR ev_txid IN (SELECT id1 FROM txid_snapshot_xip(:snap1) id1
+ LEFT JOIN txid_snapshot_xip(:snap2) id2 ON (id1 = id2)
+ WHERE id2 IS NULL));
+
+ Although the above queries are technically correct, PostgreSQL fails to plan
+ them efficiently. The actual query should always be made with actual
+ values written in.
+
+
+
+
+ Realistic code
+
+ Now the actual function that creates the fetch query can be written.
+ Here is additional optimization used - as most open transactions in
+ snap1 are near snap1.xmax, the range is
+ lowered to include nearby ones:
+
+
+ -- common helper, not tied to any table structure
+ CREATE FUNCTION txid_query_helper(
+ IN snap1 txid_snapshot, -- start snapshot
+ IN snap2 txid_snapshot, -- end snapshot
+ IN range_threshold int4, --
+ OUT range_start int8,
+ OUT range_end int8,
+ OUT committed_list text)
+ RETURNS record AS $$
+ DECLARE
+ tx int8;
+ BEGIN
+ -- Set initial range to [snap1.xmax .. snap2.xmax]
+ range_start = txid_snapshot_xmax(snap1);
+ range_end = txid_snapshot_xmax(snap2);
+ committed_list = NULL;
+
+ IF range_end < range_start THEN
+ RAISE EXCEPTION 'wrong order for snapshots';
+ END IF;
+
+ FOR tx IN
+ -- Pick out txids that were actually committed between snapshots
+ SELECT id1 FROM txid_snapshot_xip(snap1) id1
+ LEFT JOIN txid_snapshot_xip(snap2) id2 ON (id1 = id2)
+ WHERE id2 IS NULL
+ ORDER BY id1 DESC -- nearest to range_start first
+ LOOP
+ IF tx + range_threshold >= range_start THEN
+ -- If txid is nearby range_start include it in range
+ range_start = tx;
+ ELSE
+ -- Otherwise create list of txids that need to be picked
+ -- up by explicit ID.
+ IF committed_list IS NULL THEN
+ committed_list = tx;
+ ELSE
+ committed_list = committed_list || ',' || tx;
+ END IF;
+ END IF;
+ END LOOP;
+ RETURN;
+ END;
+ $$ LANGUAGE plpgsql STRICT IMMUTABLE;
+
+ -- actual query maker, tied to example table structure
+ CREATE OR REPLACE FUNCTION make_events_fetch_query(tick1_id integer, tick2_id integer)
+ RETURNS text AS $$
+ DECLARE
+ sql text;
+ snap1 txid_snapshot;
+ snap2 txid_snapshot;
+ BEGIN
+ SELECt t1.tick_snapshot, t2.tick_snapshot
+ INTO snap1, snap2
+ FROM ticks t1, ticks t2
+ WHERE t1.tick_id = tick1_id
+ AND t2.tick_id = tick2_id;
+
+ SELECT 'SELECT ev_id, ev_data FROM events, ticks t1, ticks t2'
+ || ' WHERE t1.tick_id = ' || tick1_id || ' AND t2.tick_id = ' || tick2_id
+ || ' AND ('
+ || '(ev_txid >= ' || q.range_start
+ || ' AND ev_txid < ' || q.range_end
+ || ' AND txid_visible_in_snapshot(t1.tick_snapshot)'
+ || ' AND NOT txid_visible_in_snapshot(t2.tick_snapshot)'
+ || ')'
+ || coalesce(' OR ev_txid IN (' || q.committed_list || ')', '')
+ || ') ORDER BY ev_id'
+ INTO sql
+ FROM txid_query_helper(snap1, snap2, 10) q;
+ RETURN sql;
+ END;
+ $$ LANGUAGE plpgsql;
+
+
+
+
+
+ More Realistic Code
+
+ Here is show possible common helper function that creates
+ correct expression:
+
+ -- common helper, not tied to any table structure
+ CREATE OR REPLACE FUNCTION txid_query_expr(
+ snap1 txid_snapshot,
+ snap2 txid_snapshot,
+ range_threshold integer,
+ txid_field text,
+ snap1_field text,
+ snap2_field text)
+ RETURNS text AS $$
+ DECLARE
+ expr text;
+ BEGIN
+ SELECT '((' || txid_field || ' >= ' || q.range_start
+ || ' AND ' || txid_field || ' < ' || q.range_end
+ || ' AND txid_visible_in_snapshot(' || snap1_field || ')'
+ || ' AND NOT txid_visible_in_snapshot(' || snap2_field || ')'
+ || ')'
+ || coalesce(' OR ' || txid_field || ' IN (' || q.committed_list || ')', '')
+ || ')'
+ INTO expr
+ FROM txid_query_helper(snap1, snap2, range_threshold) q;
+ RETURN expr;
+ END;
+ $$ LANGUAGE plpgsql STRICT IMMUTABLE;
+
+ -- actual query maker, tied to example table structure
+ CREATE OR REPLACE FUNCTION make_events_fetch_query2(tick1_id integer, tick2_id integer)
+ RETURNS text AS $$
+ DECLARE
+ sql text;
+ expr text;
+ BEGIN
+ select txid_query_expr(t1.tick_snapshot, t2.tick_snapshot, 10,
+ 'ev_txid', 't1.tick_snapshot', 't2.tick_snapshot')
+ INTO expr
+ FROM ticks t1, ticks t2
+ WHERE t1.tick_id = tick1_id
+ AND t2.tick_id = tick2_id;
+
+ sql = 'SELECT ev_id, ev_data FROM events, ticks t1, ticks t2'
+ || ' WHERE t1.tick_id = ' || tick1_id
+ || ' AND t2.tick_id = ' || tick2_id
+ || ' AND ' || expr
+ || ' ORDER BY ev_id';
+
+ RETURN sql;
+ END;
+ $$ LANGUAGE plpgsql;
+
+
+