Index: doc/src/sgml/datatype.sgml =================================================================== RCS file: /projects/cvsroot/pgsql/doc/src/sgml/datatype.sgml,v retrieving revision 1.210 diff -p -u -c -r1.210 datatype.sgml *** doc/src/sgml/datatype.sgml 13 Oct 2007 23:06:26 -0000 1.210 --- doc/src/sgml/datatype.sgml 16 Oct 2007 15:00:21 -0000 *************** SELECT * FROM test; *** 3232,3237 **** --- 3232,3300 ---- + + Transaction Snapshot Type + + + txid_snapshot + + + + The data type txid_snapshot stores info about what + transaction ids are visible in a particular moment of time. + Components are described in + . + + + + Snapshot components + + + + Name + Query Function + Description + + + + + + xmin + txid_snapshot_xmin() + Smallest txid that may be active. Below it all txids are visible. + + + + xmax + txid_snapshot_xmax() + Next unassigned txid. Above it all txids are unassigned, thus invisible. + + + + xip_list + txid_snapshot_xip() + Active txids at the time of snapshot. All of them are between xmin and xmax. + A txid that is xmin <= txid < xmax and not in this list is visible. + + + + + +
+ + + Snapshot's textual representation is [xmin]:[xmax]:[xip_list] + for example 10:20:10,14,15 means + xmin=10 xmax=20 xip_list=10,14,15. + + + + Functions for getting and querying transaction ids and snapshots + are described in + . + +
+ <acronym>UUID</acronym> Type Index: doc/src/sgml/func.sgml =================================================================== RCS file: /projects/cvsroot/pgsql/doc/src/sgml/func.sgml,v retrieving revision 1.401 diff -p -u -c -r1.401 func.sgml *** doc/src/sgml/func.sgml 13 Oct 2007 23:06:26 -0000 1.401 --- doc/src/sgml/func.sgml 16 Oct 2007 15:00:22 -0000 *************** SELECT pg_type_is_visible('myschema.widg *** 12049,12054 **** --- 12049,12060 ---- as well. + + + + Transaction ID and snapshot Functions + + txid_current *************** SELECT pg_type_is_visible('myschema.widg *** 12126,12131 **** --- 12132,12389 ---- export a 64-bit format that is extended with an epoch counter so that it will not wrap around for the life of an installation. + + describes the txid_snapshot + type in more detail. + + + + Queue example + + The main use of the functions comes from the fact that user can + query txids that were committed between 2 snapshots. As this is + slightly tricky, it is described here in details on the example + of simple queue table. + + + Let there be queue data table, where several users can do INSERTs + in parallel, always attaching current transaction ID with event row: + + CREATE TABLE events ( + ev_id bigserial, + ev_txid int8 not null default txid_current(), + ev_data text + ); + CREATE INDEX ev_txid_idx ON events (ev_txid); + + + + Then let there be table for snapshots, into which a separate + process inserts a row with current snapshot after each 5 seconds + (for example). Lets call it 'ticks' table: + + CREATE TABLE ticks ( + tick_id serial PRIMARY KEY, + tick_snapshot txid_snapshopt NOT NULL DEFAULT txid_current_snapshot() + ); + + + + Now if someone wants to read events from the queue table, then at first + he needs to get 2 rows with snapshots from ticks + table, then query for txids that were committed between those + 2 snapshots on events table. + + + Because the txids and snapshots are tied to PostgreSQL internal + MVCC mechanism, the reader can be certain that the txid range + queried stays constant. + + + + + How to select txids that are between snapshots + + The simplest query how to fetch events between snapshots is following: + + SELECT ev_data FROM events + WHERE ev_txid >= txid_snapshot_xmin(:snap1) + AND ev_txid < txid_snapshot_xmax(:snap2) + AND NOT txid_visible_in_snapshot(ev_txid, :snap1) + AND txid_visible_in_snapshot(ev_txid, :snap2); + + + But it will have problems if there are long transactions running. That means + the snap1.xmin will stay at the position of running transaction and the + range will get very large. + + + This can be fixed by fetching only [snap1.xmax..snap2.xmax] + by range and fetching possible txids below snap1.xmax explicitly: + + + SELECT ev_data FROM events + WHERE ((ev_txid >= txid_snapshot_xmax(:snap1) AND ev_txid < txid_snapshot_xmax(:snap2)) + OR + (ev_txid IN (SELECT * FROM txid_snapshot_xip(:snap1)))) + AND NOT txid_visible_in_snapshot(ev_txid, :snap1) + AND txid_visible_in_snapshot(ev_txid, :snap2); + + + But that is also slightly inefficient as long transactions can be open during + several snapshots. So it would be good to pick out exact transactions that + were open at the time of snap1 and committed before snap2. That can be done + with following query: + + + SELECT id1 FROM txid_snapshot_xip(:snap1) id1 + LEFT JOIN txid_snapshot_xip(:snap2) id2 ON (id1 = id2) + WHERE id2 IS NULL + + + As txids returned by last query are certainly interesting, their visiblity + does not need additional checks. That means the final query can be in form: + + + SELECT ev_data FROM events + WHERE ((ev_txid >= txid_snapshot_xmax(:snap1) + AND ev_txid < txid_snapshot_xmax(:snap2) + AND NOT txid_visible_in_snapshot(ev_txid, :snap1) + AND txid_visible_in_snapshot(ev_txid, :snap2) + ) OR ev_txid IN (SELECT id1 FROM txid_snapshot_xip(:snap1) id1 + LEFT JOIN txid_snapshot_xip(:snap2) id2 ON (id1 = id2) + WHERE id2 IS NULL)); + + Although the above queries are technically correct, PostgreSQL fails to plan + them efficiently. The actual query should always be made with actual + values written in. + + + + + Realistic code + + Now the actual function that creates the fetch query can be written. + Here is additional optimization used - as most open transactions in + snap1 are near snap1.xmax, the range is + lowered to include nearby ones: + + + -- common helper, not tied to any table structure + CREATE FUNCTION txid_query_helper( + IN snap1 txid_snapshot, -- start snapshot + IN snap2 txid_snapshot, -- end snapshot + IN range_threshold int4, -- + OUT range_start int8, + OUT range_end int8, + OUT committed_list text) + RETURNS record AS $$ + DECLARE + tx int8; + BEGIN + -- Set initial range to [snap1.xmax .. snap2.xmax] + range_start = txid_snapshot_xmax(snap1); + range_end = txid_snapshot_xmax(snap2); + committed_list = NULL; + + IF range_end < range_start THEN + RAISE EXCEPTION 'wrong order for snapshots'; + END IF; + + FOR tx IN + -- Pick out txids that were actually committed between snapshots + SELECT id1 FROM txid_snapshot_xip(snap1) id1 + LEFT JOIN txid_snapshot_xip(snap2) id2 ON (id1 = id2) + WHERE id2 IS NULL + ORDER BY id1 DESC -- nearest to range_start first + LOOP + IF tx + range_threshold >= range_start THEN + -- If txid is nearby range_start include it in range + range_start = tx; + ELSE + -- Otherwise create list of txids that need to be picked + -- up by explicit ID. + IF committed_list IS NULL THEN + committed_list = tx; + ELSE + committed_list = committed_list || ',' || tx; + END IF; + END IF; + END LOOP; + RETURN; + END; + $$ LANGUAGE plpgsql STRICT IMMUTABLE; + + -- actual query maker, tied to example table structure + CREATE OR REPLACE FUNCTION make_events_fetch_query(tick1_id integer, tick2_id integer) + RETURNS text AS $$ + DECLARE + sql text; + snap1 txid_snapshot; + snap2 txid_snapshot; + BEGIN + SELECt t1.tick_snapshot, t2.tick_snapshot + INTO snap1, snap2 + FROM ticks t1, ticks t2 + WHERE t1.tick_id = tick1_id + AND t2.tick_id = tick2_id; + + SELECT 'SELECT ev_id, ev_data FROM events, ticks t1, ticks t2' + || ' WHERE t1.tick_id = ' || tick1_id || ' AND t2.tick_id = ' || tick2_id + || ' AND (' + || '(ev_txid >= ' || q.range_start + || ' AND ev_txid < ' || q.range_end + || ' AND txid_visible_in_snapshot(t1.tick_snapshot)' + || ' AND NOT txid_visible_in_snapshot(t2.tick_snapshot)' + || ')' + || coalesce(' OR ev_txid IN (' || q.committed_list || ')', '') + || ') ORDER BY ev_id' + INTO sql + FROM txid_query_helper(snap1, snap2, 10) q; + RETURN sql; + END; + $$ LANGUAGE plpgsql; + + + + + + More Realistic Code + + Here is show possible common helper function that creates + correct expression: + + -- common helper, not tied to any table structure + CREATE OR REPLACE FUNCTION txid_query_expr( + snap1 txid_snapshot, + snap2 txid_snapshot, + range_threshold integer, + txid_field text, + snap1_field text, + snap2_field text) + RETURNS text AS $$ + DECLARE + expr text; + BEGIN + SELECT '((' || txid_field || ' >= ' || q.range_start + || ' AND ' || txid_field || ' < ' || q.range_end + || ' AND txid_visible_in_snapshot(' || snap1_field || ')' + || ' AND NOT txid_visible_in_snapshot(' || snap2_field || ')' + || ')' + || coalesce(' OR ' || txid_field || ' IN (' || q.committed_list || ')', '') + || ')' + INTO expr + FROM txid_query_helper(snap1, snap2, range_threshold) q; + RETURN expr; + END; + $$ LANGUAGE plpgsql STRICT IMMUTABLE; + + -- actual query maker, tied to example table structure + CREATE OR REPLACE FUNCTION make_events_fetch_query2(tick1_id integer, tick2_id integer) + RETURNS text AS $$ + DECLARE + sql text; + expr text; + BEGIN + select txid_query_expr(t1.tick_snapshot, t2.tick_snapshot, 10, + 'ev_txid', 't1.tick_snapshot', 't2.tick_snapshot') + INTO expr + FROM ticks t1, ticks t2 + WHERE t1.tick_id = tick1_id + AND t2.tick_id = tick2_id; + + sql = 'SELECT ev_id, ev_data FROM events, ticks t1, ticks t2' + || ' WHERE t1.tick_id = ' || tick1_id + || ' AND t2.tick_id = ' || tick2_id + || ' AND ' || expr + || ' ORDER BY ev_id'; + + RETURN sql; + END; + $$ LANGUAGE plpgsql; + + +