Re: Joins on TID

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: Joins on TID
Дата
Msg-id 1853.1545453106@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Joins on TID  (Tom Lane <tgl@sss.pgh.pa.us>)
Ответы Re: Joins on TID  (Simon Riggs <simon@2ndquadrant.com>)
Список pgsql-hackers
BTW, if we're to start taking joins on TID seriously, we should also
add the missing hash opclass for TID, so that you can do hash joins
when dealing with a lot of rows.

(In principle this also enables things like hash aggregation, though
I'm not very clear on a use-case for grouping by TID.)

            regards, tom lane

diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c
index 41d540b..7b25947 100644
*** a/src/backend/utils/adt/tid.c
--- b/src/backend/utils/adt/tid.c
***************
*** 20,25 ****
--- 20,26 ----
  #include <math.h>
  #include <limits.h>

+ #include "access/hash.h"
  #include "access/heapam.h"
  #include "access/sysattr.h"
  #include "catalog/namespace.h"
*************** tidsmaller(PG_FUNCTION_ARGS)
*** 239,244 ****
--- 240,272 ----
      PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1, arg2) <= 0 ? arg1 : arg2);
  }

+ Datum
+ hashtid(PG_FUNCTION_ARGS)
+ {
+     ItemPointer key = PG_GETARG_ITEMPOINTER(0);
+
+     /*
+      * While you'll probably have a lot of trouble with a compiler that
+      * insists on appending pad space to struct ItemPointerData, we can at
+      * least make this code work, by not using sizeof(ItemPointerData).
+      * Instead rely on knowing the sizes of the component fields.
+      */
+     return hash_any((unsigned char *) key,
+                     sizeof(BlockIdData) + sizeof(OffsetNumber));
+ }
+
+ Datum
+ hashtidextended(PG_FUNCTION_ARGS)
+ {
+     ItemPointer key = PG_GETARG_ITEMPOINTER(0);
+     uint64        seed = PG_GETARG_INT64(1);
+
+     /* As above */
+     return hash_any_extended((unsigned char *) key,
+                              sizeof(BlockIdData) + sizeof(OffsetNumber),
+                              seed);
+ }
+

  /*
   *    Functions to get latest tid of a specified tuple.
diff --git a/src/include/catalog/pg_amop.dat b/src/include/catalog/pg_amop.dat
index e689c9b..436f1bd 100644
*** a/src/include/catalog/pg_amop.dat
--- b/src/include/catalog/pg_amop.dat
***************
*** 1013,1018 ****
--- 1013,1022 ----
  { amopfamily => 'hash/cid_ops', amoplefttype => 'cid', amoprighttype => 'cid',
    amopstrategy => '1', amopopr => '=(cid,cid)', amopmethod => 'hash' },

+ # tid_ops
+ { amopfamily => 'hash/tid_ops', amoplefttype => 'tid', amoprighttype => 'tid',
+   amopstrategy => '1', amopopr => '=(tid,tid)', amopmethod => 'hash' },
+
  # text_pattern_ops
  { amopfamily => 'hash/text_pattern_ops', amoplefttype => 'text',
    amoprighttype => 'text', amopstrategy => '1', amopopr => '=(text,text)',
diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat
index bbcee26..8ddb699 100644
*** a/src/include/catalog/pg_amproc.dat
--- b/src/include/catalog/pg_amproc.dat
***************
*** 340,345 ****
--- 340,349 ----
    amprocrighttype => 'cid', amprocnum => '1', amproc => 'hashint4' },
  { amprocfamily => 'hash/cid_ops', amproclefttype => 'cid',
    amprocrighttype => 'cid', amprocnum => '2', amproc => 'hashint4extended' },
+ { amprocfamily => 'hash/tid_ops', amproclefttype => 'tid',
+   amprocrighttype => 'tid', amprocnum => '1', amproc => 'hashtid' },
+ { amprocfamily => 'hash/tid_ops', amproclefttype => 'tid',
+   amprocrighttype => 'tid', amprocnum => '2', amproc => 'hashtidextended' },
  { amprocfamily => 'hash/text_pattern_ops', amproclefttype => 'text',
    amprocrighttype => 'text', amprocnum => '1', amproc => 'hashtext' },
  { amprocfamily => 'hash/text_pattern_ops', amproclefttype => 'text',
diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat
index 5178d04..c451d36 100644
*** a/src/include/catalog/pg_opclass.dat
--- b/src/include/catalog/pg_opclass.dat
***************
*** 167,172 ****
--- 167,174 ----
    opcintype => 'xid' },
  { opcmethod => 'hash', opcname => 'cid_ops', opcfamily => 'hash/cid_ops',
    opcintype => 'cid' },
+ { opcmethod => 'hash', opcname => 'tid_ops', opcfamily => 'hash/tid_ops',
+   opcintype => 'tid' },
  { opcmethod => 'hash', opcname => 'text_pattern_ops',
    opcfamily => 'hash/text_pattern_ops', opcintype => 'text',
    opcdefault => 'f' },
diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat
index 2abd531..e8452e1 100644
*** a/src/include/catalog/pg_operator.dat
--- b/src/include/catalog/pg_operator.dat
***************
*** 204,212 ****
    oprrest => 'eqsel', oprjoin => 'eqjoinsel' },

  { oid => '387', oid_symbol => 'TIDEqualOperator', descr => 'equal',
!   oprname => '=', oprcanmerge => 't', oprleft => 'tid', oprright => 'tid',
!   oprresult => 'bool', oprcom => '=(tid,tid)', oprnegate => '<>(tid,tid)',
!   oprcode => 'tideq', oprrest => 'eqsel', oprjoin => 'eqjoinsel' },
  { oid => '402', descr => 'not equal',
    oprname => '<>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
    oprcom => '<>(tid,tid)', oprnegate => '=(tid,tid)', oprcode => 'tidne',
--- 204,213 ----
    oprrest => 'eqsel', oprjoin => 'eqjoinsel' },

  { oid => '387', oid_symbol => 'TIDEqualOperator', descr => 'equal',
!   oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'tid',
!   oprright => 'tid', oprresult => 'bool', oprcom => '=(tid,tid)',
!   oprnegate => '<>(tid,tid)', oprcode => 'tideq', oprrest => 'eqsel',
!   oprjoin => 'eqjoinsel' },
  { oid => '402', descr => 'not equal',
    oprname => '<>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
    oprcom => '<>(tid,tid)', oprnegate => '=(tid,tid)', oprcode => 'tidne',
diff --git a/src/include/catalog/pg_opfamily.dat b/src/include/catalog/pg_opfamily.dat
index fe8a324..c5ea37b 100644
*** a/src/include/catalog/pg_opfamily.dat
--- b/src/include/catalog/pg_opfamily.dat
***************
*** 112,117 ****
--- 112,119 ----
    opfmethod => 'hash', opfname => 'xid_ops' },
  { oid => '2226',
    opfmethod => 'hash', opfname => 'cid_ops' },
+ { oid => '2227',
+   opfmethod => 'hash', opfname => 'tid_ops' },
  { oid => '2229',
    opfmethod => 'hash', opfname => 'text_pattern_ops' },
  { oid => '2231',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index acb0154..6e1e1df 100644
*** a/src/include/catalog/pg_proc.dat
--- b/src/include/catalog/pg_proc.dat
***************
*** 2484,2489 ****
--- 2484,2495 ----
  { oid => '2796', descr => 'smaller of two',
    proname => 'tidsmaller', prorettype => 'tid', proargtypes => 'tid tid',
    prosrc => 'tidsmaller' },
+ { oid => '2233', descr => 'hash',
+   proname => 'hashtid', prorettype => 'int4', proargtypes => 'tid',
+   prosrc => 'hashtid' },
+ { oid => '2234', descr => 'hash',
+   proname => 'hashtidextended', prorettype => 'int8', proargtypes => 'tid int8',
+   prosrc => 'hashtidextended' },

  { oid => '1296',
    proname => 'timedate_pl', prolang => '14', prorettype => 'timestamp',
diff --git a/src/test/regress/expected/tidscan.out b/src/test/regress/expected/tidscan.out
index 521ed1b..6a8afcb 100644
*** a/src/test/regress/expected/tidscan.out
--- b/src/test/regress/expected/tidscan.out
*************** EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF
*** 176,179 ****
--- 176,223 ----
  UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
  ERROR:  cursor "c" is not positioned on a row
  ROLLBACK;
+ -- bulk joins on CTID
+ -- (these plans don't use TID scans, but this still seems like an
+ -- appropriate place for these tests)
+ EXPLAIN (COSTS OFF)
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+                QUERY PLAN
+ ----------------------------------------
+  Aggregate
+    ->  Hash Join
+          Hash Cond: (t1.ctid = t2.ctid)
+          ->  Seq Scan on tenk1 t1
+          ->  Hash
+                ->  Seq Scan on tenk1 t2
+ (6 rows)
+
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+  count
+ -------
+  10000
+ (1 row)
+
+ SET enable_hashjoin TO off;
+ EXPLAIN (COSTS OFF)
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+                QUERY PLAN
+ -----------------------------------------
+  Aggregate
+    ->  Merge Join
+          Merge Cond: (t1.ctid = t2.ctid)
+          ->  Sort
+                Sort Key: t1.ctid
+                ->  Seq Scan on tenk1 t1
+          ->  Sort
+                Sort Key: t2.ctid
+                ->  Seq Scan on tenk1 t2
+ (9 rows)
+
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+  count
+ -------
+  10000
+ (1 row)
+
+ RESET enable_hashjoin;
  DROP TABLE tidscan;
diff --git a/src/test/regress/sql/tidscan.sql b/src/test/regress/sql/tidscan.sql
index a8472e0..aa5c997 100644
*** a/src/test/regress/sql/tidscan.sql
--- b/src/test/regress/sql/tidscan.sql
*************** EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF
*** 63,66 ****
--- 63,78 ----
  UPDATE tidscan SET id = -id WHERE CURRENT OF c RETURNING *;
  ROLLBACK;

+ -- bulk joins on CTID
+ -- (these plans don't use TID scans, but this still seems like an
+ -- appropriate place for these tests)
+ EXPLAIN (COSTS OFF)
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+ SET enable_hashjoin TO off;
+ EXPLAIN (COSTS OFF)
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
+ RESET enable_hashjoin;
+
  DROP TABLE tidscan;

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Alexander Korotkov
Дата:
Сообщение: Re: pgsql: Check for conflicting queries during replay of gistvacuumpage()
Следующее
От: Simon Riggs
Дата:
Сообщение: Re: Joins on TID