Re: Index-only scans vs. partially-retrievable indexes

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: Index-only scans vs. partially-retrievable indexes
Дата
Msg-id 3443352.1641165743@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Index-only scans vs. partially-retrievable indexes  (Tom Lane <tgl@sss.pgh.pa.us>)
Список pgsql-hackers
I wrote:
> Unless somebody's got a better idea, I'll push forward with making
> this happen.

Here's a proposed patch for that.  I ended up reverting the code
changes of 4ace45677 in favor of an alternative I'd considered
previously, which is to mark the indextlist elements as resjunk
if they're non-retrievable, and then make setrefs.c deal with
not relying on those elements.  This avoids the EXPLAIN breakage
I showed, since now we still have the indextlist elements needed
to interpret the indexqual and indexorderby expressions.

0001 is what I propose to back-patch (modulo putting the new
IndexOnlyScan.recheckqual field at the end, in the back branches).

In addition, it seems to me that we can simplify check_index_only()
by reverting b5febc1d1's changes, because we've now been forced to
put in a non-half-baked solution for the problem it addressed.
That's 0002 attached.  I'd be inclined not to back-patch that though.

            regards, tom lane

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 09f5253abb..60d0d4ad0f 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1746,7 +1746,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
         case T_IndexOnlyScan:
             show_scan_qual(((IndexOnlyScan *) plan)->indexqual,
                            "Index Cond", planstate, ancestors, es);
-            if (((IndexOnlyScan *) plan)->indexqual)
+            if (((IndexOnlyScan *) plan)->recheckqual)
                 show_instrumentation_count("Rows Removed by Index Recheck", 2,
                                            planstate, es);
             show_scan_qual(((IndexOnlyScan *) plan)->indexorderby,
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 0754e28a9a..8fee958135 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -214,13 +214,11 @@ IndexOnlyNext(IndexOnlyScanState *node)

         /*
          * If the index was lossy, we have to recheck the index quals.
-         * (Currently, this can never happen, but we should support the case
-         * for possible future use, eg with GiST indexes.)
          */
         if (scandesc->xs_recheck)
         {
             econtext->ecxt_scantuple = slot;
-            if (!ExecQualAndReset(node->indexqual, econtext))
+            if (!ExecQualAndReset(node->recheckqual, econtext))
             {
                 /* Fails recheck, so drop it and loop back for another */
                 InstrCountFiltered2(node, 1);
@@ -555,8 +553,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
      */
     indexstate->ss.ps.qual =
         ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
-    indexstate->indexqual =
-        ExecInitQual(node->indexqual, (PlanState *) indexstate);
+    indexstate->recheckqual =
+        ExecInitQual(node->recheckqual, (PlanState *) indexstate);

     /*
      * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index df0b747883..18e778e856 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -519,6 +519,7 @@ _copyIndexOnlyScan(const IndexOnlyScan *from)
      */
     COPY_SCALAR_FIELD(indexid);
     COPY_NODE_FIELD(indexqual);
+    COPY_NODE_FIELD(recheckqual);
     COPY_NODE_FIELD(indexorderby);
     COPY_NODE_FIELD(indextlist);
     COPY_SCALAR_FIELD(indexorderdir);
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 91a89b6d51..6c0979ec35 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -580,6 +580,7 @@ _outIndexOnlyScan(StringInfo str, const IndexOnlyScan *node)

     WRITE_OID_FIELD(indexid);
     WRITE_NODE_FIELD(indexqual);
+    WRITE_NODE_FIELD(recheckqual);
     WRITE_NODE_FIELD(indexorderby);
     WRITE_NODE_FIELD(indextlist);
     WRITE_ENUM_FIELD(indexorderdir, ScanDirection);
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index d79af6e56e..2a699c216b 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -1884,6 +1884,7 @@ _readIndexOnlyScan(void)

     READ_OID_FIELD(indexid);
     READ_NODE_FIELD(indexqual);
+    READ_NODE_FIELD(recheckqual);
     READ_NODE_FIELD(indexorderby);
     READ_NODE_FIELD(indextlist);
     READ_ENUM_FIELD(indexorderdir, ScanDirection);
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index ff2b14e880..4b7347bc0e 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -20,7 +20,6 @@
 #include <math.h>

 #include "access/sysattr.h"
-#include "catalog/pg_am.h"
 #include "catalog/pg_class.h"
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
@@ -189,10 +188,10 @@ static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
                                  ScanDirection indexscandir);
 static IndexOnlyScan *make_indexonlyscan(List *qptlist, List *qpqual,
                                          Index scanrelid, Oid indexid,
-                                         List *indexqual, List *indexorderby,
+                                         List *indexqual, List *recheckqual,
+                                         List *indexorderby,
                                          List *indextlist,
                                          ScanDirection indexscandir);
-static List *make_indexonly_tlist(IndexOptInfo *indexinfo);
 static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid,
                                               List *indexqual,
                                               List *indexqualorig);
@@ -623,7 +622,7 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags)
         if (best_path->pathtype == T_IndexOnlyScan)
         {
             /* For index-only scan, the preferred tlist is the index's */
-            tlist = copyObject(make_indexonly_tlist(((IndexPath *) best_path)->indexinfo));
+            tlist = copyObject(((IndexPath *) best_path)->indexinfo->indextlist);

             /*
              * Transfer sortgroupref data to the replacement tlist, if
@@ -2934,7 +2933,8 @@ create_indexscan_plan(PlannerInfo *root,
     List       *indexclauses = best_path->indexclauses;
     List       *indexorderbys = best_path->indexorderbys;
     Index        baserelid = best_path->path.parent->relid;
-    Oid            indexoid = best_path->indexinfo->indexoid;
+    IndexOptInfo *indexinfo = best_path->indexinfo;
+    Oid            indexoid = indexinfo->indexoid;
     List       *qpqual;
     List       *stripped_indexquals;
     List       *fixed_indexquals;
@@ -3064,6 +3064,24 @@ create_indexscan_plan(PlannerInfo *root,
         }
     }

+    /*
+     * For an index-only scan, we must mark indextlist entries as resjunk if
+     * they are columns that the index AM can't return; this cues setrefs.c to
+     * not generate references to those columns.
+     */
+    if (indexonly)
+    {
+        int            i = 0;
+
+        foreach(l, indexinfo->indextlist)
+        {
+            TargetEntry *indextle = (TargetEntry *) lfirst(l);
+
+            indextle->resjunk = !indexinfo->canreturn[i];
+            i++;
+        }
+    }
+
     /* Finally ready to build the plan node */
     if (indexonly)
         scan_plan = (Scan *) make_indexonlyscan(tlist,
@@ -3071,8 +3089,9 @@ create_indexscan_plan(PlannerInfo *root,
                                                 baserelid,
                                                 indexoid,
                                                 fixed_indexquals,
+                                                stripped_indexquals,
                                                 fixed_indexorderbys,
-                                                make_indexonly_tlist(best_path->indexinfo),
+                                                indexinfo->indextlist,
                                                 best_path->indexscandir);
     else
         scan_plan = (Scan *) make_indexscan(tlist,
@@ -5444,6 +5463,7 @@ make_indexonlyscan(List *qptlist,
                    Index scanrelid,
                    Oid indexid,
                    List *indexqual,
+                   List *recheckqual,
                    List *indexorderby,
                    List *indextlist,
                    ScanDirection indexscandir)
@@ -5458,6 +5478,7 @@ make_indexonlyscan(List *qptlist,
     node->scan.scanrelid = scanrelid;
     node->indexid = indexid;
     node->indexqual = indexqual;
+    node->recheckqual = recheckqual;
     node->indexorderby = indexorderby;
     node->indextlist = indextlist;
     node->indexorderdir = indexscandir;
@@ -5465,53 +5486,6 @@ make_indexonlyscan(List *qptlist,
     return node;
 }

-/*
- * make_indexonly_tlist
- *
- * Construct the indextlist for an IndexOnlyScan plan node.
- * We must replace any column that can't be returned by the index AM
- * with a null Const of the appropriate datatype.  This is necessary
- * to prevent setrefs.c from trying to use the value of such a column,
- * and anyway it makes the indextlist a better representative of what
- * the indexscan will really return.  (We do this here, not where the
- * IndexOptInfo is originally constructed, because earlier planner
- * steps need to know what is in such columns.)
- */
-static List *
-make_indexonly_tlist(IndexOptInfo *indexinfo)
-{
-    List       *result;
-    int            i;
-    ListCell   *lc;
-
-    /* We needn't work hard for the common case of btrees. */
-    if (indexinfo->relam == BTREE_AM_OID)
-        return indexinfo->indextlist;
-
-    result = NIL;
-    i = 0;
-    foreach(lc, indexinfo->indextlist)
-    {
-        TargetEntry *indextle = (TargetEntry *) lfirst(lc);
-
-        if (indexinfo->canreturn[i])
-            result = lappend(result, indextle);
-        else
-        {
-            TargetEntry *newtle = makeNode(TargetEntry);
-            Node       *texpr = (Node *) indextle->expr;
-
-            memcpy(newtle, indextle, sizeof(TargetEntry));
-            newtle->expr = (Expr *) makeNullConst(exprType(texpr),
-                                                  exprTypmod(texpr),
-                                                  exprCollation(texpr));
-            result = lappend(result, newtle);
-        }
-        i++;
-    }
-    return result;
-}
-
 static BitmapIndexScan *
 make_bitmap_indexscan(Index scanrelid,
                       Oid indexid,
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 6ccec759bd..9c2aba45a6 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -1142,8 +1142,26 @@ set_indexonlyscan_references(PlannerInfo *root,
                              int rtoffset)
 {
     indexed_tlist *index_itlist;
+    List       *stripped_indextlist;
+    ListCell   *lc;
+
+    /*
+     * Vars in the plan node's targetlist, qual, and recheckqual must only
+     * reference columns that the index AM can actually return.  To ensure
+     * this, remove non-returnable columns (which are marked as resjunk) from
+     * the indexed tlist.  We can just drop them because the indexed_tlist
+     * machinery pays attention to TLE resnos, not physical list position.
+     */
+    stripped_indextlist = NIL;
+    foreach(lc, plan->indextlist)
+    {
+        TargetEntry *indextle = (TargetEntry *) lfirst(lc);
+
+        if (!indextle->resjunk)
+            stripped_indextlist = lappend(stripped_indextlist, indextle);
+    }

-    index_itlist = build_tlist_index(plan->indextlist);
+    index_itlist = build_tlist_index(stripped_indextlist);

     plan->scan.scanrelid += rtoffset;
     plan->scan.plan.targetlist = (List *)
@@ -1160,6 +1178,13 @@ set_indexonlyscan_references(PlannerInfo *root,
                        INDEX_VAR,
                        rtoffset,
                        NUM_EXEC_QUAL((Plan *) plan));
+    plan->recheckqual = (List *)
+        fix_upper_expr(root,
+                       (Node *) plan->recheckqual,
+                       index_itlist,
+                       INDEX_VAR,
+                       rtoffset,
+                       NUM_EXEC_QUAL((Plan *) plan));
     /* indexqual is already transformed to reference index columns */
     plan->indexqual = fix_scan_list(root, plan->indexqual,
                                     rtoffset, 1);
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index c9f7a09d10..aee5295183 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2336,6 +2336,8 @@ finalize_plan(PlannerInfo *root, Plan *plan,
         case T_IndexOnlyScan:
             finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexqual,
                               &context);
+            finalize_primnode((Node *) ((IndexOnlyScan *) plan)->recheckqual,
+                              &context);
             finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexorderby,
                               &context);

diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index ddc3529332..4ff98f4040 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1490,7 +1490,7 @@ typedef struct IndexScanState
 /* ----------------
  *     IndexOnlyScanState information
  *
- *        indexqual           execution state for indexqual expressions
+ *        recheckqual           execution state for recheckqual expressions
  *        ScanKeys           Skey structures for index quals
  *        NumScanKeys           number of ScanKeys
  *        OrderByKeys           Skey structures for index ordering operators
@@ -1509,7 +1509,7 @@ typedef struct IndexScanState
 typedef struct IndexOnlyScanState
 {
     ScanState    ss;                /* its first field is NodeTag */
-    ExprState  *indexqual;
+    ExprState  *recheckqual;
     struct ScanKeyData *ioss_ScanKeys;
     int            ioss_NumScanKeys;
     struct ScanKeyData *ioss_OrderByKeys;
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 13e07ead31..aa11dcedab 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -420,15 +420,28 @@ typedef struct IndexScan
  * index-only scan, in which the data comes from the index not the heap.
  * Because of this, *all* Vars in the plan node's targetlist, qual, and
  * index expressions reference index columns and have varno = INDEX_VAR.
- * Hence we do not need separate indexqualorig and indexorderbyorig lists,
- * since their contents would be equivalent to indexqual and indexorderby.
+ *
+ * We could almost use indexqual directly against the index's output tuple
+ * when rechecking lossy index operators, but that won't work for quals on
+ * index columns that are not retrievable.  Hence, recheckqual is needed
+ * for rechecks: it expresses the same condition as indexqual, but using
+ * only index columns that are retrievable.  (We will not generate an
+ * index-only scan if this is not possible.  An example is that if an
+ * index has table column "x" in a retrievable index column "ind1", plus
+ * an expression f(x) in a non-retrievable column "ind2", an indexable
+ * query on f(x) will use "ind2" in indexqual and f(ind1) in recheckqual.
+ * Without the "ind1" column, an index-only scan would be disallowed.)
+ *
+ * We don't currently need a recheckable equivalent of indexorderby,
+ * because we don't support lossy operators in index ORDER BY.
  *
  * To help EXPLAIN interpret the index Vars for display, we provide
  * indextlist, which represents the contents of the index as a targetlist
  * with one TLE per index column.  Vars appearing in this list reference
  * the base table, and this is the only field in the plan node that may
- * contain such Vars.  Note however that index columns that the AM can't
- * reconstruct are replaced by null Consts in indextlist.
+ * contain such Vars.  Also, for the convenience of setrefs.c, TLEs in
+ * indextlist are marked as resjunk if they correspond to columns that
+ * the index AM cannot reconstruct.
  * ----------------
  */
 typedef struct IndexOnlyScan
@@ -436,6 +449,7 @@ typedef struct IndexOnlyScan
     Scan        scan;
     Oid            indexid;        /* OID of index to scan */
     List       *indexqual;        /* list of index quals (usually OpExprs) */
+    List       *recheckqual;    /* index quals in recheckable form */
     List       *indexorderby;    /* list of index ORDER BY exprs */
     List       *indextlist;        /* TargetEntry list describing index's cols */
     ScanDirection indexorderdir;    /* forward or backward or don't care */
diff --git a/src/test/regress/expected/gist.out b/src/test/regress/expected/gist.out
index 6c6ced91e9..4ba2e7f29e 100644
--- a/src/test/regress/expected/gist.out
+++ b/src/test/regress/expected/gist.out
@@ -340,6 +340,37 @@ where p <@ box(point(5, 5), point(5.3, 5.3));
  <(5.3,5.3),1>
 (7 rows)

+-- Similarly, test that index rechecks involving a non-returnable column
+-- are done correctly.
+explain (verbose, costs off)
+select p from gist_tbl where circle(p,1) @> circle(point(0,0),0.95);
+                                      QUERY PLAN
+---------------------------------------------------------------------------------------
+ Index Only Scan using gist_tbl_multi_index on public.gist_tbl
+   Output: p
+   Index Cond: ((circle(gist_tbl.p, '1'::double precision)) @> '<(0,0),0.95>'::circle)
+(3 rows)
+
+select p from gist_tbl where circle(p,1) @> circle(point(0,0),0.95);
+   p
+-------
+ (0,0)
+(1 row)
+
+-- This case isn't supported, but it should at least EXPLAIN correctly.
+explain (verbose, costs off)
+select p from gist_tbl order by circle(p,1) <-> point(0,0) limit 1;
+                                     QUERY PLAN
+------------------------------------------------------------------------------------
+ Limit
+   Output: p, ((circle(p, '1'::double precision) <-> '(0,0)'::point))
+   ->  Index Only Scan using gist_tbl_multi_index on public.gist_tbl
+         Output: p, (circle(p, '1'::double precision) <-> '(0,0)'::point)
+         Order By: ((circle(gist_tbl.p, '1'::double precision)) <-> '(0,0)'::point)
+(5 rows)
+
+select p from gist_tbl order by circle(p,1) <-> point(0,0) limit 1;
+ERROR:  lossy distance functions are not supported in index-only scans
 -- Clean up
 reset enable_seqscan;
 reset enable_bitmapscan;
diff --git a/src/test/regress/sql/gist.sql b/src/test/regress/sql/gist.sql
index c6bac320b1..cf1069c207 100644
--- a/src/test/regress/sql/gist.sql
+++ b/src/test/regress/sql/gist.sql
@@ -153,6 +153,17 @@ where p <@ box(point(5, 5), point(5.3, 5.3));
 select circle(p,1) from gist_tbl
 where p <@ box(point(5, 5), point(5.3, 5.3));

+-- Similarly, test that index rechecks involving a non-returnable column
+-- are done correctly.
+explain (verbose, costs off)
+select p from gist_tbl where circle(p,1) @> circle(point(0,0),0.95);
+select p from gist_tbl where circle(p,1) @> circle(point(0,0),0.95);
+
+-- This case isn't supported, but it should at least EXPLAIN correctly.
+explain (verbose, costs off)
+select p from gist_tbl order by circle(p,1) <-> point(0,0) limit 1;
+select p from gist_tbl order by circle(p,1) <-> point(0,0) limit 1;
+
 -- Clean up
 reset enable_seqscan;
 reset enable_bitmapscan;
diff --git a/contrib/btree_gist/expected/inet.out b/contrib/btree_gist/expected/inet.out
index c323d903da..f15f1435f0 100644
--- a/contrib/btree_gist/expected/inet.out
+++ b/contrib/btree_gist/expected/inet.out
@@ -83,13 +83,13 @@ SELECT count(*) FROM inettmp WHERE a  = '89.225.196.191'::inet;

 DROP INDEX inetidx;
 CREATE INDEX ON inettmp USING gist (a gist_inet_ops, a inet_ops);
--- likewise here (checks for core planner bug)
+-- this can be an index-only scan, as long as the planner uses the right column
 EXPLAIN (COSTS OFF)
 SELECT count(*) FROM inettmp WHERE a  = '89.225.196.191'::inet;
-                     QUERY PLAN
-----------------------------------------------------
+                       QUERY PLAN
+---------------------------------------------------------
  Aggregate
-   ->  Index Scan using inettmp_a_a1_idx on inettmp
+   ->  Index Only Scan using inettmp_a_a1_idx on inettmp
          Index Cond: (a = '89.225.196.191'::inet)
 (3 rows)

diff --git a/contrib/btree_gist/sql/inet.sql b/contrib/btree_gist/sql/inet.sql
index 4b8d354b00..249e8085c3 100644
--- a/contrib/btree_gist/sql/inet.sql
+++ b/contrib/btree_gist/sql/inet.sql
@@ -42,7 +42,7 @@ DROP INDEX inetidx;

 CREATE INDEX ON inettmp USING gist (a gist_inet_ops, a inet_ops);

--- likewise here (checks for core planner bug)
+-- this can be an index-only scan, as long as the planner uses the right column
 EXPLAIN (COSTS OFF)
 SELECT count(*) FROM inettmp WHERE a  = '89.225.196.191'::inet;

diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 0e4e00eaf0..e2def356f6 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -1807,7 +1807,6 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
     bool        result;
     Bitmapset  *attrs_used = NULL;
     Bitmapset  *index_canreturn_attrs = NULL;
-    Bitmapset  *index_cannotreturn_attrs = NULL;
     ListCell   *lc;
     int            i;

@@ -1847,11 +1846,7 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)

     /*
      * Construct a bitmapset of columns that the index can return back in an
-     * index-only scan.  If there are multiple index columns containing the
-     * same attribute, all of them must be capable of returning the value,
-     * since we might recheck operators on any of them.  (Potentially we could
-     * be smarter about that, but it's such a weird situation that it doesn't
-     * seem worth spending a lot of sweat on.)
+     * index-only scan.
      */
     for (i = 0; i < index->ncolumns; i++)
     {
@@ -1868,21 +1863,13 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
             index_canreturn_attrs =
                 bms_add_member(index_canreturn_attrs,
                                attno - FirstLowInvalidHeapAttributeNumber);
-        else
-            index_cannotreturn_attrs =
-                bms_add_member(index_cannotreturn_attrs,
-                               attno - FirstLowInvalidHeapAttributeNumber);
     }

-    index_canreturn_attrs = bms_del_members(index_canreturn_attrs,
-                                            index_cannotreturn_attrs);
-
     /* Do we have all the necessary attributes? */
     result = bms_is_subset(attrs_used, index_canreturn_attrs);

     bms_free(attrs_used);
     bms_free(index_canreturn_attrs);
-    bms_free(index_cannotreturn_attrs);

     return result;
 }

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Andres Freund
Дата:
Сообщение: Re: Add jsonlog log_destination for JSON server logs
Следующее
От: Thomas Munro
Дата:
Сообщение: Re: daitch_mokotoff module