Re: Get more from indices.
| От | Kyotaro HORIGUCHI |
|---|---|
| Тема | Re: Get more from indices. |
| Дата | |
| Msg-id | 20131119.203516.251520490.horiguchi.kyotaro@lab.ntt.co.jp обсуждение исходный текст |
| Ответ на | Re: Get more from indices. (Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>) |
| Ответы |
Re: Get more from indices.
Re: Get more from indices. |
| Список | pgsql-hackers |
Hello, I've totally refactored the series of patches and cut out
the appropriate portion as 'unique (and non-nullable) index
stuff'.
As the discussion before, it got rid of path distinctness. This
patch works only on index 'full-orederedness', i.e., unique index
on non-nullable columns.
This patch itself does not so much. Will have power applied with
'Using indices for UNION' patch.
=== Making test table
create table t (a int not null, b int not null, c int, d text);
create unique index i_t_ab on t (a, b);
insert into t (select a / 5, 4 - (a % 5), a, 't' from generate_series(000000, 099999) a);
=== Example 1.
not-patched=# explain select * from t order by a, b ,c limit 1;
> QUERY PLAN
> ----------------------------------------------------------------------
> Limit (cost=2041.00..2041.00 rows=1 width=14)
> -> Sort (cost=2041.00..2291.00 rows=100000 width=14)
> Sort Key: a, b, c
> -> Seq Scan on t (cost=0.00..1541.00 rows=100000 width=14)
patched=# explain select * from t order by a, b ,c limit 1;
> QUERY PLAN
> -----------------------------------------------------------------------------
> Limit (cost=0.29..0.33 rows=1 width=14)
> -> Index Scan using i_t_ab on t (cost=0.29..3857.04 rows=100000 width=14)
=== Example 2.
not-patched=# explain select distinct * from t order by a limit 1;
> QUERY PLAN
> ---------------------------------------------------------------------------
> Limit (cost=1820.46..1820.47 rows=1 width=44)
> -> Sort (cost=1820.46..1835.34 rows=5951 width=44)
> Sort Key: a
> -> HashAggregate (cost=1731.20..1790.71 rows=5951 width=44)
> -> Seq Scan on t (cost=0.00..1136.10 rows=59510 width=44)
patched=# explain select distinct * from t order by a limit 1;
> QUERY PLAN
> ------------------------------------------------------------------------------------
> Limit (cost=0.29..1.09 rows=1 width=44)
> -> Unique (cost=0.29..4756.04 rows=5951 width=44)
> -> Index Scan using i_t_ab on t (cost=0.29..4160.94 rows=59510 width=44)
The unique node could be removed technically but it requires to
care the distinctness of path/plan. So it has been put out to
"Using indeces for UNION" patch.
Any comments?
regards,
--
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 606734a..43be0a5 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -953,7 +953,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, index_pathkeys =
build_index_pathkeys(root,index, ForwardScanDirection);
useful_pathkeys= truncate_useless_pathkeys(root, rel,
- index_pathkeys);
+ index_pathkeys,
+ index->full_ordered); orderbyclauses = NIL;
orderbyclausecols= NIL; }
@@ -1015,7 +1016,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, index_pathkeys =
build_index_pathkeys(root,index, BackwardScanDirection);
useful_pathkeys= truncate_useless_pathkeys(root, rel,
- index_pathkeys);
+ index_pathkeys,
+ index->full_ordered); if (useful_pathkeys != NIL) {
ipath = create_index_path(root, index,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 032b2cd..5d8ee04 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -369,6 +369,29 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,}/*
+ * path_is_ordered
+ * Return true if the path is apparently ordered on the pathkeys.
+ */
+bool
+path_is_ordered(Path *path, List *pathkeys)
+{
+ if (pathkeys_contained_in(pathkeys, path->pathkeys))
+ return true;
+
+ /*
+ * If IndexPath is fully ordered, it is sufficiently ordered if index
+ * pathkeys is a subset of target pathkeys
+ */
+ if (pathkeys && path->pathkeys &&
+ IsA(path, IndexPath) &&
+ ((IndexPath*)path)->indexinfo->full_ordered &&
+ (pathkeys_contained_in(path->pathkeys, pathkeys)))
+ return true;
+
+ return false;
+}
+
+/* * get_cheapest_fractional_path_for_pathkeys * Find the cheapest path (for retrieving a specified fraction of
all* the tuples) that satisfies the given pathkeys and parameterization.
@@ -381,6 +404,8 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, * 'pathkeys' represents a required
ordering(in canonical form!) * 'required_outer' denotes allowable outer relations for parameterized paths * 'fraction'
isthe fraction of the total tuples expected to be retrieved
+ * paths->pathkeys might be replaced with pathkeys so as to declare that the
+ * path is ordered on it. */Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
@@ -403,9 +428,17 @@ get_cheapest_fractional_path_for_pathkeys(List *paths,
compare_fractional_path_costs(matched_path,path, fraction) <= 0) continue;
- if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
+ if (path_is_ordered(path, pathkeys) && bms_is_subset(PATH_REQ_OUTER(path), required_outer))
+ {
+ /*
+ * Set required pathkeys as the path's pathkeys so as to declare
+ * that this path is ordred on the pathkeys.
+ */
+ if (list_length(path->pathkeys) < list_length(pathkeys))
+ path->pathkeys = pathkeys; matched_path = path;
+ } } return matched_path;}
@@ -747,7 +780,7 @@ build_join_pathkeys(PlannerInfo *root, * contain pathkeys that were useful for forming this
joinrelbut are * uninteresting to higher levels. */
- return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
+ return truncate_useless_pathkeys(root, joinrel, outer_pathkeys,
false);}/****************************************************************************
@@ -1404,7 +1437,8 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey) * So the result is always either 0 or
list_length(root->query_pathkeys).*/static int
-pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
+pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys,
+ bool pk_full_ordered){ if (root->query_pathkeys == NIL) return 0;
/* no special ordering requested */
@@ -1418,23 +1452,35 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) return
list_length(root->query_pathkeys); }
+ /*
+ * Given that the pathkeys compose an unique key, they are useful for
+ * ordering when they are a sublist of query_pathkeys.
+ */
+ if (pk_full_ordered &&
+ pathkeys_contained_in(pathkeys, root->query_pathkeys))
+ return list_length(pathkeys);
+ return 0; /* path ordering not useful */}/* * truncate_useless_pathkeys * Shorten the
givenpathkey list to just the useful pathkeys.
+ *
+ * When pk_full_ordered is true, reconsider counting the uniqueness even if
+ * the pathkeys are once judged as useless by the general criteria. */List *truncate_useless_pathkeys(PlannerInfo
*root, RelOptInfo *rel,
- List *pathkeys)
+ List *pathkeys,
+ bool pk_full_ordered){ int nuseful; int nuseful2; nuseful =
pathkeys_useful_for_merging(root,rel, pathkeys);
- nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+ nuseful2 = pathkeys_useful_for_ordering(root, pathkeys, pk_full_ordered); if (nuseful2 > nuseful)
nuseful= nuseful2;
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 954666c..ee92545 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -231,6 +231,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, */
if (info->relam == BTREE_AM_OID) {
+ bool has_nullable_keycol = false;
+ /* * If it's a btree index, we can use its opfamily OIDs * directly as
thesort ordering opfamily OIDs.
@@ -244,10 +246,25 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, for (i
=0; i < ncolumns; i++) { int16 opt = indexRelation->rd_indoption[i];
+ int16 keyattno = index->indkey.values[i]; info->reverse_sort[i] = (opt &
INDOPTION_DESC)!= 0; info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
- }
+
+ if (keyattno > 0)
+ {
+ has_nullable_keycol |=
+ !relation->rd_att->attrs[keyattno - 1]->attnotnull;
+ }
+ else if (keyattno != ObjectIdAttributeNumber)
+ has_nullable_keycol = true;
+ }
+
+ /* Check to see if it is a full-ordered index */
+ if (IndexIsValid(index) &&
+ index->indisunique && index->indimmediate &&
+ !has_nullable_keycol)
+ info->full_ordered = true; } else if (indexRelation->rd_am->amcanorder)
{
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 6d7b594..b38c667 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -524,6 +524,7 @@ typedef struct IndexOptInfo bool predOK; /* true if predicate matches query */
bool unique; /* true if a unique index */ bool immediate; /* is uniqueness
enforcedimmediately? */
+ bool full_ordered; /* is ordered rows not duplicate ? */ bool hypothetical; /* true if
indexdoesn't really exist */ bool canreturn; /* can index return IndexTuples? */ bool
amcanorderbyop;/* does AM support order by operator result? */
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 96ffdb1..9e53b42 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -160,6 +160,7 @@ extern bool pathkeys_contained_in(List *keys1, List *keys2);extern Path
*get_cheapest_path_for_pathkeys(List*paths, List *pathkeys, Relids required_outer,
CostSelector cost_criterion);
+extern bool path_is_ordered(Path *path, List *pathkeys);extern Path *get_cheapest_fractional_path_for_pathkeys(List
*paths, List *pathkeys, Relids
required_outer,
@@ -191,7 +192,8 @@ extern List *make_inner_pathkeys_for_merge(PlannerInfo *root, List
*outer_pathkeys);externList *truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel,
- List *pathkeys);
+ List *pathkeys,
+ bool pk_full_ordered);extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo
*rel);#endif /* PATHS_H */
diff --git a/src/test/isolation/expected/drop-index-concurrently-1.out
b/src/test/isolation/expected/drop-index-concurrently-1.out
index 75dff56..ab96fa0 100644
--- a/src/test/isolation/expected/drop-index-concurrently-1.out
+++ b/src/test/isolation/expected/drop-index-concurrently-1.out
@@ -19,10 +19,8 @@ Sortstep explains: EXPLAIN (COSTS OFF) EXECUTE getrow_seq;QUERY PLAN
-Sort
- Sort Key: id, data
- -> Seq Scan on test_dc
- Filter: ((data)::text = '34'::text)
+Index Scan using test_dc_pkey on test_dc
+ Filter: ((data)::text = '34'::text)step select2: SELECT * FROM test_dc WHERE data=34 ORDER BY id,data;id
data
В списке pgsql-hackers по дате отправления: