Re: On disable_cost

Поиск
Список
Период
Сортировка
От Jim Finnerty
Тема Re: On disable_cost
Дата
Msg-id 1576018229539-0.post@n3.nabble.com
обсуждение исходный текст
Ответ на Re: On disable_cost  (Tom Lane <tgl@sss.pgh.pa.us>)
Ответы Re: On disable_cost  (Laurenz Albe <laurenz.albe@cybertec.at>)
Список pgsql-hackers
As a proof of concept, I hacked around a bit today to re-purpose one of the
bits of the Cost structure to mean "is_disabled" so that we can distinguish
'disabled' from 'non-disabled' paths without making the Cost structure any
bigger.  In fact, it's still a valid double.  The obvious choice would have
been to re-purpose the sign bit, but I've had occasion to exploit negative
costs before so for this POC I used the high-order bit of the fractional
bits of the double.  (see Wikipedia for double precision floating point for
the layout).

The idea is to set a special bit when disable_cost is added to a cost. 
Dedicating multiple bits instead of just 1 would be easily done, but as it
is we can accumulate many disable_costs without overflowing, so just
comparing the cost suffices.

The patch is not fully debugged and fails on a couple of tests in the serial
test suite.  It seems to fail on Cartesian products, and maybe in one other
non-CP case.  I wasn't able to debug it before the day came to an end.

In one place the core code subtracts off the disable_cost.  I left the
"disabled" bit set in this case, which might be wrong.

I don't see an option to attach the patch as an attachment, so here is the
patch inline (it is based on PG11).  The more interesting part is in a small
number of lines in costsize.c.  Other changes just add functions that assign
a disable_cost and set the bit, or that compare costs such that a
non-disabled cost always compares less than a disabled cost.

------------------

diff --git a/src/backend/optimizer/path/costsize.c
b/src/backend/optimizer/path/costsize.c
index 4e86458672..3718639330 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -123,6 +123,8 @@ double        parallel_setup_cost =
DEFAULT_PARALLEL_SETUP_COST;
 int            effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
 
 Cost        disable_cost = 1.0e10;
+uint64      disabled_mask = 0x8000000000000;
+#define IS_DISABLED(cost) (((uint64) cost) & disabled_mask)
 
 int            max_parallel_workers_per_gather = 2;
 
@@ -205,6 +207,53 @@ clamp_row_est(double nrows)
     return nrows;
 }
 
+Cost
+add_cost(Cost cost, Cost delta_cost)
+{
+    uint64 mask = (delta_cost == disable_cost) ? disabled_mask : 0;
+    Cost max_cost = disabled_mask - disable_cost;
+    
+    if (cost + delta_cost < max_cost)
+        return ((Cost) ((uint64)(cost + delta_cost) | mask));
+    else
+        return ((Cost) ((uint64)(max_cost) | mask));
+}
+
+bool
+is_lower_cost(Cost cost1, Cost cost2)
+{
+    if ((uint64)cost1 & disabled_mask && !((uint64)cost2 & disabled_mask))
+        return false;
+    
+    if (!((uint64)cost1 & disabled_mask) && (uint64)cost2 & disabled_mask)
+        return true;
+    
+    return (cost1 < cost2);
+}
+
+bool
+is_greater_cost(Cost cost1, Cost cost2)
+{
+    if ((uint64)cost1 & disabled_mask && !((uint64)cost2 & disabled_mask))
+        return true;
+    
+    if (!((uint64)cost1 & disabled_mask) && (uint64)cost2 & disabled_mask)
+        return false;
+    
+    return (cost1 > cost2);
+}
+
+bool
+is_geq_cost(Cost cost1, Cost cost2)
+{
+    if ((uint64)cost1 & disabled_mask && !((uint64)cost2 & disabled_mask))
+        return true;
+    
+    if (!((uint64)cost1 & disabled_mask) && (uint64)cost2 & disabled_mask)
+        return false;
+    
+    return (cost1 >= cost2);
+}
 
 /*
  * cost_seqscan
@@ -235,7 +284,7 @@ cost_seqscan(Path *path, PlannerInfo *root,
         path->rows = baserel->rows;
 
     if (!enable_seqscan)
-        startup_cost += disable_cost;
+        startup_cost = add_cost(startup_cost, disable_cost);
 
     /* fetch estimated page cost for tablespace containing table */
     get_tablespace_page_costs(baserel->reltablespace,
@@ -424,7 +473,7 @@ cost_gather_merge(GatherMergePath *path, PlannerInfo
*root,
         path->path.rows = rel->rows;
 
     if (!enable_gathermerge)
-        startup_cost += disable_cost;
+        startup_cost = add_cost(startup_cost, disable_cost);
 
     /*
      * Add one to the number of workers to account for the leader.  This might
@@ -538,7 +587,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double
loop_count,
     }
 
     if (!enable_indexscan)
-        startup_cost += disable_cost;
+        startup_cost = add_cost(startup_cost, disable_cost);
     /* we don't need to check enable_indexonlyscan; indxpath.c does that */
 
     /*
@@ -976,7 +1025,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root,
RelOptInfo *baserel,
         path->rows = baserel->rows;
 
     if (!enable_bitmapscan)
-        startup_cost += disable_cost;
+        startup_cost = add_cost(startup_cost, disable_cost);
 
     pages_fetched = compute_bitmap_pages(root, baserel, bitmapqual,
                                          loop_count, &indexTotalCost,
@@ -1242,10 +1291,10 @@ cost_tidscan(Path *path, PlannerInfo *root,
     if (isCurrentOf)
     {
         Assert(baserel->baserestrictcost.startup >= disable_cost);
-        startup_cost -= disable_cost;
+        startup_cost -= disable_cost;  /* but do not un-set the disabled mark */
     }
     else if (!enable_tidscan)
-        startup_cost += disable_cost;
+        startup_cost = add_cost(startup_cost, disable_cost);
 
     /*
      * The TID qual expressions will be computed once, any other baserestrict
@@ -1676,7 +1725,7 @@ cost_sort(Path *path, PlannerInfo *root,
     long        sort_mem_bytes = sort_mem * 1024L;
 
     if (!enable_sort)
-        startup_cost += disable_cost;
+        startup_cost = add_cost(startup_cost, disable_cost);
 
     path->rows = tuples;
 
@@ -2121,8 +2170,8 @@ cost_agg(Path *path, PlannerInfo *root,
         total_cost = input_total_cost;
         if (aggstrategy == AGG_MIXED && !enable_hashagg)
         {
-            startup_cost += disable_cost;
-            total_cost += disable_cost;
+            startup_cost = add_cost(startup_cost, disable_cost);
+            total_cost = add_cost(total_cost, disable_cost);
         }
         /* calcs phrased this way to match HASHED case, see note above */
         total_cost += aggcosts->transCost.startup;
@@ -2137,7 +2186,7 @@ cost_agg(Path *path, PlannerInfo *root,
         /* must be AGG_HASHED */
         startup_cost = input_total_cost;
         if (!enable_hashagg)
-            startup_cost += disable_cost;
+            startup_cost = add_cost(startup_cost, disable_cost);
         startup_cost += aggcosts->transCost.startup;
         startup_cost += aggcosts->transCost.per_tuple * input_tuples;
         startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
@@ -2436,7 +2485,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
      * disabled, which doesn't seem like the way to bet.
      */
     if (!enable_nestloop)
-        startup_cost += disable_cost;
+        startup_cost = add_cost(startup_cost, disable_cost);
 
     /* cost of inner-relation source data (we already dealt with outer rel) */
 
@@ -2882,7 +2931,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath
*path,
      * disabled, which doesn't seem like the way to bet.
      */
     if (!enable_mergejoin)
-        startup_cost += disable_cost;
+        startup_cost = add_cost(startup_cost, disable_cost);
 
     /*
      * Compute cost of the mergequals and qpquals (other restriction clauses)
@@ -3312,7 +3361,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
      * disabled, which doesn't seem like the way to bet.
      */
     if (!enable_hashjoin)
-        startup_cost += disable_cost;
+        startup_cost = add_cost(startup_cost, disable_cost);
 
     /* mark the path with estimated # of batches */
     path->num_batches = numbatches;
@@ -3410,7 +3459,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
     if (relation_byte_size(clamp_row_est(inner_path_rows * innermcvfreq),
                            inner_path->pathtarget->width) >
         (work_mem * 1024L))
-        startup_cost += disable_cost;
+        startup_cost = add_cost(startup_cost, disable_cost);
 
     /*
      * Compute cost of the hashquals and qpquals (other restriction clauses)
@@ -3930,7 +3979,7 @@ cost_qual_eval_walker(Node *node,
cost_qual_eval_context *context)
     else if (IsA(node, CurrentOfExpr))
     {
         /* Report high cost to prevent selection of anything but TID scan */
-        context->total.startup += disable_cost;
+        context->total.startup = add_cost(context->total.startup, disable_cost);
     }
     else if (IsA(node, SubLink))
     {
diff --git a/src/backend/optimizer/util/pathnode.c
b/src/backend/optimizer/util/pathnode.c
index 4736d84a83..fd746a06bc 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -72,33 +72,33 @@ compare_path_costs(Path *path1, Path *path2,
CostSelector criterion)
 {
     if (criterion == STARTUP_COST)
     {
-        if (path1->startup_cost < path2->startup_cost)
+        if (is_lower_cost(path1->startup_cost, path2->startup_cost))
             return -1;
-        if (path1->startup_cost > path2->startup_cost)
+        if (is_greater_cost(path1->startup_cost, path2->startup_cost))
             return +1;
 
         /*
          * If paths have the same startup cost (not at all unlikely), order
          * them by total cost.
          */
-        if (path1->total_cost < path2->total_cost)
+        if (is_lower_cost(path1->total_cost, path2->total_cost))
             return -1;
-        if (path1->total_cost > path2->total_cost)
+        if (is_greater_cost(path1->total_cost, path2->total_cost))
             return +1;
     }
     else
     {
-        if (path1->total_cost < path2->total_cost)
+        if (is_lower_cost(path1->total_cost, path2->total_cost))
             return -1;
-        if (path1->total_cost > path2->total_cost)
+        if (is_greater_cost(path1->total_cost, path2->total_cost))
             return +1;
 
         /*
          * If paths have the same total cost, order them by startup cost.
          */
-        if (path1->startup_cost < path2->startup_cost)
+        if (is_lower_cost(path1->startup_cost, path2->startup_cost))
             return -1;
-        if (path1->startup_cost > path2->startup_cost)
+        if (is_greater_cost(path1->startup_cost, path2->startup_cost))
             return +1;
     }
     return 0;
@@ -126,9 +126,9 @@ compare_fractional_path_costs(Path *path1, Path *path2,
         fraction * (path1->total_cost - path1->startup_cost);
     cost2 = path2->startup_cost +
         fraction * (path2->total_cost - path2->startup_cost);
-    if (cost1 < cost2)
+    if (is_lower_cost(cost1, cost2))
         return -1;
-    if (cost1 > cost2)
+    if (is_greater_cost(cost1, cost2))
         return +1;
     return 0;
 }
@@ -172,11 +172,11 @@ compare_path_costs_fuzzily(Path *path1, Path *path2,
double fuzz_factor)
      * Check total cost first since it's more likely to be different; many
      * paths have zero startup cost.
      */
-    if (path1->total_cost > path2->total_cost * fuzz_factor)
+    if (is_greater_cost(path1->total_cost, path2->total_cost * fuzz_factor))
     {
         /* path1 fuzzily worse on total cost */
         if (CONSIDER_PATH_STARTUP_COST(path1) &&
-            path2->startup_cost > path1->startup_cost * fuzz_factor)
+            is_greater_cost(path2->startup_cost, path1->startup_cost * fuzz_factor))
         {
             /* ... but path2 fuzzily worse on startup, so DIFFERENT */
             return COSTS_DIFFERENT;
@@ -184,11 +184,11 @@ compare_path_costs_fuzzily(Path *path1, Path *path2,
double fuzz_factor)
         /* else path2 dominates */
         return COSTS_BETTER2;
     }
-    if (path2->total_cost > path1->total_cost * fuzz_factor)
+    if (is_greater_cost(path2->total_cost, path1->total_cost * fuzz_factor))
     {
         /* path2 fuzzily worse on total cost */
         if (CONSIDER_PATH_STARTUP_COST(path2) &&
-            path1->startup_cost > path2->startup_cost * fuzz_factor)
+            is_greater_cost(path1->startup_cost, path2->startup_cost * fuzz_factor))
         {
             /* ... but path1 fuzzily worse on startup, so DIFFERENT */
             return COSTS_DIFFERENT;
@@ -197,12 +197,12 @@ compare_path_costs_fuzzily(Path *path1, Path *path2,
double fuzz_factor)
         return COSTS_BETTER1;
     }
     /* fuzzily the same on total cost ... */
-    if (path1->startup_cost > path2->startup_cost * fuzz_factor)
+    if (is_greater_cost(path1->startup_cost, path2->startup_cost *
fuzz_factor))
     {
         /* ... but path1 fuzzily worse on startup, so path2 wins */
         return COSTS_BETTER2;
     }
-    if (path2->startup_cost > path1->startup_cost * fuzz_factor)
+    if (is_greater_cost(path2->startup_cost, path1->startup_cost *
fuzz_factor))
     {
         /* ... but path2 fuzzily worse on startup, so path1 wins */
         return COSTS_BETTER1;
@@ -605,7 +605,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
         else
         {
             /* new belongs after this old path if it has cost >= old's */
-            if (new_path->total_cost >= old_path->total_cost)
+            if (is_geq_cost(new_path->total_cost, old_path->total_cost))
                 insert_after = p1;
             /* p1_prev advances */
             p1_prev = p1;
@@ -681,7 +681,7 @@ add_path_precheck(RelOptInfo *parent_rel,
          *
          * Cost comparisons here should match compare_path_costs_fuzzily.
          */
-        if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR)
+        if (is_greater_cost(total_cost, old_path->total_cost * STD_FUZZ_FACTOR))
         {
             /* new path can win on startup cost only if consider_startup */
             if (startup_cost > old_path->startup_cost * STD_FUZZ_FACTOR ||
@@ -796,14 +796,14 @@ add_partial_path(RelOptInfo *parent_rel, Path
*new_path)
         /* Unless pathkeys are incompable, keep just one of the two paths. */
         if (keyscmp != PATHKEYS_DIFFERENT)
         {
-            if (new_path->total_cost > old_path->total_cost * STD_FUZZ_FACTOR)
+            if (is_greater_cost(new_path->total_cost, old_path->total_cost *
STD_FUZZ_FACTOR))
             {
                 /* New path costs more; keep it only if pathkeys are better. */
                 if (keyscmp != PATHKEYS_BETTER1)
                     accept_new = false;
             }
-            else if (old_path->total_cost > new_path->total_cost
-                     * STD_FUZZ_FACTOR)
+            else if (is_greater_cost(old_path->total_cost, new_path->total_cost
+                                     * STD_FUZZ_FACTOR))
             {
                 /* Old path costs more; keep it only if pathkeys are better. */
                 if (keyscmp != PATHKEYS_BETTER2)
@@ -819,7 +819,7 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
                 /* Costs are about the same, old path has better pathkeys. */
                 accept_new = false;
             }
-            else if (old_path->total_cost > new_path->total_cost * 1.0000000001)
+            else if (is_greater_cost(old_path->total_cost, new_path->total_cost *
1.0000000001))
             {
                 /* Pathkeys are the same, and the old path costs more. */
                 remove_old = true;
@@ -847,7 +847,7 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
         else
         {
             /* new belongs after this old path if it has cost >= old's */
-            if (new_path->total_cost >= old_path->total_cost)
+            if (is_geq_cost(new_path->total_cost, old_path->total_cost))
                 insert_after = p1;
             /* p1_prev advances */
             p1_prev = p1;
@@ -913,10 +913,10 @@ add_partial_path_precheck(RelOptInfo *parent_rel, Cost
total_cost,
         keyscmp = compare_pathkeys(pathkeys, old_path->pathkeys);
         if (keyscmp != PATHKEYS_DIFFERENT)
         {
-            if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR &&
+            if (is_greater_cost(total_cost, old_path->total_cost * STD_FUZZ_FACTOR)
&&
                 keyscmp != PATHKEYS_BETTER1)
                 return false;
-            if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR &&
+            if (is_greater_cost(old_path->total_cost, total_cost * STD_FUZZ_FACTOR)
&&
                 keyscmp != PATHKEYS_BETTER2)
                 return true;
         }
@@ -1697,7 +1697,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel,
Path *subpath,
 
     if (sjinfo->semi_can_btree && sjinfo->semi_can_hash)
     {
-        if (agg_path.total_cost < sort_path.total_cost)
+        if (is_lower_cost(agg_path.total_cost, sort_path.total_cost))
             pathnode->umethod = UNIQUE_PATH_HASH;
         else
             pathnode->umethod = UNIQUE_PATH_SORT;
diff --git a/src/backend/utils/cache/relcache.c
b/src/backend/utils/cache/relcache.c
index 78f3b99a76..c261a9d790 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5076,8 +5076,8 @@ IsProjectionFunctionalIndex(Relation index)
          * when values differ because the expression is recalculated when
          * inserting a new index entry for the changed value.
          */
-        if ((index_expr_cost.startup + index_expr_cost.per_tuple) >
-            HEURISTIC_MAX_HOT_RECHECK_EXPR_COST)
+        if (is_greater_cost((index_expr_cost.startup +
index_expr_cost.per_tuple),
+                            HEURISTIC_MAX_HOT_RECHECK_EXPR_COST))
             is_projection = false;
 
         tuple = SearchSysCache1(RELOID,
ObjectIdGetDatum(RelationGetRelid(index)));
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 9159f2bab1..c01d08eae5 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -251,6 +251,12 @@ extern PathTarget
*set_pathtarget_cost_width(PlannerInfo *root, PathTarget *targ
 extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
                      Path *bitmapqual, int loop_count, Cost *cost, double *tuple);
 
+extern Cost add_cost(Cost cost, Cost delta_cost);
+extern bool is_lower_cost(Cost cost1, Cost cost2);
+extern bool is_greater_cost(Cost cost1, Cost cost2);
+extern bool is_geq_cost(Cost cost1, Cost cost2);
+
+
 /*
  * prototypes for clausesel.c
  *      routines to compute clause selectivities





-----
Jim Finnerty, AWS, Amazon Aurora PostgreSQL
--
Sent from: https://www.postgresql-archive.org/PostgreSQL-hackers-f1928748.html



В списке pgsql-hackers по дате отправления:

Предыдущее
От: Tom Lane
Дата:
Сообщение: Re: pg_control_init() bug
Следующее
От: Thomas Munro
Дата:
Сообщение: Re: Windows UTF-8, non-ICU collation trouble