Discussion: [PERFORM] self join estimate and constraint exclusion

[PERFORM] self join estimate and constraint exclusion

From
Justin Pryzby
Date:
I mailed last month [0] but didn't see any response ... (if I'm being naive,
daft, or missing something simple, please just say so).

[0] https://www.postgresql.org/message-id/20170326193344.GS31628%40telsasoft.com

It seems that when self (inner/equi) joining, there are two bad alternatives: either
specify a WHERE clause for each self-joined table and incur a poor estimate and
plan, because the clauses are perceived as independent even though the joined
column(s) could/ought to be known equal; or specify the WHERE clause only once,
and incur the cost of joining across all partitions, due to no constraint exclusion
on one (or more) of the self-joined table hierarchies.
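
For reference, the layout is essentially this (a simplified sketch; the real
children cover every month back to 2015-10 and have far more columns):

    CREATE TABLE eric_enodeb_metrics (start_time timestamptz, site_id int /*, ...metric columns... */);
    CREATE TABLE eric_enodeb_201703 (
        CHECK (start_time >= '2017-03-01' AND start_time < '2017-04-01')
    ) INHERITS (eric_enodeb_metrics);
    CREATE UNIQUE INDEX eric_enodeb_201703_unique_idx ON eric_enodeb_201703 (start_time, site_id);
    -- ...one such child per month, with constraint_exclusion = partition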

-- Specifying WHERE for each table causes a bad underestimate:
|ts=# explain analyze SELECT * FROM eric_enodeb_metrics a JOIN eric_enodeb_metrics b USING (start_time, site_id) WHERE a.start_time>='2017-03-19' AND a.start_time<'2017-03-20' AND b.start_time>='2017-03-19' AND b.start_time<'2017-03-20';
| Hash Join  (cost=7310.80..14680.86 rows=14 width=1436) (actual time=33.053..73.180 rows=7869 loops=1)
|   Hash Cond: ((a.start_time = b.start_time) AND (a.site_id = b.site_id))
|   ->  Append  (cost=0.00..7192.56 rows=7883 width=723) (actual time=1.394..19.414 rows=7869 loops=1)
|         ->  Seq Scan on eric_enodeb_metrics a  (cost=0.00..0.00 rows=1 width=718) (actual time=0.003..0.003 rows=0 loops=1)
|               Filter: ((start_time >= '2017-03-19 00:00:00-04'::timestamp with time zone) AND (start_time < '2017-03-20 00:00:00-04'::timestamp with time zone))
|         ->  Bitmap Heap Scan on eric_enodeb_201703 a_1  (cost=605.34..7192.56 rows=7882 width=723) (actual time=1.390..14.536 rows=7869 loops=1)
|               Recheck Cond: ((start_time >= '2017-03-19 00:00:00-04'::timestamp with time zone) AND (start_time < '2017-03-20 00:00:00-04'::timestamp with time zone))
|               Heap Blocks: exact=247
|               ->  Bitmap Index Scan on eric_enodeb_201703_unique_idx  (cost=0.00..603.37 rows=7882 width=0) (actual time=1.351..1.351 rows=7869 loops=1)
|                     Index Cond: ((start_time >= '2017-03-19 00:00:00-04'::timestamp with time zone) AND (start_time < '2017-03-20 00:00:00-04'::timestamp with time zone))
|   ->  Hash  (cost=7192.56..7192.56 rows=7883 width=723) (actual time=31.620..31.620 rows=7869 loops=1)
|         Buckets: 8192  Batches: 1  Memory Usage: 1986kB
|         ->  Append  (cost=0.00..7192.56 rows=7883 width=723) (actual time=0.902..19.543 rows=7869 loops=1)
|               ->  Seq Scan on eric_enodeb_metrics b  (cost=0.00..0.00 rows=1 width=718) (actual time=0.002..0.002 rows=0 loops=1)
|                     Filter: ((start_time >= '2017-03-19 00:00:00-04'::timestamp with time zone) AND (start_time < '2017-03-20 00:00:00-04'::timestamp with time zone))
|               ->  Bitmap Heap Scan on eric_enodeb_201703 b_1  (cost=605.34..7192.56 rows=7882 width=723) (actual time=0.899..14.353 rows=7869 loops=1)
|                     Recheck Cond: ((start_time >= '2017-03-19 00:00:00-04'::timestamp with time zone) AND (start_time < '2017-03-20 00:00:00-04'::timestamp with time zone))
|                     Heap Blocks: exact=247
|                     ->  Bitmap Index Scan on eric_enodeb_201703_unique_idx  (cost=0.00..603.37 rows=7882 width=0) (actual time=0.867..0.867 rows=7869 loops=1)
|                           Index Cond: ((start_time >= '2017-03-19 00:00:00-04'::timestamp with time zone) AND (start_time < '2017-03-20 00:00:00-04'::timestamp with time zone))



-- Specifying WHERE once gets a good estimate, but with an unnecessary scan of all child partitions:
|ts=# explain analyze SELECT * FROM eric_enodeb_metrics a JOIN eric_enodeb_metrics b USING (start_time, site_id) WHERE start_time>='2017-03-19' AND start_time<'2017-03-20';
| Gather  (cost=8310.80..316545.60 rows=9591 width=1427) (actual time=9012.967..9073.539 rows=7869 loops=1)
|   Workers Planned: 3
|   Workers Launched: 3
|   ->  Hash Join  (cost=7310.80..314586.50 rows=3094 width=1427) (actual time=8892.121..8937.245 rows=1967 loops=4)
|         Hash Cond: ((b.start_time = a.start_time) AND (b.site_id = a.site_id))
|         ->  Append  (cost=0.00..261886.54 rows=2015655 width=714) (actual time=11.464..8214.063 rows=1308903 loops=4)
|               ->  Parallel Seq Scan on eric_enodeb_metrics b  (cost=0.00..0.00 rows=1 width=718) (actual time=0.001..0.001 rows=0 loops=4)
|               ->  Parallel Seq Scan on eric_enodeb_201510 b_1  (cost=0.00..10954.43 rows=60343 width=707) (actual time=11.460..258.852 rows=46766 loops=4)
|               ->  Parallel Seq Scan on eric_enodeb_201511 b_2  (cost=0.00..10310.91 rows=56891 width=707) (actual time=18.395..237.841 rows=44091 loops=4)
|[...]
|               ->  Parallel Seq Scan on eric_enodeb_201703 b_29  (cost=0.00..6959.75 rows=81875 width=723) (actual time=0.017..101.969 rows=49127 loops=4)
|         ->  Hash  (cost=7192.56..7192.56 rows=7883 width=723) (actual time=51.843..51.843 rows=7869 loops=4)
|               Buckets: 8192  Batches: 1  Memory Usage: 1970kB
|               ->  Append  (cost=0.00..7192.56 rows=7883 width=723) (actual time=2.558..27.829 rows=7869 loops=4)
|                     ->  Seq Scan on eric_enodeb_metrics a  (cost=0.00..0.00 rows=1 width=718) (actual time=0.014..0.014 rows=0 loops=4)
|                           Filter: ((start_time >= '2017-03-19 00:00:00-04'::timestamp with time zone) AND (start_time < '2017-03-20 00:00:00-04'::timestamp with time zone))
|                     ->  Bitmap Heap Scan on eric_enodeb_201703 a_1  (cost=605.34..7192.56 rows=7882 width=723) (actual time=2.542..17.305 rows=7869 loops=4)
|                           Recheck Cond: ((start_time >= '2017-03-19 00:00:00-04'::timestamp with time zone) AND (start_time < '2017-03-20 00:00:00-04'::timestamp with time zone))
|                           Heap Blocks: exact=247
|                           ->  Bitmap Index Scan on eric_enodeb_201703_unique_idx  (cost=0.00..603.37 rows=7882 width=0) (actual time=2.494..2.494 rows=7869 loops=4)
|                                 Index Cond: ((start_time >= '2017-03-19 00:00:00-04'::timestamp with time zone) AND (start_time < '2017-03-20 00:00:00-04'::timestamp with time zone))


Minor variations have the same problems:
-- Scans all partitions:
ts=# explain analyze SELECT * FROM (SELECT * FROM eric_enodeb_metrics a) t1 JOIN (SELECT * FROM eric_enodeb_metrics b WHERE start_time>='2017-03-19 23:00:00' AND start_time<'2017-03-20') t2 USING (start_time, site_id);

-- Underestimates due to perceived independence of clauses:
|ts=# explain analyze SELECT * FROM (SELECT * FROM eric_enodeb_metrics a WHERE start_time>='2017-03-19' AND start_time<'2017-03-20') t1 JOIN (SELECT * FROM eric_enodeb_metrics b WHERE start_time>='2017-03-19' AND start_time<'2017-03-20') t2 USING (start_time, site_id);
| Hash Join  (cost=7308.59..14676.41 rows=14 width=1436) (actual time=30.352..64.004 rows=7869 loops=1)

Thank you in advance for any response.

Justin


[PERFORM] join estimate of subqueries with range conditions and constraint exclusion

From
Justin Pryzby
Date:
We got bitten again by what appears to be the same issue I reported (perhaps
poorly) here:
https://www.postgresql.org/message-id/20170326193344.GS31628%40telsasoft.com

We have PG9.6.3 table hierarchies partitioned by time.  Our reports use
subqueries, each with its own copy of the range clauses on the time column, as
needed to get constraint exclusion; reference:
https://www.postgresql.org/message-id/25076.1366321335%40sss.pgh.pa.us

    SELECT * FROM
    (SELECT * FROM t WHERE col>const) a JOIN
    (SELECT * FROM t WHERE col>const) b USING (col)

I'm diagnosing a bad estimate/plan due to excessively high n_distinct leading
to an underestimated rowcount when selecting from a small fraction of the table
hierarchy.  This leads intermittently to bad things, specifically a cascade of
misestimates and associated nested loops around millions of rows.

Artificial/generated/contrived test case, involving a table with 99 instances
each of 99 values:

postgres=# CREATE TABLE t(i INT);
postgres=# TRUNCATE t;INSERT INTO t SELECT i FROM generate_series(1,99) i,generate_series(1,99);ANALYZE t;
postgres=# SELECT (SELECT sum(x) FROM unnest(most_common_freqs) x) frac_MCV, tablename, attname, n_distinct,
array_length(most_common_vals,1) n_mcv, array_length(histogram_bounds,1) n_hist, (SELECT MAX(x) FROM
unnest(most_common_vals::text::text[]) x) maxmcv, (histogram_bounds::text::text[])[array_length(histogram_bounds,1)]
maxhist FROM pg_stats WHERE attname~'i' AND tablename='t' GROUP BY 1,2,3,4,5,6,7,8 ORDER BY 1 DESC;
-[ RECORD 1 ]--
frac_mcv   | 1
tablename  | t
attname    | i
n_distinct | 99
n_mcv      | 99
n_hist     |
maxmcv     | 99
maxhist    |

range query (which could use constraint exclusion), but bad estimate:
postgres=# explain ANALYZE SELECT * FROM (SELECT * FROM t WHERE i<2) AS a JOIN (SELECT * FROM t WHERE i<2) AS b USING (i);
 Merge Join  (cost=339.59..341.57 rows=99 width=4) (actual time=8.272..16.892 rows=9801 loops=1)

range query which could NOT use constraint exclusion, good estimate:
postgres=# explain ANALYZE SELECT * FROM (SELECT * FROM t) AS a JOIN (SELECT * FROM t) AS b USING (i) WHERE i<2;
 Hash Join  (cost=264.52..541.54 rows=9801 width=4) (actual time=12.688..22.325 rows=9801 loops=1)

non-range query, good estimate:
postgres=# explain ANALYZE SELECT * FROM (SELECT * FROM t WHERE i=3) AS a JOIN (SELECT * FROM t WHERE i=3) AS b USING (i);
 Nested Loop  (cost=0.00..455.78 rows=9801 width=4) (actual time=0.482..15.820 rows=9801 loops=1)

My understanding:
Postgres estimates join selectivity using the number of distinct values of the
underlying table.  For the subqueries "a" and "b", the estimate is the same as for
the underlying table "t", even when selecting only a small fraction of the table...
This is adt/selfuncs.c:eqjoinsel_inner().
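
Spelling out the arithmetic for the toy case (as I understand it, so treat this as
approximate): each side of the join is estimated at 99 rows (the i<2 estimate itself
is fine), but the join selectivity still comes out as the whole-table figure of
~1/99 (from nd and/or the unrestricted MCV lists), so

    estimated join rows ~= 99 * 99 * (1/99) = 99

whereas every row surviving i<2 has i=1, so the real result is 99 * 99 = 9801.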

Note, in my tests, report queries on the child table have correct estimates;
and queries with only a "pushed down" WHERE clause outside the subquery have a
correct estimate (but no constraint exclusion), apparently due to
calc_joinrel_size_estimate() returning the size of the parent table, planning
a join without the restriction clause, followed by filtering the join result, at
which point I guess the MCV list becomes useful and the estimate is perfect...

    SELECT * FROM
    (SELECT * FROM t)a JOIN(SELECT * FROM t)b
    USING (col) WHERE col>const
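
Numerically, that seems to work out for the toy case (again, my approximation): the
join of the unrestricted tables is sized at 9801 * 9801 * (1/99) = 970299, and the
i<2 restriction (1/99, straight off the MCV list) is then applied to the join
output, giving 970299 * (1/99) = 9801, which happens to be exactly right.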

So my original question is basically still open ... is it possible to get
both good estimates/plans AND constraint exclusion?

Thanks
Justin


[PERFORM] Re: join estimate of subqueries with range conditions and constraint exclusion

From
Justin Pryzby
Date:
On Wed, May 24, 2017 at 04:17:30PM -0500, Justin Pryzby wrote:
> We got bitten again by what appears to be the same issue I reported (perhaps
> poorly) here:
> https://www.postgresql.org/message-id/20170326193344.GS31628%40telsasoft.com

> I'm diagnosing a bad estimate/plan due to excessively high n_distinct leading
> to an underestimated rowcount when selecting from a small fraction of the table
> hierarchy.  This leads intermittently to bad things, specifically a cascade of
> misestimates and associated nested loops around millions of rows.

I dug into this some more;  I can mitigate the issue with this change:

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 6a4f7b1..962a5b4 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2279,6 +2279,22 @@ eqjoinsel_inner(Oid operator,

        nd1 = get_variable_numdistinct(vardata1, &isdefault1);
        nd2 = get_variable_numdistinct(vardata2, &isdefault2);
+       elog(DEBUG4, "nd %lf %lf", nd1 ,nd2);
+       if (nd1>vardata1->rel->rows) nd1=vardata1->rel->rows;
+       if (nd2>vardata1->rel->rows) nd2=vardata2->rel->rows;
+
+       elog(DEBUG4, "nd %lf %lf", nd1 ,nd2);
+       elog(DEBUG4, "rows %lf %lf", vardata1->rel->rows ,vardata2->rel->rows);
+       elog(DEBUG4, "tuples %lf %lf", vardata1->rel->tuples ,vardata2->rel->tuples);

original estimate:

DEBUG:  nd 35206.000000 35206.000000
DEBUG:  nd 35206.000000 35206.000000
DEBUG:  rows 5031.000000 5031.000000
DEBUG:  tuples 5031.000000 5031.000000
                                                                           QUERY PLAN

----------------------------------------------------------------------------------------------------------------------------------------------------------------
 Hash Join  (cost=1294.56..2558.62 rows=723 width=750) (actual time=103.273..490.984 rows=50300 loops=1)
   Hash Cond: (eric_enodeb_metrics.start_time = eric_enodeb_metrics_1.start_time)

patched estimate/plan:

postgres=# explain ANALYZE SELECT * FROM (SELECT * FROM eric_enodeb_metrics WHERE start_time>'2017-04-25 18:00') x JOIN (SELECT * FROM eric_enodeb_metrics WHERE start_time>'2017-04-25 18:00') y USING (start_time);

DEBUG:  nd 35206.000000 35206.000000
DEBUG:  nd 5031.000000 5031.000000
DEBUG:  rows 5031.000000 5031.000000
DEBUG:  tuples 5031.000000 5031.000000

| Hash Join  (cost=1294.56..2602.14 rows=5075 width=750) (actual time=90.445..477.712 rows=50300 loops=1)
|   Hash Cond: (eric_enodeb_metrics.start_time = eric_enodeb_metrics_1.start_time)
|   ->  Append  (cost=0.00..1231.67 rows=5031 width=379) (actual time=16.424..46.899 rows=5030 loops=1)
|         ->  Seq Scan on eric_enodeb_metrics  (cost=0.00..0.00 rows=1 width=378) (actual time=0.012..0.012 rows=0 loops=1)
|               Filter: (start_time > '2017-04-25 18:00:00-05'::timestamp with time zone)
|         ->  Seq Scan on eric_enodeb_201704  (cost=0.00..1231.67 rows=5030 width=379) (actual time=16.408..45.634 rows=5030 loops=1)
|               Filter: (start_time > '2017-04-25 18:00:00-05'::timestamp with time zone)
|               Rows Removed by Filter: 23744
|   ->  Hash  (cost=1231.67..1231.67 rows=5031 width=379) (actual time=73.801..73.801 rows=5030 loops=1)
|         Buckets: 8192  Batches: 1  Memory Usage: 1283kB
|         ->  Append  (cost=0.00..1231.67 rows=5031 width=379) (actual time=14.607..47.395 rows=5030 loops=1)
|               ->  Seq Scan on eric_enodeb_metrics eric_enodeb_metrics_1  (cost=0.00..0.00 rows=1 width=378) (actual time=0.009..0.009 rows=0 loops=1)
|                     Filter: (start_time > '2017-04-25 18:00:00-05'::timestamp with time zone)
|               ->  Seq Scan on eric_enodeb_201704 eric_enodeb_201704_1  (cost=0.00..1231.67 rows=5030 width=379) (actual time=14.594..46.091 rows=5030 loops=1)
|                     Filter: (start_time > '2017-04-25 18:00:00-05'::timestamp with time zone)
|                     Rows Removed by Filter: 23744

.. which gets progressively more extreme with an increasingly restrictive condition,
as the rows estimate diverges further from nd.
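
For concreteness, that is roughly:

    original: 5031 * 5031 / 35206 ~= 719   (plan: rows=723)
    patched:  5031 * 5031 / 5031   = 5031  (plan: rows=5075)
    actual:   50300 rows

(the small differences presumably coming from the MCV-matching part of eqjoinsel_inner).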

There's still a 2nd issue which this doesn't address, having to do with joins
of tables with full/complete MCV lists and selective queries on those tables,
as demonstrated by the artificial test:

> postgres=# CREATE TABLE t(i INT);
> postgres=# TRUNCATE t;INSERT INTO t SELECT i FROM generate_series(1,99) i,generate_series(1,99);ANALYZE t;
> postgres=# SELECT (SELECT sum(x) FROM unnest(most_common_freqs) x) frac_MCV, tablename, attname, n_distinct,
> array_length(most_common_vals,1) n_mcv, array_length(histogram_bounds,1) n_hist, (SELECT MAX(x) FROM
> unnest(most_common_vals::text::text[]) x) maxmcv, (histogram_bounds::text::text[])[array_length(histogram_bounds,1)]
> maxhist FROM pg_stats WHERE attname~'i' AND tablename='t' GROUP BY 1,2,3,4,5,6,7,8 ORDER BY 1 DESC;
> -[ RECORD 1 ]--
> frac_mcv   | 1
> tablename  | t
> attname    | i
> n_distinct | 99
> n_mcv      | 99
> n_hist     |
> maxmcv     | 99
> maxhist    |
>
> range query (which could use constraint exclusion), but bad estimate:
> postgres=# explain ANALYZE SELECT * FROM (SELECT * FROM t WHERE i<2) AS a JOIN (SELECT * FROM t WHERE i<2) AS b USING (i);
>  Merge Join  (cost=339.59..341.57 rows=99 width=4) (actual time=8.272..16.892 rows=9801 loops=1)


> My understanding:
> Postgres estimates join selectivity using the number of distinct values of the
> underlying table.  For the subqueries "a" and "b", the estimate is the same as for
> the underlying table "t", even when selecting only a small fraction of the table...

It seems to me that 1) estimates for tables with MCV lists that include every
column value should be much better than that, and hopefully the estimate for
(t WHERE) JOIN (t WHERE) USING (c) could be as good as for t JOIN t USING (c)
WHERE; 2) the postgres estimator doesn't have everything it needs to invoke
existing functionality and apply all its knowledge without also invoking the
executor (testing MCVs against the qual conditions); 3) frequency values (join
eqsel's numbers[]) should be scaled up by something resembling rows/tuples, but
my existing test showed that can be too strong a correction.
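
As a sketch of (1): the information really is in the stats; hand-computing the join
size from pg_stats for the toy table gives the right answer (this is just arithmetic
over the MCV lists, not the planner's code path):

    SELECT sum( (s.freq * c.reltuples) * (s.freq * c.reltuples) ) AS join_rows
      FROM pg_class c,
           (SELECT unnest(most_common_vals::text::int[]) AS val,
                   unnest(most_common_freqs) AS freq
              FROM pg_stats WHERE tablename = 't' AND attname = 'i') s
     WHERE c.relname = 't' AND s.val < 2;
    -- ~9801, i.e. the actual rowcount: summing (freq1*rows1)*(freq2*rows2) over just
    -- the MCVs that pass the qual works out when the MCV lists are complete.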

Justin


Re: [PERFORM] join estimate of subqueries with range conditions and constraint exclusion

From
"David G. Johnston"
Date:
On Wed, May 24, 2017 at 2:17 PM, Justin Pryzby <pryzby@telsasoft.com> wrote:
We got bitten again by what appears to be the same issue I reported (perhaps
poorly) here:
https://www.postgresql.org/message-id/20170326193344.GS31628%40telsasoft.com

We have PG9.6.3 table hierarchies partitioned by time.  Our reports use
subqueries, each with its own copy of the range clauses on the time column, as
needed to get constraint exclusion; reference:
https://www.postgresql.org/message-id/25076.1366321335%40sss.pgh.pa.us

        SELECT * FROM
        (SELECT * FROM t WHERE col>const) a JOIN
        (SELECT * FROM t WHERE col>const) b USING (col)

I'm diagnosing a bad estimate/plan due to excessively high n_distinct leading
to an underestimated rowcount when selecting from a small fraction of the table
hierarchy.  This leads intermittently to bad things, specifically a cascade of
misestimates and associated nested loops around millions of rows.

Justin,

I'm not going to be much help personally, but I just wanted to say that with PGCon just completed and Beta1 just starting, combined with the somewhat specialized nature of the problem, a response should be forthcoming even though it's taking a bit longer than usual.

David J.

From
Tom Lane
Date:
Justin Pryzby <pryzby@telsasoft.com> writes:
> I dug into this some more;  I can mitigate the issue with this change:

> diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
> index 6a4f7b1..962a5b4 100644
> --- a/src/backend/utils/adt/selfuncs.c
> +++ b/src/backend/utils/adt/selfuncs.c
> @@ -2279,6 +2279,22 @@ eqjoinsel_inner(Oid operator,

>         nd1 = get_variable_numdistinct(vardata1, &isdefault1);
>         nd2 = get_variable_numdistinct(vardata2, &isdefault2);
> +       elog(DEBUG4, "nd %lf %lf", nd1 ,nd2);
> +       if (nd1>vardata1->rel->rows) nd1=vardata1->rel->rows;
> +       if (nd2>vardata1->rel->rows) nd2=vardata2->rel->rows;
> +
> +       elog(DEBUG4, "nd %lf %lf", nd1 ,nd2);
> +       elog(DEBUG4, "rows %lf %lf", vardata1->rel->rows ,vardata2->rel->rows);
> +       elog(DEBUG4, "tuples %lf %lf", vardata1->rel->tuples ,vardata2->rel->tuples);

I don't like this change too much.  I agree that intuitively you would
not expect the number of distinct values to exceed the possibly-restricted
number of rows from the input relation, but I think this falls foul of
the problem mentioned in eqjoinsel_semi's comments, namely that it's
effectively double-counting the restriction selectivity.  It happens to
improve matters in the test case you show, but it's not exactly producing
a good estimate even so; and the fact that the change is in the right
direction seems like mostly an artifact of particular ndistinct and
rowcount values.  I note for instance that this patch would do nothing
at all for the toy example you posted upthread, because nd1/nd2 are
already equal to the rows estimates in that case.

The core reason why you get good results for

    select * from a join b using (x) where x = constant

is that there's a great deal of intelligence in the planner about
transitive equality deductions and what to do with partially-redundant
equality clauses.  The reason you don't get similarly good results for

    select * from a join b using (x) where x < constant

is that there is no comparable machinery for inequalities.  Maybe there
should be, but it'd be a fair bit of work to create, and we'd have to
keep one eye firmly fixed on whether it slows planning down even in cases
where no benefit ensues.  In the meantime, I'm not sure that there are
any quick-hack ways of materially improving the situation :-(
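
For illustration, with the toy table t(i) from upthread the difference is visible
directly in the plans (a rough sketch):

    explain SELECT * FROM t a JOIN t b USING (i) WHERE i = 3;
    -- the constant propagates to both sides: both scans should show Filter: (i = 3)
    explain SELECT * FROM t a JOIN t b USING (i) WHERE a.i < 3;
    -- no comparable derivation: only a's scan is filtered, b is read in full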

            regards, tom lane


Re: [PERFORM] Re: join under-estimates with ineq conditions

From
Justin Pryzby
Date:
On Mon, Jun 05, 2017 at 05:02:32PM -0400, Tom Lane wrote:
> Justin Pryzby <pryzby@telsasoft.com> writes:
> > diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
> > +       if (nd1>vardata1->rel->rows) nd1=vardata1->rel->rows;
> > +       if (nd2>vardata1->rel->rows) nd2=vardata2->rel->rows;
>
> I don't like this change too much.

Thanks for your analysis ;)

I have a 2nd patch which improves the 2nd case I mentioned..

> I note for instance that this patch would do nothing at all for the toy

>> There's still a 2nd issue which this doesn't address, having to do with joins
>> of tables with full/complete MCV lists and selective queries on those tables,
>> as demonstrated by the artificial test:
>>
>> > postgres=# CREATE TABLE t(i INT);
>> > postgres=# TRUNCATE t;INSERT INTO t SELECT i FROM generate_series(1,99) i,generate_series(1,99);ANALYZE t;
>> > postgres=# SELECT (SELECT sum(x) FROM unnest(most_common_freqs) x) frac_MCV, tablename, attname, n_distinct,
>> > array_length(most_common_vals,1) n_mcv, array_length(histogram_bounds,1) n_hist, (SELECT MAX(x) FROM
>> > unnest(most_common_vals::text::text[]) x) maxmcv, (histogram_bounds::text::text[])[array_length(histogram_bounds,1)]
>> > maxhist FROM pg_stats WHERE attname~'i' AND tablename='t' GROUP BY 1,2,3,4,5,6,7,8 ORDER BY 1 DESC;

I pointed out that there were two issues, both involving underestimates from
querying a fraction of a table using an inequality condition.  One is due to the
join estimate being based on "nd" (and not substantially on the MCVs), and one is
due to the frequencies associated with the MCV list (and not substantially falling
back to an estimate from "nd").

I made another patch to address the 2nd issue, which affects our pre-aggregated
tables (which are partitioned by month, same as the raw tables).  The
aggregated tables are the result of something like SELECT start_time::date, k1,
k2, ..., sum(a), avg(b) ... GROUP BY 1,2,3, so they have many fewer rows, and nd for
the start_time::date column would be at most 31, so the MCV list would be expected
to be complete, same as the "toy" example I gave.
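
A self-contained stand-in with that shape (table and column names invented), just to
show the stats come out like the "toy" example:

    CREATE TABLE daily_agg AS
      SELECT (DATE '2017-04-01' + (g / 500)) AS start_time,
             (g % 500)                       AS site_id,
             random()                        AS metric
        FROM generate_series(0, 30*500 - 1) g;
    ANALYZE daily_agg;
    -- with only ~30 distinct dates, the MCV list should cover the whole column:
    SELECT n_distinct,
           (SELECT sum(f) FROM unnest(most_common_freqs) f) AS frac_mcv
      FROM pg_stats WHERE tablename = 'daily_agg' AND attname = 'start_time';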

Sometimes when we query the aggregated tables for a small number of days we get
an underestimate leading to nested loops...

Without patch:
Merge Join  (cost=339.59..341.57 rows=99 width=4) (actual time=10.190..17.430 rows=9801 loops=1)

With patch:
DEBUG:  ndfactor 99.000000 99.000000
DEBUG:  nmatches 99 matchprodfreq 1.000000
DEBUG:  nmatches 99 matchprodfreq 1.000000
DEBUG:  matchfreq1 99.000000 unmatchfreq1 0.000000
DEBUG:  matchfreq1 1.000000 unmatchfreq1 0.000000
DEBUG:  matchfreq2 99.000000 unmatchfreq2 0.000000
DEBUG:  matchfreq2 1.000000 unmatchfreq2 0.000000
DEBUG:  otherfreq1 0.000000 otherfreq2 0.000000
DEBUG:  select(1) 1.000000
 Hash Join  (cost=167.75..444.77 rows=9801 width=4) (actual time=4.706..13.892 rows=9801 loops=1)
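
To sanity-check those numbers (as I read the patched code): each side is estimated
at 99 rows out of 9801 tuples, so ndfactor = 9801/99 = 99; the matched-MCV product
frequency, 99 * (1/99 * 1/99) = 1/99, gets scaled by min(ndfactor1, ndfactor2) = 99
up to 1.0, and 99 * 99 * 1.0 = 9801, which happens to equal the actual rowcount here.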


diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 6a4f7b1..bc88423 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2279,6 +2279,14 @@ eqjoinsel_inner(Oid operator,

     nd1 = get_variable_numdistinct(vardata1, &isdefault1);
     nd2 = get_variable_numdistinct(vardata2, &isdefault2);
+    float ndfactor1=1;
+    float ndfactor2=1;
+    if (vardata1->rel->rows)
+        ndfactor1=vardata1->rel->tuples / vardata1->rel->rows;
+    if (vardata2->rel->rows)
+        ndfactor2=vardata2->rel->tuples / vardata2->rel->rows;
+    // ndfactor1=ndfactor2=1;
+    elog(DEBUG4, "ndfactor %lf %lf", ndfactor1,ndfactor2);

     opfuncoid = get_opcode(operator);

@@ -2375,7 +2383,19 @@ eqjoinsel_inner(Oid operator,
                 }
             }
         }
+
+        // you might think we should multiple by ndfactor1*ndfactor2,
+        // but that gives serious overestimates...
+        // matchprodfreq*= ndfactor1>ndfactor2?ndfactor1:ndfactor2;
+        // matchprodfreq*=ndfactor1;
+        // matchprodfreq*=ndfactor2;
+        // matchprodfreq*= ndfactor1<ndfactor2?ndfactor1:ndfactor2;
+        matchprodfreq*= ndfactor1<ndfactor2?ndfactor1:ndfactor2;
+
+        elog(DEBUG4, "nmatches %d matchprodfreq %lf", nmatches, matchprodfreq);
         CLAMP_PROBABILITY(matchprodfreq);
+        elog(DEBUG4, "nmatches %d matchprodfreq %lf", nmatches, matchprodfreq);
+
         /* Sum up frequencies of matched and unmatched MCVs */
         matchfreq1 = unmatchfreq1 = 0.0;
         for (i = 0; i < nvalues1; i++)
@@ -2385,8 +2405,14 @@ eqjoinsel_inner(Oid operator,
             else
                 unmatchfreq1 += numbers1[i];
         }
+
+        matchfreq1*=ndfactor1;
+        unmatchfreq1*=ndfactor1;
+        elog(DEBUG4, "matchfreq1 %lf unmatchfreq1 %lf", matchfreq1, unmatchfreq1);
         CLAMP_PROBABILITY(matchfreq1);
         CLAMP_PROBABILITY(unmatchfreq1);
+        elog(DEBUG4, "matchfreq1 %lf unmatchfreq1 %lf", matchfreq1, unmatchfreq1);
+
         matchfreq2 = unmatchfreq2 = 0.0;
         for (i = 0; i < nvalues2; i++)
         {
@@ -2395,8 +2421,12 @@ eqjoinsel_inner(Oid operator,
             else
                 unmatchfreq2 += numbers2[i];
         }
+        matchfreq2*=ndfactor2;
+        unmatchfreq2*=ndfactor2;
+        elog(DEBUG4, "matchfreq2 %lf unmatchfreq2 %lf", matchfreq2, unmatchfreq2);
         CLAMP_PROBABILITY(matchfreq2);
         CLAMP_PROBABILITY(unmatchfreq2);
+        elog(DEBUG4, "matchfreq2 %lf unmatchfreq2 %lf", matchfreq2, unmatchfreq2);
         pfree(hasmatch1);
         pfree(hasmatch2);

     if (have_mcvs1)

Justin


Re: [PERFORM] Re: join under-estimates with ineq conditions

From
Justin Pryzby
Date:
I never heard back but was hoping for some feedback/discussion about this 2nd
problem/patch.

just a reminder - Thanks

On Thu, Jun 08, 2017 at 11:05:38AM -0500, Justin Pryzby wrote:
> On Mon, Jun 05, 2017 at 05:02:32PM -0400, Tom Lane wrote:
> > Justin Pryzby <pryzby@telsasoft.com> writes:
> > > diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
> > > +       if (nd1>vardata1->rel->rows) nd1=vardata1->rel->rows;
> > > +       if (nd2>vardata1->rel->rows) nd2=vardata2->rel->rows;
> >
> > I don't like this change too much.
>
> Thanks for your analysis ;)
>
> I have a 2nd patch which improves the 2nd case I mentioned..
>
> > I note for instance that this patch would do nothing at all for the toy
>
> >> There's still a 2nd issue which this doesn't address, having to do with joins
> >> of tables with full/complete MCV lists and selective queries on those tables,
> >> as demonstrated by the artificial test:
> >>
> >> > postgres=# CREATE TABLE t(i INT);
> >> > postgres=# TRUNCATE t;INSERT INTO t SELECT i FROM generate_series(1,99) i,generate_series(1,99);ANALYZE t;
> >> > postgres=# SELECT (SELECT sum(x) FROM unnest(most_common_freqs) x) frac_MCV, tablename, attname, n_distinct,
> >> > array_length(most_common_vals,1) n_mcv, array_length(histogram_bounds,1) n_hist, (SELECT MAX(x) FROM
> >> > unnest(most_common_vals::text::text[]) x) maxmcv, (histogram_bounds::text::text[])[array_length(histogram_bounds,1)]
> >> > maxhist FROM pg_stats WHERE attname~'i' AND tablename='t' GROUP BY 1,2,3,4,5,6,7,8 ORDER BY 1 DESC;
>
> I pointed out that there were two issues, both involving underestimates from
> querying a fraction of a table using an inequality condition.  One is due to the
> join estimate being based on "nd" (and not substantially on the MCVs), and one is
> due to the frequencies associated with the MCV list (and not substantially falling
> back to an estimate from "nd").
>
> I made another patch to address the 2nd issue, which affects our pre-aggregated
> tables (which are partitioned by month, same as the raw tables).  The
> aggregated tables are the result of something like SELECT start_time::date, k1,
> k2, ..., sum(a), avg(b) ... GROUP BY 1,2,3, so they have many fewer rows, and nd for
> the start_time::date column would be at most 31, so the MCV list would be expected
> to be complete, same as the "toy" example I gave.
>
> Sometimes when we query the aggregated tables for a small number of days we get
> an underestimate leading to nested loops...
>
> Without patch:
> Merge Join  (cost=339.59..341.57 rows=99 width=4) (actual time=10.190..17.430 rows=9801 loops=1)
>
> With patch:
> DEBUG:  ndfactor 99.000000 99.000000
> DEBUG:  nmatches 99 matchprodfreq 1.000000
> DEBUG:  nmatches 99 matchprodfreq 1.000000
> DEBUG:  matchfreq1 99.000000 unmatchfreq1 0.000000
> DEBUG:  matchfreq1 1.000000 unmatchfreq1 0.000000
> DEBUG:  matchfreq2 99.000000 unmatchfreq2 0.000000
> DEBUG:  matchfreq2 1.000000 unmatchfreq2 0.000000
> DEBUG:  otherfreq1 0.000000 otherfreq2 0.000000
> DEBUG:  select(1) 1.000000
>  Hash Join  (cost=167.75..444.77 rows=9801 width=4) (actual time=4.706..13.892 rows=9801 loops=1)
>
>
> diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
> index 6a4f7b1..bc88423 100644
> --- a/src/backend/utils/adt/selfuncs.c
> +++ b/src/backend/utils/adt/selfuncs.c
> @@ -2279,6 +2279,14 @@ eqjoinsel_inner(Oid operator,
>
>      nd1 = get_variable_numdistinct(vardata1, &isdefault1);
>      nd2 = get_variable_numdistinct(vardata2, &isdefault2);
> +    float ndfactor1=1;
> +    float ndfactor2=1;
> +    if (vardata1->rel->rows)
> +        ndfactor1=vardata1->rel->tuples / vardata1->rel->rows;
> +    if (vardata2->rel->rows)
> +        ndfactor2=vardata2->rel->tuples / vardata2->rel->rows;
> +    // ndfactor1=ndfactor2=1;
> +    elog(DEBUG4, "ndfactor %lf %lf", ndfactor1,ndfactor2);
>
>      opfuncoid = get_opcode(operator);
>
> @@ -2375,7 +2383,19 @@ eqjoinsel_inner(Oid operator,
>                  }
>              }
>          }
> +
> +        // you might think we should multiple by ndfactor1*ndfactor2,
> +        // but that gives serious overestimates...
> +        // matchprodfreq*= ndfactor1>ndfactor2?ndfactor1:ndfactor2;
> +        // matchprodfreq*=ndfactor1;
> +        // matchprodfreq*=ndfactor2;
> +        // matchprodfreq*= ndfactor1<ndfactor2?ndfactor1:ndfactor2;
> +        matchprodfreq*= ndfactor1<ndfactor2?ndfactor1:ndfactor2;
> +
> +        elog(DEBUG4, "nmatches %d matchprodfreq %lf", nmatches, matchprodfreq);
>          CLAMP_PROBABILITY(matchprodfreq);
> +        elog(DEBUG4, "nmatches %d matchprodfreq %lf", nmatches, matchprodfreq);
> +
>          /* Sum up frequencies of matched and unmatched MCVs */
>          matchfreq1 = unmatchfreq1 = 0.0;
>          for (i = 0; i < nvalues1; i++)
> @@ -2385,8 +2405,14 @@ eqjoinsel_inner(Oid operator,
>              else
>                  unmatchfreq1 += numbers1[i];
>          }
> +
> +        matchfreq1*=ndfactor1;
> +        unmatchfreq1*=ndfactor1;
> +        elog(DEBUG4, "matchfreq1 %lf unmatchfreq1 %lf", matchfreq1, unmatchfreq1);
>          CLAMP_PROBABILITY(matchfreq1);
>          CLAMP_PROBABILITY(unmatchfreq1);
> +        elog(DEBUG4, "matchfreq1 %lf unmatchfreq1 %lf", matchfreq1, unmatchfreq1);
> +
>          matchfreq2 = unmatchfreq2 = 0.0;
>          for (i = 0; i < nvalues2; i++)
>          {
> @@ -2395,8 +2421,12 @@ eqjoinsel_inner(Oid operator,
>              else
>                  unmatchfreq2 += numbers2[i];
>          }
> +        matchfreq2*=ndfactor2;
> +        unmatchfreq2*=ndfactor2;
> +        elog(DEBUG4, "matchfreq2 %lf unmatchfreq2 %lf", matchfreq2, unmatchfreq2);
>          CLAMP_PROBABILITY(matchfreq2);
>          CLAMP_PROBABILITY(unmatchfreq2);
> +        elog(DEBUG4, "matchfreq2 %lf unmatchfreq2 %lf", matchfreq2, unmatchfreq2);
>          pfree(hasmatch1);
>          pfree(hasmatch2);
>
>      if (have_mcvs1)
>
> Justin