BUG #15430: partition-wise join only works in combination withpruning on 1 partition

Поиск
Список
Период
Сортировка
От PG Bug reporting form
Тема BUG #15430: partition-wise join only works in combination withpruning on 1 partition
Дата
Msg-id 15430-ee54c4208e54e9b1@postgresql.org
обсуждение исходный текст
Ответы Re: BUG #15430: partition-wise join only works in combination withpruning on 1 partition  (Amit Langote <Langote_Amit_f8@lab.ntt.co.jp>)
Список pgsql-bugs
The following bug has been logged on the website:

Bug reference:      15430
Logged by:          Bart Debersaques
Email address:      agile.data.analytics@gmail.com
PostgreSQL version: 11rc1
Operating system:   Centos 7
Description:

psql <<-EOF
create schema part_test
;
CREATE TABLE part_test.measurement (
city_id int not null,
logdate date not null,
peaktemp int,
unitsales
int
) PARTITION BY RANGE (logdate)
;
CREATE TABLE part_test.measurement_y2006m02 PARTITION OF
part_test.measurement
FOR VALUES FROM ('2006-02-01') TO ('2006-03-01')
;
CREATE TABLE part_test.measurement_y2006m03 PARTITION OF
part_test.measurement
FOR VALUES FROM ('2006-03-01') TO ('2006-04-01')
;
EOF

psql <<-EOF
CREATE TABLE part_test.measurement2 (
city_id int not null,
logdate date not null,
peaktemp int,
unitsales
int
) PARTITION BY RANGE (logdate)
;
CREATE TABLE part_test.measurement2_y2006m02 PARTITION OF
part_test.measurement2
FOR VALUES FROM ('2006-02-01') TO ('2006-03-01')
;
CREATE TABLE part_test.measurement2_y2006m03 PARTITION OF
part_test.measurement2
FOR VALUES FROM ('2006-03-01') TO ('2006-04-01')
;
EOF

psql <<-EOF
explain 
select * 
from 
part_test.measurement m1
inner join part_test.measurement2 m2
on m1.city_id = m2.city_id and m1.logdate = m2.logdate
where m1.logdate = '2006-02-01'::date
EOF
# Hash Join  (cost=33.28..66.58 rows=9 width=32)
#    Hash Cond: (m1.city_id = m2.city_id)
#    ->  Append  (cost=0.00..33.17 rows=9 width=16)
#          ->  Seq Scan on measurement_y2006m02 m1  (cost=0.00..33.12 rows=9
width=16)
#                Filter: (logdate = '2006-02-01'::date)
#    ->  Hash  (cost=33.17..33.17 rows=9 width=16)
#          ->  Append  (cost=0.00..33.17 rows=9 width=16)
#                ->  Seq Scan on measurement2_y2006m02 m2  (cost=0.00..33.12
rows=9 width=16)
#                      Filter: (logdate = '2006-02-01'::date)

# conclusion: pruning applied to both tables = OK

psql <<-EOF
explain 
select * 
from 
part_test.measurement m1
inner join part_test.measurement2 m2
on m1.city_id = m2.city_id and m1.logdate = m2.logdate
where m1.logdate in ('2006-02-01'::date, '2006-03-01'::date)
EOF
#  Hash Join  (cost=66.97..170.25 rows=3 width=32)
#   Hash Cond: ((m2.city_id = m1.city_id) AND (m2.logdate = m1.logdate))
#   ->  Append  (cost=0.00..75.50 rows=3700 width=16)
#         ->  Seq Scan on measurement2_y2006m02 m2  (cost=0.00..28.50
rows=1850 width=16)
#         ->  Seq Scan on measurement2_y2006m03 m2_1  (cost=0.00..28.50
rows=1850 width=16)
#   ->  Hash  (cost=66.43..66.43 rows=36 width=16)
#         ->  Append  (cost=0.00..66.43 rows=36 width=16)
#               ->  Seq Scan on measurement_y2006m02 m1  (cost=0.00..33.12
rows=18 width=16)
#                     Filter: (logdate = ANY
('{2006-02-01,2006-03-01}'::date[]))
#               ->  Seq Scan on measurement_y2006m03 m1_1  (cost=0.00..33.12
rows=18 width=16)
#                     Filter: (logdate = ANY
('{2006-02-01,2006-03-01}'::date[]))

# conclusion: join performed on full tab scans, join not performed on a
per-partion basis

psql <<-EOF
explain 
select * 
from 
part_test.measurement m1
inner join part_test.measurement2 m2
on m1.city_id = m2.city_id and m1.logdate = m2.logdate
EOF
# Merge Join  (cost=589.57..648.49 rows=342 width=32)
#   Merge Cond: ((m1.city_id = m2.city_id) AND (m1.logdate = m2.logdate))
#   ->  Sort  (cost=294.79..304.04 rows=3700 width=16)
#         Sort Key: m1.city_id, m1.logdate
#         ->  Append  (cost=0.00..75.50 rows=3700 width=16)
#               ->  Seq Scan on measurement_y2006m02 m1  (cost=0.00..28.50
rows=1850 width=16)
#               ->  Seq Scan on measurement_y2006m03 m1_1  (cost=0.00..28.50
rows=1850 width=16)
#   ->  Sort  (cost=294.79..304.04 rows=3700 width=16)
#         Sort Key: m2.city_id, m2.logdate
#         ->  Append  (cost=0.00..75.50 rows=3700 width=16)
#               ->  Seq Scan on measurement2_y2006m02 m2  (cost=0.00..28.50
rows=1850 width=16)
#               ->  Seq Scan on measurement2_y2006m03 m2_1
(cost=0.00..28.50 rows=1850 width=16)

# conclusion: join performed on full tab scans, join not performed on a
per-partion basis

# desired behaviour:

# in serial execution and for an equi join: partitions in m1 should be
iterated and joined with the equivaluent in m2
# parallel execution: the above should be done with several partitions in
parallel

# please assign to Ashutosh Bapat as described in
https://www.enterprisedb.com/blog/partition-wise-joins-%E2%80%9Cdivide-and-conquer-joins-between-partitioned-table

reading 

https://www.enterprisedb.com/blog/partition-wise-joins-%E2%80%9Cdivide-and-conquer-joins-between-partitioned-table


В списке pgsql-bugs по дате отправления:

Предыдущее
От: Ryan Murphy
Дата:
Сообщение: Re: BUG #15429: psql requires '-h localhost' to run basic commands
Следующее
От: Amit Langote
Дата:
Сообщение: Re: BUG #15430: partition-wise join only works in combination withpruning on 1 partition