Join for the parent table will not leverage the index scan

Поиск
Список
Период
Сортировка
От Shrikant Bhende
Тема Join for the parent table will not leverage the index scan
Дата
Msg-id CAMTQpJAU6orDxKn6jOhMc8Tm1sv+cORed9cMmFs3bCzsOrQ5ww@mail.gmail.com
обсуждение исходный текст
Список pgsql-general

We have two tables, both have ~36 partitions.
This is my first query, which attempts to join the parent tables on the columns that have been used to create multi-column indexes for all partitions (multi-column indexes defined on session_id, detail_id in that order):

PostgreSQL version : 10
-- query

explain

select * from dm_ci360.page_details pd
inner join dm_ci360.page_details_ext pde
 on (pd.session_id = pde.session_id
 and pd.detail_id = pde.detail_id)
where pd.session_dt >= now()::date-2;

-- plan

Hash Join  (cost=36927955.75..100106605.07 rows=11 width=8355)

  Hash Cond: (((pd.session_id)::text = (pde.session_id)::text) AND ((pd.detail_id)::text = (pde.detail_id)::text))

  ->  Append  (cost=1297.33..571870.20 rows=630535 width=8130)

        ->  Bitmap Heap Scan on page_details_null pd  (cost=1297.33..240207.85 rows=69259 width=8220)

              Recheck Cond: (session_dt >= ((now())::date - 2))

              ->  Bitmap Index Scan on page_details_null_session_dt_idx  (cost=0.00..1280.02 rows=69259 width=0)

                    Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m3_session_dt_idx on page_details_m3 pd_1  (cost=0.44..4.71 rows=1 width=8216)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m2_session_dt_idx on page_details_m2 pd_2  (cost=0.43..102.79 rows=621 width=8253)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m1_session_dt_idx on page_details_m1 pd_3  (cost=0.44..4.55 rows=1 width=8281)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_5_session_dt_idx on page_details_5 pd_4  (cost=0.57..51622.53 rows=196574 width=8202)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Bitmap Heap Scan on page_details_31 pd_5  (cost=982.61..194857.98 rows=52391 width=7721)

              Recheck Cond: (session_dt >= ((now())::date - 2))

              ->  Bitmap Index Scan on page_details_31_session_dt_idx1  (cost=0.00..969.51 rows=52391 width=0)

                    Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_49_session_dt_idx on page_details_49 pd_6  (cost=0.45..10991.08 rows=50039 width=7721)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_52_session_dt_idx on page_details_52 pd_7  (cost=0.44..2337.94 rows=10222 width=8297)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_59_session_dt_idx on page_details_59 pd_8  (cost=0.57..24790.17 rows=50148 width=8276)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_60_session_dt_idx on page_details_60 pd_9  (cost=0.44..7292.41 rows=4259 width=8263)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_61_session_dt_idx on page_details_61 pd_10  (cost=0.43..309.10 rows=704 width=8182)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_63_session_dt_idx on page_details_63 pd_11  (cost=0.43..13.20 rows=25 width=8250)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_64_session_dt_idx on page_details_64 pd_12  (cost=0.44..5.41 rows=1 width=8314)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_66_session_dt_idx on page_details_66 pd_13  (cost=0.42..4.49 rows=1 width=8277)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_67_session_dt_idx on page_details_67 pd_14  (cost=0.42..58.62 rows=325 width=8227)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_68_session_dt_idx on page_details_68 pd_15  (cost=0.42..35.20 rows=111 width=8259)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_78_session_dt_idx on page_details_78 pd_16  (cost=0.44..1738.10 rows=4608 width=8263)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_98_session_dt_idx on page_details_98 pd_17  (cost=0.44..2157.31 rows=7136 width=8225)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_101_session_dt_idx on page_details_101 pd_18  (cost=0.44..2956.03 rows=9241 width=8205)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_130_session_dt_idx on page_details_130 pd_19  (cost=0.44..1683.53 rows=19656 width=8199)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m100_session_dt_idx on page_details_m100 pd_20  (cost=0.43..4.96 rows=1 width=8225)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m4_session_dt_idx on page_details_m4 pd_21  (cost=0.43..659.89 rows=6097 width=8195)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_134_session_dt_idx on page_details_134 pd_22  (cost=0.45..4666.25 rows=50409 width=8176)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_58_session_dt_idx on page_details_58 pd_23  (cost=0.43..7.05 rows=1 width=8362)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_69_session_dt_idx on page_details_69 pd_24  (cost=0.43..125.03 rows=348 width=8268)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_100_session_dt_idx on page_details_100 pd_25  (cost=0.43..381.13 rows=466 width=8195)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_114_session_dt_idx on page_details_114 pd_26  (cost=0.43..7.80 rows=1 width=8183)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m5_session_dt_idx on page_details_m5 pd_27  (cost=0.57..4505.18 rows=50872 width=8225)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_32_session_dt_idx on page_details_32 pd_28  (cost=0.43..8.44 rows=1 width=8807)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_23_session_dt_idx on page_details_23 pd_29  (cost=0.29..7.47 rows=1 width=9327)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m6_session_dt_idx on page_details_m6 pd_30  (cost=0.44..769.32 rows=7135 width=7814)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m7_session_dt_idx on page_details_m7 pd_31  (cost=0.44..11400.50 rows=14694 width=8203)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_m8_session_dt_idx on page_details_m8 pd_32  (cost=0.44..6568.86 rows=8045 width=8200)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_144_session_dt_idx on page_details_144 pd_33  (cost=0.29..21.16 rows=162 width=8189)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_143_session_dt_idx on page_details_143 pd_34  (cost=0.43..1477.00 rows=16135 width=8197)

              Index Cond: (session_dt >= ((now())::date - 2))

        ->  Index Scan using page_details_145_session_dt_idx on page_details_145 pd_35  (cost=0.29..87.17 rows=844 width=8226)

              Index Cond: (session_dt >= ((now())::date - 2))

  ->  Hash  (cost=16117516.77..16117516.77 rows=449927377 width=226)

        ->  Append  (cost=0.00..16117516.77 rows=449927377 width=226)

              ->  Seq Scan on page_details_ext_null pde  (cost=0.00..1467668.52 rows=40845752 width=226)

              ->  Seq Scan on page_details_ext_m3 pde_1  (cost=0.00..347732.32 rows=9980132 width=226)

              ->  Seq Scan on page_details_ext_m2 pde_2  (cost=0.00..26146.27 rows=715527 width=226)

              ->  Seq Scan on page_details_ext_m1 pde_3  (cost=0.00..180093.08 rows=4793908 width=226)

              ->  Seq Scan on page_details_ext_5 pde_4  (cost=0.00..3056150.08 rows=82652608 width=226)

              ->  Seq Scan on page_details_ext_31 pde_5  (cost=0.00..4177416.08 rows=124953208 width=226)

              ->  Seq Scan on page_details_ext_49 pde_6  (cost=0.00..1034364.84 rows=28538484 width=226)

              ->  Seq Scan on page_details_ext_52 pde_7  (cost=0.00..575529.72 rows=16018272 width=226)

              ->  Seq Scan on page_details_ext_59 pde_8  (cost=0.00..1226617.78 rows=33054378 width=226)

              ->  Seq Scan on page_details_ext_60 pde_9  (cost=0.00..206925.60 rows=5920260 width=226)

              ->  Seq Scan on page_details_ext_61 pde_10  (cost=0.00..10599.63 rows=296463 width=226)

              ->  Seq Scan on page_details_ext_63 pde_11  (cost=0.00..32993.45 rows=898145 width=226)

              ->  Seq Scan on page_details_ext_64 pde_12  (cost=0.00..211220.00 rows=6212500 width=226)

              ->  Seq Scan on page_details_ext_66 pde_13  (cost=0.00..4888.44 rows=127544 width=226)

              ->  Seq Scan on page_details_ext_67 pde_14  (cost=0.00..5121.02 rows=135402 width=226)

              ->  Seq Scan on page_details_ext_68 pde_15  (cost=0.00..4284.18 rows=116218 width=226)

              ->  Seq Scan on page_details_ext_78 pde_16  (cost=0.00..135663.64 rows=3705564 width=226)

              ->  Seq Scan on page_details_ext_98 pde_17  (cost=0.00..232421.44 rows=6556944 width=226)

              ->  Seq Scan on page_details_ext_101 pde_18  (cost=0.00..189844.81 rows=5231481 width=226)

              ->  Seq Scan on page_details_ext_130 pde_19  (cost=0.00..318011.40 rows=8558640 width=226)

              ->  Seq Scan on page_details_ext_m100 pde_20  (cost=0.00..4914.04 rows=143504 width=226)

              ->  Seq Scan on page_details_ext_m4 pde_21  (cost=0.00..75878.68 rows=1994468 width=226)

              ->  Seq Scan on page_details_ext_134 pde_22  (cost=0.00..724264.36 rows=19192136 width=226)

              ->  Seq Scan on page_details_ext_58 pde_23  (cost=0.00..12171.94 rows=361394 width=331)

              ->  Seq Scan on page_details_ext_69 pde_24  (cost=0.00..6464.95 rows=173295 width=226)

              ->  Seq Scan on page_details_ext_100 pde_25  (cost=0.00..9217.39 rows=255239 width=226)

              ->  Seq Scan on page_details_ext_114 pde_26  (cost=0.00..11702.69 rows=358469 width=226)

              ->  Seq Scan on page_details_ext_m5 pde_27  (cost=0.00..1274434.84 rows=33330284 width=226)

              ->  Seq Scan on page_details_ext_23 pde_28  (cost=0.00..822.03 rows=25503 width=378)

              ->  Seq Scan on page_details_ext_32 pde_29  (cost=0.00..7011.56 rows=217756 width=378)

              ->  Seq Scan on page_details_ext_m6 pde_30  (cost=0.00..177039.58 rows=4616658 width=226)

              ->  Seq Scan on page_details_ext_m7 pde_31  (cost=0.00..188264.43 rows=5143643 width=226)

              ->  Seq Scan on page_details_ext_m8 pde_32  (cost=0.00..139185.56 rows=3799056 width=226)

              ->  Seq Scan on page_details_ext_144 pde_33  (cost=0.00..687.46 rows=16246 width=226)

              ->  Seq Scan on page_details_ext_143 pde_34  (cost=0.00..41053.82 rows=971482 width=226)

              ->  Seq Scan on page_details_ext_145 pde_35  (cost=0.00..711.14 rows=16814 width=226)

 

The only place an index scan is used is to filter down page_details. In my second query,
when explicitly joining single partitions an index scan is used to perform filtering, and the join as well:

--query

explain

select * from dm_ci360.page_details_5 pd
inner join dm_ci360.page_details_ext_5 pde
on (pd.session_id = pde.session_id
and pd.detail_id = pde.detail_id)
where pd.session_dt >= now()::date-2;


--plan

Nested Loop  (cost=1.27..1685980.38 rows=1 width=8428)

  ->  Index Scan using page_details_5_session_dt_idx on page_details_5 pd  (cost=0.57..51267.67 rows=196574 width=8202)

        Index Cond: (session_dt >= ((now())::date - 2))

  ->  Index Scan using page_details_ext_5_session_id_detail_id_idx on page_details_ext_5 pde  (cost=0.69..8.31 rows=1 width=226)

        Index Cond: (((session_id)::text = (pd.session_id)::text) AND ((detail_id)::text = (pd.detail_id)::text))

I just cannot figure out why the join for the parent table will not leverage the index,
but the join for a single partition will. We are running on Postgres 10, and the row counts for the tables can be found below:

Row Count
page_details -> ~413M
page_details_ext -> ~450M

Table DDL

-- page_details

CREATE TABLE dm_ci360.page_details (
    dml_id int4 NULL,
    dml_ts timestamp NULL,
    dml_ts_utc timestamp NULL,
    dml_type varchar(1) NULL,
    dwh_job_id int4 NULL,
    ins_dwh_job_id int4 NULL,
    upd_dwh_job_id int4 NULL,
    deleted_flag varchar(1) NULL,
    processed_dttm timestamp NULL,
    valid_from_dttm timestamp(0) NULL,
    valid_to_dttm timestamp(0) NULL,
    ccd varchar(2000) NULL,
    client_id int4 NULL,
    detail_id varchar(32) NULL,
    session_id varchar(29) NULL,
    visit_id varchar(32) NULL,
    window_size_txt varchar(20) NULL,
    session_dt date NULL,
    page_url_txt varchar(1332) NULL,
    domain_nm varchar(165) NULL,
    bytes_sent_cnt int4 NULL,
    page_load_sec_cnt int8 NULL,
    page_complete_sec_cnt int8 NULL,
    protocol_nm varchar(8) NULL,
    page_desc varchar(1332) NULL,
    class1_id varchar(650) NULL,
    class2_id varchar(650) NULL,
    class3_id varchar(650) NULL,
    class4_id varchar(650) NULL,
    class5_id varchar(650) NULL,
    class6_id varchar(650) NULL,
    class7_id varchar(650) NULL,
    class8_id varchar(650) NULL,
    class9_id varchar(650) NULL,
    class10_id varchar(650) NULL,
    class11_id varchar(650) NULL,
    class12_id varchar(650) NULL,
    class13_id varchar(650) NULL,
    class14_id varchar(650) NULL,
    class15_id varchar(650) NULL,
    url_domain varchar(215) NULL,
    identity_id varchar(36) NULL,
    detail_dttm timestamp NULL,
    detail_dttm_tz timestamp NULL,
    load_dttm timestamp NULL,
    session_dt_tz date NULL,
    detail_id_hex varchar(32) NULL,
    visit_id_hex varchar(32) NULL,
    session_id_hex varchar(29) NULL
)
PARTITION BY LIST (client_id);

CREATE TABLE dm_ci360.page_details_100 PARTITION OF dm_ci360.page_details FOR VALUES IN (100);

CREATE INDEX page_details_100_detail_dttm_idx ON dm_ci360.page_details_100 USING btree (detail_dttm)
CREATE INDEX page_details_100_identity_id_idx ON dm_ci360.page_details_100 USING btree (identity_id)
CREATE INDEX page_details_100_session_dt_idx ON dm_ci360.page_details_100 USING btree (session_dt)
CREATE INDEX page_details_100_session_id_detail_id_idx ON dm_ci360.page_details_100 USING btree (session_id, detail_id)

-- page_details_ext

CREATE TABLE dm_ci360.page_details_ext (
    dml_id int4 NULL,
    dml_ts timestamp NULL,
    dml_ts_utc timestamp NULL,
    dml_type varchar(1) NULL,
    dwh_job_id int4 NULL,
    ins_dwh_job_id int4 NULL,
    upd_dwh_job_id int4 NULL,
    deleted_flag varchar(1) NULL,
    processed_dttm timestamp NULL,
    valid_from_dttm timestamp(0) NULL,
    valid_to_dttm timestamp(0) NULL,
    ccd varchar(2000) NULL,
    client_id int4 NULL,
    detail_id varchar(32) NULL,
    session_id varchar(29) NULL,
    active_sec_spent_on_page_cnt int4 NULL,
    seconds_spent_on_page_cnt int4 NULL,
    load_dttm timestamp NULL,
    detail_id_hex varchar(32) NULL,
    session_id_hex varchar(29) NULL
)
PARTITION BY LIST (client_id);

CREATE TABLE dm_ci360.page_details_ext_100 PARTITION OF dm_ci360.page_details_ext FOR VALUES IN (100);

CREATE INDEX page_details_ext_100_session_id_detail_id_idx ON dm_ci360.page_details_ext_100 USING btree (session_id, detail_id)
CREATE INDEX page_details_ext_100_session_id_detail_id_idx ON dm_ci360.page_details_ext_100 USING btree (session_id, detail_id)


Thanks

В списке pgsql-general по дате отправления:

Предыдущее
От: Adrian Klaver
Дата:
Сообщение: Re: Sort question - Fractions, Metric etc
Следующее
От: Bret Stern
Дата:
Сообщение: Re: Sort question - Fractions, Metric etc