Обсуждение: Very poor performance with Nested Loop Anti Join

Поиск
Список
Период
Сортировка

Very poor performance with Nested Loop Anti Join

От
Andreas Joseph Krogh
Дата:
I have this schema:
 
CREATE TABLE onp_crm_person(    id serial PRIMARY KEY,    onp_user_id bigint references onp_user(id) deferrable initially deferred,    is_resource boolean not null default false,    UNIQUE(onp_user_id)
);

CREATE TABLE onp_crm_activity_log(    id bigserial PRIMARY KEY,    relation_id integer REFERENCES onp_crm_relation(entity_id),    logged_for int references onp_crm_person(onp_user_id),    duration bigint
);

CREATE TABLE onp_crm_invoice(    entity_id bigint PRIMARY KEY REFERENCES onp_crm_entity(entity_id),    status_key VARCHAR NOT NULL,    credit_against bigint REFERENCES onp_crm_invoice(entity_id),    sent_date DATE,    UNIQUE(credit_against) deferrable INITIALLY DEFERRED
    -- invoice_print_template_id is added after creation of origo_invoice_print_template
);

CREATE TABLE onp_crm_invoice_line (    id         SERIAL PRIMARY KEY,    invoice_id INTEGER NOT NULL REFERENCES onp_crm_invoice (entity_id)
);

CREATE TABLE onp_crm_calendarentry_invoice_membership(    invoice_line_id INTEGER NOT NULL REFERENCES onp_crm_invoice_line(id) ON DELETE CASCADE,    calendar_entry_id INTEGER NOT NULL REFERENCES onp_crm_activity_log(id),    unique(invoice_line_id, calendar_entry_id) DEFERRABLE INITIALLY DEFERRED
);
 
This query performs terribly slow ( ~26 minutes, 1561346.597ms):
 
explain analyze
SELECT
      log.relation_id as company_id    , sum(log.duration) AS duration
FROM onp_crm_activity_log log
    JOIN onp_crm_person logfor ON logfor.onp_user_id = log.logged_for AND logfor.is_resource = FALSE

WHERE 1 = 1
      -- Filter out already invoiced before 2016-06-27
      AND NOT EXISTS(    SELECT * FROM onp_crm_calendarentry_invoice_membership cem        JOIN onp_crm_invoice_line il ON cem.invoice_line_id = il.id        JOIN onp_crm_invoice inv ON il.invoice_id = inv.entity_id    WHERE cem.calendar_entry_id = log.id          AND inv.status_key = 'INVOICE_STATUS_INVOICED'
          AND inv.sent_date <= '2016-06-27'
          AND NOT EXISTS(        SELECT * FROM onp_crm_invoice creditnote        WHERE il.invoice_id = creditnote.credit_against              AND creditnote.status_key = 'INVOICE_STATUS_INVOICED'
              AND creditnote.sent_date <= '2016-06-27'
    )
)

GROUP BY
    log.relation_id
;
 
Explain output:
                                                                                                                     QUERY PLAN                                                                                                                       
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
HashAggregate  (cost=13778.63..13796.39 rows=1421 width=12) (actual time=1561343.861..1561344.042 rows=724 loops=1)
  Group Key: log.relation_id
  ->  Nested Loop Anti Join  (cost=741.35..13768.63 rows=2000 width=12) (actual time=471.973..1561221.929 rows=96095 loops=1)
        Join Filter: (cem.calendar_entry_id = log.id)
        Rows Removed by Join Filter: 11895758618
        ->  Hash Join  (cost=86.56..9729.03 rows=2000 width=20) (actual time=0.170..668.911 rows=181644 loops=1)
              Hash Cond: (log.logged_for = logfor.onp_user_id)
              ->  Seq Scan on onp_crm_activity_log log  (cost=0.00..8930.98 rows=184398 width=24) (actual time=0.007..538.893 rows=182378 loops=1)
              ->  Hash  (cost=39.46..39.46 rows=3768 width=8) (actual time=0.126..0.126 rows=36 loops=1)
                    Buckets: 4096  Batches: 1  Memory Usage: 34kB
                    ->  Bitmap Heap Scan on onp_crm_person logfor  (cost=3.69..39.46 rows=3768 width=8) (actual time=0.040..0.106 rows=36 loops=1)
                          Recheck Cond: (onp_user_id IS NOT NULL)
                          Filter: (NOT is_resource)
                          Rows Removed by Filter: 5
                          Heap Blocks: exact=10
                          ->  Bitmap Index Scan on onp_crm_person_onp_id_idx  (cost=0.00..2.75 rows=41 width=0) (actual time=0.019..0.019 rows=41 loops=1)
        ->  Materialize  (cost=654.79..4009.60 rows=1 width=4) (actual time=0.000..2.829 rows=65490 loops=181644)
              ->  Nested Loop  (cost=654.79..4009.59 rows=1 width=4) (actual time=9.056..386.835 rows=85668 loops=1)
                    ->  Nested Loop  (cost=654.50..4009.27 rows=1 width=8) (actual time=9.046..165.280 rows=88151 loops=1)
                          ->  Hash Anti Join  (cost=654.21..4008.72 rows=1 width=8) (actual time=9.016..40.672 rows=76174 loops=1)
                                Hash Cond: (il.invoice_id = creditnote.credit_against)
                                ->  Seq Scan on onp_crm_invoice_line il  (cost=0.00..3062.01 rows=78001 width=8) (actual time=0.005..11.259 rows=78614 loops=1)
                                ->  Hash  (cost=510.56..510.56 rows=11492 width=8) (actual time=8.940..8.940 rows=372 loops=1)
                                      Buckets: 16384  Batches: 1  Memory Usage: 143kB
                                      ->  Seq Scan on onp_crm_invoice creditnote  (cost=0.00..510.56 rows=11492 width=8) (actual time=0.014..7.882 rows=11507 loops=1)
                                            Filter: ((sent_date <= '2016-06-27'::date) AND ((status_key)::text = 'INVOICE_STATUS_INVOICED'::text))
                                            Rows Removed by Filter: 149
                          ->  Index Only Scan using onp_crm_calendarentry_invoice_invoice_line_id_calendar_entr_key on onp_crm_calendarentry_invoice_membership cem  (cost=0.29..0.45 rows=9 width=8) (actual time=0.001..0.001 rows=1 loops=76174)
                                Index Cond: (invoice_line_id = il.id)
                                Heap Fetches: 4371
                    ->  Index Scan using onp_crm_invoice_pkey on onp_crm_invoice inv  (cost=0.29..0.31 rows=1 width=8) (actual time=0.002..0.002 rows=1 loops=88151)
                          Index Cond: (entity_id = il.invoice_id)
                          Filter: ((sent_date <= '2016-06-27'::date) AND ((status_key)::text = 'INVOICE_STATUS_INVOICED'::text))
                          Rows Removed by Filter: 0
Planning time: 3.307 ms
Execution time: 1561346.597 ms
(36 rows)


 
 
Here is with set enable_nestloop to false;
                                                                                     QUERY PLAN                                                                                       
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
HashAggregate  (cost=15936.07..15953.83 rows=1421 width=12) (actual time=337.878..338.045 rows=724 loops=1)
  Group Key: log.relation_id
  ->  Hash Anti Join  (cost=6258.35..15926.07 rows=2000 width=12) (actual time=139.976..317.402 rows=96097 loops=1)
        Hash Cond: (log.id = cem.calendar_entry_id)
        ->  Hash Join  (cost=86.56..9729.03 rows=2000 width=20) (actual time=0.164..132.935 rows=181646 loops=1)
              Hash Cond: (log.logged_for = logfor.onp_user_id)
              ->  Seq Scan on onp_crm_activity_log log  (cost=0.00..8930.98 rows=184398 width=24) (actual time=0.006..89.170 rows=182380 loops=1)
              ->  Hash  (cost=39.46..39.46 rows=3768 width=8) (actual time=0.118..0.118 rows=36 loops=1)
                    Buckets: 4096  Batches: 1  Memory Usage: 34kB
                    ->  Bitmap Heap Scan on onp_crm_person logfor  (cost=3.69..39.46 rows=3768 width=8) (actual time=0.037..0.101 rows=36 loops=1)
                          Recheck Cond: (onp_user_id IS NOT NULL)
                          Filter: (NOT is_resource)
                          Rows Removed by Filter: 5
                          Heap Blocks: exact=10
                          ->  Bitmap Index Scan on onp_crm_person_onp_id_idx  (cost=0.00..2.75 rows=41 width=0) (actual time=0.017..0.017 rows=41 loops=1)
        ->  Hash  (cost=6171.78..6171.78 rows=1 width=4) (actual time=139.779..139.779 rows=85668 loops=1)
              Buckets: 131072 (originally 1024)  Batches: 1 (originally 1)  Memory Usage: 4036kB
              ->  Hash Join  (cost=4562.41..6171.78 rows=1 width=4) (actual time=92.471..125.417 rows=85668 loops=1)
                    Hash Cond: (cem.invoice_line_id = il.id)
                    ->  Seq Scan on onp_crm_calendarentry_invoice_membership cem  (cost=0.00..1278.44 rows=88244 width=8) (actual time=0.005..7.941 rows=88734 loops=1)
                    ->  Hash  (cost=4562.40..4562.40 rows=1 width=4) (actual time=92.444..92.444 rows=75570 loops=1)
                          Buckets: 131072 (originally 1024)  Batches: 1 (originally 1)  Memory Usage: 3681kB
                          ->  Hash Join  (cost=4008.74..4562.40 rows=1 width=4) (actual time=61.797..79.981 rows=75570 loops=1)
                                Hash Cond: (inv.entity_id = il.invoice_id)
                                ->  Seq Scan on onp_crm_invoice inv  (cost=0.00..510.56 rows=11492 width=8) (actual time=0.027..4.489 rows=11507 loops=1)
                                      Filter: ((sent_date <= '2016-06-27'::date) AND ((status_key)::text = 'INVOICE_STATUS_INVOICED'::text))
                                      Rows Removed by Filter: 151
                                ->  Hash  (cost=4008.72..4008.72 rows=1 width=8) (actual time=61.751..61.751 rows=76182 loops=1)
                                      Buckets: 131072 (originally 1024)  Batches: 1 (originally 1)  Memory Usage: 4000kB
                                      ->  Hash Anti Join  (cost=654.21..4008.72 rows=1 width=8) (actual time=12.568..47.911 rows=76182 loops=1)
                                            Hash Cond: (il.invoice_id = creditnote.credit_against)
                                            ->  Seq Scan on onp_crm_invoice_line il  (cost=0.00..3062.01 rows=78001 width=8) (actual time=0.008..12.476 rows=78622 loops=1)
                                            ->  Hash  (cost=510.56..510.56 rows=11492 width=8) (actual time=12.477..12.477 rows=372 loops=1)
                                                  Buckets: 16384  Batches: 1  Memory Usage: 143kB
                                                  ->  Seq Scan on onp_crm_invoice creditnote  (cost=0.00..510.56 rows=11492 width=8) (actual time=0.008..10.963 rows=11507 loops=1)
                                                        Filter: ((sent_date <= '2016-06-27'::date) AND ((status_key)::text = 'INVOICE_STATUS_INVOICED'::text))
                                                        Rows Removed by Filter: 151
Planning time: 3.510 ms
Execution time: 338.349 ms
(39 rows)


So my question is is there something I can do to make PG favor a Hash Anti Join instead of a Nested Loop Anti Join (which I assume is the problem)?
Can the nested NOT EXISTS be re-written to be more performant?
 
Thanks.
 
--
Andreas Joseph Krogh
CTO / Partner - Visena AS
Mobile: +47 909 56 963
Вложения

Re: Very poor performance with Nested Loop Anti Join

От
Andreas Joseph Krogh
Дата:
På mandag 01. august 2016 kl. 15:33:04, skrev Andreas Joseph Krogh <andreas@visena.com>:
I have this schema:
 
CREATE TABLE onp_crm_person(    id serial PRIMARY KEY,    onp_user_id bigint references onp_user(id) deferrable initially deferred,    is_resource boolean not null default false,    UNIQUE(onp_user_id)
);

CREATE TABLE onp_crm_activity_log(    id bigserial PRIMARY KEY,    relation_id integer REFERENCES onp_crm_relation(entity_id),    logged_for int references onp_crm_person(onp_user_id),    duration bigint
);

CREATE TABLE onp_crm_invoice(    entity_id bigint PRIMARY KEY REFERENCES onp_crm_entity(entity_id),    status_key VARCHAR NOT NULL,    credit_against bigint REFERENCES onp_crm_invoice(entity_id),    sent_date DATE,    UNIQUE(credit_against) deferrable INITIALLY DEFERRED
    -- invoice_print_template_id is added after creation of origo_invoice_print_template
);

CREATE TABLE onp_crm_invoice_line (    id         SERIAL PRIMARY KEY,    invoice_id INTEGER NOT NULL REFERENCES onp_crm_invoice (entity_id)
);

CREATE TABLE onp_crm_calendarentry_invoice_membership(    invoice_line_id INTEGER NOT NULL REFERENCES onp_crm_invoice_line(id) ON DELETE CASCADE,    calendar_entry_id INTEGER NOT NULL REFERENCES onp_crm_activity_log(id),    unique(invoice_line_id, calendar_entry_id) DEFERRABLE INITIALLY DEFERRED
);
 
This query performs terribly slow ( ~26 minutes, 1561346.597ms):
 
explain analyze
SELECT
      log.relation_id as company_id    , sum(log.duration) AS duration
FROM onp_crm_activity_log log
    JOIN onp_crm_person logfor ON logfor.onp_user_id = log.logged_for AND logfor.is_resource = FALSE

WHERE 1 = 1
      -- Filter out already invoiced before 2016-06-27
      AND NOT EXISTS(    SELECT * FROM onp_crm_calendarentry_invoice_membership cem        JOIN onp_crm_invoice_line il ON cem.invoice_line_id = il.id        JOIN onp_crm_invoice inv ON il.invoice_id = inv.entity_id    WHERE cem.calendar_entry_id = log.id          AND inv.status_key = 'INVOICE_STATUS_INVOICED'
          AND inv.sent_date <= '2016-06-27'
          AND NOT EXISTS(        SELECT * FROM onp_crm_invoice creditnote        WHERE il.invoice_id = creditnote.credit_against              AND creditnote.status_key = 'INVOICE_STATUS_INVOICED'
              AND creditnote.sent_date <= '2016-06-27'
    )
)

GROUP BY
    log.relation_id
;
 
Explain output:
                                                                                                                     QUERY PLAN                                                                                                                       
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
HashAggregate  (cost=13778.63..13796.39 rows=1421 width=12) (actual time=1561343.861..1561344.042 rows=724 loops=1)
  Group Key: log.relation_id
  ->  Nested Loop Anti Join  (cost=741.35..13768.63 rows=2000 width=12) (actual time=471.973..1561221.929 rows=96095 loops=1)
        Join Filter: (cem.calendar_entry_id = log.id)
        Rows Removed by Join Filter: 11895758618
        ->  Hash Join  (cost=86.56..9729.03 rows=2000 width=20) (actual time=0.170..668.911 rows=181644 loops=1)
              Hash Cond: (log.logged_for = logfor.onp_user_id)
              ->  Seq Scan on onp_crm_activity_log log  (cost=0.00..8930.98 rows=184398 width=24) (actual time=0.007..538.893 rows=182378 loops=1)
              ->  Hash  (cost=39.46..39.46 rows=3768 width=8) (actual time=0.126..0.126 rows=36 loops=1)
                    Buckets: 4096  Batches: 1  Memory Usage: 34kB
                    ->  Bitmap Heap Scan on onp_crm_person logfor  (cost=3.69..39.46 rows=3768 width=8) (actual time=0.040..0.106 rows=36 loops=1)
                          Recheck Cond: (onp_user_id IS NOT NULL)
                          Filter: (NOT is_resource)
                          Rows Removed by Filter: 5
                          Heap Blocks: exact=10
                          ->  Bitmap Index Scan on onp_crm_person_onp_id_idx  (cost=0.00..2.75 rows=41 width=0) (actual time=0.019..0.019 rows=41 loops=1)
        ->  Materialize  (cost=654.79..4009.60 rows=1 width=4) (actual time=0.000..2.829 rows=65490 loops=181644)
              ->  Nested Loop  (cost=654.79..4009.59 rows=1 width=4) (actual time=9.056..386.835 rows=85668 loops=1)
                    ->  Nested Loop  (cost=654.50..4009.27 rows=1 width=8) (actual time=9.046..165.280 rows=88151 loops=1)
                          ->  Hash Anti Join  (cost=654.21..4008.72 rows=1 width=8) (actual time=9.016..40.672 rows=76174 loops=1)
                                Hash Cond: (il.invoice_id = creditnote.credit_against)
                                ->  Seq Scan on onp_crm_invoice_line il  (cost=0.00..3062.01 rows=78001 width=8) (actual time=0.005..11.259 rows=78614 loops=1)
                                ->  Hash  (cost=510.56..510.56 rows=11492 width=8) (actual time=8.940..8.940 rows=372 loops=1)
                                      Buckets: 16384  Batches: 1  Memory Usage: 143kB
                                      ->  Seq Scan on onp_crm_invoice creditnote  (cost=0.00..510.56 rows=11492 width=8) (actual time=0.014..7.882 rows=11507 loops=1)
                                            Filter: ((sent_date <= '2016-06-27'::date) AND ((status_key)::text = 'INVOICE_STATUS_INVOICED'::text))
                                            Rows Removed by Filter: 149
                          ->  Index Only Scan using onp_crm_calendarentry_invoice_invoice_line_id_calendar_entr_key on onp_crm_calendarentry_invoice_membership cem  (cost=0.29..0.45 rows=9 width=8) (actual time=0.001..0.001 rows=1 loops=76174)
                                Index Cond: (invoice_line_id = il.id)
                                Heap Fetches: 4371
                    ->  Index Scan using onp_crm_invoice_pkey on onp_crm_invoice inv  (cost=0.29..0.31 rows=1 width=8) (actual time=0.002..0.002 rows=1 loops=88151)
                          Index Cond: (entity_id = il.invoice_id)
                          Filter: ((sent_date <= '2016-06-27'::date) AND ((status_key)::text = 'INVOICE_STATUS_INVOICED'::text))
                          Rows Removed by Filter: 0
Planning time: 3.307 ms
Execution time: 1561346.597 ms
(36 rows)


 
 
Here is with set enable_nestloop to false;
                                                                                     QUERY PLAN                                                                                       
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
HashAggregate  (cost=15936.07..15953.83 rows=1421 width=12) (actual time=337.878..338.045 rows=724 loops=1)
  Group Key: log.relation_id
  ->  Hash Anti Join  (cost=6258.35..15926.07 rows=2000 width=12) (actual time=139.976..317.402 rows=96097 loops=1)
        Hash Cond: (log.id = cem.calendar_entry_id)
        ->  Hash Join  (cost=86.56..9729.03 rows=2000 width=20) (actual time=0.164..132.935 rows=181646 loops=1)
              Hash Cond: (log.logged_for = logfor.onp_user_id)
              ->  Seq Scan on onp_crm_activity_log log  (cost=0.00..8930.98 rows=184398 width=24) (actual time=0.006..89.170 rows=182380 loops=1)
              ->  Hash  (cost=39.46..39.46 rows=3768 width=8) (actual time=0.118..0.118 rows=36 loops=1)
                    Buckets: 4096  Batches: 1  Memory Usage: 34kB
                    ->  Bitmap Heap Scan on onp_crm_person logfor  (cost=3.69..39.46 rows=3768 width=8) (actual time=0.037..0.101 rows=36 loops=1)
                          Recheck Cond: (onp_user_id IS NOT NULL)
                          Filter: (NOT is_resource)
                          Rows Removed by Filter: 5
                          Heap Blocks: exact=10
                          ->  Bitmap Index Scan on onp_crm_person_onp_id_idx  (cost=0.00..2.75 rows=41 width=0) (actual time=0.017..0.017 rows=41 loops=1)
        ->  Hash  (cost=6171.78..6171.78 rows=1 width=4) (actual time=139.779..139.779 rows=85668 loops=1)
              Buckets: 131072 (originally 1024)  Batches: 1 (originally 1)  Memory Usage: 4036kB
              ->  Hash Join  (cost=4562.41..6171.78 rows=1 width=4) (actual time=92.471..125.417 rows=85668 loops=1)
                    Hash Cond: (cem.invoice_line_id = il.id)
                    ->  Seq Scan on onp_crm_calendarentry_invoice_membership cem  (cost=0.00..1278.44 rows=88244 width=8) (actual time=0.005..7.941 rows=88734 loops=1)
                    ->  Hash  (cost=4562.40..4562.40 rows=1 width=4) (actual time=92.444..92.444 rows=75570 loops=1)
                          Buckets: 131072 (originally 1024)  Batches: 1 (originally 1)  Memory Usage: 3681kB
                          ->  Hash Join  (cost=4008.74..4562.40 rows=1 width=4) (actual time=61.797..79.981 rows=75570 loops=1)
                                Hash Cond: (inv.entity_id = il.invoice_id)
                                ->  Seq Scan on onp_crm_invoice inv  (cost=0.00..510.56 rows=11492 width=8) (actual time=0.027..4.489 rows=11507 loops=1)
                                      Filter: ((sent_date <= '2016-06-27'::date) AND ((status_key)::text = 'INVOICE_STATUS_INVOICED'::text))
                                      Rows Removed by Filter: 151
                                ->  Hash  (cost=4008.72..4008.72 rows=1 width=8) (actual time=61.751..61.751 rows=76182 loops=1)
                                      Buckets: 131072 (originally 1024)  Batches: 1 (originally 1)  Memory Usage: 4000kB
                                      ->  Hash Anti Join  (cost=654.21..4008.72 rows=1 width=8) (actual time=12.568..47.911 rows=76182 loops=1)
                                            Hash Cond: (il.invoice_id = creditnote.credit_against)
                                            ->  Seq Scan on onp_crm_invoice_line il  (cost=0.00..3062.01 rows=78001 width=8) (actual time=0.008..12.476 rows=78622 loops=1)
                                            ->  Hash  (cost=510.56..510.56 rows=11492 width=8) (actual time=12.477..12.477 rows=372 loops=1)
                                                  Buckets: 16384  Batches: 1  Memory Usage: 143kB
                                                  ->  Seq Scan on onp_crm_invoice creditnote  (cost=0.00..510.56 rows=11492 width=8) (actual time=0.008..10.963 rows=11507 loops=1)
                                                        Filter: ((sent_date <= '2016-06-27'::date) AND ((status_key)::text = 'INVOICE_STATUS_INVOICED'::text))
                                                        Rows Removed by Filter: 151
Planning time: 3.510 ms
Execution time: 338.349 ms
(39 rows)


So my question is is there something I can do to make PG favor a Hash Anti Join instead of a Nested Loop Anti Join (which I assume is the problem)?
Can the nested NOT EXISTS be re-written to be more performant?
 
Thanks.
 
If I leave out the JOIN with onp_crm_person the query-plan is also much more efficient:
 
explain analyze
SELECT
      log.relation_id as company_id    , sum(log.duration) AS duration
FROM onp_crm_activity_log log

WHERE 1 = 1
      -- Filter out already invoiced
      AND NOT EXISTS(    SELECT * FROM onp_crm_calendarentry_invoice_membership cem        JOIN onp_crm_invoice_line il ON cem.invoice_line_id = il.id        JOIN onp_crm_invoice inv ON il.invoice_id = inv.entity_id    WHERE cem.calendar_entry_id = log.id          AND inv.status_key = 'INVOICE_STATUS_INVOICED'
          AND inv.sent_date <= '2016-06-27'
          AND NOT EXISTS(        SELECT * FROM onp_crm_invoice creditnote        WHERE il.invoice_id = creditnote.credit_against              AND creditnote.status_key = 'INVOICE_STATUS_INVOICED'
              AND creditnote.sent_date <= '2016-06-27'
    )
)

GROUP BY
    log.relation_id
;
 
                                                                                                                     QUERY PLAN                                                                                                                      
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 HashAggregate  (cost=16192.30..16210.07 rows=1421 width=12) (actual time=562.978..563.144 rows=724 loops=1)   Group Key: log.relation_id   ->  Hash Anti Join  (cost=4009.60..15270.19 rows=184423 width=12) (actual time=422.215..542.871 rows=96712 loops=1)         Hash Cond: (log.id = cem.calendar_entry_id)         ->  Seq Scan on onp_crm_activity_log log  (cost=0.00..8932.24 rows=184424 width=20) (actual time=0.007..83.262 rows=182380 loops=1)         ->  Hash  (cost=4009.59..4009.59 rows=1 width=4) (actual time=422.178..422.178 rows=85668 loops=1)               Buckets: 131072 (originally 1024)  Batches: 1 (originally 1)  Memory Usage: 4036kB               ->  Nested Loop  (cost=654.79..4009.59 rows=1 width=4) (actual time=12.210..397.383 rows=85668 loops=1)                     ->  Nested Loop  (cost=654.50..4009.27 rows=1 width=8) (actual time=12.189..173.380 rows=88201 loops=1)                           ->  Hash Anti Join  (cost=654.21..4008.72 rows=1 width=8) (actual time=12.099..47.757 rows=76197 loops=1)                                 Hash Cond: (il.invoice_id = creditnote.credit_against)                                 ->  Seq Scan on onp_crm_invoice_line il  (cost=0.00..3062.01 rows=78001 width=8) (actual time=0.009..13.797 rows=78637 loops=1)                                 ->  Hash  (cost=510.56..510.56 rows=11492 width=8) (actual time=12.012..12.012 rows=372 loops=1)                                       Buckets: 16384  Batches: 1  Memory Usage: 143kB                                       ->  Seq Scan on onp_crm_invoice creditnote  (cost=0.00..510.56 rows=11492 width=8) (actual time=0.026..10.564 rows=11507 loops=1)                                             Filter: ((sent_date <= '2016-06-27'::date) AND ((status_key)::text = 'INVOICE_STATUS_INVOICED'::text))                                             Rows Removed by Filter: 155
                           ->  Index Only Scan using onp_crm_calendarentry_invoice_invoice_line_id_calendar_entr_key on onp_crm_calendarentry_invoice_membership cem  (cost=0.29..0.45 rows=9 width=8) (actual time=0.001..0.001 rows=1 loops=76197)                                 Index Cond: (invoice_line_id = il.id)                                 Heap Fetches: 4421
                     ->  Index Scan using onp_crm_invoice_pkey on onp_crm_invoice inv  (cost=0.29..0.31 rows=1 width=8) (actual time=0.002..0.002 rows=1 loops=88201)                           Index Cond: (entity_id = il.invoice_id)                           Filter: ((sent_date <= '2016-06-27'::date) AND ((status_key)::text = 'INVOICE_STATUS_INVOICED'::text))                           Rows Removed by Filter: 0
 Planning time: 2.763 ms Execution time: 563.403 ms
(26 rows)
 
But of course I cannot do that... It's just for information...
 
--
Andreas Joseph Krogh
CTO / Partner - Visena AS
Mobile: +47 909 56 963
 
Вложения

Re: Very poor performance with Nested Loop Anti Join

От
Tom Lane
Дата:
Andreas Joseph Krogh <andreas@visena.com> writes:
> This query performs terribly slow (~26 minutes, 1561346.597ms):

Seems like the key misestimation is on the inner antijoin:

>                ->  Hash Anti Join  (cost=654.21..4008.72 rows=1 width=8) (actual time=9.016..40.672 rows=76174
loops=1)
>                      Hash Cond: (il.invoice_id = creditnote.credit_against)
>                      ->  Seq Scan on onp_crm_invoice_line il  (cost=0.00..3062.01 rows=78001 width=8) (actual
time=0.005..11.259rows=78614 loops=1) 
>                      ->  Hash  (cost=510.56..510.56 rows=11492 width=8) (actual time=8.940..8.940 rows=372 loops=1)
>                            Buckets: 16384  Batches: 1  Memory Usage: 143kB
>                            ->  Seq Scan on onp_crm_invoice creditnote  (cost=0.00..510.56 rows=11492 width=8) (actual
time=0.014..7.882rows=11507 loops=1) 
>                                  Filter: ((sent_date <= '2016-06-27'::date) AND ((status_key)::text =
'INVOICE_STATUS_INVOICED'::text))
>                                  Rows Removed by Filter: 149

If it realized that this produces 78k rows not 1, it'd likely do something
smarter at the outer antijoin.

I have no idea why that estimate's so far off though.  What PG version is
this?  Stats all up to date on these two tables?  Are the rows excluded
by the filter condition on "creditnote" significantly different from the
rest of that table?

            regards, tom lane


Re: Very poor performance with Nested Loop Anti Join

От
Andreas Joseph Krogh
Дата:
På tirsdag 02. august 2016 kl. 01:15:05, skrev Tom Lane <tgl@sss.pgh.pa.us>:
Andreas Joseph Krogh <andreas@visena.com> writes:
> This query performs terribly slow (~26 minutes,��1561346.597ms):

Seems like the key misestimation is on the inner antijoin:

>                ->  Hash Anti Join  (cost=654.21..4008.72 rows=1 width=8) (actual time=9.016..40.672 rows=76174 loops=1)
>                      Hash Cond: (il.invoice_id = creditnote.credit_against)
>                      ->  Seq Scan on onp_crm_invoice_line il  (cost=0.00..3062.01 rows=78001 width=8) (actual time=0.005..11.259 rows=78614 loops=1)
>                      ->  Hash  (cost=510.56..510.56 rows=11492 width=8) (actual time=8.940..8.940 rows=372 loops=1)
>                            Buckets: 16384  Batches: 1  Memory Usage: 143kB
>                            ->  Seq Scan on onp_crm_invoice creditnote  (cost=0.00..510.56 rows=11492 width=8) (actual time=0.014..7.882 rows=11507 loops=1)
>                                  Filter: ((sent_date <= '2016-06-27'::date) AND ((status_key)::text = 'INVOICE_STATUS_INVOICED'::text))
>                                  Rows Removed by Filter: 149

If it realized that this produces 78k rows not 1, it'd likely do something
smarter at the outer antijoin.

I have no idea why that estimate's so far off though.  What PG version is
this?  Stats all up to date on these two tables?
 
Sorry for not providing PG-version, this is on 9.5.3.
All stats are up to date, or should be a I've analyzed all manually.
 
Are the rows excluded
by the filter condition on "creditnote" significantly different from the
rest of that table?
 
This happens also without the filter-cond:
 
explain analyze
SELECT
      log.relation_id as company_id    , sum(log.duration) AS duration
FROM onp_crm_activity_log log
    JOIN onp_crm_person logfor ON logfor.onp_user_id = log.logged_for AND logfor.is_resource = FALSE

WHERE 1 = 1
      -- Filter out already invoiced
      AND NOT EXISTS(    SELECT * FROM onp_crm_calendarentry_invoice_membership cem        JOIN onp_crm_invoice_line il ON cem.invoice_line_id = il.id        JOIN onp_crm_invoice inv ON il.invoice_id = inv.entity_id    WHERE cem.calendar_entry_id = log.id          AND NOT EXISTS(        SELECT * FROM onp_crm_invoice creditnote        WHERE il.invoice_id = creditnote.credit_against    )
)

GROUP BY
    log.relation_id
;
 
                                                                                                                     QUERY PLAN                                                                                                                      
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 HashAggregate  (cost=12049.35..12067.11 rows=1421 width=12) (actual time=1386683.646..1386683.858 rows=720 loops=1)   Group Key: log.relation_id   ->  Nested Loop Anti Join  (cost=512.08..12039.32 rows=2006 width=12) (actual time=395.017..1386576.756 rows=93480 loops=1)         Join Filter: (cem.calendar_entry_id = log.id)         Rows Removed by Join Filter: 12185913244
         ->  Hash Join  (cost=86.56..9757.61 rows=2006 width=20) (actual time=0.165..366.778 rows=181872 loops=1)               Hash Cond: (log.logged_for = logfor.onp_user_id)               ->  Seq Scan on onp_crm_activity_log log  (cost=0.00..8957.45 rows=184945 width=24) (actual time=0.003..256.862 rows=182606 loops=1)               ->  Hash  (cost=39.46..39.46 rows=3768 width=8) (actual time=0.132..0.132 rows=36 loops=1)                     Buckets: 4096  Batches: 1  Memory Usage: 34kB                     ->  Bitmap Heap Scan on onp_crm_person logfor  (cost=3.69..39.46 rows=3768 width=8) (actual time=0.033..0.125 rows=36 loops=1)                           Recheck Cond: (onp_user_id IS NOT NULL)                           Filter: (NOT is_resource)                           Rows Removed by Filter: 5
                           Heap Blocks: exact=10
                           ->  Bitmap Index Scan on onp_crm_person_onp_id_idx  (cost=0.00..2.75 rows=41 width=0) (actual time=0.017..0.017 rows=41 loops=1)         ->  Materialize  (cost=425.53..2251.62 rows=1 width=4) (actual time=0.000..2.544 rows=67003 loops=181872)               ->  Nested Loop  (cost=425.53..2251.61 rows=1 width=4) (actual time=3.283..320.057 rows=88511 loops=1)                     ->  Nested Loop  (cost=425.24..2251.30 rows=1 width=8) (actual time=3.241..154.783 rows=88511 loops=1)                           ->  Hash Anti Join  (cost=424.95..2250.75 rows=1 width=8) (actual time=3.110..30.097 rows=76281 loops=1)                                 Hash Cond: (il.invoice_id = creditnote.credit_against)                                 ->  Index Only Scan using origo_invoice_line_id_invoice_idx on onp_crm_invoice_line il  (cost=0.29..1530.95 rows=78707 width=8) (actual time=0.030..13.719 rows=78740 loops=1)                                       Heap Fetches: 2967
                                 ->  Hash  (cost=278.22..278.22 rows=11715 width=8) (actual time=3.003..3.003 rows=376 loops=1)                                       Buckets: 16384  Batches: 1  Memory Usage: 143kB                                       ->  Index Only Scan using origo_invoice_credit_against_idx on onp_crm_invoice creditnote  (cost=0.29..278.22 rows=11715 width=8) (actual time=0.042..2.082 rows=11692 loops=1)                                             Heap Fetches: 1151
                           ->  Index Only Scan using onp_crm_calendarentry_invoice_invoice_line_id_calendar_entr_key on onp_crm_calendarentry_invoice_membership cem  (cost=0.29..0.45 rows=9 width=8) (actual time=0.001..0.001 rows=1 loops=76281)                                 Index Cond: (invoice_line_id = il.id)                                 Heap Fetches: 4753
                     ->  Index Only Scan using onp_crm_invoice_pkey on onp_crm_invoice inv  (cost=0.29..0.30 rows=1 width=8) (actual time=0.001..0.001 rows=1 loops=88511)                           Index Cond: (entity_id = il.invoice_id)                           Heap Fetches: 12084
 Planning time: 5.824 ms Execution time: 1386686.664 ms
(35 rows)
 
With set enable_nestloop to off;
 
                                                                                                            QUERY PLAN                                                                                                            
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 HashAggregate  (cost=13975.70..13993.46 rows=1421 width=12) (actual time=338.185..338.341 rows=720 loops=1)   Group Key: log.relation_id   ->  Hash Anti Join  (cost=4265.19..13965.66 rows=2007 width=12) (actual time=147.696..318.314 rows=93511 loops=1)         Hash Cond: (log.id = cem.calendar_entry_id)         ->  Hash Join  (cost=86.56..9761.69 rows=2007 width=20) (actual time=0.166..127.604 rows=181915 loops=1)               Hash Cond: (log.logged_for = logfor.onp_user_id)               ->  Seq Scan on onp_crm_activity_log log  (cost=0.00..8961.23 rows=185023 width=24) (actual time=0.006..84.093 rows=182649 loops=1)               ->  Hash  (cost=39.46..39.46 rows=3768 width=8) (actual time=0.123..0.123 rows=36 loops=1)                     Buckets: 4096  Batches: 1  Memory Usage: 34kB                     ->  Bitmap Heap Scan on onp_crm_person logfor  (cost=3.69..39.46 rows=3768 width=8) (actual time=0.038..0.102 rows=36 loops=1)                           Recheck Cond: (onp_user_id IS NOT NULL)                           Filter: (NOT is_resource)                           Rows Removed by Filter: 5
                           Heap Blocks: exact=10
                           ->  Bitmap Index Scan on onp_crm_person_onp_id_idx  (cost=0.00..2.75 rows=41 width=0) (actual time=0.019..0.019 rows=41 loops=1)         ->  Hash  (cost=4178.62..4178.62 rows=1 width=4) (actual time=147.497..147.497 rows=88523 loops=1)               Buckets: 131072 (originally 1024)  Batches: 1 (originally 1)  Memory Usage: 4137kB               ->  Hash Join  (cost=2553.20..4178.62 rows=1 width=4) (actual time=98.512..133.017 rows=88523 loops=1)                     Hash Cond: (cem.invoice_line_id = il.id)                     ->  Seq Scan on onp_crm_calendarentry_invoice_membership cem  (cost=0.00..1290.66 rows=89266 width=8) (actual time=0.006..11.151 rows=89175 loops=1)                     ->  Hash  (cost=2553.19..2553.19 rows=1 width=4) (actual time=98.481..98.481 rows=76286 loops=1)                           Buckets: 131072 (originally 1024)  Batches: 1 (originally 1)  Memory Usage: 3706kB                           ->  Merge Join  (cost=2252.01..2553.19 rows=1 width=4) (actual time=50.922..87.641 rows=76286 loops=1)                                 Merge Cond: (il.invoice_id = inv.entity_id)                                 ->  Sort  (cost=2251.73..2251.73 rows=1 width=8) (actual time=50.872..55.552 rows=76286 loops=1)                                       Sort Key: il.invoice_id                                       Sort Method: quicksort  Memory: 6648kB                                       ->  Hash Anti Join  (cost=425.91..2251.72 rows=1 width=8) (actual time=5.904..35.979 rows=76286 loops=1)                                             Hash Cond: (il.invoice_id = creditnote.credit_against)                                             ->  Index Only Scan using origo_invoice_line_id_invoice_idx on onp_crm_invoice_line il  (cost=0.29..1530.95 rows=78707 width=8) (actual time=0.028..16.124 rows=78745 loops=1)                                                   Heap Fetches: 2972
                                             ->  Hash  (cost=278.74..278.74 rows=11750 width=8) (actual time=5.792..5.792 rows=376 loops=1)                                                   Buckets: 16384  Batches: 1  Memory Usage: 143kB                                                   ->  Index Only Scan using origo_invoice_credit_against_idx on onp_crm_invoice creditnote  (cost=0.29..278.74 rows=11750 width=8) (actual time=0.067..4.466 rows=11694 loops=1)                                                         Heap Fetches: 1155
                                 ->  Index Only Scan using onp_crm_invoice_pkey on onp_crm_invoice inv  (cost=0.29..272.09 rows=11750 width=8) (actual time=0.040..10.755 rows=76661 loops=1)                                       Heap Fetches: 3840
 Planning time: 3.762 ms Execution time: 339.634 ms
(39 rows)
 
 
 
--
Andreas Joseph Krogh
CTO / Partner - Visena AS
Mobile: +47 909 56 963
 
Вложения