Обсуждение: BUG #18330: The query planner chooses the wrong plan when using the parallel aggregation function

Поиск

Список

Период

Сортировка

BUG #18330: The query planner chooses the wrong plan when using the parallel aggregation function

От

PG Bug reporting form

Дата:

04 февраля 2024 г., 20:35:02

The following bug has been logged on the website:

Bug reference:      18330
Logged by:          Michael Efremov
Email address:      efremov20081@gmail.com
PostgreSQL version: 15.1
Operating system:   Alpine 12.2.1_git20220924-r4
Description:

Below is an example of queries. It shows that the query planner chooses
sequential aggregation instead of parallel, although limit should not have
affected this.

-- FIRSTLY - create parallel agg function

CREATE TYPE top1_units_weights_state_v1_parallel AS (
  test jsonb
);

CREATE OR REPLACE FUNCTION agg_top1_units_weights_transition_v1_parallel(
  state top1_units_weights_state_v1_parallel,
  test jsonb
) RETURNS top1_units_weights_state_v1_parallel AS $$
begin
  RETURN state;
END;
$$ LANGUAGE plpgsql;


CREATE OR REPLACE FUNCTION agg_top1_units_weights_final_v1_parallel(
  state top1_units_weights_state_v1_parallel
) RETURNS top1_units_weights_state_v1_parallel AS $$
begin
  RAISE NOTICE 'agg_top1_units_weights_final_v1_parallel:% ', state;
  RETURN state;
END;
$$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION agg_top1_units_weights_combinefunc_v1_parallel(
  first top1_units_weights_state_v1_parallel,
  second top1_units_weights_state_v1_parallel
) RETURNS top1_units_weights_state_v1_parallel AS $$
begin
RAISE NOTICE 'agg_top1_units_weights_combinefunc_v1_parallel:% ', first;
  RETURN first;
END;
$$ LANGUAGE plpgsql PARALLEL SAFE;

CREATE OR REPLACE AGGREGATE agg_top1_units_weights_v1_parallel(jsonb) (
  sfunc       = agg_top1_units_weights_transition_v1_parallel,
  stype       = top1_units_weights_state_v1_parallel,
  finalfunc   = agg_top1_units_weights_final_v1_parallel,
  initcond    = '({})',
  COMBINEFUNC = agg_top1_units_weights_combinefunc_v1_parallel, 
  parallel    = SAFE  
);

-- SECONDLY - create test data


CREATE TABLE public.test_jsonb_agg(
    test_jsonb jsonb not null
);

CREATE OR REPLACE FUNCTION random_between(low INT ,high INT)
   RETURNS INT AS
$$
BEGIN
   RETURN floor(random()* (high-low + 1) + low);
END;

CREATE OR REPLACE FUNCTION random_int_array(dim integer, min integer, max
integer) RETURNS integer[] AS $BODY$
begin
    return (select array_agg(random_between(min,max)) from generate_series (0,
dim));
end
$BODY$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION random_jsonb_array_of_array(dim integer, min
integer, max integer) RETURNS jsonb AS $BODY$
declare
 res jsonb = '[]'::jsonb;
 cnt integer = random_between(1,4);
begin
loop
     res = jsonb_build_array(random_int_array(dim,min,max)) || res;
    IF cnt > 3 THEN
        return res;
    END IF;
    cnt = cnt + 1;
END LOOP;
end
$BODY$ LANGUAGE plpgsql;

INSERT into test_jsonb_agg
select
    jsonb_array as jsonb_array
from (
select
    random_jsonb_array_of_array(random_between(1,5), 1, 500) as
jsonb_array
from generate_series(0,100000)
) as data_t

-- THIRDLY - check two times
analyze test_jsonb;

explain (analyze,verbose,timing,costs,buffers)
with test_speed as not MATERIALIZED(
select test_jsonb as col
from test_jsonb_agg  -- table size is 100_000
limit 900000 -- try to comment this line then parallel aggregation plan will
be used. 
) select agg_top1_units_weights_v1_parallel(col)
from test_speed

-> output
Aggregate  (cost=29174.52..29174.53 rows=1 width=32) (actual
time=50.872..50.872 rows=1 loops=1)
  Output: agg_top1_units_weights_v1_parallel(test_jsonb_agg.test_jsonb)
  Buffers: shared hit=2174
  ->  Limit  (cost=0.00..3174.01 rows=100001 width=145) (actual
time=0.007..10.412 rows=100001 loops=1)
        Output: test_jsonb_agg.test_jsonb
        Buffers: shared hit=2174
        ->  Seq Scan on public.test_jsonb_agg  (cost=0.00..3174.01
rows=100001 width=145) (actual time=0.007..4.928 rows=100001 loops=1)
              Output: test_jsonb_agg.test_jsonb
              Buffers: shared hit=2174
Query Identifier: 8741670630168910811
Planning Time: 0.051 ms
Execution Time: 50.920 ms

explain (analyze,verbose,timing,costs,buffers)
with test_speed as not MATERIALIZED(
select test_jsonb as col
from test_jsonb_agg  -- table size is 100_000
--limit 900000 -- try to comment this line then parallel aggregation plan
will be used. 
) select agg_top1_units_weights_v1_parallel(col)
from test_speed

-> output
Finalize Aggregate  (cost=14008.38..14008.39 rows=1 width=32) (actual
time=28.777..32.664 rows=1 loops=1)
  Output: agg_top1_units_weights_v1_parallel(test_jsonb_agg.test_jsonb)
  Buffers: shared hit=2364
  ->  Gather  (cost=14007.42..14007.63 rows=2 width=32) (actual
time=28.619..32.508 rows=3 loops=1)
        Output: (PARTIAL
agg_top1_units_weights_v1_parallel(test_jsonb_agg.test_jsonb))
        Workers Planned: 2
        Workers Launched: 2
        Buffers: shared hit=2364
        ->  Partial Aggregate  (cost=13007.42..13007.43 rows=1 width=32)
(actual time=15.567..15.568 rows=1 loops=3)
              Output: PARTIAL
agg_top1_units_weights_v1_parallel(test_jsonb_agg.test_jsonb)
              Buffers: shared hit=2364
              Worker 0:  actual time=9.197..9.198 rows=1 loops=1
                Buffers: shared hit=501
              Worker 1:  actual time=9.201..9.202 rows=1 loops=1
                Buffers: shared hit=520
              ->  Parallel Seq Scan on public.test_jsonb_agg
(cost=0.00..2590.67 rows=41667 width=145) (actual time=0.008..1.872
rows=33334 loops=3)
                    Output: test_jsonb_agg.test_jsonb
                    Buffers: shared hit=2174
                    Worker 0:  actual time=0.009..1.195 rows=18612 loops=1
                      Buffers: shared hit=406
                    Worker 1:  actual time=0.009..1.205 rows=19462 loops=1
                      Buffers: shared hit=425
Query Identifier: 4368818925053284440
Planning Time: 0.054 ms
Execution Time: 32.718 ms

Re: BUG #18330: The query planner chooses the wrong plan when using the parallel aggregation function

От

Tom Lane

Дата:

05 февраля 2024 г., 00:16:40

PG Bug reporting form <noreply@postgresql.org> writes:
> Below is an example of queries. It shows that the query planner chooses
> sequential aggregation instead of parallel, although limit should not have
> affected this.

On what grounds do you assert that LIMIT can be parallelized?
AFAICS it has to be done by a single process to ensure that the
limit is enforced accurately.  So LIMIT below an aggregation
removes the opportunity to parallelize the aggregation.

            regards, tom lane

Вход в личный кабинет

Восстановление пароля

Подтверждение аккаунта

Изменение пароля

Обсуждение: BUG #18330: The query planner chooses the wrong plan when using the parallel aggregation function

BUG #18330: The query planner chooses the wrong plan when using the parallel aggregation function

Re: BUG #18330: The query planner chooses the wrong plan when using the parallel aggregation function