Обсуждение: BUG #18330: The query planner chooses the wrong plan when using the parallel aggregation function
BUG #18330: The query planner chooses the wrong plan when using the parallel aggregation function
От
PG Bug reporting form
Дата:
The following bug has been logged on the website:
Bug reference: 18330
Logged by: Michael Efremov
Email address: efremov20081@gmail.com
PostgreSQL version: 15.1
Operating system: Alpine 12.2.1_git20220924-r4
Description:
Below is an example of queries. It shows that the query planner chooses
sequential aggregation instead of parallel, although limit should not have
affected this.
-- FIRSTLY - create parallel agg function
CREATE TYPE top1_units_weights_state_v1_parallel AS (
test jsonb
);
CREATE OR REPLACE FUNCTION agg_top1_units_weights_transition_v1_parallel(
state top1_units_weights_state_v1_parallel,
test jsonb
) RETURNS top1_units_weights_state_v1_parallel AS $$
begin
RETURN state;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION agg_top1_units_weights_final_v1_parallel(
state top1_units_weights_state_v1_parallel
) RETURNS top1_units_weights_state_v1_parallel AS $$
begin
RAISE NOTICE 'agg_top1_units_weights_final_v1_parallel:% ', state;
RETURN state;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION agg_top1_units_weights_combinefunc_v1_parallel(
first top1_units_weights_state_v1_parallel,
second top1_units_weights_state_v1_parallel
) RETURNS top1_units_weights_state_v1_parallel AS $$
begin
RAISE NOTICE 'agg_top1_units_weights_combinefunc_v1_parallel:% ', first;
RETURN first;
END;
$$ LANGUAGE plpgsql PARALLEL SAFE;
CREATE OR REPLACE AGGREGATE agg_top1_units_weights_v1_parallel(jsonb) (
sfunc = agg_top1_units_weights_transition_v1_parallel,
stype = top1_units_weights_state_v1_parallel,
finalfunc = agg_top1_units_weights_final_v1_parallel,
initcond = '({})',
COMBINEFUNC = agg_top1_units_weights_combinefunc_v1_parallel,
parallel = SAFE
);
-- SECONDLY - create test data
CREATE TABLE public.test_jsonb_agg(
test_jsonb jsonb not null
);
CREATE OR REPLACE FUNCTION random_between(low INT ,high INT)
RETURNS INT AS
$$
BEGIN
RETURN floor(random()* (high-low + 1) + low);
END;
CREATE OR REPLACE FUNCTION random_int_array(dim integer, min integer, max
integer) RETURNS integer[] AS $BODY$
begin
return (select array_agg(random_between(min,max)) from generate_series (0,
dim));
end
$BODY$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION random_jsonb_array_of_array(dim integer, min
integer, max integer) RETURNS jsonb AS $BODY$
declare
res jsonb = '[]'::jsonb;
cnt integer = random_between(1,4);
begin
loop
res = jsonb_build_array(random_int_array(dim,min,max)) || res;
IF cnt > 3 THEN
return res;
END IF;
cnt = cnt + 1;
END LOOP;
end
$BODY$ LANGUAGE plpgsql;
INSERT into test_jsonb_agg
select
jsonb_array as jsonb_array
from (
select
random_jsonb_array_of_array(random_between(1,5), 1, 500) as
jsonb_array
from generate_series(0,100000)
) as data_t
-- THIRDLY - check two times
analyze test_jsonb;
explain (analyze,verbose,timing,costs,buffers)
with test_speed as not MATERIALIZED(
select test_jsonb as col
from test_jsonb_agg -- table size is 100_000
limit 900000 -- try to comment this line then parallel aggregation plan will
be used.
) select agg_top1_units_weights_v1_parallel(col)
from test_speed
-> output
Aggregate (cost=29174.52..29174.53 rows=1 width=32) (actual
time=50.872..50.872 rows=1 loops=1)
Output: agg_top1_units_weights_v1_parallel(test_jsonb_agg.test_jsonb)
Buffers: shared hit=2174
-> Limit (cost=0.00..3174.01 rows=100001 width=145) (actual
time=0.007..10.412 rows=100001 loops=1)
Output: test_jsonb_agg.test_jsonb
Buffers: shared hit=2174
-> Seq Scan on public.test_jsonb_agg (cost=0.00..3174.01
rows=100001 width=145) (actual time=0.007..4.928 rows=100001 loops=1)
Output: test_jsonb_agg.test_jsonb
Buffers: shared hit=2174
Query Identifier: 8741670630168910811
Planning Time: 0.051 ms
Execution Time: 50.920 ms
explain (analyze,verbose,timing,costs,buffers)
with test_speed as not MATERIALIZED(
select test_jsonb as col
from test_jsonb_agg -- table size is 100_000
--limit 900000 -- try to comment this line then parallel aggregation plan
will be used.
) select agg_top1_units_weights_v1_parallel(col)
from test_speed
-> output
Finalize Aggregate (cost=14008.38..14008.39 rows=1 width=32) (actual
time=28.777..32.664 rows=1 loops=1)
Output: agg_top1_units_weights_v1_parallel(test_jsonb_agg.test_jsonb)
Buffers: shared hit=2364
-> Gather (cost=14007.42..14007.63 rows=2 width=32) (actual
time=28.619..32.508 rows=3 loops=1)
Output: (PARTIAL
agg_top1_units_weights_v1_parallel(test_jsonb_agg.test_jsonb))
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=2364
-> Partial Aggregate (cost=13007.42..13007.43 rows=1 width=32)
(actual time=15.567..15.568 rows=1 loops=3)
Output: PARTIAL
agg_top1_units_weights_v1_parallel(test_jsonb_agg.test_jsonb)
Buffers: shared hit=2364
Worker 0: actual time=9.197..9.198 rows=1 loops=1
Buffers: shared hit=501
Worker 1: actual time=9.201..9.202 rows=1 loops=1
Buffers: shared hit=520
-> Parallel Seq Scan on public.test_jsonb_agg
(cost=0.00..2590.67 rows=41667 width=145) (actual time=0.008..1.872
rows=33334 loops=3)
Output: test_jsonb_agg.test_jsonb
Buffers: shared hit=2174
Worker 0: actual time=0.009..1.195 rows=18612 loops=1
Buffers: shared hit=406
Worker 1: actual time=0.009..1.205 rows=19462 loops=1
Buffers: shared hit=425
Query Identifier: 4368818925053284440
Planning Time: 0.054 ms
Execution Time: 32.718 ms
PG Bug reporting form <noreply@postgresql.org> writes:
> Below is an example of queries. It shows that the query planner chooses
> sequential aggregation instead of parallel, although limit should not have
> affected this.
On what grounds do you assert that LIMIT can be parallelized?
AFAICS it has to be done by a single process to ensure that the
limit is enforced accurately. So LIMIT below an aggregation
removes the opportunity to parallelize the aggregation.
regards, tom lane