The following bug has been logged on the website:
Bug reference: 15857
Logged by: Vladimir Kriukov
Email address: krujkov@gmail.com
PostgreSQL version: 11.3
Operating system: CentOS 7
Description:
CREATE TABLE a(id int);
ALTER TABLE a ADD CONSTRAINT a_pkey PRIMARY KEY(id);
INSERT INTO a(id) SELECT generate_series(1, 1000000);
INSERT INTO a(id) SELECT generate_series(1000001, 10000000);
CREATE TABLE b(id int, base_id int);
ALTER TABLE b ADD CONSTRAINT b_pkey PRIMARY KEY(id);
INSERT INTO b (id) select generate_series(1, 1000000);
UPDATE b SET base_id = 1000000 - id;
CREATE TABLE c(id int, base_id int);
ALTER TABLE c ADD CONSTRAINT c_pkey PRIMARY KEY(id);
INSERT INTO c (id) SELECT generate_series(1, 1000000);
UPDATE c SET base_id = id / 10;
VACUUM ANALYZE;
SET random_page_cost = 1.1;
SET work_mem = '3276kB';
SET effective_cache_size = '90GB';
-- This gives an incorrect result of 999991, when 100000 is expected on
Postgres 11.3 and 12 beta 1.
SELECT COUNT (*)
FROM a
JOIN b
ON a.id=b.base_id
WHERE EXISTS (
SELECT 1
FROM c
WHERE c.base_id = a.id
);
-- Just for the reference, "bad" plan has this shape:
-- Finalize Aggregate (cost=63211.58..63211.59 rows=1 width=8)
-- -> Gather (cost=63211.36..63211.57 rows=2 width=8)
-- Workers Planned: 2
-- -> Partial Aggregate (cost=62211.36..62211.37 rows=1 width=8)
-- -> Nested Loop (cost=19853.44..62201.25 rows=4045
width=0)
-- -> Parallel Hash Join (cost=19853.00..42614.78
rows=40580 width=8)
-- Hash Cond: (b.base_id = c.base_id)
-- -> Parallel Seq Scan on b
(cost=0.00..13016.67 rows=416667 width=4)
-- -> Parallel Hash (cost=13016.67..13016.67
rows=416667 width=4)
-- -> Parallel Seq Scan on c
(cost=0.00..13016.67 rows=416667 width=4)
-- -> Index Only Scan using a_pkey on a
(cost=0.43..0.48 rows=1 width=4)
-- Index Cond: (id = b.base_id)