The following bug has been logged on the website:
Bug reference: 13349
Logged by: Regina Obe
Email address: lr@pcorp.us
PostgreSQL version: Unsupported/Unknown
Operating system: Debian, Mingw-64 Postgresql 9.5 Recheck logic
Description:
We are finding issues in our PostGIS KNN distance recheck, and think the
issue is with PostgreSQL since we can replicate with PostgreSQL built-in
geometry types
This issue only seems to happen if the LATERAL clause works against multiple
records.
Here is exercise to test:
Note the answers in both cases should be the same, but when I force index
use, the index returns something different
DROP TABLE IF EXISTS knn_recheck_point;
CREATE TABLE knn_recheck_point(gid serial , geom point);
INSERT INTO knn_recheck_point(gid,geom)
SELECT ROW_NUMBER() OVER(ORDER BY x,y) AS gid, point(x*0.777,y*0.777) As
geom
FROM generate_series(-100,100, 1) AS x CROSS JOIN
generate_series(-300,10000,10) As y;
DROP TABLE IF EXISTS knn_recheck_poly;
CREATE TABLE knn_recheck_poly(gid serial , geom polygon);
INSERT INTO knn_recheck_poly(gid,geom)
SELECT 600000 + ROW_NUMBER() OVER(ORDER BY gid) AS gid,
circle(geom,1000)::polygon As geom
FROM knn_recheck_point
WHERE gid IN(1000, 10000, 2000, 40000);
SELECT a.gid, b.gid As match, a.geom <-> b.geom As dist
FROM knn_recheck_point As a
LEFT JOIN
LATERAL ( SELECT gid, geom
FROM knn_recheck_poly As g ORDER BY g.geom <-> a.geom LIMIT 5)
As b ON true
WHERE a.gid IN(50001,70000)
ORDER BY a.gid ,dist;
Returns:
gid | match | dist
-------+--------+------------------
50001 | 600003 | 616.904706911043
50001 | 600004 | 1400.95154727064
50001 | 600002 | 2543.38219144528
50001 | 600001 | 2784.23980858618
70000 | 600001 | 0
70000 | 600002 | 0
70000 | 600004 | 0
70000 | 600003 | 571.32018689698
(8 rows)
-- now theirs really doesn't want to use an index so I got to do this --
DROP INDEX IF EXISTS idx_knn_recheck_point ;
CREATE INDEX idx_knn_recheck_point ON knn_recheck_point USING gist(geom);
DROP INDEX IF EXISTS idx_knn_recheck_poly ;
CREATE INDEX idx_knn_recheck_poly ON knn_recheck_poly USING gist(geom);
SET enable_seqscan = false;
SELECT a.gid, b.gid As match, a.geom <-> b.geom As dist
FROM knn_recheck_point As a
LEFT JOIN
LATERAL ( SELECT gid, geom
FROM knn_recheck_poly As g ORDER BY g.geom <-> a.geom LIMIT 5)
As b ON true
WHERE a.gid IN(50001,70000)
ORDER BY a.gid ,dist;
gid | match | dist
-------+--------+------------------
50001 | 600003 | 616.904706911043
50001 | 600004 | 1400.95154727064
50001 | 600002 | 2543.38219144528
50001 | 600001 | 2784.23980858618
70000 | |
(5 rows)