Обсуждение: Same condition in the CTE and in the subsequent JOIN using it
Good evening,
I have written a custom function which works, but wonder if using same condition twice looks suspicious and can be optimized.# select * from words_stat_scores(1, '199928440415755383271');
out_day | out_diff | out_score
------------+----------+-----------
26.03.2018 | 75 | 10.5
27.03.2018 | 3 | 10.2
28.03.2018 | 324 | 17.8
29.03.2018 | 801 | 10.0
30.03.2018 | 12 | 19.5
31.03.2018 | 64 | 20.8
01.04.2018 | 48 | 12.3
02.04.2018 | 342 | 11.0
03.04.2018 | 12 | 14.5
04.04.2018 | 44 | 15.0
05.04.2018 | 116 | 13.6
06.04.2018 | 102 | 19.7
07.04.2018 | 54 | 14.8
08.04.2018 | 252 | 19.0
09.04.2018 | 272 | 10.4
10.04.2018 | 140 | 18.2
11.04.2018 | 41 | 11.4
12.04.2018 | 61 | 13.3
13.04.2018 | 182 | 15.3
14.04.2018 | 76 | 13.7
15.04.2018 | 199 | 20.1
16.04.2018 | 116 | 19.1
17.04.2018 | 390 | 20.1
18.04.2018 | 150 | 16.6
19.04.2018 | 448 | 15.9
20.04.2018 | 163 | 14.6
(26 rows)
CREATE OR REPLACE FUNCTION words_stat_scores(
in_social integer,
in_sid text
) RETURNS TABLE (
out_day text,
out_diff numeric,
out_score numeric
) AS
$func$
WITH cte AS (
SELECT
DATE_TRUNC('day', m.played) AS day,
m.mid,
EXTRACT(EPOCH FROM m.played - LAG(m.played) OVER (PARTITION BY m.gid ORDER BY m.played))::int/60 AS diff
FROM words_moves m
JOIN words_games g ON (m.gid = g.gid)
JOIN words_social s ON (s.uid IN (g.player1, g.player2))
WHERE s.social = in_social -- CAN THIS BE REFERRED TO FROM BELOW?
AND s.sid = in_sid
AND m.played > CURRENT_TIMESTAMP - interval '1 month'
)
SELECT
TO_CHAR(c.day, 'DD.MM.YYYY'),
ROUND(AVG(c.diff)),
ROUND(AVG(m.score), 1)
FROM words_moves m
JOIN cte c using(mid)
JOIN words_social s USING(uid)
WHERE s.social = in_social
AND s.sid = in_sid
AND m.action = 'play'
GROUP BY c.day
ORDER BY c.day;
$func$ LANGUAGE sql STABLE;
Thank you for any hints, I apologize if my question is too specific and difficult to answer...
Regards
Alex
P.S. My 3 tables are below -
CREATE TABLE words_social (
sid text NOT NULL,
social integer NOT NULL CHECK (0 < social AND social <= 64),
given text NOT NULL CHECK (given ~ '\S'),
family text,
photo text CHECK (photo ~* '^https?://...'),
lat float,
lng float,
stamp integer NOT NULL,
uid integer NOT NULL REFERENCES words_users ON DELETE CASCADE,
PRIMARY KEY(sid, social)
);
CREATE TABLE words_moves (
mid BIGSERIAL PRIMARY KEY,
action text NOT NULL,
gid integer NOT NULL REFERENCES words_games ON DELETE CASCADE,
uid integer NOT NULL REFERENCES words_users ON DELETE CASCADE,
played timestamptz NOT NULL,
tiles jsonb,
letters text,
score integer CHECK(score >= 0)
);
CREATE TABLE words_games (
gid SERIAL PRIMARY KEY,
created timestamptz NOT NULL,
finished timestamptz,
player1 integer REFERENCES words_users(uid) ON DELETE CASCADE NOT NULL CHECK (player1 <> player2),
player2 integer REFERENCES words_users(uid) ON DELETE CASCADE,
played1 timestamptz,
played2 timestamptz,
reason text, -- regular, resigned, expired, banned
state1 text, -- tie, winning, losing, draw, won, lost
state2 text, -- tie, winning, losing, draw, won, lost
hint1 text,
hint2 text,
chat1 integer NOT NULL CHECK (chat1 >= 0),
chat2 integer NOT NULL CHECK (chat2 >= 0),
score1 integer NOT NULL CHECK (score1 >= 0),
score2 integer NOT NULL CHECK (score2 >= 0),
hand1 char[7] NOT NULL,
hand2 char[7] NOT NULL,
pile char[116] NOT NULL,
letters char[15][15] NOT NULL,
values integer[15][15] NOT NULL,
bid integer NOT NULL REFERENCES words_boards ON DELETE CASCADE,
friendly boolean NOT NULL
);
CREATE INDEX words_games_state1_index ON words_games(state1);
CREATE INDEX words_games_state2_index ON words_games(state2);
CREATE INDEX words_games_reason_index ON words_games(reason);
CREATE TABLE words_social (
sid text NOT NULL,
social integer NOT NULL CHECK (0 < social AND social <= 64),
given text NOT NULL CHECK (given ~ '\S'),
family text,
photo text CHECK (photo ~* '^https?://...'),
lat float,
lng float,
stamp integer NOT NULL,
uid integer NOT NULL REFERENCES words_users ON DELETE CASCADE,
PRIMARY KEY(sid, social)
);
CREATE TABLE words_moves (
mid BIGSERIAL PRIMARY KEY,
action text NOT NULL,
gid integer NOT NULL REFERENCES words_games ON DELETE CASCADE,
uid integer NOT NULL REFERENCES words_users ON DELETE CASCADE,
played timestamptz NOT NULL,
tiles jsonb,
letters text,
score integer CHECK(score >= 0)
);
CREATE TABLE words_games (
gid SERIAL PRIMARY KEY,
created timestamptz NOT NULL,
finished timestamptz,
player1 integer REFERENCES words_users(uid) ON DELETE CASCADE NOT NULL CHECK (player1 <> player2),
player2 integer REFERENCES words_users(uid) ON DELETE CASCADE,
played1 timestamptz,
played2 timestamptz,
reason text, -- regular, resigned, expired, banned
state1 text, -- tie, winning, losing, draw, won, lost
state2 text, -- tie, winning, losing, draw, won, lost
hint1 text,
hint2 text,
chat1 integer NOT NULL CHECK (chat1 >= 0),
chat2 integer NOT NULL CHECK (chat2 >= 0),
score1 integer NOT NULL CHECK (score1 >= 0),
score2 integer NOT NULL CHECK (score2 >= 0),
hand1 char[7] NOT NULL,
hand2 char[7] NOT NULL,
pile char[116] NOT NULL,
letters char[15][15] NOT NULL,
values integer[15][15] NOT NULL,
bid integer NOT NULL REFERENCES words_boards ON DELETE CASCADE,
friendly boolean NOT NULL
);
CREATE INDEX words_games_state1_index ON words_games(state1);
CREATE INDEX words_games_state2_index ON words_games(state2);
CREATE INDEX words_games_reason_index ON words_games(reason);
Am 25.04.2018 um 17:45 schrieb Alexander Farber: > Thank you for any hints, I apologize if my question is too specific > and difficult to answer... i haven't checked the whole query, but where-conditions from the outer query are not pushed down into the CTE-query. First the whole CTE will be materialized, then the outer query executed. that said, it is better to define the where-condition in the cte. https://blog.2ndquadrant.com/postgresql-ctes-are-optimization-fences/ Regards, Andreas -- 2ndQuadrant - The PostgreSQL Support Company. www.2ndQuadrant.com
On Wed, 25 Apr 2018 17:45:39 +0200
Alexander Farber <alexander.farber@gmail.com> wrote:
> WITH cte AS (
> SELECT
> DATE_TRUNC('day', m.played) AS day,
> m.mid,
> EXTRACT(EPOCH FROM m.played - LAG(m.played) OVER
> (PARTITION BY m.gid ORDER BY m.played))::int/60 AS diff
> FROM words_moves m
> JOIN words_games g ON (m.gid = g.gid)
> JOIN words_social s ON (s.uid IN (g.player1, g.player2))
> WHERE s.social = in_social --
> CAN THIS BE REFERRED TO FROM BELOW?
> AND s.sid = in_sid
> AND m.played > CURRENT_TIMESTAMP - interval '1 month'
> )
> SELECT
> TO_CHAR(c.day, 'DD.MM.YYYY'),
> ROUND(AVG(c.diff)),
> ROUND(AVG(m.score), 1)
> FROM words_moves m
> JOIN cte c using(mid)
> JOIN words_social s USING(uid)
> WHERE s.social = in_social
> AND s.sid = in_sid
> AND m.action = 'play'
> GROUP BY c.day
> ORDER BY c.day;
>
> $func$ LANGUAGE sql STABLE;
>
> By looking at the above source code, do you think, that the condition being
> used twice (the s.social = in_social AND s.sid = in_sid) is "too much" and
> can be optimized? :-)
I would say so, because as you've already applied the filter in the CTE it won't have any effect.
But anyway, since you are not using any column from words_social in your main query, you can do away with it entirely
andjust remove
> JOIN words_social s USING(uid)
> WHERE s.social = in_social
> AND s.sid = in_sid
--
Bien à vous, Vincent Veyron
https://compta.libremen.com
Logiciel libre de comptabilité générale en partie double
> On 25 Apr 2018, at 17:45, Alexander Farber <alexander.farber@gmail.com> wrote:
(…)
> And here is the function source code:
>
> CREATE OR REPLACE FUNCTION words_stat_scores(
> in_social integer,
> in_sid text
> ) RETURNS TABLE (
> out_day text,
> out_diff numeric,
> out_score numeric
> ) AS
> $func$
> WITH cte AS (
> SELECT
> DATE_TRUNC('day', m.played) AS day,
> m.mid,
> EXTRACT(EPOCH FROM m.played - LAG(m.played) OVER (PARTITION BY m.gid ORDER BY
m.played))::int/60AS diff
> FROM words_moves m
> JOIN words_games g ON (m.gid = g.gid)
> JOIN words_social s ON (s.uid IN (g.player1, g.player2))
> WHERE s.social = in_social -- CAN THIS BE REFERRED TO FROM BELOW?
> AND s.sid = in_sid
> AND m.played > CURRENT_TIMESTAMP - interval '1 month'
> )
> SELECT
> TO_CHAR(c.day, 'DD.MM.YYYY'),
> ROUND(AVG(c.diff)),
> ROUND(AVG(m.score), 1)
> FROM words_moves m
> JOIN cte c using(mid)
> JOIN words_social s USING(uid)
> WHERE s.social = in_social
> AND s.sid = in_sid
> AND m.action = 'play'
> GROUP BY c.day
> ORDER BY c.day;
>
> $func$ LANGUAGE sql STABLE;
>
> By looking at the above source code, do you think, that the condition being used twice (the s.social = in_social AND
s.sid= in_sid) is "too much" and can be optimized? :-)
Actually, no. The conditions are part of different joins.
Within the CTE, you have a join that boils down to:
> FROM words_games g ON (m.gid = g.gid)
> JOIN words_social s ON (s.uid IN (g.player1, g.player2) AND s.social = in_social AND s.sid = in_sid)
In your outer query, you have:
> FROM words_moves m
> JOIN words_social s ON (s.uid = m.uid AND s.social = in_social AND s.sid = in_sid)
The joins are on different fields, in different tables even, so you can't just leave the conditions out because they
filterdifferent rows.
What you _can_ do is move the words_social JOIN and it's conditions into a new CTE and join with that instead.
Somethinglike so:
WITH words_in_social AS (
SELECT sid, uid
FROM words_social
WHERE social = in_social
AND sid = in_sid
),
cte AS (
SELECT
DATE_TRUNC('day', m.played) AS day,
m.mid,
EXTRACT(EPOCH FROM m.played - LAG(m.played) OVER (PARTITION BY m.gid ORDER BY m.played))::int/60 AS diff
FROM words_moves m
JOIN words_games g ON (m.gid = g.gid)
JOIN words_in_social s ON (s.uid IN (g.player1, g.player2))
WHERE m.played > CURRENT_TIMESTAMP - interval '1
)
SELECT
TO_CHAR(c.day, 'DD.MM.YYYY'),
ROUND(AVG(c.diff)),
ROUND(AVG(m.score), 1)
FROM words_moves m
JOIN cte c using(mid)
JOIN words_in_social s USING(uid)
WHERE m.action = 'play'
GROUP BY c.day
ORDER BY c.day;
That may look cleaner, but your original query probably performs better, since CTE's also act as an optimisation fence.
BTW, I suggest to use a better name for your CTE than cte; I'd rather use a name that clarifies its purpose.
> Thank you for any hints, I apologize if my question is too specific and difficult to answer...
>
> Regards
> Alex
Alban Hertroys
--
If you can't see the forest for the trees,
cut the trees and you'll find there is no forest.
Thank you for the insightful comments.
Actually in my specific case I have managed to get rid of the (almost) same condition in the outer query:CREATE OR REPLACE FUNCTION words_stat_scores_2(
in_social integer,
in_sid text
) RETURNS TABLE (
out_day text,
out_diff numeric,
out_score numeric
) AS
$func$
WITH filtered_moves AS (
SELECT
m.uid,
s.uid AS web_script_viewer,
DATE_TRUNC('day', m.played) AS day,
m.mid,
EXTRACT(EPOCH FROM m.played - LAG(m.played) OVER (PARTITION BY m.gid ORDER BY m.played))::int/60 AS diff
FROM words_moves m
JOIN words_games g ON (m.gid = g.gid)
JOIN words_social s ON (s.uid IN (g.player1, g.player2))
WHERE s.social = in_social
AND s.sid = in_sid
AND m.played > CURRENT_TIMESTAMP - interval '2 month'
)
SELECT
TO_CHAR(f.day, 'DD.MM.YYYY'),
ROUND(AVG(f.diff)),
ROUND(AVG(m.score), 1)
FROM words_moves m
JOIN filtered_moves f using(mid)
WHERE f.uid = f.web_script_viewer -- INSTEAD OF DOING JOIN ON words_social AGAIN
AND m.action = 'play'
GROUP BY f.day
ORDER BY f.day;
$func$ LANGUAGE sql STABLE;
s/ I can trust / I can't trust /