Обсуждение: temporal variants of generate_series()
I've written the following function definitions to extend
generate_series to support some temporal types (timestamptz, date and
time). Please include them if there's sufficient perceived need or
value.
-- timestamptz version
CREATE OR REPLACE FUNCTION generate_series
( start_ts timestamptz
, end_ts timestamptz
, step interval
) RETURNS SETOF timestamptz
AS $$
DECLARE current_ts timestamptz := start_ts;
BEGIN
IF start_ts < end_ts AND step > INTERVAL '0 seconds' THEN LOOP IF current_ts > end_ts THEN RETURN;
END IF; RETURN NEXT current_ts; current_ts := current_ts + step; END LOOP;
ELSIF end_ts < start_ts AND step < INTERVAL '0 seconds' THEN LOOP IF current_ts < end_ts THEN RETURN;
END IF; RETURN NEXT current_ts; current_ts := current_ts + step; END LOOP;
END IF;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- date version
CREATE OR REPLACE FUNCTION generate_series
( start_ts date
, end_ts date
, step interval
) RETURNS SETOF date
AS $$
DECLARE current_ts date := start_ts;
BEGIN
IF start_ts < end_ts AND step > INTERVAL '0 seconds' THEN LOOP IF current_ts > end_ts THEN RETURN;
END IF; RETURN NEXT current_ts; current_ts := current_ts + step; END LOOP;
ELSIF end_ts < start_ts AND step < INTERVAL '0 seconds' THEN LOOP IF current_ts < end_ts THEN RETURN;
END IF; RETURN NEXT current_ts; current_ts := current_ts + step; END LOOP;
END IF;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- time version
CREATE OR REPLACE FUNCTION generate_series
( start_ts time
, end_ts time
, step interval
) RETURNS SETOF time
AS $$
DECLARE current_ts time := start_ts;
BEGIN
IF step > INTERVAL '0 seconds' THEN LOOP -- handle wraparound first IF current_ts < end_ts THEN
EXIT; END IF; RETURN NEXT current_ts; current_ts := current_ts + step; END LOOP; LOOP IF
current_ts> end_ts THEN RETURN; END IF; RETURN NEXT current_ts; current_ts := current_ts +
step; END LOOP;
ELSIF step < INTERVAL '0 seconds' THEN LOOP -- handle wraparound first IF current_ts > end_ts THEN
EXIT; END IF; RETURN NEXT current_ts; current_ts := current_ts + step; END LOOP; LOOP IF
current_ts< end_ts THEN RETURN; END IF; RETURN NEXT current_ts; current_ts := current_ts +
step; END LOOP;
END IF;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
On Thu, 2007-04-12 at 14:56 -0700, Andrew Hammond wrote: > I've written the following function definitions to extend > generate_series to support some temporal types (timestamptz, date and > time). Please include them if there's sufficient perceived need or > value. I could see these being useful, but a PL/PgSQL implementation is not eligible for inclusion in the core backend (since PL/PgSQL is not enabled by default). -Neil
On Thu, Apr 12, 2007 at 02:56:24PM -0700, Andrew Hammond wrote:
> I've written the following function definitions to extend
> generate_series to support some temporal types (timestamptz, date and
> time). Please include them if there's sufficient perceived need or
> value.
>
> -- timestamptz version
> CREATE OR REPLACE FUNCTION generate_series
> ( start_ts timestamptz
> , end_ts timestamptz
> , step interval
> ) RETURNS SETOF timestamptz
> AS $$
> DECLARE
> current_ts timestamptz := start_ts;
> BEGIN
> IF start_ts < end_ts AND step > INTERVAL '0 seconds' THEN
> LOOP
> IF current_ts > end_ts THEN
> RETURN;
> END IF;
> RETURN NEXT current_ts;
> current_ts := current_ts + step;
> END LOOP;
> ELSIF end_ts < start_ts AND step < INTERVAL '0 seconds' THEN
> LOOP
> IF current_ts < end_ts THEN
> RETURN;
> END IF;
> RETURN NEXT current_ts;
> current_ts := current_ts + step;
> END LOOP;
> END IF;
> END;
> $$ LANGUAGE plpgsql IMMUTABLE;
Here's an SQL version without much in the way of bounds checking :)
CREATE OR REPLACE FUNCTION generate_series ( start_ts timestamptz, end_ts timestamptz, step interval
) RETURNS SETOF timestamptz
LANGUAGE sql
AS $$
SELECT CASE WHEN $1 < $2 THEN $1 WHEN $1 > $2 THEN $2 END + s.i * $3 AS
"generate_series"
FROM generate_series( 0, floor( CASE WHEN $1 < $2 AND $3 > INTERVAL '0 seconds' THEN
extract('epoch'FROM $2) - extract('epoch' FROM $1) WHEN $1 > $2 AND $3 < INTERVAL '0 seconds'
THEN extract('epoch' FROM $1) - extract('epoch' FROM $2) END/extract('epoch' FROM $3)
)::int8
) AS s(i);
$$;
It should be straight-forward to make similar ones to those below.
> CREATE OR REPLACE FUNCTION generate_series
> ( start_ts date
> , end_ts date
> , step interval
> ) RETURNS SETOF date
>
> -- time version
> CREATE OR REPLACE FUNCTION generate_series
> ( start_ts time
> , end_ts time
> , step interval
> ) RETURNS SETOF time
Cheers,
D
--
David Fetter <david@fetter.org> http://fetter.org/
phone: +1 415 235 3778 AIM: dfetter666 Skype: davidfetter
Remember to vote!
Consider donating to PostgreSQL: http://www.postgresql.org/about/donate
On Apr 28, 2007, at 8:00 PM, David Fetter wrote:
> Here's an SQL version without much in the way of bounds checking :)
>
> CREATE OR REPLACE FUNCTION generate_series (
> start_ts timestamptz,
> end_ts timestamptz,
> step interval
> ) RETURNS SETOF timestamptz
> LANGUAGE sql
> AS $$
> SELECT
> CASE
> WHEN $1 < $2 THEN
> $1
> WHEN $1 > $2 THEN
> $2
> END + s.i * $3 AS "generate_series"
> FROM generate_series(
> 0,
> floor(
> CASE
> WHEN $1 < $2 AND $3 > INTERVAL '0 seconds' THEN
> extract('epoch' FROM $2) -
> extract('epoch' FROM $1)
> WHEN $1 > $2 AND $3 < INTERVAL '0 seconds' THEN
> extract('epoch' FROM $1) -
> extract('epoch' FROM $2)
> END/extract('epoch' FROM $3)
> )::int8
> ) AS s(i);
> $$;
>
> It should be straight-forward to make similar ones to those below.
Are you sure the case statements are needed? It seems it would be
better to just punt to the behavior of generate_series (esp. if
generate_series eventually learns how to count backwards).
--
Jim Nasby jim@nasby.net
EnterpriseDB http://enterprisedb.com 512.569.9461 (cell)
Jim Nasby <decibel@decibel.org> writes:
> Are you sure the case statements are needed? It seems it would be
> better to just punt to the behavior of generate_series (esp. if
> generate_series eventually learns how to count backwards).
What's this "eventually"?
regression=# select * from generate_series(10,1,-1);generate_series
----------------- 10 9 8 7 6 5
4 3 2 1
(10 rows)
regards, tom lane
On Tue, May 01, 2007 at 05:08:45PM -0400, Tom Lane wrote:
> Jim Nasby <decibel@decibel.org> writes:
> > Are you sure the case statements are needed? It seems it would be
> > better to just punt to the behavior of generate_series (esp. if
> > generate_series eventually learns how to count backwards).
>
> What's this "eventually"?
>
> regression=# select * from generate_series(10,1,-1);
> generate_series
> -----------------
> 10
> 9
> 8
> 7
> 6
> 5
> 4
> 3
> 2
> 1
> (10 rows)
>
> regards, tom lane
Good point. I believe the function below does the right thing. When
given decreasing TIMESTAMPTZs and a negative interval, it will
generate them going backward in time. When given increasing
TIMESTAMPTZs and a positive interval, it will generate them going
forward in time. Given a 0 interval, it errors out, although not with
the same message as generate_series(1,1,0), and decreasing
TIMESTAMPTZs and a positive interval or vice versa, it generates no
rows.
CREATE OR REPLACE FUNCTION generate_series ( start_ts timestamptz, end_ts timestamptz, step interval
) RETURNS SETOF timestamptz
STRICT
LANGUAGE sql
AS $$
SELECT $1 + s.i * $3 AS "generate_series"
FROM generate_series( CASE WHEN $1 <= $2 THEN 0 ELSE floor( ( extract('epoch'
FROM$2) - extract('epoch' FROM $1) )/extract('epoch' FROM $3) )::int8 END, CASE WHEN $1 <= $2
THENceil( ( extract('epoch' FROM $2) - extract('epoch' FROM $1) )/extract('epoch'
FROM$3) )::int8 ELSE 0 END, sign( extract('epoch' FROM $2) - extract('epoch' FROM $1)
)::int8
) AS s(i)
ORDER BY s.i ASC
;
$$;
--
David Fetter <david@fetter.org> http://fetter.org/
phone: +1 415 235 3778 AIM: dfetter666 Skype: davidfetter
Remember to vote!
Consider donating to PostgreSQL: http://www.postgresql.org/about/donate
Here's a shorter version:
On the date variant, I wasn't sure how to handle intervals with parts
smaller than days:
floor, ceiling, round or error out
To get round, the last parameters of generate_series would be
extract('epoch' FROM '1 day'::interval)::bigint * round(extract('epoch' FROM
$3) / extract('epoch' FROM '1 day'::interval))::bigint
CREATE OR REPLACE FUNCTION generate_series ( start_ts timestamptz, end_ts timestamptz, step interval
) RETURNS SETOF timestamptz
STRICT
LANGUAGE sql
AS $$
SELECT 'epoch'::timestamptz + s.i * '1 second'::interval AS "generate_series"
FROM generate_series( extract('epoch' FROM $1)::bigint, extract('epoch' FROM $2)::bigint,
extract('epoch'FROM $3)::bigint ) s(i);
$$;
CREATE OR REPLACE FUNCTION generate_series ( start_ts date, end_ts date, step interval
) RETURNS SETOF date
STRICT
LANGUAGE sql
AS $$
SELECT ('epoch'::date + s.i * '1 second'::interval)::date AS "generate_series"
FROM generate_series( extract('epoch' FROM $1)::bigint, extract('epoch' FROM $2)::bigint,
extract('epoch'FROM date_trunc('day', $3))::bigint -- does a floor ) s(i);
$$;
Jean-Pierre Pelletier
e-djuster
Here's a shorter version:
On the date variant, I wasn't sure how to handle intervals with parts
smaller than days:
floor, ceiling, round or error out
To get round, the last parameters of generate_series would be
extract('epoch' FROM '1 day'::interval)::bigint * round(extract('epoch' FROM
$3) / extract('epoch' FROM '1 day'::interval))::bigint
CREATE OR REPLACE FUNCTION generate_series ( start_ts timestamptz, end_ts timestamptz, step interval
) RETURNS SETOF timestamptz
STRICT
LANGUAGE sql
AS $$
SELECT 'epoch'::timestamptz + s.i * '1 second'::interval AS "generate_series"
FROM generate_series( extract('epoch' FROM $1)::bigint, extract('epoch' FROM $2)::bigint,
extract('epoch'FROM $3)::bigint ) s(i);
$$;
CREATE OR REPLACE FUNCTION generate_series ( start_ts date, end_ts date, step interval
) RETURNS SETOF date
STRICT
LANGUAGE sql
AS $$
SELECT ('epoch'::date + s.i * '1 second'::interval)::date AS "generate_series"
FROM generate_series( extract('epoch' FROM $1)::bigint, extract('epoch' FROM $2)::bigint,
extract('epoch'FROM date_trunc('day', $3))::bigint -- does a floor ) s(i);
$$;
Jean-Pierre Pelletier
e-djuster
On May 2, 2007, at 8:24 PM, JEAN-PIERRE PELLETIER wrote:
> On the date variant, I wasn't sure how to handle intervals with
> parts smaller than days:
> floor, ceiling, round or error out
Hrm... I'm not sure what would be better there... I'm leaning towards
round (floor or ceil don't make much sense to me), but I could also
see throwing an error if trunc('day', $3) != $3. Comments?
Also, what would be the appropriate way to put this into initdb?
These seem a bit long to try and cram into a one-line DATA statement
in pg_proc.h. Should I add a new .sql file ala
information_schema.sql? Is it possible to still add pg_catalog
entries after the postgresql.bki stage of initdb?
Finally, should I also add a timestamp without time zone version? I
know we'll automatically cast timestamptz to timestamp, but then you
get a timestamptz back, which seems odd.
> To get round, the last parameters of generate_series would be
> extract('epoch' FROM '1 day'::interval)::bigint * round(extract
> ('epoch' FROM $3) / extract('epoch' FROM '1 day'::interval))::bigint
>
> CREATE OR REPLACE FUNCTION generate_series (
> start_ts timestamptz,
> end_ts timestamptz,
> step interval
> ) RETURNS SETOF timestamptz
> STRICT
> LANGUAGE sql
> AS $$
> SELECT
> 'epoch'::timestamptz + s.i * '1 second'::interval AS
> "generate_series"
> FROM
> generate_series(
> extract('epoch' FROM $1)::bigint,
> extract('epoch' FROM $2)::bigint,
> extract('epoch' FROM $3)::bigint
> ) s(i);
> $$;
>
> CREATE OR REPLACE FUNCTION generate_series (
> start_ts date,
> end_ts date,
> step interval
> ) RETURNS SETOF date
> STRICT
> LANGUAGE sql
> AS $$
> SELECT
> ('epoch'::date + s.i * '1 second'::interval)::date AS
> "generate_series"
> FROM
> generate_series(
> extract('epoch' FROM $1)::bigint,
> extract('epoch' FROM $2)::bigint,
> extract('epoch' FROM date_trunc('day', $3))::bigint -- does
> a floor
> ) s(i);
> $$;
>
> Jean-Pierre Pelletier
> e-djuster
>
>
>
> ---------------------------(end of
> broadcast)---------------------------
> TIP 7: You can help support the PostgreSQL project by donating at
>
> http://www.postgresql.org/about/donate
>
--
Jim Nasby jim@nasby.net
EnterpriseDB http://enterprisedb.com 512.569.9461 (cell)
Jim Nasby <decibel@decibel.org> writes:
> Also, what would be the appropriate way to put this into initdb?
You seem to have missed a step here, which is to convince people that
these belong in core at all. So far I've not even seen an argument that
would justify putting them in contrib. If they *were* of sufficiently
wide use to justify putting them into core, a more efficient
implementation would probably be expected.
regards, tom lane
On May 6, 2007, at 8:07 PM, Tom Lane wrote: > Jim Nasby <decibel@decibel.org> writes: >> Also, what would be the appropriate way to put this into initdb? > You seem to have missed a step here, which is to convince people that > these belong in core at all. So far I've not even seen an argument > that > would justify putting them in contrib. These are all examples of using generate series plus additional math to generate a series of dates/timestamps: http://archives.postgresql.org/pgsql-general/2007-01/msg01292.php http://archives.postgresql.org/pgsql-sql/2006-02/msg00249.php http://archives.postgresql.org/pgsql-general/2005-06/msg01254.php http://archives.postgresql.org/pgsql-sql/2007-03/msg00093.php http://archives.postgresql.org/pgsql-novice/2007-01/msg00002.php http://archives.postgresql.org/pgsql-sql/2006-03/msg00391.php http://archives.postgresql.org/pgsql-hackers/2006-09/msg00330.php That's from the first page of search results for 'generate_series timestamp'. FWIW, I could also make use of this in some of my code. > If they *were* of sufficiently > wide use to justify putting them into core, a more efficient > implementation would probably be expected. Ok, I'll look into a C version, but why do SQL functions have such a high overhead? I'm seeing an SQL function taking ~2.6x longer than the equivalent code run directly in a query. With 100 days, the difference drops a bit to ~2.4x. (this is on HEAD from a few months ago) This is on my MacBook Pro with the Jean-Pierre's version of generate_series: decibel=# select count(*) from generate_series(now(),now()+'10 days'::interval,'1'::interval); Time: 1851.407 ms decibel=# select count(*) from generate_series(1,86400*10); Time: 657.894 ms decibel=# select count(*) from (select now() + (generate_series (1,86400*10) * '1 second'::interval)) a; Time: 733.592 ms decibel=# select count(*) from (select 'epoch'::timestamptz + s.i * '1 second'::interval AS "generate_series" from generate_series(extract ('epoch' from now())::bigint, extract('epoch' from now()+'10 days'::interval)::bigint, extract('epoch' from '1'::interval)::bigint) s(i)) a; Time: 699.606 ms -- Jim Nasby jim@nasby.net EnterpriseDB http://enterprisedb.com 512.569.9461 (cell)
This has been saved for the 8.4 release: http://momjian.postgresql.org/cgi-bin/pgpatches_hold --------------------------------------------------------------------------- Jim Nasby wrote: > On May 6, 2007, at 8:07 PM, Tom Lane wrote: > > Jim Nasby <decibel@decibel.org> writes: > >> Also, what would be the appropriate way to put this into initdb? > > You seem to have missed a step here, which is to convince people that > > these belong in core at all. So far I've not even seen an argument > > that > > would justify putting them in contrib. > > These are all examples of using generate series plus additional math > to generate a series of dates/timestamps: > http://archives.postgresql.org/pgsql-general/2007-01/msg01292.php > http://archives.postgresql.org/pgsql-sql/2006-02/msg00249.php > http://archives.postgresql.org/pgsql-general/2005-06/msg01254.php > http://archives.postgresql.org/pgsql-sql/2007-03/msg00093.php > http://archives.postgresql.org/pgsql-novice/2007-01/msg00002.php > http://archives.postgresql.org/pgsql-sql/2006-03/msg00391.php > http://archives.postgresql.org/pgsql-hackers/2006-09/msg00330.php > > That's from the first page of search results for 'generate_series > timestamp'. > > FWIW, I could also make use of this in some of my code. > > > If they *were* of sufficiently > > wide use to justify putting them into core, a more efficient > > implementation would probably be expected. > > Ok, I'll look into a C version, but why do SQL functions have such a > high overhead? I'm seeing an SQL function taking ~2.6x longer than > the equivalent code run directly in a query. With 100 days, the > difference drops a bit to ~2.4x. (this is on HEAD from a few months ago) > > This is on my MacBook Pro with the Jean-Pierre's version of > generate_series: > > decibel=# select count(*) from generate_series(now(),now()+'10 > days'::interval,'1'::interval); > Time: 1851.407 ms > decibel=# select count(*) from generate_series(1,86400*10); > Time: 657.894 ms > decibel=# select count(*) from (select now() + (generate_series > (1,86400*10) * '1 second'::interval)) a; > Time: 733.592 ms > decibel=# select count(*) from (select 'epoch'::timestamptz + s.i * > '1 second'::interval AS "generate_series" from generate_series(extract > ('epoch' from now())::bigint, extract('epoch' from now()+'10 > days'::interval)::bigint, extract('epoch' from > '1'::interval)::bigint) s(i)) a; > Time: 699.606 ms > -- > Jim Nasby jim@nasby.net > EnterpriseDB http://enterprisedb.com 512.569.9461 (cell) > > > > ---------------------------(end of broadcast)--------------------------- > TIP 1: if posting/reading through Usenet, please send an appropriate > subscribe-nomail command to majordomo@postgresql.org so that your > message can get through to the mailing list cleanly -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://www.enterprisedb.com + If your life is a hard drive, Christ can be your backup. +
Added to TODO: * Add temporal versions of generate_series() http://archives.postgresql.org/pgsql-hackers/2007-04/msg01180.php --------------------------------------------------------------------------- Jim Nasby wrote: > On May 6, 2007, at 8:07 PM, Tom Lane wrote: > > Jim Nasby <decibel@decibel.org> writes: > >> Also, what would be the appropriate way to put this into initdb? > > You seem to have missed a step here, which is to convince people that > > these belong in core at all. So far I've not even seen an argument > > that > > would justify putting them in contrib. > > These are all examples of using generate series plus additional math > to generate a series of dates/timestamps: > http://archives.postgresql.org/pgsql-general/2007-01/msg01292.php > http://archives.postgresql.org/pgsql-sql/2006-02/msg00249.php > http://archives.postgresql.org/pgsql-general/2005-06/msg01254.php > http://archives.postgresql.org/pgsql-sql/2007-03/msg00093.php > http://archives.postgresql.org/pgsql-novice/2007-01/msg00002.php > http://archives.postgresql.org/pgsql-sql/2006-03/msg00391.php > http://archives.postgresql.org/pgsql-hackers/2006-09/msg00330.php > > That's from the first page of search results for 'generate_series > timestamp'. > > FWIW, I could also make use of this in some of my code. > > > If they *were* of sufficiently > > wide use to justify putting them into core, a more efficient > > implementation would probably be expected. > > Ok, I'll look into a C version, but why do SQL functions have such a > high overhead? I'm seeing an SQL function taking ~2.6x longer than > the equivalent code run directly in a query. With 100 days, the > difference drops a bit to ~2.4x. (this is on HEAD from a few months ago) > > This is on my MacBook Pro with the Jean-Pierre's version of > generate_series: > > decibel=# select count(*) from generate_series(now(),now()+'10 > days'::interval,'1'::interval); > Time: 1851.407 ms > decibel=# select count(*) from generate_series(1,86400*10); > Time: 657.894 ms > decibel=# select count(*) from (select now() + (generate_series > (1,86400*10) * '1 second'::interval)) a; > Time: 733.592 ms > decibel=# select count(*) from (select 'epoch'::timestamptz + s.i * > '1 second'::interval AS "generate_series" from generate_series(extract > ('epoch' from now())::bigint, extract('epoch' from now()+'10 > days'::interval)::bigint, extract('epoch' from > '1'::interval)::bigint) s(i)) a; > Time: 699.606 ms > -- > Jim Nasby jim@nasby.net > EnterpriseDB http://enterprisedb.com 512.569.9461 (cell) > > > > ---------------------------(end of broadcast)--------------------------- > TIP 1: if posting/reading through Usenet, please send an appropriate > subscribe-nomail command to majordomo@postgresql.org so that your > message can get through to the mailing list cleanly -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://postgres.enterprisedb.com + If your life is a hard drive, Christ can be your backup. +