Обсуждение: substring extraction
Hi,
I need in the SELECT query extract substring 'cccc' from string
'aaa.bbbbb.cccc.dd.eee' (extract third field from string if
delimiter is '.').
It is easy if I know where is begin/end of 'cccc' and I can
use the substring() function:
select substring('aaa.bbbbb.cccc.dd.eee' from 11 for 4);
substr
------
cccc
But how extract it if I don't know where is position of the second
and third '.'?
Yes, I know the function position() or textpos(), but this return first
a position of the substring...
For this exist nice UN*X command "cut -f3 -d." , but how make it in
SQL?
I ask about it, because I write for me this as new function in C, but
I'm not sure if not exist other (better) way for it.
Karel
------------------------------------------------------------------------------
Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/
Docs: http://docs.linux.cz (big docs archive)
Kim Project: http://home.zf.jcu.cz/~zakkr/kim/ (process manager)
FTP: ftp://ftp2.zf.jcu.cz/users/zakkr/ (C/ncurses/PgSQL)
------------------------------------------------------------------------------
Try this:
--returns the $2 field delimited by $3
drop function field(text,int,text);
create function field(text,int,text) returns text as
'declare string text; pos int2:= 0; pos1 int2:= 0; times int2:= 0; totpos int2:= 0;
begin times:= $2 - 1; string:= $1; while totpos < times loop string:=
substr(string,pos+1); pos:= strpos(string,$3); totpos:= totpos + 1; end loop;
string:=substr(string,pos+1); pos1:= strpos(string,$3); return substr(string,1,pos1 - 1);end;
' language 'plpgsql';
select field('primo.secondo.terzo',1,'.');
field
-----
primo
(1 row)
select field('primo.secondo.terzo',2,'.');
field
-------
secondo
(1 row)
select field('primo.secondo.terzo',3,'.');
field
-----
terzo
(1 row)
José
Karel Zak - Zakkr ha scritto:
> Hi,
>
> I need in the SELECT query extract substring 'cccc' from string
> 'aaa.bbbbb.cccc.dd.eee' (extract third field from string if
> delimiter is '.').
>
> It is easy if I know where is begin/end of 'cccc' and I can
> use the substring() function:
>
> select substring('aaa.bbbbb.cccc.dd.eee' from 11 for 4);
> substr
> ------
> cccc
>
> But how extract it if I don't know where is position of the second
> and third '.'?
>
> Yes, I know the function position() or textpos(), but this return first
> a position of the substring...
>
> For this exist nice UN*X command "cut -f3 -d." , but how make it in
> SQL?
>
> I ask about it, because I write for me this as new function in C, but
> I'm not sure if not exist other (better) way for it.
>
> Karel
>
> ------------------------------------------------------------------------------
> Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/
>
> Docs: http://docs.linux.cz (big docs archive)
> Kim Project: http://home.zf.jcu.cz/~zakkr/kim/ (process manager)
> FTP: ftp://ftp2.zf.jcu.cz/users/zakkr/ (C/ncurses/PgSQL)
> ------------------------------------------------------------------------------
>
> ************
On Fri, 26 Nov 1999, jose soares wrote:
> Try this:
>
> --returns the $2 field delimited by $3
> drop function field(text,int,text);
> create function field(text,int,text) returns text as
> 'declare
> string text;
> pos int2:= 0;
> pos1 int2:= 0;
> times int2:= 0;
> totpos int2:= 0;
> begin
> times:= $2 - 1;
> string:= $1;
> while totpos < times loop
> string:= substr(string,pos+1);
> pos:= strpos(string,$3);
> totpos:= totpos + 1;
> end loop;
> string:= substr(string,pos+1);
> pos1:= strpos(string,$3);
> return substr(string,1,pos1 - 1);
> end;
> ' language 'plpgsql';
>
Oh, it is great! But my implementation in C for this is
a little longer (only) :-)
I send this question to the hacker list because "extract delimited
substring" is not a abnormal uses's request, and (IMHO) will very
good if this will in PgSQL. How much uses known write this in
C or any PL?
'C' implementafion "extract delimited substring":
-----------------------------------------------
text
*strcut( text *string, char *d, int field )
{char *ptr = NULL, *p = NULL, *pe = NULL;text *result = NULL;int siz;ptr =
VARDATA(string);*(ptr+(VARSIZE(string) - VARHDRSZ)) = '\0';for(p = ptr; *p != '\0'; p++) { if (field == 1)
break; if (*p == (int) d) --field;} if (!*p) return textin(""); for(pe = p; *pe != '\0'; pe++) {
if(*pe == (int) d) break;} result = (text *) palloc(sizeof(text) * (siz = pe - p) +
VARHDRSZ);strncpy(VARDATA(result),p, siz);*(VARDATA(result) + siz) = '\0'; VARSIZE(result) = siz + VARHDRSZ;return
result;
}
CREATE FUNCTION strcut(text, char, int) RETURNS text AS '@module_dir@'
LANGUAGE 'c';
template1=> select strcut('aaa.bbb.ccc', '.', 2);
strcut
------
bbb
Karel
Karel Zak wrote:
> On Fri, 26 Nov 1999, jose soares wrote:
>
> > Try this:
> >
> > --returns the $2 field delimited by $3
> > drop function field(text,int,text);
> > create function field(text,int,text) returns text as
> > 'declare
> > string text;
> > pos int2:= 0;
> > pos1 int2:= 0;
> > times int2:= 0;
> > totpos int2:= 0;
> > begin
> > times:= $2 - 1;
> > string:= $1;
> > while totpos < times loop
> > string:= substr(string,pos+1);
> > pos:= strpos(string,$3);
> > totpos:= totpos + 1;
> > end loop;
> > string:= substr(string,pos+1);
> > pos1:= strpos(string,$3);
> > return substr(string,1,pos1 - 1);
> > end;
> > ' language 'plpgsql';
> >
>
> Oh, it is great! But my implementation in C for this is
> a little longer (only) :-)
>
> I send this question to the hacker list because "extract delimited
> substring" is not a abnormal uses's request, and (IMHO) will very
> good if this will in PgSQL. How much uses known write this in
> C or any PL?
What about this one:
create function field(text,int,text) returns text as '
return [lindex [split $1 $3] $2]
' language 'pltcl';
It does all the work as long as the third argument is a
single character. For multibyte delimiters it will be
slightly bigger, but not much. Now you might imagine why
PL/Tcl was the first language I created.
Jan
--
#======================================================================#
# It's easier to get forgiveness for being wrong than for being right. #
# Let's break this rule - forgive me. #
#========================================= wieck@debis.com (Jan Wieck) #