Re: tsvector extraction patch

Поиск
Список
Период
Сортировка
От Hans-Juergen Schoenig -- PostgreSQL
Тема Re: tsvector extraction patch
Дата
Msg-id 4A4DBAC4.4010003@cybertec.at
обсуждение исходный текст
Ответ на tsvector extraction patch  (Hans-Juergen Schoenig -- PostgreSQL <postgres@cybertec.at>)
Ответы Re: tsvector extraction patch  (Robert Haas <robertmhaas@gmail.com>)
Список pgsql-hackers
Hans-Juergen Schoenig -- PostgreSQL wrote:
> hello,
>
> This patch did not make it through yesterday, so I am trying to send
> it again.
> I made a small patch which I found useful for my personal tasks.
> It would be nice to see this in 8.5 -- if not in core, then maybe in contrib.
> It transforms a tsvector into table format, which is really nice for text
> processing and comparison.
>
> test=# SELECT * FROM tsvcontent(to_tsvector('english', 'i am pretty
> sure this is a good patch'));
> lex   | rank
> --------+------
> good   |    8
> patch  |    9
> pretti |    3
> sure   |    4
> (4 rows)
>
>   many thanks,
>
>      hans
>


--
Cybertec Schoenig & Schoenig GmbH
Reyergasse 9 / 2
A-2700 Wiener Neustadt
Web: www.postgresql-support.de

diff -dcrpN postgresql-8.4.0.old/contrib/Makefile postgresql-8.4.0/contrib/Makefile
*** postgresql-8.4.0.old/contrib/Makefile    2009-03-26 00:20:01.000000000 +0100
--- postgresql-8.4.0/contrib/Makefile    2009-06-29 11:03:04.000000000 +0200
*************** WANTED_DIRS = \
*** 39,44 ****
--- 39,45 ----
          tablefunc    \
          test_parser    \
          tsearch2    \
+         tsvcontent    \
          vacuumlo

  ifeq ($(with_openssl),yes)
diff -dcrpN postgresql-8.4.0.old/contrib/tsvcontent/Makefile postgresql-8.4.0/contrib/tsvcontent/Makefile
*** postgresql-8.4.0.old/contrib/tsvcontent/Makefile    1970-01-01 01:00:00.000000000 +0100
--- postgresql-8.4.0/contrib/tsvcontent/Makefile    2009-06-29 11:20:21.000000000 +0200
***************
*** 0 ****
--- 1,19 ----
+ # contrib/tsvcontent/Makefile
+
+ MODULES = tsvcontent
+ DATA_built = tsvcontent.sql
+ DATA = uninstall_tsvcontent.sql
+ # NOTE(review): uninstall_tsvcontent.sql (listed in DATA) is not among the files this patch adds -- confirm it exists.
+ # Pass -lm to the link step only if LIBS contains it. NOTE(review): tsvcontent.c shows no math calls; confirm this is needed.
+ SHLIB_LINK += $(filter -lm, $(LIBS))
+
+ ifdef USE_PGXS
+ PG_CONFIG = pg_config
+ PGXS := $(shell $(PG_CONFIG) --pgxs)
+ include $(PGXS)
+ else
+ subdir = contrib/tsvcontent
+ top_builddir = ../..
+ include $(top_builddir)/src/Makefile.global
+ include $(top_srcdir)/contrib/contrib-global.mk
+ endif
diff -dcrpN postgresql-8.4.0.old/contrib/tsvcontent/tsvcontent.c postgresql-8.4.0/contrib/tsvcontent/tsvcontent.c
*** postgresql-8.4.0.old/contrib/tsvcontent/tsvcontent.c    1970-01-01 01:00:00.000000000 +0100
--- postgresql-8.4.0/contrib/tsvcontent/tsvcontent.c    2009-06-29 11:18:35.000000000 +0200
***************
*** 0 ****
--- 1,169 ----
+ #include "postgres.h"
+
+ #include "fmgr.h"
+ #include "funcapi.h"
+ #include "miscadmin.h"
+ #include "executor/spi.h"
+ #include "lib/stringinfo.h"
+ #include "nodes/nodes.h"
+ #include "utils/builtins.h"
+ #include "utils/lsyscache.h"
+ #include "utils/syscache.h"
+ #include "utils/memutils.h"
+ #include "tsearch/ts_type.h"
+ #include "tsearch/ts_utils.h"
+ #include "catalog/pg_type.h"
+
+ #include "tsvcontent.h"
+
+ PG_MODULE_MAGIC;
+
+ PG_FUNCTION_INFO_V1(tsvcontent);
+
+ /*
+  * tsvcontent
+  *     Expand a tsvector into a set of (lexeme, position) rows.
+  *
+  * Takes one tsvector argument.  A lexeme that carries position data yields
+  * one row per stored position; a lexeme without position data yields a
+  * single row whose position is 0.  Positions are returned without their
+  * weight bits.
+  *
+  * All rows are precomputed on the first call, in the multi-call memory
+  * context; later calls only turn the next precomputed row into a tuple.
+  * Raises an error if the caller does not supply a composite result type.
+  */
+ Datum
+ tsvcontent(PG_FUNCTION_ARGS)
+ {
+     FuncCallContext *funcctx;
+     ts_to_txt_fctx  *fctx;
+
+     /* stuff done only on the first call of the function */
+     if (SRF_IS_FIRSTCALL())
+     {
+         /* the argument is only read here; every lexeme is copied below */
+         TSVector        in = PG_GETARG_TSVECTOR(0);
+         WordEntry      *entries = ARRPTR(in);
+         char           *lexemes = STRPTR(in);
+         TupleDesc       tupdesc;
+         MemoryContext   oldcontext;
+         int             i;
+         int             j;
+
+         /* create a function context for cross-call persistence */
+         funcctx = SRF_FIRSTCALL_INIT();
+
+         /*
+          * switch to memory context appropriate for multiple function calls
+          */
+         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+         switch (get_call_result_type(fcinfo, NULL, &tupdesc))
+         {
+             case TYPEFUNC_COMPOSITE:
+                 /* success */
+                 break;
+             case TYPEFUNC_RECORD:
+                 /* failed to determine actual type of RECORD */
+                 ereport(ERROR,
+                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                         errmsg("function returning record called in context "
+                                 "that cannot accept type record")));
+                 break;
+             default:
+                 /* result type isn't composite */
+                 elog(ERROR, "return type must be a row type");
+                 break;
+         }
+
+         /*
+          * Keep a persistent, blessed copy of the tupdesc.  Rows are built
+          * with heap_form_tuple(), so no AttInMetadata is needed.
+          */
+         funcctx->tuple_desc = BlessTupleDesc(CreateTupleDescCopy(tupdesc));
+
+         fctx = (ts_to_txt_fctx *) palloc(sizeof(ts_to_txt_fctx));
+
+         /* first pass: count the rows so the array can be sized exactly */
+         fctx->n_tsvt = 0;
+         for (i = 0; i < in->size; i++)
+         {
+             if (entries[i].haspos)
+                 fctx->n_tsvt += POSDATALEN(in, &entries[i]);
+             else
+                 fctx->n_tsvt++;
+         }
+
+         fctx->tsvt = palloc(fctx->n_tsvt * sizeof(tsvec_tuple));
+
+         /* second pass: fill in one row per (lexeme, position) pair */
+         for (i = 0, j = 0; i < in->size; i++)
+         {
+             WordEntry  *entry = &entries[i];
+             char       *txt;
+
+             /* lexemes are not NUL-terminated inside the tsvector */
+             txt = palloc(entry->len + 1);
+             memcpy(txt, lexemes + entry->pos, entry->len);
+             txt[entry->len] = '\0';
+
+             if (entry->haspos)
+             {
+                 /*
+                  * POSDATAPTR/POSDATALEN locate the position vector at
+                  * SHORTALIGN(pos + len).  Aligning len alone is wrong
+                  * whenever the lexeme starts at an odd offset.
+                  */
+                 WordEntryPos *posvec = POSDATAPTR(in, entry);
+                 int         npos = POSDATALEN(in, entry);
+                 int         o;
+
+                 for (o = 0; o < npos; o++)
+                 {
+                     /* all rows of one lexeme share the same copy */
+                     fctx->tsvt[j].txt = txt;
+                     /* drop the weight stored in the upper bits */
+                     fctx->tsvt[j].pos = WEP_GETPOS(posvec[o]);
+                     j++;
+                 }
+             }
+             else
+             {
+                 fctx->tsvt[j].txt = txt;
+                 fctx->tsvt[j].pos = 0;
+                 j++;
+             }
+         }
+
+         /* total number of tuples to be returned */
+         funcctx->max_calls = fctx->n_tsvt;
+
+         funcctx->user_fctx = fctx;
+         MemoryContextSwitchTo(oldcontext);
+     }
+
+     funcctx = SRF_PERCALL_SETUP();
+     fctx = (ts_to_txt_fctx *) funcctx->user_fctx;
+
+     /* are there any records inside the tsvector left? */
+     if (funcctx->call_cntr < funcctx->max_calls)
+     {
+         tsvec_tuple *row = &fctx->tsvt[funcctx->call_cntr];
+         Datum       values[2];
+         bool        isnull[2] = { false, false };
+         HeapTuple   tuple;
+
+         values[0] = CStringGetTextDatum(row->txt);
+         values[1] = Int32GetDatum(row->pos);
+
+         tuple = heap_form_tuple(funcctx->tuple_desc, values, isnull);
+
+         /* send the result */
+         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+     }
+
+     /* do when there is no more left */
+     SRF_RETURN_DONE(funcctx);
+ }
+
diff -dcrpN postgresql-8.4.0.old/contrib/tsvcontent/tsvcontent.h postgresql-8.4.0/contrib/tsvcontent/tsvcontent.h
*** postgresql-8.4.0.old/contrib/tsvcontent/tsvcontent.h    1970-01-01 01:00:00.000000000 +0100
--- postgresql-8.4.0/contrib/tsvcontent/tsvcontent.h    2009-06-29 11:18:13.000000000 +0200
***************
*** 0 ****
--- 1,13 ----
+ typedef struct    /* one output row produced by tsvcontent() */
+ {
+     char    *txt;    /* NUL-terminated copy of the lexeme text */
+     int    pos;    /* position value for this row; 0 when the lexeme has no position data */
+ } tsvec_tuple;
+
+ typedef struct    /* cross-call state that tsvcontent() keeps in funcctx->user_fctx */
+ {
+     int        n_tsvt;    /* number of elements allocated in tsvt */
+     tsvec_tuple    *tsvt;    /* output rows, all computed during the first call */
+ } ts_to_txt_fctx;
+
+ extern Datum tsvcontent(PG_FUNCTION_ARGS);
diff -dcrpN postgresql-8.4.0.old/contrib/tsvcontent/tsvcontent.sql.in postgresql-8.4.0/contrib/tsvcontent/tsvcontent.sql.in
*** postgresql-8.4.0.old/contrib/tsvcontent/tsvcontent.sql.in    1970-01-01 01:00:00.000000000 +0100
--- postgresql-8.4.0/contrib/tsvcontent/tsvcontent.sql.in    2009-06-29 11:19:04.000000000 +0200
***************
*** 0 ****
--- 1,6 ----
+ CREATE TYPE tsvcontent AS (lex text, rank integer);
+
+ -- List the lexemes of a tsvector and their occurrences; "rank" carries the position value (0 if the lexeme has none).
+ CREATE OR REPLACE FUNCTION tsvcontent(vec tsvector) RETURNS SETOF tsvcontent
+     AS '$libdir/tsvcontent', 'tsvcontent'
+     LANGUAGE C STRICT;

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Hans-Juergen Schoenig -- PostgreSQL
Дата:
Сообщение: tsvector extraction patch
Следующее
От: Heikki Linnakangas
Дата:
Сообщение: Re: 8.5 development schedule