Re: [HACKERS] [GENERAL] workaround for lack of REPLACE()

Поиск
Список
Период
Сортировка
От Joe Conway
Тема Re: [HACKERS] [GENERAL] workaround for lack of REPLACE()
Дата
Msg-id 3D5D5F1F.30407@joeconway.com
обсуждение исходный текст
Ответ на Re: [HACKERS] [GENERAL] workaround for lack of REPLACE() function  (Joe Conway <mail@joeconway.com>)
Ответы Re: [HACKERS] [GENERAL] workaround for lack of REPLACE()  (Bruce Momjian <pgman@candle.pha.pa.us>)
Re: [HACKERS] [GENERAL] workaround for lack of REPLACE()  (Bruce Momjian <pgman@candle.pha.pa.us>)
Список pgsql-patches
Joe Conway wrote:
> I took Tom's advice and added wrapper functions around text_substr() and
> bytea_substr() to cover the 2 argument case.
>
> I also added tests to strings.sql to cover substr() on toasted columns
> of both text and bytea.
>

Please replace the original patch (substr.2002.08.14.1.patch) with the
attached. It includes everything from the previous one, plus newly
implemented builtin functions:

replace(string, from, to)
   -- replaces all occurrences of "from" in "string" to "to"
split(string, fldsep, column)
   -- splits "string" on "fldsep" and returns "column" number piece
to_hex(int32_num) & to_hex(int64_num)
   -- takes integer number and returns as hex string

All previously discussed on the list; see thread at:
http://archives.postgresql.org/pgsql-hackers/2002-07/msg00511.php

Examples:

SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo";
  ya123da123doo
---------------
  ya123da123doo
(1 row)

select split('joeuser@mydatabase','@',1) AS "joeuser";
  joeuser
---------
  joeuser
(1 row)

select split('joeuser@mydatabase','@',2) AS "mydatabase";
  mydatabase
------------
  mydatabase
(1 row)

select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS
"ffffffff";
  ffffffff
----------
  ffffffff
(1 row)

Tests have been added to the regression suite.

Passes all regression tests. I've checked the strings.sql script in a
multibyte database and it works fine also. I'd appreciate a good look by
someone more familiar with multibyte related issues though.

If it is OK, I'd like to hold off on docs until this is committed and
after beta starts.

If there are no objections, please apply.

Thanks,

Joe
Index: src/backend/utils/adt/varlena.c
===================================================================
RCS file: /opt/src/cvs/pgsql-server/src/backend/utils/adt/varlena.c,v
retrieving revision 1.87
diff -c -r1.87 varlena.c
*** src/backend/utils/adt/varlena.c    4 Aug 2002 06:44:47 -0000    1.87
--- src/backend/utils/adt/varlena.c    16 Aug 2002 19:54:03 -0000
***************
*** 18,23 ****
--- 18,25 ----

  #include "mb/pg_wchar.h"
  #include "miscadmin.h"
+ #include "access/tuptoaster.h"
+ #include "lib/stringinfo.h"
  #include "utils/builtins.h"
  #include "utils/pg_locale.h"

***************
*** 27,34 ****
--- 29,62 ----
  #define DatumGetUnknownP(X)            ((unknown *) PG_DETOAST_DATUM(X))
  #define PG_GETARG_UNKNOWN_P(n)        DatumGetUnknownP(PG_GETARG_DATUM(n))
  #define PG_RETURN_UNKNOWN_P(x)        PG_RETURN_POINTER(x)
+ #define PG_TEXTARG_GET_STR(arg_) \
+     DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
+ #define PG_TEXT_GET_STR(textp_) \
+     DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
+ #define PG_STR_GET_TEXT(str_) \
+     DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
+ #define TEXTLEN(textp) \
+     text_length(PointerGetDatum(textp))
+ #define TEXTPOS(buf_text, from_sub_text) \
+     text_position(PointerGetDatum(buf_text), PointerGetDatum(from_sub_text), 1)
+ #define TEXTDUP(textp) \
+     DatumGetTextPCopy(PointerGetDatum(textp))
+ #define LEFT(buf_text, from_sub_text) \
+     text_substring(PointerGetDatum(buf_text), \
+                     1, \
+                     TEXTPOS(buf_text, from_sub_text) - 1, false)
+ #define RIGHT(buf_text, from_sub_text, from_sub_text_len) \
+     text_substring(PointerGetDatum(buf_text), \
+                     TEXTPOS(buf_text, from_sub_text) + from_sub_text_len, \
+                     -1, true)

  static int    text_cmp(text *arg1, text *arg2);
+ static int32 text_length(Datum str);
+ static int32 text_position(Datum str, Datum search_str, int matchnum);
+ static text *text_substring(Datum str,
+                             int32 start,
+                             int32 length,
+                             bool length_not_specified);


  /*****************************************************************************
***************
*** 285,303 ****
  Datum
  textlen(PG_FUNCTION_ARGS)
  {
!     text       *t = PG_GETARG_TEXT_P(0);

! #ifdef MULTIBYTE
!     /* optimization for single byte encoding */
!     if (pg_database_encoding_max_length() <= 1)
!         PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ);
!
!     PG_RETURN_INT32(
!         pg_mbstrlen_with_len(VARDATA(t), VARSIZE(t) - VARHDRSZ)
!         );
! #else
!     PG_RETURN_INT32(VARSIZE(t) - VARHDRSZ);
! #endif
  }

  /*
--- 313,348 ----
  Datum
  textlen(PG_FUNCTION_ARGS)
  {
!     PG_RETURN_INT32(text_length(PG_GETARG_DATUM(0)));
! }

! /*
!  * text_length -
!  *    Does the real work for textlen()
!  *    This is broken out so it can be called directly by other string processing
!  *    functions.
!  */
! static int32
! text_length(Datum str)
! {
!     /* fastpath when max encoding length is one */
!     if (pg_database_encoding_max_length() == 1)
!         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
!
!     if (pg_database_encoding_max_length() > 1)
!     {
!         text       *t = DatumGetTextP(str);
!
!         PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA(t),
!                                      VARSIZE(t) - VARHDRSZ));
!     }
!
!     /* should never get here */
!     elog(ERROR, "Invalid backend encoding; encoding max length "
!                 "is less than one.");
!
!     /* not reached: suppress compiler warning */
!     return 0;
  }

  /*
***************
*** 308,316 ****
  Datum
  textoctetlen(PG_FUNCTION_ARGS)
  {
!     text    *arg = PG_GETARG_TEXT_P(0);
!
!     PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ);
  }

  /*
--- 353,359 ----
  Datum
  textoctetlen(PG_FUNCTION_ARGS)
  {
!     PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
  }

  /*
***************
*** 382,471 ****
   * - Thomas Lockhart 1998-12-10
   * Now uses faster TOAST-slicing interface
   * - John Gray 2002-02-22
   */
  Datum
  text_substr(PG_FUNCTION_ARGS)
  {
!     text       *string;
!     int32        m = PG_GETARG_INT32(1);
!     int32        n = PG_GETARG_INT32(2);
!     int32       sm;
!     int32       sn;
!     int         eml = 1;
! #ifdef MULTIBYTE
!     int            i;
!     int            len;
!     text       *ret;
!     char       *p;
! #endif

!     /*
!      * starting position before the start of the string? then offset into
!      * the string per SQL92 spec...
!      */
!     if (m < 1)
      {
!         n += (m - 1);
!         m = 1;
!     }
!     /* Check for m > octet length is made in TOAST access routine */

!     /* m will now become a zero-based starting position */
!     sm = m - 1;
!     sn = n;

! #ifdef MULTIBYTE
!     eml = pg_database_encoding_max_length ();

!     if (eml > 1)
      {
!         sm = 0;
!         if (n > -1)
!             sn = (m + n) * eml + 3; /* +3 to avoid mb characters overhanging slice end */
          else
!             sn = n;        /* n < 0 is special-cased by heap_tuple_untoast_attr_slice */
!     }
! #endif

!     string = PG_GETARG_TEXT_P_SLICE (0, sm, sn);

!     if (eml == 1)
!     {
!         PG_RETURN_TEXT_P (string);
!     }
! #ifndef MULTIBYTE
!     PG_RETURN_NULL();   /* notreached: suppress compiler warning */
! #endif
! #ifdef MULTIBYTE
!     if (n > -1)
!         len = pg_mbstrlen_with_len (VARDATA (string), sn - 3);
!     else    /* n < 0 is special-cased; need full string length */
!         len = pg_mbstrlen_with_len (VARDATA (string), VARSIZE(string)-VARHDRSZ);
!
!     if (m > len)
!     {
!         m = 1;
!         n = 0;
!     }
!     m--;
!     if (((m + n) > len) || (n < 0))
!         n = (len - m);
!
!     p = VARDATA(string);
!     for (i = 0; i < m; i++)
!         p += pg_mblen(p);
!     m = p - VARDATA(string);
!     for (i = 0; i < n; i++)
!         p += pg_mblen(p);
!     n = p - (VARDATA(string) + m);

!     ret = (text *) palloc(VARHDRSZ + n);
!     VARATT_SIZEP(ret) = VARHDRSZ + n;

!     memcpy(VARDATA(ret), VARDATA(string) + m, n);

!     PG_RETURN_TEXT_P(ret);
! #endif
  }

  /*
--- 425,625 ----
   * - Thomas Lockhart 1998-12-10
   * Now uses faster TOAST-slicing interface
   * - John Gray 2002-02-22
+  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
+  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
+  * error; if E < 1, return '', not entire string). Fixed MB related bug when
+  * S > LC and < LC + 4 sometimes garbage characters are returned.
+  * - Joe Conway 2002-08-10
   */
  Datum
  text_substr(PG_FUNCTION_ARGS)
  {
!     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
!                                     PG_GETARG_INT32(1),
!                                     PG_GETARG_INT32(2),
!                                     false));
! }

! /*
!  * text_substr_no_len -
!  *      Wrapper to avoid opr_sanity failure due to
!  *      one function accepting a different number of args.
!  */
! Datum
! text_substr_no_len(PG_FUNCTION_ARGS)
! {
!     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
!                                     PG_GETARG_INT32(1),
!                                     -1, true));
! }
!
! /*
!  * text_substring -
!  *    Does the real work for text_substr() and text_substr_no_len()
!  *    This is broken out so it can be called directly by other string processing
!  *    functions.
!  */
! static text*
! text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
! {
!     int32        eml = pg_database_encoding_max_length();
!     int32        S = start;                /* start position */
!     int32        S1;                        /* adjusted start position */
!     int32        L1;                        /* adjusted substring length */
!
!     /* life is easy if the encoding max length is 1 */
!     if (eml == 1)
      {
!         S1 = Max(S, 1);

!         if (length_not_specified)    /* special case - get length to end of string */
!             L1 = -1;
!         else
!         {
!             /* end position */
!             int    E = S + length;

!             /*
!              * A negative value for L is the only way for the end position
!              * to be before the start. SQL99 says to throw an error.
!              */
!             if (E < S)
!                 elog(ERROR, "negative substring length not allowed");

!             /*
!              * A zero or negative value for the end position can happen if the start
!              * was negative or one. SQL99 says to return a zero-length string.
!              */
!             if (E < 1)
!                 return PG_STR_GET_TEXT("");
!
!             L1 = E - S1;
!         }
!
!         /*
!          * If the start position is past the end of the string,
!          * SQL99 says to return a zero-length string --
!          * PG_GETARG_TEXT_P_SLICE() will do that for us.
!          * Convert to zero-based starting position
!          */
!         return DatumGetTextPSlice(str, S1 - 1, L1);
!     }
!     else if (eml > 1)
      {
!         /*
!          * When encoding max length is > 1, we can't get LC without
!          * detoasting, so we'll grab a conservatively large slice
!          * now and go back later to do the right thing
!          */
!         int32        slice_start;
!         int32        slice_size;
!         int32        slice_strlen;
!         text        *slice;
!         int32        E1;
!         int32        i;
!         char       *p;
!         char       *s;
!         text       *ret;
!
!         /*
!          * if S is past the end of the string, the tuple toaster
!          * will return a zero-length string to us
!          */
!         S1 = Max(S, 1);
!
!         /*
!          * We need to start at position zero because there is no
!          * way to know in advance which byte offset corresponds to
!          * the supplied start position.
!          */
!         slice_start = 0;
!
!         if (length_not_specified)    /* special case - get length to end of string */
!             slice_size = L1 = -1;
          else
!         {
!             int    E = S + length;
!
!             /*
!              * A negative value for L is the only way for the end position
!              * to be before the start. SQL99 says to throw an error.
!              */
!             if (E < S)
!                 elog(ERROR, "negative substring length not allowed");

!             /*
!              * A zero or negative value for the end position can happen if the start
!              * was negative or one. SQL99 says to return a zero-length string.
!              */
!             if (E < 1)
!                 return PG_STR_GET_TEXT("");

!             /*
!              * if E is past the end of the string, the tuple toaster
!              * will truncate the length for us
!              */
!             L1 = E - S1;
!
!             /*
!              * Total slice size in bytes can't be any longer than the start
!              * position plus substring length times the encoding max length.
!              */
!             slice_size = (S1 + L1) * eml;
!         }
!         slice = DatumGetTextPSlice(str, slice_start, slice_size);

!         /* see if we got back an empty string */
!         if ((VARSIZE(slice) - VARHDRSZ) == 0)
!             return PG_STR_GET_TEXT("");

!         /* Now we can get the actual length of the slice in MB characters */
!         slice_strlen = pg_mbstrlen_with_len (VARDATA(slice), VARSIZE(slice) - VARHDRSZ);

!         /* Check that the start position wasn't > slice_strlen. If so,
!          * SQL99 says to return a zero-length string.
!          */
!         if (S1 > slice_strlen)
!             return PG_STR_GET_TEXT("");
!
!         /*
!          * Adjust L1 and E1 now that we know the slice string length.
!          * Again remember that S1 is one based, and slice_start is zero based.
!          */
!         if (L1 > -1)
!             E1 = Min(S1 + L1 , slice_start + 1 + slice_strlen);
!         else
!             E1 = slice_start + 1 + slice_strlen;
!
!         /*
!          * Find the start position in the slice;
!          * remember S1 is not zero based
!          */
!         p = VARDATA(slice);
!         for (i = 0; i < S1 - 1; i++)
!             p += pg_mblen(p);
!
!         /* hang onto a pointer to our start position */
!         s = p;
!
!         /*
!          * Count the actual bytes used by the substring of
!          * the requested length.
!          */
!         for (i = S1; i < E1; i++)
!             p += pg_mblen(p);
!
!         ret = (text *) palloc(VARHDRSZ + (p - s));
!         VARATT_SIZEP(ret) = VARHDRSZ + (p - s);
!         memcpy(VARDATA(ret), s, (p - s));
!
!         return ret;
!     }
!     else
!         elog(ERROR, "Invalid backend encoding; encoding max length "
!                     "is less than one.");
!
!     /* not reached: suppress compiler warning */
!     return PG_STR_GET_TEXT("");
  }

  /*
***************
*** 481,536 ****
  Datum
  textpos(PG_FUNCTION_ARGS)
  {
!     text       *t1 = PG_GETARG_TEXT_P(0);
!     text       *t2 = PG_GETARG_TEXT_P(1);
!     int            pos;
!     int            px,
!                 p;
!     int            len1,
                  len2;
-     pg_wchar   *p1,
-                *p2;

! #ifdef MULTIBYTE
!     pg_wchar   *ps1,
!                *ps2;
! #endif

      if (VARSIZE(t2) <= VARHDRSZ)
          PG_RETURN_INT32(1);        /* result for empty pattern */

      len1 = (VARSIZE(t1) - VARHDRSZ);
      len2 = (VARSIZE(t2) - VARHDRSZ);
! #ifdef MULTIBYTE
!     ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
!     (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
!     len1 = pg_wchar_strlen(p1);
!     ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
!     (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
!     len2 = pg_wchar_strlen(p2);
! #else
!     p1 = VARDATA(t1);
!     p2 = VARDATA(t2);
! #endif
!     pos = 0;
      px = (len1 - len2);
!     for (p = 0; p <= px; p++)
      {
! #ifdef MULTIBYTE
!         if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
! #else
!         if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
! #endif
          {
!             pos = p + 1;
!             break;
!         };
!         p1++;
!     };
! #ifdef MULTIBYTE
!     pfree(ps1);
!     pfree(ps2);
! #endif
      PG_RETURN_INT32(pos);
  }

--- 635,729 ----
  Datum
  textpos(PG_FUNCTION_ARGS)
  {
!     PG_RETURN_INT32(text_position(PG_GETARG_DATUM(0), PG_GETARG_DATUM(1), 1));
! }
!
! /*
!  * text_position -
!  *    Does the real work for textpos()
!  *    This is broken out so it can be called directly by other string processing
!  *    functions.
!  */
! static int32
! text_position(Datum str, Datum search_str, int matchnum)
! {
!     int            eml = pg_database_encoding_max_length();
!     text       *t1 = DatumGetTextP(str);
!     text       *t2 = DatumGetTextP(search_str);
!     int            match = 0,
!                 pos = 0,
!                 p = 0,
!                 px,
!                 len1,
                  len2;

!     if(matchnum == 0)
!         return 0;        /* result for 0th match */

      if (VARSIZE(t2) <= VARHDRSZ)
          PG_RETURN_INT32(1);        /* result for empty pattern */

      len1 = (VARSIZE(t1) - VARHDRSZ);
      len2 = (VARSIZE(t2) - VARHDRSZ);
!
!     /* no use in searching str past point where search_str will fit */
      px = (len1 - len2);
!
!     if (eml == 1)    /* simple case - single byte encoding */
      {
!         char   *p1,
!                *p2;
!
!         p1 = VARDATA(t1);
!         p2 = VARDATA(t2);
!
!         for (p = 0; p <= px; p++)
          {
!             if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
!             {
!                 if (++match == matchnum)
!                 {
!                     pos = p + 1;
!                     break;
!                 }
!             }
!             p1++;
!         }
!     }
!     else if (eml > 1)    /* not as simple - multibyte encoding */
!     {
!         pg_wchar   *p1,
!                    *p2,
!                    *ps1,
!                    *ps2;
!
!         ps1 = p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
!         (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t1), p1, len1);
!         len1 = pg_wchar_strlen(p1);
!         ps2 = p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
!         (void) pg_mb2wchar_with_len((unsigned char *) VARDATA(t2), p2, len2);
!         len2 = pg_wchar_strlen(p2);
!
!         for (p = 0; p <= px; p++)
!         {
!             if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
!             {
!                 if (++match == matchnum)
!                 {
!                     pos = p + 1;
!                     break;
!                 }
!             }
!             p1++;
!         }
!
!         pfree(ps1);
!         pfree(ps2);
!     }
!     else
!         elog(ERROR, "Invalid backend encoding; encoding max length "
!                     "is less than one.");
!
      PG_RETURN_INT32(pos);
  }

***************
*** 758,766 ****
  Datum
  byteaoctetlen(PG_FUNCTION_ARGS)
  {
!     bytea       *v = PG_GETARG_BYTEA_P(0);
!
!     PG_RETURN_INT32(VARSIZE(v) - VARHDRSZ);
  }

  /*
--- 951,957 ----
  Datum
  byteaoctetlen(PG_FUNCTION_ARGS)
  {
!     PG_RETURN_INT32(toast_raw_datum_size(PG_GETARG_DATUM(0)) - VARHDRSZ);
  }

  /*
***************
*** 805,810 ****
--- 996,1003 ----
      PG_RETURN_BYTEA_P(result);
  }

+ #define PG_STR_GET_BYTEA(str_) \
+     DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
  /*
   * bytea_substr()
   * Return a substring starting at the specified position.
***************
*** 813,845 ****
   * Input:
   *    - string
   *    - starting position (is one-based)
!  *    - string length
   *
   * If the starting position is zero or less, then return from the start of the string
   * adjusting the length to be consistent with the "negative start" per SQL92.
!  * If the length is less than zero, return the remaining string.
!  *
   */
  Datum
  bytea_substr(PG_FUNCTION_ARGS)
  {
!     int32        m = PG_GETARG_INT32(1);
!     int32        n = PG_GETARG_INT32(2);

!     /*
!      * starting position before the start of the string? then offset into
!      * the string per SQL92 spec...
!      */
!     if (m < 1)
      {
!         n += (m - 1);
!         m = 1;
      }

!     /* m will now become a zero-based starting position */
!     m--;

!     PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE (0, m, n));
  }

  /*
--- 1006,1076 ----
   * Input:
   *    - string
   *    - starting position (is one-based)
!  *    - string length (optional)
   *
   * If the starting position is zero or less, then return from the start of the string
   * adjusting the length to be consistent with the "negative start" per SQL92.
!  * If the length is less than zero, an ERROR is thrown. If no third argument
!  * (length) is provided, the length to the end of the string is assumed.
   */
  Datum
  bytea_substr(PG_FUNCTION_ARGS)
  {
!     int        S = PG_GETARG_INT32(1);    /* start position */
!     int        S1;                        /* adjusted start position */
!     int        L1;                        /* adjusted substring length */

!     S1 = Max(S, 1);
!
!     if (fcinfo->nargs == 2)
!     {
!         /*
!          * Not passed a length - PG_GETARG_BYTEA_P_SLICE()
!          * grabs everything to the end of the string if we pass it
!          * a negative value for length.
!          */
!         L1 = -1;
!     }
!     else
      {
!         /* end position */
!         int    E = S + PG_GETARG_INT32(2);
!
!         /*
!          * A negative value for L is the only way for the end position
!          * to be before the start. SQL99 says to throw an error.
!          */
!         if (E < S)
!             elog(ERROR, "negative substring length not allowed");
!
!         /*
!          * A zero or negative value for the end position can happen if the start
!          * was negative or one. SQL99 says to return a zero-length string.
!          */
!         if (E < 1)
!             PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
!
!         L1 = E - S1;
      }

!     /*
!      * If the start position is past the end of the string,
!      * SQL99 says to return a zero-length string --
!      * PG_GETARG_TEXT_P_SLICE() will do that for us.
!      * Convert to zero-based starting position
!      */
!     PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE (0, S1 - 1, L1));
! }

! /*
!  * bytea_substr_no_len -
!  *      Wrapper to avoid opr_sanity failure due to
!  *      one function accepting a different number of args.
!  */
! Datum
! bytea_substr_no_len(PG_FUNCTION_ARGS)
! {
!     return bytea_substr(fcinfo);
  }

  /*
***************
*** 1422,1424 ****
--- 1653,1834 ----

      PG_RETURN_INT32(cmp);
  }
+
+ /*
+  * replace_text
+  * replace all occurences of 'old_sub_str' in 'orig_str'
+  * with 'new_sub_str' to form 'new_str'
+  *
+  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
+  * otherwise returns 'new_str'
+  */
+ Datum
+ replace_text(PG_FUNCTION_ARGS)
+ {
+     text        *left_text;
+     text        *right_text;
+     text        *buf_text;
+     text        *ret_text;
+     int            curr_posn;
+     text        *src_text = PG_GETARG_TEXT_P(0);
+     int            src_text_len = TEXTLEN(src_text);
+     text        *from_sub_text = PG_GETARG_TEXT_P(1);
+     int            from_sub_text_len = TEXTLEN(from_sub_text);
+     text        *to_sub_text = PG_GETARG_TEXT_P(2);
+     char        *to_sub_str = PG_TEXT_GET_STR(to_sub_text);
+     StringInfo    str = makeStringInfo();
+
+     if (src_text_len == 0 || from_sub_text_len == 0)
+         PG_RETURN_TEXT_P(src_text);
+
+     buf_text = TEXTDUP(src_text);
+     curr_posn = TEXTPOS(buf_text, from_sub_text);
+
+     while (curr_posn > 0)
+     {
+         left_text = LEFT(buf_text, from_sub_text);
+         right_text = RIGHT(buf_text, from_sub_text, from_sub_text_len);
+
+         appendStringInfo(str, PG_TEXT_GET_STR(left_text));
+         appendStringInfo(str, to_sub_str);
+
+         pfree(buf_text);
+         pfree(left_text);
+         buf_text = right_text;
+         curr_posn = TEXTPOS(buf_text, from_sub_text);
+     }
+
+     appendStringInfo(str, PG_TEXT_GET_STR(buf_text));
+     pfree(buf_text);
+
+     ret_text = PG_STR_GET_TEXT(str->data);
+     pfree(str->data);
+     pfree(str);
+
+     PG_RETURN_TEXT_P(ret_text);
+ }
+
+ /*
+  * split_text
+  * parse input string
+  * return ord item (1 based)
+  * based on provided field separator
+  */
+ Datum
+ split_text(PG_FUNCTION_ARGS)
+ {
+     text       *inputstring = PG_GETARG_TEXT_P(0);
+     int            inputstring_len = TEXTLEN(inputstring);
+     text       *fldsep = PG_GETARG_TEXT_P(1);
+     int            fldsep_len = TEXTLEN(fldsep);
+     int            fldnum = PG_GETARG_INT32(2);
+     int            start_posn = 0;
+     int            end_posn = 0;
+     text        *result_text;
+
+     /* return empty string for empty input string */
+     if (inputstring_len < 1)
+         PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
+
+     /* empty field separator */
+     if (fldsep_len < 1)
+     {
+         if (fldnum == 1)    /* first field - just return the input string */
+             PG_RETURN_TEXT_P(inputstring);
+         else                /* otherwise return an empty string */
+             PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
+     }
+
+     /* field number is 1 based */
+     if (fldnum < 1)
+         elog(ERROR, "field position must be > 0");
+
+     start_posn = text_position(PointerGetDatum(inputstring),
+                                 PointerGetDatum(fldsep),
+                                 fldnum - 1);
+     end_posn = text_position(PointerGetDatum(inputstring),
+                                 PointerGetDatum(fldsep),
+                                 fldnum);
+
+     if ((start_posn == 0) && (end_posn == 0))    /* fldsep not found */
+     {
+         if (fldnum == 1)    /* first field - just return the input string */
+             PG_RETURN_TEXT_P(inputstring);
+         else                /* otherwise return an empty string */
+             PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
+     }
+     else if ((start_posn != 0) && (end_posn == 0))
+     {
+         /* last field requested */
+         result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, -1, true);
+         PG_RETURN_TEXT_P(result_text);
+     }
+     else if ((start_posn == 0) && (end_posn != 0))
+     {
+         /* first field requested */
+         result_text = LEFT(inputstring, fldsep);
+         PG_RETURN_TEXT_P(result_text);
+     }
+     else
+     {
+         /* prior to last field requested */
+         result_text = text_substring(PointerGetDatum(inputstring), start_posn + fldsep_len, end_posn - start_posn -
fldsep_len,false); 
+         PG_RETURN_TEXT_P(result_text);
+     }
+ }
+
+ #define HEXBASE 16
+ /*
+  * Convert a int32 to a string containing a base 16 (hex) representation of
+  * the number.
+  */
+ Datum
+ to_hex32(PG_FUNCTION_ARGS)
+ {
+     static char        digits[] = "0123456789abcdef";
+     char            buf[32];    /* bigger than needed, but reasonable */
+     char           *ptr,
+                    *end;
+     text           *result_text;
+     int32            value = PG_GETARG_INT32(0);
+
+     end = ptr = buf + sizeof(buf) - 1;
+     *ptr = '\0';
+
+     do
+     {
+         *--ptr = digits[value % HEXBASE];
+         value /= HEXBASE;
+     } while (ptr > buf && value);
+
+     result_text = PG_STR_GET_TEXT(ptr);
+     PG_RETURN_TEXT_P(result_text);
+ }
+
+ /*
+  * Convert a int64 to a string containing a base 16 (hex) representation of
+  * the number.
+  */
+ Datum
+ to_hex64(PG_FUNCTION_ARGS)
+ {
+     static char        digits[] = "0123456789abcdef";
+     char            buf[32];    /* bigger than needed, but reasonable */
+     char            *ptr,
+                     *end;
+     text            *result_text;
+     int64            value = PG_GETARG_INT64(0);
+
+     end = ptr = buf + sizeof(buf) - 1;
+     *ptr = '\0';
+
+     do
+     {
+         *--ptr = digits[value % HEXBASE];
+         value /= HEXBASE;
+     } while (ptr > buf && value);
+
+     result_text = PG_STR_GET_TEXT(ptr);
+     PG_RETURN_TEXT_P(result_text);
+ }
+
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /opt/src/cvs/pgsql-server/src/include/catalog/pg_proc.h,v
retrieving revision 1.254
diff -c -r1.254 pg_proc.h
*** src/include/catalog/pg_proc.h    15 Aug 2002 02:51:27 -0000    1.254
--- src/include/catalog/pg_proc.h    16 Aug 2002 18:53:13 -0000
***************
*** 2121,2127 ****
  DESCR("remove initial characters from string");
  DATA(insert OID =  882 (  rtrim           PGNSP PGUID 14 f f t f i 1 25 "25"  "select rtrim($1, \' \')" - _null_ ));
  DESCR("remove trailing characters from string");
! DATA(insert OID =  883 (  substr       PGNSP PGUID 14 f f t f i 2 25 "25 23"    "select substr($1, $2, -1)" - _null_
));
  DESCR("return portion of string");
  DATA(insert OID =  884 (  btrim           PGNSP PGUID 12 f f t f i 2 25 "25 25"    btrim - _null_ ));
  DESCR("trim both ends of string");
--- 2121,2127 ----
  DESCR("remove initial characters from string");
  DATA(insert OID =  882 (  rtrim           PGNSP PGUID 14 f f t f i 1 25 "25"  "select rtrim($1, \' \')" - _null_ ));
  DESCR("remove trailing characters from string");
! DATA(insert OID =  883 (  substr       PGNSP PGUID 12 f f t f i 2 25 "25 23"    text_substr_no_len - _null_ ));
  DESCR("return portion of string");
  DATA(insert OID =  884 (  btrim           PGNSP PGUID 12 f f t f i 2 25 "25 25"    btrim - _null_ ));
  DESCR("trim both ends of string");
***************
*** 2130,2137 ****

  DATA(insert OID =  936 (  substring    PGNSP PGUID 12 f f t f i 3 25 "25 23 23"  text_substr - _null_ ));
  DESCR("return portion of string");
! DATA(insert OID =  937 (  substring    PGNSP PGUID 14 f f t f i 2 25 "25 23"    "select substring($1, $2, -1)" -
_null_)); 
  DESCR("return portion of string");

  /* for multi-byte support */

--- 2130,2145 ----

  DATA(insert OID =  936 (  substring    PGNSP PGUID 12 f f t f i 3 25 "25 23 23"  text_substr - _null_ ));
  DESCR("return portion of string");
! DATA(insert OID =  937 (  substring    PGNSP PGUID 12 f f t f i 2 25 "25 23"    text_substr_no_len - _null_ ));
  DESCR("return portion of string");
+ DATA(insert OID =  2087 ( replace      PGNSP PGUID 12 f f t f i 3 25 "25 25 25"  replace_text - _null_ ));
+ DESCR("replace all occurrences of old_substr with new_substr in string");
+ DATA(insert OID =  2088 ( split        PGNSP PGUID 12 f f t f i 3 25 "25 25 23"  split_text - _null_ ));
+ DESCR("split string by field_sep and return field_num");
+ DATA(insert OID =  2089 ( to_hex       PGNSP PGUID 12 f f t f i 1 25 "23"  to_hex32 - _null_ ));
+ DESCR("convert int32 number to hex");
+ DATA(insert OID =  2090 ( to_hex       PGNSP PGUID 12 f f t f i 1 25 "20"  to_hex64 - _null_ ));
+ DESCR("convert int64 number to hex");

  /* for multi-byte support */

***************
*** 2778,2784 ****
  DESCR("concatenate");
  DATA(insert OID = 2012 (  substring           PGNSP PGUID 12 f f t f i 3 17 "17 23 23"  bytea_substr - _null_ ));
  DESCR("return portion of string");
! DATA(insert OID = 2013 (  substring           PGNSP PGUID 14 f f t f i 2 17 "17 23"    "select substring($1, $2, -1)"
-_null_ )); 
  DESCR("return portion of string");
  DATA(insert OID = 2014 (  position           PGNSP PGUID 12 f f t f i 2 23 "17 17"    byteapos - _null_ ));
  DESCR("return position of substring");
--- 2786,2796 ----
  DESCR("concatenate");
  DATA(insert OID = 2012 (  substring           PGNSP PGUID 12 f f t f i 3 17 "17 23 23"  bytea_substr - _null_ ));
  DESCR("return portion of string");
! DATA(insert OID = 2013 (  substring           PGNSP PGUID 12 f f t f i 2 17 "17 23"    bytea_substr_no_len - _null_
));
! DESCR("return portion of string");
! DATA(insert OID = 2085 (  substr           PGNSP PGUID 12 f f t f i 3 17 "17 23 23"  bytea_substr - _null_ ));
! DESCR("return portion of string");
! DATA(insert OID = 2086 (  substr           PGNSP PGUID 12 f f t f i 2 17 "17 23"    bytea_substr_no_len - _null_ ));
  DESCR("return portion of string");
  DATA(insert OID = 2014 (  position           PGNSP PGUID 12 f f t f i 2 23 "17 17"    byteapos - _null_ ));
  DESCR("return position of substring");
Index: src/include/utils/builtins.h
===================================================================
RCS file: /opt/src/cvs/pgsql-server/src/include/utils/builtins.h,v
retrieving revision 1.191
diff -c -r1.191 builtins.h
*** src/include/utils/builtins.h    15 Aug 2002 02:51:27 -0000    1.191
--- src/include/utils/builtins.h    16 Aug 2002 18:53:13 -0000
***************
*** 447,458 ****
--- 447,463 ----
  extern Datum textoctetlen(PG_FUNCTION_ARGS);
  extern Datum textpos(PG_FUNCTION_ARGS);
  extern Datum text_substr(PG_FUNCTION_ARGS);
+ extern Datum text_substr_no_len(PG_FUNCTION_ARGS);
  extern Datum name_text(PG_FUNCTION_ARGS);
  extern Datum text_name(PG_FUNCTION_ARGS);
  extern int    varstr_cmp(char *arg1, int len1, char *arg2, int len2);
  extern List *textToQualifiedNameList(text *textval, const char *caller);
  extern bool SplitIdentifierString(char *rawstring, char separator,
                                    List **namelist);
+ extern Datum replace_text(PG_FUNCTION_ARGS);
+ extern Datum split_text(PG_FUNCTION_ARGS);
+ extern Datum to_hex32(PG_FUNCTION_ARGS);
+ extern Datum to_hex64(PG_FUNCTION_ARGS);

  extern Datum unknownin(PG_FUNCTION_ARGS);
  extern Datum unknownout(PG_FUNCTION_ARGS);
***************
*** 476,481 ****
--- 481,487 ----
  extern Datum byteacat(PG_FUNCTION_ARGS);
  extern Datum byteapos(PG_FUNCTION_ARGS);
  extern Datum bytea_substr(PG_FUNCTION_ARGS);
+ extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS);

  /* version.c */
  extern Datum pgsql_version(PG_FUNCTION_ARGS);
Index: src/test/regress/expected/strings.out
===================================================================
RCS file: /opt/src/cvs/pgsql-server/src/test/regress/expected/strings.out,v
retrieving revision 1.12
diff -c -r1.12 strings.out
*** src/test/regress/expected/strings.out    11 Jun 2002 15:41:38 -0000    1.12
--- src/test/regress/expected/strings.out    16 Aug 2002 18:53:13 -0000
***************
*** 573,575 ****
--- 573,738 ----
   text and varchar
  (1 row)

+ --
+ -- test substr with toasted text values
+ --
+ CREATE TABLE toasttest(f1 text);
+ insert into toasttest values(repeat('1234567890',10000));
+ insert into toasttest values(repeat('1234567890',10000));
+ -- If the starting position is zero or less, then return from the start of the string
+ -- adjusting the length to be consistent with the "negative start" per SQL92.
+ SELECT substr(f1, -1, 5) from toasttest;
+  substr
+ --------
+  123
+  123
+ (2 rows)
+
+ -- If the length is less than zero, an ERROR is thrown.
+ SELECT substr(f1, 5, -1) from toasttest;
+ ERROR:  negative substring length not allowed
+ -- If no third argument (length) is provided, the length to the end of the
+ -- string is assumed.
+ SELECT substr(f1, 99995) from toasttest;
+  substr
+ --------
+  567890
+  567890
+ (2 rows)
+
+ -- If start plus length is > string length, the result is truncated to
+ -- string length
+ SELECT substr(f1, 99995, 10) from toasttest;
+  substr
+ --------
+  567890
+  567890
+ (2 rows)
+
+ DROP TABLE toasttest;
+ --
+ -- test substr with toasted bytea values
+ --
+ CREATE TABLE toasttest(f1 bytea);
+ insert into toasttest values(decode(repeat('1234567890',10000),'escape'));
+ insert into toasttest values(decode(repeat('1234567890',10000),'escape'));
+ -- If the starting position is zero or less, then return from the start of the string
+ -- adjusting the length to be consistent with the "negative start" per SQL92.
+ SELECT substr(f1, -1, 5) from toasttest;
+  substr
+ --------
+  123
+  123
+ (2 rows)
+
+ -- If the length is less than zero, an ERROR is thrown.
+ SELECT substr(f1, 5, -1) from toasttest;
+ ERROR:  negative substring length not allowed
+ -- If no third argument (length) is provided, the length to the end of the
+ -- string is assumed.
+ SELECT substr(f1, 99995) from toasttest;
+  substr
+ --------
+  567890
+  567890
+ (2 rows)
+
+ -- If start plus length is > string length, the result is truncated to
+ -- string length
+ SELECT substr(f1, 99995, 10) from toasttest;
+  substr
+ --------
+  567890
+  567890
+ (2 rows)
+
+ DROP TABLE toasttest;
+ --
+ -- test length
+ --
+ SELECT length('abcdef') AS "length_6";
+  length_6
+ ----------
+         6
+ (1 row)
+
+ --
+ -- test strpos
+ --
+ SELECT strpos('abcdef', 'cd') AS "pos_3";
+  pos_3
+ -------
+      3
+ (1 row)
+
+ SELECT strpos('abcdef', 'xy') AS "pos_0";
+  pos_0
+ -------
+      0
+ (1 row)
+
+ --
+ -- test replace
+ --
+ SELECT replace('abcdef', 'de', '45') AS "abc45f";
+  abc45f
+ --------
+  abc45f
+ (1 row)
+
+ SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo";
+  ya123da123doo
+ ---------------
+  ya123da123doo
+ (1 row)
+
+ SELECT replace('yabadoo', 'bad', '') AS "yaoo";
+  yaoo
+ ------
+  yaoo
+ (1 row)
+
+ --
+ -- test split
+ --
+ select split('joeuser@mydatabase','@',0) AS "an error";
+ ERROR:  field position must be > 0
+ select split('joeuser@mydatabase','@',1) AS "joeuser";
+  joeuser
+ ---------
+  joeuser
+ (1 row)
+
+ select split('joeuser@mydatabase','@',2) AS "mydatabase";
+  mydatabase
+ ------------
+  mydatabase
+ (1 row)
+
+ select split('joeuser@mydatabase','@',3) AS "empty string";
+  empty string
+ --------------
+
+ (1 row)
+
+ select split('@joeuser@mydatabase@','@',2) AS "joeuser";
+  joeuser
+ ---------
+  joeuser
+ (1 row)
+
+ --
+ -- test to_hex
+ --
+ select to_hex(256*256*256 - 1) AS "ffffff";
+  ffffff
+ --------
+  ffffff
+ (1 row)
+
+ select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff";
+  ffffffff
+ ----------
+  ffffffff
+ (1 row)
+
Index: src/test/regress/sql/strings.sql
===================================================================
RCS file: /opt/src/cvs/pgsql-server/src/test/regress/sql/strings.sql,v
retrieving revision 1.8
diff -c -r1.8 strings.sql
*** src/test/regress/sql/strings.sql    11 Jun 2002 15:41:38 -0000    1.8
--- src/test/regress/sql/strings.sql    16 Aug 2002 18:53:13 -0000
***************
*** 197,199 ****
--- 197,292 ----
  SELECT text 'text' || char(20) ' and characters' AS "Concat text to char";

  SELECT text 'text' || varchar ' and varchar' AS "Concat text to varchar";
+
+ --
+ -- test substr with toasted text values
+ --
+ CREATE TABLE toasttest(f1 text);
+
+ insert into toasttest values(repeat('1234567890',10000));
+ insert into toasttest values(repeat('1234567890',10000));
+
+ -- If the starting position is zero or less, then return from the start of the string
+ -- adjusting the length to be consistent with the "negative start" per SQL92.
+ SELECT substr(f1, -1, 5) from toasttest;
+
+ -- If the length is less than zero, an ERROR is thrown.
+ SELECT substr(f1, 5, -1) from toasttest;
+
+ -- If no third argument (length) is provided, the length to the end of the
+ -- string is assumed.
+ SELECT substr(f1, 99995) from toasttest;
+
+ -- If start plus length is > string length, the result is truncated to
+ -- string length
+ SELECT substr(f1, 99995, 10) from toasttest;
+
+ DROP TABLE toasttest;
+
+ --
+ -- test substr with toasted bytea values
+ --
+ CREATE TABLE toasttest(f1 bytea);
+
+ insert into toasttest values(decode(repeat('1234567890',10000),'escape'));
+ insert into toasttest values(decode(repeat('1234567890',10000),'escape'));
+
+ -- If the starting position is zero or less, then return from the start of the string
+ -- adjusting the length to be consistent with the "negative start" per SQL92.
+ SELECT substr(f1, -1, 5) from toasttest;
+
+ -- If the length is less than zero, an ERROR is thrown.
+ SELECT substr(f1, 5, -1) from toasttest;
+
+ -- If no third argument (length) is provided, the length to the end of the
+ -- string is assumed.
+ SELECT substr(f1, 99995) from toasttest;
+
+ -- If start plus length is > string length, the result is truncated to
+ -- string length
+ SELECT substr(f1, 99995, 10) from toasttest;
+
+ DROP TABLE toasttest;
+
+ --
+ -- test length
+ --
+
+ SELECT length('abcdef') AS "length_6";
+
+ --
+ -- test strpos
+ --
+
+ SELECT strpos('abcdef', 'cd') AS "pos_3";
+
+ SELECT strpos('abcdef', 'xy') AS "pos_0";
+
+ --
+ -- test replace
+ --
+ SELECT replace('abcdef', 'de', '45') AS "abc45f";
+
+ SELECT replace('yabadabadoo', 'ba', '123') AS "ya123da123doo";
+
+ SELECT replace('yabadoo', 'bad', '') AS "yaoo";
+
+ --
+ -- test split
+ --
+ select split('joeuser@mydatabase','@',0) AS "an error";
+
+ select split('joeuser@mydatabase','@',1) AS "joeuser";
+
+ select split('joeuser@mydatabase','@',2) AS "mydatabase";
+
+ select split('joeuser@mydatabase','@',3) AS "empty string";
+
+ select split('@joeuser@mydatabase@','@',2) AS "joeuser";
+
+ --
+ -- test to_hex
+ --
+ select to_hex(256*256*256 - 1) AS "ffffff";
+
+ select to_hex(256::bigint*256::bigint*256::bigint*256::bigint - 1) AS "ffffffff";

В списке pgsql-patches по дате отправления:

Предыдущее
От: Bruce Momjian
Дата:
Сообщение: Re: Foreign key in pg_dump / psql
Следующее
От: Tom Lane
Дата:
Сообщение: Re: Foreign key in pg_dump / psql