Re: PATCH: Add uri percent-encoding for binary data

Поиск
Список
Период
Сортировка
От Bruce Momjian
Тема Re: PATCH: Add uri percent-encoding for binary data
Дата
Msg-id 20191007195241.GJ4732@momjian.us
обсуждение исходный текст
Ответ на PATCH: Add uri percent-encoding for binary data  (Anders Åstrand <anders@449.se>)
Ответы Re: PATCH: Add uri percent-encoding for binary data  (Anders Åstrand <anders@449.se>)
Список pgsql-hackers
On Mon, Oct  7, 2019 at 09:14:38AM +0200, Anders Åstrand wrote:
> Hello
> 
> Attached is a patch for adding uri as an encoding option for
> encode/decode. It uses what's called "percent-encoding" in RFC 3986
> (https://tools.ietf.org/html/rfc3986#section-2.1).

Oh, that's a cool idea.  Can you add it to the commit-fest?

    https://commitfest.postgresql.org/25/

---------------------------------------------------------------------------


> 
> The background for this patch is that I could easily build urls in
> plpgsql, but doing the actual encoding of the url parts is painfully
> slow. The list of available encodings for encode/decode looks quite
> arbitrary to me, so I can't see any reason this one couldn't be in
> there.
> 
> In modern web scenarios one would most likely want to encode
> the utf8 representation of a text string for inclusion in a url, in
> which case correct invocation would be ENCODE(CONVERT_TO('some text in
> database encoding goes here', 'UTF8'), 'uri'), but uri
> percent-encoding can of course also be used for other text encodings
> and arbitrary binary data.
> 
> Regards,
> Anders

> diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
> index 7293d66de5..33cf7bb57c 100644
> --- a/src/backend/utils/adt/encode.c
> +++ b/src/backend/utils/adt/encode.c
> @@ -512,6 +512,131 @@ esc_dec_len(const char *src, unsigned srclen)
>      return len;
>  }
>  
> +/*
> + * URI percent encoding
> + *
> + * Percent encodes all byte values except the unreserved ASCII characters as per RFC3986.
> + */
> +
> +static const char upper_hex_digits[] = "0123456789ABCDEF";
> +
> +static unsigned
> +uri_encode(const char *src, unsigned srclen, char *dst)
> +{
> +    char        *d = dst;
> +
> +    for (const char *s = src; s < src + srclen; s++)
> +    {
> +        if ((*s >= 'A' && *s <= 'Z') ||
> +            (*s >= 'a' && *s <= 'z') ||
> +            (*s >= '0' && *s <= '9') ||
> +            *s == '-' ||
> +            *s == '_' ||
> +            *s == '.' ||
> +            *s == '~')
> +        {
> +            *d++ = *s;
> +        }
> +        else
> +        {
> +            *d++ = '%';
> +            *d++ = upper_hex_digits[(*s >> 4) & 0xF];
> +            *d++ = upper_hex_digits[*s & 0xF];
> +        }
> +    }
> +    return d - dst;
> +}
> +
> +static unsigned
> +uri_decode(const char *src, unsigned srclen, char *dst)
> +{
> +    const char *s = src;
> +    const char *srcend = src + srclen;
> +    char        *d = dst;
> +    char        val;
> +
> +    while (s < srcend)
> +    {
> +        if (*s == '%')
> +        {
> +            if (s > srcend - 3) {
> +                /* This will never get triggered since uri_dec_len already takes care of validation
> +                 */
> +                ereport(ERROR,
> +                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
> +                         errmsg("invalid uri percent encoding"),
> +                         errhint("Input data ends prematurely.")));
> +            }
> +
> +            /* Skip '%' */
> +            s++;
> +
> +            val = get_hex(*s++) << 4;
> +            val += get_hex(*s++);
> +            *d++ = val;
> +        }
> +        else
> +        {
> +            *d++ = *s++;
> +        }
> +    }
> +    return d - dst;
> +}
> +
> +static unsigned
> +uri_enc_len(const char *src, unsigned srclen)
> +{
> +    int            len = 0;
> +
> +    for (const char *s = src; s < src + srclen; s++)
> +    {
> +        if ((*s >= 'A' && *s <= 'Z') ||
> +            (*s >= 'a' && *s <= 'z') ||
> +            (*s >= '0' && *s <= '9') ||
> +            *s == '-' ||
> +            *s == '_' ||
> +            *s == '.' ||
> +            *s == '~')
> +        {
> +            len++;
> +        }
> +        else
> +        {
> +            len += 3;
> +        }
> +    }
> +    return len;
> +}
> +
> +static unsigned
> +uri_dec_len(const char *src, unsigned srclen)
> +{
> +    const char *s = src;
> +    const char *srcend = src + srclen;
> +    int            len = 0;
> +
> +    while (s < srcend)
> +    {
> +        if (*s == '%')
> +        {
> +            if (s > srcend - 3) {
> +                ereport(ERROR,
> +                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
> +                         errmsg("invalid uri percent encoding"),
> +                         errhint("Input data ends prematurely.")));
> +            }
> +            s++;
> +            get_hex(*s++);
> +            get_hex(*s++);
> +        }
> +        else {
> +            s++;
> +        }
> +        len++;
> +    }
> +    return len;
> +}
> +
>  /*
>   * Common
>   */
> @@ -541,6 +666,12 @@ static const struct
>              esc_enc_len, esc_dec_len, esc_encode, esc_decode
>          }
>      },
> +    {
> +        "uri",
> +        {
> +            uri_enc_len, uri_dec_len, uri_encode, uri_decode
> +        }
> +    },
>      {
>          NULL,
>          {
> diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
> index 2483966576..f89c5ec1c3 100644
> --- a/src/test/regress/expected/strings.out
> +++ b/src/test/regress/expected/strings.out
> @@ -1870,3 +1870,24 @@ SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5
>   Th\000o\x02\x03
>  (1 row)
>  
> +SET bytea_output TO hex;
> +SELECT encode(E'en\\300\\336d'::bytea, 'uri');
> +  encode   
> +-----------
> + en%C0%DEd
> +(1 row)
> +
> +SELECT decode('%De%c0%DEd', 'uri');
> +   decode   
> +------------
> + \xdec0de64
> +(1 row)
> +
> +SELECT decode('error%Ex', 'uri');
> +ERROR:  invalid hexadecimal digit: "x"
> +SELECT decode('error%E', 'uri');
> +ERROR:  invalid uri percent encoding
> +HINT:  Input data ends prematurely.
> +SELECT decode('error%', 'uri');
> +ERROR:  invalid uri percent encoding
> +HINT:  Input data ends prematurely.
> diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
> index b5e75c344f..1d03836b6e 100644
> --- a/src/test/regress/sql/strings.sql
> +++ b/src/test/regress/sql/strings.sql
> @@ -641,3 +641,10 @@ SELECT btrim(E'\\000trim\\000'::bytea, ''::bytea);
>  SELECT encode(overlay(E'Th\\000omas'::bytea placing E'Th\\001omas'::bytea from 2),'escape');
>  SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 8),'escape');
>  SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5 for 3),'escape');
> +
> +SET bytea_output TO hex;
> +SELECT encode(E'en\\300\\336d'::bytea, 'uri');
> +SELECT decode('%De%c0%DEd', 'uri');
> +SELECT decode('error%Ex', 'uri');
> +SELECT decode('error%E', 'uri');
> +SELECT decode('error%', 'uri');


-- 
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

+ As you are, so once was I.  As I am, so you will be. +
+                      Ancient Roman grave inscription +



В списке pgsql-hackers по дате отправления:

Предыдущее
От: Robert Haas
Дата:
Сообщение: Re: Transparent Data Encryption (TDE) and encrypted files
Следующее
От: Robert Haas
Дата:
Сообщение: Re: Missed check for too-many-children in bgworker spawning