Обсуждение: Cleaning up array_in()

Поиск

Список

Период

Сортировка

Cleaning up array_in()

От

Tom Lane

Дата:

02 мая 2023 г., 18:41:27

This is in response to Alexander's observation at [1], but I'm
starting a fresh thread to keep this patch separate from the plperl
fixes in the cfbot's eyes.

Alexander Lakhin <exclusion@gmail.com> writes:
> I continue watching the array handling bugs dancing Sirtaki too. Now it's
> another asymmetry:
> select '{{1},{{2}}}'::int[];
>   {{{1}},{{2}}}
> but:
> select '{{{1}},{2}}'::int[];
>   {}

Bleah.  Both of those should be rejected, for sure, but it's the same
situation as in the PLs: we weren't doing anything to enforce that all
the scalar elements appear at the same nesting depth.

I spent some time examining array_in(), and was pretty disheartened
by what a mess it is.  It looks like back in the dim mists of the
Berkeley era, there was an intentional attempt to allow
non-rectangular array input, with the missing elements automatically
filled out as NULLs.  Since that was undocumented, we concluded it was
a bug and plastered on some code to check for rectangularity of the
input.  I don't quibble with enforcing rectangularity, but the
underlying logic should have been simplified while we were at it.
The element-counting logic was basically magic (why is it okay to
increment temp[ndim - 1] when the current nest_level might be
different from that?) and the extra layers of checks didn't make it
any more intelligible.  Plus, ReadArrayStr was expending far more
cycles than it needs to given the assumption of rectangularity.

So, here's a rewrite.

Although I view this as a bug fix, AFAICT the only effects are to
accept input that should be rejected.  So again I don't advocate
back-patching.  But should we sneak it into v16, or wait for v17?

            regards, tom lane

[1] https://www.postgresql.org/message-id/9cd163da-d096-7e9e-28f6-f3620962a660%40gmail.com

From 6a9fe8117e1b91958111c679d02a2bd7944fae22 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 1 May 2023 18:31:40 -0400
Subject: [PATCH v1 1/2] Simplify and speed up ReadArrayStr().

ReadArrayStr() seems to have been written on the assumption that
non-rectangular input is fine and it should pad with NULLs anywhere
that elements are missing.  We disallowed non-rectangular input
ages ago (commit 0e13d627b), but never simplified this function
as a follow-up.  In particular, the existing code recomputes each
element's linear location from scratch, which is quite unnecessary
for rectangular input: we can just assign the elements sequentially,
saving lots of arithmetic.  Add some more commentary while at it.

(This leaves ArrayGetOffset0() unused, but I'm unsure whether to
remove that.)
---
 src/backend/utils/adt/arrayfuncs.c | 69 ++++++++++++++----------------
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 87c987fb27..39b5efc661 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -93,7 +93,7 @@ static bool array_isspace(char ch);
 static int    ArrayCount(const char *str, int *dim, char typdelim,
                        Node *escontext);
 static bool ReadArrayStr(char *arrayStr, const char *origStr,
-                         int nitems, int ndim, int *dim,
+                         int nitems,
                          FmgrInfo *inputproc, Oid typioparam, int32 typmod,
                          char typdelim,
                          int typlen, bool typbyval, char typalign,
@@ -391,7 +391,7 @@ array_in(PG_FUNCTION_ARGS)
     dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
     nullsPtr = (bool *) palloc(nitems * sizeof(bool));
     if (!ReadArrayStr(p, string,
-                      nitems, ndim, dim,
+                      nitems,
                       &my_extra->proc, typioparam, typmod,
                       typdelim,
                       typlen, typbyval, typalign,
@@ -457,7 +457,8 @@ array_isspace(char ch)

 /*
  * ArrayCount
- *     Determines the dimensions for an array string.
+ *     Determines the dimensions for an array string.  This includes
+ *     syntax-checking the array structure decoration (braces and delimiters).
  *
  * Returns number of dimensions as function result.  The axis lengths are
  * returned in dim[], which must be of size MAXDIM.
@@ -704,16 +705,14 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 /*
  * ReadArrayStr :
  *     parses the array string pointed to by "arrayStr" and converts the values
- *     to internal format.  Unspecified elements are initialized to nulls.
- *     The array dimensions must already have been determined.
+ *     to internal format.  The array dimensions must have been determined,
+ *     and the case of an empty array must have been handled earlier.
  *
  * Inputs:
  *    arrayStr: the string to parse.
  *              CAUTION: the contents of "arrayStr" will be modified!
  *    origStr: the unmodified input string, used only in error messages.
  *    nitems: total number of array elements, as already determined.
- *    ndim: number of array dimensions
- *    dim[]: array axis lengths
  *    inputproc: type-specific input procedure for element datatype.
  *    typioparam, typmod: auxiliary values to pass to inputproc.
  *    typdelim: the value delimiter (type-specific).
@@ -738,8 +737,6 @@ static bool
 ReadArrayStr(char *arrayStr,
              const char *origStr,
              int nitems,
-             int ndim,
-             int *dim,
              FmgrInfo *inputproc,
              Oid typioparam,
              int32 typmod,
@@ -753,20 +750,13 @@ ReadArrayStr(char *arrayStr,
              int32 *nbytes,
              Node *escontext)
 {
-    int            i,
+    int            i = 0,
                 nest_level = 0;
     char       *srcptr;
     bool        in_quotes = false;
     bool        eoArray = false;
     bool        hasnull;
     int32        totbytes;
-    int            indx[MAXDIM] = {0},
-                prod[MAXDIM];
-
-    mda_get_prod(ndim, dim, prod);
-
-    /* Initialize is-null markers to true */
-    memset(nulls, true, nitems * sizeof(bool));

     /*
      * We have to remove " and \ characters to create a clean item value to
@@ -789,11 +779,20 @@ ReadArrayStr(char *arrayStr,
         bool        itemdone = false;
         bool        leadingspace = true;
         bool        hasquoting = false;
-        char       *itemstart;
-        char       *dstptr;
-        char       *dstendptr;
+        char       *itemstart;    /* start of de-escaped text */
+        char       *dstptr;        /* next output point for de-escaped text */
+        char       *dstendptr;    /* last significant output char + 1 */

-        i = -1;
+        /*
+         * Parse next array element, collecting the de-escaped text into
+         * itemstart..dstendptr-1.
+         *
+         * Notice that we do not set "itemdone" until we see a separator
+         * (typdelim character) or the array's final right brace.  Since the
+         * array is already verified to be nonempty and rectangular, there is
+         * guaranteed to be another element to be processed in the first case,
+         * while in the second case of course we'll exit the outer loop.
+         */
         itemstart = dstptr = dstendptr = srcptr;

         while (!itemdone)
@@ -840,13 +839,7 @@ ReadArrayStr(char *arrayStr,
                 case '{':
                     if (!in_quotes)
                     {
-                        if (nest_level >= ndim)
-                            ereturn(escontext, false,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"",
-                                            origStr)));
                         nest_level++;
-                        indx[nest_level - 1] = 0;
                         srcptr++;
                     }
                     else
@@ -860,14 +853,9 @@ ReadArrayStr(char *arrayStr,
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"",
                                             origStr)));
-                        if (i == -1)
-                            i = ArrayGetOffset0(ndim, indx, prod);
-                        indx[nest_level - 1] = 0;
                         nest_level--;
                         if (nest_level == 0)
                             eoArray = itemdone = true;
-                        else
-                            indx[nest_level - 1]++;
                         srcptr++;
                     }
                     else
@@ -878,10 +866,7 @@ ReadArrayStr(char *arrayStr,
                         *dstptr++ = *srcptr++;
                     else if (*srcptr == typdelim)
                     {
-                        if (i == -1)
-                            i = ArrayGetOffset0(ndim, indx, prod);
                         itemdone = true;
-                        indx[ndim - 1]++;
                         srcptr++;
                     }
                     else if (array_isspace(*srcptr))
@@ -905,15 +890,18 @@ ReadArrayStr(char *arrayStr,
             }
         }

+        /* Terminate de-escaped string */
         Assert(dstptr < srcptr);
         *dstendptr = '\0';

-        if (i < 0 || i >= nitems)
+        /* Safety check that we don't write past the output arrays */
+        if (i >= nitems)
             ereturn(escontext, false,
                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                      errmsg("malformed array literal: \"%s\"",
                             origStr)));

+        /* Convert the de-escaped string into the next output array entries */
         if (Array_nulls && !hasquoting &&
             pg_strcasecmp(itemstart, "NULL") == 0)
         {
@@ -934,8 +922,17 @@ ReadArrayStr(char *arrayStr,
                 return false;
             nulls[i] = false;
         }
+
+        i++;
     }

+    /* Cross-check that we filled all the output array entries */
+    if (i != nitems)
+        ereturn(escontext, false,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("malformed array literal: \"%s\"",
+                        origStr)));
+
     /*
      * Check for nulls, compute total data space needed
      */
--
2.31.1

From cf15943f7438306559719407126ce52958a2c061 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 2 May 2023 11:23:31 -0400
Subject: [PATCH v1 2/2] Rewrite ArrayCount() to make dimensionality checks
 more complete.

ArrayCount has had a few layers of revision over the years,
reaching a point where it's really quite baroque and unintelligible.
There are actually four different arrays holding dimensions, which
can be reduced to two with little effort.  It was also close to
impossible to figure out why the dimension-counting actually worked
correctly.  Rewrite to make it perhaps a little less opaque.
In particular, a new element or subarray is now counted when we see
the start of the element or subarray, not at some later point where
we might not even be at the same nest_level anymore.

Also, install guards to catch non-rectangularity cases that were
previously missed, along the same lines as recent plperl and plpython
fixes: we were not checking that all scalar elements appear at the
same nesting depth.  Thanks to the very different logic, this seems
not to have caused any of the sort of internal errors that the PLs
suffered from, but it's still wrong.

On the other hand, the behavior of plperl and plpython suggests
that we should allow empty sub-arrays, as in '{{},{}}'; those
languages interpret equivalent constructs such as '[[],[]]' as
an empty (hence zero-dimensional) array, and it seems unclear
why array_in should not.  Remove the hack that was installed to
reject that case.

In passing, remove ArrayParseState.ARRAY_ELEM_COMPLETED, a state
that the state machine never entered yet wasted cycles checking for.

(I reindented the code for consistency, but it's probably easiest
to review this patch by examining "git diff -b" output.)
---
 src/backend/utils/adt/arrayfuncs.c   | 367 +++++++++++++++------------
 src/test/regress/expected/arrays.out |  41 ++-
 src/test/regress/sql/arrays.sql      |   8 +-
 3 files changed, 253 insertions(+), 163 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 39b5efc661..2a8f0c048a 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -57,7 +57,6 @@ typedef enum
     ARRAY_NO_LEVEL,
     ARRAY_LEVEL_STARTED,
     ARRAY_ELEM_STARTED,
-    ARRAY_ELEM_COMPLETED,
     ARRAY_QUOTED_ELEM_STARTED,
     ARRAY_QUOTED_ELEM_COMPLETED,
     ARRAY_ELEM_DELIMITED,
@@ -472,213 +471,270 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
     int            nest_level = 0,
                 i;
     int            ndim = 1,
-                temp[MAXDIM],
-                nelems[MAXDIM],
-                nelems_last[MAXDIM];
+                nelems[MAXDIM];
+    bool        ndim_frozen = false;
     bool        in_quotes = false;
     bool        eoArray = false;
     bool        empty_array = true;
     const char *ptr;
     ArrayParseState parse_state = ARRAY_NO_LEVEL;

+    /* Initialize dim[] entries to -1 meaning "unknown" */
     for (i = 0; i < MAXDIM; ++i)
-    {
-        temp[i] = dim[i] = nelems_last[i] = 0;
-        nelems[i] = 1;
-    }
+        dim[i] = -1;

+    /* Scan string until we reach closing brace */
     ptr = str;
     while (!eoArray)
     {
-        bool        itemdone = false;
+        bool        new_element = false;

-        while (!itemdone)
+        switch (*ptr)
         {
-            if (parse_state == ARRAY_ELEM_STARTED ||
-                parse_state == ARRAY_QUOTED_ELEM_STARTED)
-                empty_array = false;
+            case '\0':
+                /* Signal a premature end of the string */
+                ereturn(escontext, -1,
+                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("malformed array literal: \"%s\"", str),
+                         errdetail("Unexpected end of input.")));
+            case '\\':

-            switch (*ptr)
-            {
-                case '\0':
-                    /* Signal a premature end of the string */
+                /*
+                 * An escape must be after a level start, after an element
+                 * start, or after an element delimiter. In any case we now
+                 * must be past an element start.
+                 */
+                switch (parse_state)
+                {
+                    case ARRAY_LEVEL_STARTED:
+                    case ARRAY_ELEM_DELIMITED:
+                        /* start new unquoted element */
+                        parse_state = ARRAY_ELEM_STARTED;
+                        new_element = true;
+                        break;
+                    case ARRAY_ELEM_STARTED:
+                    case ARRAY_QUOTED_ELEM_STARTED:
+                        /* already in element */
+                        break;
+                    default:
+                        ereturn(escontext, -1,
+                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str),
+                                 errdetail("Unexpected \"%c\" character.",
+                                           '\\')));
+                }
+                /* skip the escaped character */
+                if (*(ptr + 1))
+                    ptr++;
+                else
                     ereturn(escontext, -1,
                             (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                              errmsg("malformed array literal: \"%s\"", str),
                              errdetail("Unexpected end of input.")));
-                case '\\':
+                break;
+            case '"':

+                /*
+                 * A quote must be after a level start, after a quoted element
+                 * start, or after an element delimiter. In any case we now
+                 * must be past an element start.
+                 */
+                switch (parse_state)
+                {
+                    case ARRAY_LEVEL_STARTED:
+                    case ARRAY_ELEM_DELIMITED:
+                        /* start new quoted element */
+                        Assert(!in_quotes);
+                        in_quotes = true;
+                        parse_state = ARRAY_QUOTED_ELEM_STARTED;
+                        new_element = true;
+                        break;
+                    case ARRAY_QUOTED_ELEM_STARTED:
+                        /* already in element, end it */
+                        Assert(in_quotes);
+                        in_quotes = false;
+                        parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
+                        break;
+                    default:
+                        ereturn(escontext, -1,
+                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str),
+                                 errdetail("Unexpected array element.")));
+                }
+                break;
+            case '{':
+                if (!in_quotes)
+                {
                     /*
-                     * An escape must be after a level start, after an element
-                     * start, or after an element delimiter. In any case we
-                     * now must be past an element start.
+                     * A left brace can occur if no nesting has occurred yet,
+                     * after a level start, or after a level delimiter.
                      */
-                    if (parse_state != ARRAY_LEVEL_STARTED &&
-                        parse_state != ARRAY_ELEM_STARTED &&
-                        parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-                        parse_state != ARRAY_ELEM_DELIMITED)
+                    if (parse_state != ARRAY_NO_LEVEL &&
+                        parse_state != ARRAY_LEVEL_STARTED &&
+                        parse_state != ARRAY_LEVEL_DELIMITED)
                         ereturn(escontext, -1,
                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                  errmsg("malformed array literal: \"%s\"", str),
                                  errdetail("Unexpected \"%c\" character.",
-                                           '\\')));
-                    if (parse_state != ARRAY_QUOTED_ELEM_STARTED)
-                        parse_state = ARRAY_ELEM_STARTED;
-                    /* skip the escaped character */
-                    if (*(ptr + 1))
-                        ptr++;
-                    else
+                                           '{')));
+                    parse_state = ARRAY_LEVEL_STARTED;
+                    /* Nested sub-arrays count as elements of outer level */
+                    if (nest_level > 0)
+                        nelems[nest_level - 1]++;
+                    /* Initialize element counting in the new level */
+                    if (nest_level >= MAXDIM)
                         ereturn(escontext, -1,
-                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                 errmsg("malformed array literal: \"%s\"", str),
-                                 errdetail("Unexpected end of input.")));
-                    break;
-                case '"':
-
+                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+                                        nest_level + 1, MAXDIM)));
+                    nelems[nest_level] = 0;
+                    nest_level++;
+                    if (ndim < nest_level)
+                    {
+                        /* Can't increase ndim once it's frozen */
+                        if (ndim_frozen)
+                            ereturn(escontext, -1,
+                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                     errmsg("malformed array literal: \"%s\"", str),
+                                     errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
+                        ndim = nest_level;
+                    }
+                }
+                break;
+            case '}':
+                if (!in_quotes)
+                {
                     /*
-                     * A quote must be after a level start, after a quoted
-                     * element start, or after an element delimiter. In any
-                     * case we now must be past an element start.
+                     * A right brace can occur after an element start, an
+                     * element completion, a quoted element completion, or a
+                     * level completion.  We also allow it after a level
+                     * start, that is an empty sub-array "{}" --- but that
+                     * freezes the number of dimensions and all such
+                     * sub-arrays must be at the same level, just like
+                     * sub-arrays containing elements.
                      */
-                    if (parse_state != ARRAY_LEVEL_STARTED &&
-                        parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-                        parse_state != ARRAY_ELEM_DELIMITED)
-                        ereturn(escontext, -1,
-                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                 errmsg("malformed array literal: \"%s\"", str),
-                                 errdetail("Unexpected array element.")));
-                    in_quotes = !in_quotes;
-                    if (in_quotes)
-                        parse_state = ARRAY_QUOTED_ELEM_STARTED;
-                    else
-                        parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
-                    break;
-                case '{':
-                    if (!in_quotes)
+                    switch (parse_state)
                     {
-                        /*
-                         * A left brace can occur if no nesting has occurred
-                         * yet, after a level start, or after a level
-                         * delimiter.
-                         */
-                        if (parse_state != ARRAY_NO_LEVEL &&
-                            parse_state != ARRAY_LEVEL_STARTED &&
-                            parse_state != ARRAY_LEVEL_DELIMITED)
+                        case ARRAY_ELEM_STARTED:
+                        case ARRAY_QUOTED_ELEM_COMPLETED:
+                        case ARRAY_LEVEL_COMPLETED:
+                            /* okay */
+                            break;
+                        case ARRAY_LEVEL_STARTED:
+                            /* empty sub-array: OK if at correct nest_level */
+                            ndim_frozen = true;
+                            if (nest_level != ndim)
+                                ereturn(escontext, -1,
+                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                         errmsg("malformed array literal: \"%s\"", str),
+                                         errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
+                            break;
+                        default:
                             ereturn(escontext, -1,
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"", str),
                                      errdetail("Unexpected \"%c\" character.",
-                                               '{')));
-                        parse_state = ARRAY_LEVEL_STARTED;
-                        if (nest_level >= MAXDIM)
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                                     errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-                                            nest_level + 1, MAXDIM)));
-                        temp[nest_level] = 0;
-                        nest_level++;
-                        if (ndim < nest_level)
-                            ndim = nest_level;
+                                               '}')));
                     }
-                    break;
-                case '}':
-                    if (!in_quotes)
+                    parse_state = ARRAY_LEVEL_COMPLETED;
+                    /* The parse state check assured we're in a level. */
+                    Assert(nest_level > 0);
+                    nest_level--;
+
+                    if (dim[nest_level] < 0)
+                    {
+                        /* Save length of first sub-array of this level */
+                        dim[nest_level] = nelems[nest_level];
+                    }
+                    else if (nelems[nest_level] != dim[nest_level])
+                    {
+                        /* Subsequent sub-arrays must have same length */
+                        ereturn(escontext, -1,
+                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str),
+                                 errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
+                    }
+                    /* Done if this is the outermost level's '}' */
+                    if (nest_level == 0)
+                        eoArray = true;
+                }
+                break;
+            default:
+                if (!in_quotes)
+                {
+                    if (*ptr == typdelim)
                     {
                         /*
-                         * A right brace can occur after an element start, an
-                         * element completion, a quoted element completion, or
-                         * a level completion.
+                         * Delimiters can occur after an element start, a
+                         * quoted element completion, or a level completion.
                          */
                         if (parse_state != ARRAY_ELEM_STARTED &&
-                            parse_state != ARRAY_ELEM_COMPLETED &&
                             parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-                            parse_state != ARRAY_LEVEL_COMPLETED &&
-                            !(nest_level == 1 && parse_state == ARRAY_LEVEL_STARTED))
+                            parse_state != ARRAY_LEVEL_COMPLETED)
                             ereturn(escontext, -1,
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"", str),
                                      errdetail("Unexpected \"%c\" character.",
-                                               '}')));
-                        parse_state = ARRAY_LEVEL_COMPLETED;
-                        if (nest_level == 0)
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Unmatched \"%c\" character.", '}')));
-                        nest_level--;
-
-                        if (nelems_last[nest_level] != 0 &&
-                            nelems[nest_level] != nelems_last[nest_level])
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Multidimensional arrays must have "
-                                               "sub-arrays with matching "
-                                               "dimensions.")));
-                        nelems_last[nest_level] = nelems[nest_level];
-                        nelems[nest_level] = 1;
-                        if (nest_level == 0)
-                            eoArray = itemdone = true;
+                                               typdelim)));
+                        if (parse_state == ARRAY_LEVEL_COMPLETED)
+                            parse_state = ARRAY_LEVEL_DELIMITED;
                         else
-                        {
-                            /*
-                             * We don't set itemdone here; see comments in
-                             * ReadArrayStr
-                             */
-                            temp[nest_level - 1]++;
-                        }
+                            parse_state = ARRAY_ELEM_DELIMITED;
                     }
-                    break;
-                default:
-                    if (!in_quotes)
+                    else if (!array_isspace(*ptr))
                     {
-                        if (*ptr == typdelim)
-                        {
-                            /*
-                             * Delimiters can occur after an element start, an
-                             * element completion, a quoted element
-                             * completion, or a level completion.
-                             */
-                            if (parse_state != ARRAY_ELEM_STARTED &&
-                                parse_state != ARRAY_ELEM_COMPLETED &&
-                                parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-                                parse_state != ARRAY_LEVEL_COMPLETED)
-                                ereturn(escontext, -1,
-                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                         errmsg("malformed array literal: \"%s\"", str),
-                                         errdetail("Unexpected \"%c\" character.",
-                                                   typdelim)));
-                            if (parse_state == ARRAY_LEVEL_COMPLETED)
-                                parse_state = ARRAY_LEVEL_DELIMITED;
-                            else
-                                parse_state = ARRAY_ELEM_DELIMITED;
-                            itemdone = true;
-                            nelems[nest_level - 1]++;
-                        }
-                        else if (!array_isspace(*ptr))
+                        /*
+                         * Other non-space characters must be after a level
+                         * start, after an element start, or after an element
+                         * delimiter. In any case we now must be past an
+                         * element start.
+                         *
+                         * If it's a space character, we can ignore it; it
+                         * might be data or not, but it doesn't change the
+                         * parsing state.
+                         */
+                        switch (parse_state)
                         {
-                            /*
-                             * Other non-space characters must be after a
-                             * level start, after an element start, or after
-                             * an element delimiter. In any case we now must
-                             * be past an element start.
-                             */
-                            if (parse_state != ARRAY_LEVEL_STARTED &&
-                                parse_state != ARRAY_ELEM_STARTED &&
-                                parse_state != ARRAY_ELEM_DELIMITED)
+                            case ARRAY_LEVEL_STARTED:
+                            case ARRAY_ELEM_DELIMITED:
+                                /* start new unquoted element */
+                                parse_state = ARRAY_ELEM_STARTED;
+                                new_element = true;
+                                break;
+                            case ARRAY_ELEM_STARTED:
+                                /* already in element */
+                                break;
+                            default:
                                 ereturn(escontext, -1,
                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                          errmsg("malformed array literal: \"%s\"", str),
                                          errdetail("Unexpected array element.")));
-                            parse_state = ARRAY_ELEM_STARTED;
                         }
                     }
-                    break;
-            }
-            if (!itemdone)
-                ptr++;
+                }
+                break;
         }
-        temp[ndim - 1]++;
+
+        /* To reduce duplication, all new-element cases go through here. */
+        if (new_element)
+        {
+            /*
+             * Once we have found an element, the number of dimensions can no
+             * longer increase, and subsequent elements must all be at the
+             * same nesting depth.
+             */
+            ndim_frozen = true;
+            if (nest_level != ndim)
+                ereturn(escontext, -1,
+                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("malformed array literal: \"%s\"", str),
+                         errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
+            /* Count the new element */
+            nelems[nest_level - 1]++;
+            /* The array's now known non-empty, too */
+            empty_array = false;
+        }
+
         ptr++;
     }

@@ -696,9 +752,6 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
     if (empty_array)
         return 0;

-    for (i = 0; i < ndim; ++i)
-        dim[i] = temp[i];
-
     return ndim;
 }

diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 7064391468..71fa12f828 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1476,11 +1476,6 @@ ERROR:  malformed array literal: "{{1,{2}},{2,3}}"
 LINE 1: select '{{1,{2}},{2,3}}'::text[];
                ^
 DETAIL:  Unexpected "{" character.
-select '{{},{}}'::text[];
-ERROR:  malformed array literal: "{{},{}}"
-LINE 1: select '{{},{}}'::text[];
-               ^
-DETAIL:  Unexpected "}" character.
 select E'{{1,2},\\{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,2},\{2,3}}"
 LINE 1: select E'{{1,2},\\{2,3}}'::text[];
@@ -1501,6 +1496,36 @@ ERROR:  malformed array literal: "{ }}"
 LINE 1: select '{ }}'::text[];
                ^
 DETAIL:  Junk after closing right brace.
+select '{{1},{{2}}}'::text[];
+ERROR:  malformed array literal: "{{1},{{2}}}"
+LINE 1: select '{{1},{{2}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{1}},{2}}'::text[];
+ERROR:  malformed array literal: "{{{1}},{2}}"
+LINE 1: select '{{{1}},{2}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{{}}}'::text[];
+ERROR:  malformed array literal: "{{},{{}}}"
+LINE 1: select '{{},{{}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{}},{}}'::text[];
+ERROR:  malformed array literal: "{{{}},{}}"
+LINE 1: select '{{{}},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{1},{}}'::text[];
+ERROR:  malformed array literal: "{{1},{}}"
+LINE 1: select '{{1},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{1}}'::text[];
+ERROR:  malformed array literal: "{{},{1}}"
+LINE 1: select '{{},{1}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
 select array[];
 ERROR:  cannot determine type of empty array
 LINE 1: select array[];
@@ -1514,6 +1539,12 @@ select '{}'::text[];
  {}
 (1 row)

+select '{{},{}}'::text[];
+ text
+------
+ {}
+(1 row)
+
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
                      text
 -----------------------------------------------
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index f1375621e0..ea7b55bd09 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -454,16 +454,22 @@ select 'foo' ilike all (array['F%', '%O']); -- t

 -- none of the following should be accepted
 select '{{1,{2}},{2,3}}'::text[];
-select '{{},{}}'::text[];
 select E'{{1,2},\\{2,3}}'::text[];
 select '{{"1 2" x},{3}}'::text[];
 select '{}}'::text[];
 select '{ }}'::text[];
+select '{{1},{{2}}}'::text[];
+select '{{{1}},{2}}'::text[];
+select '{{},{{}}}'::text[];
+select '{{{}},{}}'::text[];
+select '{{1},{}}'::text[];
+select '{{},{1}}'::text[];
 select array[];
 -- none of the above should be accepted

 -- all of the following should be accepted
 select '{}'::text[];
+select '{{},{}}'::text[];
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
 select '{0 second  ,0 second}'::interval[];
 select '{ { "," } , { 3 } }'::text[];
--
2.31.1

Re: Cleaning up array_in()

От

Nathan Bossart

Дата:

09 мая 2023 г., 02:40:54

On Tue, May 02, 2023 at 11:41:27AM -0400, Tom Lane wrote:
> It looks like back in the dim mists of the
> Berkeley era, there was an intentional attempt to allow
> non-rectangular array input, with the missing elements automatically
> filled out as NULLs.  Since that was undocumented, we concluded it was
> a bug and plastered on some code to check for rectangularity of the
> input.

Interesting.

> Although I view this as a bug fix, AFAICT the only effects are to
> accept input that should be rejected.  So again I don't advocate
> back-patching.  But should we sneak it into v16, or wait for v17?

I think it'd be okay to sneak it into v16, given it is technically a bug
fix.

> (This leaves ArrayGetOffset0() unused, but I'm unsure whether to
> remove that.)

Why's that?  Do you think it is likely to be used again in the future?
Otherwise, 0001 LGTM.

I haven't had a chance to look at 0002 closely yet.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com

Re: Cleaning up array_in()

От

Alexander Lakhin

Дата:

09 мая 2023 г., 06:00:00

02.05.2023 18:41, Tom Lane wrote:
> So, here's a rewrite.
>
> Although I view this as a bug fix, AFAICT the only effects are to
> accept input that should be rejected.  So again I don't advocate
> back-patching.  But should we sneak it into v16, or wait for v17?

I've tested the patch from a user perspective and found no interesting cases
that were valid before, but not accepted with the patch (or vice versa):
The only thing that confused me, is the error message (it's not new, too):
select '{{{{{{{{{{1}}}}}}}}}}'::int[];
or even:
select '{{{{{{{{{{'::int[];
ERROR:  number of array dimensions (7) exceeds the maximum allowed (6)

Maybe it could be reworded like that?:
too many opening braces defining dimensions (maximum dimensions allowed: 6)

Beside that, I would like to note the following error text changes
(all of these are feasible, I think):
select '{{1},{{'::int[];
Before:
ERROR:  malformed array literal: "{{1},{{"
LINE 1: select '{{1},{{'::int[];
                ^
DETAIL:  Unexpected end of input.

After:
ERROR:  malformed array literal: "{{1},{{"
LINE 1: select '{{1},{{'::int[];
                ^
DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
---
select '{{1},{{{{{{'::int[];
Before:
ERROR:  number of array dimensions (7) exceeds the maximum allowed (6)

After:
ERROR:  malformed array literal: "{{1},{{{{{{"
LINE 1: select '{{1},{{{{{{'::int[];
                ^
DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
---
select '{{1},{}}}'::int[];
Before:
ERROR:  malformed array literal: "{{1},{}}}"
LINE 1: select '{{1},{}}}'::int[];
                ^
DETAIL:  Unexpected "}" character.

After:
ERROR:  malformed array literal: "{{1},{}}}"
LINE 1: select '{{1},{}}}'::int[];
                ^
DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
---
select '{{}}}'::int[];
Before:
ERROR:  malformed array literal: "{{}}}"
LINE 1: select '{{}}}'::int[];
                ^
DETAIL:  Unexpected "}" character.

After:
ERROR:  malformed array literal: "{{}}}"
LINE 1: select '{{}}}'::int[];
                ^
DETAIL:  Junk after closing right brace.

Best regards,
Alexander

Re: Cleaning up array_in()

От

Tom Lane

Дата:

09 мая 2023 г., 06:06:47

Alexander Lakhin <exclusion@gmail.com> writes:
> The only thing that confused me, is the error message (it's not new, too):
> select '{{{{{{{{{{1}}}}}}}}}}'::int[];
> or even:
> select '{{{{{{{{{{'::int[];
> ERROR:  number of array dimensions (7) exceeds the maximum allowed (6)

Yeah, I didn't touch that, but it's pretty bogus because the first
number will always be "7" even if you wrote more than 7 left braces,
since the code errors out immediately upon finding that it's seen
too many braces.

The equivalent message in the PLs just says "number of array dimensions
exceeds the maximum allowed (6)".  I'm inclined to do likewise in
array_in, but didn't touch it here.

> Beside that, I would like to note the following error text changes
> (all of these are feasible, I think):

I'll look into whether we can improve those, unless you had a patch
in mind already?

            regards, tom lane

Re: Cleaning up array_in()

От

Alexander Lakhin

Дата:

09 мая 2023 г., 14:00:01

09.05.2023 06:06, Tom Lane wrote:
> Alexander Lakhin <exclusion@gmail.com> writes:
>> The only thing that confused me, is the error message (it's not new, too):
>> select '{{{{{{{{{{1}}}}}}}}}}'::int[];
>> or even:
>> select '{{{{{{{{{{'::int[];
>> ERROR:  number of array dimensions (7) exceeds the maximum allowed (6)
> Yeah, I didn't touch that, but it's pretty bogus because the first
> number will always be "7" even if you wrote more than 7 left braces,
> since the code errors out immediately upon finding that it's seen
> too many braces.
>
> The equivalent message in the PLs just says "number of array dimensions
> exceeds the maximum allowed (6)".  I'm inclined to do likewise in
> array_in, but didn't touch it here.

I think that, strictly speaking, we have no array dimensions in the string
'{{{{{{{{{{'; there are only characters (braces) during the string parsing.
While in the PLs we definitely deal with real arrays, which have dimensions.

>> Beside that, I would like to note the following error text changes
>> (all of these are feasible, I think):
> I'll look into whether we can improve those, unless you had a patch
> in mind already?

Those messages looked more or less correct to me, I just wanted to note how they are
changing (and haven't highlighted messages, that are not), but if you see here room
for improvement, please look into it (I have no good formulations yet).

Best regards,
Alexander

Re: Cleaning up array_in()

От

Nikhil Benesch

Дата:

05 июня 2023 г., 04:48:38

I took a look at 0002 because I attempted a similar but more surgical
fix in [0].

I spotted a few opportunities for further reducing state tracked by
`ArrayCount`. You may not find all of these suggestions to be
worthwhile.

1) `in_quotes` appears to be wholly redundant with `parse_state ==
ARRAY_QUOTED_ELEM_STARTED`.

2) The `empty_array` special case does not seem to be important to
ArrayCount's callers, which don't even special case `ndims == 0` but
rather `ArrayGetNItemsSafe(..) == 0`. Perhaps this is a philosophical
question as to whether `ArrayCount('{{}, {}}')` should return
(ndims=2, dims=[2, 0]) or (ndims=0). Obviously someone needs to do
that normalization, but `ArrayCount` could leave that normalization to
`ReadArrayStr`.

3) `eoArray` could be replaced with a new `ArrayParseState` of
`ARRAY_END`. Just a matter of taste, but "end of array" feels like a
parser state to me.

I also have a sense that `ndims_frozen` made the distinction between
`ARRAY_ELEM_DELIMITED` and `ARRAY_LEVEL_DELIMITED` unnecessary, and
the two states could be merged into a single `ARRAY_DELIMITED` state,
but I've not pulled on this thread hard enough to say so confidently.

Thanks for doing the serious overhaul. As you say, the element
counting logic is much easier to follow now. I'm much more confident
that your patch is correct than mine.

Cheers,
Nikhil

[0]: https://www.postgresql.org/message-id/CAPWqQZRHsFuvWJj%3DczXuKEB03LF4ctPpDE1k3CoexweEFicBKQ%40mail.gmail.com

On Tue, May 9, 2023 at 7:00 AM Alexander Lakhin <exclusion@gmail.com> wrote:
>
> 09.05.2023 06:06, Tom Lane wrote:
> > Alexander Lakhin <exclusion@gmail.com> writes:
> >> The only thing that confused me, is the error message (it's not new, too):
> >> select '{{{{{{{{{{1}}}}}}}}}}'::int[];
> >> or even:
> >> select '{{{{{{{{{{'::int[];
> >> ERROR:  number of array dimensions (7) exceeds the maximum allowed (6)
> > Yeah, I didn't touch that, but it's pretty bogus because the first
> > number will always be "7" even if you wrote more than 7 left braces,
> > since the code errors out immediately upon finding that it's seen
> > too many braces.
> >
> > The equivalent message in the PLs just says "number of array dimensions
> > exceeds the maximum allowed (6)".  I'm inclined to do likewise in
> > array_in, but didn't touch it here.
>
> I think that, strictly speaking, we have no array dimensions in the string
> '{{{{{{{{{{'; there are only characters (braces) during the string parsing.
> While in the PLs we definitely deal with real arrays, which have dimensions.
>
> >> Beside that, I would like to note the following error text changes
> >> (all of these are feasible, I think):
> > I'll look into whether we can improve those, unless you had a patch
> > in mind already?
>
> Those messages looked more or less correct to me, I just wanted to note how they are
> changing (and haven't highlighted messages, that are not), but if you see here room
> for improvement, please look into it (I have no good formulations yet).
>
> Best regards,
> Alexander
>
>

Re: Cleaning up array_in()

От

Tom Lane

Дата:

05 июня 2023 г., 05:38:28

Nikhil Benesch <nikhil.benesch@gmail.com> writes:
> I took a look at 0002 because I attempted a similar but more surgical
> fix in [0].
> I spotted a few opportunities for further reducing state tracked by
> `ArrayCount`.

Wow, thanks for looking!  I've not run these suggestions to ground
(and won't have time for a few days), but they sound very good.

            regards, tom lane

Re: Cleaning up array_in()

От

jian he

Дата:

03 июля 2023 г., 07:16:22

On Mon, Jun 5, 2023 at 9:48 AM Nikhil Benesch <nikhil.benesch@gmail.com> wrote:
>
> I took a look at 0002 because I attempted a similar but more surgical
> fix in [0].
>
> I spotted a few opportunities for further reducing state tracked by
> `ArrayCount`. You may not find all of these suggestions to be
> worthwhile.

I pull ArrayCount into a separate C function, regress test (manually)
based on patch regress test.

> 1) `in_quotes` appears to be wholly redundant with `parse_state ==
> ARRAY_QUOTED_ELEM_STARTED`.

removing it works as expected.

> 3) `eoArray` could be replaced with a new `ArrayParseState` of
> `ARRAY_END`. Just a matter of taste, but "end of array" feels like a
> parser state to me.

works. (reduce one variable.)

> I also have a sense that `ndims_frozen` made the distinction between
> `ARRAY_ELEM_DELIMITED` and `ARRAY_LEVEL_DELIMITED` unnecessary, and
> the two states could be merged into a single `ARRAY_DELIMITED` state,
> but I've not pulled on this thread hard enough to say so confidently.

merging these states into one work as expected.

Re: Cleaning up array_in()

От

jian he

Дата:

03 июля 2023 г., 14:52:23

based on Nikhil Benesch idea.
The attached diff is  based on
v1-0002-Rewrite-ArrayCount-to-make-dimensionality-checks.patch.

diff compare v1-0002:
select '{{1,{2}},{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,{2}},{2,3}}"
 LINE 1: select '{{1,{2}},{2,3}}'::text[];
                ^
-DETAIL:  Unexpected "{" character.
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
----------------------------------------------------
 select E'{{1,2},\\{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,2},\{2,3}}"
 LINE 1: select E'{{1,2},\\{2,3}}'::text[];
                ^
-DETAIL:  Unexpected "\" character.
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.

-------
new errors details kind of make sense.

Вложения

0003-changed-based-on-Nikhil-Benesch-idea.patch

Re: Cleaning up array_in()

От

Tom Lane

Дата:

04 июля 2023 г., 21:33:14

Nikhil Benesch <nikhil.benesch@gmail.com> writes:
> I spotted a few opportunities for further reducing state tracked by
> `ArrayCount`. You may not find all of these suggestions to be
> worthwhile.

I found some time today to look at these points.

> 1) `in_quotes` appears to be wholly redundant with `parse_state ==
> ARRAY_QUOTED_ELEM_STARTED`.

I agree that it is redundant, but I'm disinclined to remove it because
the in_quotes logic matches that in ReadArrayStr.  I think it's better
to keep those two functions in sync.  The parse_state represents an
independent set of checks that need not be repeated by ReadArrayStr,
but both functions have to track quoting.  The same for eoArray.

> 2) The `empty_array` special case does not seem to be important to
> ArrayCount's callers, which don't even special case `ndims == 0` but
> rather `ArrayGetNItemsSafe(..) == 0`. Perhaps this is a philosophical
> question as to whether `ArrayCount('{{}, {}}')` should return
> (ndims=2, dims=[2, 0]) or (ndims=0). Obviously someone needs to do
> that normalization, but `ArrayCount` could leave that normalization to
> `ReadArrayStr`.

This idea I do like.  While looking at the callers, I also noticed
that it's impossible currently to write an empty array with explicit
specification of bounds.  It seems to me that you ought to be able
to write, say,

SELECT '[1:0]={}'::int[];

but up to now you got "upper bound cannot be less than lower bound";
and if you somehow got past that, you'd get "Specified array
dimensions do not match array contents." because of ArrayCount's
premature optimization of "one-dimensional array with length zero"
to "zero-dimensional array".  We can fix that by doing what you said
and adjusting the initial bounds restriction to be "upper bound cannot
be less than lower bound minus one".

> I also have a sense that `ndims_frozen` made the distinction between
> `ARRAY_ELEM_DELIMITED` and `ARRAY_LEVEL_DELIMITED` unnecessary, and
> the two states could be merged into a single `ARRAY_DELIMITED` state,
> but I've not pulled on this thread hard enough to say so confidently.

I looked at jian he's implementation of that and was not impressed:
I do not think the logic gets any clearer, and it seems to me that
this makes a substantial dent in ArrayCount's ability to detect syntax
errors.  The fact that one of the test case error messages got better
seems pretty accidental to me.  We can get the same result in a more
purposeful way by giving a different error message for
ARRAY_ELEM_DELIMITED.

So I end up with the attached.  I went ahead and dropped
ArrayGetOffset0() as part of 0001, and I split 0002 into two patches
where the new 0002 avoids re-indenting any existing code in order
to ease review, and then 0003 is just a mechanical application
of pgindent.

I still didn't do anything about "number of array dimensions (7)
exceeds the maximum allowed (6)".  There are quite a few instances
of that wording, not only array_in's, and I'm not sure whether to
change the rest.  In any case that looks like something that
could be addressed separately.  The other error message wording
changes here seem to me to be okay.

            regards, tom lane

From 82cac24a618db69e149c140e7064eebda9f1ddfc Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 4 Jul 2023 12:39:41 -0400
Subject: [PATCH v2 1/3] Simplify and speed up ReadArrayStr().

ReadArrayStr() seems to have been written on the assumption that
non-rectangular input is fine and it should pad with NULLs anywhere
that elements are missing.  We disallowed non-rectangular input
ages ago (commit 0e13d627b), but never simplified this function
as a follow-up.  In particular, the existing code recomputes each
element's linear location from scratch, which is quite unnecessary
for rectangular input: we can just assign the elements sequentially,
saving lots of arithmetic.  Add some more commentary while at it.

ArrayGetOffset0() is no longer used anywhere, so remove it.
---
 src/backend/utils/adt/arrayfuncs.c | 69 ++++++++++++++----------------
 src/backend/utils/adt/arrayutils.c | 15 -------
 src/include/utils/array.h          |  1 -
 3 files changed, 33 insertions(+), 52 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 9000f83a83..050568808a 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -93,7 +93,7 @@ static bool array_isspace(char ch);
 static int    ArrayCount(const char *str, int *dim, char typdelim,
                        Node *escontext);
 static bool ReadArrayStr(char *arrayStr, const char *origStr,
-                         int nitems, int ndim, int *dim,
+                         int nitems,
                          FmgrInfo *inputproc, Oid typioparam, int32 typmod,
                          char typdelim,
                          int typlen, bool typbyval, char typalign,
@@ -391,7 +391,7 @@ array_in(PG_FUNCTION_ARGS)
     dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
     nullsPtr = (bool *) palloc(nitems * sizeof(bool));
     if (!ReadArrayStr(p, string,
-                      nitems, ndim, dim,
+                      nitems,
                       &my_extra->proc, typioparam, typmod,
                       typdelim,
                       typlen, typbyval, typalign,
@@ -457,7 +457,8 @@ array_isspace(char ch)

 /*
  * ArrayCount
- *     Determines the dimensions for an array string.
+ *     Determines the dimensions for an array string.  This includes
+ *     syntax-checking the array structure decoration (braces and delimiters).
  *
  * Returns number of dimensions as function result.  The axis lengths are
  * returned in dim[], which must be of size MAXDIM.
@@ -704,16 +705,14 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 /*
  * ReadArrayStr :
  *     parses the array string pointed to by "arrayStr" and converts the values
- *     to internal format.  Unspecified elements are initialized to nulls.
- *     The array dimensions must already have been determined.
+ *     to internal format.  The array dimensions must have been determined,
+ *     and the case of an empty array must have been handled earlier.
  *
  * Inputs:
  *    arrayStr: the string to parse.
  *              CAUTION: the contents of "arrayStr" will be modified!
  *    origStr: the unmodified input string, used only in error messages.
  *    nitems: total number of array elements, as already determined.
- *    ndim: number of array dimensions
- *    dim[]: array axis lengths
  *    inputproc: type-specific input procedure for element datatype.
  *    typioparam, typmod: auxiliary values to pass to inputproc.
  *    typdelim: the value delimiter (type-specific).
@@ -738,8 +737,6 @@ static bool
 ReadArrayStr(char *arrayStr,
              const char *origStr,
              int nitems,
-             int ndim,
-             int *dim,
              FmgrInfo *inputproc,
              Oid typioparam,
              int32 typmod,
@@ -753,20 +750,13 @@ ReadArrayStr(char *arrayStr,
              int32 *nbytes,
              Node *escontext)
 {
-    int            i,
+    int            i = 0,
                 nest_level = 0;
     char       *srcptr;
     bool        in_quotes = false;
     bool        eoArray = false;
     bool        hasnull;
     int32        totbytes;
-    int            indx[MAXDIM] = {0},
-                prod[MAXDIM];
-
-    mda_get_prod(ndim, dim, prod);
-
-    /* Initialize is-null markers to true */
-    memset(nulls, true, nitems * sizeof(bool));

     /*
      * We have to remove " and \ characters to create a clean item value to
@@ -789,11 +779,20 @@ ReadArrayStr(char *arrayStr,
         bool        itemdone = false;
         bool        leadingspace = true;
         bool        hasquoting = false;
-        char       *itemstart;
-        char       *dstptr;
-        char       *dstendptr;
+        char       *itemstart;    /* start of de-escaped text */
+        char       *dstptr;        /* next output point for de-escaped text */
+        char       *dstendptr;    /* last significant output char + 1 */

-        i = -1;
+        /*
+         * Parse next array element, collecting the de-escaped text into
+         * itemstart..dstendptr-1.
+         *
+         * Notice that we do not set "itemdone" until we see a separator
+         * (typdelim character) or the array's final right brace.  Since the
+         * array is already verified to be nonempty and rectangular, there is
+         * guaranteed to be another element to be processed in the first case,
+         * while in the second case of course we'll exit the outer loop.
+         */
         itemstart = dstptr = dstendptr = srcptr;

         while (!itemdone)
@@ -840,13 +839,7 @@ ReadArrayStr(char *arrayStr,
                 case '{':
                     if (!in_quotes)
                     {
-                        if (nest_level >= ndim)
-                            ereturn(escontext, false,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"",
-                                            origStr)));
                         nest_level++;
-                        indx[nest_level - 1] = 0;
                         srcptr++;
                     }
                     else
@@ -860,14 +853,9 @@ ReadArrayStr(char *arrayStr,
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"",
                                             origStr)));
-                        if (i == -1)
-                            i = ArrayGetOffset0(ndim, indx, prod);
-                        indx[nest_level - 1] = 0;
                         nest_level--;
                         if (nest_level == 0)
                             eoArray = itemdone = true;
-                        else
-                            indx[nest_level - 1]++;
                         srcptr++;
                     }
                     else
@@ -878,10 +866,7 @@ ReadArrayStr(char *arrayStr,
                         *dstptr++ = *srcptr++;
                     else if (*srcptr == typdelim)
                     {
-                        if (i == -1)
-                            i = ArrayGetOffset0(ndim, indx, prod);
                         itemdone = true;
-                        indx[ndim - 1]++;
                         srcptr++;
                     }
                     else if (array_isspace(*srcptr))
@@ -905,15 +890,18 @@ ReadArrayStr(char *arrayStr,
             }
         }

+        /* Terminate de-escaped string */
         Assert(dstptr < srcptr);
         *dstendptr = '\0';

-        if (i < 0 || i >= nitems)
+        /* Safety check that we don't write past the output arrays */
+        if (i >= nitems)
             ereturn(escontext, false,
                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                      errmsg("malformed array literal: \"%s\"",
                             origStr)));

+        /* Convert the de-escaped string into the next output array entries */
         if (Array_nulls && !hasquoting &&
             pg_strcasecmp(itemstart, "NULL") == 0)
         {
@@ -934,8 +922,17 @@ ReadArrayStr(char *arrayStr,
                 return false;
             nulls[i] = false;
         }
+
+        i++;
     }

+    /* Cross-check that we filled all the output array entries */
+    if (i != nitems)
+        ereturn(escontext, false,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("malformed array literal: \"%s\"",
+                        origStr)));
+
     /*
      * Check for nulls, compute total data space needed
      */
diff --git a/src/backend/utils/adt/arrayutils.c b/src/backend/utils/adt/arrayutils.c
index 62152f79f5..b8bccc7cca 100644
--- a/src/backend/utils/adt/arrayutils.c
+++ b/src/backend/utils/adt/arrayutils.c
@@ -43,21 +43,6 @@ ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx)
     return offset;
 }

-/*
- * Same, but subscripts are assumed 0-based, and use a scale array
- * instead of raw dimension data (see mda_get_prod to create scale array)
- */
-int
-ArrayGetOffset0(int n, const int *tup, const int *scale)
-{
-    int            i,
-                lin = 0;
-
-    for (i = 0; i < n; i++)
-        lin += tup[i] * scale[i];
-    return lin;
-}
-
 /*
  * Convert array dimensions into number of elements
  *
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index b13dfb345e..d49adc38b7 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -448,7 +448,6 @@ extern void array_free_iterator(ArrayIterator iterator);
  */

 extern int    ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx);
-extern int    ArrayGetOffset0(int n, const int *tup, const int *scale);
 extern int    ArrayGetNItems(int ndim, const int *dims);
 extern int    ArrayGetNItemsSafe(int ndim, const int *dims,
                                struct Node *escontext);
--
2.39.3

From 64f318bbd5a43e59749657c3972c4c6eaa58af8f Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 4 Jul 2023 14:03:40 -0400
Subject: [PATCH v2 2/3] Rewrite ArrayCount() to make dimensionality checks
 more complete.

ArrayCount has had a few layers of revision over the years,
reaching a point where it's really quite baroque and unintelligible.
There are actually four different arrays holding dimensions, which
can be reduced to two with little effort.  It was also close to
impossible to figure out why the dimension-counting actually worked
correctly.  Rewrite to make it perhaps a little less opaque.
In particular, a new element or subarray is now counted when we see
the start of the element or subarray, not at some later point where
we might not even be at the same nest_level anymore.

Also, install guards to catch non-rectangularity cases that were
previously missed, along the same lines as recent plperl and plpython
fixes: we were not checking that all scalar elements appear at the
same nesting depth.  Thanks to the very different logic, this seems
not to have caused any of the sort of internal errors that the PLs
suffered from, but it's still wrong.

On the other hand, the behavior of plperl and plpython suggests
that we should allow empty sub-arrays, as in '{{},{}}'; those
languages interpret equivalent constructs such as '[[],[]]' as
an empty (hence zero-dimensional) array, and it seems unclear
why array_in should not.  Remove the hack that was installed to
reject that case.

Also, fix things so that it's possible to write an empty array
with explicit bounds spec, along the lines of '[1:0]={}'.
Before, that was rejected with a complaint about upper bound
less than lower, but I don't see a good reason not to allow it.

In passing, remove ArrayParseState.ARRAY_ELEM_COMPLETED, a state
that the state machine never entered yet wasted cycles checking for.

(This patch looks a bit odd because I tried to avoid reindenting
existing code, despite having removed one level of loop.  That
seemed to me to make it easier to read the code changes.  A
follow-up pgindent run is needed.)
---
 src/backend/utils/adt/arrayfuncs.c   | 241 +++++++++++++++++----------
 src/test/regress/expected/arrays.out |  68 +++++++-
 src/test/regress/sql/arrays.sql      |  13 +-
 3 files changed, 230 insertions(+), 92 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 050568808a..4fb052b23e 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -57,7 +57,6 @@ typedef enum
     ARRAY_NO_LEVEL,
     ARRAY_LEVEL_STARTED,
     ARRAY_ELEM_STARTED,
-    ARRAY_ELEM_COMPLETED,
     ARRAY_QUOTED_ELEM_STARTED,
     ARRAY_QUOTED_ELEM_COMPLETED,
     ARRAY_ELEM_DELIMITED,
@@ -302,10 +301,18 @@ array_in(PG_FUNCTION_ARGS)
         *q = '\0';
         ub = atoi(p);
         p = q + 1;
-        if (ub < lBound[ndim])
+
+        /* Upper bound of INT_MAX is disallowed, cf ArrayCheckBounds() */
+        if (ub == INT_MAX)
+            ereturn(escontext, (Datum) 0,
+                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                     errmsg("array upper bound is too large: %d",
+                            ub)));
+        /* Now it's safe to compute ub + 1 */
+        if (ub + 1 < lBound[ndim])
             ereturn(escontext, (Datum) 0,
                     (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
-                     errmsg("upper bound cannot be less than lower bound")));
+                     errmsg("upper bound cannot be less than lower bound minus one")));

         dim[ndim] = ub - lBound[ndim] + 1;
         ndim++;
@@ -461,7 +468,9 @@ array_isspace(char ch)
  *     syntax-checking the array structure decoration (braces and delimiters).
  *
  * Returns number of dimensions as function result.  The axis lengths are
- * returned in dim[], which must be of size MAXDIM.
+ * returned in dim[], which must be of size MAXDIM.  This function does not
+ * special-case empty arrays: it will return a positive result with some
+ * zero axis length(s).
  *
  * If we detect an error, fill *escontext with error details and return -1
  * (unless escontext isn't provided, in which case errors will be thrown).
@@ -472,31 +481,22 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
     int            nest_level = 0,
                 i;
     int            ndim = 1,
-                temp[MAXDIM],
-                nelems[MAXDIM],
-                nelems_last[MAXDIM];
+                nelems[MAXDIM];
+    bool        ndim_frozen = false;
     bool        in_quotes = false;
     bool        eoArray = false;
-    bool        empty_array = true;
     const char *ptr;
     ArrayParseState parse_state = ARRAY_NO_LEVEL;

+    /* Initialize dim[] entries to -1 meaning "unknown" */
     for (i = 0; i < MAXDIM; ++i)
-    {
-        temp[i] = dim[i] = nelems_last[i] = 0;
-        nelems[i] = 1;
-    }
+        dim[i] = -1;

+    /* Scan string until we reach closing brace */
     ptr = str;
     while (!eoArray)
     {
-        bool        itemdone = false;
-
-        while (!itemdone)
-        {
-            if (parse_state == ARRAY_ELEM_STARTED ||
-                parse_state == ARRAY_QUOTED_ELEM_STARTED)
-                empty_array = false;
+        bool        new_element = false;

             switch (*ptr)
             {
@@ -513,17 +513,25 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                      * start, or after an element delimiter. In any case we
                      * now must be past an element start.
                      */
-                    if (parse_state != ARRAY_LEVEL_STARTED &&
-                        parse_state != ARRAY_ELEM_STARTED &&
-                        parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-                        parse_state != ARRAY_ELEM_DELIMITED)
+                switch (parse_state)
+                {
+                    case ARRAY_LEVEL_STARTED:
+                    case ARRAY_ELEM_DELIMITED:
+                        /* start new unquoted element */
+                        parse_state = ARRAY_ELEM_STARTED;
+                        new_element = true;
+                        break;
+                    case ARRAY_ELEM_STARTED:
+                    case ARRAY_QUOTED_ELEM_STARTED:
+                        /* already in element */
+                        break;
+                    default:
                         ereturn(escontext, -1,
                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                  errmsg("malformed array literal: \"%s\"", str),
                                  errdetail("Unexpected \"%c\" character.",
                                            '\\')));
-                    if (parse_state != ARRAY_QUOTED_ELEM_STARTED)
-                        parse_state = ARRAY_ELEM_STARTED;
+                }
                     /* skip the escaped character */
                     if (*(ptr + 1))
                         ptr++;
@@ -540,18 +548,28 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                      * element start, or after an element delimiter. In any
                      * case we now must be past an element start.
                      */
-                    if (parse_state != ARRAY_LEVEL_STARTED &&
-                        parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-                        parse_state != ARRAY_ELEM_DELIMITED)
+                switch (parse_state)
+                {
+                    case ARRAY_LEVEL_STARTED:
+                    case ARRAY_ELEM_DELIMITED:
+                        /* start new quoted element */
+                        Assert(!in_quotes);
+                        in_quotes = true;
+                        parse_state = ARRAY_QUOTED_ELEM_STARTED;
+                        new_element = true;
+                        break;
+                    case ARRAY_QUOTED_ELEM_STARTED:
+                        /* already in element, end it */
+                        Assert(in_quotes);
+                        in_quotes = false;
+                        parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
+                        break;
+                    default:
                         ereturn(escontext, -1,
                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                  errmsg("malformed array literal: \"%s\"", str),
                                  errdetail("Unexpected array element.")));
-                    in_quotes = !in_quotes;
-                    if (in_quotes)
-                        parse_state = ARRAY_QUOTED_ELEM_STARTED;
-                    else
-                        parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
+                }
                     break;
                 case '{':
                     if (!in_quotes)
@@ -559,27 +577,52 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                         /*
                          * A left brace can occur if no nesting has occurred
                          * yet, after a level start, or after a level
-                         * delimiter.
+                         * delimiter.  If we see one after an element
+                         * delimiter, we have something like "{1,{2}}", so
+                         * complain about non-rectangularity.
                          */
-                        if (parse_state != ARRAY_NO_LEVEL &&
-                            parse_state != ARRAY_LEVEL_STARTED &&
-                            parse_state != ARRAY_LEVEL_DELIMITED)
+                    switch (parse_state)
+                    {
+                        case ARRAY_NO_LEVEL:
+                        case ARRAY_LEVEL_STARTED:
+                        case ARRAY_LEVEL_DELIMITED:
+                            /* okay */
+                            break;
+                        case ARRAY_ELEM_DELIMITED:
+                            ereturn(escontext, -1,
+                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                     errmsg("malformed array literal: \"%s\"", str),
+                                     errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
+                        default:
                             ereturn(escontext, -1,
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"", str),
                                      errdetail("Unexpected \"%c\" character.",
                                                '{')));
+                    }
                         parse_state = ARRAY_LEVEL_STARTED;
+                    /* Nested sub-arrays count as elements of outer level */
+                    if (nest_level > 0)
+                        nelems[nest_level - 1]++;
+                    /* Initialize element counting in the new level */
                         if (nest_level >= MAXDIM)
                             ereturn(escontext, -1,
                                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                      errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
                                             nest_level + 1, MAXDIM)));
-                        temp[nest_level] = 0;
+                    nelems[nest_level] = 0;
                         nest_level++;
                         if (ndim < nest_level)
+                    {
+                        /* Can't increase ndim once it's frozen */
+                        if (ndim_frozen)
+                            ereturn(escontext, -1,
+                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                     errmsg("malformed array literal: \"%s\"", str),
+                                     errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
                             ndim = nest_level;
                     }
+                }
                     break;
                 case '}':
                     if (!in_quotes)
@@ -587,46 +630,56 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                         /*
                          * A right brace can occur after an element start, an
                          * element completion, a quoted element completion, or
-                         * a level completion.
+                         * a level completion.  We also allow it after a level
+                         * start, that is an empty sub-array "{}" --- but that
+                         * freezes the number of dimensions and all such
+                         * sub-arrays must be at the same level, just like
+                         * sub-arrays containing elements.
                          */
-                        if (parse_state != ARRAY_ELEM_STARTED &&
-                            parse_state != ARRAY_ELEM_COMPLETED &&
-                            parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-                            parse_state != ARRAY_LEVEL_COMPLETED &&
-                            !(nest_level == 1 && parse_state == ARRAY_LEVEL_STARTED))
+                    switch (parse_state)
+                    {
+                        case ARRAY_ELEM_STARTED:
+                        case ARRAY_QUOTED_ELEM_COMPLETED:
+                        case ARRAY_LEVEL_COMPLETED:
+                            /* okay */
+                            break;
+                        case ARRAY_LEVEL_STARTED:
+                            /* empty sub-array: OK if at correct nest_level */
+                            ndim_frozen = true;
+                            if (nest_level != ndim)
                             ereturn(escontext, -1,
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Unexpected \"%c\" character.",
-                                               '}')));
-                        parse_state = ARRAY_LEVEL_COMPLETED;
-                        if (nest_level == 0)
+                                         errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
+                            break;
+                        default:
                             ereturn(escontext, -1,
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Unmatched \"%c\" character.", '}')));
+                                     errdetail("Unexpected \"%c\" character.",
+                                               '}')));
+                    }
+                    parse_state = ARRAY_LEVEL_COMPLETED;
+                    /* The parse state check assured we're in a level. */
+                    Assert(nest_level > 0);
                         nest_level--;

-                        if (nelems_last[nest_level] != 0 &&
-                            nelems[nest_level] != nelems_last[nest_level])
+                    if (dim[nest_level] < 0)
+                    {
+                        /* Save length of first sub-array of this level */
+                        dim[nest_level] = nelems[nest_level];
+                    }
+                    else if (nelems[nest_level] != dim[nest_level])
+                    {
+                        /* Subsequent sub-arrays must have same length */
                             ereturn(escontext, -1,
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Multidimensional arrays must have "
-                                               "sub-arrays with matching "
-                                               "dimensions.")));
-                        nelems_last[nest_level] = nelems[nest_level];
-                        nelems[nest_level] = 1;
-                        if (nest_level == 0)
-                            eoArray = itemdone = true;
-                        else
-                        {
-                            /*
-                             * We don't set itemdone here; see comments in
-                             * ReadArrayStr
-                             */
-                            temp[nest_level - 1]++;
+                                 errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
                         }
+                    /* Done if this is the outermost level's '}' */
+                    if (nest_level == 0)
+                        eoArray = true;
                     }
                     break;
                 default:
@@ -635,12 +688,11 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                         if (*ptr == typdelim)
                         {
                             /*
-                             * Delimiters can occur after an element start, an
-                             * element completion, a quoted element
-                             * completion, or a level completion.
+                             * Delimiters can occur after an element start, a
+                             * quoted element completion, or a level
+                             * completion.
                              */
                             if (parse_state != ARRAY_ELEM_STARTED &&
-                                parse_state != ARRAY_ELEM_COMPLETED &&
                                 parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
                                 parse_state != ARRAY_LEVEL_COMPLETED)
                                 ereturn(escontext, -1,
@@ -652,8 +704,6 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                                 parse_state = ARRAY_LEVEL_DELIMITED;
                             else
                                 parse_state = ARRAY_ELEM_DELIMITED;
-                            itemdone = true;
-                            nelems[nest_level - 1]++;
                         }
                         else if (!array_isspace(*ptr))
                         {
@@ -662,23 +712,51 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                              * level start, after an element start, or after
                              * an element delimiter. In any case we now must
                              * be past an element start.
+                             *
+                             * If it's a space character, we can ignore it; it
+                             * might be data or not, but it doesn't change the
+                             * parsing state.
                              */
-                            if (parse_state != ARRAY_LEVEL_STARTED &&
-                                parse_state != ARRAY_ELEM_STARTED &&
-                                parse_state != ARRAY_ELEM_DELIMITED)
+                        switch (parse_state)
+                        {
+                            case ARRAY_LEVEL_STARTED:
+                            case ARRAY_ELEM_DELIMITED:
+                                /* start new unquoted element */
+                                parse_state = ARRAY_ELEM_STARTED;
+                                new_element = true;
+                                break;
+                            case ARRAY_ELEM_STARTED:
+                                /* already in element */
+                                break;
+                            default:
                                 ereturn(escontext, -1,
                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                          errmsg("malformed array literal: \"%s\"", str),
                                          errdetail("Unexpected array element.")));
-                            parse_state = ARRAY_ELEM_STARTED;
+                        }
                         }
                     }
                     break;
             }
-            if (!itemdone)
-                ptr++;
+
+        /* To reduce duplication, all new-element cases go through here. */
+        if (new_element)
+        {
+            /*
+             * Once we have found an element, the number of dimensions can no
+             * longer increase, and subsequent elements must all be at the
+             * same nesting depth.
+             */
+            ndim_frozen = true;
+            if (nest_level != ndim)
+                ereturn(escontext, -1,
+                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("malformed array literal: \"%s\"", str),
+                         errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
+            /* Count the new element */
+            nelems[nest_level - 1]++;
         }
-        temp[ndim - 1]++;
+
         ptr++;
     }

@@ -692,13 +770,6 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                      errdetail("Junk after closing right brace.")));
     }

-    /* special case for an empty array */
-    if (empty_array)
-        return 0;
-
-    for (i = 0; i < ndim; ++i)
-        dim[i] = temp[i];
-
     return ndim;
 }

diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 7064391468..26d4281d94 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1475,12 +1475,7 @@ select '{{1,{2}},{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,{2}},{2,3}}"
 LINE 1: select '{{1,{2}},{2,3}}'::text[];
                ^
-DETAIL:  Unexpected "{" character.
-select '{{},{}}'::text[];
-ERROR:  malformed array literal: "{{},{}}"
-LINE 1: select '{{},{}}'::text[];
-               ^
-DETAIL:  Unexpected "}" character.
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
 select E'{{1,2},\\{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,2},\{2,3}}"
 LINE 1: select E'{{1,2},\\{2,3}}'::text[];
@@ -1501,6 +1496,49 @@ ERROR:  malformed array literal: "{ }}"
 LINE 1: select '{ }}'::text[];
                ^
 DETAIL:  Junk after closing right brace.
+select '{{1},{{2}}}'::text[];
+ERROR:  malformed array literal: "{{1},{{2}}}"
+LINE 1: select '{{1},{{2}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{1}},{2}}'::text[];
+ERROR:  malformed array literal: "{{{1}},{2}}"
+LINE 1: select '{{{1}},{2}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{{}}}'::text[];
+ERROR:  malformed array literal: "{{},{{}}}"
+LINE 1: select '{{},{{}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{}},{}}'::text[];
+ERROR:  malformed array literal: "{{{}},{}}"
+LINE 1: select '{{{}},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{1},{}}'::text[];
+ERROR:  malformed array literal: "{{1},{}}"
+LINE 1: select '{{1},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{1}}'::text[];
+ERROR:  malformed array literal: "{{},{1}}"
+LINE 1: select '{{},{1}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '[2147483646:2147483647]={1,2}'::int[];
+ERROR:  array upper bound is too large: 2147483647
+LINE 1: select '[2147483646:2147483647]={1,2}'::int[];
+               ^
+select '[1:-1]={}'::int[];
+ERROR:  upper bound cannot be less than lower bound minus one
+LINE 1: select '[1:-1]={}'::int[];
+               ^
+select '[1:0]={1}'::int[];
+ERROR:  malformed array literal: "[1:0]={1}"
+LINE 1: select '[1:0]={1}'::int[];
+               ^
+DETAIL:  Specified array dimensions do not match array contents.
 select array[];
 ERROR:  cannot determine type of empty array
 LINE 1: select array[];
@@ -1514,6 +1552,12 @@ select '{}'::text[];
  {}
 (1 row)

+select '{{},{}}'::text[];
+ text
+------
+ {}
+(1 row)
+
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
                      text
 -----------------------------------------------
@@ -1559,6 +1603,18 @@ select '[0:1]={1.1,2.2}'::float8[];
  [0:1]={1.1,2.2}
 (1 row)

+select '[2147483646:2147483646]={1}'::int[];
+            int4
+-----------------------------
+ [2147483646:2147483646]={1}
+(1 row)
+
+select '[1:0]={}'::int[];
+ int4
+------
+ {}
+(1 row)
+
 -- all of the above should be accepted
 -- tests for array aggregates
 CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]);
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index f1375621e0..b7e2b180cc 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -454,16 +454,25 @@ select 'foo' ilike all (array['F%', '%O']); -- t

 -- none of the following should be accepted
 select '{{1,{2}},{2,3}}'::text[];
-select '{{},{}}'::text[];
 select E'{{1,2},\\{2,3}}'::text[];
 select '{{"1 2" x},{3}}'::text[];
 select '{}}'::text[];
 select '{ }}'::text[];
+select '{{1},{{2}}}'::text[];
+select '{{{1}},{2}}'::text[];
+select '{{},{{}}}'::text[];
+select '{{{}},{}}'::text[];
+select '{{1},{}}'::text[];
+select '{{},{1}}'::text[];
+select '[2147483646:2147483647]={1,2}'::int[];
+select '[1:-1]={}'::int[];
+select '[1:0]={1}'::int[];
 select array[];
 -- none of the above should be accepted

 -- all of the following should be accepted
 select '{}'::text[];
+select '{{},{}}'::text[];
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
 select '{0 second  ,0 second}'::interval[];
 select '{ { "," } , { 3 } }'::text[];
@@ -474,6 +483,8 @@ select '{
          }'::interval[];
 select array[]::text[];
 select '[0:1]={1.1,2.2}'::float8[];
+select '[2147483646:2147483646]={1}'::int[];
+select '[1:0]={}'::int[];
 -- all of the above should be accepted

 -- tests for array aggregates
--
2.39.3

From 95cae8ead8264298bacf91100a1a1e02a071b7c1 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 4 Jul 2023 14:07:31 -0400
Subject: [PATCH v2 3/3] Re-indent ArrayCount().

This cleans up after the removal of the "while (!itemdone)" loop.
---
 src/backend/utils/adt/arrayfuncs.c | 214 ++++++++++++++---------------
 1 file changed, 106 insertions(+), 108 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 4fb052b23e..ca2a690768 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -498,21 +498,21 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
     {
         bool        new_element = false;

-            switch (*ptr)
-            {
-                case '\0':
-                    /* Signal a premature end of the string */
-                    ereturn(escontext, -1,
-                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                             errmsg("malformed array literal: \"%s\"", str),
-                             errdetail("Unexpected end of input.")));
-                case '\\':
+        switch (*ptr)
+        {
+            case '\0':
+                /* Signal a premature end of the string */
+                ereturn(escontext, -1,
+                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("malformed array literal: \"%s\"", str),
+                         errdetail("Unexpected end of input.")));
+            case '\\':

-                    /*
-                     * An escape must be after a level start, after an element
-                     * start, or after an element delimiter. In any case we
-                     * now must be past an element start.
-                     */
+                /*
+                 * An escape must be after a level start, after an element
+                 * start, or after an element delimiter. In any case we now
+                 * must be past an element start.
+                 */
                 switch (parse_state)
                 {
                     case ARRAY_LEVEL_STARTED:
@@ -532,22 +532,22 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                                  errdetail("Unexpected \"%c\" character.",
                                            '\\')));
                 }
-                    /* skip the escaped character */
-                    if (*(ptr + 1))
-                        ptr++;
-                    else
-                        ereturn(escontext, -1,
-                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                 errmsg("malformed array literal: \"%s\"", str),
-                                 errdetail("Unexpected end of input.")));
-                    break;
-                case '"':
+                /* skip the escaped character */
+                if (*(ptr + 1))
+                    ptr++;
+                else
+                    ereturn(escontext, -1,
+                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", str),
+                             errdetail("Unexpected end of input.")));
+                break;
+            case '"':

-                    /*
-                     * A quote must be after a level start, after a quoted
-                     * element start, or after an element delimiter. In any
-                     * case we now must be past an element start.
-                     */
+                /*
+                 * A quote must be after a level start, after a quoted element
+                 * start, or after an element delimiter. In any case we now
+                 * must be past an element start.
+                 */
                 switch (parse_state)
                 {
                     case ARRAY_LEVEL_STARTED:
@@ -570,17 +570,16 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                                  errmsg("malformed array literal: \"%s\"", str),
                                  errdetail("Unexpected array element.")));
                 }
-                    break;
-                case '{':
-                    if (!in_quotes)
-                    {
-                        /*
-                         * A left brace can occur if no nesting has occurred
-                         * yet, after a level start, or after a level
-                         * delimiter.  If we see one after an element
-                         * delimiter, we have something like "{1,{2}}", so
-                         * complain about non-rectangularity.
-                         */
+                break;
+            case '{':
+                if (!in_quotes)
+                {
+                    /*
+                     * A left brace can occur if no nesting has occurred yet,
+                     * after a level start, or after a level delimiter.  If we
+                     * see one after an element delimiter, we have something
+                     * like "{1,{2}}", so complain about non-rectangularity.
+                     */
                     switch (parse_state)
                     {
                         case ARRAY_NO_LEVEL:
@@ -600,19 +599,19 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                                      errdetail("Unexpected \"%c\" character.",
                                                '{')));
                     }
-                        parse_state = ARRAY_LEVEL_STARTED;
+                    parse_state = ARRAY_LEVEL_STARTED;
                     /* Nested sub-arrays count as elements of outer level */
                     if (nest_level > 0)
                         nelems[nest_level - 1]++;
                     /* Initialize element counting in the new level */
-                        if (nest_level >= MAXDIM)
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                                     errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-                                            nest_level + 1, MAXDIM)));
+                    if (nest_level >= MAXDIM)
+                        ereturn(escontext, -1,
+                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+                                        nest_level + 1, MAXDIM)));
                     nelems[nest_level] = 0;
-                        nest_level++;
-                        if (ndim < nest_level)
+                    nest_level++;
+                    if (ndim < nest_level)
                     {
                         /* Can't increase ndim once it's frozen */
                         if (ndim_frozen)
@@ -620,22 +619,22 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"", str),
                                      errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
-                            ndim = nest_level;
+                        ndim = nest_level;
                     }
                 }
-                    break;
-                case '}':
-                    if (!in_quotes)
-                    {
-                        /*
-                         * A right brace can occur after an element start, an
-                         * element completion, a quoted element completion, or
-                         * a level completion.  We also allow it after a level
-                         * start, that is an empty sub-array "{}" --- but that
-                         * freezes the number of dimensions and all such
-                         * sub-arrays must be at the same level, just like
-                         * sub-arrays containing elements.
-                         */
+                break;
+            case '}':
+                if (!in_quotes)
+                {
+                    /*
+                     * A right brace can occur after an element start, an
+                     * element completion, a quoted element completion, or a
+                     * level completion.  We also allow it after a level
+                     * start, that is an empty sub-array "{}" --- but that
+                     * freezes the number of dimensions and all such
+                     * sub-arrays must be at the same level, just like
+                     * sub-arrays containing elements.
+                     */
                     switch (parse_state)
                     {
                         case ARRAY_ELEM_STARTED:
@@ -647,9 +646,9 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                             /* empty sub-array: OK if at correct nest_level */
                             ndim_frozen = true;
                             if (nest_level != ndim)
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
+                                ereturn(escontext, -1,
+                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                         errmsg("malformed array literal: \"%s\"", str),
                                          errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
                             break;
                         default:
@@ -662,7 +661,7 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                     parse_state = ARRAY_LEVEL_COMPLETED;
                     /* The parse state check assured we're in a level. */
                     Assert(nest_level > 0);
-                        nest_level--;
+                    nest_level--;

                     if (dim[nest_level] < 0)
                     {
@@ -672,51 +671,50 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                     else if (nelems[nest_level] != dim[nest_level])
                     {
                         /* Subsequent sub-arrays must have same length */
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
+                        ereturn(escontext, -1,
+                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str),
                                  errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
-                        }
+                    }
                     /* Done if this is the outermost level's '}' */
                     if (nest_level == 0)
                         eoArray = true;
+                }
+                break;
+            default:
+                if (!in_quotes)
+                {
+                    if (*ptr == typdelim)
+                    {
+                        /*
+                         * Delimiters can occur after an element start, a
+                         * quoted element completion, or a level completion.
+                         */
+                        if (parse_state != ARRAY_ELEM_STARTED &&
+                            parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
+                            parse_state != ARRAY_LEVEL_COMPLETED)
+                            ereturn(escontext, -1,
+                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                     errmsg("malformed array literal: \"%s\"", str),
+                                     errdetail("Unexpected \"%c\" character.",
+                                               typdelim)));
+                        if (parse_state == ARRAY_LEVEL_COMPLETED)
+                            parse_state = ARRAY_LEVEL_DELIMITED;
+                        else
+                            parse_state = ARRAY_ELEM_DELIMITED;
                     }
-                    break;
-                default:
-                    if (!in_quotes)
+                    else if (!array_isspace(*ptr))
                     {
-                        if (*ptr == typdelim)
-                        {
-                            /*
-                             * Delimiters can occur after an element start, a
-                             * quoted element completion, or a level
-                             * completion.
-                             */
-                            if (parse_state != ARRAY_ELEM_STARTED &&
-                                parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-                                parse_state != ARRAY_LEVEL_COMPLETED)
-                                ereturn(escontext, -1,
-                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                         errmsg("malformed array literal: \"%s\"", str),
-                                         errdetail("Unexpected \"%c\" character.",
-                                                   typdelim)));
-                            if (parse_state == ARRAY_LEVEL_COMPLETED)
-                                parse_state = ARRAY_LEVEL_DELIMITED;
-                            else
-                                parse_state = ARRAY_ELEM_DELIMITED;
-                        }
-                        else if (!array_isspace(*ptr))
-                        {
-                            /*
-                             * Other non-space characters must be after a
-                             * level start, after an element start, or after
-                             * an element delimiter. In any case we now must
-                             * be past an element start.
-                             *
-                             * If it's a space character, we can ignore it; it
-                             * might be data or not, but it doesn't change the
-                             * parsing state.
-                             */
+                        /*
+                         * Other non-space characters must be after a level
+                         * start, after an element start, or after an element
+                         * delimiter. In any case we now must be past an
+                         * element start.
+                         *
+                         * If it's a space character, we can ignore it; it
+                         * might be data or not, but it doesn't change the
+                         * parsing state.
+                         */
                         switch (parse_state)
                         {
                             case ARRAY_LEVEL_STARTED:
@@ -734,10 +732,10 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                                          errmsg("malformed array literal: \"%s\"", str),
                                          errdetail("Unexpected array element.")));
                         }
-                        }
                     }
-                    break;
-            }
+                }
+                break;
+        }

         /* To reduce duplication, all new-element cases go through here. */
         if (new_element)
--
2.39.3

Re: Cleaning up array_in()

От

Tom Lane

Дата:

08 июля 2023 г., 19:08:52

I wrote:
> So I end up with the attached.  I went ahead and dropped
> ArrayGetOffset0() as part of 0001, and I split 0002 into two patches
> where the new 0002 avoids re-indenting any existing code in order
> to ease review, and then 0003 is just a mechanical application
> of pgindent.

That got sideswiped by ae6d06f09, so here's a trivial rebase to
pacify the cfbot.

            regards, tom lane

#text/x-diff; name="v3-0001-Simplify-and-speed-up-ReadArrayStr.patch"
[v3-0001-Simplify-and-speed-up-ReadArrayStr.patch]/home/tgl/pgsql/v3-0001-Simplify-and-speed-up-ReadArrayStr.patch 
#text/x-diff; name="v3-0002-Rewrite-ArrayCount-to-make-dimensionality-checks-.patch"
[v3-0002-Rewrite-ArrayCount-to-make-dimensionality-checks-.patch]
/home/tgl/pgsql/v3-0002-Rewrite-ArrayCount-to-make-dimensionality-checks-.patch
#text/x-diff; name="v3-0003-Re-indent-ArrayCount.patch" [v3-0003-Re-indent-ArrayCount.patch]
/home/tgl/pgsql/v3-0003-Re-indent-ArrayCount.patch

Re: Cleaning up array_in()

От

Tom Lane

Дата:

08 июля 2023 г., 22:34:51

I wrote:
> That got sideswiped by ae6d06f09, so here's a trivial rebase to
> pacify the cfbot.

Sigh, this time with patch.

            regards, tom lane

From 63ee707f6aa38043c0211c0c4a124fa5fe2ad016 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 8 Jul 2023 11:55:56 -0400
Subject: [PATCH v3 1/3] Simplify and speed up ReadArrayStr().

ReadArrayStr() seems to have been written on the assumption that
non-rectangular input is fine and it should pad with NULLs anywhere
that elements are missing.  We disallowed non-rectangular input
ages ago (commit 0e13d627b), but never simplified this function
as a follow-up.  In particular, the existing code recomputes each
element's linear location from scratch, which is quite unnecessary
for rectangular input: we can just assign the elements sequentially,
saving lots of arithmetic.  Add some more commentary while at it.

ArrayGetOffset0() is no longer used anywhere, so remove it.
---
 src/backend/utils/adt/arrayfuncs.c | 69 ++++++++++++++----------------
 src/backend/utils/adt/arrayutils.c | 15 -------
 src/include/utils/array.h          |  1 -
 3 files changed, 33 insertions(+), 52 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 4359dbd83d..967aff8cf9 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -93,7 +93,7 @@ typedef struct ArrayIteratorData
 static int    ArrayCount(const char *str, int *dim, char typdelim,
                        Node *escontext);
 static bool ReadArrayStr(char *arrayStr, const char *origStr,
-                         int nitems, int ndim, int *dim,
+                         int nitems,
                          FmgrInfo *inputproc, Oid typioparam, int32 typmod,
                          char typdelim,
                          int typlen, bool typbyval, char typalign,
@@ -391,7 +391,7 @@ array_in(PG_FUNCTION_ARGS)
     dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
     nullsPtr = (bool *) palloc(nitems * sizeof(bool));
     if (!ReadArrayStr(p, string,
-                      nitems, ndim, dim,
+                      nitems,
                       &my_extra->proc, typioparam, typmod,
                       typdelim,
                       typlen, typbyval, typalign,
@@ -436,7 +436,8 @@ array_in(PG_FUNCTION_ARGS)

 /*
  * ArrayCount
- *     Determines the dimensions for an array string.
+ *     Determines the dimensions for an array string.  This includes
+ *     syntax-checking the array structure decoration (braces and delimiters).
  *
  * Returns number of dimensions as function result.  The axis lengths are
  * returned in dim[], which must be of size MAXDIM.
@@ -683,16 +684,14 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 /*
  * ReadArrayStr :
  *     parses the array string pointed to by "arrayStr" and converts the values
- *     to internal format.  Unspecified elements are initialized to nulls.
- *     The array dimensions must already have been determined.
+ *     to internal format.  The array dimensions must have been determined,
+ *     and the case of an empty array must have been handled earlier.
  *
  * Inputs:
  *    arrayStr: the string to parse.
  *              CAUTION: the contents of "arrayStr" will be modified!
  *    origStr: the unmodified input string, used only in error messages.
  *    nitems: total number of array elements, as already determined.
- *    ndim: number of array dimensions
- *    dim[]: array axis lengths
  *    inputproc: type-specific input procedure for element datatype.
  *    typioparam, typmod: auxiliary values to pass to inputproc.
  *    typdelim: the value delimiter (type-specific).
@@ -717,8 +716,6 @@ static bool
 ReadArrayStr(char *arrayStr,
              const char *origStr,
              int nitems,
-             int ndim,
-             int *dim,
              FmgrInfo *inputproc,
              Oid typioparam,
              int32 typmod,
@@ -732,20 +729,13 @@ ReadArrayStr(char *arrayStr,
              int32 *nbytes,
              Node *escontext)
 {
-    int            i,
+    int            i = 0,
                 nest_level = 0;
     char       *srcptr;
     bool        in_quotes = false;
     bool        eoArray = false;
     bool        hasnull;
     int32        totbytes;
-    int            indx[MAXDIM] = {0},
-                prod[MAXDIM];
-
-    mda_get_prod(ndim, dim, prod);
-
-    /* Initialize is-null markers to true */
-    memset(nulls, true, nitems * sizeof(bool));

     /*
      * We have to remove " and \ characters to create a clean item value to
@@ -768,11 +758,20 @@ ReadArrayStr(char *arrayStr,
         bool        itemdone = false;
         bool        leadingspace = true;
         bool        hasquoting = false;
-        char       *itemstart;
-        char       *dstptr;
-        char       *dstendptr;
+        char       *itemstart;    /* start of de-escaped text */
+        char       *dstptr;        /* next output point for de-escaped text */
+        char       *dstendptr;    /* last significant output char + 1 */

-        i = -1;
+        /*
+         * Parse next array element, collecting the de-escaped text into
+         * itemstart..dstendptr-1.
+         *
+         * Notice that we do not set "itemdone" until we see a separator
+         * (typdelim character) or the array's final right brace.  Since the
+         * array is already verified to be nonempty and rectangular, there is
+         * guaranteed to be another element to be processed in the first case,
+         * while in the second case of course we'll exit the outer loop.
+         */
         itemstart = dstptr = dstendptr = srcptr;

         while (!itemdone)
@@ -819,13 +818,7 @@ ReadArrayStr(char *arrayStr,
                 case '{':
                     if (!in_quotes)
                     {
-                        if (nest_level >= ndim)
-                            ereturn(escontext, false,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"",
-                                            origStr)));
                         nest_level++;
-                        indx[nest_level - 1] = 0;
                         srcptr++;
                     }
                     else
@@ -839,14 +832,9 @@ ReadArrayStr(char *arrayStr,
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"",
                                             origStr)));
-                        if (i == -1)
-                            i = ArrayGetOffset0(ndim, indx, prod);
-                        indx[nest_level - 1] = 0;
                         nest_level--;
                         if (nest_level == 0)
                             eoArray = itemdone = true;
-                        else
-                            indx[nest_level - 1]++;
                         srcptr++;
                     }
                     else
@@ -857,10 +845,7 @@ ReadArrayStr(char *arrayStr,
                         *dstptr++ = *srcptr++;
                     else if (*srcptr == typdelim)
                     {
-                        if (i == -1)
-                            i = ArrayGetOffset0(ndim, indx, prod);
                         itemdone = true;
-                        indx[ndim - 1]++;
                         srcptr++;
                     }
                     else if (scanner_isspace(*srcptr))
@@ -884,15 +869,18 @@ ReadArrayStr(char *arrayStr,
             }
         }

+        /* Terminate de-escaped string */
         Assert(dstptr < srcptr);
         *dstendptr = '\0';

-        if (i < 0 || i >= nitems)
+        /* Safety check that we don't write past the output arrays */
+        if (i >= nitems)
             ereturn(escontext, false,
                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                      errmsg("malformed array literal: \"%s\"",
                             origStr)));

+        /* Convert the de-escaped string into the next output array entries */
         if (Array_nulls && !hasquoting &&
             pg_strcasecmp(itemstart, "NULL") == 0)
         {
@@ -913,8 +901,17 @@ ReadArrayStr(char *arrayStr,
                 return false;
             nulls[i] = false;
         }
+
+        i++;
     }

+    /* Cross-check that we filled all the output array entries */
+    if (i != nitems)
+        ereturn(escontext, false,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("malformed array literal: \"%s\"",
+                        origStr)));
+
     /*
      * Check for nulls, compute total data space needed
      */
diff --git a/src/backend/utils/adt/arrayutils.c b/src/backend/utils/adt/arrayutils.c
index 62152f79f5..b8bccc7cca 100644
--- a/src/backend/utils/adt/arrayutils.c
+++ b/src/backend/utils/adt/arrayutils.c
@@ -43,21 +43,6 @@ ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx)
     return offset;
 }

-/*
- * Same, but subscripts are assumed 0-based, and use a scale array
- * instead of raw dimension data (see mda_get_prod to create scale array)
- */
-int
-ArrayGetOffset0(int n, const int *tup, const int *scale)
-{
-    int            i,
-                lin = 0;
-
-    for (i = 0; i < n; i++)
-        lin += tup[i] * scale[i];
-    return lin;
-}
-
 /*
  * Convert array dimensions into number of elements
  *
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index b13dfb345e..d49adc38b7 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -448,7 +448,6 @@ extern void array_free_iterator(ArrayIterator iterator);
  */

 extern int    ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx);
-extern int    ArrayGetOffset0(int n, const int *tup, const int *scale);
 extern int    ArrayGetNItems(int ndim, const int *dims);
 extern int    ArrayGetNItemsSafe(int ndim, const int *dims,
                                struct Node *escontext);
--
2.39.3

From b5a5efbd3f97f0e1cf2e16f5b2f057218b9f051b Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 8 Jul 2023 11:59:20 -0400
Subject: [PATCH v3 2/3] Rewrite ArrayCount() to make dimensionality checks
 more complete.

ArrayCount has had a few layers of revision over the years,
reaching a point where it's really quite baroque and unintelligible.
There are actually four different arrays holding dimensions, which
can be reduced to two with little effort.  It was also close to
impossible to figure out why the dimension-counting actually worked
correctly.  Rewrite to make it perhaps a little less opaque.
In particular, a new element or subarray is now counted when we see
the start of the element or subarray, not at some later point where
we might not even be at the same nest_level anymore.

Also, install guards to catch non-rectangularity cases that were
previously missed, along the same lines as recent plperl and plpython
fixes: we were not checking that all scalar elements appear at the
same nesting depth.  Thanks to the very different logic, this seems
not to have caused any of the sort of internal errors that the PLs
suffered from, but it's still wrong.

On the other hand, the behavior of plperl and plpython suggests
that we should allow empty sub-arrays, as in '{{},{}}'; those
languages interpret equivalent constructs such as '[[],[]]' as
an empty (hence zero-dimensional) array, and it seems unclear
why array_in should not.  Remove the hack that was installed to
reject that case.

Also, fix things so that it's possible to write an empty array
with explicit bounds spec, along the lines of '[1:0]={}'.
Before, that was rejected with a complaint about upper bound
less than lower, but I don't see a good reason not to allow it.

In passing, remove ArrayParseState.ARRAY_ELEM_COMPLETED, a state
that the state machine never entered yet wasted cycles checking for.

(This patch looks a bit odd because I tried to avoid reindenting
existing code, despite having removed one level of loop.  That
seemed to me to make it easier to read the code changes.  A
follow-up pgindent run is needed.)
---
 src/backend/utils/adt/arrayfuncs.c   | 241 +++++++++++++++++----------
 src/test/regress/expected/arrays.out |  68 +++++++-
 src/test/regress/sql/arrays.sql      |  13 +-
 3 files changed, 230 insertions(+), 92 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 967aff8cf9..3f8a3af689 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -58,7 +58,6 @@ typedef enum
     ARRAY_NO_LEVEL,
     ARRAY_LEVEL_STARTED,
     ARRAY_ELEM_STARTED,
-    ARRAY_ELEM_COMPLETED,
     ARRAY_QUOTED_ELEM_STARTED,
     ARRAY_QUOTED_ELEM_COMPLETED,
     ARRAY_ELEM_DELIMITED,
@@ -302,10 +301,18 @@ array_in(PG_FUNCTION_ARGS)
         *q = '\0';
         ub = atoi(p);
         p = q + 1;
-        if (ub < lBound[ndim])
+
+        /* Upper bound of INT_MAX is disallowed, cf ArrayCheckBounds() */
+        if (ub == INT_MAX)
+            ereturn(escontext, (Datum) 0,
+                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                     errmsg("array upper bound is too large: %d",
+                            ub)));
+        /* Now it's safe to compute ub + 1 */
+        if (ub + 1 < lBound[ndim])
             ereturn(escontext, (Datum) 0,
                     (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
-                     errmsg("upper bound cannot be less than lower bound")));
+                     errmsg("upper bound cannot be less than lower bound minus one")));

         dim[ndim] = ub - lBound[ndim] + 1;
         ndim++;
@@ -440,7 +447,9 @@ array_in(PG_FUNCTION_ARGS)
  *     syntax-checking the array structure decoration (braces and delimiters).
  *
  * Returns number of dimensions as function result.  The axis lengths are
- * returned in dim[], which must be of size MAXDIM.
+ * returned in dim[], which must be of size MAXDIM.  This function does not
+ * special-case empty arrays: it will return a positive result with some
+ * zero axis length(s).
  *
  * If we detect an error, fill *escontext with error details and return -1
  * (unless escontext isn't provided, in which case errors will be thrown).
@@ -451,31 +460,22 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
     int            nest_level = 0,
                 i;
     int            ndim = 1,
-                temp[MAXDIM],
-                nelems[MAXDIM],
-                nelems_last[MAXDIM];
+                nelems[MAXDIM];
+    bool        ndim_frozen = false;
     bool        in_quotes = false;
     bool        eoArray = false;
-    bool        empty_array = true;
     const char *ptr;
     ArrayParseState parse_state = ARRAY_NO_LEVEL;

+    /* Initialize dim[] entries to -1 meaning "unknown" */
     for (i = 0; i < MAXDIM; ++i)
-    {
-        temp[i] = dim[i] = nelems_last[i] = 0;
-        nelems[i] = 1;
-    }
+        dim[i] = -1;

+    /* Scan string until we reach closing brace */
     ptr = str;
     while (!eoArray)
     {
-        bool        itemdone = false;
-
-        while (!itemdone)
-        {
-            if (parse_state == ARRAY_ELEM_STARTED ||
-                parse_state == ARRAY_QUOTED_ELEM_STARTED)
-                empty_array = false;
+        bool        new_element = false;

             switch (*ptr)
             {
@@ -492,17 +492,25 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                      * start, or after an element delimiter. In any case we
                      * now must be past an element start.
                      */
-                    if (parse_state != ARRAY_LEVEL_STARTED &&
-                        parse_state != ARRAY_ELEM_STARTED &&
-                        parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-                        parse_state != ARRAY_ELEM_DELIMITED)
+                switch (parse_state)
+                {
+                    case ARRAY_LEVEL_STARTED:
+                    case ARRAY_ELEM_DELIMITED:
+                        /* start new unquoted element */
+                        parse_state = ARRAY_ELEM_STARTED;
+                        new_element = true;
+                        break;
+                    case ARRAY_ELEM_STARTED:
+                    case ARRAY_QUOTED_ELEM_STARTED:
+                        /* already in element */
+                        break;
+                    default:
                         ereturn(escontext, -1,
                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                  errmsg("malformed array literal: \"%s\"", str),
                                  errdetail("Unexpected \"%c\" character.",
                                            '\\')));
-                    if (parse_state != ARRAY_QUOTED_ELEM_STARTED)
-                        parse_state = ARRAY_ELEM_STARTED;
+                }
                     /* skip the escaped character */
                     if (*(ptr + 1))
                         ptr++;
@@ -519,18 +527,28 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                      * element start, or after an element delimiter. In any
                      * case we now must be past an element start.
                      */
-                    if (parse_state != ARRAY_LEVEL_STARTED &&
-                        parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-                        parse_state != ARRAY_ELEM_DELIMITED)
+                switch (parse_state)
+                {
+                    case ARRAY_LEVEL_STARTED:
+                    case ARRAY_ELEM_DELIMITED:
+                        /* start new quoted element */
+                        Assert(!in_quotes);
+                        in_quotes = true;
+                        parse_state = ARRAY_QUOTED_ELEM_STARTED;
+                        new_element = true;
+                        break;
+                    case ARRAY_QUOTED_ELEM_STARTED:
+                        /* already in element, end it */
+                        Assert(in_quotes);
+                        in_quotes = false;
+                        parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
+                        break;
+                    default:
                         ereturn(escontext, -1,
                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                  errmsg("malformed array literal: \"%s\"", str),
                                  errdetail("Unexpected array element.")));
-                    in_quotes = !in_quotes;
-                    if (in_quotes)
-                        parse_state = ARRAY_QUOTED_ELEM_STARTED;
-                    else
-                        parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
+                }
                     break;
                 case '{':
                     if (!in_quotes)
@@ -538,27 +556,52 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                         /*
                          * A left brace can occur if no nesting has occurred
                          * yet, after a level start, or after a level
-                         * delimiter.
+                         * delimiter.  If we see one after an element
+                         * delimiter, we have something like "{1,{2}}", so
+                         * complain about non-rectangularity.
                          */
-                        if (parse_state != ARRAY_NO_LEVEL &&
-                            parse_state != ARRAY_LEVEL_STARTED &&
-                            parse_state != ARRAY_LEVEL_DELIMITED)
+                    switch (parse_state)
+                    {
+                        case ARRAY_NO_LEVEL:
+                        case ARRAY_LEVEL_STARTED:
+                        case ARRAY_LEVEL_DELIMITED:
+                            /* okay */
+                            break;
+                        case ARRAY_ELEM_DELIMITED:
+                            ereturn(escontext, -1,
+                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                     errmsg("malformed array literal: \"%s\"", str),
+                                     errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
+                        default:
                             ereturn(escontext, -1,
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"", str),
                                      errdetail("Unexpected \"%c\" character.",
                                                '{')));
+                    }
                         parse_state = ARRAY_LEVEL_STARTED;
+                    /* Nested sub-arrays count as elements of outer level */
+                    if (nest_level > 0)
+                        nelems[nest_level - 1]++;
+                    /* Initialize element counting in the new level */
                         if (nest_level >= MAXDIM)
                             ereturn(escontext, -1,
                                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                      errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
                                             nest_level + 1, MAXDIM)));
-                        temp[nest_level] = 0;
+                    nelems[nest_level] = 0;
                         nest_level++;
                         if (ndim < nest_level)
+                    {
+                        /* Can't increase ndim once it's frozen */
+                        if (ndim_frozen)
+                            ereturn(escontext, -1,
+                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                     errmsg("malformed array literal: \"%s\"", str),
+                                     errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
                             ndim = nest_level;
                     }
+                }
                     break;
                 case '}':
                     if (!in_quotes)
@@ -566,46 +609,56 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                         /*
                          * A right brace can occur after an element start, an
                          * element completion, a quoted element completion, or
-                         * a level completion.
+                         * a level completion.  We also allow it after a level
+                         * start, that is an empty sub-array "{}" --- but that
+                         * freezes the number of dimensions and all such
+                         * sub-arrays must be at the same level, just like
+                         * sub-arrays containing elements.
                          */
-                        if (parse_state != ARRAY_ELEM_STARTED &&
-                            parse_state != ARRAY_ELEM_COMPLETED &&
-                            parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-                            parse_state != ARRAY_LEVEL_COMPLETED &&
-                            !(nest_level == 1 && parse_state == ARRAY_LEVEL_STARTED))
+                    switch (parse_state)
+                    {
+                        case ARRAY_ELEM_STARTED:
+                        case ARRAY_QUOTED_ELEM_COMPLETED:
+                        case ARRAY_LEVEL_COMPLETED:
+                            /* okay */
+                            break;
+                        case ARRAY_LEVEL_STARTED:
+                            /* empty sub-array: OK if at correct nest_level */
+                            ndim_frozen = true;
+                            if (nest_level != ndim)
                             ereturn(escontext, -1,
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Unexpected \"%c\" character.",
-                                               '}')));
-                        parse_state = ARRAY_LEVEL_COMPLETED;
-                        if (nest_level == 0)
+                                         errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
+                            break;
+                        default:
                             ereturn(escontext, -1,
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Unmatched \"%c\" character.", '}')));
+                                     errdetail("Unexpected \"%c\" character.",
+                                               '}')));
+                    }
+                    parse_state = ARRAY_LEVEL_COMPLETED;
+                    /* The parse state check assured we're in a level. */
+                    Assert(nest_level > 0);
                         nest_level--;

-                        if (nelems_last[nest_level] != 0 &&
-                            nelems[nest_level] != nelems_last[nest_level])
+                    if (dim[nest_level] < 0)
+                    {
+                        /* Save length of first sub-array of this level */
+                        dim[nest_level] = nelems[nest_level];
+                    }
+                    else if (nelems[nest_level] != dim[nest_level])
+                    {
+                        /* Subsequent sub-arrays must have same length */
                             ereturn(escontext, -1,
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Multidimensional arrays must have "
-                                               "sub-arrays with matching "
-                                               "dimensions.")));
-                        nelems_last[nest_level] = nelems[nest_level];
-                        nelems[nest_level] = 1;
-                        if (nest_level == 0)
-                            eoArray = itemdone = true;
-                        else
-                        {
-                            /*
-                             * We don't set itemdone here; see comments in
-                             * ReadArrayStr
-                             */
-                            temp[nest_level - 1]++;
+                                 errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
                         }
+                    /* Done if this is the outermost level's '}' */
+                    if (nest_level == 0)
+                        eoArray = true;
                     }
                     break;
                 default:
@@ -614,12 +667,11 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                         if (*ptr == typdelim)
                         {
                             /*
-                             * Delimiters can occur after an element start, an
-                             * element completion, a quoted element
-                             * completion, or a level completion.
+                             * Delimiters can occur after an element start, a
+                             * quoted element completion, or a level
+                             * completion.
                              */
                             if (parse_state != ARRAY_ELEM_STARTED &&
-                                parse_state != ARRAY_ELEM_COMPLETED &&
                                 parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
                                 parse_state != ARRAY_LEVEL_COMPLETED)
                                 ereturn(escontext, -1,
@@ -631,8 +683,6 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                                 parse_state = ARRAY_LEVEL_DELIMITED;
                             else
                                 parse_state = ARRAY_ELEM_DELIMITED;
-                            itemdone = true;
-                            nelems[nest_level - 1]++;
                         }
                         else if (!scanner_isspace(*ptr))
                         {
@@ -641,23 +691,51 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                              * level start, after an element start, or after
                              * an element delimiter. In any case we now must
                              * be past an element start.
+                             *
+                             * If it's a space character, we can ignore it; it
+                             * might be data or not, but it doesn't change the
+                             * parsing state.
                              */
-                            if (parse_state != ARRAY_LEVEL_STARTED &&
-                                parse_state != ARRAY_ELEM_STARTED &&
-                                parse_state != ARRAY_ELEM_DELIMITED)
+                        switch (parse_state)
+                        {
+                            case ARRAY_LEVEL_STARTED:
+                            case ARRAY_ELEM_DELIMITED:
+                                /* start new unquoted element */
+                                parse_state = ARRAY_ELEM_STARTED;
+                                new_element = true;
+                                break;
+                            case ARRAY_ELEM_STARTED:
+                                /* already in element */
+                                break;
+                            default:
                                 ereturn(escontext, -1,
                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                          errmsg("malformed array literal: \"%s\"", str),
                                          errdetail("Unexpected array element.")));
-                            parse_state = ARRAY_ELEM_STARTED;
+                        }
                         }
                     }
                     break;
             }
-            if (!itemdone)
-                ptr++;
+
+        /* To reduce duplication, all new-element cases go through here. */
+        if (new_element)
+        {
+            /*
+             * Once we have found an element, the number of dimensions can no
+             * longer increase, and subsequent elements must all be at the
+             * same nesting depth.
+             */
+            ndim_frozen = true;
+            if (nest_level != ndim)
+                ereturn(escontext, -1,
+                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("malformed array literal: \"%s\"", str),
+                         errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
+            /* Count the new element */
+            nelems[nest_level - 1]++;
         }
-        temp[ndim - 1]++;
+
         ptr++;
     }

@@ -671,13 +749,6 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                      errdetail("Junk after closing right brace.")));
     }

-    /* special case for an empty array */
-    if (empty_array)
-        return 0;
-
-    for (i = 0; i < ndim; ++i)
-        dim[i] = temp[i];
-
     return ndim;
 }

diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 7064391468..26d4281d94 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1475,12 +1475,7 @@ select '{{1,{2}},{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,{2}},{2,3}}"
 LINE 1: select '{{1,{2}},{2,3}}'::text[];
                ^
-DETAIL:  Unexpected "{" character.
-select '{{},{}}'::text[];
-ERROR:  malformed array literal: "{{},{}}"
-LINE 1: select '{{},{}}'::text[];
-               ^
-DETAIL:  Unexpected "}" character.
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
 select E'{{1,2},\\{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,2},\{2,3}}"
 LINE 1: select E'{{1,2},\\{2,3}}'::text[];
@@ -1501,6 +1496,49 @@ ERROR:  malformed array literal: "{ }}"
 LINE 1: select '{ }}'::text[];
                ^
 DETAIL:  Junk after closing right brace.
+select '{{1},{{2}}}'::text[];
+ERROR:  malformed array literal: "{{1},{{2}}}"
+LINE 1: select '{{1},{{2}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{1}},{2}}'::text[];
+ERROR:  malformed array literal: "{{{1}},{2}}"
+LINE 1: select '{{{1}},{2}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{{}}}'::text[];
+ERROR:  malformed array literal: "{{},{{}}}"
+LINE 1: select '{{},{{}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{}},{}}'::text[];
+ERROR:  malformed array literal: "{{{}},{}}"
+LINE 1: select '{{{}},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{1},{}}'::text[];
+ERROR:  malformed array literal: "{{1},{}}"
+LINE 1: select '{{1},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{1}}'::text[];
+ERROR:  malformed array literal: "{{},{1}}"
+LINE 1: select '{{},{1}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '[2147483646:2147483647]={1,2}'::int[];
+ERROR:  array upper bound is too large: 2147483647
+LINE 1: select '[2147483646:2147483647]={1,2}'::int[];
+               ^
+select '[1:-1]={}'::int[];
+ERROR:  upper bound cannot be less than lower bound minus one
+LINE 1: select '[1:-1]={}'::int[];
+               ^
+select '[1:0]={1}'::int[];
+ERROR:  malformed array literal: "[1:0]={1}"
+LINE 1: select '[1:0]={1}'::int[];
+               ^
+DETAIL:  Specified array dimensions do not match array contents.
 select array[];
 ERROR:  cannot determine type of empty array
 LINE 1: select array[];
@@ -1514,6 +1552,12 @@ select '{}'::text[];
  {}
 (1 row)

+select '{{},{}}'::text[];
+ text
+------
+ {}
+(1 row)
+
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
                      text
 -----------------------------------------------
@@ -1559,6 +1603,18 @@ select '[0:1]={1.1,2.2}'::float8[];
  [0:1]={1.1,2.2}
 (1 row)

+select '[2147483646:2147483646]={1}'::int[];
+            int4
+-----------------------------
+ [2147483646:2147483646]={1}
+(1 row)
+
+select '[1:0]={}'::int[];
+ int4
+------
+ {}
+(1 row)
+
 -- all of the above should be accepted
 -- tests for array aggregates
 CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]);
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index f1375621e0..b7e2b180cc 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -454,16 +454,25 @@ select 'foo' ilike all (array['F%', '%O']); -- t

 -- none of the following should be accepted
 select '{{1,{2}},{2,3}}'::text[];
-select '{{},{}}'::text[];
 select E'{{1,2},\\{2,3}}'::text[];
 select '{{"1 2" x},{3}}'::text[];
 select '{}}'::text[];
 select '{ }}'::text[];
+select '{{1},{{2}}}'::text[];
+select '{{{1}},{2}}'::text[];
+select '{{},{{}}}'::text[];
+select '{{{}},{}}'::text[];
+select '{{1},{}}'::text[];
+select '{{},{1}}'::text[];
+select '[2147483646:2147483647]={1,2}'::int[];
+select '[1:-1]={}'::int[];
+select '[1:0]={1}'::int[];
 select array[];
 -- none of the above should be accepted

 -- all of the following should be accepted
 select '{}'::text[];
+select '{{},{}}'::text[];
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
 select '{0 second  ,0 second}'::interval[];
 select '{ { "," } , { 3 } }'::text[];
@@ -474,6 +483,8 @@ select '{
          }'::interval[];
 select array[]::text[];
 select '[0:1]={1.1,2.2}'::float8[];
+select '[2147483646:2147483646]={1}'::int[];
+select '[1:0]={}'::int[];
 -- all of the above should be accepted

 -- tests for array aggregates
--
2.39.3

From 90cbfe46315369888135e091028344a9d70351be Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 8 Jul 2023 12:03:27 -0400
Subject: [PATCH v3 3/3] Re-indent ArrayCount().

This cleans up after the removal of the "while (!itemdone)" loop.
---
 src/backend/utils/adt/arrayfuncs.c | 214 ++++++++++++++---------------
 1 file changed, 106 insertions(+), 108 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 3f8a3af689..c96fd46917 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -477,21 +477,21 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
     {
         bool        new_element = false;

-            switch (*ptr)
-            {
-                case '\0':
-                    /* Signal a premature end of the string */
-                    ereturn(escontext, -1,
-                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                             errmsg("malformed array literal: \"%s\"", str),
-                             errdetail("Unexpected end of input.")));
-                case '\\':
+        switch (*ptr)
+        {
+            case '\0':
+                /* Signal a premature end of the string */
+                ereturn(escontext, -1,
+                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("malformed array literal: \"%s\"", str),
+                         errdetail("Unexpected end of input.")));
+            case '\\':

-                    /*
-                     * An escape must be after a level start, after an element
-                     * start, or after an element delimiter. In any case we
-                     * now must be past an element start.
-                     */
+                /*
+                 * An escape must be after a level start, after an element
+                 * start, or after an element delimiter. In any case we now
+                 * must be past an element start.
+                 */
                 switch (parse_state)
                 {
                     case ARRAY_LEVEL_STARTED:
@@ -511,22 +511,22 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                                  errdetail("Unexpected \"%c\" character.",
                                            '\\')));
                 }
-                    /* skip the escaped character */
-                    if (*(ptr + 1))
-                        ptr++;
-                    else
-                        ereturn(escontext, -1,
-                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                 errmsg("malformed array literal: \"%s\"", str),
-                                 errdetail("Unexpected end of input.")));
-                    break;
-                case '"':
+                /* skip the escaped character */
+                if (*(ptr + 1))
+                    ptr++;
+                else
+                    ereturn(escontext, -1,
+                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", str),
+                             errdetail("Unexpected end of input.")));
+                break;
+            case '"':

-                    /*
-                     * A quote must be after a level start, after a quoted
-                     * element start, or after an element delimiter. In any
-                     * case we now must be past an element start.
-                     */
+                /*
+                 * A quote must be after a level start, after a quoted element
+                 * start, or after an element delimiter. In any case we now
+                 * must be past an element start.
+                 */
                 switch (parse_state)
                 {
                     case ARRAY_LEVEL_STARTED:
@@ -549,17 +549,16 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                                  errmsg("malformed array literal: \"%s\"", str),
                                  errdetail("Unexpected array element.")));
                 }
-                    break;
-                case '{':
-                    if (!in_quotes)
-                    {
-                        /*
-                         * A left brace can occur if no nesting has occurred
-                         * yet, after a level start, or after a level
-                         * delimiter.  If we see one after an element
-                         * delimiter, we have something like "{1,{2}}", so
-                         * complain about non-rectangularity.
-                         */
+                break;
+            case '{':
+                if (!in_quotes)
+                {
+                    /*
+                     * A left brace can occur if no nesting has occurred yet,
+                     * after a level start, or after a level delimiter.  If we
+                     * see one after an element delimiter, we have something
+                     * like "{1,{2}}", so complain about non-rectangularity.
+                     */
                     switch (parse_state)
                     {
                         case ARRAY_NO_LEVEL:
@@ -579,19 +578,19 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                                      errdetail("Unexpected \"%c\" character.",
                                                '{')));
                     }
-                        parse_state = ARRAY_LEVEL_STARTED;
+                    parse_state = ARRAY_LEVEL_STARTED;
                     /* Nested sub-arrays count as elements of outer level */
                     if (nest_level > 0)
                         nelems[nest_level - 1]++;
                     /* Initialize element counting in the new level */
-                        if (nest_level >= MAXDIM)
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                                     errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-                                            nest_level + 1, MAXDIM)));
+                    if (nest_level >= MAXDIM)
+                        ereturn(escontext, -1,
+                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                 errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+                                        nest_level + 1, MAXDIM)));
                     nelems[nest_level] = 0;
-                        nest_level++;
-                        if (ndim < nest_level)
+                    nest_level++;
+                    if (ndim < nest_level)
                     {
                         /* Can't increase ndim once it's frozen */
                         if (ndim_frozen)
@@ -599,22 +598,22 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                      errmsg("malformed array literal: \"%s\"", str),
                                      errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
-                            ndim = nest_level;
+                        ndim = nest_level;
                     }
                 }
-                    break;
-                case '}':
-                    if (!in_quotes)
-                    {
-                        /*
-                         * A right brace can occur after an element start, an
-                         * element completion, a quoted element completion, or
-                         * a level completion.  We also allow it after a level
-                         * start, that is an empty sub-array "{}" --- but that
-                         * freezes the number of dimensions and all such
-                         * sub-arrays must be at the same level, just like
-                         * sub-arrays containing elements.
-                         */
+                break;
+            case '}':
+                if (!in_quotes)
+                {
+                    /*
+                     * A right brace can occur after an element start, an
+                     * element completion, a quoted element completion, or a
+                     * level completion.  We also allow it after a level
+                     * start, that is an empty sub-array "{}" --- but that
+                     * freezes the number of dimensions and all such
+                     * sub-arrays must be at the same level, just like
+                     * sub-arrays containing elements.
+                     */
                     switch (parse_state)
                     {
                         case ARRAY_ELEM_STARTED:
@@ -626,9 +625,9 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                             /* empty sub-array: OK if at correct nest_level */
                             ndim_frozen = true;
                             if (nest_level != ndim)
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
+                                ereturn(escontext, -1,
+                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                         errmsg("malformed array literal: \"%s\"", str),
                                          errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
                             break;
                         default:
@@ -641,7 +640,7 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                     parse_state = ARRAY_LEVEL_COMPLETED;
                     /* The parse state check assured we're in a level. */
                     Assert(nest_level > 0);
-                        nest_level--;
+                    nest_level--;

                     if (dim[nest_level] < 0)
                     {
@@ -651,51 +650,50 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                     else if (nelems[nest_level] != dim[nest_level])
                     {
                         /* Subsequent sub-arrays must have same length */
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
+                        ereturn(escontext, -1,
+                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", str),
                                  errdetail("Multidimensional arrays must have sub-arrays with matching
dimensions.")));
-                        }
+                    }
                     /* Done if this is the outermost level's '}' */
                     if (nest_level == 0)
                         eoArray = true;
+                }
+                break;
+            default:
+                if (!in_quotes)
+                {
+                    if (*ptr == typdelim)
+                    {
+                        /*
+                         * Delimiters can occur after an element start, a
+                         * quoted element completion, or a level completion.
+                         */
+                        if (parse_state != ARRAY_ELEM_STARTED &&
+                            parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
+                            parse_state != ARRAY_LEVEL_COMPLETED)
+                            ereturn(escontext, -1,
+                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                     errmsg("malformed array literal: \"%s\"", str),
+                                     errdetail("Unexpected \"%c\" character.",
+                                               typdelim)));
+                        if (parse_state == ARRAY_LEVEL_COMPLETED)
+                            parse_state = ARRAY_LEVEL_DELIMITED;
+                        else
+                            parse_state = ARRAY_ELEM_DELIMITED;
                     }
-                    break;
-                default:
-                    if (!in_quotes)
+                    else if (!scanner_isspace(*ptr))
                     {
-                        if (*ptr == typdelim)
-                        {
-                            /*
-                             * Delimiters can occur after an element start, a
-                             * quoted element completion, or a level
-                             * completion.
-                             */
-                            if (parse_state != ARRAY_ELEM_STARTED &&
-                                parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-                                parse_state != ARRAY_LEVEL_COMPLETED)
-                                ereturn(escontext, -1,
-                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                         errmsg("malformed array literal: \"%s\"", str),
-                                         errdetail("Unexpected \"%c\" character.",
-                                                   typdelim)));
-                            if (parse_state == ARRAY_LEVEL_COMPLETED)
-                                parse_state = ARRAY_LEVEL_DELIMITED;
-                            else
-                                parse_state = ARRAY_ELEM_DELIMITED;
-                        }
-                        else if (!scanner_isspace(*ptr))
-                        {
-                            /*
-                             * Other non-space characters must be after a
-                             * level start, after an element start, or after
-                             * an element delimiter. In any case we now must
-                             * be past an element start.
-                             *
-                             * If it's a space character, we can ignore it; it
-                             * might be data or not, but it doesn't change the
-                             * parsing state.
-                             */
+                        /*
+                         * Other non-space characters must be after a level
+                         * start, after an element start, or after an element
+                         * delimiter. In any case we now must be past an
+                         * element start.
+                         *
+                         * If it's a space character, we can ignore it; it
+                         * might be data or not, but it doesn't change the
+                         * parsing state.
+                         */
                         switch (parse_state)
                         {
                             case ARRAY_LEVEL_STARTED:
@@ -713,10 +711,10 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                                          errmsg("malformed array literal: \"%s\"", str),
                                          errdetail("Unexpected array element.")));
                         }
-                        }
                     }
-                    break;
-            }
+                }
+                break;
+        }

         /* To reduce duplication, all new-element cases go through here. */
         if (new_element)
--
2.39.3

Re: Cleaning up array_in()

От

Heikki Linnakangas

Дата:

08 июля 2023 г., 22:38:31

On 08/07/2023 19:08, Tom Lane wrote:
> I wrote:
>> So I end up with the attached.  I went ahead and dropped
>> ArrayGetOffset0() as part of 0001, and I split 0002 into two patches
>> where the new 0002 avoids re-indenting any existing code in order
>> to ease review, and then 0003 is just a mechanical application
>> of pgindent.
> 
> That got sideswiped by ae6d06f09, so here's a trivial rebase to
> pacify the cfbot.
> 
> #text/x-diff; name="v3-0001-Simplify-and-speed-up-ReadArrayStr.patch"
[v3-0001-Simplify-and-speed-up-ReadArrayStr.patch]/home/tgl/pgsql/v3-0001-Simplify-and-speed-up-ReadArrayStr.patch

> #text/x-diff; name="v3-0002-Rewrite-ArrayCount-to-make-dimensionality-checks-.patch"
[v3-0002-Rewrite-ArrayCount-to-make-dimensionality-checks-.patch]
/home/tgl/pgsql/v3-0002-Rewrite-ArrayCount-to-make-dimensionality-checks-.patch
> #text/x-diff; name="v3-0003-Re-indent-ArrayCount.patch" [v3-0003-Re-indent-ArrayCount.patch]
/home/tgl/pgsql/v3-0003-Re-indent-ArrayCount.patch

Something's wrong with your attachments.

Hmm, I wonder if ae6d06f09 had a negative performance impact. In an 
unquoted array element, scanner_isspace() function is called for every 
character, so it might be worth inlining.

On the patches: They are a clear improvement, thanks for that. That 
said, I still find the logic very hard to follow, and there are some 
obvious performance optimizations that could be made.

ArrayCount() interprets low-level quoting and escaping, and tracks the 
dimensions at the same time. The state machine is pretty complicated. 
And when you've finally finished reading and grokking that function, you 
see that ReadArrayStr() repeats most of the same logic. Ugh.

I spent some time today refactoring it for readability and speed. I 
introduced a separate helper function to tokenize the input. It deals 
with whitespace, escapes, and backslashes. Then I merged ArrayCount() 
and ReadArrayStr() into one function that parses the elements and 
determines the dimensions in one pass. That speeds up parsing large 
arrays. With the tokenizer function, the logic in ReadArrayStr() is 
still quite readable, even though it's now checking the dimensions at 
the same time.

I also noticed that we used atoi() to parse the integers in the 
dimensions, which doesn't do much error checking. Some funny cases were 
accepted because of that, for example:

postgres=# select '[1+-+-+-+-+-+:2]={foo,bar}'::text[];
    text
-----------
  {foo,bar}
(1 row)

I tightened that up in the passing.

Attached are your patches, rebased to fix the conflicts with ae6d06f09 
like you intended. On top of that, my patches. My patches need more 
testing, benchmarking, and review, so if we want to sneak something into 
v16, better go with just your patches. If we're tightening up the 
accepted inputs, maybe fix that atoi() sloppiness, though.

-- 
Heikki Linnakangas
Neon (https://neon.tech)

Вложения

Re: Cleaning up array_in()

От

Tom Lane

Дата:

08 июля 2023 г., 22:49:31

Heikki Linnakangas <hlinnaka@iki.fi> writes:
> On 08/07/2023 19:08, Tom Lane wrote:
>> That got sideswiped by ae6d06f09, so here's a trivial rebase to
>> pacify the cfbot.

> Something's wrong with your attachments.

Yeah, I forgot to run mhbuild :-(

> I spent some time today refactoring it for readability and speed. I 
> introduced a separate helper function to tokenize the input. It deals 
> with whitespace, escapes, and backslashes. Then I merged ArrayCount() 
> and ReadArrayStr() into one function that parses the elements and 
> determines the dimensions in one pass. That speeds up parsing large 
> arrays. With the tokenizer function, the logic in ReadArrayStr() is 
> still quite readable, even though it's now checking the dimensions at 
> the same time.

Oh, thanks for taking a look!

> I also noticed that we used atoi() to parse the integers in the 
> dimensions, which doesn't do much error checking.

Yup, I'd noticed that too but not gotten around to doing anything
about it.  I agree with nailing it down better as long as we're
tightening things in this area.

> Attached are your patches, rebased to fix the conflicts with ae6d06f09 
> like you intended. On top of that, my patches. My patches need more 
> testing, benchmarking, and review, so if we want to sneak something into 
> v16, better go with just your patches.

At this point I'm only proposing this for v17, so additional cleanup
is welcome.

BTW, what's your opinion of allowing "[1:0]={}" ?  Although that was
my proposal to begin with, I'm having second thoughts about it now.
The main reason is that the input transformation would be lossy,
eg "[1:0]={}" and "[101:100]={}" would give the same results, which
seems a little ugly.  Given the lack of field complaints, maybe we
should leave that alone.

            regards, tom lane

Re: Cleaning up array_in()

От

Heikki Linnakangas

Дата:

08 июля 2023 г., 23:03:47

On 08/07/2023 22:49, Tom Lane wrote:
> BTW, what's your opinion of allowing "[1:0]={}" ?  Although that was
> my proposal to begin with, I'm having second thoughts about it now.
> The main reason is that the input transformation would be lossy,
> eg "[1:0]={}" and "[101:100]={}" would give the same results, which
> seems a little ugly.

Hmm, yeah, that would feel wrong if you did something like this:

select ('[2:1]={}'::text[]) || '{x}'::text[];

and expected it to return '[2:2]={x}'.

I guess we could allow "[1:0]={}" as a special case, but not 
"[101:100]={}", but that would be weird too.

> Given the lack of field complaints, maybe we should leave that
> alone.
+1 to leave it alone. It's a little weird either way, so better to stay 
put. We can revisit it later if we want to, but I wouldn't want to go 
back and forth on it.

-- 
Heikki Linnakangas
Neon (https://neon.tech)

Re: Cleaning up array_in()

От

jian he

Дата:

11 июля 2023 г., 06:34:37

On Sun, Jul 9, 2023 at 3:38 AM Heikki Linnakangas <hlinnaka@iki.fi> wrote:
>
> On 08/07/2023 19:08, Tom Lane wrote:
> > I wrote:
> >> So I end up with the attached.  I went ahead and dropped
> >> ArrayGetOffset0() as part of 0001, and I split 0002 into two patches
> >> where the new 0002 avoids re-indenting any existing code in order
> >> to ease review, and then 0003 is just a mechanical application
> >> of pgindent.
> >
> > That got sideswiped by ae6d06f09, so here's a trivial rebase to
> > pacify the cfbot.
> >
> > #text/x-diff; name="v3-0001-Simplify-and-speed-up-ReadArrayStr.patch"
[v3-0001-Simplify-and-speed-up-ReadArrayStr.patch]/home/tgl/pgsql/v3-0001-Simplify-and-speed-up-ReadArrayStr.patch 
> > #text/x-diff; name="v3-0002-Rewrite-ArrayCount-to-make-dimensionality-checks-.patch"
[v3-0002-Rewrite-ArrayCount-to-make-dimensionality-checks-.patch]
/home/tgl/pgsql/v3-0002-Rewrite-ArrayCount-to-make-dimensionality-checks-.patch
> > #text/x-diff; name="v3-0003-Re-indent-ArrayCount.patch" [v3-0003-Re-indent-ArrayCount.patch]
/home/tgl/pgsql/v3-0003-Re-indent-ArrayCount.patch
>
> Something's wrong with your attachments.
>
> Hmm, I wonder if ae6d06f09 had a negative performance impact. In an
> unquoted array element, scanner_isspace() function is called for every
> character, so it might be worth inlining.
>
> On the patches: They are a clear improvement, thanks for that. That
> said, I still find the logic very hard to follow, and there are some
> obvious performance optimizations that could be made.
>
> ArrayCount() interprets low-level quoting and escaping, and tracks the
> dimensions at the same time. The state machine is pretty complicated.
> And when you've finally finished reading and grokking that function, you
> see that ReadArrayStr() repeats most of the same logic. Ugh.
>
> I spent some time today refactoring it for readability and speed. I
> introduced a separate helper function to tokenize the input. It deals
> with whitespace, escapes, and backslashes. Then I merged ArrayCount()
> and ReadArrayStr() into one function that parses the elements and
> determines the dimensions in one pass. That speeds up parsing large
> arrays. With the tokenizer function, the logic in ReadArrayStr() is
> still quite readable, even though it's now checking the dimensions at
> the same time.
>
> I also noticed that we used atoi() to parse the integers in the
> dimensions, which doesn't do much error checking. Some funny cases were
> accepted because of that, for example:
>
> postgres=# select '[1+-+-+-+-+-+:2]={foo,bar}'::text[];
>     text
> -----------
>   {foo,bar}
> (1 row)
>
> I tightened that up in the passing.
>
> Attached are your patches, rebased to fix the conflicts with ae6d06f09
> like you intended. On top of that, my patches. My patches need more
> testing, benchmarking, and review, so if we want to sneak something into
> v16, better go with just your patches. If we're tightening up the
> accepted inputs, maybe fix that atoi() sloppiness, though.
>
> --
> Heikki Linnakangas
> Neon (https://neon.tech)

your idea is so clear!!!
all the Namings are way more descriptive. ArrayToken, personally
something with "state", "type" will be more clear.

> /*
> * FIXME: Is this still required? I believe all the checks it performs are
> * redundant with other checks in ReadArrayDimension() and ReadArrayStr()
> */
> nitems_according_to_dims = ArrayGetNItemsSafe(ndim, dim, escontext);
> if (nitems_according_to_dims < 0)
> PG_RETURN_NULL();
> if (nitems != nitems_according_to_dims)
> elog(ERROR, "mismatch nitems, %d vs %d", nitems, nitems_according_to_dims);
> if (!ArrayCheckBoundsSafe(ndim, dim, lBound, escontext))
> PG_RETURN_NULL();

--first time run
select '[0:3][0:2]={{1,2,3}, {4,5,6}, {7,8,9},{1,2,3}}'::int[];
INFO:  253 after ReadArrayDimensions dim:  4 3 71803430 21998
103381120 21998 ndim: 2
INFO:  770 after ReadArrayStr: dim:  4 3 71803430 21998 103381120
21998 nitems:12, ndim:2

--second time run.
INFO:  253 after ReadArrayDimensions dim:  4 3 0 0 0 0 ndim: 2
INFO:  770 after ReadArrayStr: dim:  4 3 0 0 0 0 nitems:12, ndim:2

select '{{1,2,3}, {4,5,6}, {7,8,9},{1,2,3}}'::int[]; --every time run,
the result is the same.
INFO:  253 after ReadArrayDimensions dim:  0 0 0 0 0 0 ndim: 0
INFO:  770 after ReadArrayStr: dim:  4 3 -1 -1 -1 -1 nitems:12, ndim:2

I think the reason is that the dim int array didn't explicitly assign
value when initializing it.

> /* Now it's safe to compute ub + 1 */
> if (ub + 1 < lBound[ndim])
> ereturn(escontext, false,
> (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
> errmsg("upper bound cannot be less than lower bound minus one")));

This part seems safe against cases like select
'[-2147483649:-2147483648]={1,2}'::int[];
but I am not sure. If so, then ArrayCheckBoundsSafe is unnecessary.

Another corner case successed: select '{1,}'::int[]; should fail.

Re: Cleaning up array_in()

От

jian he

Дата:

07 августа 2023 г., 08:35:03

hi.
based on Heikki v3.
I made some changes:
array_in: dim[6] all initialize with -1, lBound[6] all initialize with 1.
if ReadArrayDimensions called, then corresponding dimension lBound
will replace the initialized default 1 value.
ReadArrayStr, since array_in main function initialized dim array,
dimensions_specified true or false, I don't need to initialize again,
so I deleted that part.

to solve corner cases like  '{{1,},{1},}'::text[]. in ReadArrayStr
main switch function, like other ArrayToken, first evaluate
expect_delim then assign expect_delim.
In ATOK_LEVEL_END. if non-empty array, closing bracket either precede
with an element or another closing element. In both cases, the
previous expect_delim should be true.

in
         * FIXME: Is this still required? I believe all the checks it
performs are
         * redundant with other checks in ReadArrayDimension() and
ReadArrayStr()
         */
I deleted
-       nitems_according_to_dims = ArrayGetNItemsSafe(ndim, dim, escontext);
-       if (nitems_according_to_dims < 0)
-               PG_RETURN_NULL();
-       if (nitems != nitems_according_to_dims)
-               elog(ERROR, "mismatch nitems, %d vs %d", nitems,
nitems_according_to_dims);
but I am not sure if the following is necessary.
      if (!ArrayCheckBoundsSafe(ndim, dim, lBound, escontext))
                PG_RETURN_NULL();

I added some corner case tests like select '{{1,},{1},}'::text[];

some changes broken:
select '{{1},{}}'::text[];
-DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+DETAIL:  Unexpected "," character.
I added some error checks in ATOK_LEVEL_END. The first expect_delim
part check will first generate an error, the dimension error part will
not be reached.

Вложения

v4-0001-based-on-Heikki-verion3.patch

Re: Cleaning up array_in()

От

jian he

Дата:

04 сентября 2023 г., 03:00:00

hi.
attached v4.
v4, 0001 to 0005 is the same as v3 in
https://www.postgresql.org/message-id/5859ce4e-2be4-92b0-c85c-e1e24eab57c6%40iki.fi

v4-0006 doing some modifications to address the corner case mentioned
in the previous thread (like select '{{1,},{1},}'::text[]).
also fixed all these FIXME, Heikki mentioned in the code.

v4-0007 refactor ReadDimensionInt. to make the array dimension bound
variables within the INT_MIN and INT_MAX. so it will make select
'[21474836488:21474836489]={1,2}'::int[]; fail.

Вложения

Re: Cleaning up array_in()

От

jian he

Дата:

05 сентября 2023 г., 02:53:13

On Mon, Sep 4, 2023 at 8:00 AM jian he <jian.universality@gmail.com> wrote:
>
> hi.
> attached v4.
> v4, 0001 to 0005 is the same as v3 in
> https://www.postgresql.org/message-id/5859ce4e-2be4-92b0-c85c-e1e24eab57c6%40iki.fi
>
> v4-0006 doing some modifications to address the corner case mentioned
> in the previous thread (like select '{{1,},{1},}'::text[]).
> also fixed all these FIXME, Heikki mentioned in the code.
>
> v4-0007 refactor ReadDimensionInt. to make the array dimension bound
> variables within the INT_MIN and INT_MAX. so it will make select
> '[21474836488:21474836489]={1,2}'::int[]; fail.


attached, same as v4, but delete unused variable {nitems_according_to_dims}.

Вложения

Re: Cleaning up array_in()

От

Alexander Lakhin

Дата:

10 сентября 2023 г., 13:00:00

Hello Jian,

05.09.2023 02:53, jian he wrote:
> On Mon, Sep 4, 2023 at 8:00 AM jian he <jian.universality@gmail.com> wrote:
>> hi.
>> attached v4.
>> v4, 0001 to 0005 is the same as v3 in
>> https://www.postgresql.org/message-id/5859ce4e-2be4-92b0-c85c-e1e24eab57c6%40iki.fi
>>
>> v4-0006 doing some modifications to address the corner case mentioned
>> in the previous thread (like select '{{1,},{1},}'::text[]).
>> also fixed all these FIXME, Heikki mentioned in the code.
>>
>> v4-0007 refactor ReadDimensionInt. to make the array dimension bound
>> variables within the INT_MIN and INT_MAX. so it will make select
>> '[21474836488:21474836489]={1,2}'::int[]; fail.
>
> attached, same as v4, but delete unused variable {nitems_according_to_dims}.

Please look at the differences, I've observed with the latest patches
applied, old vs new behavior:

Case 1:
SELECT '{1,'::integer[];
ERROR:  malformed array literal: "{1,"
LINE 1: SELECT '{1,'::integer[];
                ^
DETAIL:  Unexpected end of input.

vs

ERROR:  malformed array literal: "{1,"
LINE 1: SELECT '{1,'::integer[];
                ^

(no DETAIL)

Case 2:
SELECT '{{},}'::text[];
ERROR:  malformed array literal: "{{},}"
LINE 1: SELECT '{{},}'::text[];
                ^
DETAIL:  Unexpected "}" character

vs
  text
------
  {}
(1 row)

Case 3:
select '{\{}'::text[];
  text
-------
  {"{"}
(1 row)

vs
  text
------
  {""}

Best regards,
Alexander

Re: Cleaning up array_in()

От

jian he

Дата:

11 сентября 2023 г., 08:26:16

On Sun, Sep 10, 2023 at 6:00 PM Alexander Lakhin <exclusion@gmail.com> wrote:
> Case 1:
> SELECT '{1,'::integer[];
> ERROR:  malformed array literal: "{1,"
> LINE 1: SELECT '{1,'::integer[];
>                 ^
> DETAIL:  Unexpected end of input.
>
> vs
>
> ERROR:  malformed array literal: "{1,"
> LINE 1: SELECT '{1,'::integer[];
>                 ^
>
> (no DETAIL)
>
> Case 2:
> SELECT '{{},}'::text[];
> ERROR:  malformed array literal: "{{},}"
> LINE 1: SELECT '{{},}'::text[];
>                 ^
> DETAIL:  Unexpected "}" character
>
> vs
>   text
> ------
>   {}
> (1 row)
>
> Case 3:
> select '{\{}'::text[];
>   text
> -------
>   {"{"}
> (1 row)
>
> vs
>   text
> ------
>   {""}
>
> Best regards,
> Alexander

hi.
Thanks for reviewing it.

> DETAIL:  Unexpected end of input.
In many cases, ending errors will happen, so I consolidate it.

SELECT '{{},}'::text[];
solved by tracking current token type and previous token type.

select '{\{}'::text[];
solved by update dstendptr.

attached.

Вложения

Re: Cleaning up array_in()

От

Alexander Lakhin

Дата:

11 сентября 2023 г., 15:00:00

11.09.2023 08:26, jian he wrote:
> hi.
> Thanks for reviewing it.
>
>> DETAIL:  Unexpected end of input.
> In many cases, ending errors will happen, so I consolidate it.
>
> SELECT '{{},}'::text[];
> solved by tracking current token type and previous token type.
>
> select '{\{}'::text[];
> solved by update dstendptr.
>
> attached.

Thank you!
I can confirm that all those anomalies are fixed now.
But new version brings a warning when compiled with gcc:
arrayfuncs.c:659:9: warning: variable 'prev_tok' is uninitialized when used here [-Wuninitialized]
                                 if (prev_tok == ATOK_DELIM || nest_level == 0)
                                     ^~~~~~~~
arrayfuncs.c:628:3: note: variable 'prev_tok' is declared here
                 ArrayToken      prev_tok;
                 ^
1 warning generated.

Also it looks like an updated comment needs fixing/improving:
  /* No array dimensions, so first literal character should be oepn curl-braces */
(should be an opening brace?)

(I haven't look at the code closely.)

Best regards,
Alexander

Re: Cleaning up array_in()

От

jian he

Дата:

12 сентября 2023 г., 11:45:29

On Mon, Sep 11, 2023 at 8:00 PM Alexander Lakhin <exclusion@gmail.com> wrote:
>
> I can confirm that all those anomalies are fixed now.
> But new version brings a warning when compiled with gcc:
> arrayfuncs.c:659:9: warning: variable 'prev_tok' is uninitialized when used here [-Wuninitialized]
>                                  if (prev_tok == ATOK_DELIM || nest_level == 0)
>                                      ^~~~~~~~
> arrayfuncs.c:628:3: note: variable 'prev_tok' is declared here
>                  ArrayToken      prev_tok;
>                  ^
> 1 warning generated.
>
> Also it looks like an updated comment needs fixing/improving:
>   /* No array dimensions, so first literal character should be oepn curl-braces */
> (should be an opening brace?)
>

fixed these 2 issues.
--query
SELECT ('{ ' || string_agg(chr((ascii('B') + round(random() * 25)) ::
integer),', ') || ' }')::text[]
FROM generate_series(1,1e6) \watch i=0.1 c=1

After applying the patch, the above query runs slightly faster.

Вложения

Re: Cleaning up array_in()

От

Alexander Lakhin

Дата:

13 сентября 2023 г., 09:00:00

12.09.2023 11:45, jian he wrote:
> On Mon, Sep 11, 2023 at 8:00 PM Alexander Lakhin <exclusion@gmail.com> wrote:
>> I can confirm that all those anomalies are fixed now.
>> But new version brings a warning when compiled with gcc:
>> arrayfuncs.c:659:9: warning: variable 'prev_tok' is uninitialized when used here [-Wuninitialized]
>>                                   if (prev_tok == ATOK_DELIM || nest_level == 0)
>>                                       ^~~~~~~~
>> arrayfuncs.c:628:3: note: variable 'prev_tok' is declared here
>>                   ArrayToken      prev_tok;
>>                   ^
>> 1 warning generated.
>>
>> Also it looks like an updated comment needs fixing/improving:
>>    /* No array dimensions, so first literal character should be oepn curl-braces */
>> (should be an opening brace?)
>>
> fixed these 2 issues.
> --query
> SELECT ('{ ' || string_agg(chr((ascii('B') + round(random() * 25)) ::
> integer),', ') || ' }')::text[]
> FROM generate_series(1,1e6) \watch i=0.1 c=1
>
> After applying the patch, the above query runs slightly faster.

Thank you, Jian He!

Now I see only a few wrinkles.
1) A minor asymmetry in providing details appeared:
select E'{"a"a}'::text[];
ERROR:  malformed array literal: "{"a"a}"
LINE 1: select E'{"a"a}'::text[];
                ^
DETAIL:  Unexpected array element.

select E'{a"a"}'::text[];
ERROR:  malformed array literal: "{a"a"}"
LINE 1: select E'{a"a"}'::text[];
                ^
(no DETAIL)

Old behavior:
select E'{a"a"}'::text[];
ERROR:  malformed array literal: "{a"a"}"
LINE 1: select E'{a"a"}'::text[];
                ^
DETAIL:  Unexpected array element.

select E'{"a"a}'::text[];
ERROR:  malformed array literal: "{"a"a}"
LINE 1: select E'{"a"a}'::text[];
                ^
DETAIL:  Unexpected array element.

2) CPPFLAGS="-DARRAYDEBUG" ./configure ... breaks "make check", maybe change elog(NOTICE) to elog(DEBUG1)?
2a) a message logged there lacks some delimiter before "lBound info":
NOTICE:  array_in- ndim 1 ( 3 -1 -1 -1 -1 -1lBound info 1 1 1 1 1 1) for {red,green,blue}
what about changing the format to "ndim 1 ( 3 -1 -1 -1 -1 -1; lBound info: 1 1 1 1 1 1)"?

3) It seems that new comments need polishing, in particular:
  /* initialize dim, lBound. useful for ReadArrayDimensions ReadArrayStr */
->?
  /* Initialize dim, lBound for ReadArrayDimensions, ReadArrayStr */

Otherwise, we determine the dimensions from the in curly-braces
->?
Otherwise, we determine the dimensions from the curly braces.

Best regards,
Alexander

Re: Cleaning up array_in()

От

jian he

Дата:

13 сентября 2023 г., 11:55:23

On Wed, Sep 13, 2023 at 2:00 PM Alexander Lakhin <exclusion@gmail.com> wrote:
>
>
> Now I see only a few wrinkles.
> 1) A minor asymmetry in providing details appeared:
> select E'{"a"a}'::text[];
> ERROR:  malformed array literal: "{"a"a}"
> LINE 1: select E'{"a"a}'::text[];
>                 ^
> DETAIL:  Unexpected array element.
>
> select E'{a"a"}'::text[];
> ERROR:  malformed array literal: "{a"a"}"
> LINE 1: select E'{a"a"}'::text[];
>                 ^
> (no DETAIL)
>
> Old behavior:
> select E'{a"a"}'::text[];
> ERROR:  malformed array literal: "{a"a"}"
> LINE 1: select E'{a"a"}'::text[];
>                 ^
> DETAIL:  Unexpected array element.
>
> select E'{"a"a}'::text[];
> ERROR:  malformed array literal: "{"a"a}"
> LINE 1: select E'{"a"a}'::text[];
>                 ^
> DETAIL:  Unexpected array element.

fixed and added these two query to the test.

> 2) CPPFLAGS="-DARRAYDEBUG" ./configure ... breaks "make check", maybe change elog(NOTICE) to elog(DEBUG1)?
> 2a) a message logged there lacks some delimiter before "lBound info":
> NOTICE:  array_in- ndim 1 ( 3 -1 -1 -1 -1 -1lBound info 1 1 1 1 1 1) for {red,green,blue}
> what about changing the format to "ndim 1 ( 3 -1 -1 -1 -1 -1; lBound info: 1 1 1 1 1 1)"?

fixed. Use elog(DEBUG1) now.

> 3) It seems that new comments need polishing, in particular:
>   /* initialize dim, lBound. useful for ReadArrayDimensions ReadArrayStr */
> ->?
>   /* Initialize dim, lBound for ReadArrayDimensions, ReadArrayStr */
>
> Otherwise, we determine the dimensions from the in curly-braces
> ->?
> Otherwise, we determine the dimensions from the curly braces.
>
> Best regards,
> Alexander

comments updates. please check the attached.

Вложения

Re: Cleaning up array_in()

От

Alexander Lakhin

Дата:

14 сентября 2023 г., 09:00:01

13.09.2023 11:55, jian he wrote:
>> 2) CPPFLAGS="-DARRAYDEBUG" ./configure ... breaks "make check", maybe change elog(NOTICE) to elog(DEBUG1)?
>> 2a) a message logged there lacks some delimiter before "lBound info":
>> NOTICE:  array_in- ndim 1 ( 3 -1 -1 -1 -1 -1lBound info 1 1 1 1 1 1) for {red,green,blue}
>> what about changing the format to "ndim 1 ( 3 -1 -1 -1 -1 -1; lBound info: 1 1 1 1 1 1)"?
> fixed. Use elog(DEBUG1) now.

Thanks for the fixes!

I didn't mean to remove the prefix "array_in-", but in fact I was confused
by the "{function_name}-" syntax, and now when I've looked at it closely, I
see that that syntax was quite popular ("date_in- ", "single_decode- ", ...)
back in 1997 (see 9d8ae7977). But nowadays it is out of fashion, with most
of such debugging prints were gone with 7a877dfd2 and the next-to-last one
with 50861cd68. Moreover, as the latter commit shows, such debugging output
can be eliminated completely without remorse. (And I couldn't find mentions
of ARRAYDEBUG in pgsql-bugs, pgsql-hackers archives, so probably no one used
that debugging facility since it's introduction.)
As of now, the output still weird (I mean the excessive right parenthesis):
DEBUG:  ndim 1 ( 2 -1 -1 -1 -1 -1); lBound info: 1 1 1 1 1 1) for {0,0}

Otherwise, from a user perspective, the patch set looks good to me. (Though
maybe English language editorialization still needed before committing it.)

Best regards,
Alexander

Re: Cleaning up array_in()

От

jian he

Дата:

14 сентября 2023 г., 11:14:01

On Thu, Sep 14, 2023 at 2:00 PM Alexander Lakhin <exclusion@gmail.com> wrote:
>
>
> I didn't mean to remove the prefix "array_in-", but in fact I was confused
> by the "{function_name}-" syntax, and now when I've looked at it closely, I
> see that that syntax was quite popular ("date_in- ", "single_decode- ", ...)
> back in 1997 (see 9d8ae7977). But nowadays it is out of fashion, with most
> of such debugging prints were gone with 7a877dfd2 and the next-to-last one
> with 50861cd68. Moreover, as the latter commit shows, such debugging output
> can be eliminated completely without remorse. (And I couldn't find mentions
> of ARRAYDEBUG in pgsql-bugs, pgsql-hackers archives, so probably no one used
> that debugging facility since it's introduction.)
> As of now, the output still weird (I mean the excessive right parenthesis):
> DEBUG:  ndim 1 ( 2 -1 -1 -1 -1 -1); lBound info: 1 1 1 1 1 1) for {0,0}
>

hi.
similar to NUMERIC_DEBUG. I made the following adjustments.
if unnecessary, removing this part seems also fine, in GDB, you can
print it out directly.

/* ----------
 * Uncomment the following to get a dump of a array's ndim, dim, lBound.
 * ----------
#define ARRAYDEBUG
 */
#ifdef ARRAYDEBUG
{
StringInfoData buf;

initStringInfo(&buf);

appendStringInfo(&buf, "array_in- ndim %d, dim info(", ndim);
for (int i = 0; i < MAXDIM; i++)
appendStringInfo(&buf, " %d", dim[i]);
appendStringInfo(&buf, "); lBound info(");
for (int i = 0; i < MAXDIM; i++)
appendStringInfo(&buf, " %d", lBound[i]);
appendStringInfo(&buf, ") for %s", string);
elog(DEBUG1, "%s", buf.data);
pfree(buf.data);
}
#endif

other than this, no other changes.

Вложения

Re: Cleaning up array_in()

От

jian he

Дата:

30 октября 2023 г., 03:00:00

rebase after commit
(https://git.postgresql.org/cgit/postgresql.git/commit/?id=611806cd726fc92989ac918eac48fd8d684869c7)

Вложения

Re: Cleaning up array_in()

От

Tom Lane

Дата:

08 ноября 2023 г., 02:52:09

I got back to looking at this today (sorry for delay), and did a pass
of code review.  I think we are getting pretty close to something
committable.  The one loose end IMO is this bit in ReadArrayToken:

+            case '"':
+
+                /*
+                 * XXX "Unexpected %c character" would be more apropos, but
+                 * this message is what the pre-v17 implementation produced,
+                 * so we'll keep it for now.
+                 */
+                errsave(escontext,
+                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("malformed array literal: \"%s\"", origStr),
+                         errdetail("Unexpected array element.")));
+                return ATOK_ERROR;

This comes out when you write something like '{foo"bar"}', and I'd
say the choice of message is not great.  On the other hand, it's
consistent with what you get from '{"foo""bar"}', and if we wanted
to change that too then some tweaking of the state machine in
ReadArrayStr would be required (or else modify ReadArrayToken so
it doesn't return instantly upon seeing the second quote mark).
I'm not sure that this is worth messing with.

Anyway, I think we are well past the point where splitting the patch
into multiple parts is worth doing, because we've rewritten pretty
much all of this code, and the intermediate versions are not terribly
helpful.  So I just folded it all into one patch.

Some notes about specific points:

* Per previous discussion, I undid the change to allow "[1:0]"
dimensions, but I left a comment behind about that.

* Removing the ArrayGetNItemsSafe/ArrayCheckBoundsSafe calls
seems OK, but then we need to be more careful about detecting
overflows and disallowed cases in ReadArrayDimensions.

* I don't think the ARRAYDEBUG code is of any value whatever.
The fact that nobody bothered to improve it to print more than
the dim[] values proves it hasn't been used in decades.
Let's just nuke it.

* We can simplify the state machine in ReadArrayStr some more: it
seems to me it's sufficient to track "expect_delim", as long as you
realize that that really means "expect typdelim or right brace".
(Maybe another name would be better?  I couldn't think of anything
short though.)

* I switched to using a StringInfo instead of a fixed-size elembuf,
as Heikki speculated about.

* I added some more test cases to cover things that evidently weren't
sufficiently tested, like the has_escapes business which was flat
out broken in v10, and to improve the code coverage report.

Comments?

            regards, tom lane

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 3ff13eb419..638e4f0382 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -54,18 +54,16 @@ bool        Array_nulls = true;
             PG_FREE_IF_COPY(array, n); \
     } while (0)

+/* ReadArrayToken return type */
 typedef enum
 {
-    ARRAY_NO_LEVEL,
-    ARRAY_LEVEL_STARTED,
-    ARRAY_ELEM_STARTED,
-    ARRAY_ELEM_COMPLETED,
-    ARRAY_QUOTED_ELEM_STARTED,
-    ARRAY_QUOTED_ELEM_COMPLETED,
-    ARRAY_ELEM_DELIMITED,
-    ARRAY_LEVEL_COMPLETED,
-    ARRAY_LEVEL_DELIMITED,
-} ArrayParseState;
+    ATOK_LEVEL_START,
+    ATOK_LEVEL_END,
+    ATOK_DELIM,
+    ATOK_ELEM,
+    ATOK_ELEM_NULL,
+    ATOK_ERROR,
+} ArrayToken;

 /* Working state for array_iterate() */
 typedef struct ArrayIteratorData
@@ -91,15 +89,21 @@ typedef struct ArrayIteratorData
     int            current_item;    /* the item # we're at in the array */
 }            ArrayIteratorData;

-static int    ArrayCount(const char *str, int *dim, char typdelim,
-                       Node *escontext);
-static bool ReadArrayStr(char *arrayStr, const char *origStr,
-                         int nitems, int ndim, int *dim,
+static bool ReadArrayDimensions(char **srcptr, int *ndim_p,
+                                int *dim, int *lBound,
+                                const char *origStr, Node *escontext);
+static bool ReadDimensionInt(char **srcptr, int *result,
+                             const char *origStr, Node *escontext);
+static bool ReadArrayStr(char **srcptr,
                          FmgrInfo *inputproc, Oid typioparam, int32 typmod,
                          char typdelim,
                          int typlen, bool typbyval, char typalign,
-                         Datum *values, bool *nulls,
-                         bool *hasnulls, int32 *nbytes, Node *escontext);
+                         int *ndim_p, int *dim,
+                         int *nitems_p,
+                         Datum **values_p, bool **nulls_p,
+                         const char *origStr, Node *escontext);
+static ArrayToken ReadArrayToken(char **srcptr, StringInfo elembuf, char typdelim,
+                                 const char *origStr, Node *escontext);
 static void ReadArrayBinary(StringInfo buf, int nitems,
                             FmgrInfo *receiveproc, Oid typioparam, int32 typmod,
                             int typlen, bool typbyval, char typalign,
@@ -185,12 +189,10 @@ array_in(PG_FUNCTION_ARGS)
     char        typalign;
     char        typdelim;
     Oid            typioparam;
-    char       *string_save,
-               *p;
-    int            i,
-                nitems;
-    Datum       *dataPtr;
-    bool       *nullsPtr;
+    char       *p;
+    int            nitems;
+    Datum       *values;
+    bool       *nulls;
     bool        hasnulls;
     int32        nbytes;
     int32        dataoffset;
@@ -233,104 +235,34 @@ array_in(PG_FUNCTION_ARGS)
     typdelim = my_extra->typdelim;
     typioparam = my_extra->typioparam;

-    /* Make a modifiable copy of the input */
-    string_save = pstrdup(string);
+    /* Initialize dim, lBound for ReadArrayDimensions, ReadArrayStr */
+    for (int i = 0; i < MAXDIM; i++)
+    {
+        dim[i] = -1;            /* indicates "not yet known" */
+        lBound[i] = 1;            /* default lower bound */
+    }

     /*
-     * If the input string starts with dimension info, read and use that.
-     * Otherwise, we require the input to be in curly-brace style, and we
-     * prescan the input to determine dimensions.
+     * Start processing the input string.
      *
-     * Dimension info takes the form of one or more [n] or [m:n] items. The
-     * outer loop iterates once per dimension item.
+     * If the input string starts with dimension info, read and use that.
+     * Otherwise, we'll determine the dimensions during ReadArrayStr.
      */
-    p = string_save;
-    ndim = 0;
-    for (;;)
-    {
-        char       *q;
-        int            ub;
-
-        /*
-         * Note: we currently allow whitespace between, but not within,
-         * dimension items.
-         */
-        while (scanner_isspace(*p))
-            p++;
-        if (*p != '[')
-            break;                /* no more dimension items */
-        p++;
-        if (ndim >= MAXDIM)
-            ereturn(escontext, (Datum) 0,
-                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                     errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-                            ndim + 1, MAXDIM)));
-
-        for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++)
-             /* skip */ ;
-        if (q == p)                /* no digits? */
-            ereturn(escontext, (Datum) 0,
-                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                     errmsg("malformed array literal: \"%s\"", string),
-                     errdetail("\"[\" must introduce explicitly-specified array dimensions.")));
-
-        if (*q == ':')
-        {
-            /* [m:n] format */
-            *q = '\0';
-            lBound[ndim] = atoi(p);
-            p = q + 1;
-            for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++)
-                 /* skip */ ;
-            if (q == p)            /* no digits? */
-                ereturn(escontext, (Datum) 0,
-                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                         errmsg("malformed array literal: \"%s\"", string),
-                         errdetail("Missing array dimension value.")));
-        }
-        else
-        {
-            /* [n] format */
-            lBound[ndim] = 1;
-        }
-        if (*q != ']')
-            ereturn(escontext, (Datum) 0,
-                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                     errmsg("malformed array literal: \"%s\"", string),
-                     errdetail("Missing \"%s\" after array dimensions.",
-                               "]")));
-
-        *q = '\0';
-        ub = atoi(p);
-        p = q + 1;
-        if (ub < lBound[ndim])
-            ereturn(escontext, (Datum) 0,
-                    (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
-                     errmsg("upper bound cannot be less than lower bound")));
-
-        dim[ndim] = ub - lBound[ndim] + 1;
-        ndim++;
-    }
+    p = string;
+    if (!ReadArrayDimensions(&p, &ndim, dim, lBound, string, escontext))
+        return (Datum) 0;

     if (ndim == 0)
     {
-        /* No array dimensions, so intuit dimensions from brace structure */
+        /* No array dimensions, so next character should be a left brace */
         if (*p != '{')
             ereturn(escontext, (Datum) 0,
                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                      errmsg("malformed array literal: \"%s\"", string),
                      errdetail("Array value must start with \"{\" or dimension information.")));
-        ndim = ArrayCount(p, dim, typdelim, escontext);
-        if (ndim < 0)
-            PG_RETURN_NULL();
-        for (i = 0; i < ndim; i++)
-            lBound[i] = 1;
     }
     else
     {
-        int            ndim_braces,
-                    dim_braces[MAXDIM];
-
         /* If array dimensions are given, expect '=' operator */
         if (strncmp(p, ASSGN, strlen(ASSGN)) != 0)
             ereturn(escontext, (Datum) 0,
@@ -339,66 +271,68 @@ array_in(PG_FUNCTION_ARGS)
                      errdetail("Missing \"%s\" after array dimensions.",
                                ASSGN)));
         p += strlen(ASSGN);
+        /* Allow whitespace after it */
         while (scanner_isspace(*p))
             p++;

-        /*
-         * intuit dimensions from brace structure -- it better match what we
-         * were given
-         */
         if (*p != '{')
             ereturn(escontext, (Datum) 0,
                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                      errmsg("malformed array literal: \"%s\"", string),
                      errdetail("Array contents must start with \"{\".")));
-        ndim_braces = ArrayCount(p, dim_braces, typdelim, escontext);
-        if (ndim_braces < 0)
-            PG_RETURN_NULL();
-        if (ndim_braces != ndim)
+    }
+
+    /* Parse the value part, in the curly braces: { ... } */
+    if (!ReadArrayStr(&p,
+                      &my_extra->proc, typioparam, typmod,
+                      typdelim,
+                      typlen, typbyval, typalign,
+                      &ndim,
+                      dim,
+                      &nitems,
+                      &values, &nulls,
+                      string,
+                      escontext))
+        return (Datum) 0;
+
+    /* only whitespace is allowed after the closing brace */
+    while (*p)
+    {
+        if (!scanner_isspace(*p++))
             ereturn(escontext, (Datum) 0,
                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                      errmsg("malformed array literal: \"%s\"", string),
-                     errdetail("Specified array dimensions do not match array contents.")));
-        for (i = 0; i < ndim; ++i)
-        {
-            if (dim[i] != dim_braces[i])
-                ereturn(escontext, (Datum) 0,
-                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                         errmsg("malformed array literal: \"%s\"", string),
-                         errdetail("Specified array dimensions do not match array contents.")));
-        }
+                     errdetail("Junk after closing right brace.")));
     }

-#ifdef ARRAYDEBUG
-    printf("array_in- ndim %d (", ndim);
-    for (i = 0; i < ndim; i++)
-    {
-        printf(" %d", dim[i]);
-    };
-    printf(") for %s\n", string);
-#endif
-
-    /* This checks for overflow of the array dimensions */
-    nitems = ArrayGetNItemsSafe(ndim, dim, escontext);
-    if (nitems < 0)
-        PG_RETURN_NULL();
-    if (!ArrayCheckBoundsSafe(ndim, dim, lBound, escontext))
-        PG_RETURN_NULL();
-
     /* Empty array? */
     if (nitems == 0)
         PG_RETURN_ARRAYTYPE_P(construct_empty_array(element_type));

-    dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
-    nullsPtr = (bool *) palloc(nitems * sizeof(bool));
-    if (!ReadArrayStr(p, string,
-                      nitems, ndim, dim,
-                      &my_extra->proc, typioparam, typmod,
-                      typdelim,
-                      typlen, typbyval, typalign,
-                      dataPtr, nullsPtr,
-                      &hasnulls, &nbytes, escontext))
-        PG_RETURN_NULL();
+    /*
+     * Check for nulls, compute total data space needed
+     */
+    hasnulls = false;
+    nbytes = 0;
+    for (int i = 0; i < nitems; i++)
+    {
+        if (nulls[i])
+            hasnulls = true;
+        else
+        {
+            /* let's just make sure data is not toasted */
+            if (typlen == -1)
+                values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
+            nbytes = att_addlength_datum(nbytes, typlen, values[i]);
+            nbytes = att_align_nominal(nbytes, typalign);
+            /* check for overflow of total request */
+            if (!AllocSizeIsValid(nbytes))
+                ereturn(escontext, (Datum) 0,
+                        (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                         errmsg("array size exceeds the maximum allowed (%d)",
+                                (int) MaxAllocSize)));
+        }
+    }
     if (hasnulls)
     {
         dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nitems);
@@ -409,6 +343,10 @@ array_in(PG_FUNCTION_ARGS)
         dataoffset = 0;            /* marker for no null bitmap */
         nbytes += ARR_OVERHEAD_NONULLS(ndim);
     }
+
+    /*
+     * Construct the final array datum
+     */
     retval = (ArrayType *) palloc0(nbytes);
     SET_VARSIZE(retval, nbytes);
     retval->ndim = ndim;
@@ -424,302 +362,216 @@ array_in(PG_FUNCTION_ARGS)
     memcpy(ARR_LBOUND(retval), lBound, ndim * sizeof(int));

     CopyArrayEls(retval,
-                 dataPtr, nullsPtr, nitems,
+                 values, nulls, nitems,
                  typlen, typbyval, typalign,
                  true);

-    pfree(dataPtr);
-    pfree(nullsPtr);
-    pfree(string_save);
+    pfree(values);
+    pfree(nulls);

     PG_RETURN_ARRAYTYPE_P(retval);
 }

 /*
- * ArrayCount
- *     Determines the dimensions for an array string.
+ * ReadArrayDimensions
+ *     parses the array dimensions part of the input and converts the values
+ *     to internal format.
+ *
+ * On entry, *srcptr points to the string to parse. It is advanced to point
+ * after whitespace (if any) and dimension info (if any).
+ *
+ * *ndim_p, *dim, and *lBound are output variables. They are filled with the
+ * number of dimensions (<= MAXDIM), the length of each dimension, and the
+ * lower bounds of the slices, respectively.  *ndim_p will be set to zero
+ * when no dimensions appear.
+ *
+ * 'origStr' is the original input string, used only in error messages.
+ * If *escontext points to an ErrorSaveContext, details of any error are
+ * reported there.
  *
- * Returns number of dimensions as function result.  The axis lengths are
- * returned in dim[], which must be of size MAXDIM.
+ * Result:
+ *    true for success, false for failure (if escontext is provided).
  *
- * If we detect an error, fill *escontext with error details and return -1
- * (unless escontext isn't provided, in which case errors will be thrown).
+ * Note that dim[] and lBound[] are allocated by the caller, and must have
+ * MAXDIM elements.
  */
-static int
-ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
+static bool
+ReadArrayDimensions(char **srcptr, int *ndim_p, int *dim, int *lBound,
+                    const char *origStr, Node *escontext)
 {
-    int            nest_level = 0,
-                i;
-    int            ndim = 1,
-                temp[MAXDIM],
-                nelems[MAXDIM],
-                nelems_last[MAXDIM];
-    bool        in_quotes = false;
-    bool        eoArray = false;
-    bool        empty_array = true;
-    const char *ptr;
-    ArrayParseState parse_state = ARRAY_NO_LEVEL;
+    char       *p = *srcptr;
+    int            ndim;

-    for (i = 0; i < MAXDIM; ++i)
+    /*
+     * Dimension info takes the form of one or more [n] or [m:n] items.  This
+     * loop iterates once per dimension item.
+     */
+    ndim = 0;
+    for (;;)
     {
-        temp[i] = dim[i] = nelems_last[i] = 0;
-        nelems[i] = 1;
-    }
+        char       *q;
+        int            ub;
+        int            i;

-    ptr = str;
-    while (!eoArray)
-    {
-        bool        itemdone = false;
+        /*
+         * Note: we currently allow whitespace between, but not within,
+         * dimension items.
+         */
+        while (scanner_isspace(*p))
+            p++;
+        if (*p != '[')
+            break;                /* no more dimension items */
+        p++;
+        if (ndim >= MAXDIM)
+            ereturn(escontext, false,
+                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                     errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+                            ndim + 1, MAXDIM)));

-        while (!itemdone)
-        {
-            if (parse_state == ARRAY_ELEM_STARTED ||
-                parse_state == ARRAY_QUOTED_ELEM_STARTED)
-                empty_array = false;
+        q = p;
+        if (!ReadDimensionInt(&p, &i, origStr, escontext))
+            return false;
+        if (p == q)                /* no digits? */
+            ereturn(escontext, false,
+                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                     errmsg("malformed array literal: \"%s\"", origStr),
+                     errdetail("\"[\" must introduce explicitly-specified array dimensions.")));

-            switch (*ptr)
-            {
-                case '\0':
-                    /* Signal a premature end of the string */
-                    ereturn(escontext, -1,
-                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                             errmsg("malformed array literal: \"%s\"", str),
-                             errdetail("Unexpected end of input.")));
-                case '\\':
-
-                    /*
-                     * An escape must be after a level start, after an element
-                     * start, or after an element delimiter. In any case we
-                     * now must be past an element start.
-                     */
-                    if (parse_state != ARRAY_LEVEL_STARTED &&
-                        parse_state != ARRAY_ELEM_STARTED &&
-                        parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-                        parse_state != ARRAY_ELEM_DELIMITED)
-                        ereturn(escontext, -1,
-                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                 errmsg("malformed array literal: \"%s\"", str),
-                                 errdetail("Unexpected \"%c\" character.",
-                                           '\\')));
-                    if (parse_state != ARRAY_QUOTED_ELEM_STARTED)
-                        parse_state = ARRAY_ELEM_STARTED;
-                    /* skip the escaped character */
-                    if (*(ptr + 1))
-                        ptr++;
-                    else
-                        ereturn(escontext, -1,
-                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                 errmsg("malformed array literal: \"%s\"", str),
-                                 errdetail("Unexpected end of input.")));
-                    break;
-                case '"':
-
-                    /*
-                     * A quote must be after a level start, after a quoted
-                     * element start, or after an element delimiter. In any
-                     * case we now must be past an element start.
-                     */
-                    if (parse_state != ARRAY_LEVEL_STARTED &&
-                        parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-                        parse_state != ARRAY_ELEM_DELIMITED)
-                        ereturn(escontext, -1,
-                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                 errmsg("malformed array literal: \"%s\"", str),
-                                 errdetail("Unexpected array element.")));
-                    in_quotes = !in_quotes;
-                    if (in_quotes)
-                        parse_state = ARRAY_QUOTED_ELEM_STARTED;
-                    else
-                        parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
-                    break;
-                case '{':
-                    if (!in_quotes)
-                    {
-                        /*
-                         * A left brace can occur if no nesting has occurred
-                         * yet, after a level start, or after a level
-                         * delimiter.
-                         */
-                        if (parse_state != ARRAY_NO_LEVEL &&
-                            parse_state != ARRAY_LEVEL_STARTED &&
-                            parse_state != ARRAY_LEVEL_DELIMITED)
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Unexpected \"%c\" character.",
-                                               '{')));
-                        parse_state = ARRAY_LEVEL_STARTED;
-                        if (nest_level >= MAXDIM)
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                                     errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-                                            nest_level + 1, MAXDIM)));
-                        temp[nest_level] = 0;
-                        nest_level++;
-                        if (ndim < nest_level)
-                            ndim = nest_level;
-                    }
-                    break;
-                case '}':
-                    if (!in_quotes)
-                    {
-                        /*
-                         * A right brace can occur after an element start, an
-                         * element completion, a quoted element completion, or
-                         * a level completion.
-                         */
-                        if (parse_state != ARRAY_ELEM_STARTED &&
-                            parse_state != ARRAY_ELEM_COMPLETED &&
-                            parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-                            parse_state != ARRAY_LEVEL_COMPLETED &&
-                            !(nest_level == 1 && parse_state == ARRAY_LEVEL_STARTED))
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Unexpected \"%c\" character.",
-                                               '}')));
-                        parse_state = ARRAY_LEVEL_COMPLETED;
-                        if (nest_level == 0)
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Unmatched \"%c\" character.", '}')));
-                        nest_level--;
-
-                        if (nelems_last[nest_level] != 0 &&
-                            nelems[nest_level] != nelems_last[nest_level])
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Multidimensional arrays must have "
-                                               "sub-arrays with matching "
-                                               "dimensions.")));
-                        nelems_last[nest_level] = nelems[nest_level];
-                        nelems[nest_level] = 1;
-                        if (nest_level == 0)
-                            eoArray = itemdone = true;
-                        else
-                        {
-                            /*
-                             * We don't set itemdone here; see comments in
-                             * ReadArrayStr
-                             */
-                            temp[nest_level - 1]++;
-                        }
-                    }
-                    break;
-                default:
-                    if (!in_quotes)
-                    {
-                        if (*ptr == typdelim)
-                        {
-                            /*
-                             * Delimiters can occur after an element start, an
-                             * element completion, a quoted element
-                             * completion, or a level completion.
-                             */
-                            if (parse_state != ARRAY_ELEM_STARTED &&
-                                parse_state != ARRAY_ELEM_COMPLETED &&
-                                parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-                                parse_state != ARRAY_LEVEL_COMPLETED)
-                                ereturn(escontext, -1,
-                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                         errmsg("malformed array literal: \"%s\"", str),
-                                         errdetail("Unexpected \"%c\" character.",
-                                                   typdelim)));
-                            if (parse_state == ARRAY_LEVEL_COMPLETED)
-                                parse_state = ARRAY_LEVEL_DELIMITED;
-                            else
-                                parse_state = ARRAY_ELEM_DELIMITED;
-                            itemdone = true;
-                            nelems[nest_level - 1]++;
-                        }
-                        else if (!scanner_isspace(*ptr))
-                        {
-                            /*
-                             * Other non-space characters must be after a
-                             * level start, after an element start, or after
-                             * an element delimiter. In any case we now must
-                             * be past an element start.
-                             */
-                            if (parse_state != ARRAY_LEVEL_STARTED &&
-                                parse_state != ARRAY_ELEM_STARTED &&
-                                parse_state != ARRAY_ELEM_DELIMITED)
-                                ereturn(escontext, -1,
-                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                         errmsg("malformed array literal: \"%s\"", str),
-                                         errdetail("Unexpected array element.")));
-                            parse_state = ARRAY_ELEM_STARTED;
-                        }
-                    }
-                    break;
-            }
-            if (!itemdone)
-                ptr++;
+        if (*p == ':')
+        {
+            /* [m:n] format */
+            lBound[ndim] = i;
+            p++;
+            q = p;
+            if (!ReadDimensionInt(&p, &ub, origStr, escontext))
+                return false;
+            if (p == q)            /* no digits? */
+                ereturn(escontext, false,
+                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("malformed array literal: \"%s\"", origStr),
+                         errdetail("Missing array dimension value.")));
+        }
+        else
+        {
+            /* [n] format */
+            lBound[ndim] = 1;
+            ub = i;
         }
-        temp[ndim - 1]++;
-        ptr++;
+        if (*p != ']')
+            ereturn(escontext, false,
+                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                     errmsg("malformed array literal: \"%s\"", origStr),
+                     errdetail("Missing \"%s\" after array dimensions.",
+                               "]")));
+        p++;
+
+        /*
+         * Note: we could accept ub = lb-1 to represent a zero-length
+         * dimension.  However, that would result in an empty array, for which
+         * we don't keep any dimension data, so that e.g. [1:0] and [101:100]
+         * would be equivalent.  Given the lack of field demand, there seems
+         * little point in allowing such cases.
+         */
+        if (ub < lBound[ndim])
+            ereturn(escontext, false,
+                    (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+                     errmsg("upper bound cannot be less than lower bound")));
+
+        /* Upper bound of INT_MAX must be disallowed, cf ArrayCheckBounds() */
+        if (ub == INT_MAX)
+            ereturn(escontext, false,
+                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                     errmsg("array upper bound is too large: %d", ub)));
+
+        /* Compute "ub - lBound[ndim] + 1", detecting overflow */
+        if (pg_sub_s32_overflow(ub, lBound[ndim], &ub) ||
+            pg_add_s32_overflow(ub, 1, &ub))
+            ereport(ERROR,
+                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                     errmsg("array size exceeds the maximum allowed (%d)",
+                            (int) MaxArraySize)));
+
+        dim[ndim] = ub;
+        ndim++;
     }

-    /* only whitespace is allowed after the closing brace */
-    while (*ptr)
+    *srcptr = p;
+    *ndim_p = ndim;
+    return true;
+}
+
+/*
+ * ReadDimensionInt
+ *     parse an integer, for the array dimensions
+ *
+ * On entry, *srcptr points to the string to parse. It is advanced past the
+ * digits of the integer. If there are no digits, returns true and leaves
+ * *srcptr unchanged.
+ *
+ * Result:
+ *    true for success, false for failure (if escontext is provided).
+ *  On success, the parsed integer is returned in *result.
+ */
+static bool
+ReadDimensionInt(char **srcptr, int *result,
+                 const char *origStr, Node *escontext)
+{
+    char       *p = *srcptr;
+    long        l;
+
+    /* don't accept leading whitespace */
+    if (!isdigit((unsigned char) *p) && *p != '-' && *p != '+')
     {
-        if (!scanner_isspace(*ptr++))
-            ereturn(escontext, -1,
-                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                     errmsg("malformed array literal: \"%s\"", str),
-                     errdetail("Junk after closing right brace.")));
+        *result = 0;
+        return true;
     }

-    /* special case for an empty array */
-    if (empty_array)
-        return 0;
+    errno = 0;
+    l = strtol(p, srcptr, 10);

-    for (i = 0; i < ndim; ++i)
-        dim[i] = temp[i];
+    if (errno == ERANGE || l > PG_INT32_MAX || l < PG_INT32_MIN)
+        ereturn(escontext, false,
+                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                 errmsg("array bound is out of integer range")));

-    return ndim;
+    *result = (int) l;
+    return true;
 }

 /*
  * ReadArrayStr :
- *     parses the array string pointed to by "arrayStr" and converts the values
- *     to internal format.  Unspecified elements are initialized to nulls.
- *     The array dimensions must already have been determined.
+ *     parses the array string pointed to by *srcptr and converts the values
+ *     to internal format.  Determines the array dimensions as it goes.
  *
- * Inputs:
- *    arrayStr: the string to parse.
- *              CAUTION: the contents of "arrayStr" will be modified!
- *    origStr: the unmodified input string, used only in error messages.
- *    nitems: total number of array elements, as already determined.
- *    ndim: number of array dimensions
- *    dim[]: array axis lengths
+ * On entry, *srcptr points to the string to parse (it must point to a '{').
+ * On successful return, it is advanced to point past the closing '}'.
+ *
+ * If dimensions were specified explicitly, they are passed in *ndim_p and
+ * dim. This function will check that the array values match the specified
+ * dimensions. If *ndim_p == 0, this function deduces the dimensions from
+ * the structure of the input, and stores them in *ndim_p and the dim array.
+ *
+ * Element type information:
  *    inputproc: type-specific input procedure for element datatype.
  *    typioparam, typmod: auxiliary values to pass to inputproc.
  *    typdelim: the value delimiter (type-specific).
  *    typlen, typbyval, typalign: storage parameters of element datatype.
  *
  * Outputs:
- *    values[]: filled with converted data values.
- *    nulls[]: filled with is-null markers.
- *    *hasnulls: set true iff there are any null elements.
- *    *nbytes: set to total size of data area needed (including alignment
- *        padding but not including array header overhead).
- *    *escontext: if this points to an ErrorSaveContext, details of
- *        any error are reported there.
+ *  *ndim_p, dim: dimensions deduced from the input structure.
+ *  *nitems_p: total number of elements.
+ *    *values_p[]: palloc'd array, filled with converted data values.
+ *    *nulls_p[]: palloc'd array, filled with is-null markers.
+ *
+ * 'origStr' is the original input string, used only in error messages.
+ * If *escontext points to an ErrorSaveContext, details of any error are
+ * reported there.
  *
  * Result:
  *    true for success, false for failure (if escontext is provided).
- *
- * Note that values[] and nulls[] are allocated by the caller, and must have
- * nitems elements.
  */
 static bool
-ReadArrayStr(char *arrayStr,
-             const char *origStr,
-             int nitems,
-             int ndim,
-             int *dim,
+ReadArrayStr(char **srcptr,
              FmgrInfo *inputproc,
              Oid typioparam,
              int32 typmod,
@@ -727,224 +579,351 @@ ReadArrayStr(char *arrayStr,
              int typlen,
              bool typbyval,
              char typalign,
-             Datum *values,
-             bool *nulls,
-             bool *hasnulls,
-             int32 *nbytes,
+             int *ndim_p,
+             int *dim,
+             int *nitems_p,
+             Datum **values_p,
+             bool **nulls_p,
+             const char *origStr,
              Node *escontext)
 {
-    int            i,
-                nest_level = 0;
-    char       *srcptr;
-    bool        in_quotes = false;
-    bool        eoArray = false;
-    bool        hasnull;
-    int32        totbytes;
-    int            indx[MAXDIM] = {0},
-                prod[MAXDIM];
+    int            ndim = *ndim_p;
+    bool        dimensions_specified = (ndim != 0);
+    int            maxitems;
+    Datum       *values;
+    bool       *nulls;
+    StringInfoData elembuf;
+    int            nest_level;
+    int            nitems;
+    bool        ndim_frozen;
+    bool        expect_delim;
+    int            nelems[MAXDIM];
+
+    /* Allocate some starting output workspace; we'll enlarge as needed */
+    maxitems = 16;
+    values = palloc_array(Datum, maxitems);
+    nulls = palloc_array(bool, maxitems);
+
+    /* Allocate workspace to hold (string representation of) one element */
+    initStringInfo(&elembuf);
+
+    /* Loop below assumes first token is ATOK_LEVEL_START */
+    Assert(**srcptr == '{');
+
+    /* Parse tokens until we reach the matching right brace */
+    nest_level = 0;
+    nitems = 0;
+    ndim_frozen = dimensions_specified;
+    expect_delim = false;
+    do
+    {
+        ArrayToken    tok;

-    mda_get_prod(ndim, dim, prod);
+        tok = ReadArrayToken(srcptr, &elembuf, typdelim, origStr, escontext);

-    /* Initialize is-null markers to true */
-    memset(nulls, true, nitems * sizeof(bool));
+        switch (tok)
+        {
+            case ATOK_LEVEL_START:
+                /* Can't write left brace where delim is expected */
+                if (expect_delim)
+                    ereturn(escontext, false,
+                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", origStr),
+                             errdetail("Unexpected \"%c\" character.", '{')));

-    /*
-     * We have to remove " and \ characters to create a clean item value to
-     * pass to the datatype input routine.  We overwrite each item value
-     * in-place within arrayStr to do this.  srcptr is the current scan point,
-     * and dstptr is where we are copying to.
-     *
-     * We also want to suppress leading and trailing unquoted whitespace. We
-     * use the leadingspace flag to suppress leading space.  Trailing space is
-     * tracked by using dstendptr to point to the last significant output
-     * character.
-     *
-     * The error checking in this routine is mostly pro-forma, since we expect
-     * that ArrayCount() already validated the string.  So we don't bother
-     * with errdetail messages.
-     */
-    srcptr = arrayStr;
-    while (!eoArray)
-    {
-        bool        itemdone = false;
-        bool        leadingspace = true;
-        bool        hasquoting = false;
-        char       *itemstart;
-        char       *dstptr;
-        char       *dstendptr;
+                /* Initialize element counting in the new level */
+                if (nest_level >= MAXDIM)
+                    ereturn(escontext, false,
+                            (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                             errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+                                    nest_level + 1, MAXDIM)));

-        i = -1;
-        itemstart = dstptr = dstendptr = srcptr;
+                nelems[nest_level] = 0;
+                nest_level++;
+                if (nest_level > ndim)
+                {
+                    /* Can't increase ndim once it's frozen */
+                    if (ndim_frozen)
+                        goto dimension_error;
+                    ndim = nest_level;
+                }
+                break;

-        while (!itemdone)
-        {
-            switch (*srcptr)
-            {
-                case '\0':
-                    /* Signal a premature end of the string */
+            case ATOK_LEVEL_END:
+                /* Can't get here with nest_level == 0 */
+                Assert(nest_level > 0);
+
+                /*
+                 * We allow a right brace to terminate an empty sub-array,
+                 * otherwise it must occur where we expect a delimiter.
+                 */
+                if (nelems[nest_level - 1] > 0 && !expect_delim)
                     ereturn(escontext, false,
                             (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                             errmsg("malformed array literal: \"%s\"",
-                                    origStr)));
-                    break;
-                case '\\':
-                    /* Skip backslash, copy next character as-is. */
-                    srcptr++;
-                    if (*srcptr == '\0')
+                             errmsg("malformed array literal: \"%s\"", origStr),
+                             errdetail("Unexpected \"%c\" character.",
+                                       '}')));
+                nest_level--;
+                /* Nested sub-arrays count as elements of outer level */
+                if (nest_level > 0)
+                    nelems[nest_level - 1]++;
+
+                /*
+                 * Note: if we had dimensionality info, then dim[nest_level]
+                 * is initially non-negative, and we'll check each sub-array's
+                 * length against that.
+                 */
+                if (dim[nest_level] < 0)
+                {
+                    /* Save length of first sub-array of this level */
+                    dim[nest_level] = nelems[nest_level];
+                }
+                else if (nelems[nest_level] != dim[nest_level])
+                {
+                    /* Subsequent sub-arrays must have same length */
+                    goto dimension_error;
+                }
+
+                /*
+                 * Must have a delim or another right brace following, unless
+                 * we have reached nest_level 0, where this won't matter.
+                 */
+                expect_delim = true;
+                break;
+
+            case ATOK_DELIM:
+                if (!expect_delim)
+                    ereturn(escontext, false,
+                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", origStr),
+                             errdetail("Unexpected \"%c\" character.",
+                                       typdelim)));
+                expect_delim = false;
+                break;
+
+            case ATOK_ELEM:
+            case ATOK_ELEM_NULL:
+                /* Can't get here with nest_level == 0 */
+                Assert(nest_level > 0);
+
+                /* Disallow consecutive ELEM tokens */
+                if (expect_delim)
+                    ereturn(escontext, false,
+                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", origStr),
+                             errdetail("Unexpected array element.")));
+
+                /* Enlarge the values/nulls arrays if needed */
+                if (nitems >= maxitems)
+                {
+                    if (maxitems >= MaxArraySize)
                         ereturn(escontext, false,
-                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                 errmsg("malformed array literal: \"%s\"",
-                                        origStr)));
-                    *dstptr++ = *srcptr++;
-                    /* Treat the escaped character as non-whitespace */
-                    leadingspace = false;
-                    dstendptr = dstptr;
-                    hasquoting = true;    /* can't be a NULL marker */
-                    break;
-                case '"':
-                    in_quotes = !in_quotes;
-                    if (in_quotes)
-                        leadingspace = false;
-                    else
-                    {
-                        /*
-                         * Advance dstendptr when we exit in_quotes; this
-                         * saves having to do it in all the other in_quotes
-                         * cases.
-                         */
-                        dstendptr = dstptr;
-                    }
-                    hasquoting = true;    /* can't be a NULL marker */
-                    srcptr++;
-                    break;
-                case '{':
-                    if (!in_quotes)
-                    {
-                        if (nest_level >= ndim)
-                            ereturn(escontext, false,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"",
-                                            origStr)));
-                        nest_level++;
-                        indx[nest_level - 1] = 0;
-                        srcptr++;
-                    }
-                    else
-                        *dstptr++ = *srcptr++;
-                    break;
-                case '}':
-                    if (!in_quotes)
-                    {
-                        if (nest_level == 0)
-                            ereturn(escontext, false,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"",
-                                            origStr)));
-                        if (i == -1)
-                            i = ArrayGetOffset0(ndim, indx, prod);
-                        indx[nest_level - 1] = 0;
-                        nest_level--;
-                        if (nest_level == 0)
-                            eoArray = itemdone = true;
-                        else
-                            indx[nest_level - 1]++;
-                        srcptr++;
-                    }
-                    else
-                        *dstptr++ = *srcptr++;
-                    break;
-                default:
-                    if (in_quotes)
-                        *dstptr++ = *srcptr++;
-                    else if (*srcptr == typdelim)
-                    {
-                        if (i == -1)
-                            i = ArrayGetOffset0(ndim, indx, prod);
-                        itemdone = true;
-                        indx[ndim - 1]++;
-                        srcptr++;
-                    }
-                    else if (scanner_isspace(*srcptr))
-                    {
-                        /*
-                         * If leading space, drop it immediately.  Else, copy
-                         * but don't advance dstendptr.
-                         */
-                        if (leadingspace)
-                            srcptr++;
-                        else
-                            *dstptr++ = *srcptr++;
-                    }
-                    else
-                    {
-                        *dstptr++ = *srcptr++;
-                        leadingspace = false;
-                        dstendptr = dstptr;
-                    }
-                    break;
-            }
+                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                 errmsg("array size exceeds the maximum allowed (%d)",
+                                        (int) MaxArraySize)));
+                    maxitems = Min(maxitems * 2, MaxArraySize);
+                    values = repalloc_array(values, Datum, maxitems);
+                    nulls = repalloc_array(nulls, bool, maxitems);
+                }
+
+                /* Read the element's value, or check that NULL is allowed */
+                if (!InputFunctionCallSafe(inputproc,
+                                           (tok == ATOK_ELEM_NULL) ? NULL : elembuf.data,
+                                           typioparam, typmod,
+                                           escontext,
+                                           &values[nitems]))
+                    return false;
+                nulls[nitems] = (tok == ATOK_ELEM_NULL);
+                nitems++;
+
+                /*
+                 * Once we have found an element, the number of dimensions can
+                 * no longer increase, and subsequent elements must all be at
+                 * the same nesting depth.
+                 */
+                ndim_frozen = true;
+                if (nest_level != ndim)
+                    goto dimension_error;
+                /* Count the new element */
+                nelems[nest_level - 1]++;
+
+                /* Must have a delim or a right brace following */
+                expect_delim = true;
+                break;
+
+            case ATOK_ERROR:
+                return false;
         }
+    } while (nest_level > 0);

-        Assert(dstptr < srcptr);
-        *dstendptr = '\0';
+    /* Clean up and return results */
+    pfree(elembuf.data);

-        if (i < 0 || i >= nitems)
-            ereturn(escontext, false,
-                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                     errmsg("malformed array literal: \"%s\"",
-                            origStr)));
+    *ndim_p = ndim;
+    *nitems_p = nitems;
+    *values_p = values;
+    *nulls_p = nulls;
+    return true;
+
+dimension_error:
+    if (dimensions_specified)
+        errsave(escontext,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("malformed array literal: \"%s\"", origStr),
+                 errdetail("Specified array dimensions do not match array contents.")));
+    else
+        errsave(escontext,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("malformed array literal: \"%s\"", origStr),
+                 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
+    return false;
+}
+
+/*
+ * ReadArrayToken
+ *     read one token from an array value string
+ *
+ * Starts scanning from *srcptr.  On non-error return, *srcptr is
+ * advanced past the token.
+ *
+ * If the token is ATOK_ELEM, the de-escaped string is returned in elembuf.
+ */
+static ArrayToken
+ReadArrayToken(char **srcptr, StringInfo elembuf, char typdelim,
+               const char *origStr, Node *escontext)
+{
+    char       *p = *srcptr;
+    int            dstlen;
+    bool        has_escapes;
+
+    resetStringInfo(elembuf);

-        if (Array_nulls && !hasquoting &&
-            pg_strcasecmp(itemstart, "NULL") == 0)
+    /* Identify token type.  Loop advances over leading whitespace. */
+    for (;;)
+    {
+        switch (*p)
         {
-            /* it's a NULL item */
-            if (!InputFunctionCallSafe(inputproc, NULL,
-                                       typioparam, typmod,
-                                       escontext,
-                                       &values[i]))
-                return false;
-            nulls[i] = true;
+            case '\0':
+                goto ending_error;
+            case '{':
+                *srcptr = p + 1;
+                return ATOK_LEVEL_START;
+            case '}':
+                *srcptr = p + 1;
+                return ATOK_LEVEL_END;
+            case '"':
+                p++;
+                goto quoted_element;
+            default:
+                if (*p == typdelim)
+                {
+                    *srcptr = p + 1;
+                    return ATOK_DELIM;
+                }
+                if (scanner_isspace(*p))
+                {
+                    p++;
+                    continue;
+                }
+                goto unquoted_element;
         }
-        else
+    }
+
+quoted_element:
+    for (;;)
+    {
+        switch (*p)
         {
-            if (!InputFunctionCallSafe(inputproc, itemstart,
-                                       typioparam, typmod,
-                                       escontext,
-                                       &values[i]))
-                return false;
-            nulls[i] = false;
+            case '\0':
+                goto ending_error;
+            case '\\':
+                /* Skip backslash, copy next character as-is. */
+                p++;
+                if (*p == '\0')
+                    goto ending_error;
+                appendStringInfoChar(elembuf, *p++);
+                break;
+            case '"':
+                *srcptr = p + 1;
+                return ATOK_ELEM;
+            default:
+                appendStringInfoChar(elembuf, *p++);
+                break;
         }
     }

+unquoted_element:
+
     /*
-     * Check for nulls, compute total data space needed
+     * We don't include trailing whitespace in the result.  dstlen tracks how
+     * much of the output string is known to not be trailing whitespace.
      */
-    hasnull = false;
-    totbytes = 0;
-    for (i = 0; i < nitems; i++)
+    dstlen = 0;
+    has_escapes = false;
+    for (;;)
     {
-        if (nulls[i])
-            hasnull = true;
-        else
+        switch (*p)
         {
-            /* let's just make sure data is not toasted */
-            if (typlen == -1)
-                values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
-            totbytes = att_addlength_datum(totbytes, typlen, values[i]);
-            totbytes = att_align_nominal(totbytes, typalign);
-            /* check for overflow of total request */
-            if (!AllocSizeIsValid(totbytes))
-                ereturn(escontext, false,
-                        (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                         errmsg("array size exceeds the maximum allowed (%d)",
-                                (int) MaxAllocSize)));
+            case '\0':
+                goto ending_error;
+            case '{':
+                errsave(escontext,
+                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("malformed array literal: \"%s\"", origStr),
+                         errdetail("Unexpected \"%c\" character.",
+                                   '{')));
+                return ATOK_ERROR;
+            case '"':
+
+                /*
+                 * XXX "Unexpected %c character" would be more apropos, but
+                 * this message is what the pre-v17 implementation produced,
+                 * so we'll keep it for now.
+                 */
+                errsave(escontext,
+                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("malformed array literal: \"%s\"", origStr),
+                         errdetail("Unexpected array element.")));
+                return ATOK_ERROR;
+            case '\\':
+                /* Skip backslash, copy next character as-is. */
+                p++;
+                if (*p == '\0')
+                    goto ending_error;
+                appendStringInfoChar(elembuf, *p++);
+                dstlen = elembuf->len;    /* treat it as non-whitespace */
+                has_escapes = true;
+                break;
+            default:
+                /* End of elem? */
+                if (*p == typdelim || *p == '}')
+                {
+                    /* hack: truncate the output string to dstlen */
+                    elembuf->data[dstlen] = '\0';
+                    elembuf->len = dstlen;
+                    *srcptr = p;
+                    /* Check if it's unquoted "NULL" */
+                    if (Array_nulls && !has_escapes &&
+                        pg_strcasecmp(elembuf->data, "NULL") == 0)
+                        return ATOK_ELEM_NULL;
+                    else
+                        return ATOK_ELEM;
+                }
+                appendStringInfoChar(elembuf, *p);
+                if (!scanner_isspace(*p))
+                    dstlen = elembuf->len;
+                p++;
+                break;
         }
     }
-    *hasnulls = hasnull;
-    *nbytes = totbytes;
-    return true;
-}

+ending_error:
+    errsave(escontext,
+            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+             errmsg("malformed array literal: \"%s\"", origStr),
+             errdetail("Unexpected end of input.")));
+    return ATOK_ERROR;
+}

 /*
  * Copy data into an array object from a temporary array of Datums.
diff --git a/src/backend/utils/adt/arrayutils.c b/src/backend/utils/adt/arrayutils.c
index aed799234c..6243971543 100644
--- a/src/backend/utils/adt/arrayutils.c
+++ b/src/backend/utils/adt/arrayutils.c
@@ -43,21 +43,6 @@ ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx)
     return offset;
 }

-/*
- * Same, but subscripts are assumed 0-based, and use a scale array
- * instead of raw dimension data (see mda_get_prod to create scale array)
- */
-int
-ArrayGetOffset0(int n, const int *tup, const int *scale)
-{
-    int            i,
-                lin = 0;
-
-    for (i = 0; i < n; i++)
-        lin += tup[i] * scale[i];
-    return lin;
-}
-
 /*
  * Convert array dimensions into number of elements
  *
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index e6c8d88d9f..7308007185 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -455,7 +455,6 @@ extern void array_free_iterator(ArrayIterator iterator);
  */

 extern int    ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx);
-extern int    ArrayGetOffset0(int n, const int *tup, const int *scale);
 extern int    ArrayGetNItems(int ndim, const int *dims);
 extern int    ArrayGetNItemsSafe(int ndim, const int *dims,
                                struct Node *escontext);
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 957498432d..2212851538 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1492,17 +1492,22 @@ select '{{1,{2}},{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,{2}},{2,3}}"
 LINE 1: select '{{1,{2}},{2,3}}'::text[];
                ^
-DETAIL:  Unexpected "{" character.
-select '{{},{}}'::text[];
-ERROR:  malformed array literal: "{{},{}}"
-LINE 1: select '{{},{}}'::text[];
-               ^
-DETAIL:  Unexpected "}" character.
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
 select E'{{1,2},\\{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,2},\{2,3}}"
 LINE 1: select E'{{1,2},\\{2,3}}'::text[];
                ^
-DETAIL:  Unexpected "\" character.
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select E'{"a"a}'::text[];
+ERROR:  malformed array literal: "{"a"a}"
+LINE 1: select E'{"a"a}'::text[];
+               ^
+DETAIL:  Unexpected array element.
+select E'{a"a"}'::text[];
+ERROR:  malformed array literal: "{a"a"}"
+LINE 1: select E'{a"a"}'::text[];
+               ^
+DETAIL:  Unexpected array element.
 select '{{"1 2" x},{3}}'::text[];
 ERROR:  malformed array literal: "{{"1 2" x},{3}}"
 LINE 1: select '{{"1 2" x},{3}}'::text[];
@@ -1518,11 +1523,116 @@ ERROR:  malformed array literal: "{ }}"
 LINE 1: select '{ }}'::text[];
                ^
 DETAIL:  Junk after closing right brace.
+select '}{'::text[];
+ERROR:  malformed array literal: "}{"
+LINE 1: select '}{'::text[];
+               ^
+DETAIL:  Array value must start with "{" or dimension information.
+select '{foo{}}'::text[];
+ERROR:  malformed array literal: "{foo{}}"
+LINE 1: select '{foo{}}'::text[];
+               ^
+DETAIL:  Unexpected "{" character.
+select '{"foo"{}}'::text[];
+ERROR:  malformed array literal: "{"foo"{}}"
+LINE 1: select '{"foo"{}}'::text[];
+               ^
+DETAIL:  Unexpected "{" character.
+select '{foo,,bar}'::text[];
+ERROR:  malformed array literal: "{foo,,bar}"
+LINE 1: select '{foo,,bar}'::text[];
+               ^
+DETAIL:  Unexpected "," character.
+select '{{1},{{2}}}'::text[];
+ERROR:  malformed array literal: "{{1},{{2}}}"
+LINE 1: select '{{1},{{2}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{1}},{2}}'::text[];
+ERROR:  malformed array literal: "{{{1}},{2}}"
+LINE 1: select '{{{1}},{2}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{{}}}'::text[];
+ERROR:  malformed array literal: "{{},{{}}}"
+LINE 1: select '{{},{{}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{}},{}}'::text[];
+ERROR:  malformed array literal: "{{{}},{}}"
+LINE 1: select '{{{}},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{1},{}}'::text[];
+ERROR:  malformed array literal: "{{1},{}}"
+LINE 1: select '{{1},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{1}}'::text[];
+ERROR:  malformed array literal: "{{},{1}}"
+LINE 1: select '{{},{1}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '[1:0]={}'::int[];
+ERROR:  upper bound cannot be less than lower bound
+LINE 1: select '[1:0]={}'::int[];
+               ^
+select '[2147483646:2147483647]={1,2}'::int[];
+ERROR:  array upper bound is too large: 2147483647
+LINE 1: select '[2147483646:2147483647]={1,2}'::int[];
+               ^
+select '[1:-1]={}'::int[];
+ERROR:  upper bound cannot be less than lower bound
+LINE 1: select '[1:-1]={}'::int[];
+               ^
+select '[2]={1}'::int[];
+ERROR:  malformed array literal: "[2]={1}"
+LINE 1: select '[2]={1}'::int[];
+               ^
+DETAIL:  Specified array dimensions do not match array contents.
+select '[1:]={1}'::int[];
+ERROR:  malformed array literal: "[1:]={1}"
+LINE 1: select '[1:]={1}'::int[];
+               ^
+DETAIL:  Missing array dimension value.
+select '[:1]={1}'::int[];
+ERROR:  malformed array literal: "[:1]={1}"
+LINE 1: select '[:1]={1}'::int[];
+               ^
+DETAIL:  "[" must introduce explicitly-specified array dimensions.
 select array[];
 ERROR:  cannot determine type of empty array
 LINE 1: select array[];
                ^
 HINT:  Explicitly cast to the desired type, for example ARRAY[]::integer[].
+select '{{1,},{1},}'::text[];
+ERROR:  malformed array literal: "{{1,},{1},}"
+LINE 1: select '{{1,},{1},}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '{{1,},{1}}'::text[];
+ERROR:  malformed array literal: "{{1,},{1}}"
+LINE 1: select '{{1,},{1}}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '{{1,}}'::text[];
+ERROR:  malformed array literal: "{{1,}}"
+LINE 1: select '{{1,}}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '{1,}'::text[];
+ERROR:  malformed array literal: "{1,}"
+LINE 1: select '{1,}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '[21474836488:21474836489]={1,2}'::int[];
+ERROR:  array bound is out of integer range
+LINE 1: select '[21474836488:21474836489]={1,2}'::int[];
+               ^
+select '[-2147483649:-2147483648]={1,2}'::int[];
+ERROR:  array bound is out of integer range
+LINE 1: select '[-2147483649:-2147483648]={1,2}'::int[];
+               ^
 -- none of the above should be accepted
 -- all of the following should be accepted
 select '{}'::text[];
@@ -1531,12 +1641,30 @@ select '{}'::text[];
  {}
 (1 row)

+select '{{},{}}'::text[];
+ text
+------
+ {}
+(1 row)
+
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
                      text
 -----------------------------------------------
  {{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}
 (1 row)

+select '{null,n\ull,"null"}'::text[];
+         text
+----------------------
+ {NULL,"null","null"}
+(1 row)
+
+select '{ ab\c , "ab\"c" }'::text[];
+     text
+---------------
+ {abc,"ab\"c"}
+(1 row)
+
 select '{0 second  ,0 second}'::interval[];
    interval
 ---------------
@@ -1570,12 +1698,30 @@ select array[]::text[];
  {}
 (1 row)

+select '[2]={1,7}'::int[];
+ int4
+-------
+ {1,7}
+(1 row)
+
 select '[0:1]={1.1,2.2}'::float8[];
      float8
 -----------------
  [0:1]={1.1,2.2}
 (1 row)

+select '[2147483646:2147483646]={1}'::int[];
+            int4
+-----------------------------
+ [2147483646:2147483646]={1}
+(1 row)
+
+select '[-2147483648:-2147483647]={1,2}'::int[];
+              int4
+---------------------------------
+ [-2147483648:-2147483647]={1,2}
+(1 row)
+
 -- all of the above should be accepted
 -- tests for array aggregates
 CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]);
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index daf805c382..8181e172a9 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -473,17 +473,43 @@ select 'foo' ilike all (array['F%', '%O']); -- t

 -- none of the following should be accepted
 select '{{1,{2}},{2,3}}'::text[];
-select '{{},{}}'::text[];
 select E'{{1,2},\\{2,3}}'::text[];
+select E'{"a"a}'::text[];
+select E'{a"a"}'::text[];
 select '{{"1 2" x},{3}}'::text[];
 select '{}}'::text[];
 select '{ }}'::text[];
+select '}{'::text[];
+select '{foo{}}'::text[];
+select '{"foo"{}}'::text[];
+select '{foo,,bar}'::text[];
+select '{{1},{{2}}}'::text[];
+select '{{{1}},{2}}'::text[];
+select '{{},{{}}}'::text[];
+select '{{{}},{}}'::text[];
+select '{{1},{}}'::text[];
+select '{{},{1}}'::text[];
+select '[1:0]={}'::int[];
+select '[2147483646:2147483647]={1,2}'::int[];
+select '[1:-1]={}'::int[];
+select '[2]={1}'::int[];
+select '[1:]={1}'::int[];
+select '[:1]={1}'::int[];
 select array[];
+select '{{1,},{1},}'::text[];
+select '{{1,},{1}}'::text[];
+select '{{1,}}'::text[];
+select '{1,}'::text[];
+select '[21474836488:21474836489]={1,2}'::int[];
+select '[-2147483649:-2147483648]={1,2}'::int[];
 -- none of the above should be accepted

 -- all of the following should be accepted
 select '{}'::text[];
+select '{{},{}}'::text[];
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
+select '{null,n\ull,"null"}'::text[];
+select '{ ab\c , "ab\"c" }'::text[];
 select '{0 second  ,0 second}'::interval[];
 select '{ { "," } , { 3 } }'::text[];
 select '  {   {  "  0 second  "   ,  0 second  }   }'::text[];
@@ -492,7 +518,10 @@ select '{
            @ 1 hour @ 42 minutes @ 20 seconds
          }'::interval[];
 select array[]::text[];
+select '[2]={1,7}'::int[];
 select '[0:1]={1.1,2.2}'::float8[];
+select '[2147483646:2147483646]={1}'::int[];
+select '[-2147483648:-2147483647]={1,2}'::int[];
 -- all of the above should be accepted

 -- tests for array aggregates
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 87c1aee379..5c4c1e1600 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -148,8 +148,8 @@ ArrayIOData
 ArrayIterator
 ArrayMapState
 ArrayMetaState
-ArrayParseState
 ArraySubWorkspace
+ArrayToken
 ArrayType
 AsyncQueueControl
 AsyncQueueEntry

Re: Cleaning up array_in()

От

Alexander Lakhin

Дата:

08 ноября 2023 г., 18:00:00

Hello Tom,

08.11.2023 02:52, Tom Lane wrote:
> Comments?

Thank you for the update! I haven't looked into the code, just did manual
testing and rechecked commands given in the arrays documentation ([1]).
Everything works correctly, except for one minor difference:
INSERT INTO sal_emp
     VALUES ('Bill',
     '{10000, 10000, 10000, 10000}',
     '{{"meeting", "lunch"}, {"meeting"}}');

currently gives:
ERROR:  malformed array literal: "{{"meeting", "lunch"}, {"meeting"}}"
LINE 4:     '{{"meeting", "lunch"}, {"meeting"}}');
             ^
DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.

not
ERROR:  multidimensional arrays must have array expressions with matching dimensions

It seems that this inconsistency appeared with 475aedd1e, so it's not new
at all, but maybe fix it or describe the error more generally. (Though it
might be supposed that "for example" covers slight deviations.)

[1] https://www.postgresql.org/docs/devel/arrays.html

Best regards,
Alexander

Re: Cleaning up array_in()

От

Tom Lane

Дата:

08 ноября 2023 г., 18:56:57

Alexander Lakhin <exclusion@gmail.com> writes:
> Thank you for the update! I haven't looked into the code, just did manual
> testing and rechecked commands given in the arrays documentation ([1]).
> Everything works correctly, except for one minor difference:
> INSERT INTO sal_emp
>      VALUES ('Bill',
>      '{10000, 10000, 10000, 10000}',
>      '{{"meeting", "lunch"}, {"meeting"}}');

> currently gives:
> ERROR:  malformed array literal: "{{"meeting", "lunch"}, {"meeting"}}"
> LINE 4:     '{{"meeting", "lunch"}, {"meeting"}}');
>              ^
> DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.

> not
> ERROR:  multidimensional arrays must have array expressions with matching dimensions

Oh!  I had not realized we had actual documentation examples covering
this area.  Yeah, that doc needs to be updated to show the current
wording of the error.  Thanks for catching that.

            regards, tom lane

Re: Cleaning up array_in()

От

Tom Lane

Дата:

09 ноября 2023 г., 19:57:47

I wrote:
> This comes out when you write something like '{foo"bar"}', and I'd
> say the choice of message is not great.  On the other hand, it's
> consistent with what you get from '{"foo""bar"}', and if we wanted
> to change that too then some tweaking of the state machine in
> ReadArrayStr would be required (or else modify ReadArrayToken so
> it doesn't return instantly upon seeing the second quote mark).
> I'm not sure that this is worth messing with.

After further thought I concluded that this area is worth spending
a little more code for.  If we have input like '{foo"bar"}' or
'{"foo"bar}' or '{"foo""bar"}', what it most likely means is that
the user misunderstood the quoting rules.  A message like "Unexpected
array element" is pretty completely unhelpful for figuring that out.
The alternative I was considering, "Unexpected """ character", would
not be much better.  What we want to say is something like "Incorrectly
quoted array element", and the attached v12 makes ReadArrayToken do
that for both quoted and unquoted cases.

I also fixed the obsolete documentation that Alexander noted, and
cleaned up a couple other infelicities (notably, I'd blindly written
ereport(ERROR) in one place where ereturn is now the way).

Barring objections, I think v12 is committable.

            regards, tom lane

diff --git a/doc/src/sgml/array.sgml b/doc/src/sgml/array.sgml
index 56185b9b03..ce338c770c 100644
--- a/doc/src/sgml/array.sgml
+++ b/doc/src/sgml/array.sgml
@@ -171,7 +171,8 @@ INSERT INTO sal_emp
     VALUES ('Bill',
     '{10000, 10000, 10000, 10000}',
     '{{"meeting", "lunch"}, {"meeting"}}');
-ERROR:  multidimensional arrays must have array expressions with matching dimensions
+ERROR:  malformed array literal: "{{"meeting", "lunch"}, {"meeting"}}"
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
 </programlisting>
  </para>

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 3ff13eb419..b0532bb45b 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -54,18 +54,16 @@ bool        Array_nulls = true;
             PG_FREE_IF_COPY(array, n); \
     } while (0)

+/* ReadArrayToken return type */
 typedef enum
 {
-    ARRAY_NO_LEVEL,
-    ARRAY_LEVEL_STARTED,
-    ARRAY_ELEM_STARTED,
-    ARRAY_ELEM_COMPLETED,
-    ARRAY_QUOTED_ELEM_STARTED,
-    ARRAY_QUOTED_ELEM_COMPLETED,
-    ARRAY_ELEM_DELIMITED,
-    ARRAY_LEVEL_COMPLETED,
-    ARRAY_LEVEL_DELIMITED,
-} ArrayParseState;
+    ATOK_LEVEL_START,
+    ATOK_LEVEL_END,
+    ATOK_DELIM,
+    ATOK_ELEM,
+    ATOK_ELEM_NULL,
+    ATOK_ERROR,
+} ArrayToken;

 /* Working state for array_iterate() */
 typedef struct ArrayIteratorData
@@ -91,15 +89,21 @@ typedef struct ArrayIteratorData
     int            current_item;    /* the item # we're at in the array */
 }            ArrayIteratorData;

-static int    ArrayCount(const char *str, int *dim, char typdelim,
-                       Node *escontext);
-static bool ReadArrayStr(char *arrayStr, const char *origStr,
-                         int nitems, int ndim, int *dim,
+static bool ReadArrayDimensions(char **srcptr, int *ndim_p,
+                                int *dim, int *lBound,
+                                const char *origStr, Node *escontext);
+static bool ReadDimensionInt(char **srcptr, int *result,
+                             const char *origStr, Node *escontext);
+static bool ReadArrayStr(char **srcptr,
                          FmgrInfo *inputproc, Oid typioparam, int32 typmod,
                          char typdelim,
                          int typlen, bool typbyval, char typalign,
-                         Datum *values, bool *nulls,
-                         bool *hasnulls, int32 *nbytes, Node *escontext);
+                         int *ndim_p, int *dim,
+                         int *nitems_p,
+                         Datum **values_p, bool **nulls_p,
+                         const char *origStr, Node *escontext);
+static ArrayToken ReadArrayToken(char **srcptr, StringInfo elembuf, char typdelim,
+                                 const char *origStr, Node *escontext);
 static void ReadArrayBinary(StringInfo buf, int nitems,
                             FmgrInfo *receiveproc, Oid typioparam, int32 typmod,
                             int typlen, bool typbyval, char typalign,
@@ -185,12 +189,10 @@ array_in(PG_FUNCTION_ARGS)
     char        typalign;
     char        typdelim;
     Oid            typioparam;
-    char       *string_save,
-               *p;
-    int            i,
-                nitems;
-    Datum       *dataPtr;
-    bool       *nullsPtr;
+    char       *p;
+    int            nitems;
+    Datum       *values;
+    bool       *nulls;
     bool        hasnulls;
     int32        nbytes;
     int32        dataoffset;
@@ -233,104 +235,34 @@ array_in(PG_FUNCTION_ARGS)
     typdelim = my_extra->typdelim;
     typioparam = my_extra->typioparam;

-    /* Make a modifiable copy of the input */
-    string_save = pstrdup(string);
+    /* Initialize dim, lBound for ReadArrayDimensions, ReadArrayStr */
+    for (int i = 0; i < MAXDIM; i++)
+    {
+        dim[i] = -1;            /* indicates "not yet known" */
+        lBound[i] = 1;            /* default lower bound */
+    }

     /*
-     * If the input string starts with dimension info, read and use that.
-     * Otherwise, we require the input to be in curly-brace style, and we
-     * prescan the input to determine dimensions.
+     * Start processing the input string.
      *
-     * Dimension info takes the form of one or more [n] or [m:n] items. The
-     * outer loop iterates once per dimension item.
+     * If the input string starts with dimension info, read and use that.
+     * Otherwise, we'll determine the dimensions during ReadArrayStr.
      */
-    p = string_save;
-    ndim = 0;
-    for (;;)
-    {
-        char       *q;
-        int            ub;
-
-        /*
-         * Note: we currently allow whitespace between, but not within,
-         * dimension items.
-         */
-        while (scanner_isspace(*p))
-            p++;
-        if (*p != '[')
-            break;                /* no more dimension items */
-        p++;
-        if (ndim >= MAXDIM)
-            ereturn(escontext, (Datum) 0,
-                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                     errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-                            ndim + 1, MAXDIM)));
-
-        for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++)
-             /* skip */ ;
-        if (q == p)                /* no digits? */
-            ereturn(escontext, (Datum) 0,
-                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                     errmsg("malformed array literal: \"%s\"", string),
-                     errdetail("\"[\" must introduce explicitly-specified array dimensions.")));
-
-        if (*q == ':')
-        {
-            /* [m:n] format */
-            *q = '\0';
-            lBound[ndim] = atoi(p);
-            p = q + 1;
-            for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++)
-                 /* skip */ ;
-            if (q == p)            /* no digits? */
-                ereturn(escontext, (Datum) 0,
-                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                         errmsg("malformed array literal: \"%s\"", string),
-                         errdetail("Missing array dimension value.")));
-        }
-        else
-        {
-            /* [n] format */
-            lBound[ndim] = 1;
-        }
-        if (*q != ']')
-            ereturn(escontext, (Datum) 0,
-                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                     errmsg("malformed array literal: \"%s\"", string),
-                     errdetail("Missing \"%s\" after array dimensions.",
-                               "]")));
-
-        *q = '\0';
-        ub = atoi(p);
-        p = q + 1;
-        if (ub < lBound[ndim])
-            ereturn(escontext, (Datum) 0,
-                    (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
-                     errmsg("upper bound cannot be less than lower bound")));
-
-        dim[ndim] = ub - lBound[ndim] + 1;
-        ndim++;
-    }
+    p = string;
+    if (!ReadArrayDimensions(&p, &ndim, dim, lBound, string, escontext))
+        return (Datum) 0;

     if (ndim == 0)
     {
-        /* No array dimensions, so intuit dimensions from brace structure */
+        /* No array dimensions, so next character should be a left brace */
         if (*p != '{')
             ereturn(escontext, (Datum) 0,
                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                      errmsg("malformed array literal: \"%s\"", string),
                      errdetail("Array value must start with \"{\" or dimension information.")));
-        ndim = ArrayCount(p, dim, typdelim, escontext);
-        if (ndim < 0)
-            PG_RETURN_NULL();
-        for (i = 0; i < ndim; i++)
-            lBound[i] = 1;
     }
     else
     {
-        int            ndim_braces,
-                    dim_braces[MAXDIM];
-
         /* If array dimensions are given, expect '=' operator */
         if (strncmp(p, ASSGN, strlen(ASSGN)) != 0)
             ereturn(escontext, (Datum) 0,
@@ -339,66 +271,68 @@ array_in(PG_FUNCTION_ARGS)
                      errdetail("Missing \"%s\" after array dimensions.",
                                ASSGN)));
         p += strlen(ASSGN);
+        /* Allow whitespace after it */
         while (scanner_isspace(*p))
             p++;

-        /*
-         * intuit dimensions from brace structure -- it better match what we
-         * were given
-         */
         if (*p != '{')
             ereturn(escontext, (Datum) 0,
                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                      errmsg("malformed array literal: \"%s\"", string),
                      errdetail("Array contents must start with \"{\".")));
-        ndim_braces = ArrayCount(p, dim_braces, typdelim, escontext);
-        if (ndim_braces < 0)
-            PG_RETURN_NULL();
-        if (ndim_braces != ndim)
+    }
+
+    /* Parse the value part, in the curly braces: { ... } */
+    if (!ReadArrayStr(&p,
+                      &my_extra->proc, typioparam, typmod,
+                      typdelim,
+                      typlen, typbyval, typalign,
+                      &ndim,
+                      dim,
+                      &nitems,
+                      &values, &nulls,
+                      string,
+                      escontext))
+        return (Datum) 0;
+
+    /* only whitespace is allowed after the closing brace */
+    while (*p)
+    {
+        if (!scanner_isspace(*p++))
             ereturn(escontext, (Datum) 0,
                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                      errmsg("malformed array literal: \"%s\"", string),
-                     errdetail("Specified array dimensions do not match array contents.")));
-        for (i = 0; i < ndim; ++i)
-        {
-            if (dim[i] != dim_braces[i])
-                ereturn(escontext, (Datum) 0,
-                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                         errmsg("malformed array literal: \"%s\"", string),
-                         errdetail("Specified array dimensions do not match array contents.")));
-        }
+                     errdetail("Junk after closing right brace.")));
     }

-#ifdef ARRAYDEBUG
-    printf("array_in- ndim %d (", ndim);
-    for (i = 0; i < ndim; i++)
-    {
-        printf(" %d", dim[i]);
-    };
-    printf(") for %s\n", string);
-#endif
-
-    /* This checks for overflow of the array dimensions */
-    nitems = ArrayGetNItemsSafe(ndim, dim, escontext);
-    if (nitems < 0)
-        PG_RETURN_NULL();
-    if (!ArrayCheckBoundsSafe(ndim, dim, lBound, escontext))
-        PG_RETURN_NULL();
-
     /* Empty array? */
     if (nitems == 0)
         PG_RETURN_ARRAYTYPE_P(construct_empty_array(element_type));

-    dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
-    nullsPtr = (bool *) palloc(nitems * sizeof(bool));
-    if (!ReadArrayStr(p, string,
-                      nitems, ndim, dim,
-                      &my_extra->proc, typioparam, typmod,
-                      typdelim,
-                      typlen, typbyval, typalign,
-                      dataPtr, nullsPtr,
-                      &hasnulls, &nbytes, escontext))
-        PG_RETURN_NULL();
+    /*
+     * Check for nulls, compute total data space needed
+     */
+    hasnulls = false;
+    nbytes = 0;
+    for (int i = 0; i < nitems; i++)
+    {
+        if (nulls[i])
+            hasnulls = true;
+        else
+        {
+            /* let's just make sure data is not toasted */
+            if (typlen == -1)
+                values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
+            nbytes = att_addlength_datum(nbytes, typlen, values[i]);
+            nbytes = att_align_nominal(nbytes, typalign);
+            /* check for overflow of total request */
+            if (!AllocSizeIsValid(nbytes))
+                ereturn(escontext, (Datum) 0,
+                        (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                         errmsg("array size exceeds the maximum allowed (%d)",
+                                (int) MaxAllocSize)));
+        }
+    }
     if (hasnulls)
     {
         dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nitems);
@@ -409,6 +343,10 @@ array_in(PG_FUNCTION_ARGS)
         dataoffset = 0;            /* marker for no null bitmap */
         nbytes += ARR_OVERHEAD_NONULLS(ndim);
     }
+
+    /*
+     * Construct the final array datum
+     */
     retval = (ArrayType *) palloc0(nbytes);
     SET_VARSIZE(retval, nbytes);
     retval->ndim = ndim;
@@ -424,302 +362,216 @@ array_in(PG_FUNCTION_ARGS)
     memcpy(ARR_LBOUND(retval), lBound, ndim * sizeof(int));

     CopyArrayEls(retval,
-                 dataPtr, nullsPtr, nitems,
+                 values, nulls, nitems,
                  typlen, typbyval, typalign,
                  true);

-    pfree(dataPtr);
-    pfree(nullsPtr);
-    pfree(string_save);
+    pfree(values);
+    pfree(nulls);

     PG_RETURN_ARRAYTYPE_P(retval);
 }

 /*
- * ArrayCount
- *     Determines the dimensions for an array string.
+ * ReadArrayDimensions
+ *     parses the array dimensions part of the input and converts the values
+ *     to internal format.
+ *
+ * On entry, *srcptr points to the string to parse. It is advanced to point
+ * after whitespace (if any) and dimension info (if any).
  *
- * Returns number of dimensions as function result.  The axis lengths are
- * returned in dim[], which must be of size MAXDIM.
+ * *ndim_p, *dim, and *lBound are output variables. They are filled with the
+ * number of dimensions (<= MAXDIM), the length of each dimension, and the
+ * lower bounds of the slices, respectively.  *ndim_p will be set to zero
+ * when no dimensions appear.
  *
- * If we detect an error, fill *escontext with error details and return -1
- * (unless escontext isn't provided, in which case errors will be thrown).
+ * 'origStr' is the original input string, used only in error messages.
+ * If *escontext points to an ErrorSaveContext, details of any error are
+ * reported there.
+ *
+ * Result:
+ *    true for success, false for failure (if escontext is provided).
+ *
+ * Note that dim[] and lBound[] are allocated by the caller, and must have
+ * MAXDIM elements.
  */
-static int
-ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
+static bool
+ReadArrayDimensions(char **srcptr, int *ndim_p, int *dim, int *lBound,
+                    const char *origStr, Node *escontext)
 {
-    int            nest_level = 0,
-                i;
-    int            ndim = 1,
-                temp[MAXDIM],
-                nelems[MAXDIM],
-                nelems_last[MAXDIM];
-    bool        in_quotes = false;
-    bool        eoArray = false;
-    bool        empty_array = true;
-    const char *ptr;
-    ArrayParseState parse_state = ARRAY_NO_LEVEL;
+    char       *p = *srcptr;
+    int            ndim;

-    for (i = 0; i < MAXDIM; ++i)
+    /*
+     * Dimension info takes the form of one or more [n] or [m:n] items.  This
+     * loop iterates once per dimension item.
+     */
+    ndim = 0;
+    for (;;)
     {
-        temp[i] = dim[i] = nelems_last[i] = 0;
-        nelems[i] = 1;
-    }
+        char       *q;
+        int            ub;
+        int            i;

-    ptr = str;
-    while (!eoArray)
-    {
-        bool        itemdone = false;
+        /*
+         * Note: we currently allow whitespace between, but not within,
+         * dimension items.
+         */
+        while (scanner_isspace(*p))
+            p++;
+        if (*p != '[')
+            break;                /* no more dimension items */
+        p++;
+        if (ndim >= MAXDIM)
+            ereturn(escontext, false,
+                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                     errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+                            ndim + 1, MAXDIM)));

-        while (!itemdone)
-        {
-            if (parse_state == ARRAY_ELEM_STARTED ||
-                parse_state == ARRAY_QUOTED_ELEM_STARTED)
-                empty_array = false;
+        q = p;
+        if (!ReadDimensionInt(&p, &i, origStr, escontext))
+            return false;
+        if (p == q)                /* no digits? */
+            ereturn(escontext, false,
+                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                     errmsg("malformed array literal: \"%s\"", origStr),
+                     errdetail("\"[\" must introduce explicitly-specified array dimensions.")));

-            switch (*ptr)
-            {
-                case '\0':
-                    /* Signal a premature end of the string */
-                    ereturn(escontext, -1,
-                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                             errmsg("malformed array literal: \"%s\"", str),
-                             errdetail("Unexpected end of input.")));
-                case '\\':
-
-                    /*
-                     * An escape must be after a level start, after an element
-                     * start, or after an element delimiter. In any case we
-                     * now must be past an element start.
-                     */
-                    if (parse_state != ARRAY_LEVEL_STARTED &&
-                        parse_state != ARRAY_ELEM_STARTED &&
-                        parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-                        parse_state != ARRAY_ELEM_DELIMITED)
-                        ereturn(escontext, -1,
-                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                 errmsg("malformed array literal: \"%s\"", str),
-                                 errdetail("Unexpected \"%c\" character.",
-                                           '\\')));
-                    if (parse_state != ARRAY_QUOTED_ELEM_STARTED)
-                        parse_state = ARRAY_ELEM_STARTED;
-                    /* skip the escaped character */
-                    if (*(ptr + 1))
-                        ptr++;
-                    else
-                        ereturn(escontext, -1,
-                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                 errmsg("malformed array literal: \"%s\"", str),
-                                 errdetail("Unexpected end of input.")));
-                    break;
-                case '"':
-
-                    /*
-                     * A quote must be after a level start, after a quoted
-                     * element start, or after an element delimiter. In any
-                     * case we now must be past an element start.
-                     */
-                    if (parse_state != ARRAY_LEVEL_STARTED &&
-                        parse_state != ARRAY_QUOTED_ELEM_STARTED &&
-                        parse_state != ARRAY_ELEM_DELIMITED)
-                        ereturn(escontext, -1,
-                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                 errmsg("malformed array literal: \"%s\"", str),
-                                 errdetail("Unexpected array element.")));
-                    in_quotes = !in_quotes;
-                    if (in_quotes)
-                        parse_state = ARRAY_QUOTED_ELEM_STARTED;
-                    else
-                        parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
-                    break;
-                case '{':
-                    if (!in_quotes)
-                    {
-                        /*
-                         * A left brace can occur if no nesting has occurred
-                         * yet, after a level start, or after a level
-                         * delimiter.
-                         */
-                        if (parse_state != ARRAY_NO_LEVEL &&
-                            parse_state != ARRAY_LEVEL_STARTED &&
-                            parse_state != ARRAY_LEVEL_DELIMITED)
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Unexpected \"%c\" character.",
-                                               '{')));
-                        parse_state = ARRAY_LEVEL_STARTED;
-                        if (nest_level >= MAXDIM)
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                                     errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-                                            nest_level + 1, MAXDIM)));
-                        temp[nest_level] = 0;
-                        nest_level++;
-                        if (ndim < nest_level)
-                            ndim = nest_level;
-                    }
-                    break;
-                case '}':
-                    if (!in_quotes)
-                    {
-                        /*
-                         * A right brace can occur after an element start, an
-                         * element completion, a quoted element completion, or
-                         * a level completion.
-                         */
-                        if (parse_state != ARRAY_ELEM_STARTED &&
-                            parse_state != ARRAY_ELEM_COMPLETED &&
-                            parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-                            parse_state != ARRAY_LEVEL_COMPLETED &&
-                            !(nest_level == 1 && parse_state == ARRAY_LEVEL_STARTED))
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Unexpected \"%c\" character.",
-                                               '}')));
-                        parse_state = ARRAY_LEVEL_COMPLETED;
-                        if (nest_level == 0)
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Unmatched \"%c\" character.", '}')));
-                        nest_level--;
-
-                        if (nelems_last[nest_level] != 0 &&
-                            nelems[nest_level] != nelems_last[nest_level])
-                            ereturn(escontext, -1,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"", str),
-                                     errdetail("Multidimensional arrays must have "
-                                               "sub-arrays with matching "
-                                               "dimensions.")));
-                        nelems_last[nest_level] = nelems[nest_level];
-                        nelems[nest_level] = 1;
-                        if (nest_level == 0)
-                            eoArray = itemdone = true;
-                        else
-                        {
-                            /*
-                             * We don't set itemdone here; see comments in
-                             * ReadArrayStr
-                             */
-                            temp[nest_level - 1]++;
-                        }
-                    }
-                    break;
-                default:
-                    if (!in_quotes)
-                    {
-                        if (*ptr == typdelim)
-                        {
-                            /*
-                             * Delimiters can occur after an element start, an
-                             * element completion, a quoted element
-                             * completion, or a level completion.
-                             */
-                            if (parse_state != ARRAY_ELEM_STARTED &&
-                                parse_state != ARRAY_ELEM_COMPLETED &&
-                                parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
-                                parse_state != ARRAY_LEVEL_COMPLETED)
-                                ereturn(escontext, -1,
-                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                         errmsg("malformed array literal: \"%s\"", str),
-                                         errdetail("Unexpected \"%c\" character.",
-                                                   typdelim)));
-                            if (parse_state == ARRAY_LEVEL_COMPLETED)
-                                parse_state = ARRAY_LEVEL_DELIMITED;
-                            else
-                                parse_state = ARRAY_ELEM_DELIMITED;
-                            itemdone = true;
-                            nelems[nest_level - 1]++;
-                        }
-                        else if (!scanner_isspace(*ptr))
-                        {
-                            /*
-                             * Other non-space characters must be after a
-                             * level start, after an element start, or after
-                             * an element delimiter. In any case we now must
-                             * be past an element start.
-                             */
-                            if (parse_state != ARRAY_LEVEL_STARTED &&
-                                parse_state != ARRAY_ELEM_STARTED &&
-                                parse_state != ARRAY_ELEM_DELIMITED)
-                                ereturn(escontext, -1,
-                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                         errmsg("malformed array literal: \"%s\"", str),
-                                         errdetail("Unexpected array element.")));
-                            parse_state = ARRAY_ELEM_STARTED;
-                        }
-                    }
-                    break;
-            }
-            if (!itemdone)
-                ptr++;
+        if (*p == ':')
+        {
+            /* [m:n] format */
+            lBound[ndim] = i;
+            p++;
+            q = p;
+            if (!ReadDimensionInt(&p, &ub, origStr, escontext))
+                return false;
+            if (p == q)            /* no digits? */
+                ereturn(escontext, false,
+                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("malformed array literal: \"%s\"", origStr),
+                         errdetail("Missing array dimension value.")));
         }
-        temp[ndim - 1]++;
-        ptr++;
+        else
+        {
+            /* [n] format */
+            lBound[ndim] = 1;
+            ub = i;
+        }
+        if (*p != ']')
+            ereturn(escontext, false,
+                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                     errmsg("malformed array literal: \"%s\"", origStr),
+                     errdetail("Missing \"%s\" after array dimensions.",
+                               "]")));
+        p++;
+
+        /*
+         * Note: we could accept ub = lb-1 to represent a zero-length
+         * dimension.  However, that would result in an empty array, for which
+         * we don't keep any dimension data, so that e.g. [1:0] and [101:100]
+         * would be equivalent.  Given the lack of field demand, there seems
+         * little point in allowing such cases.
+         */
+        if (ub < lBound[ndim])
+            ereturn(escontext, false,
+                    (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+                     errmsg("upper bound cannot be less than lower bound")));
+
+        /* Upper bound of INT_MAX must be disallowed, cf ArrayCheckBounds() */
+        if (ub == INT_MAX)
+            ereturn(escontext, false,
+                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                     errmsg("array upper bound is too large: %d", ub)));
+
+        /* Compute "ub - lBound[ndim] + 1", detecting overflow */
+        if (pg_sub_s32_overflow(ub, lBound[ndim], &ub) ||
+            pg_add_s32_overflow(ub, 1, &ub))
+            ereturn(escontext, false,
+                    (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                     errmsg("array size exceeds the maximum allowed (%d)",
+                            (int) MaxArraySize)));
+
+        dim[ndim] = ub;
+        ndim++;
     }

-    /* only whitespace is allowed after the closing brace */
-    while (*ptr)
+    *srcptr = p;
+    *ndim_p = ndim;
+    return true;
+}
+
+/*
+ * ReadDimensionInt
+ *     parse an integer, for the array dimensions
+ *
+ * On entry, *srcptr points to the string to parse. It is advanced past the
+ * digits of the integer. If there are no digits, returns true and leaves
+ * *srcptr unchanged.
+ *
+ * Result:
+ *    true for success, false for failure (if escontext is provided).
+ *  On success, the parsed integer is returned in *result.
+ */
+static bool
+ReadDimensionInt(char **srcptr, int *result,
+                 const char *origStr, Node *escontext)
+{
+    char       *p = *srcptr;
+    long        l;
+
+    /* don't accept leading whitespace */
+    if (!isdigit((unsigned char) *p) && *p != '-' && *p != '+')
     {
-        if (!scanner_isspace(*ptr++))
-            ereturn(escontext, -1,
-                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                     errmsg("malformed array literal: \"%s\"", str),
-                     errdetail("Junk after closing right brace.")));
+        *result = 0;
+        return true;
     }

-    /* special case for an empty array */
-    if (empty_array)
-        return 0;
+    errno = 0;
+    l = strtol(p, srcptr, 10);

-    for (i = 0; i < ndim; ++i)
-        dim[i] = temp[i];
+    if (errno == ERANGE || l > PG_INT32_MAX || l < PG_INT32_MIN)
+        ereturn(escontext, false,
+                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                 errmsg("array bound is out of integer range")));

-    return ndim;
+    *result = (int) l;
+    return true;
 }

 /*
  * ReadArrayStr :
- *     parses the array string pointed to by "arrayStr" and converts the values
- *     to internal format.  Unspecified elements are initialized to nulls.
- *     The array dimensions must already have been determined.
+ *     parses the array string pointed to by *srcptr and converts the values
+ *     to internal format.  Determines the array dimensions as it goes.
  *
- * Inputs:
- *    arrayStr: the string to parse.
- *              CAUTION: the contents of "arrayStr" will be modified!
- *    origStr: the unmodified input string, used only in error messages.
- *    nitems: total number of array elements, as already determined.
- *    ndim: number of array dimensions
- *    dim[]: array axis lengths
+ * On entry, *srcptr points to the string to parse (it must point to a '{').
+ * On successful return, it is advanced to point past the closing '}'.
+ *
+ * If dimensions were specified explicitly, they are passed in *ndim_p and
+ * dim. This function will check that the array values match the specified
+ * dimensions. If *ndim_p == 0, this function deduces the dimensions from
+ * the structure of the input, and stores them in *ndim_p and the dim array.
+ *
+ * Element type information:
  *    inputproc: type-specific input procedure for element datatype.
  *    typioparam, typmod: auxiliary values to pass to inputproc.
  *    typdelim: the value delimiter (type-specific).
  *    typlen, typbyval, typalign: storage parameters of element datatype.
  *
  * Outputs:
- *    values[]: filled with converted data values.
- *    nulls[]: filled with is-null markers.
- *    *hasnulls: set true iff there are any null elements.
- *    *nbytes: set to total size of data area needed (including alignment
- *        padding but not including array header overhead).
- *    *escontext: if this points to an ErrorSaveContext, details of
- *        any error are reported there.
+ *  *ndim_p, dim: dimensions deduced from the input structure.
+ *  *nitems_p: total number of elements.
+ *    *values_p[]: palloc'd array, filled with converted data values.
+ *    *nulls_p[]: palloc'd array, filled with is-null markers.
+ *
+ * 'origStr' is the original input string, used only in error messages.
+ * If *escontext points to an ErrorSaveContext, details of any error are
+ * reported there.
  *
  * Result:
  *    true for success, false for failure (if escontext is provided).
- *
- * Note that values[] and nulls[] are allocated by the caller, and must have
- * nitems elements.
  */
 static bool
-ReadArrayStr(char *arrayStr,
-             const char *origStr,
-             int nitems,
-             int ndim,
-             int *dim,
+ReadArrayStr(char **srcptr,
              FmgrInfo *inputproc,
              Oid typioparam,
              int32 typmod,
@@ -727,224 +579,363 @@ ReadArrayStr(char *arrayStr,
              int typlen,
              bool typbyval,
              char typalign,
-             Datum *values,
-             bool *nulls,
-             bool *hasnulls,
-             int32 *nbytes,
+             int *ndim_p,
+             int *dim,
+             int *nitems_p,
+             Datum **values_p,
+             bool **nulls_p,
+             const char *origStr,
              Node *escontext)
 {
-    int            i,
-                nest_level = 0;
-    char       *srcptr;
-    bool        in_quotes = false;
-    bool        eoArray = false;
-    bool        hasnull;
-    int32        totbytes;
-    int            indx[MAXDIM] = {0},
-                prod[MAXDIM];
+    int            ndim = *ndim_p;
+    bool        dimensions_specified = (ndim != 0);
+    int            maxitems;
+    Datum       *values;
+    bool       *nulls;
+    StringInfoData elembuf;
+    int            nest_level;
+    int            nitems;
+    bool        ndim_frozen;
+    bool        expect_delim;
+    int            nelems[MAXDIM];
+
+    /* Allocate some starting output workspace; we'll enlarge as needed */
+    maxitems = 16;
+    values = palloc_array(Datum, maxitems);
+    nulls = palloc_array(bool, maxitems);
+
+    /* Allocate workspace to hold (string representation of) one element */
+    initStringInfo(&elembuf);
+
+    /* Loop below assumes first token is ATOK_LEVEL_START */
+    Assert(**srcptr == '{');
+
+    /* Parse tokens until we reach the matching right brace */
+    nest_level = 0;
+    nitems = 0;
+    ndim_frozen = dimensions_specified;
+    expect_delim = false;
+    do
+    {
+        ArrayToken    tok;

-    mda_get_prod(ndim, dim, prod);
+        tok = ReadArrayToken(srcptr, &elembuf, typdelim, origStr, escontext);

-    /* Initialize is-null markers to true */
-    memset(nulls, true, nitems * sizeof(bool));
+        switch (tok)
+        {
+            case ATOK_LEVEL_START:
+                /* Can't write left brace where delim is expected */
+                if (expect_delim)
+                    ereturn(escontext, false,
+                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", origStr),
+                             errdetail("Unexpected \"%c\" character.", '{')));

-    /*
-     * We have to remove " and \ characters to create a clean item value to
-     * pass to the datatype input routine.  We overwrite each item value
-     * in-place within arrayStr to do this.  srcptr is the current scan point,
-     * and dstptr is where we are copying to.
-     *
-     * We also want to suppress leading and trailing unquoted whitespace. We
-     * use the leadingspace flag to suppress leading space.  Trailing space is
-     * tracked by using dstendptr to point to the last significant output
-     * character.
-     *
-     * The error checking in this routine is mostly pro-forma, since we expect
-     * that ArrayCount() already validated the string.  So we don't bother
-     * with errdetail messages.
-     */
-    srcptr = arrayStr;
-    while (!eoArray)
-    {
-        bool        itemdone = false;
-        bool        leadingspace = true;
-        bool        hasquoting = false;
-        char       *itemstart;
-        char       *dstptr;
-        char       *dstendptr;
+                /* Initialize element counting in the new level */
+                if (nest_level >= MAXDIM)
+                    ereturn(escontext, false,
+                            (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                             errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+                                    nest_level + 1, MAXDIM)));

-        i = -1;
-        itemstart = dstptr = dstendptr = srcptr;
+                nelems[nest_level] = 0;
+                nest_level++;
+                if (nest_level > ndim)
+                {
+                    /* Can't increase ndim once it's frozen */
+                    if (ndim_frozen)
+                        goto dimension_error;
+                    ndim = nest_level;
+                }
+                break;

-        while (!itemdone)
-        {
-            switch (*srcptr)
-            {
-                case '\0':
-                    /* Signal a premature end of the string */
+            case ATOK_LEVEL_END:
+                /* Can't get here with nest_level == 0 */
+                Assert(nest_level > 0);
+
+                /*
+                 * We allow a right brace to terminate an empty sub-array,
+                 * otherwise it must occur where we expect a delimiter.
+                 */
+                if (nelems[nest_level - 1] > 0 && !expect_delim)
                     ereturn(escontext, false,
                             (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                             errmsg("malformed array literal: \"%s\"",
-                                    origStr)));
-                    break;
-                case '\\':
-                    /* Skip backslash, copy next character as-is. */
-                    srcptr++;
-                    if (*srcptr == '\0')
+                             errmsg("malformed array literal: \"%s\"", origStr),
+                             errdetail("Unexpected \"%c\" character.",
+                                       '}')));
+                nest_level--;
+                /* Nested sub-arrays count as elements of outer level */
+                if (nest_level > 0)
+                    nelems[nest_level - 1]++;
+
+                /*
+                 * Note: if we had dimensionality info, then dim[nest_level]
+                 * is initially non-negative, and we'll check each sub-array's
+                 * length against that.
+                 */
+                if (dim[nest_level] < 0)
+                {
+                    /* Save length of first sub-array of this level */
+                    dim[nest_level] = nelems[nest_level];
+                }
+                else if (nelems[nest_level] != dim[nest_level])
+                {
+                    /* Subsequent sub-arrays must have same length */
+                    goto dimension_error;
+                }
+
+                /*
+                 * Must have a delim or another right brace following, unless
+                 * we have reached nest_level 0, where this won't matter.
+                 */
+                expect_delim = true;
+                break;
+
+            case ATOK_DELIM:
+                if (!expect_delim)
+                    ereturn(escontext, false,
+                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", origStr),
+                             errdetail("Unexpected \"%c\" character.",
+                                       typdelim)));
+                expect_delim = false;
+                break;
+
+            case ATOK_ELEM:
+            case ATOK_ELEM_NULL:
+                /* Can't get here with nest_level == 0 */
+                Assert(nest_level > 0);
+
+                /* Disallow consecutive ELEM tokens */
+                if (expect_delim)
+                    ereturn(escontext, false,
+                            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                             errmsg("malformed array literal: \"%s\"", origStr),
+                             errdetail("Unexpected array element.")));
+
+                /* Enlarge the values/nulls arrays if needed */
+                if (nitems >= maxitems)
+                {
+                    if (maxitems >= MaxArraySize)
                         ereturn(escontext, false,
-                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                 errmsg("malformed array literal: \"%s\"",
-                                        origStr)));
-                    *dstptr++ = *srcptr++;
-                    /* Treat the escaped character as non-whitespace */
-                    leadingspace = false;
-                    dstendptr = dstptr;
-                    hasquoting = true;    /* can't be a NULL marker */
-                    break;
-                case '"':
-                    in_quotes = !in_quotes;
-                    if (in_quotes)
-                        leadingspace = false;
-                    else
-                    {
-                        /*
-                         * Advance dstendptr when we exit in_quotes; this
-                         * saves having to do it in all the other in_quotes
-                         * cases.
-                         */
-                        dstendptr = dstptr;
-                    }
-                    hasquoting = true;    /* can't be a NULL marker */
-                    srcptr++;
-                    break;
-                case '{':
-                    if (!in_quotes)
-                    {
-                        if (nest_level >= ndim)
-                            ereturn(escontext, false,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"",
-                                            origStr)));
-                        nest_level++;
-                        indx[nest_level - 1] = 0;
-                        srcptr++;
-                    }
-                    else
-                        *dstptr++ = *srcptr++;
-                    break;
-                case '}':
-                    if (!in_quotes)
-                    {
-                        if (nest_level == 0)
-                            ereturn(escontext, false,
-                                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                     errmsg("malformed array literal: \"%s\"",
-                                            origStr)));
-                        if (i == -1)
-                            i = ArrayGetOffset0(ndim, indx, prod);
-                        indx[nest_level - 1] = 0;
-                        nest_level--;
-                        if (nest_level == 0)
-                            eoArray = itemdone = true;
-                        else
-                            indx[nest_level - 1]++;
-                        srcptr++;
-                    }
-                    else
-                        *dstptr++ = *srcptr++;
-                    break;
-                default:
-                    if (in_quotes)
-                        *dstptr++ = *srcptr++;
-                    else if (*srcptr == typdelim)
-                    {
-                        if (i == -1)
-                            i = ArrayGetOffset0(ndim, indx, prod);
-                        itemdone = true;
-                        indx[ndim - 1]++;
-                        srcptr++;
-                    }
-                    else if (scanner_isspace(*srcptr))
-                    {
-                        /*
-                         * If leading space, drop it immediately.  Else, copy
-                         * but don't advance dstendptr.
-                         */
-                        if (leadingspace)
-                            srcptr++;
-                        else
-                            *dstptr++ = *srcptr++;
-                    }
-                    else
-                    {
-                        *dstptr++ = *srcptr++;
-                        leadingspace = false;
-                        dstendptr = dstptr;
-                    }
-                    break;
-            }
+                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                 errmsg("array size exceeds the maximum allowed (%d)",
+                                        (int) MaxArraySize)));
+                    maxitems = Min(maxitems * 2, MaxArraySize);
+                    values = repalloc_array(values, Datum, maxitems);
+                    nulls = repalloc_array(nulls, bool, maxitems);
+                }
+
+                /* Read the element's value, or check that NULL is allowed */
+                if (!InputFunctionCallSafe(inputproc,
+                                           (tok == ATOK_ELEM_NULL) ? NULL : elembuf.data,
+                                           typioparam, typmod,
+                                           escontext,
+                                           &values[nitems]))
+                    return false;
+                nulls[nitems] = (tok == ATOK_ELEM_NULL);
+                nitems++;
+
+                /*
+                 * Once we have found an element, the number of dimensions can
+                 * no longer increase, and subsequent elements must all be at
+                 * the same nesting depth.
+                 */
+                ndim_frozen = true;
+                if (nest_level != ndim)
+                    goto dimension_error;
+                /* Count the new element */
+                nelems[nest_level - 1]++;
+
+                /* Must have a delim or a right brace following */
+                expect_delim = true;
+                break;
+
+            case ATOK_ERROR:
+                return false;
         }
+    } while (nest_level > 0);

-        Assert(dstptr < srcptr);
-        *dstendptr = '\0';
+    /* Clean up and return results */
+    pfree(elembuf.data);

-        if (i < 0 || i >= nitems)
-            ereturn(escontext, false,
-                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                     errmsg("malformed array literal: \"%s\"",
-                            origStr)));
+    *ndim_p = ndim;
+    *nitems_p = nitems;
+    *values_p = values;
+    *nulls_p = nulls;
+    return true;
+
+dimension_error:
+    if (dimensions_specified)
+        ereturn(escontext, false,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("malformed array literal: \"%s\"", origStr),
+                 errdetail("Specified array dimensions do not match array contents.")));
+    else
+        ereturn(escontext, false,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("malformed array literal: \"%s\"", origStr),
+                 errdetail("Multidimensional arrays must have sub-arrays with matching dimensions.")));
+}

-        if (Array_nulls && !hasquoting &&
-            pg_strcasecmp(itemstart, "NULL") == 0)
+/*
+ * ReadArrayToken
+ *     read one token from an array value string
+ *
+ * Starts scanning from *srcptr.  On non-error return, *srcptr is
+ * advanced past the token.
+ *
+ * If the token is ATOK_ELEM, the de-escaped string is returned in elembuf.
+ */
+static ArrayToken
+ReadArrayToken(char **srcptr, StringInfo elembuf, char typdelim,
+               const char *origStr, Node *escontext)
+{
+    char       *p = *srcptr;
+    int            dstlen;
+    bool        has_escapes;
+
+    resetStringInfo(elembuf);
+
+    /* Identify token type.  Loop advances over leading whitespace. */
+    for (;;)
+    {
+        switch (*p)
         {
-            /* it's a NULL item */
-            if (!InputFunctionCallSafe(inputproc, NULL,
-                                       typioparam, typmod,
-                                       escontext,
-                                       &values[i]))
-                return false;
-            nulls[i] = true;
+            case '\0':
+                goto ending_error;
+            case '{':
+                *srcptr = p + 1;
+                return ATOK_LEVEL_START;
+            case '}':
+                *srcptr = p + 1;
+                return ATOK_LEVEL_END;
+            case '"':
+                p++;
+                goto quoted_element;
+            default:
+                if (*p == typdelim)
+                {
+                    *srcptr = p + 1;
+                    return ATOK_DELIM;
+                }
+                if (scanner_isspace(*p))
+                {
+                    p++;
+                    continue;
+                }
+                goto unquoted_element;
         }
-        else
+    }
+
+quoted_element:
+    for (;;)
+    {
+        switch (*p)
         {
-            if (!InputFunctionCallSafe(inputproc, itemstart,
-                                       typioparam, typmod,
-                                       escontext,
-                                       &values[i]))
-                return false;
-            nulls[i] = false;
+            case '\0':
+                goto ending_error;
+            case '\\':
+                /* Skip backslash, copy next character as-is. */
+                p++;
+                if (*p == '\0')
+                    goto ending_error;
+                appendStringInfoChar(elembuf, *p++);
+                break;
+            case '"':
+
+                /*
+                 * If next non-whitespace isn't typdelim or a brace, complain
+                 * about incorrect quoting.  While we could leave such cases
+                 * to be detected as incorrect token sequences, the resulting
+                 * message wouldn't be as helpful.  (We could also give the
+                 * incorrect-quoting error when next is '{', but treating that
+                 * as a token sequence error seems better.)
+                 */
+                while (*(++p) != '\0')
+                {
+                    if (*p == typdelim || *p == '}' || *p == '{')
+                    {
+                        *srcptr = p;
+                        return ATOK_ELEM;
+                    }
+                    if (!scanner_isspace(*p))
+                        ereturn(escontext, ATOK_ERROR,
+                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                 errmsg("malformed array literal: \"%s\"", origStr),
+                                 errdetail("Incorrectly quoted array element.")));
+                }
+                goto ending_error;
+            default:
+                appendStringInfoChar(elembuf, *p++);
+                break;
         }
     }

+unquoted_element:
+
     /*
-     * Check for nulls, compute total data space needed
+     * We don't include trailing whitespace in the result.  dstlen tracks how
+     * much of the output string is known to not be trailing whitespace.
      */
-    hasnull = false;
-    totbytes = 0;
-    for (i = 0; i < nitems; i++)
+    dstlen = 0;
+    has_escapes = false;
+    for (;;)
     {
-        if (nulls[i])
-            hasnull = true;
-        else
+        switch (*p)
         {
-            /* let's just make sure data is not toasted */
-            if (typlen == -1)
-                values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
-            totbytes = att_addlength_datum(totbytes, typlen, values[i]);
-            totbytes = att_align_nominal(totbytes, typalign);
-            /* check for overflow of total request */
-            if (!AllocSizeIsValid(totbytes))
-                ereturn(escontext, false,
-                        (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                         errmsg("array size exceeds the maximum allowed (%d)",
-                                (int) MaxAllocSize)));
+            case '\0':
+                goto ending_error;
+            case '{':
+                ereturn(escontext, ATOK_ERROR,
+                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("malformed array literal: \"%s\"", origStr),
+                         errdetail("Unexpected \"%c\" character.",
+                                   '{')));
+            case '"':
+                /* Must double-quote all or none of an element. */
+                ereturn(escontext, ATOK_ERROR,
+                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                         errmsg("malformed array literal: \"%s\"", origStr),
+                         errdetail("Incorrectly quoted array element.")));
+            case '\\':
+                /* Skip backslash, copy next character as-is. */
+                p++;
+                if (*p == '\0')
+                    goto ending_error;
+                appendStringInfoChar(elembuf, *p++);
+                dstlen = elembuf->len;    /* treat it as non-whitespace */
+                has_escapes = true;
+                break;
+            default:
+                /* End of elem? */
+                if (*p == typdelim || *p == '}')
+                {
+                    /* hack: truncate the output string to dstlen */
+                    elembuf->data[dstlen] = '\0';
+                    elembuf->len = dstlen;
+                    *srcptr = p;
+                    /* Check if it's unquoted "NULL" */
+                    if (Array_nulls && !has_escapes &&
+                        pg_strcasecmp(elembuf->data, "NULL") == 0)
+                        return ATOK_ELEM_NULL;
+                    else
+                        return ATOK_ELEM;
+                }
+                appendStringInfoChar(elembuf, *p);
+                if (!scanner_isspace(*p))
+                    dstlen = elembuf->len;
+                p++;
+                break;
         }
     }
-    *hasnulls = hasnull;
-    *nbytes = totbytes;
-    return true;
-}

+ending_error:
+    ereturn(escontext, ATOK_ERROR,
+            (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+             errmsg("malformed array literal: \"%s\"", origStr),
+             errdetail("Unexpected end of input.")));
+}

 /*
  * Copy data into an array object from a temporary array of Datums.
diff --git a/src/backend/utils/adt/arrayutils.c b/src/backend/utils/adt/arrayutils.c
index aed799234c..6243971543 100644
--- a/src/backend/utils/adt/arrayutils.c
+++ b/src/backend/utils/adt/arrayutils.c
@@ -43,21 +43,6 @@ ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx)
     return offset;
 }

-/*
- * Same, but subscripts are assumed 0-based, and use a scale array
- * instead of raw dimension data (see mda_get_prod to create scale array)
- */
-int
-ArrayGetOffset0(int n, const int *tup, const int *scale)
-{
-    int            i,
-                lin = 0;
-
-    for (i = 0; i < n; i++)
-        lin += tup[i] * scale[i];
-    return lin;
-}
-
 /*
  * Convert array dimensions into number of elements
  *
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index e6c8d88d9f..7308007185 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -455,7 +455,6 @@ extern void array_free_iterator(ArrayIterator iterator);
  */

 extern int    ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx);
-extern int    ArrayGetOffset0(int n, const int *tup, const int *scale);
 extern int    ArrayGetNItems(int ndim, const int *dims);
 extern int    ArrayGetNItemsSafe(int ndim, const int *dims,
                                struct Node *escontext);
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 957498432d..23404982f7 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1492,21 +1492,36 @@ select '{{1,{2}},{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,{2}},{2,3}}"
 LINE 1: select '{{1,{2}},{2,3}}'::text[];
                ^
-DETAIL:  Unexpected "{" character.
-select '{{},{}}'::text[];
-ERROR:  malformed array literal: "{{},{}}"
-LINE 1: select '{{},{}}'::text[];
-               ^
-DETAIL:  Unexpected "}" character.
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
 select E'{{1,2},\\{2,3}}'::text[];
 ERROR:  malformed array literal: "{{1,2},\{2,3}}"
 LINE 1: select E'{{1,2},\\{2,3}}'::text[];
                ^
-DETAIL:  Unexpected "\" character.
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{"a"b}'::text[];
+ERROR:  malformed array literal: "{"a"b}"
+LINE 1: select '{"a"b}'::text[];
+               ^
+DETAIL:  Incorrectly quoted array element.
+select '{a"b"}'::text[];
+ERROR:  malformed array literal: "{a"b"}"
+LINE 1: select '{a"b"}'::text[];
+               ^
+DETAIL:  Incorrectly quoted array element.
+select '{"a""b"}'::text[];
+ERROR:  malformed array literal: "{"a""b"}"
+LINE 1: select '{"a""b"}'::text[];
+               ^
+DETAIL:  Incorrectly quoted array element.
 select '{{"1 2" x},{3}}'::text[];
 ERROR:  malformed array literal: "{{"1 2" x},{3}}"
 LINE 1: select '{{"1 2" x},{3}}'::text[];
                ^
+DETAIL:  Incorrectly quoted array element.
+select '{{"1 2"} x,{3}}'::text[];
+ERROR:  malformed array literal: "{{"1 2"} x,{3}}"
+LINE 1: select '{{"1 2"} x,{3}}'::text[];
+               ^
 DETAIL:  Unexpected array element.
 select '{}}'::text[];
 ERROR:  malformed array literal: "{}}"
@@ -1518,11 +1533,116 @@ ERROR:  malformed array literal: "{ }}"
 LINE 1: select '{ }}'::text[];
                ^
 DETAIL:  Junk after closing right brace.
+select '}{'::text[];
+ERROR:  malformed array literal: "}{"
+LINE 1: select '}{'::text[];
+               ^
+DETAIL:  Array value must start with "{" or dimension information.
+select '{foo{}}'::text[];
+ERROR:  malformed array literal: "{foo{}}"
+LINE 1: select '{foo{}}'::text[];
+               ^
+DETAIL:  Unexpected "{" character.
+select '{"foo"{}}'::text[];
+ERROR:  malformed array literal: "{"foo"{}}"
+LINE 1: select '{"foo"{}}'::text[];
+               ^
+DETAIL:  Unexpected "{" character.
+select '{foo,,bar}'::text[];
+ERROR:  malformed array literal: "{foo,,bar}"
+LINE 1: select '{foo,,bar}'::text[];
+               ^
+DETAIL:  Unexpected "," character.
+select '{{1},{{2}}}'::text[];
+ERROR:  malformed array literal: "{{1},{{2}}}"
+LINE 1: select '{{1},{{2}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{1}},{2}}'::text[];
+ERROR:  malformed array literal: "{{{1}},{2}}"
+LINE 1: select '{{{1}},{2}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{{}}}'::text[];
+ERROR:  malformed array literal: "{{},{{}}}"
+LINE 1: select '{{},{{}}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{{}},{}}'::text[];
+ERROR:  malformed array literal: "{{{}},{}}"
+LINE 1: select '{{{}},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{1},{}}'::text[];
+ERROR:  malformed array literal: "{{1},{}}"
+LINE 1: select '{{1},{}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '{{},{1}}'::text[];
+ERROR:  malformed array literal: "{{},{1}}"
+LINE 1: select '{{},{1}}'::text[];
+               ^
+DETAIL:  Multidimensional arrays must have sub-arrays with matching dimensions.
+select '[1:0]={}'::int[];
+ERROR:  upper bound cannot be less than lower bound
+LINE 1: select '[1:0]={}'::int[];
+               ^
+select '[2147483646:2147483647]={1,2}'::int[];
+ERROR:  array upper bound is too large: 2147483647
+LINE 1: select '[2147483646:2147483647]={1,2}'::int[];
+               ^
+select '[1:-1]={}'::int[];
+ERROR:  upper bound cannot be less than lower bound
+LINE 1: select '[1:-1]={}'::int[];
+               ^
+select '[2]={1}'::int[];
+ERROR:  malformed array literal: "[2]={1}"
+LINE 1: select '[2]={1}'::int[];
+               ^
+DETAIL:  Specified array dimensions do not match array contents.
+select '[1:]={1}'::int[];
+ERROR:  malformed array literal: "[1:]={1}"
+LINE 1: select '[1:]={1}'::int[];
+               ^
+DETAIL:  Missing array dimension value.
+select '[:1]={1}'::int[];
+ERROR:  malformed array literal: "[:1]={1}"
+LINE 1: select '[:1]={1}'::int[];
+               ^
+DETAIL:  "[" must introduce explicitly-specified array dimensions.
 select array[];
 ERROR:  cannot determine type of empty array
 LINE 1: select array[];
                ^
 HINT:  Explicitly cast to the desired type, for example ARRAY[]::integer[].
+select '{{1,},{1},}'::text[];
+ERROR:  malformed array literal: "{{1,},{1},}"
+LINE 1: select '{{1,},{1},}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '{{1,},{1}}'::text[];
+ERROR:  malformed array literal: "{{1,},{1}}"
+LINE 1: select '{{1,},{1}}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '{{1,}}'::text[];
+ERROR:  malformed array literal: "{{1,}}"
+LINE 1: select '{{1,}}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '{1,}'::text[];
+ERROR:  malformed array literal: "{1,}"
+LINE 1: select '{1,}'::text[];
+               ^
+DETAIL:  Unexpected "}" character.
+select '[21474836488:21474836489]={1,2}'::int[];
+ERROR:  array bound is out of integer range
+LINE 1: select '[21474836488:21474836489]={1,2}'::int[];
+               ^
+select '[-2147483649:-2147483648]={1,2}'::int[];
+ERROR:  array bound is out of integer range
+LINE 1: select '[-2147483649:-2147483648]={1,2}'::int[];
+               ^
 -- none of the above should be accepted
 -- all of the following should be accepted
 select '{}'::text[];
@@ -1531,12 +1651,30 @@ select '{}'::text[];
  {}
 (1 row)

+select '{{},{}}'::text[];
+ text
+------
+ {}
+(1 row)
+
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
                      text
 -----------------------------------------------
  {{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}
 (1 row)

+select '{null,n\ull,"null"}'::text[];
+         text
+----------------------
+ {NULL,"null","null"}
+(1 row)
+
+select '{ ab\c , "ab\"c" }'::text[];
+     text
+---------------
+ {abc,"ab\"c"}
+(1 row)
+
 select '{0 second  ,0 second}'::interval[];
    interval
 ---------------
@@ -1570,12 +1708,30 @@ select array[]::text[];
  {}
 (1 row)

+select '[2]={1,7}'::int[];
+ int4
+-------
+ {1,7}
+(1 row)
+
 select '[0:1]={1.1,2.2}'::float8[];
      float8
 -----------------
  [0:1]={1.1,2.2}
 (1 row)

+select '[2147483646:2147483646]={1}'::int[];
+            int4
+-----------------------------
+ [2147483646:2147483646]={1}
+(1 row)
+
+select '[-2147483648:-2147483647]={1,2}'::int[];
+              int4
+---------------------------------
+ [-2147483648:-2147483647]={1,2}
+(1 row)
+
 -- all of the above should be accepted
 -- tests for array aggregates
 CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]);
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index daf805c382..50aa539fdc 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -473,17 +473,45 @@ select 'foo' ilike all (array['F%', '%O']); -- t

 -- none of the following should be accepted
 select '{{1,{2}},{2,3}}'::text[];
-select '{{},{}}'::text[];
 select E'{{1,2},\\{2,3}}'::text[];
+select '{"a"b}'::text[];
+select '{a"b"}'::text[];
+select '{"a""b"}'::text[];
 select '{{"1 2" x},{3}}'::text[];
+select '{{"1 2"} x,{3}}'::text[];
 select '{}}'::text[];
 select '{ }}'::text[];
+select '}{'::text[];
+select '{foo{}}'::text[];
+select '{"foo"{}}'::text[];
+select '{foo,,bar}'::text[];
+select '{{1},{{2}}}'::text[];
+select '{{{1}},{2}}'::text[];
+select '{{},{{}}}'::text[];
+select '{{{}},{}}'::text[];
+select '{{1},{}}'::text[];
+select '{{},{1}}'::text[];
+select '[1:0]={}'::int[];
+select '[2147483646:2147483647]={1,2}'::int[];
+select '[1:-1]={}'::int[];
+select '[2]={1}'::int[];
+select '[1:]={1}'::int[];
+select '[:1]={1}'::int[];
 select array[];
+select '{{1,},{1},}'::text[];
+select '{{1,},{1}}'::text[];
+select '{{1,}}'::text[];
+select '{1,}'::text[];
+select '[21474836488:21474836489]={1,2}'::int[];
+select '[-2147483649:-2147483648]={1,2}'::int[];
 -- none of the above should be accepted

 -- all of the following should be accepted
 select '{}'::text[];
+select '{{},{}}'::text[];
 select '{{{1,2,3,4},{2,3,4,5}},{{3,4,5,6},{4,5,6,7}}}'::text[];
+select '{null,n\ull,"null"}'::text[];
+select '{ ab\c , "ab\"c" }'::text[];
 select '{0 second  ,0 second}'::interval[];
 select '{ { "," } , { 3 } }'::text[];
 select '  {   {  "  0 second  "   ,  0 second  }   }'::text[];
@@ -492,7 +520,10 @@ select '{
            @ 1 hour @ 42 minutes @ 20 seconds
          }'::interval[];
 select array[]::text[];
+select '[2]={1,7}'::int[];
 select '[0:1]={1.1,2.2}'::float8[];
+select '[2147483646:2147483646]={1}'::int[];
+select '[-2147483648:-2147483647]={1,2}'::int[];
 -- all of the above should be accepted

 -- tests for array aggregates
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index bf50a32119..92c0003ab1 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -148,8 +148,8 @@ ArrayIOData
 ArrayIterator
 ArrayMapState
 ArrayMetaState
-ArrayParseState
 ArraySubWorkspace
+ArrayToken
 ArrayType
 AsyncQueueControl
 AsyncQueueEntry

Re: Cleaning up array_in()

От

Heikki Linnakangas

Дата:

13 ноября 2023 г., 02:30:52

On 09/11/2023 18:57, Tom Lane wrote:
> After further thought I concluded that this area is worth spending
> a little more code for.  If we have input like '{foo"bar"}' or
> '{"foo"bar}' or '{"foo""bar"}', what it most likely means is that
> the user misunderstood the quoting rules.  A message like "Unexpected
> array element" is pretty completely unhelpful for figuring that out.
> The alternative I was considering, "Unexpected """ character", would
> not be much better.  What we want to say is something like "Incorrectly
> quoted array element", and the attached v12 makes ReadArrayToken do
> that for both quoted and unquoted cases.

+1

> I also fixed the obsolete documentation that Alexander noted, and
> cleaned up a couple other infelicities (notably, I'd blindly written
> ereport(ERROR) in one place where ereturn is now the way).
> 
> Barring objections, I think v12 is committable.

Looks good to me. Just two little things caught my eye:

1.

>     /* Initialize dim, lBound for ReadArrayDimensions, ReadArrayStr */
>     for (int i = 0; i < MAXDIM; i++)
>     {
>         dim[i] = -1;            /* indicates "not yet known" */
>         lBound[i] = 1;            /* default lower bound */
>     }

The function comments in ReadArrayDimensions and ReadArrayStr don't make 
it clear that these arrays need to be initialized like this. 
ReadArrayDimensions() says that they are output variables, and 
ReadArrayStr() doesn't mention anything about having to initialize them.


2. This was the same before this patch, but:

postgres=# select '{{{{{{{{{{1}}}}}}}}}}'::int[];
ERROR:  number of array dimensions (7) exceeds the maximum allowed (6)
LINE 1: select '{{{{{{{{{{1}}}}}}}}}}'::int[];
                ^

The error message isn't great, as the literal contains 10 dimensions, 
not 7 as the error message claims.

-- 
Heikki Linnakangas
Neon (https://neon.tech)

Re: Cleaning up array_in()

От

Tom Lane

Дата:

13 ноября 2023 г., 03:11:48

Heikki Linnakangas <hlinnaka@iki.fi> writes:
> On 09/11/2023 18:57, Tom Lane wrote:
>> Barring objections, I think v12 is committable.

> Looks good to me. Just two little things caught my eye:

> 1.
> The function comments in ReadArrayDimensions and ReadArrayStr don't make 
> it clear that these arrays need to be initialized like this. 
> ReadArrayDimensions() says that they are output variables, and 
> ReadArrayStr() doesn't mention anything about having to initialize them.

Roger, will fix that.

> 2. This was the same before this patch, but:

> postgres=# select '{{{{{{{{{{1}}}}}}}}}}'::int[];
> ERROR:  number of array dimensions (7) exceeds the maximum allowed (6)
> LINE 1: select '{{{{{{{{{{1}}}}}}}}}}'::int[];
>                 ^
> The error message isn't great, as the literal contains 10 dimensions, 
> not 7 as the error message claims.

Yeah.  To make that report accurate, we'd have to somehow postpone
issuing the error until we've seen all the left braces (or at least
all the initial ones).  There's a related problem in reading an
explicitly-dimensioned array:

postgres=# select '[1][2][3][4][5][6][7][8][9]={}'::text[];
ERROR:  number of array dimensions (7) exceeds the maximum allowed (6)

I kind of think it's not worth the trouble.  What was discussed
upthread was revising the message to not claim it knows how many
dimensions there are.  The related cases in plperl and plpython just
say "number of array dimensions exceeds the maximum allowed (6)",
and there's a case to be made for adjusting the core messages
similarly.  I figured that could be a separate patch though,
since it'd touch more than array_in (there's about a dozen
occurrences of the former wording).

            regards, tom lane

Re: Cleaning up array_in()

От

Tom Lane

Дата:

13 ноября 2023 г., 21:23:30

I wrote:
> Heikki Linnakangas <hlinnaka@iki.fi> writes:
>> 2. This was the same before this patch, but:

>> postgres=# select '{{{{{{{{{{1}}}}}}}}}}'::int[];
>> ERROR:  number of array dimensions (7) exceeds the maximum allowed (6)
>> LINE 1: select '{{{{{{{{{{1}}}}}}}}}}'::int[];
>> ^
>> The error message isn't great, as the literal contains 10 dimensions, 
>> not 7 as the error message claims.

> Yeah.  To make that report accurate, we'd have to somehow postpone
> issuing the error until we've seen all the left braces (or at least
> all the initial ones).  There's a related problem in reading an
> explicitly-dimensioned array:

> postgres=# select '[1][2][3][4][5][6][7][8][9]={}'::text[];
> ERROR:  number of array dimensions (7) exceeds the maximum allowed (6)

> I kind of think it's not worth the trouble.  What was discussed
> upthread was revising the message to not claim it knows how many
> dimensions there are.

I pushed the main patch.  Here's a proposed delta to deal with
the bogus-dimensionality-count issue.  There are a few more places
where I left things alone because the code does know what the
intended dimensionality will be; so there are still two versions
of the translatable error message.

            regards, tom lane

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index d71967de01..631012a0f2 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -429,8 +429,8 @@ ReadArrayDimensions(char **srcptr, int *ndim_p, int *dim, int *lBound,
         if (ndim >= MAXDIM)
             ereturn(escontext, false,
                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                     errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-                            ndim + 1, MAXDIM)));
+                     errmsg("number of array dimensions exceeds the maximum allowed (%d)",
+                            MAXDIM)));

         q = p;
         if (!ReadDimensionInt(&p, &i, origStr, escontext))
@@ -641,8 +641,8 @@ ReadArrayStr(char **srcptr,
                 if (nest_level >= MAXDIM)
                     ereturn(escontext, false,
                             (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                             errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-                                    nest_level + 1, MAXDIM)));
+                             errmsg("number of array dimensions exceeds the maximum allowed (%d)",
+                                    MAXDIM)));

                 nelems[nest_level] = 0;
                 nest_level++;
diff --git a/src/pl/plperl/expected/plperl_array.out b/src/pl/plperl/expected/plperl_array.out
index bd04a062fb..260a55ea7e 100644
--- a/src/pl/plperl/expected/plperl_array.out
+++ b/src/pl/plperl/expected/plperl_array.out
@@ -61,7 +61,7 @@ select plperl_sum_array('{{{{{{{1,2},{3,4}},{{5,6},{7,8}}},{{{9,10},{11,12}},
 {{{{{1,2},{3,4}},{{5,6},{7,8}}},{{{9,10},{11,12}},{{13,14},{15,16}}}},
 {{{{17,18},{19,20}},{{21,22},{23,24}}},{{{25,26},{27,28}},{{29,30},{31,32}}}}}}}'
 );
-ERROR:  number of array dimensions (7) exceeds the maximum allowed (6)
+ERROR:  number of array dimensions exceeds the maximum allowed (6)
 LINE 1: select plperl_sum_array('{{{{{{{1,2},{3,4}},{{5,6},{7,8}}},{...
                                 ^
 select plperl_sum_array('{{{1,2,3}, {4,5,6,7}}, {{7,8,9}, {10, 11, 12}}}');
diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c
index 863864253f..d68ad7be34 100644
--- a/src/pl/plperl/plperl.c
+++ b/src/pl/plperl/plperl.c
@@ -1201,8 +1201,8 @@ array_to_datum_internal(AV *av, ArrayBuildState **astatep,
                 if (cur_depth + 1 > MAXDIM)
                     ereport(ERROR,
                             (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-                             errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
-                                    cur_depth + 1, MAXDIM)));
+                             errmsg("number of array dimensions exceeds the maximum allowed (%d)",
+                                    MAXDIM)));
                 /* OK, add a dimension */
                 dims[*ndims] = av_len(nav) + 1;
                 (*ndims)++;

Вход в личный кабинет

Восстановление пароля

Подтверждение аккаунта

Изменение пароля

Обсуждение: Cleaning up array_in()

Вложения

Вложения

Вложения

Вложения

Вложения

Вложения

Вложения

Вложения

Вложения

Вложения