Re: BUG #17912: Invalid memory access when converting plpython' array containing empty array

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: BUG #17912: Invalid memory access when converting plpython' array containing empty array
Дата
Msg-id 1483429.1682709429@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: BUG #17912: Invalid memory access when converting plpython' array containing empty array  (Tom Lane <tgl@sss.pgh.pa.us>)
Ответы Re: BUG #17912: Invalid memory access when converting plpython' array containing empty array  (Alexander Lakhin <exclusion@gmail.com>)
Список pgsql-bugs
I wrote:
> Yeah.  AFAICT, the idea of the existing code is to descend through
> the first item at each nest level till we hit a non-list, then take
> the lengths of those lists as the array dimensions, and then complain
> if we find any later items that don't fit those dimensions.  That leads
> to some symmetry problems, in that the error you get for inconsistent
> sequence lengths depends on the order in which the items are presented.

The real problem here is that we don't check that the list nesting
depth is the same throughout the array: if lists are more deeply
nested in later elements, we'll treat those sub-lists as scalars,
leading to inconsistent results.  Conversely, a less-deeply-nested
list structure in a later element might still work, if it can be
treated as a sequence.  I think the second and third examples
I gave should both throw errors.

I also notice that the error messages in this area speak of "sequences",
but it is more correct to call them "lists", because Python draws a
distinction.  (All lists are sequences, but not vice versa, eg a
string is a sequence but not a list.)

So I'm thinking about the attached.  I do not propose this for
back-patching, because it could break applications that work today.
But it seems like good tightening-up for HEAD, or maybe we should
wait for v17 at this point?

            regards, tom lane

diff --git a/src/pl/plpython/expected/plpython_types.out b/src/pl/plpython/expected/plpython_types.out
index c4e749a5a8..f477e239bc 100644
--- a/src/pl/plpython/expected/plpython_types.out
+++ b/src/pl/plpython/expected/plpython_types.out
@@ -700,10 +700,26 @@ CREATE FUNCTION test_type_conversion_mdarray_malformed() RETURNS int[] AS $$
 return [[1,2,3],[4,5]]
 $$ LANGUAGE plpython3u;
 SELECT * FROM test_type_conversion_mdarray_malformed();
-ERROR:  wrong length of inner sequence: has length 2, but 3 was expected
-DETAIL:  To construct a multidimensional array, the inner sequences must all have the same length.
+ERROR:  wrong length of inner list: has length 2, but 3 was expected
+DETAIL:  To construct a multidimensional array, the inner lists must all have the same length.
 CONTEXT:  while creating return value
 PL/Python function "test_type_conversion_mdarray_malformed"
+CREATE FUNCTION test_type_conversion_mdarray_malformed2() RETURNS text[] AS $$
+return [[1,2,3], "abc"]
+$$ LANGUAGE plpython3u;
+SELECT * FROM test_type_conversion_mdarray_malformed2();
+ERROR:  array element is a scalar, but should be a list
+DETAIL:  To construct a multidimensional array, the inner lists must be nested to a uniform depth.
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_mdarray_malformed2"
+CREATE FUNCTION test_type_conversion_mdarray_malformed3() RETURNS text[] AS $$
+return ["abc", [1,2,3]]
+$$ LANGUAGE plpython3u;
+SELECT * FROM test_type_conversion_mdarray_malformed3();
+ERROR:  array element is a list, but should be a scalar
+DETAIL:  To construct a multidimensional array, the inner lists must be nested to a uniform depth.
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_mdarray_malformed3"
 CREATE FUNCTION test_type_conversion_mdarray_toodeep() RETURNS int[] AS $$
 return [[[[[[[1]]]]]]]
 $$ LANGUAGE plpython3u;
diff --git a/src/pl/plpython/plpy_typeio.c b/src/pl/plpython/plpy_typeio.c
index 864b5f1765..eddedec3d4 100644
--- a/src/pl/plpython/plpy_typeio.c
+++ b/src/pl/plpython/plpy_typeio.c
@@ -1126,7 +1126,7 @@ PLyObject_ToTransform(PLyObToDatum *arg, PyObject *plrv,


 /*
- * Convert Python sequence to SQL array.
+ * Convert Python sequence (or list of lists) to SQL array.
  */
 static Datum
 PLySequence_ToArray(PLyObToDatum *arg, PyObject *plrv,
@@ -1205,8 +1205,15 @@ PLySequence_ToArray(PLyObToDatum *arg, PyObject *plrv,
         dims[0] = PySequence_Length(plrv);
     }

-    /* Allocate space for work arrays, after detecting array size overflow */
     len = ArrayGetNItems(ndim, dims);
+    if (len == 0)
+    {
+        /* If no elements, ensure we return a zero-D array */
+        array = construct_empty_array(arg->u.array.elmbasetype);
+        return PointerGetDatum(array);
+    }
+
+    /* Allocate space for work arrays, after detecting array size overflow */
     elems = palloc(sizeof(Datum) * len);
     nulls = palloc(sizeof(bool) * len);

@@ -1246,35 +1253,55 @@ PLySequence_ToArray_recurse(PLyObToDatum *elm, PyObject *list,
 {
     int            i;

-    if (PySequence_Length(list) != dims[dim])
-        ereport(ERROR,
-                (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
-                 errmsg("wrong length of inner sequence: has length %d, but %d was expected",
-                        (int) PySequence_Length(list), dims[dim]),
-                 (errdetail("To construct a multidimensional array, the inner sequences must all have the same
length."))));
-
-    if (dim < ndim - 1)
+    /*
+     * The list nesting depth in all array items must match what
+     * PLySequence_ToArray saw in the first item.  At the first dimension, we
+     * might have a non-list sequence, but recurse anyway.
+     */
+    if (dim == 0 || PyList_Check(list))
     {
-        for (i = 0; i < dims[dim]; i++)
-        {
-            PyObject   *sublist = PySequence_GetItem(list, i);
+        int            thisdim = PySequence_Length(list);

-            PLySequence_ToArray_recurse(elm, sublist, dims, ndim, dim + 1,
-                                        elems, nulls, currelem);
-            Py_XDECREF(sublist);
-        }
-    }
-    else
-    {
-        for (i = 0; i < dims[dim]; i++)
+        if (thisdim < 0)
+            PLy_elog(ERROR, "could not determine sequence length for function return value");
+
+        /* like PLySequence_ToArray, we treat 0-length list as a scalar */
+        if (thisdim > 0)
         {
-            PyObject   *obj = PySequence_GetItem(list, i);
+            /* Check for uniform array structure */
+            if (dim >= ndim)
+                ereport(ERROR,
+                        (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+                         errmsg("array element is a list, but should be a scalar"),
+                         (errdetail("To construct a multidimensional array, the inner lists must be nested to a
uniformdepth.")))); 
+            if (thisdim != dims[dim])
+                ereport(ERROR,
+                        (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+                         errmsg("wrong length of inner list: has length %d, but %d was expected",
+                                thisdim, dims[dim]),
+                         (errdetail("To construct a multidimensional array, the inner lists must all have the same
length."))));
+            /* OK, so recurse */
+            for (i = 0; i < thisdim; i++)
+            {
+                PyObject   *sublist = PySequence_GetItem(list, i);

-            elems[*currelem] = elm->func(elm, obj, &nulls[*currelem], true);
-            Py_XDECREF(obj);
-            (*currelem)++;
+                PLySequence_ToArray_recurse(elm, sublist, dims, ndim, dim + 1,
+                                            elems, nulls, currelem);
+                Py_XDECREF(sublist);
+            }
+            return;
         }
     }
+
+    /* The "list" is, or should be treated as, a scalar */
+    if (dim != ndim)
+        ereport(ERROR,
+                (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+                 errmsg("array element is a scalar, but should be a list"),
+                 (errdetail("To construct a multidimensional array, the inner lists must be nested to a uniform
depth."))));
+
+    elems[*currelem] = elm->func(elm, list, &nulls[*currelem], true);
+    (*currelem)++;
 }


diff --git a/src/pl/plpython/sql/plpython_types.sql b/src/pl/plpython/sql/plpython_types.sql
index 9702a10a72..b57b9a6463 100644
--- a/src/pl/plpython/sql/plpython_types.sql
+++ b/src/pl/plpython/sql/plpython_types.sql
@@ -341,6 +341,18 @@ $$ LANGUAGE plpython3u;

 SELECT * FROM test_type_conversion_mdarray_malformed();

+CREATE FUNCTION test_type_conversion_mdarray_malformed2() RETURNS text[] AS $$
+return [[1,2,3], "abc"]
+$$ LANGUAGE plpython3u;
+
+SELECT * FROM test_type_conversion_mdarray_malformed2();
+
+CREATE FUNCTION test_type_conversion_mdarray_malformed3() RETURNS text[] AS $$
+return ["abc", [1,2,3]]
+$$ LANGUAGE plpython3u;
+
+SELECT * FROM test_type_conversion_mdarray_malformed3();
+
 CREATE FUNCTION test_type_conversion_mdarray_toodeep() RETURNS int[] AS $$
 return [[[[[[[1]]]]]]]
 $$ LANGUAGE plpython3u;

В списке pgsql-bugs по дате отправления:

Предыдущее
От: Bruce Momjian
Дата:
Сообщение: Re: BUG #16628: Hostame and string connection functions
Следующее
От: Bruce Momjian
Дата:
Сообщение: Re: BUG #17911: Database or JDBC Driver Provides Incorrect Type