From 510e1b8ecf2a6f0d91d50f41f6b7fd75242273a0 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Wed, 26 Oct 2016 10:38:56 +0300
Subject: [PATCH] Give a hint, when [] is incorrectly used for a composite type
 in array.

That used to be accepted, so let's try to give a hint to users on why
their PL/python functions no longer work.

Reviewed by Pavel Stehule.

Discussion: <CAH38_tmbqwaUyKs9yagyRra=SMaT45FPBxk1pmTYcM0TyXGG7Q@mail.gmail.com>
---
 .../plpython/expected/plpython_composite.out  |  13 ++
 src/pl/plpython/plpy_cursorobject.c           |   3 +-
 src/pl/plpython/plpy_exec.c                   |   7 +-
 src/pl/plpython/plpy_spi.c                    |   3 +-
 src/pl/plpython/plpy_typeio.c                 | 116 +++++++++++++-----
 src/pl/plpython/plpy_typeio.h                 |  17 ++-
 src/pl/plpython/sql/plpython_composite.sql    |   9 ++
 7 files changed, 129 insertions(+), 39 deletions(-)

diff --git a/src/pl/plpython/expected/plpython_composite.out b/src/pl/plpython/expected/plpython_composite.out
index 1ab3b31e684..c6964841fba 100644
--- a/src/pl/plpython/expected/plpython_composite.out
+++ b/src/pl/plpython/expected/plpython_composite.out
@@ -579,3 +579,16 @@ SELECT * FROM composite_type_as_list();
  {{"(first,1)","(second,1)"},{"(first,2)","(second,2)"},{"(first,3)","(second,3)"}}
 (1 row)
 
+-- Starting with PostgreSQL 10, a composite type in an array cannot be
+-- represented as a Python list, because it's ambiguous with multi-dimensional
+-- arrays. So this throws an error now. The error should contain a useful hint
+-- on the issue.
+CREATE FUNCTION composite_type_as_list_broken()  RETURNS type_record[] AS $$
+  return [['first', 1]];
+$$ LANGUAGE plpythonu;
+SELECT * FROM composite_type_as_list_broken();
+ERROR:  malformed record literal: "first"
+DETAIL:  Missing left parenthesis.
+HINT:  To return a composite type in an array, return the composite type as a Python tuple, e.g. "[('foo')]"
+CONTEXT:  while creating return value
+PL/Python function "composite_type_as_list_broken"
diff --git a/src/pl/plpython/plpy_cursorobject.c b/src/pl/plpython/plpy_cursorobject.c
index 0e17a03ce7b..e682bfe566a 100644
--- a/src/pl/plpython/plpy_cursorobject.c
+++ b/src/pl/plpython/plpy_cursorobject.c
@@ -240,7 +240,8 @@ PLy_cursor_plan(PyObject *ob, PyObject *args)
 					plan->values[j] =
 						plan->args[j].out.d.func(&(plan->args[j].out.d),
 												 -1,
-												 elem);
+												 elem,
+												 false);
 				}
 				PG_CATCH();
 				{
diff --git a/src/pl/plpython/plpy_exec.c b/src/pl/plpython/plpy_exec.c
index 25e4744c7d8..fa583fab164 100644
--- a/src/pl/plpython/plpy_exec.c
+++ b/src/pl/plpython/plpy_exec.c
@@ -245,7 +245,7 @@ PLy_exec_function(FunctionCallInfo fcinfo, PLyProcedure *proc)
 			desc = lookup_rowtype_tupdesc(proc->result.out.d.typoid,
 										  proc->result.out.d.typmod);
 
-			rv = PLyObject_ToCompositeDatum(&proc->result, desc, plrv);
+			rv = PLyObject_ToCompositeDatum(&proc->result, desc, plrv, false);
 			fcinfo->isnull = (rv == (Datum) NULL);
 
 			ReleaseTupleDesc(desc);
@@ -253,7 +253,7 @@ PLy_exec_function(FunctionCallInfo fcinfo, PLyProcedure *proc)
 		else
 		{
 			fcinfo->isnull = false;
-			rv = (proc->result.out.d.func) (&proc->result.out.d, -1, plrv);
+			rv = (proc->result.out.d.func) (&proc->result.out.d, -1, plrv, false);
 		}
 	}
 	PG_CATCH();
@@ -984,7 +984,8 @@ PLy_modify_tuple(PLyProcedure *proc, PyObject *pltd, TriggerData *tdata,
 
 				modvalues[i] = (att->func) (att,
 											tupdesc->attrs[atti]->atttypmod,
-											plval);
+											plval,
+											false);
 				modnulls[i] = ' ';
 			}
 			else
diff --git a/src/pl/plpython/plpy_spi.c b/src/pl/plpython/plpy_spi.c
index 09ee06d9e86..0d556a2ec28 100644
--- a/src/pl/plpython/plpy_spi.c
+++ b/src/pl/plpython/plpy_spi.c
@@ -264,7 +264,8 @@ PLy_spi_execute_plan(PyObject *ob, PyObject *list, long limit)
 					plan->values[j] =
 						plan->args[j].out.d.func(&(plan->args[j].out.d),
 												 -1,
-												 elem);
+												 elem,
+												 false);
 				}
 				PG_CATCH();
 				{
diff --git a/src/pl/plpython/plpy_typeio.c b/src/pl/plpython/plpy_typeio.c
index 85d50c2b0c3..d346e225919 100644
--- a/src/pl/plpython/plpy_typeio.c
+++ b/src/pl/plpython/plpy_typeio.c
@@ -14,6 +14,7 @@
 #include "parser/parse_type.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
+#include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/numeric.h"
@@ -49,21 +50,21 @@ static PyObject *PLyList_FromArray_recurse(PLyDatumToOb *elm, int *dims, int ndi
 						  char **dataptr_p, bits8 **bitmap_p, int *bitmask_p);
 
 /* conversion from Python objects to Datums */
-static Datum PLyObject_ToBool(PLyObToDatum *arg, int32 typmod, PyObject *plrv);
-static Datum PLyObject_ToBytea(PLyObToDatum *arg, int32 typmod, PyObject *plrv);
-static Datum PLyObject_ToComposite(PLyObToDatum *arg, int32 typmod, PyObject *plrv);
-static Datum PLyObject_ToDatum(PLyObToDatum *arg, int32 typmod, PyObject *plrv);
-static Datum PLyObject_ToTransform(PLyObToDatum *arg, int32 typmod, PyObject *plrv);
-static Datum PLySequence_ToArray(PLyObToDatum *arg, int32 typmod, PyObject *plrv);
+static Datum PLyObject_ToBool(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray);
+static Datum PLyObject_ToBytea(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray);
+static Datum PLyObject_ToComposite(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray);
+static Datum PLyObject_ToDatum(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray);
+static Datum PLyObject_ToTransform(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray);
+static Datum PLySequence_ToArray(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray);
 static void PLySequence_ToArray_recurse(PLyObToDatum *elm, PyObject *list,
 							int *dims, int ndim, int dim,
 							Datum *elems, bool *nulls, int *currelem);
 
 /* conversion from Python objects to composite Datums (used by triggers and SRFs) */
-static Datum PLyString_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *string);
+static Datum PLyString_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *string, bool inarray);
 static Datum PLyMapping_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *mapping);
 static Datum PLySequence_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *sequence);
-static Datum PLyGenericObject_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *object);
+static Datum PLyGenericObject_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *object, bool inarray);
 
 void
 PLy_typeinfo_init(PLyTypeInfo *arg, MemoryContext mcxt)
@@ -341,12 +342,12 @@ PLyDict_FromTuple(PLyTypeInfo *info, HeapTuple tuple, TupleDesc desc)
  *	as an object that has __getattr__ support.
  */
 Datum
-PLyObject_ToCompositeDatum(PLyTypeInfo *info, TupleDesc desc, PyObject *plrv)
+PLyObject_ToCompositeDatum(PLyTypeInfo *info, TupleDesc desc, PyObject *plrv, bool inarray)
 {
 	Datum		datum;
 
 	if (PyString_Check(plrv) || PyUnicode_Check(plrv))
-		datum = PLyString_ToComposite(info, desc, plrv);
+		datum = PLyString_ToComposite(info, desc, plrv, inarray);
 	else if (PySequence_Check(plrv))
 		/* composite type as sequence (tuple, list etc) */
 		datum = PLySequence_ToComposite(info, desc, plrv);
@@ -355,7 +356,7 @@ PLyObject_ToCompositeDatum(PLyTypeInfo *info, TupleDesc desc, PyObject *plrv)
 		datum = PLyMapping_ToComposite(info, desc, plrv);
 	else
 		/* returned as smth, must provide method __getattr__(name) */
-		datum = PLyGenericObject_ToComposite(info, desc, plrv);
+		datum = PLyGenericObject_ToComposite(info, desc, plrv, inarray);
 
 	return datum;
 }
@@ -746,7 +747,7 @@ PLyList_FromArray_recurse(PLyDatumToOb *elm, int *dims, int ndim, int dim,
  * type can parse.
  */
 static Datum
-PLyObject_ToBool(PLyObToDatum *arg, int32 typmod, PyObject *plrv)
+PLyObject_ToBool(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray)
 {
 	Datum		rv;
 
@@ -765,7 +766,7 @@ PLyObject_ToBool(PLyObToDatum *arg, int32 typmod, PyObject *plrv)
  * with embedded nulls.  And it's faster this way.
  */
 static Datum
-PLyObject_ToBytea(PLyObToDatum *arg, int32 typmod, PyObject *plrv)
+PLyObject_ToBytea(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray)
 {
 	PyObject   *volatile plrv_so = NULL;
 	Datum		rv;
@@ -809,7 +810,7 @@ PLyObject_ToBytea(PLyObToDatum *arg, int32 typmod, PyObject *plrv)
  * for obtaining PostgreSQL tuples.
  */
 static Datum
-PLyObject_ToComposite(PLyObToDatum *arg, int32 typmod, PyObject *plrv)
+PLyObject_ToComposite(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray)
 {
 	Datum		rv;
 	PLyTypeInfo info;
@@ -836,7 +837,7 @@ PLyObject_ToComposite(PLyObToDatum *arg, int32 typmod, PyObject *plrv)
 	 * that info instead of looking it up every time a tuple is returned from
 	 * the function.
 	 */
-	rv = PLyObject_ToCompositeDatum(&info, desc, plrv);
+	rv = PLyObject_ToCompositeDatum(&info, desc, plrv, inarray);
 
 	ReleaseTupleDesc(desc);
 
@@ -908,26 +909,70 @@ PLyObject_AsString(PyObject *plrv)
  * cstring into PostgreSQL type.
  */
 static Datum
-PLyObject_ToDatum(PLyObToDatum *arg, int32 typmod, PyObject *plrv)
+PLyObject_ToDatum(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray)
 {
+	char	   *str;
+
 	Assert(plrv != Py_None);
 
+	str = PLyObject_AsString(plrv);
+
+	/*
+	 * If we are parsing a composite type within an array, and the string
+	 * isn't a valid record literal, there's a high chance that the function
+	 * did something like:
+	 *
+	 * CREATE FUNCTION .. RETURNS comptype[] AS $$ return [['foo', 'bar']] $$
+	 * LANGUAGE plpython;
+	 *
+	 * Before PostgreSQL 10, that was interpreted as a single-dimensional
+	 * array, containing record ('foo', 'bar'). PostgreSQL 10 added support
+	 * for multi-dimensional arrays, and it is now interpreted as a
+	 * two-dimensional array, containing two records, 'foo', and 'bar'.
+	 * record_in() will throw an error, because "foo" is not a valid record
+	 * literal.
+	 *
+	 * To make that less confusing to users who are upgrading from older
+	 * versions, try to give a hint in the typical instances of that. If we are
+	 * parsing an array of composite types, and we see a string literal that
+	 * is not a valid record literal, give a hint. We only want to give the
+	 * hint in the narrow case of a malformed string literal, not any error
+	 * from record_in(), so check for that case here specifically.
+	 *
+	 * This check better match the one in record_in(), so that we don't forbid
+	 * literals that are actually valid!
+	 */
+	if (inarray && arg->typfunc.fn_oid == F_RECORD_IN)
+	{
+		char	   *ptr = str;
+
+		/* Allow leading whitespace */
+		while (*ptr && isspace((unsigned char) *ptr))
+			ptr++;
+		if (*ptr++ != '(')
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("malformed record literal: \"%s\"", str),
+					 errdetail("Missing left parenthesis."),
+					 errhint("To return a composite type in an array, return the composite type as a Python tuple, e.g. \"[('foo')]\"")));
+	}
+
 	return InputFunctionCall(&arg->typfunc,
-							 PLyObject_AsString(plrv),
+							 str,
 							 arg->typioparam,
 							 typmod);
 }
 
 
 static Datum
-PLyObject_ToTransform(PLyObToDatum *arg, int32 typmod, PyObject *plrv)
+PLyObject_ToTransform(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray)
 {
 	return FunctionCall1(&arg->typtransform, PointerGetDatum(plrv));
 }
 
 
 static Datum
-PLySequence_ToArray(PLyObToDatum *arg, int32 typmod, PyObject *plrv)
+PLySequence_ToArray(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray)
 {
 	ArrayType  *array;
 	int			i;
@@ -1085,7 +1130,7 @@ PLySequence_ToArray_recurse(PLyObToDatum *elm, PyObject *list,
 			else
 			{
 				nulls[*currelem] = false;
-				elems[*currelem] = elm->func(elm, -1, obj);
+				elems[*currelem] = elm->func(elm, -1, obj, true);
 			}
 			Py_XDECREF(obj);
 			(*currelem)++;
@@ -1095,7 +1140,7 @@ PLySequence_ToArray_recurse(PLyObToDatum *elm, PyObject *list,
 
 
 static Datum
-PLyString_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *string)
+PLyString_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *string, bool inarray)
 {
 	Datum		result;
 	HeapTuple	typeTup;
@@ -1120,7 +1165,7 @@ PLyString_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *string)
 
 	ReleaseSysCache(typeTup);
 
-	result = PLyObject_ToDatum(&locinfo.out.d, desc->tdtypmod, string);
+	result = PLyObject_ToDatum(&locinfo.out.d, desc->tdtypmod, string, inarray);
 
 	MemoryContextDelete(cxt);
 
@@ -1172,7 +1217,7 @@ PLyMapping_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *mapping)
 			}
 			else if (value)
 			{
-				values[i] = (att->func) (att, -1, value);
+				values[i] = (att->func) (att, -1, value, false);
 				nulls[i] = false;
 			}
 			else
@@ -1265,7 +1310,7 @@ PLySequence_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *sequence)
 			}
 			else if (value)
 			{
-				values[i] = (att->func) (att, -1, value);
+				values[i] = (att->func) (att, -1, value, false);
 				nulls[i] = false;
 			}
 
@@ -1294,7 +1339,7 @@ PLySequence_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *sequence)
 
 
 static Datum
-PLyGenericObject_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *object)
+PLyGenericObject_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *object, bool inarray)
 {
 	Datum		result;
 	HeapTuple	tuple;
@@ -1335,16 +1380,29 @@ PLyGenericObject_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *object
 			}
 			else if (value)
 			{
-				values[i] = (att->func) (att, -1, value);
+				values[i] = (att->func) (att, -1, value, false);
 				nulls[i] = false;
 			}
 			else
+			{
+				/*
+				 * No attribute for this column in the object.
+				 *
+				 * If we are parsing a composite type in an array, a likely
+				 * cause is that the function contained something like "[[123,
+				 * 'foo']]". Before PostgreSQL 10, that was interpreted as an
+				 * array, with a composite type (123, 'foo') in it. But now
+				 * it's interpreted as a two-dimensional array, and we try to
+				 * interpret "123" as the composite type. See also similar
+				 * heuristic in PLyObject_ToDatum().
+				 */
 				ereport(ERROR,
 						(errcode(ERRCODE_UNDEFINED_COLUMN),
 						 errmsg("attribute \"%s\" does not exist in Python object", key),
-						 errhint("To return null in a column, "
-						   "let the returned object have an attribute named "
-								 "after column with value None.")));
+						 inarray ?
+						 errhint("To return a composite type in an array, return the composite type as a Python tuple, e.g. \"[('foo')]\"") :
+						 errhint("To return null in a column, let the returned object have an attribute named after column with value None.")));
+			}
 
 			Py_XDECREF(value);
 			value = NULL;
diff --git a/src/pl/plpython/plpy_typeio.h b/src/pl/plpython/plpy_typeio.h
index 29fff61dc56..5f5c1ad5c6b 100644
--- a/src/pl/plpython/plpy_typeio.h
+++ b/src/pl/plpython/plpy_typeio.h
@@ -10,8 +10,11 @@
 #include "fmgr.h"
 #include "storage/itemptr.h"
 
+/*
+ * Conversion from PostgreSQL Datum to a Python object.
+ */
 struct PLyDatumToOb;
-typedef PyObject *(*PLyDatumToObFunc) (struct PLyDatumToOb *, Datum);
+typedef PyObject *(*PLyDatumToObFunc) (struct PLyDatumToOb *arg, Datum val);
 
 typedef struct PLyDatumToOb
 {
@@ -39,11 +42,15 @@ typedef union PLyTypeInput
 	PLyTupleToOb r;
 } PLyTypeInput;
 
-/* convert PyObject to a Postgresql Datum or tuple.
- * output from Python
+/*
+ * Conversion from Python object to a Postgresql Datum.
+ *
+ * The 'inarray' argument to the conversion function is true, if the
+ * converted value was in an array (Python list). It is used to give a
+ * better error message in some cases.
  */
 struct PLyObToDatum;
-typedef Datum (*PLyObToDatumFunc) (struct PLyObToDatum *, int32, PyObject *);
+typedef Datum (*PLyObToDatumFunc) (struct PLyObToDatum *arg, int32 typmod, PyObject *val, bool inarray);
 
 typedef struct PLyObToDatum
 {
@@ -104,7 +111,7 @@ extern void PLy_output_tuple_funcs(PLyTypeInfo *arg, TupleDesc desc);
 extern void PLy_output_record_funcs(PLyTypeInfo *arg, TupleDesc desc);
 
 /* conversion from Python objects to composite Datums */
-extern Datum PLyObject_ToCompositeDatum(PLyTypeInfo *info, TupleDesc desc, PyObject *plrv);
+extern Datum PLyObject_ToCompositeDatum(PLyTypeInfo *info, TupleDesc desc, PyObject *plrv, bool isarray);
 
 /* conversion from heap tuples to Python dictionaries */
 extern PyObject *PLyDict_FromTuple(PLyTypeInfo *info, HeapTuple tuple, TupleDesc desc);
diff --git a/src/pl/plpython/sql/plpython_composite.sql b/src/pl/plpython/sql/plpython_composite.sql
index 7a60b1f6b79..0fd2f5d5e3b 100644
--- a/src/pl/plpython/sql/plpython_composite.sql
+++ b/src/pl/plpython/sql/plpython_composite.sql
@@ -213,3 +213,12 @@ CREATE FUNCTION composite_type_as_list()  RETURNS type_record[] AS $$
   return [[('first', 1), ('second', 1)], [('first', 2), ('second', 2)], [('first', 3), ('second', 3)]];
 $$ LANGUAGE plpythonu;
 SELECT * FROM composite_type_as_list();
+
+-- Starting with PostgreSQL 10, a composite type in an array cannot be
+-- represented as a Python list, because it's ambiguous with multi-dimensional
+-- arrays. So this throws an error now. The error should contain a useful hint
+-- on the issue.
+CREATE FUNCTION composite_type_as_list_broken()  RETURNS type_record[] AS $$
+  return [['first', 1]];
+$$ LANGUAGE plpythonu;
+SELECT * FROM composite_type_as_list_broken();
-- 
GitLab