From 510e1b8ecf2a6f0d91d50f41f6b7fd75242273a0 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas <heikki.linnakangas@iki.fi> Date: Wed, 26 Oct 2016 10:38:56 +0300 Subject: [PATCH] Give a hint, when [] is incorrectly used for a composite type in array. That used to be accepted, so let's try to give a hint to users on why their PL/python functions no longer work. Reviewed by Pavel Stehule. Discussion: <CAH38_tmbqwaUyKs9yagyRra=SMaT45FPBxk1pmTYcM0TyXGG7Q@mail.gmail.com> --- .../plpython/expected/plpython_composite.out | 13 ++ src/pl/plpython/plpy_cursorobject.c | 3 +- src/pl/plpython/plpy_exec.c | 7 +- src/pl/plpython/plpy_spi.c | 3 +- src/pl/plpython/plpy_typeio.c | 116 +++++++++++++----- src/pl/plpython/plpy_typeio.h | 17 ++- src/pl/plpython/sql/plpython_composite.sql | 9 ++ 7 files changed, 129 insertions(+), 39 deletions(-) diff --git a/src/pl/plpython/expected/plpython_composite.out b/src/pl/plpython/expected/plpython_composite.out index 1ab3b31e684..c6964841fba 100644 --- a/src/pl/plpython/expected/plpython_composite.out +++ b/src/pl/plpython/expected/plpython_composite.out @@ -579,3 +579,16 @@ SELECT * FROM composite_type_as_list(); {{"(first,1)","(second,1)"},{"(first,2)","(second,2)"},{"(first,3)","(second,3)"}} (1 row) +-- Starting with PostgreSQL 10, a composite type in an array cannot be +-- represented as a Python list, because it's ambiguous with multi-dimensional +-- arrays. So this throws an error now. The error should contain a useful hint +-- on the issue. +CREATE FUNCTION composite_type_as_list_broken() RETURNS type_record[] AS $$ + return [['first', 1]]; +$$ LANGUAGE plpythonu; +SELECT * FROM composite_type_as_list_broken(); +ERROR: malformed record literal: "first" +DETAIL: Missing left parenthesis. +HINT: To return a composite type in an array, return the composite type as a Python tuple, e.g. 
"[('foo')]" +CONTEXT: while creating return value +PL/Python function "composite_type_as_list_broken" diff --git a/src/pl/plpython/plpy_cursorobject.c b/src/pl/plpython/plpy_cursorobject.c index 0e17a03ce7b..e682bfe566a 100644 --- a/src/pl/plpython/plpy_cursorobject.c +++ b/src/pl/plpython/plpy_cursorobject.c @@ -240,7 +240,8 @@ PLy_cursor_plan(PyObject *ob, PyObject *args) plan->values[j] = plan->args[j].out.d.func(&(plan->args[j].out.d), -1, - elem); + elem, + false); } PG_CATCH(); { diff --git a/src/pl/plpython/plpy_exec.c b/src/pl/plpython/plpy_exec.c index 25e4744c7d8..fa583fab164 100644 --- a/src/pl/plpython/plpy_exec.c +++ b/src/pl/plpython/plpy_exec.c @@ -245,7 +245,7 @@ PLy_exec_function(FunctionCallInfo fcinfo, PLyProcedure *proc) desc = lookup_rowtype_tupdesc(proc->result.out.d.typoid, proc->result.out.d.typmod); - rv = PLyObject_ToCompositeDatum(&proc->result, desc, plrv); + rv = PLyObject_ToCompositeDatum(&proc->result, desc, plrv, false); fcinfo->isnull = (rv == (Datum) NULL); ReleaseTupleDesc(desc); @@ -253,7 +253,7 @@ PLy_exec_function(FunctionCallInfo fcinfo, PLyProcedure *proc) else { fcinfo->isnull = false; - rv = (proc->result.out.d.func) (&proc->result.out.d, -1, plrv); + rv = (proc->result.out.d.func) (&proc->result.out.d, -1, plrv, false); } } PG_CATCH(); @@ -984,7 +984,8 @@ PLy_modify_tuple(PLyProcedure *proc, PyObject *pltd, TriggerData *tdata, modvalues[i] = (att->func) (att, tupdesc->attrs[atti]->atttypmod, - plval); + plval, + false); modnulls[i] = ' '; } else diff --git a/src/pl/plpython/plpy_spi.c b/src/pl/plpython/plpy_spi.c index 09ee06d9e86..0d556a2ec28 100644 --- a/src/pl/plpython/plpy_spi.c +++ b/src/pl/plpython/plpy_spi.c @@ -264,7 +264,8 @@ PLy_spi_execute_plan(PyObject *ob, PyObject *list, long limit) plan->values[j] = plan->args[j].out.d.func(&(plan->args[j].out.d), -1, - elem); + elem, + false); } PG_CATCH(); { diff --git a/src/pl/plpython/plpy_typeio.c b/src/pl/plpython/plpy_typeio.c index 85d50c2b0c3..d346e225919 100644 --- 
a/src/pl/plpython/plpy_typeio.c +++ b/src/pl/plpython/plpy_typeio.c @@ -14,6 +14,7 @@ #include "parser/parse_type.h" #include "utils/array.h" #include "utils/builtins.h" +#include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/numeric.h" @@ -49,21 +50,21 @@ static PyObject *PLyList_FromArray_recurse(PLyDatumToOb *elm, int *dims, int ndi char **dataptr_p, bits8 **bitmap_p, int *bitmask_p); /* conversion from Python objects to Datums */ -static Datum PLyObject_ToBool(PLyObToDatum *arg, int32 typmod, PyObject *plrv); -static Datum PLyObject_ToBytea(PLyObToDatum *arg, int32 typmod, PyObject *plrv); -static Datum PLyObject_ToComposite(PLyObToDatum *arg, int32 typmod, PyObject *plrv); -static Datum PLyObject_ToDatum(PLyObToDatum *arg, int32 typmod, PyObject *plrv); -static Datum PLyObject_ToTransform(PLyObToDatum *arg, int32 typmod, PyObject *plrv); -static Datum PLySequence_ToArray(PLyObToDatum *arg, int32 typmod, PyObject *plrv); +static Datum PLyObject_ToBool(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray); +static Datum PLyObject_ToBytea(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray); +static Datum PLyObject_ToComposite(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray); +static Datum PLyObject_ToDatum(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray); +static Datum PLyObject_ToTransform(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray); +static Datum PLySequence_ToArray(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray); static void PLySequence_ToArray_recurse(PLyObToDatum *elm, PyObject *list, int *dims, int ndim, int dim, Datum *elems, bool *nulls, int *currelem); /* conversion from Python objects to composite Datums (used by triggers and SRFs) */ -static Datum PLyString_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *string); +static Datum PLyString_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *string, bool inarray); 
static Datum PLyMapping_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *mapping); static Datum PLySequence_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *sequence); -static Datum PLyGenericObject_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *object); +static Datum PLyGenericObject_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *object, bool inarray); void PLy_typeinfo_init(PLyTypeInfo *arg, MemoryContext mcxt) @@ -341,12 +342,12 @@ PLyDict_FromTuple(PLyTypeInfo *info, HeapTuple tuple, TupleDesc desc) * as an object that has __getattr__ support. */ Datum -PLyObject_ToCompositeDatum(PLyTypeInfo *info, TupleDesc desc, PyObject *plrv) +PLyObject_ToCompositeDatum(PLyTypeInfo *info, TupleDesc desc, PyObject *plrv, bool inarray) { Datum datum; if (PyString_Check(plrv) || PyUnicode_Check(plrv)) - datum = PLyString_ToComposite(info, desc, plrv); + datum = PLyString_ToComposite(info, desc, plrv, inarray); else if (PySequence_Check(plrv)) /* composite type as sequence (tuple, list etc) */ datum = PLySequence_ToComposite(info, desc, plrv); @@ -355,7 +356,7 @@ PLyObject_ToCompositeDatum(PLyTypeInfo *info, TupleDesc desc, PyObject *plrv) datum = PLyMapping_ToComposite(info, desc, plrv); else /* returned as smth, must provide method __getattr__(name) */ - datum = PLyGenericObject_ToComposite(info, desc, plrv); + datum = PLyGenericObject_ToComposite(info, desc, plrv, inarray); return datum; } @@ -746,7 +747,7 @@ PLyList_FromArray_recurse(PLyDatumToOb *elm, int *dims, int ndim, int dim, * type can parse. */ static Datum -PLyObject_ToBool(PLyObToDatum *arg, int32 typmod, PyObject *plrv) +PLyObject_ToBool(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray) { Datum rv; @@ -765,7 +766,7 @@ PLyObject_ToBool(PLyObToDatum *arg, int32 typmod, PyObject *plrv) * with embedded nulls. And it's faster this way. 
*/ static Datum -PLyObject_ToBytea(PLyObToDatum *arg, int32 typmod, PyObject *plrv) +PLyObject_ToBytea(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray) { PyObject *volatile plrv_so = NULL; Datum rv; @@ -809,7 +810,7 @@ PLyObject_ToBytea(PLyObToDatum *arg, int32 typmod, PyObject *plrv) * for obtaining PostgreSQL tuples. */ static Datum -PLyObject_ToComposite(PLyObToDatum *arg, int32 typmod, PyObject *plrv) +PLyObject_ToComposite(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray) { Datum rv; PLyTypeInfo info; @@ -836,7 +837,7 @@ PLyObject_ToComposite(PLyObToDatum *arg, int32 typmod, PyObject *plrv) * that info instead of looking it up every time a tuple is returned from * the function. */ - rv = PLyObject_ToCompositeDatum(&info, desc, plrv); + rv = PLyObject_ToCompositeDatum(&info, desc, plrv, inarray); ReleaseTupleDesc(desc); @@ -908,26 +909,70 @@ PLyObject_AsString(PyObject *plrv) * cstring into PostgreSQL type. */ static Datum -PLyObject_ToDatum(PLyObToDatum *arg, int32 typmod, PyObject *plrv) +PLyObject_ToDatum(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray) { + char *str; + Assert(plrv != Py_None); + str = PLyObject_AsString(plrv); + + /* + * If we are parsing a composite type within an array, and the string + * isn't a valid record literal, there's a high chance that the function + * did something like: + * + * CREATE FUNCTION .. RETURNS comptype[] AS $$ return [['foo', 'bar']] $$ + * LANGUAGE plpython; + * + * Before PostgreSQL 10, that was interpreted as a single-dimensional + * array, containing record ('foo', 'bar'). PostgreSQL 10 added support + * for multi-dimensional arrays, and it is now interpreted as a + * two-dimensional array, containing two records, 'foo', and 'bar'. + * record_in() will throw an error, because "foo" is not a valid record + * literal. + * + * To make that less confusing to users who are upgrading from older + * versions, try to give a hint in the typical instances of that. 
If we are + * parsing an array of composite types, and we see a string literal that + * is not a valid record literal, give a hint. We only want to give the + * hint in the narrow case of a malformed string literal, not any error + * from record_in(), so check for that case here specifically. + * + * This check had better match the one in record_in(), so that we don't forbid + * literals that are actually valid! + */ + if (inarray && arg->typfunc.fn_oid == F_RECORD_IN) + { + char *ptr = str; + + /* Allow leading whitespace */ + while (*ptr && isspace((unsigned char) *ptr)) + ptr++; + if (*ptr++ != '(') + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed record literal: \"%s\"", str), + errdetail("Missing left parenthesis."), + errhint("To return a composite type in an array, return the composite type as a Python tuple, e.g. \"[('foo')]\""))); + } + return InputFunctionCall(&arg->typfunc, - PLyObject_AsString(plrv), + str, arg->typioparam, typmod); } static Datum -PLyObject_ToTransform(PLyObToDatum *arg, int32 typmod, PyObject *plrv) +PLyObject_ToTransform(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray) { return FunctionCall1(&arg->typtransform, PointerGetDatum(plrv)); } static Datum -PLySequence_ToArray(PLyObToDatum *arg, int32 typmod, PyObject *plrv) +PLySequence_ToArray(PLyObToDatum *arg, int32 typmod, PyObject *plrv, bool inarray) { ArrayType *array; int i; @@ -1085,7 +1130,7 @@ PLySequence_ToArray_recurse(PLyObToDatum *elm, PyObject *list, else { nulls[*currelem] = false; - elems[*currelem] = elm->func(elm, -1, obj); + elems[*currelem] = elm->func(elm, -1, obj, true); } Py_XDECREF(obj); (*currelem)++; @@ -1095,7 +1140,7 @@ PLySequence_ToArray_recurse(PLyObToDatum *elm, PyObject *list, static Datum -PLyString_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *string) +PLyString_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *string, bool inarray) { Datum result; HeapTuple typeTup; @@ -1120,7 +1165,7 @@ 
PLyString_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *string) ReleaseSysCache(typeTup); - result = PLyObject_ToDatum(&locinfo.out.d, desc->tdtypmod, string); + result = PLyObject_ToDatum(&locinfo.out.d, desc->tdtypmod, string, inarray); MemoryContextDelete(cxt); @@ -1172,7 +1217,7 @@ PLyMapping_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *mapping) } else if (value) { - values[i] = (att->func) (att, -1, value); + values[i] = (att->func) (att, -1, value, false); nulls[i] = false; } else @@ -1265,7 +1310,7 @@ PLySequence_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *sequence) } else if (value) { - values[i] = (att->func) (att, -1, value); + values[i] = (att->func) (att, -1, value, false); nulls[i] = false; } @@ -1294,7 +1339,7 @@ PLySequence_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *sequence) static Datum -PLyGenericObject_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *object) +PLyGenericObject_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *object, bool inarray) { Datum result; HeapTuple tuple; @@ -1335,16 +1380,29 @@ PLyGenericObject_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *object } else if (value) { - values[i] = (att->func) (att, -1, value); + values[i] = (att->func) (att, -1, value, false); nulls[i] = false; } else + { + /* + * No attribute for this column in the object. + * + * If we are parsing a composite type in an array, a likely + * cause is that the function contained something like "[[123, + * 'foo']]". Before PostgreSQL 10, that was interpreted as an + * array, with a composite type (123, 'foo') in it. But now + * it's interpreted as a two-dimensional array, and we try to + * interpret "123" as the composite type. See also similar + * heuristic in PLyObject_ToDatum(). 
+ */ ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN), errmsg("attribute \"%s\" does not exist in Python object", key), - errhint("To return null in a column, " - "let the returned object have an attribute named " - "after column with value None."))); + inarray ? + errhint("To return a composite type in an array, return the composite type as a Python tuple, e.g. \"[('foo')]\"") : + errhint("To return null in a column, let the returned object have an attribute named after column with value None."))); + } Py_XDECREF(value); value = NULL; diff --git a/src/pl/plpython/plpy_typeio.h b/src/pl/plpython/plpy_typeio.h index 29fff61dc56..5f5c1ad5c6b 100644 --- a/src/pl/plpython/plpy_typeio.h +++ b/src/pl/plpython/plpy_typeio.h @@ -10,8 +10,11 @@ #include "fmgr.h" #include "storage/itemptr.h" +/* + * Conversion from PostgreSQL Datum to a Python object. + */ struct PLyDatumToOb; -typedef PyObject *(*PLyDatumToObFunc) (struct PLyDatumToOb *, Datum); +typedef PyObject *(*PLyDatumToObFunc) (struct PLyDatumToOb *arg, Datum val); typedef struct PLyDatumToOb { @@ -39,11 +42,15 @@ typedef union PLyTypeInput PLyTupleToOb r; } PLyTypeInput; -/* convert PyObject to a Postgresql Datum or tuple. - * output from Python +/* + * Conversion from Python object to a PostgreSQL Datum. + * + * The 'inarray' argument to the conversion function is true if the + * converted value was in an array (Python list). It is used to give a + * better error message in some cases. 
*/ struct PLyObToDatum; -typedef Datum (*PLyObToDatumFunc) (struct PLyObToDatum *, int32, PyObject *); +typedef Datum (*PLyObToDatumFunc) (struct PLyObToDatum *arg, int32 typmod, PyObject *val, bool inarray); typedef struct PLyObToDatum { @@ -104,7 +111,7 @@ extern void PLy_output_tuple_funcs(PLyTypeInfo *arg, TupleDesc desc); extern void PLy_output_record_funcs(PLyTypeInfo *arg, TupleDesc desc); /* conversion from Python objects to composite Datums */ -extern Datum PLyObject_ToCompositeDatum(PLyTypeInfo *info, TupleDesc desc, PyObject *plrv); +extern Datum PLyObject_ToCompositeDatum(PLyTypeInfo *info, TupleDesc desc, PyObject *plrv, bool isarray); /* conversion from heap tuples to Python dictionaries */ extern PyObject *PLyDict_FromTuple(PLyTypeInfo *info, HeapTuple tuple, TupleDesc desc); diff --git a/src/pl/plpython/sql/plpython_composite.sql b/src/pl/plpython/sql/plpython_composite.sql index 7a60b1f6b79..0fd2f5d5e3b 100644 --- a/src/pl/plpython/sql/plpython_composite.sql +++ b/src/pl/plpython/sql/plpython_composite.sql @@ -213,3 +213,12 @@ CREATE FUNCTION composite_type_as_list() RETURNS type_record[] AS $$ return [[('first', 1), ('second', 1)], [('first', 2), ('second', 2)], [('first', 3), ('second', 3)]]; $$ LANGUAGE plpythonu; SELECT * FROM composite_type_as_list(); + +-- Starting with PostgreSQL 10, a composite type in an array cannot be +-- represented as a Python list, because it's ambiguous with multi-dimensional +-- arrays. So this throws an error now. The error should contain a useful hint +-- on the issue. +CREATE FUNCTION composite_type_as_list_broken() RETURNS type_record[] AS $$ + return [['first', 1]]; +$$ LANGUAGE plpythonu; +SELECT * FROM composite_type_as_list_broken(); -- GitLab