From 01f7d29902cb27fb698e5078d72cb837398e074c Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sun, 10 Jan 2010 17:15:18 +0000
Subject: [PATCH] Improve plpgsql's handling of record field references by
 forcing all potential field references in SQL expressions to have RECFIELD
 datum-array entries at parse time.  If it turns out that the reference is
 actually to a SQL column, the RECFIELD entry is useless, but it costs little.
  This allows us to get rid of the previous use of FieldSelect applied to a
 whole-row Param for the record variable; which was not only slower than a
 direct RECFIELD reference, but failed for references to system columns of a
 trigger's NEW or OLD record. Per report and fix suggestion from Dean Rasheed.

---
 src/pl/plpgsql/src/gram.y       | 38 +++++++--------
 src/pl/plpgsql/src/pl_comp.c    | 86 ++++++++++++++++-----------------
 src/pl/plpgsql/src/pl_scanner.c | 15 +++---
 src/pl/plpgsql/src/plpgsql.h    | 12 ++++-
 4 files changed, 77 insertions(+), 74 deletions(-)

diff --git a/src/pl/plpgsql/src/gram.y b/src/pl/plpgsql/src/gram.y
index 2e3d504adee..77afcbec8bb 100644
--- a/src/pl/plpgsql/src/gram.y
+++ b/src/pl/plpgsql/src/gram.y
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.137 2010/01/02 16:58:12 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.138 2010/01/10 17:15:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -369,21 +369,21 @@ pl_block		: decl_sect K_BEGIN proc_sect exception_sect K_END opt_label
 decl_sect		: opt_block_label
 					{
 						/* done with decls, so resume identifier lookup */
-						plpgsql_LookupIdentifiers = true;
+						plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
 						$$.label	  = $1;
 						$$.n_initvars = 0;
 						$$.initvarnos = NULL;
 					}
 				| opt_block_label decl_start
 					{
-						plpgsql_LookupIdentifiers = true;
+						plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
 						$$.label	  = $1;
 						$$.n_initvars = 0;
 						$$.initvarnos = NULL;
 					}
 				| opt_block_label decl_start decl_stmts
 					{
-						plpgsql_LookupIdentifiers = true;
+						plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
 						if ($3 != NULL)
 							$$.label = $3;
 						else
@@ -401,7 +401,7 @@ decl_start		: K_DECLARE
 						 * Disable scanner lookup of identifiers while
 						 * we process the decl_stmts
 						 */
-						plpgsql_LookupIdentifiers = false;
+						plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_DECLARE;
 					}
 				;
 
@@ -2121,7 +2121,7 @@ read_sql_construct(int until,
 {
 	int					tok;
 	StringInfoData		ds;
-	bool				save_LookupIdentifiers;
+	IdentifierLookup	save_IdentifierLookup;
 	int					startlocation = -1;
 	int					parenlevel = 0;
 	PLpgSQL_expr		*expr;
@@ -2129,9 +2129,9 @@ read_sql_construct(int until,
 	initStringInfo(&ds);
 	appendStringInfoString(&ds, sqlstart);
 
-	/* no need to lookup identifiers within the SQL text */
-	save_LookupIdentifiers = plpgsql_LookupIdentifiers;
-	plpgsql_LookupIdentifiers = false;
+	/* special lookup mode for identifiers within the SQL text */
+	save_IdentifierLookup = plpgsql_IdentifierLookup;
+	plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_EXPR;
 
 	for (;;)
 	{
@@ -2176,7 +2176,7 @@ read_sql_construct(int until,
 		}
 	}
 
-	plpgsql_LookupIdentifiers = save_LookupIdentifiers;
+	plpgsql_IdentifierLookup = save_IdentifierLookup;
 
 	if (startloc)
 		*startloc = startlocation;
@@ -2221,8 +2221,8 @@ read_datatype(int tok)
 	PLpgSQL_type		*result;
 	int					parenlevel = 0;
 
-	/* Should always be called with LookupIdentifiers off */
-	Assert(!plpgsql_LookupIdentifiers);
+	/* Should only be called while parsing DECLARE sections */
+	Assert(plpgsql_IdentifierLookup == IDENTIFIER_LOOKUP_DECLARE);
 
 	/* Often there will be a lookahead token, but if not, get one */
 	if (tok == YYEMPTY)
@@ -2327,7 +2327,7 @@ static PLpgSQL_stmt *
 make_execsql_stmt(int firsttoken, int location)
 {
 	StringInfoData		ds;
-	bool				save_LookupIdentifiers;
+	IdentifierLookup	save_IdentifierLookup;
 	PLpgSQL_stmt_execsql *execsql;
 	PLpgSQL_expr		*expr;
 	PLpgSQL_row			*row = NULL;
@@ -2341,9 +2341,9 @@ make_execsql_stmt(int firsttoken, int location)
 
 	initStringInfo(&ds);
 
-	/* no need to lookup identifiers within the SQL text */
-	save_LookupIdentifiers = plpgsql_LookupIdentifiers;
-	plpgsql_LookupIdentifiers = false;
+	/* special lookup mode for identifiers within the SQL text */
+	save_IdentifierLookup = plpgsql_IdentifierLookup;
+	plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_EXPR;
 
 	/*
 	 * We have to special-case the sequence INSERT INTO, because we don't want
@@ -2371,13 +2371,13 @@ make_execsql_stmt(int firsttoken, int location)
 				yyerror("INTO specified more than once");
 			have_into = true;
 			into_start_loc = yylloc;
-			plpgsql_LookupIdentifiers = true;
+			plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
 			read_into_target(&rec, &row, &have_strict);
-			plpgsql_LookupIdentifiers = false;
+			plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_EXPR;
 		}
 	}
 
-	plpgsql_LookupIdentifiers = save_LookupIdentifiers;
+	plpgsql_IdentifierLookup = save_IdentifierLookup;
 
 	if (have_into)
 	{
diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c
index f3adeb63a50..cc0c3c870cf 100644
--- a/src/pl/plpgsql/src/pl_comp.c
+++ b/src/pl/plpgsql/src/pl_comp.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.147 2010/01/02 16:58:12 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.148 2010/01/10 17:15:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1132,11 +1132,7 @@ resolve_column_ref(PLpgSQL_expr *expr, ColumnRef *cref)
 				return make_datum_param(expr, nse->itemno, cref->location);
 			if (nnames == nnames_field)
 			{
-				/* colname must be a field in this record */
-				PLpgSQL_rec *rec = (PLpgSQL_rec *) estate->datums[nse->itemno];
-				FieldSelect *fselect;
-				Oid			fldtype;
-				int			fldno;
+				/* colname could be a field in this record */
 				int			i;
 
 				/* search for a datum referencing this field */
@@ -1153,20 +1149,11 @@ resolve_column_ref(PLpgSQL_expr *expr, ColumnRef *cref)
 				}
 
 				/*
-				 * We can't readily add a recfield datum at runtime, so
-				 * instead build a whole-row Param and a FieldSelect node.
-				 * This is a bit less efficient, so we prefer the recfield
-				 * way when possible.
+				 * We should not get here, because a RECFIELD datum should
+				 * have been built at parse time for every possible qualified
+				 * reference to fields of this record.  But if we do, fall
+				 * out and return NULL.
 				 */
-				fldtype = exec_get_rec_fieldtype(rec, colname,
-												 &fldno);
-				fselect = makeNode(FieldSelect);
-				fselect->arg = (Expr *) make_datum_param(expr, nse->itemno,
-														 cref->location);
-				fselect->fieldnum = fldno;
-				fselect->resulttype = fldtype;
-				fselect->resulttypmod = -1;
-				return (Node *) fselect;
 			}
 			break;
 		case PLPGSQL_NSTYPE_ROW:
@@ -1174,7 +1161,7 @@ resolve_column_ref(PLpgSQL_expr *expr, ColumnRef *cref)
 				return make_datum_param(expr, nse->itemno, cref->location);
 			if (nnames == nnames_field)
 			{
-				/* colname must be a field in this row */
+				/* colname could be a field in this row */
 				PLpgSQL_row *row = (PLpgSQL_row *) estate->datums[nse->itemno];
 				int			i;
 
@@ -1187,10 +1174,7 @@ resolve_column_ref(PLpgSQL_expr *expr, ColumnRef *cref)
 												cref->location);
 					}
 				}
-				ereport(ERROR,
-						(errcode(ERRCODE_UNDEFINED_COLUMN),
-						 errmsg("row \"%s\" has no field \"%s\"",
-								row->refname, colname)));
+				/* Not found, so return NULL */
 			}
 			break;
 		default:
@@ -1257,8 +1241,12 @@ plpgsql_parse_word(char *word1, const char *yytxt,
 {
 	PLpgSQL_nsitem *ns;
 
-	/* No lookup if disabled */
-	if (plpgsql_LookupIdentifiers)
+	/*
+	 * We should do nothing in DECLARE sections.  In SQL expressions, there's
+	 * no need to do anything either --- lookup will happen when the expression
+	 * is compiled.
+	 */
+	if (plpgsql_IdentifierLookup == IDENTIFIER_LOOKUP_NORMAL)
 	{
 		/*
 		 * Do a lookup in the current namespace stack
@@ -1281,6 +1269,7 @@ plpgsql_parse_word(char *word1, const char *yytxt,
 					return true;
 
 				default:
+					/* plpgsql_ns_lookup should never return anything else */
 					elog(ERROR, "unrecognized plpgsql itemtype: %d",
 						 ns->itemtype);
 			}
@@ -1313,8 +1302,12 @@ plpgsql_parse_dblword(char *word1, char *word2,
 	idents = list_make2(makeString(word1),
 						makeString(word2));
 
-	/* No lookup if disabled */
-	if (plpgsql_LookupIdentifiers)
+	/*
+	 * We should do nothing in DECLARE sections.  In SQL expressions,
+	 * we really only need to make sure that RECFIELD datums are created
+	 * when needed.
+	 */
+	if (plpgsql_IdentifierLookup != IDENTIFIER_LOOKUP_DECLARE)
 	{
 		/*
 		 * Do a lookup in the current namespace stack
@@ -1338,8 +1331,10 @@ plpgsql_parse_dblword(char *word1, char *word2,
 					if (nnames == 1)
 					{
 						/*
-						 * First word is a record name, so second word must be
-						 * a field in this record.
+						 * First word is a record name, so second word could
+						 * be a field in this record.  We build a RECFIELD
+						 * datum whether it is or not --- any error will be
+						 * detected later.
 						 */
 						PLpgSQL_recfield *new;
 
@@ -1366,8 +1361,9 @@ plpgsql_parse_dblword(char *word1, char *word2,
 					if (nnames == 1)
 					{
 						/*
-						 * First word is a row name, so second word must be a
-						 * field in this row.
+						 * First word is a row name, so second word could be
+						 * a field in this row.  Again, no error now if it
+						 * isn't.
 						 */
 						PLpgSQL_row *row;
 						int			i;
@@ -1385,10 +1381,7 @@ plpgsql_parse_dblword(char *word1, char *word2,
 								return true;
 							}
 						}
-						ereport(ERROR,
-								(errcode(ERRCODE_UNDEFINED_COLUMN),
-								 errmsg("row \"%s\" has no field \"%s\"",
-										word1, word2)));
+						/* fall through to return CWORD */
 					}
 					else
 					{
@@ -1399,6 +1392,7 @@ plpgsql_parse_dblword(char *word1, char *word2,
 						wdatum->idents = idents;
 						return true;
 					}
+					break;
 
 				default:
 					break;
@@ -1429,8 +1423,12 @@ plpgsql_parse_tripword(char *word1, char *word2, char *word3,
 						makeString(word2),
 						makeString(word3));
 
-	/* No lookup if disabled */
-	if (plpgsql_LookupIdentifiers)
+	/*
+	 * We should do nothing in DECLARE sections.  In SQL expressions,
+	 * we really only need to make sure that RECFIELD datums are created
+	 * when needed.
+	 */
+	if (plpgsql_IdentifierLookup != IDENTIFIER_LOOKUP_DECLARE)
 	{
 		/*
 		 * Do a lookup in the current namespace stack. Must find a qualified
@@ -1446,7 +1444,7 @@ plpgsql_parse_tripword(char *word1, char *word2, char *word3,
 				case PLPGSQL_NSTYPE_REC:
 				{
 					/*
-					 * words 1/2 are a record name, so third word must be a
+					 * words 1/2 are a record name, so third word could be a
 					 * field in this record.
 					 */
 					PLpgSQL_recfield *new;
@@ -1468,8 +1466,8 @@ plpgsql_parse_tripword(char *word1, char *word2, char *word3,
 				case PLPGSQL_NSTYPE_ROW:
 				{
 					/*
-					 * words 1/2 are a row name, so third word must be a field
-					 * in this row.
+					 * words 1/2 are a row name, so third word could be a
+					 * field in this row.
 					 */
 					PLpgSQL_row *row;
 					int			i;
@@ -1487,10 +1485,8 @@ plpgsql_parse_tripword(char *word1, char *word2, char *word3,
 							return true;
 						}
 					}
-					ereport(ERROR,
-							(errcode(ERRCODE_UNDEFINED_COLUMN),
-							 errmsg("row \"%s.%s\" has no field \"%s\"",
-									word1, word2, word3)));
+					/* fall through to return CWORD */
+					break;
 				}
 
 				default:
diff --git a/src/pl/plpgsql/src/pl_scanner.c b/src/pl/plpgsql/src/pl_scanner.c
index 0a5767ef950..8e97ffde915 100644
--- a/src/pl/plpgsql/src/pl_scanner.c
+++ b/src/pl/plpgsql/src/pl_scanner.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_scanner.c,v 1.3 2010/01/02 16:58:13 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_scanner.c,v 1.4 2010/01/10 17:15:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -23,8 +23,8 @@
 #define PG_KEYWORD(a,b,c) {a,b,c},
 
 
-/* Klugy flag to tell scanner whether to lookup identifiers */
-bool	plpgsql_LookupIdentifiers = true;
+/* Klugy flag to tell scanner how to look up identifiers */
+IdentifierLookup	plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
 
 /*
  * A word about keywords:
@@ -33,11 +33,10 @@ bool	plpgsql_LookupIdentifiers = true;
  * reserved keywords are passed to the core scanner, so they will be
  * recognized before (and instead of) any variable name.  Unreserved
  * words are checked for separately, after determining that the identifier
- * isn't a known variable name.  If plpgsql_LookupIdentifiers is off then
+ * isn't a known variable name.  If plpgsql_IdentifierLookup is DECLARE then
  * no variable names will be recognized, so the unreserved words always work.
  * (Note in particular that this helps us avoid reserving keywords that are
- * only needed in DECLARE sections, since we scan those sections with
- * plpgsql_LookupIdentifiers off.)
+ * only needed in DECLARE sections.)
  *
  * In certain contexts it is desirable to prefer recognizing an unreserved
  * keyword over recognizing a variable name.  Those cases are handled in
@@ -193,7 +192,7 @@ static void location_lineno_init(void);
  * It is a wrapper around the core lexer, with the ability to recognize
  * PL/pgSQL variables and return them as special T_DATUM tokens.  If a
  * word or compound word does not match any variable name, or if matching
- * is turned off by plpgsql_LookupIdentifiers, it is returned as
+ * is turned off by plpgsql_IdentifierLookup, it is returned as
  * T_WORD or T_CWORD respectively, or as an unreserved keyword if it
  * matches one of those.
  */
@@ -567,7 +566,7 @@ plpgsql_scanner_init(const char *str)
 	scanorig = str;
 
 	/* Other setup */
-	plpgsql_LookupIdentifiers = true;
+	plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
 
 	num_pushbacks = 0;
 
diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h
index c2d49cb6d9e..4601a4f8136 100644
--- a/src/pl/plpgsql/src/plpgsql.h
+++ b/src/pl/plpgsql/src/plpgsql.h
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.127 2010/01/02 16:58:13 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.128 2010/01/10 17:15:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -795,11 +795,19 @@ typedef struct
  * Global variable declarations
  **********************************************************************/
 
+typedef enum
+{
+	IDENTIFIER_LOOKUP_NORMAL,		/* normal processing of var names */
+	IDENTIFIER_LOOKUP_DECLARE,		/* In DECLARE --- don't look up names */
+	IDENTIFIER_LOOKUP_EXPR			/* In SQL expression --- special case */
+} IdentifierLookup;
+
+extern IdentifierLookup plpgsql_IdentifierLookup;
+
 extern int	plpgsql_variable_conflict;
 
 extern bool plpgsql_check_syntax;
 extern bool plpgsql_DumpExecTree;
-extern bool plpgsql_LookupIdentifiers;
 
 extern PLpgSQL_stmt_block *plpgsql_parse_result;
 
-- 
GitLab