From eb62398f391eedee7953becb410bf3ae86b9872b Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter_e@gmx.net>
Date: Sun, 13 Sep 2009 22:07:06 +0000
Subject: [PATCH] Fix Unicode support in PL/Python

Check calls of PyUnicode_AsEncodedString() for a NULL return, which most
likely means that Python does not know the encoding name.  Add special
treatment for SQL_ASCII, which Python definitely does not know.

Since using SQL_ASCII produces errors in the regression tests when
non-ASCII characters are involved, we have to put back various regression
test result variants.
---
 src/pl/plpython/expected/README               |  5 ++
 .../plpython/expected/plpython_unicode_2.out  | 52 +++++++++++++++++++
 .../plpython/expected/plpython_unicode_3.out  | 52 +++++++++++++++++++
 .../plpython/expected/plpython_unicode_5.out  | 52 +++++++++++++++++++
 src/pl/plpython/plpython.c                    | 18 +++++--
 5 files changed, 175 insertions(+), 4 deletions(-)
 create mode 100644 src/pl/plpython/expected/plpython_unicode_2.out
 create mode 100644 src/pl/plpython/expected/plpython_unicode_3.out
 create mode 100644 src/pl/plpython/expected/plpython_unicode_5.out

diff --git a/src/pl/plpython/expected/README b/src/pl/plpython/expected/README
index 574062dd77e..24f5e80a299 100644
--- a/src/pl/plpython/expected/README
+++ b/src/pl/plpython/expected/README
@@ -2,3 +2,8 @@ Guide to alternative expected files:
 
 plpython_error_2.out		Python 2.2, 2.3, 2.4
 plpython_error.out			Python 2.5, 2.6
+
+plpython_unicode.out		any version, when server encoding != SQL_ASCII; otherwise one of:
+plpython_unicode_2.out		Python 2.2
+plpython_unicode_3.out		Python 2.3, 2.4
+plpython_unicode_5.out		Python 2.5, 2.6
diff --git a/src/pl/plpython/expected/plpython_unicode_2.out b/src/pl/plpython/expected/plpython_unicode_2.out
new file mode 100644
index 00000000000..5e44e4a799c
--- /dev/null
+++ b/src/pl/plpython/expected/plpython_unicode_2.out
@@ -0,0 +1,52 @@
+--
+-- Unicode handling
+--
+CREATE TABLE unicode_test (
+	testvalue  text NOT NULL
+);
+CREATE FUNCTION unicode_return() RETURNS text AS E'
+return u"\\x80"
+' LANGUAGE plpythonu;
+CREATE FUNCTION unicode_trigger() RETURNS trigger AS E'
+TD["new"]["testvalue"] = u"\\x80"
+return "MODIFY"
+' LANGUAGE plpythonu;
+CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test
+  FOR EACH ROW EXECUTE PROCEDURE unicode_trigger();
+CREATE FUNCTION unicode_plan1() RETURNS text AS E'
+plan = plpy.prepare("SELECT $1 AS testvalue", ["text"])
+rv = plpy.execute(plan, [u"\\x80"], 1)
+return rv[0]["testvalue"]
+' LANGUAGE plpythonu;
+CREATE FUNCTION unicode_plan2() RETURNS text AS E'
+plan = plpy.prepare("SELECT $1 || $2 AS testvalue", ["text", u"text"])
+rv = plpy.execute(plan, ["foo", "bar"], 1)
+return rv[0]["testvalue"]
+' LANGUAGE plpythonu;
+SELECT unicode_return();
+ERROR:  PL/Python: could not convert Python Unicode object to PostgreSQL server encoding
+DETAIL:  exceptions.UnicodeError: ASCII encoding error: ordinal not in range(128)
+CONTEXT:  while creating return value
+PL/Python function "unicode_return"
+INSERT INTO unicode_test (testvalue) VALUES ('test');
+ERROR:  PL/Python: could not convert Python Unicode object to PostgreSQL server encoding
+DETAIL:  exceptions.UnicodeError: ASCII encoding error: ordinal not in range(128)
+CONTEXT:  while modifying trigger row
+PL/Python function "unicode_trigger"
+SELECT * FROM unicode_test;
+ testvalue 
+-----------
+(0 rows)
+
+SELECT unicode_plan1();
+WARNING:  PL/Python: plpy.Error: unrecognized error in PLy_spi_execute_plan
+CONTEXT:  PL/Python function "unicode_plan1"
+ERROR:  PL/Python: could not convert Python Unicode object to PostgreSQL server encoding
+DETAIL:  exceptions.UnicodeError: ASCII encoding error: ordinal not in range(128)
+CONTEXT:  PL/Python function "unicode_plan1"
+SELECT unicode_plan2();
+ unicode_plan2 
+---------------
+ foobar
+(1 row)
+
diff --git a/src/pl/plpython/expected/plpython_unicode_3.out b/src/pl/plpython/expected/plpython_unicode_3.out
new file mode 100644
index 00000000000..8e48c20a62b
--- /dev/null
+++ b/src/pl/plpython/expected/plpython_unicode_3.out
@@ -0,0 +1,52 @@
+--
+-- Unicode handling
+--
+CREATE TABLE unicode_test (
+	testvalue  text NOT NULL
+);
+CREATE FUNCTION unicode_return() RETURNS text AS E'
+return u"\\x80"
+' LANGUAGE plpythonu;
+CREATE FUNCTION unicode_trigger() RETURNS trigger AS E'
+TD["new"]["testvalue"] = u"\\x80"
+return "MODIFY"
+' LANGUAGE plpythonu;
+CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test
+  FOR EACH ROW EXECUTE PROCEDURE unicode_trigger();
+CREATE FUNCTION unicode_plan1() RETURNS text AS E'
+plan = plpy.prepare("SELECT $1 AS testvalue", ["text"])
+rv = plpy.execute(plan, [u"\\x80"], 1)
+return rv[0]["testvalue"]
+' LANGUAGE plpythonu;
+CREATE FUNCTION unicode_plan2() RETURNS text AS E'
+plan = plpy.prepare("SELECT $1 || $2 AS testvalue", ["text", u"text"])
+rv = plpy.execute(plan, ["foo", "bar"], 1)
+return rv[0]["testvalue"]
+' LANGUAGE plpythonu;
+SELECT unicode_return();
+ERROR:  PL/Python: could not convert Python Unicode object to PostgreSQL server encoding
+DETAIL:  exceptions.UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
+CONTEXT:  while creating return value
+PL/Python function "unicode_return"
+INSERT INTO unicode_test (testvalue) VALUES ('test');
+ERROR:  PL/Python: could not convert Python Unicode object to PostgreSQL server encoding
+DETAIL:  exceptions.UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
+CONTEXT:  while modifying trigger row
+PL/Python function "unicode_trigger"
+SELECT * FROM unicode_test;
+ testvalue 
+-----------
+(0 rows)
+
+SELECT unicode_plan1();
+WARNING:  PL/Python: plpy.Error: unrecognized error in PLy_spi_execute_plan
+CONTEXT:  PL/Python function "unicode_plan1"
+ERROR:  PL/Python: could not convert Python Unicode object to PostgreSQL server encoding
+DETAIL:  exceptions.UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
+CONTEXT:  PL/Python function "unicode_plan1"
+SELECT unicode_plan2();
+ unicode_plan2 
+---------------
+ foobar
+(1 row)
+
diff --git a/src/pl/plpython/expected/plpython_unicode_5.out b/src/pl/plpython/expected/plpython_unicode_5.out
new file mode 100644
index 00000000000..76ff8f23cdf
--- /dev/null
+++ b/src/pl/plpython/expected/plpython_unicode_5.out
@@ -0,0 +1,52 @@
+--
+-- Unicode handling
+--
+CREATE TABLE unicode_test (
+	testvalue  text NOT NULL
+);
+CREATE FUNCTION unicode_return() RETURNS text AS E'
+return u"\\x80"
+' LANGUAGE plpythonu;
+CREATE FUNCTION unicode_trigger() RETURNS trigger AS E'
+TD["new"]["testvalue"] = u"\\x80"
+return "MODIFY"
+' LANGUAGE plpythonu;
+CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test
+  FOR EACH ROW EXECUTE PROCEDURE unicode_trigger();
+CREATE FUNCTION unicode_plan1() RETURNS text AS E'
+plan = plpy.prepare("SELECT $1 AS testvalue", ["text"])
+rv = plpy.execute(plan, [u"\\x80"], 1)
+return rv[0]["testvalue"]
+' LANGUAGE plpythonu;
+CREATE FUNCTION unicode_plan2() RETURNS text AS E'
+plan = plpy.prepare("SELECT $1 || $2 AS testvalue", ["text", u"text"])
+rv = plpy.execute(plan, ["foo", "bar"], 1)
+return rv[0]["testvalue"]
+' LANGUAGE plpythonu;
+SELECT unicode_return();
+ERROR:  PL/Python: could not convert Python Unicode object to PostgreSQL server encoding
+DETAIL:  <type 'exceptions.UnicodeEncodeError'>: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
+CONTEXT:  while creating return value
+PL/Python function "unicode_return"
+INSERT INTO unicode_test (testvalue) VALUES ('test');
+ERROR:  PL/Python: could not convert Python Unicode object to PostgreSQL server encoding
+DETAIL:  <type 'exceptions.UnicodeEncodeError'>: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
+CONTEXT:  while modifying trigger row
+PL/Python function "unicode_trigger"
+SELECT * FROM unicode_test;
+ testvalue 
+-----------
+(0 rows)
+
+SELECT unicode_plan1();
+WARNING:  PL/Python: <class 'plpy.Error'>: unrecognized error in PLy_spi_execute_plan
+CONTEXT:  PL/Python function "unicode_plan1"
+ERROR:  PL/Python: could not convert Python Unicode object to PostgreSQL server encoding
+DETAIL:  <type 'exceptions.UnicodeEncodeError'>: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128)
+CONTEXT:  PL/Python function "unicode_plan1"
+SELECT unicode_plan2();
+ unicode_plan2 
+---------------
+ foobar
+(1 row)
+
diff --git a/src/pl/plpython/plpython.c b/src/pl/plpython/plpython.c
index ae898385b56..c37993829f1 100644
--- a/src/pl/plpython/plpython.c
+++ b/src/pl/plpython/plpython.c
@@ -1,7 +1,7 @@
 /**********************************************************************
  * plpython.c - python as a procedural language for PostgreSQL
  *
- *	$PostgreSQL: pgsql/src/pl/plpython/plpython.c,v 1.129 2009/09/12 22:13:12 petere Exp $
+ *	$PostgreSQL: pgsql/src/pl/plpython/plpython.c,v 1.130 2009/09/13 22:07:06 petere Exp $
  *
  *********************************************************************
  */
@@ -3343,11 +3343,21 @@ PLy_free(void *ptr)
 static PyObject*
 PLyUnicode_Str(PyObject *unicode)
 {
+	PyObject *rv;
+	const char *serverenc;
+
 	/*
-	 * This assumes that the PostgreSQL encoding names are acceptable
-	 * to Python, but that appears to be the case.
+	 * Python understands almost all PostgreSQL encoding names, but it
+	 * doesn't know SQL_ASCII.
 	 */
-	return PyUnicode_AsEncodedString(unicode, GetDatabaseEncodingName(), "strict");
+	if (GetDatabaseEncoding() == PG_SQL_ASCII)
+		serverenc = "ascii";
+	else
+		serverenc = GetDatabaseEncodingName();
+	rv = PyUnicode_AsEncodedString(unicode, serverenc, "strict");
+	if (rv == NULL)
+		PLy_elog(ERROR, "could not convert Python Unicode object to PostgreSQL server encoding");
+	return rv;
 }
 
 /*
-- 
GitLab