From e61fd4ac7414a52714c3288889cc754d2e11c4a8 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 10 Aug 2009 18:29:27 +0000
Subject: [PATCH] Support EEEE (scientific notation) in to_char().

Pavel Stehule, Brendan Jurd
---
 doc/src/sgml/func.sgml                |  17 ++-
 src/backend/utils/adt/formatting.c    | 188 ++++++++++++++++++++++++--
 src/backend/utils/adt/numeric.c       | 133 +++++++++++++++++-
 src/include/utils/numeric.h           |   7 +-
 src/test/regress/expected/numeric.out |  15 ++
 src/test/regress/sql/numeric.sql      |   1 +
 6 files changed, 347 insertions(+), 14 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 4a890cb8b89..545ab495058 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.485 2009/08/10 16:10:19 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.486 2009/08/10 18:29:26 tgl Exp $ -->
 
  <chapter id="functions">
   <title>Functions and Operators</title>
@@ -5345,7 +5345,7 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})');
        </row>
        <row>
         <entry><literal>EEEE</literal></entry>
-        <entry>scientific notation (not implemented)</entry>
+        <entry>exponent for scientific notation</entry>
        </row>
       </tbody>
      </tgroup>
@@ -5404,6 +5404,15 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})');
        (e.g., <literal>99.9V99</literal> is not allowed).
       </para>
      </listitem>
+
+     <listitem>
+      <para>
+       <literal>EEEE</literal> (scientific notation) cannot be used in
+       combination with any of the other formatting patterns or modifiers
+       other than digit and decimal point patterns, and must be at the end
+       of the format string (e.g., <literal>9.99EEEE</literal> is valid).
+      </para>
+     </listitem>
     </itemizedlist>
    </para>
 
@@ -5605,6 +5614,10 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})');
         <entry><literal>to_char(12.45, '99V9')</literal></entry>
         <entry><literal>'&nbsp;125'</literal></entry>
        </row>
+       <row>
+        <entry><literal>to_char(0.0004859, '9.99EEEE')</literal></entry>
+        <entry><literal>' 4.86e-04'</literal></entry>
+       </row>
       </tbody>
      </tgroup>
     </table>
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 7dd1c1d1168..6c1b35fbe1e 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1,7 +1,7 @@
 /* -----------------------------------------------------------------------
  * formatting.c
  *
- * $PostgreSQL: pgsql/src/backend/utils/adt/formatting.c,v 1.159 2009/07/06 19:11:39 heikki Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/formatting.c,v 1.160 2009/08/10 18:29:26 tgl Exp $
  *
  *
  *	 Portions Copyright (c) 1999-2009, PostgreSQL Global Development Group
@@ -335,6 +335,7 @@ typedef struct
 #define NUM_F_MULTI			(1 << 11)
 #define NUM_F_PLUS_POST		(1 << 12)
 #define NUM_F_MINUS_POST	(1 << 13)
+#define NUM_F_EEEE			(1 << 14)
 
 #define NUM_LSIGN_PRE	(-1)
 #define NUM_LSIGN_POST	1
@@ -355,6 +356,7 @@ typedef struct
 #define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
 #define IS_ROMAN(_f)	((_f)->flag & NUM_F_ROMAN)
 #define IS_MULTI(_f)	((_f)->flag & NUM_F_MULTI)
+#define IS_EEEE(_f)		((_f)->flag & NUM_F_EEEE)
 
 /* ----------
  * Format picture cache
@@ -821,7 +823,7 @@ static const KeyWord NUM_keywords[] = {
 	{"B", 1, NUM_B},			/* B */
 	{"C", 1, NUM_C},			/* C */
 	{"D", 1, NUM_D},			/* D */
-	{"E", 1, NUM_E},			/* E */
+	{"EEEE", 4, NUM_E},			/* E */
 	{"FM", 2, NUM_FM},			/* F */
 	{"G", 1, NUM_G},			/* G */
 	{"L", 1, NUM_L},			/* L */
@@ -837,7 +839,7 @@ static const KeyWord NUM_keywords[] = {
 	{"b", 1, NUM_B},			/* b */
 	{"c", 1, NUM_C},			/* c */
 	{"d", 1, NUM_D},			/* d */
-	{"e", 1, NUM_E},			/* e */
+	{"eeee", 4, NUM_E},			/* e */
 	{"fm", 2, NUM_FM},			/* f */
 	{"g", 1, NUM_G},			/* g */
 	{"l", 1, NUM_L},			/* l */
@@ -1044,6 +1046,14 @@ NUMDesc_prepare(NUMDesc *num, FormatNode *n)
 	if (n->type != NODE_TYPE_ACTION)
 		return;
 
+	if (IS_EEEE(num) && n->key->id != NUM_E)
+	{
+		NUM_cache_remove(last_NUMCacheEntry);
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("\"EEEE\" must be the last pattern used")));
+	}
+
 	switch (n->key->id)
 	{
 		case NUM_9:
@@ -1217,10 +1227,25 @@ NUMDesc_prepare(NUMDesc *num, FormatNode *n)
 			break;
 
 		case NUM_E:
-			NUM_cache_remove(last_NUMCacheEntry);
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("\"E\" is not supported")));
+			if (IS_EEEE(num))
+			{
+				NUM_cache_remove(last_NUMCacheEntry);
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("cannot use \"EEEE\" twice")));
+			}
+			if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
+				IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
+				IS_ROMAN(num) || IS_MULTI(num))
+			{
+				NUM_cache_remove(last_NUMCacheEntry);
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("\"EEEE\" is incompatible with other formats"),
+						 errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
+			}
+			num->flag |= NUM_F_EEEE;
+			break;
 	}
 
 	return;
@@ -4145,6 +4170,15 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
 	if (Np->Num->zero_start)
 		--Np->Num->zero_start;
 
+	if (IS_EEEE(Np->Num))
+	{
+		if (!Np->is_to_char)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("\"EEEE\" not supported for input")));
+		return strcpy(inout, number);
+	}
+
 	/*
 	 * Roman correction
 	 */
@@ -4153,7 +4187,7 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
 		if (!Np->is_to_char)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("\"RN\" not supported")));
+					 errmsg("\"RN\" not supported for input")));
 
 		Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
 			Np->Num->pre = Np->num_pre = Np->sign = 0;
@@ -4240,7 +4274,7 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
 
 #ifdef DEBUG_TO_FROM_CHAR
 	elog(DEBUG_elog_output,
-		 "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s",
+		 "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
 		 Np->sign,
 		 Np->number,
 		 Np->Num->pre,
@@ -4256,7 +4290,8 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
 		 IS_PLUS(Np->Num) ? "Yes" : "No",
 		 IS_MINUS(Np->Num) ? "Yes" : "No",
 		 IS_FILLMODE(Np->Num) ? "Yes" : "No",
-		 IS_ROMAN(Np->Num) ? "Yes" : "No"
+		 IS_ROMAN(Np->Num) ? "Yes" : "No",
+		 IS_EEEE(Np->Num) ? "Yes" : "No"
 		);
 #endif
 
@@ -4626,6 +4661,39 @@ numeric_to_char(PG_FUNCTION_ARGS)
 			int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4,
 													   NumericGetDatum(x))));
 	}
+	else if (IS_EEEE(&Num))
+	{
+		orgnum = numeric_out_sci(value, Num.post);
+
+		/*
+		 * numeric_out_sci() does not emit a sign for positive numbers.  We
+		 * need to add a space in this case so that positive and negative
+		 * numbers are aligned.  We also have to do the right thing for NaN.
+		 */
+		if (strcmp(orgnum, "NaN") == 0)
+		{
+			/*
+			 * Allow 6 characters for the leading sign, the decimal point, "e",
+			 * the exponent's sign and two exponent digits.
+			 */
+			numstr = (char *) palloc(Num.pre + Num.post + 7);
+			fill_str(numstr, '#', Num.pre + Num.post + 6);
+			*numstr = ' ';
+			*(numstr + Num.pre + 1) = '.';
+		}
+		else if (*orgnum != '-')
+		{
+			numstr = (char *) palloc(strlen(orgnum) + 2);
+			*numstr = ' ';
+			strcpy(numstr + 1, orgnum);
+			len = strlen(numstr);
+		}
+		else
+		{
+			numstr = orgnum;
+			len = strlen(orgnum);
+		}
+	}
 	else
 	{
 		Numeric		val = value;
@@ -4707,6 +4775,23 @@ int4_to_char(PG_FUNCTION_ARGS)
 	 */
 	if (IS_ROMAN(&Num))
 		numstr = orgnum = int_to_roman(value);
+	else if (IS_EEEE(&Num))
+	{
+		/* we can do it easily because float8 won't lose any precision */
+		float8	val = (float8) value;
+
+		orgnum = (char *) palloc(MAXDOUBLEWIDTH + 1);
+		snprintf(orgnum, MAXDOUBLEWIDTH + 1, "%+.*e", Num.post, val);
+
+		/*
+		 * Swap a leading positive sign for a space.
+		 */
+		if (*orgnum == '+')
+			*orgnum = ' ';
+
+		len = strlen(orgnum);
+		numstr = orgnum;
+	}
 	else
 	{
 		if (IS_MULTI(&Num))
@@ -4785,6 +4870,33 @@ int8_to_char(PG_FUNCTION_ARGS)
 		numstr = orgnum = int_to_roman(DatumGetInt32(
 						  DirectFunctionCall1(int84, Int64GetDatum(value))));
 	}
+	else if (IS_EEEE(&Num))
+	{
+		/* to avoid loss of precision, must go via numeric not float8 */
+		Numeric	val;
+
+		val = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
+												  Int64GetDatum(value)));
+		orgnum = numeric_out_sci(val, Num.post);
+
+		/*
+		 * numeric_out_sci() does not emit a sign for positive numbers.  We
+		 * need to add a space in this case so that positive and negative
+		 * numbers are aligned.  We don't have to worry about NaN here.
+		 */
+		if (*orgnum != '-')
+		{
+			numstr = (char *) palloc(strlen(orgnum) + 2);
+			*numstr = ' ';
+			strcpy(numstr + 1, orgnum);
+			len = strlen(numstr);
+		}
+		else
+		{
+			numstr = orgnum;
+			len = strlen(orgnum);
+		}
+	}
 	else
 	{
 		if (IS_MULTI(&Num))
@@ -4859,6 +4971,34 @@ float4_to_char(PG_FUNCTION_ARGS)
 
 	if (IS_ROMAN(&Num))
 		numstr = orgnum = int_to_roman((int) rint(value));
+	else if (IS_EEEE(&Num))
+	{
+		numstr = orgnum = (char *) palloc(MAXDOUBLEWIDTH + 1);
+		if (isnan(value) || is_infinite(value))
+		{
+			/*
+			 * Allow 6 characters for the leading sign, the decimal point, "e",
+			 * the exponent's sign and two exponent digits.
+			 */
+			numstr = (char *) palloc(Num.pre + Num.post + 7);
+			fill_str(numstr, '#', Num.pre + Num.post + 6);
+			*numstr = ' ';
+			*(numstr + Num.pre + 1) = '.';
+		}
+		else
+		{
+			snprintf(orgnum, MAXDOUBLEWIDTH + 1, "%+.*e", Num.post, value);
+
+			/*
+			 * Swap a leading positive sign for a space.
+			 */
+			if (*orgnum == '+')
+				*orgnum = ' ';
+
+			len = strlen(orgnum);
+			numstr = orgnum;
+		}
+	}
 	else
 	{
 		float4		val = value;
@@ -4935,6 +5075,34 @@ float8_to_char(PG_FUNCTION_ARGS)
 
 	if (IS_ROMAN(&Num))
 		numstr = orgnum = int_to_roman((int) rint(value));
+	else if (IS_EEEE(&Num))
+	{
+		numstr = orgnum = (char *) palloc(MAXDOUBLEWIDTH + 1);
+		if (isnan(value) || is_infinite(value))
+		{
+			/*
+			 * Allow 6 characters for the leading sign, the decimal point, "e",
+			 * the exponent's sign and two exponent digits.
+			 */
+			numstr = (char *) palloc(Num.pre + Num.post + 7);
+			fill_str(numstr, '#', Num.pre + Num.post + 6);
+			*numstr = ' ';
+			*(numstr + Num.pre + 1) = '.';
+		}
+		else
+		{
+			snprintf(orgnum, MAXDOUBLEWIDTH + 1, "%+.*e", Num.post, value);
+
+			/*
+			 * Swap a leading positive sign for a space.
+			 */
+			if (*orgnum == '+')
+				*orgnum = ' ';
+
+			len = strlen(orgnum);
+			numstr = orgnum;
+		}
+	}
 	else
 	{
 		float8		val = value;
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index ba3721b12b3..29b00e08e00 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -14,7 +14,7 @@
  * Copyright (c) 1998-2009, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.118 2009/06/11 14:49:03 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.119 2009/08/10 18:29:27 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -247,6 +247,7 @@ static const char *set_var_from_str(const char *str, const char *cp,
 static void set_var_from_num(Numeric value, NumericVar *dest);
 static void set_var_from_var(NumericVar *value, NumericVar *dest);
 static char *get_str_from_var(NumericVar *var, int dscale);
+static char *get_str_from_var_sci(NumericVar *var, int rscale);
 
 static Numeric make_result(NumericVar *var);
 
@@ -426,6 +427,32 @@ numeric_out(PG_FUNCTION_ARGS)
 	PG_RETURN_CSTRING(str);
 }
 
+/*
+ * numeric_out_sci() -
+ *
+ *	Output function for numeric data type in scientific notation.
+ *	"scale" is the number of digits wanted after the decimal point.
+ *	Returns a palloc'd string; a NaN input yields the string "NaN".
+ */
+char *
+numeric_out_sci(Numeric num, int scale)
+{
+	NumericVar	x;
+	char	   *str;
+
+	/* Handle NaN: it is returned as the literal string "NaN" */
+	if (NUMERIC_IS_NAN(num))
+		return pstrdup("NaN");
+
+	init_var(&x);
+	set_var_from_num(num, &x);
+
+	str = get_str_from_var_sci(&x, scale);
+
+	free_var(&x);
+	return str;
+}
+
 /*
  *		numeric_recv			- converts external binary format to numeric
  *
@@ -3364,6 +3391,110 @@ get_str_from_var(NumericVar *var, int dscale)
 	return str;
 }
 
+/*
+ * get_str_from_var_sci() -
+ *
+ *	Convert a var to a normalised scientific notation text representation.
+ *	This function does the heavy lifting for numeric_out_sci().
+ *
+ *	This notation has the general form a * 10^b, where a is known as the
+ *	"significand" and b is known as the "exponent".
+ *
+ *	Because we can't do superscript in ASCII (and because we want to copy
+ *	printf's behaviour) we display the exponent using E notation, with a
+ *	minimum of two exponent digits.
+ *
+ *	For example, the value 1234 with rscale = 1 is output as 1.2e+03.
+ *
+ *	We assume that the exponent can fit into an int32.
+ *
+ *	rscale is the number of decimal digits desired after the decimal point
+ *	in the output; negative values are treated as zero.
+ *
+ *	CAUTION: var's contents may be modified by rounding!
+ *
+ *	Returns a palloc'd string.
+ */
+static char *
+get_str_from_var_sci(NumericVar *var, int rscale)
+{
+	int32		exponent;
+	NumericVar  denominator;
+	NumericVar	significand;
+	int			denom_scale;
+	size_t		len;
+	char	   *str;
+	char	   *sig_out;
+
+	if (rscale < 0)
+		rscale = 0;
+
+	/*
+	 * Determine the exponent of this number in normalised form.
+	 *
+	 * This is the exponent required to represent the number with only one
+	 * significant digit before the decimal point.
+	 */
+	if (var->ndigits > 0)
+	{
+		exponent = (var->weight + 1) * DEC_DIGITS;
+
+		/*
+		 * Compensate for leading decimal zeroes in the first numeric digit,
+		 * and for the one digit that is kept before the decimal point.
+		 */
+		exponent -= DEC_DIGITS - (int) log10(var->digits[0]);
+	}
+	else
+	{
+		/*
+		 * If var has no digits, then it must be zero.
+		 *
+		 * Zero doesn't technically have a meaningful exponent in normalised
+		 * notation, but we just display the exponent as zero for consistency
+		 * of output.
+		 */
+		exponent = 0;
+	}
+
+	/*
+	 * The denominator is set to 10 raised to the power of the exponent;
+	 * denom_scale is -exponent for a negative exponent, and zero otherwise.
+	 *
+	 * We then divide var by the denominator to get the significand, rounding
+	 * to rscale decimal digits in the process.
+	 */
+	if (exponent < 0)
+		denom_scale = -exponent;
+	else
+		denom_scale = 0;
+
+	init_var(&denominator);
+	init_var(&significand);
+
+	int8_to_numericvar((int64) 10, &denominator);
+	power_var_int(&denominator, exponent, &denominator, denom_scale);
+	div_var(var, &denominator, &significand, rscale, true);
+	sig_out = get_str_from_var(&significand, rscale);
+
+	free_var(&denominator);
+	free_var(&significand);
+
+	/*
+	 * Allocate space for the result.
+	 *
+	 * In addition to the significand, we need room for the exponent decoration
+	 * ("e"), the sign of the exponent, up to 10 digits for the exponent
+	 * itself, and the null terminator: 1 + 1 + 10 + 1 = 13 extra bytes.
+	 */
+	len = strlen(sig_out) + 13;
+	str = palloc(len);
+	snprintf(str, len, "%se%+03d", sig_out, exponent);
+
+	pfree(sig_out);
+
+	return str;
+}
+
 
 /*
  * make_result() -
diff --git a/src/include/utils/numeric.h b/src/include/utils/numeric.h
index 19452b54fa3..b1aa27d88dd 100644
--- a/src/include/utils/numeric.h
+++ b/src/include/utils/numeric.h
@@ -7,7 +7,7 @@
  *
  * Copyright (c) 1998-2009, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/utils/numeric.h,v 1.27 2009/01/01 17:24:02 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/numeric.h,v 1.28 2009/08/10 18:29:27 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -84,4 +84,9 @@ typedef NumericData *Numeric;
 #define PG_GETARG_NUMERIC_COPY(n) DatumGetNumericCopy(PG_GETARG_DATUM(n))
 #define PG_RETURN_NUMERIC(x)	  return NumericGetDatum(x)
 
+/*
+ * Utility functions in numeric.c
+ */
+extern char *numeric_out_sci(Numeric num, int scale);
+
 #endif   /* _PG_NUMERIC_H_ */
diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out
index a3b631aabe0..857e1d83198 100644
--- a/src/test/regress/expected/numeric.out
+++ b/src/test/regress/expected/numeric.out
@@ -1139,6 +1139,21 @@ SELECT '' AS to_char_22, to_char(val, 'FM9999999999999999.999999999999999')	FROM
             | -24926804.04504742
 (10 rows)
 
+SELECT '' AS to_char_23, to_char(val, '9.999EEEE')				FROM num_data;
+ to_char_23 |  to_char   
+------------+------------
+            |  0.000e+00
+            |  0.000e+00
+            | -3.434e+07
+            |  4.310e+00
+            |  7.799e+06
+            |  1.640e+04
+            |  9.390e+04
+            | -8.303e+07
+            |  7.488e+04
+            | -2.493e+07
+(10 rows)
+
 -- TO_NUMBER()
 --
 SELECT '' AS to_number_1,  to_number('-34,338,492', '99G999G999');
diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql
index 9fd6bba31ee..8814bba486b 100644
--- a/src/test/regress/sql/numeric.sql
+++ b/src/test/regress/sql/numeric.sql
@@ -762,6 +762,7 @@ SELECT '' AS to_char_19, to_char(val, 'FMS 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 . 9 9
 SELECT '' AS to_char_20, to_char(val, E'99999 "text" 9999 "9999" 999 "\\"text between quote marks\\"" 9999') FROM num_data;
 SELECT '' AS to_char_21, to_char(val, '999999SG9999999999')			FROM num_data;
 SELECT '' AS to_char_22, to_char(val, 'FM9999999999999999.999999999999999')	FROM num_data;
+SELECT '' AS to_char_23, to_char(val, '9.999EEEE')				FROM num_data;
 
 -- TO_NUMBER()
 --
-- 
GitLab