From 9b9c5f279e8261ab90dc64559911d2578288b7e9 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 21 Jan 2020 16:17:21 -0500
Subject: [PATCH] Clarify behavior of adding and altering a column in same
 ALTER command.

The behavior of something like

ALTER TABLE transactions
  ADD COLUMN status varchar(30) DEFAULT 'old',
  ALTER COLUMN status SET default 'current';

is to fill existing table rows with 'old', not 'current'.  That's
intentional and desirable for a couple of reasons:

* It makes the behavior the same whether you merge the sub-commands
into one ALTER command or give them separately;

* If we applied the new default while filling the table, there would
be no way to get the existing behavior in one SQL command.

The same reasoning applies in cases that add a column and then
manipulate its GENERATED/IDENTITY status in a second sub-command,
since the generation expression is really just a kind of default.
However, that wasn't very obvious (at least not to me; earlier in
the referenced discussion thread I'd thought it was a bug to be
fixed).  And it certainly wasn't documented.

Hence, add documentation, code comments, and a test case to clarify
that this behavior is all intentional.

In passing, adjust ATExecAddColumn's defaults-related relkind check
so that it matches up exactly with ATRewriteTables, instead of being
effectively (though not literally) the negated inverse condition.
The reasoning can be explained a lot more concisely that way, too
(not to mention that the comment now matches the code, which it
did not before).

Discussion: https://postgr.es/m/10365.1558909428@sss.pgh.pa.us
---
 doc/src/sgml/ref/alter_table.sgml      | 39 +++++++++++++++++++++++---
 src/backend/commands/tablecmds.c       | 20 +++++++------
 src/test/regress/expected/identity.out |  6 ++++
 src/test/regress/sql/identity.sql      |  6 ++++
 4 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index 4bf449587cc..1c222d63e0b 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -203,10 +203,11 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
     <term><literal>SET</literal>/<literal>DROP DEFAULT</literal></term>
     <listitem>
      <para>
-      These forms set or remove the default value for a column.
-      Default values only apply in subsequent <command>INSERT</command>
-      or <command>UPDATE</command> commands; they do not cause rows already in the
-      table to change.
+      These forms set or remove the default value for a column (where
+      removal is equivalent to setting the default value to NULL).  The new
+      default value will only apply in subsequent <command>INSERT</command>
+      or <command>UPDATE</command> commands; it does not cause rows already
+      in the table to change.
      </para>
     </listitem>
    </varlistentry>
@@ -268,6 +269,10 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
       These forms change whether a column is an identity column or change the
       generation attribute of an existing identity column.
       See <xref linkend="sql-createtable"/> for details.
+      Like <literal>SET DEFAULT</literal>, these forms only affect the
+      behavior of subsequent <command>INSERT</command>
+      and <command>UPDATE</command> commands; they do not cause rows
+      already in the table to change.
      </para>
 
      <para>
@@ -1370,6 +1375,32 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
 <programlisting>
 ALTER TABLE distributors ADD COLUMN address varchar(30);
 </programlisting>
+   That will cause all existing rows in the table to be filled with null
+   values for the new column.
+  </para>
+
+  <para>
+   To add a column with a non-null default:
+<programlisting>
+ALTER TABLE measurements
+  ADD COLUMN mtime timestamp with time zone DEFAULT now();
+</programlisting>
+   Existing rows will be filled with the current time as the value of the
+   new column, and then new rows will receive the time of their insertion.
+  </para>
+
+  <para>
+   To add a column and fill it with a value different from the default to
+   be used later:
+<programlisting>
+ALTER TABLE transactions
+  ADD COLUMN status varchar(30) DEFAULT 'old',
+  ALTER COLUMN status SET default 'current';
+</programlisting>
+   Existing rows will be filled with <literal>old</literal>, but then
+   the default for subsequent commands will be <literal>current</literal>.
+   The effects are the same as if the two sub-commands had been issued
+   in separate <command>ALTER TABLE</command> commands.
   </para>
 
   <para>
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 30b72b62971..faf0db99e2a 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -6126,14 +6126,18 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
 	 * returned by AddRelationNewConstraints, so that the right thing happens
 	 * when a datatype's default applies.
 	 *
-	 * We skip this step completely for views and foreign tables.  For a view,
-	 * we can only get here from CREATE OR REPLACE VIEW, which historically
-	 * doesn't set up defaults, not even for domain-typed columns.  And in any
-	 * case we mustn't invoke Phase 3 on a view or foreign table, since they
-	 * have no storage.
-	 */
-	if (relkind != RELKIND_VIEW && relkind != RELKIND_COMPOSITE_TYPE
-		&& relkind != RELKIND_FOREIGN_TABLE && attribute.attnum > 0)
+	 * Note: it might seem that this should happen at the end of Phase 2, so
+	 * that the effects of subsequent subcommands can be taken into account.
+	 * It's intentional that we do it now, though.  The new column should be
+	 * filled according to what is said in the ADD COLUMN subcommand, so that
+	 * the effects are the same as if this subcommand had been run by itself
+	 * and the later subcommands had been issued in new ALTER TABLE commands.
+	 *
+	 * We can skip this entirely for relations without storage, since Phase 3
+	 * is certainly not going to touch them.  System attributes don't have
+	 * interesting defaults, either.
+	 */
+	if (RELKIND_HAS_STORAGE(relkind) && attribute.attnum > 0)
 	{
 		/*
 		 * For an identity column, we can't use build_column_default(),
diff --git a/src/test/regress/expected/identity.out b/src/test/regress/expected/identity.out
index 7cf4696ec95..1a614b85f99 100644
--- a/src/test/regress/expected/identity.out
+++ b/src/test/regress/expected/identity.out
@@ -409,6 +409,12 @@ ALTER TABLE itest8
   ALTER COLUMN f5 DROP NOT NULL,
   ALTER COLUMN f5 SET DATA TYPE bigint;
 INSERT INTO itest8 VALUES(0), (1);
+-- This does not work when the table isn't empty.  That's intentional,
+-- since ADD GENERATED should only affect later insertions:
+ALTER TABLE itest8
+  ADD COLUMN f22 int NOT NULL,
+  ALTER COLUMN f22 ADD GENERATED ALWAYS AS IDENTITY;
+ERROR:  column "f22" contains null values
 TABLE itest8;
  f1 | f2 | f3 | f4 | f5 
 ----+----+----+----+----
diff --git a/src/test/regress/sql/identity.sql b/src/test/regress/sql/identity.sql
index 685607c90c1..b4cdd21bdd4 100644
--- a/src/test/regress/sql/identity.sql
+++ b/src/test/regress/sql/identity.sql
@@ -269,6 +269,12 @@ ALTER TABLE itest8
 
 INSERT INTO itest8 VALUES(0), (1);
 
+-- This does not work when the table isn't empty.  That's intentional,
+-- since ADD GENERATED should only affect later insertions:
+ALTER TABLE itest8
+  ADD COLUMN f22 int NOT NULL,
+  ALTER COLUMN f22 ADD GENERATED ALWAYS AS IDENTITY;
+
 TABLE itest8;
 \d+ itest8
 \d itest8_f2_seq
-- 
GitLab