diff --git a/doc/src/sgml/advanced.sgml b/doc/src/sgml/advanced.sgml
index 6980dc47752fa4d7c30533066b5810526b69ea71..475ba910a1eaccc77740ba130026c6643b413f60 100644
--- a/doc/src/sgml/advanced.sgml
+++ b/doc/src/sgml/advanced.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/advanced.sgml,v 1.41 2004/03/31 16:20:53 momjian Exp $
+$PostgreSQL: pgsql/doc/src/sgml/advanced.sgml,v 1.42 2004/08/01 17:32:11 tgl Exp $
 -->
 
  <chapter id="tutorial-advanced">
@@ -257,6 +257,64 @@ COMMIT;
      you are using.
     </para>
    </note>
+
+   <para>
+    It's possible to control the statements in a transaction in a more
+    granular fashion through the use of <firstterm>savepoints</>.  Savepoints
+    allow you to selectively discard parts of the transaction, while
+    committing the rest.  After defining a savepoint with
+    <command>SAVEPOINT</>, you can, if needed, roll back to the savepoint
+    with <command>ROLLBACK TO</>.  All the transaction's database changes
+    between defining the savepoint and rolling back to it are discarded, but
+    changes earlier than the savepoint are kept.
+   </para> 
+
+   <para>
+    A savepoint remains defined after you roll back to it, so you can
+    roll back to it several times.  Conversely, if you are sure you won't need
+    to roll back to a particular savepoint again, it can be released, so the
+    system can free some resources.  Keep in mind that either releasing or
+    rolling back to a savepoint
+    will automatically release all savepoints that were defined after it.
+   </para> 
+
+   <para>
+    All this is happening within the transaction block, so none of it
+    is visible to other database sessions.  When and if you commit the
+    transaction block, the committed actions become visible as a unit
+    to other sessions, while the rolled-back actions never become visible
+    at all.
+   </para> 
+
+   <para>
+    Remembering the bank database, suppose we debit $100.00 from Alice's
+    account, and credit Bob's account, only to find later that we should
+    have credited Wally's account.  We could do it using savepoints like this:
+
+<programlisting>
+BEGIN;
+UPDATE accounts SET balance = balance - 100.00
+    WHERE name = 'Alice';
+SAVEPOINT my_savepoint;
+UPDATE accounts SET balance = balance + 100.00
+    WHERE name = 'Bob';
+-- oops ... forget that and use Wally's account
+ROLLBACK TO my_savepoint;
+UPDATE accounts SET balance = balance + 100.00
+    WHERE name = 'Wally';
+COMMIT;
+</programlisting>
+   </para>
+
+   <para>
+    This example is, of course, oversimplified, but there's a lot of control
+    to be had over a transaction block through the use of savepoints.
+    Moreover, <command>ROLLBACK TO</> is the only way to regain control of a
+    transaction block that was put in aborted state by the
+    system due to an error, short of rolling it back completely and starting
+    again.
+   </para>
+
   </sect1>
 
 
diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml
index f02edd4cff2f5653c4c6595c832d87727af28d6c..2ab20c9c690934a58477b4337f31e39e6afe9efb 100644
--- a/doc/src/sgml/ref/allfiles.sgml
+++ b/doc/src/sgml/ref/allfiles.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/allfiles.sgml,v 1.59 2004/06/25 21:55:50 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/allfiles.sgml,v 1.60 2004/08/01 17:32:13 tgl Exp $
 PostgreSQL documentation
 Complete list of usable sgml source files in this directory.
 -->
@@ -88,9 +88,12 @@ Complete list of usable sgml source files in this directory.
 <!entity notify             system "notify.sgml">
 <!entity prepare            system "prepare.sgml">
 <!entity reindex            system "reindex.sgml">
+<!entity releaseSavepoint   system "release.sgml">
 <!entity reset              system "reset.sgml">
 <!entity revoke             system "revoke.sgml">
 <!entity rollback           system "rollback.sgml">
+<!entity rollbackTo         system "rollback_to.sgml">
+<!entity savepoint          system "savepoint.sgml">
 <!entity select             system "select.sgml">
 <!entity selectInto         system "select_into.sgml">
 <!entity set                system "set.sgml">
diff --git a/doc/src/sgml/ref/begin.sgml b/doc/src/sgml/ref/begin.sgml
index d8ddf81ee06652f135a0c49b113551837dbe46bb..d40cb416bc51129bbf6774102e9e297eccd94688 100644
--- a/doc/src/sgml/ref/begin.sgml
+++ b/doc/src/sgml/ref/begin.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/begin.sgml,v 1.30 2004/01/11 09:24:17 petere Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/begin.sgml,v 1.31 2004/08/01 17:32:13 tgl Exp $
 PostgreSQL documentation
 -->
 
@@ -31,7 +31,7 @@ BEGIN [ WORK | TRANSACTION ]
 
   <para>
    <command>BEGIN</command> initiates a transaction block, that is,
-   all statements after <command>BEGIN</command> command will be
+   all statements after a <command>BEGIN</command> command will be
    executed in a single transaction until an explicit <xref
    linkend="sql-commit" endterm="sql-commit-title"> or <xref
    linkend="sql-rollback" endterm="sql-rollback-title"> is given.
@@ -145,6 +145,7 @@ BEGIN;
    <member><xref linkend="sql-commit" endterm="sql-commit-title"></member>
    <member><xref linkend="sql-rollback" endterm="sql-rollback-title"></member>
    <member><xref linkend="sql-start-transaction" endterm="sql-start-transaction-title"></member>
+   <member><xref linkend="sql-savepoint" endterm="sql-savepoint-title"></member>
   </simplelist>
  </refsect1>
 </refentry>
diff --git a/doc/src/sgml/ref/release.sgml b/doc/src/sgml/ref/release.sgml
new file mode 100644
index 0000000000000000000000000000000000000000..17ef14ee3c5feac1f1ab83fc33b92f2c9cfdc7b2
--- /dev/null
+++ b/doc/src/sgml/ref/release.sgml
@@ -0,0 +1,143 @@
+<!--
+$PostgreSQL: pgsql/doc/src/sgml/ref/release.sgml,v 1.1 2004/08/01 17:32:13 tgl Exp $
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-RELEASE">
+ <refmeta>
+  <refentrytitle id="SQL-RELEASE-TITLE">RELEASE</refentrytitle>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>RELEASE</refname>
+  <refpurpose>destroy a previously defined savepoint</refpurpose>
+ </refnamediv>
+
+ <indexterm zone="sql-release">
+  <primary>RELEASE</primary>
+ </indexterm>
+
+ <indexterm zone="sql-release">
+  <primary>savepoints</primary>
+  <secondary>releasing</secondary>
+ </indexterm>
+
+ <refsynopsisdiv>
+<synopsis>
+RELEASE <replaceable>savepoint_name</replaceable>
+</synopsis>
+ </refsynopsisdiv>
+  
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>RELEASE</command> destroys a savepoint previously defined
+   in the current transaction.
+  </para>
+
+  <para>
+   Destroying a savepoint makes it unavailable as a rollback point,
+   but it has no other user-visible behavior.  It does not undo the
+   effects of commands executed after the savepoint was established.
+   (To do that, see <xref linkend="sql-rollback-to"
+   endterm="sql-rollback-to-title">.)  Destroying a savepoint when
+   it is no longer needed may allow the system to reclaim some resources
+   earlier than transaction end.
+  </para>
+
+  <para>
+   <command>RELEASE</command> also destroys all savepoints that were
+   established after the named savepoint was established.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><replaceable>savepoint_name</replaceable></term>
+    <listitem>
+     <para>
+      The name of the savepoint to destroy.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Notes</title>
+
+  <para>
+   Specifying a savepoint name that was not previously defined is an error.
+  </para>
+
+  <para>
+   It is not possible to release a savepoint when the transaction is in
+   aborted state.
+  </para>
+
+  <para>
+   If multiple savepoints have the same name, only the one that was most
+   recently defined is released.
+  </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Examples</title>
+
+  <para>
+   To establish and later destroy a savepoint:
+<programlisting>
+BEGIN;
+    INSERT INTO table1 VALUES (3);
+    SAVEPOINT my_savepoint;
+    INSERT INTO table1 VALUES (4);
+    RELEASE my_savepoint;
+COMMIT;
+</programlisting>
+   The above transaction will insert both 3 and 4.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+  
+  <para>
+   <command>RELEASE</command> is fully conforming to the SQL standard.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-begin" endterm="sql-begin-title"></member>
+   <member><xref linkend="sql-commit" endterm="sql-commit-title"></member>
+   <member><xref linkend="sql-rollback" endterm="sql-rollback-title"></member>
+   <member><xref linkend="sql-rollback-to" endterm="sql-rollback-to-title"></member>
+   <member><xref linkend="sql-savepoint" endterm="sql-savepoint-title"></member>
+  </simplelist>
+ </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:nil
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:1
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:"../reference.ced"
+sgml-exposed-tags:nil
+sgml-local-catalogs:"/usr/lib/sgml/catalog"
+sgml-local-ecat-files:nil
+End:
+-->
diff --git a/doc/src/sgml/ref/rollback.sgml b/doc/src/sgml/ref/rollback.sgml
index f7e5e9fa281366d5b3625d1d6d8fb7e4dbe0b5eb..53b7af3dd15cafab93dc2887b013570de751d00b 100644
--- a/doc/src/sgml/ref/rollback.sgml
+++ b/doc/src/sgml/ref/rollback.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/rollback.sgml,v 1.17 2003/11/29 19:51:39 pgsql Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/rollback.sgml,v 1.18 2004/08/01 17:32:13 tgl Exp $
 PostgreSQL documentation
 -->
 
@@ -90,6 +90,7 @@ ROLLBACK;
   <simplelist type="inline">
    <member><xref linkend="sql-begin" endterm="sql-begin-title"></member>
    <member><xref linkend="sql-commit" endterm="sql-commit-title"></member>
+   <member><xref linkend="sql-rollback-to" endterm="sql-rollback-to-title"></member>
   </simplelist>
  </refsect1>
 </refentry>
diff --git a/doc/src/sgml/ref/rollback_to.sgml b/doc/src/sgml/ref/rollback_to.sgml
new file mode 100644
index 0000000000000000000000000000000000000000..e38c4f4c4fd87352e7375fdeaf8f6b95ba93f1b4
--- /dev/null
+++ b/doc/src/sgml/ref/rollback_to.sgml
@@ -0,0 +1,163 @@
+<!--
+$PostgreSQL: pgsql/doc/src/sgml/ref/rollback_to.sgml,v 1.1 2004/08/01 17:32:13 tgl Exp $
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-ROLLBACK-TO">
+ <refmeta>
+  <refentrytitle id="SQL-ROLLBACK-TO-TITLE">ROLLBACK TO</refentrytitle>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>ROLLBACK TO</refname>
+  <refpurpose>roll back to a savepoint</refpurpose>
+ </refnamediv>
+
+ <indexterm zone="sql-rollback-to">
+  <primary>ROLLBACK TO</primary>
+ </indexterm>
+
+ <indexterm zone="sql-rollback-to">
+  <primary>savepoints</primary>
+  <secondary>rolling back</secondary>
+ </indexterm>
+
+ <refsynopsisdiv>
+<synopsis>
+ROLLBACK TO <replaceable>savepoint_name</replaceable>
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   Roll back all commands that were executed after the savepoint was
+   established.  The savepoint remains valid and can be rolled back to
+   again later, if needed.
+  </para>
+
+  <para>
+   <command>ROLLBACK TO</> implicitly destroys all savepoints that
+   were established after the named savepoint.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><replaceable class="PARAMETER">savepoint_name</></term>
+    <listitem>
+     <para>
+      The savepoint to roll back to.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Notes</title>
+
+  <para>
+   Use <xref linkend="SQL-RELEASE" endterm="SQL-RELEASE-TITLE"> to
+   destroy a savepoint without discarding the effects of commands executed
+   after it was established.
+  </para>
+
+  <para>
+   Specifying a savepoint name that has not been established is an error.
+  </para>
+
+  <para>
+   Cursors have somewhat non-transactional behavior with respect to
+   savepoints.  Any cursor that is opened inside the savepoint is not closed
+   when the savepoint is rolled back.  If a cursor is affected by a
+   <command>FETCH</> command inside a savepoint that is later rolled
+   back, the cursor position remains at the position that <command>FETCH</>
+   left it pointing to (that is, <command>FETCH</> is not rolled back).
+   A cursor whose execution causes a transaction to abort is put in a
+   can't-execute state, so while the transaction can be restored using
+   <command>ROLLBACK TO</>, the cursor can no longer be used.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Examples</title>
+
+  <para>
+   To undo the effects of the commands executed after <literal>my_savepoint</literal>
+   was established:
+<programlisting>
+ROLLBACK TO my_savepoint;
+</programlisting>
+  </para>
+
+  <para>
+   Cursor positions are not affected by savepoint rollback:
+<programlisting>
+BEGIN;
+
+DECLARE foo CURSOR FOR SELECT 1 UNION SELECT 2;
+
+SAVEPOINT foo;
+
+FETCH 1 FROM foo;
+ ?column? 
+----------
+        1
+
+ROLLBACK TO foo;
+
+FETCH 1 FROM foo;
+ ?column? 
+----------
+        2
+
+COMMIT;
+</programlisting>
+   </para>
+
+
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   This command fully conforms to the SQL standard.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-begin" endterm="sql-begin-title"></member>
+   <member><xref linkend="sql-commit" endterm="sql-commit-title"></member>
+   <member><xref linkend="sql-savepoint" endterm="sql-savepoint-title"></member>
+   <member><xref linkend="sql-release" endterm="sql-release-title"></member>
+   <member><xref linkend="sql-rollback" endterm="sql-rollback-title"></member>
+  </simplelist>
+ </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:nil
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:1
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:"../reference.ced"
+sgml-exposed-tags:nil
+sgml-local-catalogs:"/usr/lib/sgml/catalog"
+sgml-local-ecat-files:nil
+End:
+-->
diff --git a/doc/src/sgml/ref/savepoint.sgml b/doc/src/sgml/ref/savepoint.sgml
new file mode 100644
index 0000000000000000000000000000000000000000..b881191c4365fa093d9378293c25ab1822b8ad7e
--- /dev/null
+++ b/doc/src/sgml/ref/savepoint.sgml
@@ -0,0 +1,152 @@
+<!--
+$PostgreSQL: pgsql/doc/src/sgml/ref/savepoint.sgml,v 1.1 2004/08/01 17:32:13 tgl Exp $
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-SAVEPOINT">
+ <refmeta>
+  <refentrytitle id="SQL-SAVEPOINT-TITLE">SAVEPOINT</refentrytitle>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>SAVEPOINT</refname>
+  <refpurpose>define a new savepoint within the current transaction</refpurpose>
+ </refnamediv>
+
+ <indexterm zone="sql-savepoint">
+  <primary>SAVEPOINT</primary>
+ </indexterm>
+
+ <indexterm zone="sql-savepoint">
+  <primary>savepoints</primary>
+  <secondary>defining</secondary>
+ </indexterm>
+
+ <refsynopsisdiv>
+<synopsis>
+SAVEPOINT <replaceable>savepoint_name</replaceable>
+</synopsis>
+ </refsynopsisdiv>
+  
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>SAVEPOINT</command> establishes a new savepoint within
+   the current transaction.
+  </para>
+
+  <para>
+   A savepoint is a special mark inside a transaction that allows all commands
+   that are executed after it was established to be rolled back, restoring
+   the transaction state to what it was at the time of the savepoint.
+  </para>
+ </refsect1>
+  
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><replaceable>savepoint_name</replaceable></term>
+    <listitem>
+     <para>
+      The name to give to the new savepoint.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Notes</title>
+
+  <para>
+   Use <xref linkend="SQL-ROLLBACK-TO" endterm="SQL-ROLLBACK-TO-TITLE"> to
+   roll back to a savepoint.  Use <xref linkend="SQL-RELEASE"
+   endterm="SQL-RELEASE-TITLE"> to destroy a savepoint, keeping
+   the effects of commands executed after it was established.
+  </para>
+
+  <para>
+   Savepoints can only be established when inside a transaction block.
+   There can be multiple savepoints defined within a transaction.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Examples</title>
+
+  <para>
+   To establish a savepoint and later undo the effects of all commands executed
+   after it was established:
+<programlisting>
+BEGIN;
+    INSERT INTO table1 VALUES (1);
+    SAVEPOINT my_savepoint;
+    INSERT INTO table1 VALUES (2);
+    ROLLBACK TO my_savepoint;
+    INSERT INTO table1 VALUES (3);
+COMMIT;
+</programlisting>
+   The above transaction will insert the values 1 and 3, but not 2.
+  </para>
+
+  <para>
+   To establish and later destroy a savepoint:
+<programlisting>
+BEGIN;
+    INSERT INTO table1 VALUES (3);
+    SAVEPOINT my_savepoint;
+    INSERT INTO table1 VALUES (4);
+    RELEASE my_savepoint;
+COMMIT;
+</programlisting>
+   The above transaction will insert both 3 and 4.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+  
+  <para>
+   SQL requires a savepoint to be destroyed automatically when another
+   savepoint with the same name is established.  In
+   <productname>PostgreSQL</>, the old savepoint is kept, though only the more
+   recent one will be used when rolling back or releasing.  (Releasing the
+   newer savepoint will cause the older one to again become accessible to
+   <command>ROLLBACK TO</> and <command>RELEASE</>.)
+   Other than that, <command>SAVEPOINT</command> is fully SQL conforming.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-begin" endterm="sql-begin-title"></member>
+   <member><xref linkend="sql-rollback" endterm="sql-rollback-title"></member>
+   <member><xref linkend="sql-rollback-to" endterm="sql-rollback-to-title"></member>
+   <member><xref linkend="sql-release" endterm="sql-release-title"></member>
+   <member><xref linkend="sql-commit" endterm="sql-commit-title"></member>
+  </simplelist>
+ </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:nil
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:1
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:"../reference.ced"
+sgml-exposed-tags:nil
+sgml-local-catalogs:"/usr/lib/sgml/catalog"
+sgml-local-ecat-files:nil
+End:
+-->
diff --git a/doc/src/sgml/ref/start_transaction.sgml b/doc/src/sgml/ref/start_transaction.sgml
index 5cecbf755650b35819e286e24907ed7995741f46..1a7bc363ce7fc2ced03323dcc2dd6bb26a555f9f 100644
--- a/doc/src/sgml/ref/start_transaction.sgml
+++ b/doc/src/sgml/ref/start_transaction.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/start_transaction.sgml,v 1.11 2004/01/11 05:46:58 neilc Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/start_transaction.sgml,v 1.12 2004/08/01 17:32:13 tgl Exp $
 PostgreSQL documentation
 -->
 
@@ -66,6 +66,7 @@ START TRANSACTION
    <member><xref linkend="sql-commit" endterm="sql-commit-title"></member>
    <member><xref linkend="sql-rollback" endterm="sql-rollback-title"></member>
    <member><xref linkend="sql-set-transaction" endterm="sql-set-transaction-title"></member>
+   <member><xref linkend="sql-savepoint" endterm="sql-savepoint-title"></member>
   </simplelist>
  </refsect1>
 </refentry>
diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml
index 5230e07119e98d0c4eb0c6e65df90298ed705ff2..23164a57c707b3450cbc5d375bb5c23ac01b1526 100644
--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -1,5 +1,5 @@
 <!-- reference.sgml
-$PostgreSQL: pgsql/doc/src/sgml/reference.sgml,v 1.50 2004/06/25 21:55:51 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/reference.sgml,v 1.51 2004/08/01 17:32:11 tgl Exp $
 
 PostgreSQL Reference Manual
 -->
@@ -120,9 +120,12 @@ PostgreSQL Reference Manual
    &notify;
    &prepare;
    &reindex;
+   &releaseSavepoint;
    &reset;
    &revoke;
    &rollback;
+   &rollbackTo;
+   &savepoint;
    &select;
    &selectInto;
    &set;
diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
index 9d3b0b323aab9d4e3dad7ccab0fb64fa568a9c0a..81b60c9fda6cbb471750fd5b8ee85129c4635788 100644
--- a/src/backend/access/transam/varsup.c
+++ b/src/backend/access/transam/varsup.c
@@ -6,7 +6,7 @@
  * Copyright (c) 2000-2003, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.56 2004/07/01 00:49:42 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.57 2004/08/01 17:32:13 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -68,10 +68,10 @@ GetNewTransactionId(bool isSubXact)
 	TransactionIdAdvance(ShmemVariableCache->nextXid);
 
 	/*
-	 * Must set MyProc->xid before releasing XidGenLock.  This ensures
-	 * that when GetSnapshotData calls ReadNewTransactionId, all active
-	 * XIDs before the returned value of nextXid are already present in
-	 * the shared PGPROC array.  Else we have a race condition.
+	 * We must store the new XID into the shared PGPROC array before releasing
+	 * XidGenLock.  This ensures that when GetSnapshotData calls
+	 * ReadNewTransactionId, all active XIDs before the returned value of
+	 * nextXid are already present in PGPROC.  Else we have a race condition.
 	 *
 	 * XXX by storing xid into MyProc without acquiring SInvalLock, we are
 	 * relying on fetch/store of an xid to be atomic, else other backends
@@ -82,16 +82,41 @@ GetNewTransactionId(bool isSubXact)
 	 * the value only once, rather than assume they can read it multiple
 	 * times and get the same answer each time.
 	 *
+	 * The same comments apply to the subxact xid count and overflow fields.
+	 *
 	 * A solution to the atomic-store problem would be to give each PGPROC
-	 * its own spinlock used only for fetching/storing that PGPROC's xid.
-	 * (SInvalLock would then mean primarily that PGPROCs couldn't be added/
-	 * removed while holding the lock.)
+	 * its own spinlock used only for fetching/storing that PGPROC's xid
+	 * and related fields.  (SInvalLock would then mean primarily that
+	 * PGPROCs couldn't be added/removed while holding the lock.)
 	 *
-	 * We don't want a subtransaction to update the stored Xid; we'll check
-	 * if a transaction Xid is a running subxact by checking pg_subtrans.
+	 * If there's no room to fit a subtransaction XID into PGPROC, set the
+	 * cache-overflowed flag instead.  This forces readers to look in
+	 * pg_subtrans to map subtransaction XIDs up to top-level XIDs.
+	 * There is a race-condition window, in that the new XID will not
+	 * appear as running until its parent link has been placed into
+	 * pg_subtrans.  However, that will happen before anyone could possibly
+	 * have a reason to inquire about the status of the XID, so it seems
+	 * OK.  (Snapshots taken during this window *will* include the parent
+	 * XID, so they will deliver the correct answer later on when someone
+	 * does have a reason to inquire.)
 	 */
-	if (MyProc != NULL && !isSubXact)
-		MyProc->xid = xid;
+	if (MyProc != NULL)
+	{
+		if (!isSubXact)
+			MyProc->xid = xid;
+		else
+		{
+			if (MyProc->subxids.nxids < PGPROC_MAX_CACHED_SUBXIDS)
+			{
+				MyProc->subxids.xids[MyProc->subxids.nxids] = xid;
+				MyProc->subxids.nxids++;
+			}
+			else
+			{
+				MyProc->subxids.overflowed = true;
+			}
+		}
+	}
 
 	LWLockRelease(XidGenLock);
 
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index b6758a14b227a1376ee805b4b2a46f44b67bdab5..486f85be5d9395a5e33b04fa497122e8813eac41 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.174 2004/07/31 07:39:18 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.175 2004/08/01 17:32:13 tgl Exp $
  *
  * NOTES
  *		Transaction aborts can now occur two ways:
@@ -168,7 +168,6 @@
 #include "pgstat.h"
 
 
-
 /*
  *	transaction states - transaction state from server perspective
  */
@@ -230,6 +229,14 @@ typedef struct TransactionStateData
 
 typedef TransactionStateData *TransactionState;
 
+/*
+ * childXids is currently implemented as an integer List, relying on the
+ * assumption that TransactionIds are no wider than int.  We use these
+ * macros to provide some isolation in case that changes in the future.
+ */
+#define lfirst_xid(lc)				((TransactionId) lfirst_int(lc))
+#define lappend_xid(list, datum)	lappend_int(list, (int) (datum))
+
 
 static void AbortTransaction(void);
 static void AtAbort_Memory(void);
@@ -239,7 +246,7 @@ static void AtCommit_Memory(void);
 static void AtStart_Cache(void);
 static void AtStart_Memory(void);
 static void AtStart_ResourceOwner(void);
-static void CallEOXactCallbacks(bool isCommit);
+static void CallXactCallbacks(XactEvent event, TransactionId parentXid);
 static void CleanupTransaction(void);
 static void CommitTransaction(void);
 static void RecordTransactionAbort(void);
@@ -315,16 +322,16 @@ int			CommitSiblings = 5; /* number of concurrent xacts needed to
 
 
 /*
- * List of add-on end-of-xact callbacks
+ * List of add-on start- and end-of-xact callbacks
  */
-typedef struct EOXactCallbackItem
+typedef struct XactCallbackItem
 {
-	struct EOXactCallbackItem *next;
-	EOXactCallback callback;
+	struct XactCallbackItem *next;
+	XactCallback callback;
 	void	   *arg;
-} EOXactCallbackItem;
+} XactCallbackItem;
 
-static EOXactCallbackItem *EOXact_callbacks = NULL;
+static XactCallbackItem *Xact_callbacks = NULL;
 
 static void (*_RollbackFunc) (void *) = NULL;
 static void *_RollbackData = NULL;
@@ -490,7 +497,7 @@ TransactionIdIsCurrentTransactionId(TransactionId xid)
 			return true;
 		foreach(cell, s->childXids)
 		{
-			if (TransactionIdEquals(xid, lfirst_int(cell)))
+			if (TransactionIdEquals(xid, lfirst_xid(cell)))
 				return true;
 		}
 
@@ -877,12 +884,12 @@ AtSubCommit_childXids(void)
 
 	old_cxt = MemoryContextSwitchTo(s->parent->curTransactionContext);
 
+	s->parent->childXids = lappend_xid(s->parent->childXids,
+									   s->transactionIdData);
+
 	s->parent->childXids = list_concat(s->parent->childXids, s->childXids);
 	s->childXids = NIL;			/* ensure list not doubly referenced */
 
-	s->parent->childXids = lappend_int(s->parent->childXids,
-									   s->transactionIdData);
-
 	MemoryContextSwitchTo(old_cxt);
 }
 
@@ -1083,6 +1090,7 @@ RecordSubTransactionAbort(void)
 {
 	int			nrels;
 	RelFileNode *rptr;
+	TransactionId	xid = GetCurrentTransactionId();
 	int 			nchildren;
 	TransactionId  *children;
 
@@ -1104,8 +1112,6 @@ RecordSubTransactionAbort(void)
 	 */
 	if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0)
 	{
-		TransactionId	xid = GetCurrentTransactionId();
-
 		START_CRIT_SECTION();
 
 		/*
@@ -1162,6 +1168,15 @@ RecordSubTransactionAbort(void)
 		END_CRIT_SECTION();
 	}
 
+	/*
+	 * We can immediately remove failed XIDs from PGPROC's cache of
+	 * running child XIDs. It's easiest to do it here while we have the
+	 * child XID array at hand, even though in the main-transaction
+	 * case the equivalent work happens just after return from
+	 * RecordTransactionAbort.
+	 */
+	XidCacheRemoveRunningXids(xid, nchildren, children);
+
 	/* And clean up local data */
 	if (rptr)
 		pfree(rptr);
@@ -1389,6 +1404,11 @@ CommitTransaction(void)
 		LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
 		MyProc->xid = InvalidTransactionId;
 		MyProc->xmin = InvalidTransactionId;
+
+		/* Clear the subtransaction-XID cache too while holding the lock */
+		MyProc->subxids.nxids = 0;
+		MyProc->subxids.overflowed = false;
+
 		LWLockRelease(SInvalLock);
 	}
 
@@ -1411,6 +1431,8 @@ CommitTransaction(void)
 	smgrDoPendingDeletes(true);
 	/* smgrcommit already done */
 
+	CallXactCallbacks(XACT_EVENT_COMMIT, InvalidTransactionId);
+
 	ResourceOwnerRelease(TopTransactionResourceOwner,
 						 RESOURCE_RELEASE_BEFORE_LOCKS,
 						 true, true);
@@ -1431,7 +1453,6 @@ CommitTransaction(void)
 						 RESOURCE_RELEASE_AFTER_LOCKS,
 						 true, true);
 
-	CallEOXactCallbacks(true);
 	AtEOXact_GUC(true, false);
 	AtEOXact_SPI(true);
 	AtEOXact_on_commit_actions(true, s->transactionIdData);
@@ -1540,6 +1561,11 @@ AbortTransaction(void)
 		LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
 		MyProc->xid = InvalidTransactionId;
 		MyProc->xmin = InvalidTransactionId;
+
+		/* Clear the subtransaction-XID cache too while holding the lock */
+		MyProc->subxids.nxids = 0;
+		MyProc->subxids.overflowed = false;
+
 		LWLockRelease(SInvalLock);
 	}
 
@@ -1551,6 +1577,8 @@ AbortTransaction(void)
 	smgrDoPendingDeletes(false);
 	smgrabort();
 
+	CallXactCallbacks(XACT_EVENT_ABORT, InvalidTransactionId);
+
 	ResourceOwnerRelease(TopTransactionResourceOwner,
 						 RESOURCE_RELEASE_BEFORE_LOCKS,
 						 false, true);
@@ -1562,13 +1590,11 @@ AbortTransaction(void)
 						 RESOURCE_RELEASE_AFTER_LOCKS,
 						 false, true);
 
-	CallEOXactCallbacks(false);
 	AtEOXact_GUC(false, false);
 	AtEOXact_SPI(false);
 	AtEOXact_on_commit_actions(false, s->transactionIdData);
 	AtEOXact_Namespace(false);
 	AtEOXact_Files();
-	SetReindexProcessing(InvalidOid, InvalidOid);
 	pgstat_count_xact_rollback();
 
 	/*
@@ -2158,43 +2184,46 @@ IsInTransactionChain(void *stmtNode)
 
 
 /*
- * Register or deregister callback functions for end-of-xact cleanup
+ * Register or deregister callback functions for start- and end-of-xact
+ * operations.
  *
  * These functions are intended for use by dynamically loaded modules.
  * For built-in modules we generally just hardwire the appropriate calls
  * (mainly because it's easier to control the order that way, where needed).
  *
- * Note that the callback occurs post-commit or post-abort, so the callback
- * functions can only do noncritical cleanup.
+ * At transaction end, the callback occurs post-commit or post-abort, so the
+ * callback functions can only do noncritical cleanup.  At subtransaction
+ * start, the callback is called when the subtransaction has finished 
+ * initializing.
  */
 void
-RegisterEOXactCallback(EOXactCallback callback, void *arg)
+RegisterXactCallback(XactCallback callback, void *arg)
 {
-	EOXactCallbackItem *item;
+	XactCallbackItem *item;
 
-	item = (EOXactCallbackItem *)
-		MemoryContextAlloc(TopMemoryContext, sizeof(EOXactCallbackItem));
+	item = (XactCallbackItem *)
+		MemoryContextAlloc(TopMemoryContext, sizeof(XactCallbackItem));
 	item->callback = callback;
 	item->arg = arg;
-	item->next = EOXact_callbacks;
-	EOXact_callbacks = item;
+	item->next = Xact_callbacks;
+	Xact_callbacks = item;
 }
 
 void
-UnregisterEOXactCallback(EOXactCallback callback, void *arg)
+UnregisterXactCallback(XactCallback callback, void *arg)
 {
-	EOXactCallbackItem *item;
-	EOXactCallbackItem *prev;
+	XactCallbackItem *item;
+	XactCallbackItem *prev;
 
 	prev = NULL;
-	for (item = EOXact_callbacks; item; prev = item, item = item->next)
+	for (item = Xact_callbacks; item; prev = item, item = item->next)
 	{
 		if (item->callback == callback && item->arg == arg)
 		{
 			if (prev)
 				prev->next = item->next;
 			else
-				EOXact_callbacks = item->next;
+				Xact_callbacks = item->next;
 			pfree(item);
 			break;
 		}
@@ -2202,13 +2231,13 @@ UnregisterEOXactCallback(EOXactCallback callback, void *arg)
 }
 
 static void
-CallEOXactCallbacks(bool isCommit)
+CallXactCallbacks(XactEvent event, TransactionId parentXid)
 {
-	EOXactCallbackItem *item;
+	XactCallbackItem *item;
 
-	for (item = EOXact_callbacks; item; item = item->next)
+	for (item = Xact_callbacks; item; item = item->next)
 	{
-		(*item->callback) (isCommit, item->arg);
+		(*item->callback) (event, parentXid, item->arg);
 	}
 }
 
@@ -2948,32 +2977,11 @@ bool
 IsSubTransaction(void)
 {
 	TransactionState s = CurrentTransactionState;
-	
-	switch (s->blockState)
-	{
-		case TBLOCK_DEFAULT:
-		case TBLOCK_STARTED:
-		case TBLOCK_BEGIN:
-		case TBLOCK_INPROGRESS:
-		case TBLOCK_END:
-		case TBLOCK_ABORT:
-		case TBLOCK_ENDABORT:
-			return false;
-		case TBLOCK_SUBBEGIN:
-		case TBLOCK_SUBINPROGRESS:
-		case TBLOCK_SUBABORT:
-		case TBLOCK_SUBEND:
-		case TBLOCK_SUBENDABORT_ALL:
-		case TBLOCK_SUBENDABORT:
-		case TBLOCK_SUBABORT_PENDING:
-		case TBLOCK_SUBENDABORT_RELEASE:
-			return true;
-	}
 
-	/* should never get here */
-	elog(FATAL, "invalid transaction block state: %s",
-		 BlockStateAsString(s->blockState));
-	return false;				/* keep compiler quiet */
+	if (s->nestingLevel >= 2)
+		return true;
+
+	return false;
 }
 
 /*
@@ -2997,7 +3005,10 @@ StartSubTransaction(void)
 	AtSubStart_ResourceOwner();
 
 	/*
-	 * Generate a new Xid and record it in pg_subtrans.
+	 * Generate a new Xid and record it in pg_subtrans.  NB: we must make
+	 * the subtrans entry BEFORE the Xid appears anywhere in shared storage
+	 * (such as the lock table), because until the entry is made the Xid may
+	 * not appear to be "running" to other backends.  See GetNewTransactionId.
 	 */
 	s->transactionIdData = GetNewTransactionId(true);
 
@@ -3020,6 +3031,11 @@ StartSubTransaction(void)
 
 	s->state = TRANS_INPROGRESS;
 
+	/*
+	 * Call start-of-subxact callbacks 
+	 */
+	CallXactCallbacks(XACT_EVENT_START_SUB, s->parent->transactionIdData);
+
 	ShowTransactionState("StartSubTransaction");
 }
 
@@ -3037,10 +3053,7 @@ CommitSubTransaction(void)
 		elog(WARNING, "CommitSubTransaction while in %s state",
 			 TransStateAsString(s->state));
 
-	/* Pre-commit processing */
-	AtSubCommit_Portals(s->parent->transactionIdData,
-						s->parent->curTransactionOwner);
-	DeferredTriggerEndSubXact(true);
+	/* Pre-commit processing goes here -- nothing to do at the moment */
 
 	s->state = TRANS_COMMIT;
 
@@ -3050,19 +3063,17 @@ CommitSubTransaction(void)
 	AtSubCommit_childXids();
 
 	/* Post-commit cleanup */
-	AtSubCommit_smgr();
-
-	AtEOSubXact_Inval(true);
-	AtEOSubXact_SPI(true, s->transactionIdData);
-
+	DeferredTriggerEndSubXact(true);
+	AtSubCommit_Portals(s->parent->transactionIdData,
+						s->parent->curTransactionOwner);
 	AtEOSubXact_LargeObject(true, s->transactionIdData,
 							s->parent->transactionIdData);
+	AtSubCommit_Notify();
 	AtEOSubXact_UpdatePasswordFile(true, s->transactionIdData,
 								   s->parent->transactionIdData);
-	AtEOSubXact_Files(true, s->transactionIdData,
-					  s->parent->transactionIdData);
-	AtEOSubXact_Namespace(true, s->transactionIdData,
-						  s->parent->transactionIdData);
+	AtSubCommit_smgr();
+
+	CallXactCallbacks(XACT_EVENT_COMMIT_SUB, s->parent->transactionIdData);
 
 	/*
 	 * Note that we just release the resource owner's resources and don't
@@ -3074,15 +3085,20 @@ CommitSubTransaction(void)
 	ResourceOwnerRelease(s->curTransactionOwner,
 						 RESOURCE_RELEASE_BEFORE_LOCKS,
 						 true, false);
+	AtEOSubXact_Inval(true);
 	/* we can skip the LOCKS phase */
 	ResourceOwnerRelease(s->curTransactionOwner,
 						 RESOURCE_RELEASE_AFTER_LOCKS,
 						 true, false);
 
-	AtSubCommit_Notify();
 	AtEOXact_GUC(true, true);
+	AtEOSubXact_SPI(true, s->transactionIdData);
 	AtEOSubXact_on_commit_actions(true, s->transactionIdData,
 								  s->parent->transactionIdData);
+	AtEOSubXact_Namespace(true, s->transactionIdData,
+						  s->parent->transactionIdData);
+	AtEOSubXact_Files(true, s->transactionIdData,
+					  s->parent->transactionIdData);
 
 	/*
 	 * We need to restore the upper transaction's read-only state,
@@ -3134,35 +3150,32 @@ AbortSubTransaction(void)
 
 	LockWaitCancel();
 
-	AtSubAbort_Memory();
-
 	/*
 	 * do abort processing
 	 */
-
-	RecordSubTransactionAbort();
-
-	/* Post-abort cleanup */
-	AtSubAbort_smgr();
+	AtSubAbort_Memory();
 
 	DeferredTriggerEndSubXact(false);
-	AtEOSubXact_SPI(false, s->transactionIdData);
 	AtSubAbort_Portals(s->parent->transactionIdData,
 					   s->parent->curTransactionOwner);
-	AtEOSubXact_Inval(false);
-
 	AtEOSubXact_LargeObject(false, s->transactionIdData,
 							s->parent->transactionIdData);
+	AtSubAbort_Notify();
 	AtEOSubXact_UpdatePasswordFile(false, s->transactionIdData,
 								   s->parent->transactionIdData);
-	AtEOSubXact_Files(false, s->transactionIdData,
-					  s->parent->transactionIdData);
-	AtEOSubXact_Namespace(false, s->transactionIdData,
-						  s->parent->transactionIdData);
+
+	/* Advertise the fact that we aborted in pg_clog. */
+	RecordSubTransactionAbort();
+
+	/* Post-abort cleanup */
+	AtSubAbort_smgr();
+
+	CallXactCallbacks(XACT_EVENT_ABORT_SUB, s->parent->transactionIdData);
 
 	ResourceOwnerRelease(s->curTransactionOwner,
 						 RESOURCE_RELEASE_BEFORE_LOCKS,
 						 false, false);
+	AtEOSubXact_Inval(false);
 	ResourceOwnerRelease(s->curTransactionOwner,
 						 RESOURCE_RELEASE_LOCKS,
 						 false, false);
@@ -3170,10 +3183,14 @@ AbortSubTransaction(void)
 						 RESOURCE_RELEASE_AFTER_LOCKS,
 						 false, false);
 
-	AtSubAbort_Notify();
 	AtEOXact_GUC(false, true);
+	AtEOSubXact_SPI(false, s->transactionIdData);
 	AtEOSubXact_on_commit_actions(false, s->transactionIdData,
 								  s->parent->transactionIdData);
+	AtEOSubXact_Namespace(false, s->transactionIdData,
+						  s->parent->transactionIdData);
+	AtEOSubXact_Files(false, s->transactionIdData,
+					  s->parent->transactionIdData);
 
 	/*
 	 * Reset user id which might have been changed transiently.  Here we
@@ -3196,8 +3213,6 @@ AbortSubTransaction(void)
 	 */
 	XactReadOnly = s->prevXactReadOnly;
 
-	CommandCounterIncrement();
-
 	RESUME_INTERRUPTS();
 }
 
@@ -3481,7 +3496,7 @@ xactGetCommittedChildren(TransactionId **ptr)
 
 	foreach(p, s->childXids)
 	{
-		TransactionId child = lfirst_int(p);
+		TransactionId child = lfirst_xid(p);
 
 		*children++ = child;
 	}
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 581799fc5f555f561b77066903302dff8cc3bfff..9cb3c56110faa8cec0d6609b660f22256bd4bd6a 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.234 2004/06/18 06:13:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.235 2004/08/01 17:32:14 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1646,7 +1646,6 @@ reindex_index(Oid indexId)
 {
 	Relation	iRel,
 				heapRelation;
-	IndexInfo  *indexInfo;
 	Oid			heapId;
 	bool		inplace;
 
@@ -1671,8 +1670,6 @@ reindex_index(Oid indexId)
 	/* Open and lock the parent heap relation */
 	heapRelation = heap_open(heapId, AccessExclusiveLock);
 
-	SetReindexProcessing(heapId, indexId);
-
 	/*
 	 * If it's a shared index, we must do inplace processing (because we
 	 * have no way to update relfilenode in other databases).  Otherwise
@@ -1690,36 +1687,51 @@ reindex_index(Oid indexId)
 				 errmsg("shared index \"%s\" can only be reindexed in stand-alone mode",
 						RelationGetRelationName(iRel))));
 
-	/* Fetch info needed for index_build */
-	indexInfo = BuildIndexInfo(iRel);
-
-	if (inplace)
+	PG_TRY();
 	{
+		IndexInfo  *indexInfo;
+
+		/* Suppress use of the target index while rebuilding it */
+		SetReindexProcessing(heapId, indexId);
+
+		/* Fetch info needed for index_build */
+		indexInfo = BuildIndexInfo(iRel);
+
+		if (inplace)
+		{
+			/*
+			 * Release any buffers associated with this index.	If they're
+			 * dirty, they're just dropped without bothering to flush to disk.
+			 */
+			DropRelationBuffers(iRel);
+
+			/* Now truncate the actual data */
+			RelationTruncate(iRel, 0);
+		}
+		else
+		{
+			/*
+			 * We'll build a new physical relation for the index.
+			 */
+			setNewRelfilenode(iRel);
+		}
+
+		/* Initialize the index and rebuild */
+		index_build(heapRelation, iRel, indexInfo);
+
 		/*
-		 * Release any buffers associated with this index.	If they're
-		 * dirty, they're just dropped without bothering to flush to disk.
+		 * index_build will close both the heap and index relations (but not
+		 * give up the locks we hold on them).	So we're done.
 		 */
-		DropRelationBuffers(iRel);
-
-		/* Now truncate the actual data */
-		RelationTruncate(iRel, 0);
 	}
-	else
+	PG_CATCH();
 	{
-		/*
-		 * We'll build a new physical relation for the index.
-		 */
-		setNewRelfilenode(iRel);
+		/* Make sure flag gets cleared on error exit */
+		ResetReindexProcessing();
+		PG_RE_THROW();
 	}
-
-	/* Initialize the index and rebuild */
-	index_build(heapRelation, iRel, indexInfo);
-
-	/*
-	 * index_build will close both the heap and index relations (but not
-	 * give up the locks we hold on them).	So we're done.
-	 */
-	SetReindexProcessing(InvalidOid, InvalidOid);
+	PG_END_TRY();
+	ResetReindexProcessing();
 }
 
 /*
diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c
index f5e909c672c5eb63446f228bbe20b899ea7ed3bd..57e39da4a4d7c0ebd218a91c1269cb3a5fc3f3e8 100644
--- a/src/backend/storage/ipc/sinval.c
+++ b/src/backend/storage/ipc/sinval.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/ipc/sinval.c,v 1.66 2004/07/01 03:13:05 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/ipc/sinval.c,v 1.67 2004/08/01 17:32:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -28,6 +28,30 @@
 #include "miscadmin.h"
 
 
+#ifdef XIDCACHE_DEBUG
+
+/* counters for XidCache measurement */
+static long xc_by_recent_xmin = 0;
+static long xc_by_main_xid = 0;
+static long xc_by_child_xid = 0;
+static long xc_slow_answer = 0;
+
+#define xc_by_recent_xmin_inc()		(xc_by_recent_xmin++)
+#define xc_by_main_xid_inc()		(xc_by_main_xid++)
+#define xc_by_child_xid_inc()		(xc_by_child_xid++)
+#define xc_slow_answer_inc()		(xc_slow_answer++)
+
+static void DisplayXidCache(int code, Datum arg);
+
+#else /* !XIDCACHE_DEBUG */
+
+#define xc_by_recent_xmin_inc()		((void) 0)
+#define xc_by_main_xid_inc()		((void) 0)
+#define xc_by_child_xid_inc()		((void) 0)
+#define xc_slow_answer_inc()		((void) 0)
+
+#endif /* XIDCACHE_DEBUG */
+
 /*
  * Because backends sitting idle will not be reading sinval events, we
  * need a way to give an idle backend a swift kick in the rear and make
@@ -80,6 +104,10 @@ InitBackendSharedInvalidationState(void)
 		ereport(FATAL,
 				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
 				 errmsg("sorry, too many clients already")));
+
+#ifdef XIDCACHE_DEBUG
+	on_proc_exit(DisplayXidCache, (Datum) 0);
+#endif /* XIDCACHE_DEBUG */
 }
 
 /*
@@ -393,7 +421,6 @@ ProcessCatchupEvent(void)
  * to the doomed database, so additional interlocking is needed during
  * backend startup.
  */
-
 bool
 DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself)
 {
@@ -429,7 +456,41 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself)
 }
 
 /*
- * TransactionIdIsInProgress -- is given transaction running by some backend
+ * IsBackendPid -- is a given pid a running backend
+ */
+bool
+IsBackendPid(int pid)
+{
+	bool		result = false;
+	SISeg	   *segP = shmInvalBuffer;
+	ProcState  *stateP = segP->procState;
+	int			index;
+
+	LWLockAcquire(SInvalLock, LW_SHARED);
+
+	for (index = 0; index < segP->lastBackend; index++)
+	{
+		SHMEM_OFFSET pOffset = stateP[index].procStruct;
+
+		if (pOffset != INVALID_OFFSET)
+		{
+			PGPROC	   *proc = (PGPROC *) MAKE_PTR(pOffset);
+
+			if (proc->pid == pid)
+			{
+				result = true;
+				break;
+			}
+		}
+	}
+
+	LWLockRelease(SInvalLock);
+
+	return result;
+}
+
+/*
+ * TransactionIdIsInProgress -- is given transaction running in some backend
  *
  * There are three possibilities for finding a running transaction:
  *
@@ -439,13 +500,15 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself)
  * 2. the given Xid is one of the cached subxact Xids in the PGPROC array.
  * We can find this out cheaply too.
  *
- * 3. Search the SubTrans tree.  This is the slowest, but sadly it has to be
- * done always if the other two failed.
+ * 3. Search the SubTrans tree to find the Xid's topmost parent, and then
+ * see if that is running according to PGPROC.  This is the slowest, but
+ * sadly it has to be done always if the other two failed, unless we see
+ * that the cached subxact sets are complete (none have overflowed).
  *
- * SInvalLock has to be held while we do 1 and 2.  If we save all the Xids
+ * SInvalLock has to be held while we do 1 and 2.  If we save the top Xids
  * while doing 1, we can release the SInvalLock while we do 3.  This buys back
- * some concurrency (we can't retrieve the main Xids from PGPROC again anyway,
- * see GetNewTransactionId)
+ * some concurrency (we can't retrieve the main Xids from PGPROC again anyway;
+ * see GetNewTransactionId).
  */
 bool
 TransactionIdIsInProgress(TransactionId xid)
@@ -453,13 +516,27 @@ TransactionIdIsInProgress(TransactionId xid)
 	bool			result = false;
 	SISeg		   *segP = shmInvalBuffer;
 	ProcState	   *stateP = segP->procState;
-	int				i;
+	int				i,
+					j;
 	int				nxids = 0;
 	TransactionId  *xids;
+	TransactionId	topxid;
+	bool			locked;
+
+	/*
+	 * Don't bother checking a very old transaction.
+	 */
+	if (TransactionIdPrecedes(xid, RecentGlobalXmin))
+	{
+		xc_by_recent_xmin_inc();
+		return false;
+	}
 
-	xids = (TransactionId *)palloc(sizeof(TransactionId) * segP->maxBackends);
+	/* Get workspace to remember main XIDs in */
+	xids = (TransactionId *) palloc(sizeof(TransactionId) * segP->maxBackends);
 
 	LWLockAcquire(SInvalLock, LW_SHARED);
+	locked = true;
 
 	for (i = 0; i < segP->lastBackend; i++)
 	{
@@ -472,101 +549,90 @@ TransactionIdIsInProgress(TransactionId xid)
 			/* Fetch xid just once - see GetNewTransactionId */
 			TransactionId pxid = proc->xid;
 
+			if (!TransactionIdIsValid(pxid))
+				continue;
+
 			/*
-			 * check the main Xid (step 1 above)
+			 * Step 1: check the main Xid
 			 */
 			if (TransactionIdEquals(pxid, xid))
 			{
+				xc_by_main_xid_inc();
 				result = true;
-				break;
+				goto result_known;
 			}
 
 			/*
-			 * save the main Xid for step 3.
+			 * We can ignore main Xids that are younger than the target Xid,
+			 * since the target could not possibly be their child.
 			 */
-			xids[nxids++] = pxid;
-
-#ifdef NOT_USED
-			FIXME -- waiting to save the Xids in PGPROC ...
+			if (TransactionIdPrecedes(xid, pxid))
+				continue;
 
 			/*
-			 * check the saved Xids array (step 2)
+			 * Step 2: check the cached child-Xids arrays
 			 */
-			for (j = 0; j < PGPROC_MAX_SAVED_XIDS; j++)
+			for (j = proc->subxids.nxids - 1; j >= 0; j--)
 			{
-				pxid = proc->savedxids[j];
-
-				if (!TransactionIdIsValid(pxids))
-					break;
+				/* Fetch xid just once - see GetNewTransactionId */
+				TransactionId cxid = proc->subxids.xids[j];
 
-				if (TransactionIdEquals(pxid, xid))
+				if (TransactionIdEquals(cxid, xid))
 				{
+					xc_by_child_xid_inc();
 					result = true;
-					break;
+					goto result_known;
 				}
 			}
-#endif
 
-			if (result)
-				break;
+			/*
+			 * Save the main Xid for step 3.  We only need to remember main
+			 * Xids that have uncached children.  (Note: there is no race
+			 * condition here because the overflowed flag cannot be cleared,
+			 * only set, while we hold SInvalLock.  So we can't miss an Xid
+			 * that we need to worry about.)
+			 */
+			if (proc->subxids.overflowed)
+				xids[nxids++] = pxid;
 		}
 	}
 
 	LWLockRelease(SInvalLock);
+	locked = false;
 
 	/*
-	 * Step 3: have to check pg_subtrans.  Use the saved Xids.
-	 *
-	 * XXX Could save the cached Xids too for further improvement.
+	 * If none of the relevant caches overflowed, we know the Xid is
+	 * not running without looking at pg_subtrans.
 	 */
-	if (!result)
-	{
-		/* this is a potentially expensive call. */
-		xid = SubTransGetTopmostTransaction(xid);
-		
-		Assert(TransactionIdIsValid(xid));
-
-		/*
-		 * We don't care if it aborted, because if it did, we won't find
-		 * it in the array.
-		 */
-		for (i = 0; i < nxids; i++)
-		{
-			if (TransactionIdEquals(xids[i], xid))
-			{
-				result = true;
-				break;
-			}
-		}
-	}
-
-	pfree(xids);
+	if (nxids == 0)
+		goto result_known;
 
-	return result;
-}
-
-/*
- * IsBackendPid -- is a given pid a running backend
- */
-bool
-IsBackendPid(int pid)
-{
-	bool		result = false;
-	SISeg	   *segP = shmInvalBuffer;
-	ProcState  *stateP = segP->procState;
-	int			index;
+	/*
+	 * Step 3: have to check pg_subtrans.
+	 *
+	 * At this point, we know it's either a subtransaction of one of the
+	 * Xids in xids[], or it's not running.  If it's an already-failed
+	 * subtransaction, we want to say "not running" even though its parent may
+	 * still be running.  So first, check pg_clog to see if it's been aborted.
+	 */
+	xc_slow_answer_inc();
 
-	LWLockAcquire(SInvalLock, LW_SHARED);
+	if (TransactionIdDidAbort(xid))
+		goto result_known;
 
-	for (index = 0; index < segP->lastBackend; index++)
+	/*
+	 * It isn't aborted, so check whether the transaction tree it
+	 * belongs to is still running (or, more precisely, whether it
+	 * was running when this routine started -- note that we already
+	 * released SInvalLock).
+	 */
+	topxid = SubTransGetTopmostTransaction(xid);
+	Assert(TransactionIdIsValid(topxid));
+	if (!TransactionIdEquals(topxid, xid))
 	{
-		SHMEM_OFFSET pOffset = stateP[index].procStruct;
-
-		if (pOffset != INVALID_OFFSET)
+		for (i = 0; i < nxids; i++)
 		{
-			PGPROC	   *proc = (PGPROC *) MAKE_PTR(pOffset);
-
-			if (proc->pid == pid)
+			if (TransactionIdEquals(xids[i], topxid))
 			{
 				result = true;
 				break;
@@ -574,7 +640,11 @@ IsBackendPid(int pid)
 		}
 	}
 
-	LWLockRelease(SInvalLock);
+result_known:
+	if (locked)
+		LWLockRelease(SInvalLock);
+
+	pfree(xids);
 
 	return result;
 }
@@ -928,3 +998,85 @@ CountEmptyBackendSlots(void)
 
 	return count;
 }
+
+#define XidCacheRemove(i) \
+	do { \
+		MyProc->subxids.xids[i] = MyProc->subxids.xids[MyProc->subxids.nxids - 1]; \
+		MyProc->subxids.nxids--; \
+	} while (0)
+
+/*
+ * XidCacheRemoveRunningXids
+ *
+ * Remove a bunch of TransactionIds from the list of known-running
+ * subtransactions for my backend.  Both the specified xid and those in
+ * the xids[] array (of length nxids) are removed from the subxids cache.
+ */
+void
+XidCacheRemoveRunningXids(TransactionId xid, int nxids, TransactionId *xids)
+{
+	int		i, j;
+
+	Assert(!TransactionIdEquals(xid, InvalidTransactionId));
+
+	/*
+	 * We must hold SInvalLock exclusively in order to remove transactions
+	 * from the PGPROC array.  (See notes in GetSnapshotData.)  It's
+	 * possible this could be relaxed since we know this routine is only
+	 * used to abort subtransactions, but pending closer analysis we'd
+	 * best be conservative.
+	 */
+	LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
+
+	/*
+	 * Under normal circumstances xid and xids[] will be in increasing order,
+	 * as will be the entries in subxids.  Scan backwards to avoid O(N^2)
+	 * behavior when removing a lot of xids.
+	 */
+	for (i = nxids - 1; i >= 0; i--)
+	{
+		TransactionId	anxid = xids[i];
+
+		for (j = MyProc->subxids.nxids - 1; j >= 0; j--)
+		{
+			if (TransactionIdEquals(MyProc->subxids.xids[j], anxid))
+			{
+				XidCacheRemove(j);
+				break;
+			}
+		}
+		/* We should have found it, unless the cache has overflowed */
+		Assert(j >= 0 || MyProc->subxids.overflowed);
+	}
+
+	for (j = MyProc->subxids.nxids - 1; j >= 0; j--)
+	{
+		if (TransactionIdEquals(MyProc->subxids.xids[j], xid))
+		{
+			XidCacheRemove(j);
+			break;
+		}
+	}
+	/* We should have found it, unless the cache has overflowed */
+	Assert(j >= 0 || MyProc->subxids.overflowed);
+
+	LWLockRelease(SInvalLock);
+}
+
+#ifdef XIDCACHE_DEBUG
+
+/*
+ * on_proc_exit hook to print stats about effectiveness of XID cache
+ */
+static void
+DisplayXidCache(int code, Datum arg)
+{
+	fprintf(stderr,
+			"XidCache: xmin: %ld, mainxid: %ld, childxid: %ld, slow: %ld\n",
+			xc_by_recent_xmin,
+			xc_by_main_xid,
+			xc_by_child_xid,
+			xc_slow_answer);
+}
+
+#endif /* XIDCACHE_DEBUG */
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 6c32c6c3d788117588df3058dcecf75a7fd24f03..d8d718a18239ff17f7d268d1720e43f993d04744 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.221 2004/07/27 05:11:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.222 2004/08/01 17:32:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -326,8 +326,7 @@ ProcessUtility(Node *parsetree,
 				{
 					/*
 					 * START TRANSACTION, as defined by SQL99:
-					 * Identical to BEGIN, except that it takes a few
-					 * additional options.  Same code for both.
+					 * Identical to BEGIN.  Same code for both.
 					 */
 					case TRANS_STMT_BEGIN:
 					case TRANS_STMT_START:
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index 2ae41c013c1b86f4b540cbee44d566c6c6a3d305..e5e21e8719b66c79973e4ead419b8ea0830bc044 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.129 2004/07/12 00:09:06 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.130 2004/08/01 17:32:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -115,19 +115,29 @@ ReindexIsProcessingIndex(Oid indexOid)
 /*
  * SetReindexProcessing
  *		Set flag that specified heap/index are being reindexed.
- *		Pass InvalidOid to indicate that reindexing is not active.
  */
 void
 SetReindexProcessing(Oid heapOid, Oid indexOid)
 {
-	/* Args should be both, or neither, InvalidOid */
-	Assert((heapOid == InvalidOid) == (indexOid == InvalidOid));
+	Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
 	/* Reindexing is not re-entrant. */
-	Assert(indexOid == InvalidOid || currentlyReindexedIndex == InvalidOid);
+	if (OidIsValid(currentlyReindexedIndex))
+		elog(ERROR, "cannot reindex while reindexing");
 	currentlyReindexedHeap = heapOid;
 	currentlyReindexedIndex = indexOid;
 }
 
+/*
+ * ResetReindexProcessing
+ * 		Unset reindexing status.
+ */
+void
+ResetReindexProcessing(void)
+{
+	currentlyReindexedHeap = InvalidOid;
+	currentlyReindexedIndex = InvalidOid;
+}
+
 /* ----------------------------------------------------------------
  *				database path / name support stuff
  * ----------------------------------------------------------------
diff --git a/src/include/access/htup.h b/src/include/access/htup.h
index 37ba1164ec1f1be3c8aa6d03e8b6144edd0e0b6b..971a279b9c939ad42137863c97751ec620fb5cb4 100644
--- a/src/include/access/htup.h
+++ b/src/include/access/htup.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.67 2004/07/11 18:01:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.68 2004/08/01 17:32:19 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -68,34 +68,17 @@
  *			object ID (if HEAP_HASOID is set in t_infomask)
  *			user data fields
  *
- * We store five "virtual" fields Xmin, Cmin, Xmax, Cmax, and Xvac
- * in just three physical fields.  Xmin is always really stored, but
- * Cmin and Xmax share a field, as do Cmax and Xvac.  This works because
- * we know that there are only a limited number of states that a tuple can
- * be in, and that Cmin and Cmax are only interesting for the lifetime of
- * the inserting and deleting transactions respectively.  We have the
- * following possible states of a tuple:
+ * We store five "virtual" fields Xmin, Cmin, Xmax, Cmax, and Xvac in four
+ * physical fields.  Xmin, Cmin and Xmax are always really stored, but
+ * Cmax and Xvac share a field.  This works because we know that there are
+ * only a limited number of states that a tuple can be in, and that Cmax
+ * is only interesting for the lifetime of the deleting transaction.
+ * This assumes that VACUUM FULL never tries to move a tuple whose Cmax
+ * is still interesting (ie, delete-in-progress).
  *
- *		XMIN		CMIN		XMAX		CMAX		XVAC
- *
- * NEW (never deleted, not moved by vacuum):
- *		valid		valid		invalid		invalid		invalid
- *
- * DELETED BY CREATING XACT:
- *		valid		valid		= XMIN		valid		invalid
- *
- * DELETED BY OTHER XACT:
- *		valid		unneeded	valid		valid		invalid
- *
- * MOVED BY VACUUM FULL:
- *		valid		unneeded	maybe-valid unneeded	valid
- *
- * This assumes that VACUUM FULL never tries to move a tuple whose Cmin or
- * Cmax is still interesting (ie, insert-in-progress or delete-in-progress).
- *
- * This table shows that if we use an infomask bit to handle the case
- * XMAX=XMIN specially, we never need to store Cmin and Xmax at the same
- * time.  Nor do we need to store Cmax and Xvac at the same time.
+ * Note that in 7.3 and 7.4 a similar idea was applied to Xmax and Cmin.
+ * However, with the advent of subtransactions, a tuple may need both Xmax
+ * and Cmin simultaneously, so this is no longer possible.
  *
  * Following the fixed header fields, the nulls bitmap is stored (beginning
  * at t_bits).	The bitmap is *not* stored if t_infomask shows that there
@@ -416,7 +399,7 @@ typedef HeapTupleData *HeapTuple;
  * WAL record definitions for heapam.c's WAL operations
  *
  * XLOG allows to store some information in high 4 bits of log
- * record xl_info field
+ * record xl_info field.  We use 3 bits for the opcode and 1 for the init bit.
  */
 #define XLOG_HEAP_INSERT	0x00
 #define XLOG_HEAP_DELETE	0x10
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 532dcf51b0e97e7a4e5a331e05df6ab4df747c94..16b7de333a4c14d8cfeb80c085293b4d93c4e3fa 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.68 2004/07/31 07:39:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.69 2004/08/01 17:32:19 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -42,9 +42,18 @@ extern bool DefaultXactReadOnly;
 extern bool XactReadOnly;
 
 /*
- *	end-of-transaction cleanup callbacks for dynamically loaded modules
+ *	start- and end-of-transaction callbacks for dynamically loaded modules
  */
-typedef void (*EOXactCallback) (bool isCommit, void *arg);
+typedef enum
+{
+	XACT_EVENT_ABORT,
+	XACT_EVENT_COMMIT,
+	XACT_EVENT_START_SUB,
+	XACT_EVENT_ABORT_SUB,
+	XACT_EVENT_COMMIT_SUB
+} XactEvent;
+
+typedef void (*XactCallback) (XactEvent event, TransactionId parentXid, void *arg);
 
 
 /* ----------------
@@ -118,8 +127,8 @@ extern void AbortOutOfAnyTransaction(void);
 extern void PreventTransactionChain(void *stmtNode, const char *stmtType);
 extern void RequireTransactionChain(void *stmtNode, const char *stmtType);
 extern bool IsInTransactionChain(void *stmtNode);
-extern void RegisterEOXactCallback(EOXactCallback callback, void *arg);
-extern void UnregisterEOXactCallback(EOXactCallback callback, void *arg);
+extern void RegisterXactCallback(XactCallback callback, void *arg);
+extern void UnregisterXactCallback(XactCallback callback, void *arg);
 
 extern void RecordTransactionCommit(void);
 
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 0a508861b270150e2acefae1474f2613789fe3d2..3f7c0946d745496bbf7a1bc9852d9a9476cd2901 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -13,7 +13,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/miscadmin.h,v 1.163 2004/06/18 06:14:10 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/miscadmin.h,v 1.164 2004/08/01 17:32:20 tgl Exp $
  *
  * NOTES
  *	  some of the information in this file should be moved to other files.
@@ -308,6 +308,7 @@ extern void BaseInit(void);
 extern void IgnoreSystemIndexes(bool mode);
 extern bool IsIgnoringSystemIndexes(void);
 extern void SetReindexProcessing(Oid heapOid, Oid indexOid);
+extern void ResetReindexProcessing(void);
 extern bool ReindexIsProcessingHeap(Oid heapOid);
 extern bool ReindexIsProcessingIndex(Oid indexOid);
 extern void CreateDataDirLockFile(const char *datadir, bool amPostmaster);
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index c2411bef073c6e83ed7a6e84cc41230b8bb8892e..4f7f39003adc9df3d8cecc08aa92845e8154a175 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.71 2004/07/21 20:34:49 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.72 2004/08/01 17:32:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,25 @@
 #include "storage/pg_sema.h"
 
 
+/*
+ * Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds
+ * for non-aborted subtransactions of its current top transaction.  These
+ * have to be treated as running XIDs by other backends.
+ *
+ * We also keep track of whether the cache overflowed (ie, the transaction has
+ * generated at least one subtransaction that didn't fit in the cache).
+ * If none of the caches have overflowed, we can assume that an XID that's not
+ * listed anywhere in the PGPROC array is not a running transaction.  Else we
+ * have to look at pg_subtrans.
+ */
+#define PGPROC_MAX_CACHED_SUBXIDS 64		/* XXX guessed-at value */
+
+struct XidCache {
+	bool			overflowed;
+	int				nxids;
+	TransactionId	xids[PGPROC_MAX_CACHED_SUBXIDS];
+};
+
 /*
  * Each backend has a PGPROC struct in shared memory.  There is also a list of
  * currently-unused PGPROC structs that will be reallocated to new backends.
@@ -68,6 +87,8 @@ struct PGPROC
 
 	SHM_QUEUE	procHolders;	/* list of PROCLOCK objects for locks held
 								 * or awaited by this backend */
+
+	struct XidCache	subxids;	/* cache for subtransaction XIDs */
 };
 
 /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h
index 84769200504450f8c5089c028465e8429c9592ba..5ac995a29d4b1e392273c4902b5fb9145f34a1dd 100644
--- a/src/include/storage/sinval.h
+++ b/src/include/storage/sinval.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.35 2004/06/02 21:29:29 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.36 2004/08/01 17:32:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -104,6 +104,9 @@ extern int	CountEmptyBackendSlots(void);
 /* Use "struct PGPROC", not PGPROC, to avoid including proc.h here */
 extern struct PGPROC *BackendIdGetProc(BackendId procId);
 
+extern void XidCacheRemoveRunningXids(TransactionId xid,
+									  int nxids, TransactionId *xids);
+
 /* signal handler for catchup events (SIGUSR1) */
 extern void CatchupInterruptHandler(SIGNAL_ARGS);
 
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index 4c579223d91a0fff0bd8c0fafc30ab1442f44f83..f075b96c0fc6af539c92578da358a2f4121c49eb 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -3,7 +3,7 @@
  *			  procedural language
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_exec.c,v 1.111 2004/07/31 23:04:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_exec.c,v 1.112 2004/08/01 17:32:21 tgl Exp $
  *
  *	  This software is copyrighted by Jan Wieck - Hamburg.
  *
@@ -4216,26 +4216,44 @@ exec_set_found(PLpgSQL_execstate * estate, bool state)
  * structs that are using it as no longer active.
  */
 void
-plpgsql_eoxact(bool isCommit, void *arg)
+plpgsql_xact_cb(XactEvent event, TransactionId parentXid, void *arg)
 {
 	PLpgSQL_expr *expr;
 	PLpgSQL_expr *enext;
 
-	/* Mark all active exprs as inactive */
-	for (expr = active_simple_exprs; expr; expr = enext)
+	switch (event)	/* parentXid and arg are not referenced below */
 	{
-		enext = expr->expr_simple_next;
-		expr->expr_simple_state = NULL;
-		expr->expr_simple_next = NULL;
+			/*
+			 * Nothing to do at subtransaction events
+			 *
+			 * XXX really?  Maybe subtransactions need to have their own
+			 * simple_eval_estate?  It would get a lot messier, so for now
+			 * let's assume we don't need that.
+			 */
+		case XACT_EVENT_START_SUB:
+		case XACT_EVENT_ABORT_SUB:
+		case XACT_EVENT_COMMIT_SUB:
+			break;
+
+		case XACT_EVENT_ABORT:
+		case XACT_EVENT_COMMIT:
+			/* Mark all active exprs as inactive: clear state and unlink each */
+			for (expr = active_simple_exprs; expr; expr = enext)
+			{
+				enext = expr->expr_simple_next;	/* fetch link before clearing it */
+				expr->expr_simple_state = NULL;
+				expr->expr_simple_next = NULL;
+			}
+			active_simple_exprs = NULL;	/* list is now empty */
+			/*
+			 * If we are doing a clean transaction shutdown, free the EState
+			 * (so that any remaining resources will be released correctly).
+			 * In an abort, we expect the regular abort recovery procedures to
+			 * release everything of interest.
+			 */
+			if (event == XACT_EVENT_COMMIT && simple_eval_estate)
+				FreeExecutorState(simple_eval_estate);
+			simple_eval_estate = NULL;	/* reset on abort as well as commit */
+			break;
 	}
-	active_simple_exprs = NULL;
-	/*
-	 * If we are doing a clean transaction shutdown, free the EState
-	 * (so that any remaining resources will be released correctly).
-	 * In an abort, we expect the regular abort recovery procedures to
-	 * release everything of interest.
-	 */
-	if (isCommit && simple_eval_estate)
-		FreeExecutorState(simple_eval_estate);
-	simple_eval_estate = NULL;
 }
diff --git a/src/pl/plpgsql/src/pl_handler.c b/src/pl/plpgsql/src/pl_handler.c
index 5f6a83c11d6c2867490cfb91faa45c66ec27d89e..d4e892eb719e2465ea9957b5e41948d0f48e62ed 100644
--- a/src/pl/plpgsql/src/pl_handler.c
+++ b/src/pl/plpgsql/src/pl_handler.c
@@ -3,7 +3,7 @@
  *			  procedural language
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_handler.c,v 1.22 2004/07/31 20:55:44 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_handler.c,v 1.23 2004/08/01 17:32:22 tgl Exp $
  *
  *	  This software is copyrighted by Jan Wieck - Hamburg.
  *
@@ -66,7 +66,7 @@ plpgsql_init(void)
 
 	plpgsql_HashTableInit();
 
-	RegisterEOXactCallback(plpgsql_eoxact, NULL);
+	RegisterXactCallback(plpgsql_xact_cb, NULL);
 
 	plpgsql_firstcall = 0;
 }
diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h
index d57d4a7025ecc85917543cdc7c33ddfdbbec71de..e054d5b25f1ec5197ebdf53a6e6345271e6104ca 100644
--- a/src/pl/plpgsql/src/plpgsql.h
+++ b/src/pl/plpgsql/src/plpgsql.h
@@ -3,7 +3,7 @@
  *			  procedural language
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.49 2004/07/31 23:04:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.50 2004/08/01 17:32:22 tgl Exp $
  *
  *	  This software is copyrighted by Jan Wieck - Hamburg.
  *
@@ -702,7 +702,7 @@ extern Datum plpgsql_exec_function(PLpgSQL_function * func,
 					  FunctionCallInfo fcinfo);
 extern HeapTuple plpgsql_exec_trigger(PLpgSQL_function * func,
 					 TriggerData *trigdata);
-extern void plpgsql_eoxact(bool isCommit, void *arg);
+extern void plpgsql_xact_cb(XactEvent event, TransactionId parentXid, void *arg);
 
 /* ----------
  * Functions for the dynamic string handling in pl_funcs.c