diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 41d7a4e34d5e2328a985636cfc54ec28400cb625..c5473b9501b29958f25c3e24f3dfb3bec8ed63bf 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -1,6 +1,6 @@
 <!--
  Documentation of the system catalogs, directed toward PostgreSQL developers
- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.103 2005/06/13 23:14:47 tgl Exp $
+ $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.104 2005/06/17 22:32:41 tgl Exp $
  -->
 
 <chapter id="catalogs">
@@ -3932,6 +3932,11 @@
       <entry>currently held locks</entry>
      </row>
 
+     <row>
+      <entry><link linkend="view-pg-prepared-xacts"><structname>pg_prepared_xacts</structname></link></entry>
+      <entry>currently prepared transactions</entry>
+     </row>
+
      <row>
       <entry><link linkend="view-pg-rules"><structname>pg_rules</structname></link></entry>
       <entry>rules</entry>
@@ -4167,8 +4172,10 @@
       <entry><structfield>pid</structfield></entry>
       <entry><type>integer</type></entry>
       <entry></entry>
-      <entry>process ID of the server process holding or awaiting this
-      lock</entry>
+      <entry>
+       Process ID of the server process holding or awaiting this
+       lock.  Zero if the lock is held by a prepared transaction.
+      </entry>
      </row>
      <row>
       <entry><structfield>mode</structfield></entry>
@@ -4250,6 +4257,87 @@
 
  </sect1>
 
+ <sect1 id="view-pg-prepared-xacts">
+  <title><structname>pg_prepared_xacts</structname></title>
+
+  <indexterm zone="view-pg-prepared-xacts">
+   <primary>pg_prepared_xacts</primary>
+  </indexterm>
+
+  <para>
+   The view <structname>pg_prepared_xacts</structname> displays
+   information about transactions that are currently prepared for two-phase
+   commit (see <xref linkend="sql-prepare-transaction"
+   endterm="sql-prepare-transaction-title"> for details).
+  </para>
+
+  <para>
+   <structname>pg_prepared_xacts</structname> contains one row per prepared
+   transaction.  An entry is removed when the transaction is committed or
+   rolled back.
+  </para>
+
+  <table>
+   <title><structname>pg_prepared_xacts</> Columns</title>
+
+   <tgroup cols=4>
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Type</entry>
+      <entry>References</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+    <tbody>
+     <row>
+      <entry><structfield>transaction</structfield></entry>
+      <entry><type>xid</type></entry>
+      <entry></entry>
+      <entry>
+       Numeric transaction identifier of the prepared transaction
+      </entry>
+     </row>
+     <row>
+      <entry><structfield>gid</structfield></entry>
+      <entry><type>text</type></entry>
+      <entry></entry>
+      <entry>
+       Global transaction identifier that was assigned to the transaction
+      </entry>
+     </row>
+     <row>
+      <entry><structfield>owner</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry><literal><link linkend="catalog-pg-shadow"><structname>pg_shadow</structname></link>.usename</literal></entry>
+      <entry>
+       Name of the user that executed the transaction
+      </entry>
+     </row>
+     <row>
+      <entry><structfield>database</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry><literal><link linkend="catalog-pg-database"><structname>pg_database</structname></link>.datname</literal></entry>
+      <entry>
+       Name of the database in which the transaction was executed
+      </entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+  <para>
+   When the <structname>pg_prepared_xacts</structname> view is accessed, the
+   internal transaction manager data structures are momentarily locked, and
+   a copy is made for the view to display.  This ensures that the
+   view produces a consistent set of results, while not blocking
+   normal operations longer than necessary.  Nonetheless,
+   there could be some impact on database performance if this view is
+   read often.
+  </para>
+
+ </sect1>
+
  <sect1 id="view-pg-rules">
   <title><structname>pg_rules</structname></title>
 
diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml
index 6326f96f72f7f81d895886613b261c21233a6459..33e9e68b9d55b2dc757253fd2cfaac543cac17a9 100644
--- a/doc/src/sgml/ref/allfiles.sgml
+++ b/doc/src/sgml/ref/allfiles.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/allfiles.sgml,v 1.62 2004/08/21 16:16:04 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/allfiles.sgml,v 1.63 2005/06/17 22:32:42 tgl Exp $
 PostgreSQL documentation
 Complete list of usable sgml source files in this directory.
 -->
@@ -30,6 +30,7 @@ Complete list of usable sgml source files in this directory.
 <!entity cluster            system "cluster.sgml">
 <!entity commentOn          system "comment.sgml">
 <!entity commit             system "commit.sgml">
+<!entity commitPrepared     system "commit_prepared.sgml">
 <!entity copyTable          system "copy.sgml">
 <!entity createAggregate    system "create_aggregate.sgml">
 <!entity createCast         system "create_cast.sgml">
@@ -88,11 +89,13 @@ Complete list of usable sgml source files in this directory.
 <!entity move               system "move.sgml">
 <!entity notify             system "notify.sgml">
 <!entity prepare            system "prepare.sgml">
+<!entity prepareTransaction system "prepare_transaction.sgml">
 <!entity reindex            system "reindex.sgml">
 <!entity releaseSavepoint   system "release_savepoint.sgml">
 <!entity reset              system "reset.sgml">
 <!entity revoke             system "revoke.sgml">
 <!entity rollback           system "rollback.sgml">
+<!entity rollbackPrepared   system "rollback_prepared.sgml">
 <!entity rollbackTo         system "rollback_to.sgml">
 <!entity savepoint          system "savepoint.sgml">
 <!entity select             system "select.sgml">
diff --git a/doc/src/sgml/ref/commit_prepared.sgml b/doc/src/sgml/ref/commit_prepared.sgml
new file mode 100644
index 0000000000000000000000000000000000000000..b18175815b27290cd9649bb96aec72c51e6365c6
--- /dev/null
+++ b/doc/src/sgml/ref/commit_prepared.sgml
@@ -0,0 +1,111 @@
+<!--
+$PostgreSQL: pgsql/doc/src/sgml/ref/commit_prepared.sgml,v 1.1 2005/06/17 22:32:42 tgl Exp $
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-COMMIT-PREPARED">
+ <refmeta>
+  <refentrytitle id="sql-commit-prepared-title">COMMIT PREPARED</refentrytitle>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>COMMIT PREPARED</refname>
+  <refpurpose>commit a transaction that was earlier prepared for two-phase commit</refpurpose>
+ </refnamediv>
+
+ <indexterm zone="sql-commit-prepared">
+  <primary>COMMIT PREPARED</primary>
+ </indexterm>
+
+ <refsynopsisdiv>
+<synopsis>
+COMMIT PREPARED <replaceable class="PARAMETER">transaction_id</replaceable>
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>COMMIT PREPARED</command> commits a transaction that is in 
+   prepared state.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><replaceable class="PARAMETER">transaction_id</replaceable></term>
+    <listitem>
+     <para>
+      The transaction identifier of the transaction that is to be
+      committed.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Notes</title>
+
+  <para>
+   To commit a prepared transaction, you must be either the same user that
+   executed the transaction originally, or a superuser.  But you do not
+   have to be in the same session that executed the transaction.
+  </para>
+
+  <para>
+   This command cannot be executed inside a transaction block. The prepared
+   transaction is committed immediately.
+  </para>
+
+  <para>
+   All currently available prepared transactions are listed in the
+   <structname>pg_prepared_xacts</> system view.
+  </para>
+ </refsect1>
+
+ <refsect1 id="sql-commit-prepared-examples">
+  <title id="sql-commit-prepared-examples-title">Examples</title>
+  <para>
+   Commit the transaction identified by the transaction
+   identifier <literal>foobar</>:
+   
+<programlisting>
+COMMIT PREPARED 'foobar';
+</programlisting>
+  </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-prepare-transaction" endterm="sql-prepare-transaction-title"></member>
+   <member><xref linkend="sql-rollback-prepared" endterm="sql-rollback-prepared-title"></member>
+  </simplelist>
+ </refsect1>
+
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:nil
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:1
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:"../reference.ced"
+sgml-exposed-tags:nil
+sgml-local-catalogs:"/usr/lib/sgml/catalog"
+sgml-local-ecat-files:nil
+End:
+-->
diff --git a/doc/src/sgml/ref/prepare_transaction.sgml b/doc/src/sgml/ref/prepare_transaction.sgml
new file mode 100644
index 0000000000000000000000000000000000000000..773689ae06d69382232bbccf22b10af48a97e0dc
--- /dev/null
+++ b/doc/src/sgml/ref/prepare_transaction.sgml
@@ -0,0 +1,160 @@
+<!--
+$PostgreSQL: pgsql/doc/src/sgml/ref/prepare_transaction.sgml,v 1.1 2005/06/17 22:32:42 tgl Exp $
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-PREPARE-TRANSACTION">
+ <refmeta>
+  <refentrytitle id="sql-prepare-transaction-title">PREPARE TRANSACTION</refentrytitle>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>PREPARE TRANSACTION</refname>
+  <refpurpose>prepare the current transaction for two-phase commit</refpurpose>
+ </refnamediv>
+
+ <indexterm zone="sql-prepare-transaction">
+  <primary>PREPARE TRANSACTION</primary>
+ </indexterm>
+
+ <refsynopsisdiv>
+<synopsis>
+PREPARE TRANSACTION <replaceable class="PARAMETER">transaction_id</replaceable>
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>PREPARE TRANSACTION</command> prepares the current transaction
+   for two-phase commit. After this command, the transaction is no longer 
+   associated with the current session; instead, its state is fully stored on
+   disk, and there is a very high probability that it can be committed
+   successfully, even if a database crash occurs before the commit is
+   requested.
+  </para>
+
+  <para>
+   Once prepared, a transaction can later be committed or rolled
+   back with <command>COMMIT PREPARED</command> or 
+   <command>ROLLBACK PREPARED</command>, respectively.  Those commands
+   can be issued from any session, not only the one that executed the
+   original transaction.
+  </para>
+
+  <para>
+   From the point of view of the issuing session, <command>PREPARE
+   TRANSACTION</command> is not unlike a <command>ROLLBACK</> command:
+   after executing it, there is no active current transaction, and the
+   effects of the prepared transaction are no longer visible.  (The effects
+   will become visible again if the transaction is committed.)
+  </para>
+
+  <para>
+   If the <command>PREPARE TRANSACTION</command> command fails for any
+   reason, it becomes a <command>ROLLBACK</>: the current transaction
+   is canceled.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><replaceable class="PARAMETER">transaction_id</replaceable></term>
+    <listitem>
+     <para>
+      An arbitrary identifier that later identifies this transaction for
+      <command>COMMIT PREPARED</> or <command>ROLLBACK PREPARED</>.
+      The identifier must be written as a string literal, and must be
+      less than 200 bytes long.  It must not be the same as the identifier
+      used for any currently prepared transaction.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Notes</title>
+
+  <para>
+   This command must be used inside a transaction block. Use
+   <command>BEGIN</command> to start one.
+  </para>
+
+  <para>
+   It is not currently allowed to <command>PREPARE</> a transaction that
+   has executed any operations involving temporary tables or
+   created any cursors <literal>WITH HOLD</>.  Those features are too tightly
+   tied to the current session to be useful in a transaction to be prepared.
+  </para>
+
+  <para>
+   If the transaction modified any run-time parameters with <command>SET</>,
+   those effects persist after <command>PREPARE TRANSACTION</>, and will not
+   be affected by any later <command>COMMIT PREPARED</command> or 
+   <command>ROLLBACK PREPARED</command>.  Thus, in this one respect
+   <command>PREPARE TRANSACTION</> acts more like <command>COMMIT</> than
+   <command>ROLLBACK</>.
+  </para>
+
+  <para>
+   All currently available prepared transactions are listed in the
+   <structname>pg_prepared_xacts</> system view.
+  </para>
+
+  <para>
+   From a performance standpoint, it is unwise to leave transactions in
+   the prepared state for a long time: this will for instance interfere with
+   the ability of <command>VACUUM</> to reclaim storage.  Keep in mind also
+   that the transaction continues to hold whatever locks it held.
+   The intended
+   usage of the feature is that a prepared transaction will normally be
+   committed or rolled back as soon as an external transaction manager
+   has verified that other databases are also prepared to commit.
+  </para>
+ </refsect1>
+
+ <refsect1 id="sql-prepare-transaction-examples">
+  <title id="sql-prepare-transaction-examples-title">Examples</title>
+  <para>
+   Prepare the current transaction for two-phase commit, using
+   <literal>foobar</> as the transaction identifier:
+   
+<programlisting>
+PREPARE TRANSACTION 'foobar';
+</programlisting>
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-commit-prepared" endterm="sql-commit-prepared-title"></member>
+   <member><xref linkend="sql-rollback-prepared" endterm="sql-rollback-prepared-title"></member>
+  </simplelist>
+ </refsect1>
+
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:nil
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:1
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:"../reference.ced"
+sgml-exposed-tags:nil
+sgml-local-catalogs:"/usr/lib/sgml/catalog"
+sgml-local-ecat-files:nil
+End:
+-->
diff --git a/doc/src/sgml/ref/rollback_prepared.sgml b/doc/src/sgml/ref/rollback_prepared.sgml
new file mode 100644
index 0000000000000000000000000000000000000000..51df92263219612dca5cb876a30e47eb6aebb9b3
--- /dev/null
+++ b/doc/src/sgml/ref/rollback_prepared.sgml
@@ -0,0 +1,111 @@
+<!--
+$PostgreSQL: pgsql/doc/src/sgml/ref/rollback_prepared.sgml,v 1.1 2005/06/17 22:32:42 tgl Exp $
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-ROLLBACK-PREPARED">
+ <refmeta>
+  <refentrytitle id="sql-rollback-prepared-title">ROLLBACK PREPARED</refentrytitle>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>ROLLBACK PREPARED</refname>
+  <refpurpose>cancel a transaction that was earlier prepared for two-phase commit</refpurpose>
+ </refnamediv>
+
+ <indexterm zone="sql-rollback-prepared">
+  <primary>ROLLBACK PREPARED</primary>
+ </indexterm>
+
+ <refsynopsisdiv>
+<synopsis>
+ROLLBACK PREPARED <replaceable class="PARAMETER">transaction_id</replaceable>
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>ROLLBACK PREPARED</command> rolls back a transaction that is in 
+   prepared state.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><replaceable class="PARAMETER">transaction_id</replaceable></term>
+    <listitem>
+     <para>
+      The transaction identifier of the transaction that is to be
+      rolled back.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Notes</title>
+
+  <para>
+   To roll back a prepared transaction, you must be either the same user that
+   executed the transaction originally, or a superuser.  But you do not
+   have to be in the same session that executed the transaction.
+  </para>
+
+  <para>
+   This command cannot be executed inside a transaction block. The prepared
+   transaction is rolled back immediately.
+  </para>
+
+  <para>
+   All currently available prepared transactions are listed in the
+   <structname>pg_prepared_xacts</> system view.
+  </para>
+ </refsect1>
+
+ <refsect1 id="sql-rollback-prepared-examples">
+  <title id="sql-rollback-prepared-examples-title">Examples</title>
+  <para>
+   Roll back the transaction identified by the transaction
+   identifier <literal>foobar</>:
+   
+<programlisting>
+ROLLBACK PREPARED 'foobar';
+</programlisting>
+  </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-prepare-transaction" endterm="sql-prepare-transaction-title"></member>
+   <member><xref linkend="sql-commit-prepared" endterm="sql-commit-prepared-title"></member>
+  </simplelist>
+ </refsect1>
+
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:nil
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:1
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:"../reference.ced"
+sgml-exposed-tags:nil
+sgml-local-catalogs:"/usr/lib/sgml/catalog"
+sgml-local-ecat-files:nil
+End:
+-->
diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml
index 01dc578b9d3e2bdc9a698da4084100153337c860..4edec85c1225d17abcd7fd3859ebc07c42a8b71a 100644
--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -1,5 +1,5 @@
 <!-- reference.sgml
-$PostgreSQL: pgsql/doc/src/sgml/reference.sgml,v 1.52 2004/08/21 16:16:03 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/reference.sgml,v 1.53 2005/06/17 22:32:42 tgl Exp $
 
 PostgreSQL Reference Manual
 -->
@@ -62,6 +62,7 @@ PostgreSQL Reference Manual
    &cluster;
    &commentOn;
    &commit;
+   &commitPrepared;
    &copyTable;
    &createAggregate;
    &createCast;
@@ -120,11 +121,13 @@ PostgreSQL Reference Manual
    &move;
    &notify;
    &prepare;
+   &prepareTransaction;
    &reindex;
    &releaseSavepoint;
    &reset;
    &revoke;
    &rollback;
+   &rollbackPrepared;
    &rollbackTo;
    &savepoint;
    &select;
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index dfb86511c3e1ca682a1f0d885b58974a395bf201..1a2a9935cc3489bc7e827a898306449604298154 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.328 2005/06/17 13:12:01 momjian Exp $
+$PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.329 2005/06/17 22:32:42 tgl Exp $
 -->
 
 <chapter Id="runtime">
@@ -956,7 +956,7 @@ SET ENABLE_SEQSCAN TO OFF;
        <para>
         Sets the location of the Kerberos server key file. See
         <xref linkend="kerberos-auth"> for details. This parameter
-		can only be set at server start.
+        can only be set at server start.
        </para>
       </listitem>
      </varlistentry>
@@ -1113,6 +1113,33 @@ SET ENABLE_SEQSCAN TO OFF;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-max-prepared-transactions" xreflabel="max_prepared_transactions">
+      <term><varname>max_prepared_transactions</varname> (<type>integer</type>)</term>
+      <indexterm>
+       <primary><varname>max_prepared_transactions</> configuration parameter</primary>
+      </indexterm>
+      <listitem>
+       <para>
+        Sets the maximum number of transactions that can be in the
+        <quote>prepared</> state simultaneously (see <xref
+        linkend="sql-prepare-transaction"
+        endterm="sql-prepare-transaction-title">).
+        Setting this parameter to zero disables the prepared-transaction
+        feature.
+        The default is 50.
+        This parameter can only be set at server start.
+       </para>
+
+       <para>
+        Increasing this parameter may cause <productname>PostgreSQL</>
+        to request more <systemitem class="osname">System V</> shared
+        memory than your operating system's default configuration
+        allows. See <xref linkend="sysvipc"> for information on how to
+        adjust those parameters, if necessary.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-work-mem" xreflabel="work_mem">
       <term><varname>work_mem</varname> (<type>integer</type>)</term>
       <indexterm>
diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile
index 295ecf9e14f64d0ba94f657251f729583441a400..c6ecef17246d1d8ba62c5fdc76070095c8f26bda 100644
--- a/src/backend/access/transam/Makefile
+++ b/src/backend/access/transam/Makefile
@@ -4,7 +4,7 @@
 #    Makefile for access/transam
 #
 # IDENTIFICATION
-#    $PostgreSQL: pgsql/src/backend/access/transam/Makefile,v 1.20 2005/04/28 21:47:10 tgl Exp $
+#    $PostgreSQL: pgsql/src/backend/access/transam/Makefile,v 1.21 2005/06/17 22:32:42 tgl Exp $
 #
 #-------------------------------------------------------------------------
 
@@ -12,7 +12,7 @@ subdir = src/backend/access/transam
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = clog.o transam.o varsup.o xact.o xlog.o xlogutils.o rmgr.o slru.o subtrans.o multixact.o
+OBJS = clog.o transam.o varsup.o xact.o xlog.o xlogutils.o rmgr.o slru.o subtrans.o multixact.o twophase.o twophase_rmgr.o
 
 all: SUBSYS.o
 
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 0b774363888d949e35853f8eef6b76d34bb9e6e7..cea778d6a1ce76f7a4c88adcdf31b0067a902e50 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -22,7 +22,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.8 2005/05/19 21:35:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.9 2005/06/17 22:32:42 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -222,22 +222,33 @@ ZeroSUBTRANSPage(int pageno)
 /*
  * This must be called ONCE during postmaster or standalone-backend startup,
  * after StartupXLOG has initialized ShmemVariableCache->nextXid.
+ *
+ * oldestActiveXID is the oldest XID of any prepared transaction, or nextXid
+ * if there are none.
  */
 void
-StartupSUBTRANS(void)
+StartupSUBTRANS(TransactionId oldestActiveXID)
 {
 	int			startPage;
+	int			endPage;
 
 	/*
 	 * Since we don't expect pg_subtrans to be valid across crashes, we
-	 * initialize the currently-active page to zeroes during startup.
+	 * initialize the currently-active page(s) to zeroes during startup.
 	 * Whenever we advance into a new page, ExtendSUBTRANS will likewise
 	 * zero the new page without regard to whatever was previously on
 	 * disk.
 	 */
 	LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
 
-	startPage = TransactionIdToPage(ShmemVariableCache->nextXid);
+	startPage = TransactionIdToPage(oldestActiveXID);
+	endPage = TransactionIdToPage(ShmemVariableCache->nextXid);
+
+	while (startPage != endPage)
+	{
+		(void) ZeroSUBTRANSPage(startPage);
+		startPage++;
+	}
 	(void) ZeroSUBTRANSPage(startPage);
 
 	LWLockRelease(SubtransControlLock);
diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c
index f88c25a37db72868234506017e1af3fe89f79ac2..5fa6f82daf48e9bb3661be03a069e96da95dccb3 100644
--- a/src/backend/access/transam/transam.c
+++ b/src/backend/access/transam/transam.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/transam/transam.c,v 1.64 2005/02/20 21:46:48 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/transam/transam.c,v 1.65 2005/06/17 22:32:42 tgl Exp $
  *
  * NOTES
  *	  This file contains the high level access-method interface to the
@@ -173,6 +173,14 @@ TransactionIdDidCommit(TransactionId transactionId)
 	 * recursively. However, if it's older than TransactionXmin, we can't
 	 * look at pg_subtrans; instead assume that the parent crashed without
 	 * cleaning up its children.
+	 *
+	 * Originally we Assert'ed that the result of SubTransGetParent was
+	 * not zero.  However with the introduction of prepared transactions,
+	 * there can be a window just after database startup where we do not
+	 * have complete knowledge in pg_subtrans of the transactions after
+	 * TransactionXmin.  StartupSUBTRANS() has ensured that any missing
+	 * information will be zeroed.  Since this case should not happen under
+	 * normal conditions, it seems reasonable to emit a WARNING for it.
 	 */
 	if (xidstatus == TRANSACTION_STATUS_SUB_COMMITTED)
 	{
@@ -181,7 +189,12 @@ TransactionIdDidCommit(TransactionId transactionId)
 		if (TransactionIdPrecedes(transactionId, TransactionXmin))
 			return false;
 		parentXid = SubTransGetParent(transactionId);
-		Assert(TransactionIdIsValid(parentXid));
+		if (!TransactionIdIsValid(parentXid))
+		{
+			elog(WARNING, "no pg_subtrans entry for subcommitted XID %u",
+				 transactionId);
+			return false;
+		}
 		return TransactionIdDidCommit(parentXid);
 	}
 
@@ -224,7 +237,13 @@ TransactionIdDidAbort(TransactionId transactionId)
 		if (TransactionIdPrecedes(transactionId, TransactionXmin))
 			return true;
 		parentXid = SubTransGetParent(transactionId);
-		Assert(TransactionIdIsValid(parentXid));
+		if (!TransactionIdIsValid(parentXid))
+		{
+			/* see notes in TransactionIdDidCommit */
+			elog(WARNING, "no pg_subtrans entry for subcommitted XID %u",
+				 transactionId);
+			return true;
+		}
 		return TransactionIdDidAbort(parentXid);
 	}
 
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
new file mode 100644
index 0000000000000000000000000000000000000000..01cc50a6a4670aa50df53a8eb62ac1392b063f36
--- /dev/null
+++ b/src/backend/access/transam/twophase.c
@@ -0,0 +1,1659 @@
+/*-------------------------------------------------------------------------
+ *
+ * twophase.c
+ *		Two-phase commit support functions.
+ *
+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *		$PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.1 2005/06/17 22:32:42 tgl Exp $
+ *
+ * NOTES
+ *		Each global transaction is associated with a global transaction
+ *		identifier (GID). The client assigns a GID to a postgres
+ *		transaction with the PREPARE TRANSACTION command.
+ *
+ *		We keep all active global transactions in a shared memory array.
+ *		When the PREPARE TRANSACTION command is issued, the GID is
+ *		reserved for the transaction in the array. This is done before
+ *		a WAL entry is made, because the reservation checks for duplicate
+ *		GIDs and aborts the transaction if there already is a global
+ *		transaction in prepared state with the same GID.
+ *
+ *		A global transaction (gxact) also has a dummy PGPROC that is entered
+ *		into the global ProcArray; this is what keeps the XID considered
+ *		running by TransactionIdIsInProgress.  It is also convenient as a
+ *		PGPROC to hook the gxact's locks to.
+ *
+ *		In order to survive crashes and shutdowns, all prepared
+ *		transactions must be stored in permanent storage. This includes
+ *		locking information, pending notifications etc. All that state
+ *		information is written to the per-transaction state file in
+ *		the pg_twophase directory.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "access/heapam.h"
+#include "access/subtrans.h"
+#include "access/twophase.h"
+#include "access/twophase_rmgr.h"
+#include "access/xact.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/fd.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "storage/smgr.h"
+#include "utils/builtins.h"
+#include "pgstat.h"
+
+
+/*
+ * Name of the subdirectory of PGDATA in which two-phase state files reside
+ */
+#define TWOPHASE_DIR "pg_twophase"
+
+/* GUC variable (sizes the prepXacts array); can't be changed after startup */
+int max_prepared_xacts = 50;
+
+/*
+ * This struct describes one global transaction that is in prepared state
+ * or attempting to become prepared.
+ *
+ * The first component of the struct is a dummy PGPROC that is inserted
+ * into the global ProcArray so that the transaction appears to still be
+ * running and holding locks.  It must be first because we cast pointers
+ * to PGPROC and pointers to GlobalTransactionData back and forth.
+ *
+ * The lifecycle of a global transaction is:
+ *
+ * 1. MarkAsPreparing: after checking that the requested GID is not in use,
+ * set up an entry in the TwoPhaseState->prepXacts array with the correct XID
+ * and GID, with locking_xid = my own XID and valid = false.
+ *
+ * 2. MarkAsPrepared: after successfully completing prepare, set valid = true
+ * and enter the contained PGPROC into the global ProcArray.
+ *
+ * 3. LockGXact: to begin COMMIT PREPARED or ROLLBACK PREPARED, check that the
+ * entry is valid and its locking_xid is no longer active, then store my
+ * current XID into locking_xid.  This prevents concurrent attempts to commit
+ * or roll back the same prepared xact.
+ *
+ * 4. On completion of COMMIT PREPARED or ROLLBACK PREPARED, remove the entry
+ * from the ProcArray and the TwoPhaseState->prepXacts array and return it to
+ * the freelist (see RemoveGXact).
+ *
+ * Note that if the preparing transaction fails between steps 1 and 2, the
+ * entry will remain in prepXacts until recycled.  We can detect recyclable
+ * entries by checking for valid = false and locking_xid no longer active.
+ *
+ * typedef struct GlobalTransactionData *GlobalTransaction appears in
+ * twophase.h
+ */
+#define GIDSIZE 200				/* size of gid[]; a GID is at most 199 chars */
+
+typedef struct GlobalTransactionData
+{
+	PGPROC		proc;			/* dummy proc */
+	AclId		owner;			/* ID of user that executed the xact */
+	TransactionId locking_xid;	/* top-level XID of backend working on xact */
+	bool		valid;			/* TRUE if fully prepared */
+	char gid[GIDSIZE];			/* The GID assigned to the prepared xact */
+} GlobalTransactionData;
+
+/*
+ * Two Phase Commit shared state.  Access to this struct is protected
+ * by TwoPhaseStateLock.
+ */
+typedef struct TwoPhaseStateData
+{
+	/* Head of free GlobalTransactionData list, linked via proc.links.next */
+	SHMEM_OFFSET freeGXacts;
+
+	/* Number of prepXacts entries in use (includes not-yet-valid gxacts) */
+	int		numPrepXacts;
+
+	/*
+	 * There are max_prepared_xacts items in this array, but C wants a
+	 * fixed-size array.
+	 */
+	GlobalTransaction	prepXacts[1]; /* VARIABLE LENGTH ARRAY */
+} TwoPhaseStateData;			/* VARIABLE LENGTH STRUCT */
+
+static TwoPhaseStateData *TwoPhaseState;	/* set by TwoPhaseShmemInit */
+
+
+static void RecordTransactionCommitPrepared(TransactionId xid,
+											int nchildren,
+											TransactionId *children,
+											int nrels,
+											RelFileNode *rels);
+static void RecordTransactionAbortPrepared(TransactionId xid,
+											int nchildren,
+											TransactionId *children,
+											int nrels,
+											RelFileNode *rels);
+static void ProcessRecords(char *bufptr, TransactionId xid,
+						   const TwoPhaseCallback callbacks[]);
+
+
+/*
+ * Shared memory size: MAXALIGN'd header + pointer array, then the GTD structs
+ */
+int
+TwoPhaseShmemSize(void)
+{
+	int			ctl_bytes = MAXALIGN(offsetof(TwoPhaseStateData, prepXacts) +
+					sizeof(GlobalTransaction) * max_prepared_xacts);
+
+	return ctl_bytes + sizeof(GlobalTransactionData) * max_prepared_xacts;
+}
+
+void
+TwoPhaseShmemInit(void)
+{
+	bool		already_exists;
+
+	/* Create the shared 2PC state area, or attach to an existing one */
+	TwoPhaseState = ShmemInitStruct("Prepared Transaction Table",
+									TwoPhaseShmemSize(),
+									&already_exists);
+	if (IsUnderPostmaster)
+		Assert(already_exists);		/* must have been set up before us */
+	else
+	{
+		GlobalTransaction slot;
+		int			nleft;
+
+		Assert(!already_exists);
+		TwoPhaseState->numPrepXacts = 0;
+		TwoPhaseState->freeGXacts = INVALID_OFFSET;
+
+		/* The GTD structs start right after the MAXALIGN'd pointer array */
+		slot = (GlobalTransaction)
+			((char *) TwoPhaseState +
+			 MAXALIGN(offsetof(TwoPhaseStateData, prepXacts) +
+					  sizeof(GlobalTransaction) * max_prepared_xacts));
+		/* Push each of them onto the (initially empty) freelist */
+		for (nleft = max_prepared_xacts; nleft > 0; nleft--, slot++)
+		{
+			slot->proc.links.next = TwoPhaseState->freeGXacts;
+			TwoPhaseState->freeGXacts = MAKE_OFFSET(slot);
+		}
+	}
+}
+
+
+/*
+ * MarkAsPreparing
+ * 		Reserve the GID for the given transaction.  Throws an ERROR if the GID
+ *		is too long or already in use, or if no free gxact slot is left.
+ * Returns the new gxact, already in the active array but not yet valid.
+ * NOTE: this is also used when reloading a gxact after a crash; so avoid
+ * assuming that we can use very much backend context.
+ */
+GlobalTransaction
+MarkAsPreparing(TransactionId xid, Oid databaseid, char *gid, AclId owner)
+{
+	GlobalTransaction	gxact;
+	int i;
+
+	if (strlen(gid) >= GIDSIZE)		/* must fit in gid[] along with its NUL */
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("global transaction identifier \"%s\" is too long",
+						gid)));
+
+	LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
+
+	/*
+	 * First, find and recycle any gxacts that failed during prepare.
+	 * We do this partly to ensure we don't mistakenly say their GIDs
+	 * are still reserved, and partly so we don't fail on out-of-slots
+	 * unnecessarily.
+	 */
+	for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
+	{
+		gxact = TwoPhaseState->prepXacts[i];
+		if (!gxact->valid && !TransactionIdIsActive(gxact->locking_xid))
+		{
+			/* It's dead Jim ... move the last entry into its array slot */
+			TwoPhaseState->numPrepXacts--;
+			TwoPhaseState->prepXacts[i] = TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts];
+			/* and put it back in the freelist */
+			gxact->proc.links.next = TwoPhaseState->freeGXacts;
+			TwoPhaseState->freeGXacts = MAKE_OFFSET(gxact);
+			/* Back up index count too, so we rescan the entry moved to i */
+			i--;
+		}
+	}
+
+	/* Check for conflicting GID; entries still being prepared count too */
+	for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
+	{
+		gxact = TwoPhaseState->prepXacts[i];
+		if (strcmp(gxact->gid, gid) == 0)
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_OBJECT),
+					 errmsg("global transaction identifier \"%s\" is already in use",
+							gid)));
+		}
+	}
+
+	/* Get a free gxact from the freelist, or fail if there is none */
+	if (TwoPhaseState->freeGXacts == INVALID_OFFSET)
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("maximum number of prepared transactions reached"),
+				 errhint("Increase max_prepared_transactions (currently %d).",
+						 max_prepared_xacts)));
+	gxact = (GlobalTransaction) MAKE_PTR(TwoPhaseState->freeGXacts);
+	TwoPhaseState->freeGXacts = gxact->proc.links.next;
+
+	/* Initialize it: a zeroed dummy PGPROC with pid 0 and the xact's XID */
+	MemSet(&gxact->proc, 0, sizeof(PGPROC));
+	SHMQueueElemInit(&(gxact->proc.links));
+	gxact->proc.waitStatus = STATUS_OK;
+	gxact->proc.xid = xid;
+	gxact->proc.xmin = InvalidTransactionId;
+	gxact->proc.pid = 0;
+	gxact->proc.databaseId = databaseid;
+	gxact->proc.lwWaiting = false;
+	gxact->proc.lwExclusive = false;
+	gxact->proc.lwWaitLink = NULL;
+	gxact->proc.waitLock = NULL;
+	gxact->proc.waitProcLock = NULL;
+	SHMQueueInit(&(gxact->proc.procLocks));
+	/* subxid data must be filled later by GXactLoadSubxactData */
+	gxact->proc.subxids.overflowed = false;
+	gxact->proc.subxids.nxids = 0;
+
+	gxact->owner = owner;
+	gxact->locking_xid = xid;		/* the preparing xact itself holds it */
+	gxact->valid = false;			/* until MarkAsPrepared says otherwise */
+	strcpy(gxact->gid, gid);		/* length was checked on entry */
+
+	/* And insert it into the active array */
+	Assert(TwoPhaseState->numPrepXacts < max_prepared_xacts);
+	TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts++] = gxact;
+
+	LWLockRelease(TwoPhaseStateLock);
+
+	return gxact;
+}
+
+/*
+ * GXactLoadSubxactData
+ *		Copy the subtransaction XIDs of the transaction being persisted into
+ *		its dummy PGPROC.  Must be called before MarkAsPrepared().
+ */
+static void
+GXactLoadSubxactData(GlobalTransaction gxact, int nsubxacts,
+					 TransactionId *children)
+{
+	int			ncached = nsubxacts;
+
+	/* No extra lock is taken, because the GXACT isn't valid yet */
+	if (ncached > PGPROC_MAX_CACHED_SUBXIDS)
+	{
+		ncached = PGPROC_MAX_CACHED_SUBXIDS;
+		gxact->proc.subxids.overflowed = true;
+	}
+	if (ncached <= 0)
+		return;
+
+	memcpy(gxact->proc.subxids.xids, children,
+		   ncached * sizeof(TransactionId));
+	gxact->proc.subxids.nxids = ncached;
+}
+
+/*
+ * MarkAsPrepared
+ *		Mark the GXACT as fully valid, and enter it into the global ProcArray.
+ */
+void
+MarkAsPrepared(GlobalTransaction gxact)
+{
+	/* Lock here may be overkill, but I'm not convinced of that ... */
+	LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
+	Assert(!gxact->valid);
+	gxact->valid = true;			/* from now on LockGXact can find it */
+	LWLockRelease(TwoPhaseStateLock);
+
+	/*
+	 * Put it into the global ProcArray so TransactionIdIsInProgress considers
+	 * the XID as still running.
+	 */
+	ProcArrayAdd(&gxact->proc);
+}
+
+/*
+ * LockGXact
+ *		Find the valid gxact for gid and lock it for COMMIT/ROLLBACK PREPARED.
+ */
+static GlobalTransaction
+LockGXact(char *gid, AclId user)
+{
+	int i;
+
+	LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
+
+	for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
+	{
+		GlobalTransaction	gxact = TwoPhaseState->prepXacts[i];
+
+		/* Ignore gxacts that are not marked valid, and other GIDs */
+		if (!gxact->valid)
+			continue;
+		if (strcmp(gxact->gid, gid) != 0)
+			continue;
+
+		/* Found it, but has someone else got it locked? */
+		if (TransactionIdIsValid(gxact->locking_xid))
+		{
+			if (TransactionIdIsActive(gxact->locking_xid))
+				ereport(ERROR,
+						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+						 errmsg("prepared transaction with gid \"%s\" is busy",
+								gid)));
+			gxact->locking_xid = InvalidTransactionId;	/* locker is gone */
+		}
+
+		if (user != gxact->owner && !superuser_arg(user))	/* not allowed */
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("permission denied to finish prepared transaction"),
+					 errhint("Must be superuser or the user that prepared the transaction.")));
+
+		/* OK for me to lock it: record my top-level XID as the locker */
+		gxact->locking_xid = GetTopTransactionId();
+
+		LWLockRelease(TwoPhaseStateLock);
+
+		return gxact;
+	}
+
+	LWLockRelease(TwoPhaseStateLock);
+
+	ereport(ERROR,
+			(errcode(ERRCODE_UNDEFINED_OBJECT),
+			 errmsg("prepared transaction with gid \"%s\" does not exist",
+					gid)));
+
+	/* NOTREACHED */
+	return NULL;
+}
+
+/*
+ * RemoveGXact
+ *		Drop a gxact from the shared active array and recycle its struct.
+ *		NB: caller should have already removed it from ProcArray.
+ */
+static void
+RemoveGXact(GlobalTransaction gxact)
+{
+	int			slot;
+	int			last;
+
+	LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
+
+	/* Search the active array for this gxact's position */
+	for (slot = 0; slot < TwoPhaseState->numPrepXacts; slot++)
+	{
+		if (TwoPhaseState->prepXacts[slot] != gxact)
+			continue;
+
+		/* close the gap by moving the last active entry into it */
+		last = --TwoPhaseState->numPrepXacts;
+		TwoPhaseState->prepXacts[slot] = TwoPhaseState->prepXacts[last];
+
+		/* then push the struct onto the front of the freelist */
+		gxact->proc.links.next = TwoPhaseState->freeGXacts;
+		TwoPhaseState->freeGXacts = MAKE_OFFSET(gxact);
+
+		LWLockRelease(TwoPhaseStateLock);
+		return;
+	}
+
+	LWLockRelease(TwoPhaseStateLock);
+
+	elog(ERROR, "failed to find %p in GlobalTransaction array", gxact);
+}
+
+/*
+ * GetPreparedTransactionList
+ *		Build a snapshot of all gxacts in the active array, for the use of
+ *		the user-level function pg_prepared_xact.
+ *
+ * The entries are copied by value into a single palloc'd array so that
+ * TwoPhaseStateLock need be held only while copying.  *gxacts is set to
+ * that array (or to NULL if there are no entries), and the number of
+ * entries is returned.
+ *
+ * WARNING -- we return even those transactions that are not fully prepared
+ * yet.  The caller should filter them out if he doesn't want them.
+ */
+static int
+GetPreparedTransactionList(GlobalTransaction *gxacts)
+{
+	GlobalTransaction snapshot = NULL;
+	int			count;
+	int			idx;
+
+	LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
+
+	count = TwoPhaseState->numPrepXacts;
+	if (count != 0)
+	{
+		/* copy every active entry by value into one contiguous array */
+		snapshot = (GlobalTransaction)
+			palloc(sizeof(GlobalTransactionData) * count);
+		for (idx = 0; idx < count; idx++)
+			memcpy(&snapshot[idx], TwoPhaseState->prepXacts[idx],
+				   sizeof(GlobalTransactionData));
+	}
+
+	LWLockRelease(TwoPhaseStateLock);
+
+	/* hand the copy (NULL when there was nothing to copy) to the caller */
+	*gxacts = snapshot;
+
+	return count;
+}
+
+
+/* Working status for pg_prepared_xact, kept across calls of the SRF */
+typedef struct
+{
+	GlobalTransaction array;	/* copies of the gxacts, or NULL if none */
+	int		ngxacts;			/* number of entries in array */
+	int		currIdx;			/* index of the next entry to examine */
+} Working_State;
+
+/*
+ * pg_prepared_xact
+ * 		Set-returning function that produces one row (transaction, gid,
+ *		ownerid, dbid) per fully prepared transaction.
+ *
+ * It lives here so we don't have to export the GlobalTransactionData struct.
+ */
+Datum
+pg_prepared_xact(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	Working_State *status;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		TupleDesc	tupdesc;
+		MemoryContext oldcontext;
+
+		/* create a function context for cross-call persistence */
+		funcctx = SRF_FIRSTCALL_INIT();
+
+		/*
+		 * Switch to memory context appropriate for multiple function
+		 * calls
+		 */
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+		/* build tupdesc for result tuples */
+		/* this had better match pg_prepared_xacts view in system_views.sql */
+		tupdesc = CreateTemplateTupleDesc(4, false);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "transaction",
+						   XIDOID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "gid",
+						   TEXTOID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "ownerid",
+						   INT4OID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "dbid",
+						   OIDOID, -1, 0);
+
+		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+		/*
+		 * Collect all the 2PC status information that we will format and
+		 * send out as a result set.  It is taken once, on the first call.
+		 */
+		status = (Working_State *) palloc(sizeof(Working_State));
+		funcctx->user_fctx = (void *) status;
+
+		status->ngxacts = GetPreparedTransactionList(&status->array);
+		status->currIdx = 0;
+
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+	status = (Working_State *) funcctx->user_fctx;
+
+	while (status->array != NULL && status->currIdx < status->ngxacts)
+	{
+		GlobalTransaction gxact = &status->array[status->currIdx++];
+		Datum		values[4];
+		bool		nulls[4];
+		HeapTuple	tuple;
+		Datum		result;
+
+		if (!gxact->valid)		/* skip entries that are not fully prepared */
+			continue;
+
+		/*
+		 * Form tuple with appropriate data; no column is ever NULL.
+		 */
+		MemSet(values, 0, sizeof(values));
+		MemSet(nulls, 0, sizeof(nulls));
+
+		values[0] = TransactionIdGetDatum(gxact->proc.xid);
+		values[1] = DirectFunctionCall1(textin, CStringGetDatum(gxact->gid));
+		values[2] = Int32GetDatum(gxact->owner);
+		values[3] = ObjectIdGetDatum(gxact->proc.databaseId);
+
+		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+		result = HeapTupleGetDatum(tuple);
+		SRF_RETURN_NEXT(funcctx, result);
+	}
+
+	SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * TwoPhaseGetDummyProc
+ *		Get the PGPROC that represents a prepared transaction specified by XID
+ *
+ * The result points into shared memory.  It is an ERROR if no entry of the
+ * active array carries the given XID.  The most recent answer is remembered
+ * in static variables, so a repeated request for the same XID is cheap.
+ */
+PGPROC *
+TwoPhaseGetDummyProc(TransactionId xid)
+{
+	/* one-entry cache holding the latest successful lookup */
+	static TransactionId last_xid = InvalidTransactionId;
+	static PGPROC *last_proc = NULL;
+
+	PGPROC	   *found = NULL;
+	int			n;
+
+	/*
+	 * Recovery, COMMIT PREPARED and ROLLBACK PREPARED all call us many times
+	 * in a row for one XID, so consult the cache before scanning.
+	 */
+	if (last_xid == xid)
+		return last_proc;
+
+	LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
+
+	for (n = 0; found == NULL && n < TwoPhaseState->numPrepXacts; n++)
+	{
+		if (TwoPhaseState->prepXacts[n]->proc.xid == xid)
+			found = &TwoPhaseState->prepXacts[n]->proc;
+	}
+
+	LWLockRelease(TwoPhaseStateLock);
+
+	if (found == NULL)			/* should not happen */
+		elog(ERROR, "failed to find dummy PGPROC for xid %u", xid);
+
+	last_xid = xid;
+	last_proc = found;
+
+	return found;
+}
+
+/************************************************************************/
+/* State file support                                                   */
+/************************************************************************/
+/* Build a state file's path: <DataDir>/pg_twophase/<XID as 8 hex digits> */
+#define TwoPhaseFilePath(path, xid) \
+	snprintf(path, MAXPGPATH, "%s/%s/%08X", DataDir, TWOPHASE_DIR, xid)
+
+/*
+ * 2PC state file format:
+ *
+ *  1. TwoPhaseFileHeader
+ *  2. TransactionId[] (subtransactions)
+ *  3. RelFileNode[] (files to be deleted at commit)
+ *  4. RelFileNode[] (files to be deleted at abort)
+ *  5. TwoPhaseRecordOnDisk (each record header is followed by its rmgr data)
+ *  6. ...
+ *  7. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID)
+ *  8. CRC32
+ *
+ * Each segment except the final CRC32 is MAXALIGN'd.
+ */
+
+/*
+ * Header for a 2PC state file; it is the first thing stored in the file
+ */
+#define TWOPHASE_MAGIC	0x57F94530		/* format identifier */
+
+typedef struct TwoPhaseFileHeader
+{
+	uint32			magic;				/* format identifier, TWOPHASE_MAGIC */
+	uint32			total_len;			/* actual file length, CRC included */
+	TransactionId	xid;				/* original transaction XID */
+	Oid				database;			/* OID of database it was in */
+	AclId			owner;				/* user running the transaction */
+	int32			nsubxacts;			/* number of following subxact XIDs */
+	int32			ncommitrels;		/* number of delete-on-commit rels */
+	int32			nabortrels;			/* number of delete-on-abort rels */
+	char			gid[GIDSIZE];		/* GID for transaction */
+} TwoPhaseFileHeader;
+
+/*
+ * Header for each record in a state file
+ *
+ * NOTE: len counts only the rmgr data, not the TwoPhaseRecordOnDisk header.
+ * The rmgr data will be stored starting on a MAXALIGN boundary.
+ */
+typedef struct TwoPhaseRecordOnDisk
+{
+	uint32			len;		/* length of rmgr data (before padding) */
+	TwoPhaseRmgrId	rmid;		/* resource manager for this record */
+	uint16			info;		/* flag bits for use by rmgr */
+} TwoPhaseRecordOnDisk;
+
+/*
+ * During prepare, the state file is assembled in memory before writing it
+ * to WAL and the actual state file.  We use a chain of XLogRecData blocks
+ * so that we will be able to pass the state file contents directly to
+ * XLogInsert.  StartPrepare sets the chain up; EndPrepare consumes it.
+ */
+static struct xllist
+{
+	XLogRecData *head;			/* first data block in the chain */
+	XLogRecData *tail;			/* last block in chain */
+	uint32 bytes_free;			/* free bytes left in tail block */
+	uint32 total_len;			/* total data bytes in chain, with padding */
+} records;
+
+
+/*
+ * Append a datum to the records chain.  The data is copied, so the caller
+ * may modify it afterwards.  NB: every datum is padded out to a MAXALIGN
+ * multiple, which must be accounted for when the file is later read!
+ */
+static void
+save_state_data(const void *data, uint32 len)
+{
+	uint32		padded = MAXALIGN(len);
+	XLogRecData *blk;
+
+	/* Chain on a fresh block if the tail can't hold the padded datum */
+	if (records.bytes_free < padded)
+	{
+		blk = palloc0(sizeof(XLogRecData));
+		records.tail->next = blk;
+		records.tail = blk;
+		blk->buffer = InvalidBuffer;
+		blk->len = 0;
+		blk->next = NULL;
+		records.bytes_free = Max(padded, 512);
+		blk->data = palloc(records.bytes_free);
+	}
+
+	blk = records.tail;
+	memcpy(((char *) blk->data) + blk->len, data, len);
+	blk->len += padded;
+	records.total_len += padded;
+	records.bytes_free -= padded;
+}
+
+/*
+ * Start preparing a state file.
+ *
+ * Resets the records chain, then queues the file header and its XID/rel lists.
+ */
+void
+StartPrepare(GlobalTransaction gxact)
+{
+	TransactionId	xid = gxact->proc.xid;
+	TwoPhaseFileHeader hdr;
+	TransactionId *children;
+	RelFileNode *commitrels;
+	RelFileNode *abortrels;
+
+	/* Initialize linked list with a single, still empty, data block */
+	records.head = palloc0(sizeof(XLogRecData));
+	records.head->buffer = InvalidBuffer;
+	records.head->len = 0;
+	records.head->next = NULL;
+
+	records.bytes_free = Max(sizeof(TwoPhaseFileHeader), 512);
+	records.head->data = palloc(records.bytes_free);
+
+	records.tail = records.head;
+
+	records.total_len = 0;
+
+	/* Create header, fetching the subxact and pending-delete lists as we go */
+	hdr.magic = TWOPHASE_MAGIC;
+	hdr.total_len = 0;			/* EndPrepare will fill this in */
+	hdr.xid = xid;
+	hdr.database = MyDatabaseId;
+	hdr.owner = GetUserId();
+	hdr.nsubxacts = xactGetCommittedChildren(&children);
+	hdr.ncommitrels = smgrGetPendingDeletes(true, &commitrels);
+	hdr.nabortrels = smgrGetPendingDeletes(false, &abortrels);
+	StrNCpy(hdr.gid, gxact->gid, GIDSIZE);
+
+	save_state_data(&hdr, sizeof(TwoPhaseFileHeader));	/* must come first */
+
+	/* Add the additional info about subxacts and deletable files */
+	if (hdr.nsubxacts > 0)
+	{
+		save_state_data(children, hdr.nsubxacts * sizeof(TransactionId));
+		/* While we have the child-xact data, stuff it in the gxact too */
+		GXactLoadSubxactData(gxact, hdr.nsubxacts, children);
+		pfree(children);
+	}
+	if (hdr.ncommitrels > 0)
+	{
+		save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileNode));
+		pfree(commitrels);
+	}
+	if (hdr.nabortrels > 0)
+	{
+		save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileNode));
+		pfree(abortrels);
+	}
+}
+
+/*
+ * Finish preparing state file.
+ *
+ * Calculates CRC and writes state file to WAL and in pg_twophase directory.
+ */
+void
+EndPrepare(GlobalTransaction gxact)
+{
+	TransactionId	xid = gxact->proc.xid;
+	TwoPhaseFileHeader *hdr;
+	char			path[MAXPGPATH];
+	XLogRecData	   *record;
+	XLogRecPtr		recptr;
+	pg_crc32		statefile_crc;
+	pg_crc32		bogus_crc;
+	int				fd;
+
+	/* Add the end sentinel to the list of 2PC records */
+	RegisterTwoPhaseRecord(TWOPHASE_RM_END_ID, 0,
+						   NULL, 0);
+
+	/* Go back and fill in total_len in the file header record */
+	hdr = (TwoPhaseFileHeader *) records.head->data;
+	Assert(hdr->magic == TWOPHASE_MAGIC);
+	hdr->total_len = records.total_len + sizeof(pg_crc32);
+
+	/*
+	 * Create the 2PC state file.
+	 *
+	 * Note: because we use BasicOpenFile(), we are responsible for ensuring
+	 * the FD gets closed in any error exit path.  Once we get into the
+	 * critical section, though, it doesn't matter since any failure causes
+	 * PANIC anyway.
+	 */
+	TwoPhaseFilePath(path, xid);
+
+	fd = BasicOpenFile(path,
+					   O_CREAT | O_EXCL | O_WRONLY | PG_BINARY,
+					   S_IRUSR | S_IWUSR);
+	if (fd < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not create twophase state file \"%s\": %m",
+						path)));
+
+	/* Write data to file, and calculate CRC as we pass over it */
+	INIT_CRC32(statefile_crc);
+
+	for (record = records.head; record != NULL; record = record->next)
+	{
+		COMP_CRC32(statefile_crc, record->data, record->len);
+		if ((write(fd, record->data, record->len)) != record->len)
+		{
+			close(fd);
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not write twophase state file: %m")));
+		}
+	}
+
+	FIN_CRC32(statefile_crc);
+
+	/*
+	 * Write a deliberately bogus CRC to the state file, and flush it to disk.
+	 * This is to minimize the odds of failure within the critical section
+	 * below --- in particular, running out of disk space.
+	 *
+	 * On most filesystems, write() rather than fsync() detects out-of-space,
+	 * so the fsync might be considered optional.  Using it means there
+	 * are three fsyncs not two associated with preparing a transaction; is
+	 * the risk of an error from fsync high enough to justify that?
+	 */
+	bogus_crc = ~ statefile_crc;
+
+	if ((write(fd, &bogus_crc, sizeof(pg_crc32))) != sizeof(pg_crc32))
+	{
+		close(fd);
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write twophase state file: %m")));
+	}
+
+	if (pg_fsync(fd) != 0)
+	{
+		close(fd);
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync twophase state file: %m")));
+	}
+
+	/* Back up over the bogus CRC to prepare for rewriting it */
+	if (lseek(fd, -((off_t) sizeof(pg_crc32)), SEEK_CUR) < 0)
+	{
+		close(fd);
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not seek twophase state file: %m")));
+	}
+
+	/*
+	 * The state file isn't valid yet, because we haven't written the correct
+	 * CRC yet.  Before we do that, insert entry in WAL and flush it to disk.
+	 *
+	 * Between the time we have written the WAL entry and the time we
+	 * flush the correct state file CRC to disk, we have an inconsistency:
+	 * the xact is prepared according to WAL but not according to our on-disk
+	 * state.  We use a critical section to force a PANIC if we are unable to
+	 * complete the flush --- then, WAL replay should repair the
+	 * inconsistency.
+	 *
+	 * We have to lock out checkpoint start here, too; otherwise a checkpoint
+	 * starting immediately after the WAL record is inserted could complete
+	 * before we've finished flushing, meaning that the WAL record would not
+	 * get replayed if a crash follows.
+	 */
+	START_CRIT_SECTION();
+
+	LWLockAcquire(CheckpointStartLock, LW_SHARED);
+
+	recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE, records.head);
+	XLogFlush(recptr);
+
+	/* If we crash now, we have prepared: WAL replay will fix things */
+
+	/* write correct CRC, flush, and close file (any failure here PANICs) */
+	if ((write(fd, &statefile_crc, sizeof(pg_crc32))) != sizeof(pg_crc32))
+	{
+		close(fd);
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write twophase state file: %m")));
+	}
+
+	if (pg_fsync(fd) != 0)
+	{
+		close(fd);
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync twophase state file: %m")));
+	}
+
+	if (close(fd) != 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not close twophase state file: %m")));
+
+	LWLockRelease(CheckpointStartLock);
+
+	END_CRIT_SECTION();
+
+	records.tail = records.head = NULL;		/* chain is used up; forget it */
+}
+
+/* Register a 2PC record to be written to state file. */
+void
+RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info,
+					   const void *data, uint32 len)
+{
+	TwoPhaseRecordOnDisk rechdr;
+
+	/* the fixed-size record header goes first ... */
+	rechdr.len = len;
+	rechdr.rmid = rmid;
+	rechdr.info = info;
+	save_state_data(&rechdr, sizeof(rechdr));
+	/* ... then the rmgr's own data, unless there is none */
+	if (len != 0)
+		save_state_data(data, len);
+}
+
+
+/*
+ * Read and validate the state file for xid.
+ * If it looks OK (plausible length, valid magic number and CRC), return the
+ * palloc'd contents of the file.  Otherwise return NULL; only failures of
+ * open, fstat or read produce a WARNING, a bad file is rejected silently.
+ */
+static char *
+ReadTwoPhaseFile(TransactionId xid)
+{
+	char		path[MAXPGPATH];
+	char	   *buf;
+	TwoPhaseFileHeader *hdr;
+	int			fd;
+	struct stat	stat;
+	uint32		crc_offset;
+	pg_crc32	calc_crc, file_crc;
+
+	TwoPhaseFilePath(path, xid);
+
+	fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
+	if (fd < 0)
+	{
+		ereport(WARNING,
+				(errcode_for_file_access(),
+				 errmsg("could not open twophase state file \"%s\": %m",
+						path)));
+		return NULL;
+	}
+
+	/*
+	 * Check file length.  We can determine a lower bound pretty easily.
+	 * We set an upper bound mainly to avoid palloc() failure on a corrupt
+	 * file.
+	 */
+	if (fstat(fd, &stat))
+	{
+		close(fd);
+		ereport(WARNING,
+				(errcode_for_file_access(),
+				 errmsg("could not stat twophase state file \"%s\": %m",
+						path)));
+		return NULL;
+	}
+
+	if (stat.st_size < (MAXALIGN(sizeof(TwoPhaseFileHeader)) +
+						MAXALIGN(sizeof(TwoPhaseRecordOnDisk)) +
+						sizeof(pg_crc32)) ||
+		stat.st_size > 10000000)
+	{
+		close(fd);
+		return NULL;
+	}
+
+	crc_offset = stat.st_size - sizeof(pg_crc32);	/* CRC is the last item */
+	if (crc_offset != MAXALIGN(crc_offset))
+	{
+		close(fd);
+		return NULL;
+	}
+
+	/*
+	 * OK, slurp in the file.
+	 */
+	buf = (char *) palloc(stat.st_size);
+
+	if (read(fd, buf, stat.st_size) != stat.st_size)
+	{
+		close(fd);
+		ereport(WARNING,
+				(errcode_for_file_access(),
+				 errmsg("could not read twophase state file \"%s\": %m",
+						path)));
+		pfree(buf);
+		return NULL;
+	}
+
+	close(fd);
+
+	hdr = (TwoPhaseFileHeader *) buf;
+	if (hdr->magic != TWOPHASE_MAGIC || hdr->total_len != stat.st_size)
+	{
+		pfree(buf);
+		return NULL;
+	}
+
+	INIT_CRC32(calc_crc);
+	COMP_CRC32(calc_crc, buf, crc_offset);	/* everything except the CRC */
+	FIN_CRC32(calc_crc);
+
+	file_crc = *((pg_crc32 *) (buf + crc_offset));
+
+	if (!EQ_CRC32(calc_crc, file_crc))
+	{
+		pfree(buf);
+		return NULL;
+	}
+
+	return buf;
+}
+
+
+/*
+ * FinishPreparedTransaction: execute COMMIT PREPARED or ROLLBACK PREPARED
+ * for the prepared transaction named by gid; isCommit selects which one.
+ * Fails with an ERROR if LockGXact refuses or the state file is unusable.
+ */
+void
+FinishPreparedTransaction(char *gid, bool isCommit)
+{
+	GlobalTransaction gxact;
+	TransactionId xid;
+	char *buf;
+	char *bufptr;
+	TwoPhaseFileHeader *hdr;
+	TransactionId *children;
+	RelFileNode *commitrels;
+	RelFileNode *abortrels;
+	int		i;
+
+	/*
+	 * Validate the GID, and lock the GXACT to ensure that two backends
+	 * do not try to commit the same GID at once.
+	 */
+	gxact = LockGXact(gid, GetUserId());
+	xid = gxact->proc.xid;
+
+	/* Read and validate the state file */
+	buf = ReadTwoPhaseFile(xid);
+	if (buf == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg("twophase state file for transaction %u is corrupt",
+						xid)));
+
+	/* Disassemble the header area; bufptr ends up at the first 2PC record */
+	hdr = (TwoPhaseFileHeader *) buf;
+	Assert(TransactionIdEquals(hdr->xid, xid));
+	bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
+	children = (TransactionId *) bufptr;
+	bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
+	commitrels = (RelFileNode *) bufptr;
+	bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
+	abortrels = (RelFileNode *) bufptr;
+	bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
+
+	/*
+	 * The order of operations here is critical: make the XLOG entry for
+	 * commit or abort, then mark the transaction committed or aborted in
+	 * pg_clog, then remove its PGPROC from the global ProcArray (which
+	 * means TransactionIdIsInProgress will stop saying the prepared xact
+	 * is in progress), then run the post-commit or post-abort callbacks.
+	 * The callbacks will release the locks the transaction held.
+	 */
+	if (isCommit)
+		RecordTransactionCommitPrepared(xid,
+										hdr->nsubxacts, children,
+										hdr->ncommitrels, commitrels);
+	else
+		RecordTransactionAbortPrepared(xid,
+									   hdr->nsubxacts, children,
+									   hdr->nabortrels, abortrels);
+
+	ProcArrayRemove(&gxact->proc);
+
+	/*
+	 * In case we fail while running the callbacks, mark the gxact invalid
+	 * so no one else will try to commit/rollback, and so it can be recycled
+	 * properly later.  It is still locked by our XID so it won't go away yet.
+	 */
+	gxact->valid = false;
+
+	if (isCommit)
+		ProcessRecords(bufptr, xid, twophase_postcommit_callbacks);
+	else
+		ProcessRecords(bufptr, xid, twophase_postabort_callbacks);
+
+	/*
+	 * We also have to remove any files that were supposed to be dropped.
+	 * NB: this code knows that we couldn't be dropping any temp rels ...
+	 */
+	if (isCommit)
+	{
+		for (i = 0; i < hdr->ncommitrels; i++)
+			smgrdounlink(smgropen(commitrels[i]), false, false);
+	}
+	else
+	{
+		for (i = 0; i < hdr->nabortrels; i++)
+			smgrdounlink(smgropen(abortrels[i]), false, false);
+	}
+
+	/* Count the xact as committed or rolled back, whichever it really was */
+	if (isCommit)
+		pgstat_count_xact_commit();
+	else
+		pgstat_count_xact_rollback();
+
+	/* And now we can clean up our mess: state file first, then the gxact */
+	RemoveTwoPhaseFile(xid, true);
+
+	RemoveGXact(gxact);
+
+	pfree(buf);
+}
+
+/*
+ * Walk the records of a 2PC state file image (as loaded into memory by
+ * ReadTwoPhaseFile), calling the callbacks[] entry, if any, for each one.
+ */
+static void
+ProcessRecords(char *bufptr, TransactionId xid,
+			   const TwoPhaseCallback callbacks[])
+{
+	TwoPhaseRecordOnDisk *rec = (TwoPhaseRecordOnDisk *) bufptr;
+
+	Assert(rec->rmid <= TWOPHASE_RM_MAX_ID);
+	while (rec->rmid != TWOPHASE_RM_END_ID)
+	{
+		TwoPhaseCallback handler = callbacks[rec->rmid];
+		char	   *payload = bufptr + MAXALIGN(sizeof(TwoPhaseRecordOnDisk));
+
+		if (handler != NULL)
+			handler(xid, rec->info, (void *) payload, rec->len);
+
+		/* advance past the MAXALIGN-padded payload to the next header */
+		bufptr = payload + MAXALIGN(rec->len);
+		rec = (TwoPhaseRecordOnDisk *) bufptr;
+		Assert(rec->rmid <= TWOPHASE_RM_MAX_ID);
+	}
+}
+
+/*
+ * Unlink the 2PC state file belonging to the given XID.
+ *
+ * A failure draws only a WARNING, and a missing file is not even warned
+ * about unless giveWarning is true (that case is expected in WAL replay).
+ */
+void
+RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
+{
+	char fname[MAXPGPATH];
+
+	TwoPhaseFilePath(fname, xid);
+	if (unlink(fname) == 0 || (errno == ENOENT && !giveWarning))
+		return;					/* removed, or a tolerated absence */
+	ereport(WARNING,
+			(errcode_for_file_access(),
+			 errmsg("could not remove two-phase state file \"%s\": %m",
+					fname)));
+}
+
+/*
+ * Recreates a state file for xid. This is used in WAL replay.  Note: content
+ * and len don't include CRC; it is recomputed here.  A failure raises ERROR
+ * carrying the errno of the call that actually failed.
+ */
+void
+RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
+{
+	char		path[MAXPGPATH];
+	pg_crc32	statefile_crc;
+	int			fd;
+	int			save_errno;
+
+	/* Recompute CRC */
+	INIT_CRC32(statefile_crc);
+	COMP_CRC32(statefile_crc, content, len);
+	FIN_CRC32(statefile_crc);
+
+	TwoPhaseFilePath(path, xid);
+	fd = BasicOpenFile(path,
+					   O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY,
+					   S_IRUSR | S_IWUSR);
+	if (fd < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not recreate twophase state file \"%s\": %m",
+						path)));
+
+	/* Write content and CRC; both failures get the same report */
+	errno = 0;
+	if (write(fd, content, len) != len ||
+		write(fd, &statefile_crc, sizeof(pg_crc32)) != sizeof(pg_crc32))
+	{
+		/* if write didn't set errno, assume problem is no disk space */
+		save_errno = errno ? errno : ENOSPC;
+		close(fd);
+		errno = save_errno;		/* don't let close() change what %m shows */
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write twophase state file: %m")));
+	}
+
+	/* Sync and close the file */
+	if (pg_fsync(fd) != 0)
+	{
+		save_errno = errno;
+		close(fd);
+		errno = save_errno;		/* as above */
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync twophase state file: %m")));
+	}
+
+	if (close(fd) != 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not close twophase state file: %m")));
+}
+
+/*
+ * PrescanPreparedTransactions
+ *
+ * Scan the pg_twophase directory and determine the range of valid XIDs
+ * present.  This is run during database startup, after we have completed
+ * reading WAL.  ShmemVariableCache->nextXid has been set to one more than
+ * the highest XID for which evidence exists in WAL.
+ *
+ * We throw away any prepared xacts with main XID beyond nextXid --- if any
+ * are present, it suggests that the DBA has done a PITR recovery to an
+ * earlier point in time without cleaning out pg_twophase.  We dare not
+ * try to recover such prepared xacts since they likely depend on database
+ * state that doesn't exist now.
+ *
+ * However, we will advance nextXid beyond any subxact XIDs belonging to
+ * valid prepared xacts.  We need to do this since subxact commit doesn't
+ * write a WAL entry, and so there might be no evidence in WAL of those
+ * subxact XIDs.
+ *
+ * Our other responsibility is to determine and return the oldest valid XID
+ * among the prepared xacts (if none, return ShmemVariableCache->nextXid).
+ * This is needed to synchronize pg_subtrans startup properly.
+ */
+TransactionId
+PrescanPreparedTransactions(void)
+{
+	TransactionId origNextXid = ShmemVariableCache->nextXid;
+	TransactionId result = origNextXid;	/* running minimum of valid XIDs */
+	char	dir[MAXPGPATH];
+	DIR		*cldir;
+	struct dirent *clde;
+
+	snprintf(dir, MAXPGPATH, "%s/%s", DataDir, TWOPHASE_DIR);
+
+	cldir = AllocateDir(dir);
+	if (cldir == NULL)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not open directory \"%s\": %m", dir)));
+
+	errno = 0;		/* a readdir() failure is detected via errno after the loop */
+	while ((clde = readdir(cldir)) != NULL)
+	{
+		if (strlen(clde->d_name) == 8 &&	/* only names of 8 hex digits */
+			strspn(clde->d_name, "0123456789ABCDEF") == 8)
+		{
+			TransactionId xid;
+			char *buf;
+			TwoPhaseFileHeader	*hdr;
+			TransactionId *subxids;
+			int i;
+
+			xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
+
+			/* Reject XID if too new, ie, not before the nextXid we began with */
+			if (TransactionIdFollowsOrEquals(xid, origNextXid))
+			{
+				ereport(WARNING,
+						(errmsg("removing future twophase state file \"%s\"",
+								clde->d_name)));
+				RemoveTwoPhaseFile(xid, true);
+				errno = 0;		/* in case anything above left errno set */
+				continue;
+			}
+
+			/*
+			 * Note: we can't check if already processed because clog
+			 * subsystem isn't up yet.
+			 */
+
+			/* Read and validate file; a NULL result is treated as corruption */
+			buf = ReadTwoPhaseFile(xid);
+			if (buf == NULL)
+			{
+				ereport(WARNING,
+						(errmsg("removing corrupt twophase state file \"%s\"",
+								clde->d_name)));
+				RemoveTwoPhaseFile(xid, true);
+				errno = 0;		/* in case anything above left errno set */
+				continue;
+			}
+
+			/* Deconstruct header; the XID stored in it must match the name */
+			hdr = (TwoPhaseFileHeader *) buf;
+			if (!TransactionIdEquals(hdr->xid, xid))
+			{
+				ereport(WARNING,
+						(errmsg("removing corrupt twophase state file \"%s\"",
+								clde->d_name)));
+				RemoveTwoPhaseFile(xid, true);
+				pfree(buf);
+				errno = 0;		/* in case anything above left errno set */
+				continue;
+			}
+
+			/*
+			 * OK, we think this file is valid.  Incorporate xid into the
+			 * running-minimum result.
+			 */
+			if (TransactionIdPrecedes(xid, result))
+				result = xid;
+
+			/*
+			 * Examine subtransaction XIDs ... they should all follow main
+			 * XID, and they may force us to advance nextXid.  The subxid
+			 * array sits right after the MAXALIGN'd file header.
+			 */
+			subxids = (TransactionId *)
+				(buf + MAXALIGN(sizeof(TwoPhaseFileHeader)));
+			for (i = 0; i < hdr->nsubxacts; i++)
+			{
+				TransactionId subxid = subxids[i];
+
+				Assert(TransactionIdFollows(subxid, xid));
+				if (TransactionIdFollowsOrEquals(subxid,
+												 ShmemVariableCache->nextXid))
+				{
+					/* make nextXid one more than this subxid */
+					ShmemVariableCache->nextXid = subxid;
+					TransactionIdAdvance(ShmemVariableCache->nextXid);
+				}
+			}
+
+			pfree(buf);
+		}
+		errno = 0;		/* start clean for the next readdir() call */
+	}
+#ifdef WIN32
+
+	/*
+	 * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but
+	 * not in released version
+	 */
+	if (GetLastError() == ERROR_NO_MORE_FILES)
+		errno = 0;
+#endif
+	if (errno)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not read directory \"%s\": %m", dir)));
+
+	FreeDir(cldir);
+
+	return result;	/* oldest valid prepared XID, else the original nextXid */
+}
+
+/*
+ * RecoverPreparedTransactions
+ *
+ * Scan the pg_twophase directory and reload shared-memory state for each
+ * prepared transaction (reacquire locks, etc).  This is run during database
+ * startup.
+ */
+void
+RecoverPreparedTransactions(void)
+{
+	char	dir[MAXPGPATH];
+	DIR		*cldir;
+	struct dirent *clde;
+
+	snprintf(dir, MAXPGPATH, "%s/%s", DataDir, TWOPHASE_DIR);
+
+	cldir = AllocateDir(dir);
+	if (cldir == NULL)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not open directory \"%s\": %m", dir)));
+
+	errno = 0;		/* a readdir() failure is detected via errno after the loop */
+	while ((clde = readdir(cldir)) != NULL)
+	{
+		if (strlen(clde->d_name) == 8 &&	/* only names of 8 hex digits */
+			strspn(clde->d_name, "0123456789ABCDEF") == 8)
+		{
+			TransactionId xid;
+			char *buf;
+			char *bufptr;
+			TwoPhaseFileHeader	*hdr;
+			TransactionId *subxids;
+			GlobalTransaction	gxact;
+			int i;
+
+			xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
+
+			/* Already processed?  (xact is already known committed/aborted) */
+			if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
+			{
+				ereport(WARNING,
+						(errmsg("removing stale twophase state file \"%s\"",
+								clde->d_name)));
+				RemoveTwoPhaseFile(xid, true);
+				errno = 0;		/* in case anything above left errno set */
+				continue;
+			}
+
+			/* Read and validate file; a NULL result is treated as corruption */
+			buf = ReadTwoPhaseFile(xid);
+			if (buf == NULL)
+			{
+				ereport(WARNING,
+						(errmsg("removing corrupt twophase state file \"%s\"",
+								clde->d_name)));
+				RemoveTwoPhaseFile(xid, true);
+				errno = 0;		/* in case anything above left errno set */
+				continue;
+			}
+
+			ereport(LOG,
+					(errmsg("recovering prepared transaction %u", xid)));
+
+			/* Deconstruct header; step bufptr past the subxid and rel arrays */
+			hdr = (TwoPhaseFileHeader *) buf;
+			Assert(TransactionIdEquals(hdr->xid, xid));
+			bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
+			subxids = (TransactionId *) bufptr;
+			bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
+			bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
+			bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
+
+			/*
+			 * Reconstruct subtrans state for the transaction --- needed
+			 * because pg_subtrans is not preserved over a restart
+			 */
+			for (i = 0; i < hdr->nsubxacts; i++)
+				SubTransSetParent(subxids[i], xid);
+
+			/*
+			 * Recreate its GXACT and dummy PGPROC
+			 */
+			gxact = MarkAsPreparing(xid, hdr->database, hdr->gid, hdr->owner);
+			GXactLoadSubxactData(gxact, hdr->nsubxacts, subxids);
+			MarkAsPrepared(gxact);
+
+			/*
+			 * Recover other state (notably locks) using resource managers;
+			 * bufptr now points at the first 2PC record.
+			 */
+			ProcessRecords(bufptr, xid, twophase_recover_callbacks);
+
+			pfree(buf);
+		}
+		errno = 0;		/* start clean for the next readdir() call */
+	}
+#ifdef WIN32
+
+	/*
+	 * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but
+	 * not in released version
+	 */
+	if (GetLastError() == ERROR_NO_MORE_FILES)
+		errno = 0;
+#endif
+	if (errno)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not read directory \"%s\": %m", dir)));
+
+	FreeDir(cldir);
+}
+
+/*
+ *	RecordTransactionCommitPrepared
+ *
+ * Emit and flush the XLOG commit record for prepared xact xid (listing rels
+ * to delete and child XIDs), then mark xid and children committed in pg_clog.
+ * This is basically the same as RecordTransactionCommit: in particular, we
+ * must take the CheckpointStartLock to avoid a race condition.  The xact made
+ * at least one XLOG entry (its PREPARE), so the record is never optimized out.
+ */
+static void
+RecordTransactionCommitPrepared(TransactionId xid,
+								int nchildren,
+								TransactionId *children,
+								int nrels,
+								RelFileNode *rels)
+{
+	XLogRecData rdata[3];		/* chain: fixed part, rels, child XIDs */
+	int			lastrdata = 0;	/* index of the chain's current last entry */
+	xl_xact_commit_prepared xlrec;
+	XLogRecPtr	recptr;
+
+	START_CRIT_SECTION();
+
+	/* See notes in RecordTransactionCommit */
+	LWLockAcquire(CheckpointStartLock, LW_SHARED);
+
+	/* Emit the XLOG commit record */
+	xlrec.xid = xid;
+	xlrec.crec.xtime = time(NULL);
+	xlrec.crec.nrels = nrels;
+	xlrec.crec.nsubxacts = nchildren;
+	rdata[0].data = (char *) (&xlrec);
+	rdata[0].len = MinSizeOfXactCommitPrepared;
+	rdata[0].buffer = InvalidBuffer;
+	/* dump rels to delete */
+	if (nrels > 0)
+	{
+		rdata[0].next = &(rdata[1]);
+		rdata[1].data = (char *) rels;
+		rdata[1].len = nrels * sizeof(RelFileNode);
+		rdata[1].buffer = InvalidBuffer;
+		lastrdata = 1;
+	}
+	/* dump committed child Xids */
+	if (nchildren > 0)
+	{
+		/* link after rdata[1] if it was used above, else after rdata[0] */
+		rdata[lastrdata].next = &(rdata[2]);
+		rdata[2].data = (char *) children;
+		rdata[2].len = nchildren * sizeof(TransactionId);
+		rdata[2].buffer = InvalidBuffer;
+		lastrdata = 2;
+	}
+	rdata[lastrdata].next = NULL;	/* terminate the chain */
+
+	recptr = XLogInsert(RM_XACT_ID,
+						XLOG_XACT_COMMIT_PREPARED | XLOG_NO_TRAN,
+						rdata);
+
+	/* we don't currently try to sleep before flush here ... */
+
+	/* Flush XLOG to disk */
+	XLogFlush(recptr);
+
+	/* Mark the transaction committed in pg_clog */
+	TransactionIdCommit(xid);
+	/* to avoid race conditions, the parent must commit first */
+	TransactionIdCommitTree(nchildren, children);
+
+	/* Checkpoint is allowed again */
+	LWLockRelease(CheckpointStartLock);
+
+	END_CRIT_SECTION();
+}
+
+/*
+ *	RecordTransactionAbortPrepared
+ *
+ * Emit and flush the XLOG abort record for prepared xact xid (listing rels
+ * to delete and child XIDs), then mark xid and children aborted in pg_clog.
+ * This is basically the same as RecordTransactionAbort.  The xact made at
+ * least one XLOG entry (its PREPARE), so the record is never optimized out.
+ */
+static void
+RecordTransactionAbortPrepared(TransactionId xid,
+							   int nchildren,
+							   TransactionId *children,
+							   int nrels,
+							   RelFileNode *rels)
+{
+	XLogRecData rdata[3];		/* chain: fixed part, rels, child XIDs */
+	int			lastrdata = 0;	/* index of the chain's current last entry */
+	xl_xact_abort_prepared xlrec;
+	XLogRecPtr	recptr;
+
+	/*
+	 * Catch the scenario where we aborted partway through
+	 * RecordTransactionCommitPrepared ...
+	 */
+	if (TransactionIdDidCommit(xid))
+		elog(PANIC, "cannot abort transaction %u, it was already committed",
+			 xid);
+
+	START_CRIT_SECTION();
+
+	/* Emit the XLOG abort record */
+	xlrec.xid = xid;
+	xlrec.arec.xtime = time(NULL);
+	xlrec.arec.nrels = nrels;
+	xlrec.arec.nsubxacts = nchildren;
+	rdata[0].data = (char *) (&xlrec);
+	rdata[0].len = MinSizeOfXactAbortPrepared;
+	rdata[0].buffer = InvalidBuffer;
+	/* dump rels to delete */
+	if (nrels > 0)
+	{
+		rdata[0].next = &(rdata[1]);
+		rdata[1].data = (char *) rels;
+		rdata[1].len = nrels * sizeof(RelFileNode);
+		rdata[1].buffer = InvalidBuffer;
+		lastrdata = 1;
+	}
+	/* dump child Xids; they get marked aborted below along with xid */
+	if (nchildren > 0)
+	{
+		/* link after rdata[1] if it was used above, else after rdata[0] */
+		rdata[lastrdata].next = &(rdata[2]);
+		rdata[2].data = (char *) children;
+		rdata[2].len = nchildren * sizeof(TransactionId);
+		rdata[2].buffer = InvalidBuffer;
+		lastrdata = 2;
+	}
+	rdata[lastrdata].next = NULL;	/* terminate the chain */
+
+	recptr = XLogInsert(RM_XACT_ID,
+						XLOG_XACT_ABORT_PREPARED | XLOG_NO_TRAN,
+						rdata);
+
+	/* Always flush, since we're about to remove the 2PC state file */
+	XLogFlush(recptr);
+
+	/*
+	 * Mark the transaction aborted in clog.  This is not absolutely
+	 * necessary but we may as well do it while we are here.
+	 */
+	TransactionIdAbort(xid);
+	TransactionIdAbortTree(nchildren, children);
+
+	END_CRIT_SECTION();
+}
+
diff --git a/src/backend/access/transam/twophase_rmgr.c b/src/backend/access/transam/twophase_rmgr.c
new file mode 100644
index 0000000000000000000000000000000000000000..e78f8b2fbb386e6e6dbb77afce5adb01494a65ca
--- /dev/null
+++ b/src/backend/access/transam/twophase_rmgr.c
@@ -0,0 +1,49 @@
+/*-------------------------------------------------------------------------
+ *
+ * twophase_rmgr.c
+ *	  Two-phase-commit resource managers tables
+ *
+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/access/transam/twophase_rmgr.c,v 1.1 2005/06/17 22:32:42 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/twophase_rmgr.h"
+#include "commands/async.h"
+#include "storage/lock.h"
+#include "utils/flatfiles.h"
+#include "utils/inval.h"
+
+/* 2PC callback tables, indexed by rmid; a NULL entry means "nothing to do" */
+const TwoPhaseCallback twophase_recover_callbacks[TWOPHASE_RM_MAX_ID + 1] = 
+{
+	NULL,							/* END ID: never dispatched */
+	lock_twophase_recover,			/* Lock */
+	NULL,							/* Inval */
+	NULL,							/* flat file update */
+	NULL							/* notify/listen */
+};
+
+const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] = 
+{
+	NULL,							/* END ID: never dispatched */
+	lock_twophase_postcommit,		/* Lock */
+	inval_twophase_postcommit,		/* Inval */
+	flatfile_twophase_postcommit,	/* flat file update */
+	notify_twophase_postcommit		/* notify/listen */
+};
+
+const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] = 
+{
+	NULL,							/* END ID: never dispatched */
+	lock_twophase_postabort,		/* Lock */
+	NULL,							/* Inval */
+	NULL,							/* flat file update */
+	NULL							/* notify/listen */
+};
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 040a4ab0b79068faa24e961237e30a436560f5b0..74163b7f576f4bda6f2589a9305c89b6a267efa5 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.204 2005/06/06 20:22:57 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.205 2005/06/17 22:32:42 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,6 +22,7 @@
 
 #include "access/multixact.h"
 #include "access/subtrans.h"
+#include "access/twophase.h"
 #include "access/xact.h"
 #include "catalog/heap.h"
 #include "catalog/index.h"
@@ -68,7 +69,8 @@ typedef enum TransState
 	TRANS_START,
 	TRANS_INPROGRESS,
 	TRANS_COMMIT,
-	TRANS_ABORT
+	TRANS_ABORT,
+	TRANS_PREPARE
 } TransState;
 
 /*
@@ -90,6 +92,7 @@ typedef enum TBlockState
 	TBLOCK_ABORT,				/* failed xact, awaiting ROLLBACK */
 	TBLOCK_ABORT_END,			/* failed xact, ROLLBACK received */
 	TBLOCK_ABORT_PENDING,		/* live xact, ROLLBACK received */
+	TBLOCK_PREPARE,				/* live xact, PREPARE received */
 
 	/* subtransaction states */
 	TBLOCK_SUBBEGIN,			/* starting a subtransaction */
@@ -172,6 +175,12 @@ static CommandId currentCommandId;
 static AbsoluteTime xactStartTime;		/* integer part */
 static int	xactStartTimeUsec;	/* microsecond part */
 
+/*
+ * GID to be used for preparing the current transaction.  This is also
+ * global to a whole transaction, so we don't keep it in the state stack.
+ */
+static char *prepareGID;
+
 
 /*
  * List of add-on start- and end-of-xact callbacks
@@ -267,10 +276,12 @@ IsTransactionState(void)
 			return true;
 		case TRANS_ABORT:
 			return true;
+		case TRANS_PREPARE:
+			return true;
 	}
 
 	/*
-	 * Shouldn't get here, but lint is not happy with this...
+	 * Shouldn't get here, but lint is not happy without this...
 	 */
 	return false;
 }
@@ -660,12 +671,12 @@ void
 RecordTransactionCommit(void)
 {
 	int			nrels;
-	RelFileNode *rptr;
+	RelFileNode *rels;
 	int			nchildren;
 	TransactionId *children;
 
 	/* Get data needed for commit record */
-	nrels = smgrGetPendingDeletes(true, &rptr);
+	nrels = smgrGetPendingDeletes(true, &rels);
 	nchildren = xactGetCommittedChildren(&children);
 
 	/*
@@ -726,7 +737,7 @@ RecordTransactionCommit(void)
 			if (nrels > 0)
 			{
 				rdata[0].next = &(rdata[1]);
-				rdata[1].data = (char *) rptr;
+				rdata[1].data = (char *) rels;
 				rdata[1].len = nrels * sizeof(RelFileNode);
 				rdata[1].buffer = InvalidBuffer;
 				lastrdata = 1;
@@ -809,12 +820,9 @@ RecordTransactionCommit(void)
 	MyXactMadeXLogEntry = false;
 	MyXactMadeTempRelUpdate = false;
 
-	/* Show myself as out of the transaction in PGPROC array */
-	MyProc->logRec.xrecoff = 0;
-
 	/* And clean up local data */
-	if (rptr)
-		pfree(rptr);
+	if (rels)
+		pfree(rels);
 	if (children)
 		pfree(children);
 }
@@ -970,12 +978,12 @@ static void
 RecordTransactionAbort(void)
 {
 	int			nrels;
-	RelFileNode *rptr;
+	RelFileNode *rels;
 	int			nchildren;
 	TransactionId *children;
 
 	/* Get data needed for abort record */
-	nrels = smgrGetPendingDeletes(false, &rptr);
+	nrels = smgrGetPendingDeletes(false, &rels);
 	nchildren = xactGetCommittedChildren(&children);
 
 	/*
@@ -1026,7 +1034,7 @@ RecordTransactionAbort(void)
 			if (nrels > 0)
 			{
 				rdata[0].next = &(rdata[1]);
-				rdata[1].data = (char *) rptr;
+				rdata[1].data = (char *) rels;
 				rdata[1].len = nrels * sizeof(RelFileNode);
 				rdata[1].buffer = InvalidBuffer;
 				lastrdata = 1;
@@ -1069,12 +1077,9 @@ RecordTransactionAbort(void)
 	MyXactMadeXLogEntry = false;
 	MyXactMadeTempRelUpdate = false;
 
-	/* Show myself as out of the transaction in PGPROC array */
-	MyProc->logRec.xrecoff = 0;
-
 	/* And clean up local data */
-	if (rptr)
-		pfree(rptr);
+	if (rels)
+		pfree(rels);
 	if (children)
 		pfree(children);
 }
@@ -1166,13 +1171,13 @@ static void
 RecordSubTransactionAbort(void)
 {
 	int			nrels;
-	RelFileNode *rptr;
+	RelFileNode *rels;
 	TransactionId xid = GetCurrentTransactionId();
 	int			nchildren;
 	TransactionId *children;
 
 	/* Get data needed for abort record */
-	nrels = smgrGetPendingDeletes(false, &rptr);
+	nrels = smgrGetPendingDeletes(false, &rels);
 	nchildren = xactGetCommittedChildren(&children);
 
 	/*
@@ -1212,7 +1217,7 @@ RecordSubTransactionAbort(void)
 			if (nrels > 0)
 			{
 				rdata[0].next = &(rdata[1]);
-				rdata[1].data = (char *) rptr;
+				rdata[1].data = (char *) rels;
 				rdata[1].len = nrels * sizeof(RelFileNode);
 				rdata[1].buffer = InvalidBuffer;
 				lastrdata = 1;
@@ -1256,8 +1261,8 @@ RecordSubTransactionAbort(void)
 	XidCacheRemoveRunningXids(xid, nchildren, children);
 
 	/* And clean up local data */
-	if (rptr)
-		pfree(rptr);
+	if (rels)
+		pfree(rels);
 	if (children)
 		pfree(children);
 }
@@ -1419,8 +1424,11 @@ StartTransaction(void)
 	ShowTransactionState("StartTransaction");
 }
 
+
 /*
  *	CommitTransaction
+ *
+ * NB: if you change this routine, better look at PrepareTransaction too!
  */
 static void
 CommitTransaction(void)
@@ -1510,6 +1518,8 @@ CommitTransaction(void)
 	 * xid 0 as running as well, or it will be able to see two tuple versions
 	 * - one deleted by xid 1 and one inserted by xid 0.  See notes in
 	 * GetSnapshotData.
+	 *
+	 * Note: MyProc may be null during bootstrap.
 	 *----------
 	 */
 	if (MyProc != NULL)
@@ -1608,6 +1618,225 @@ CommitTransaction(void)
 	RESUME_INTERRUPTS();
 }
 
+
+/*
+ *	PrepareTransaction
+ *		Do 1st-phase commit processing for the current top-level xact.
+ * NB: if you change this routine, better look at CommitTransaction too!
+ */
+static void
+PrepareTransaction(void)
+{
+	TransactionState	s = CurrentTransactionState;
+	TransactionId		xid = GetCurrentTransactionId();
+	GlobalTransaction	gxact;
+
+	ShowTransactionState("PrepareTransaction");
+
+	/*
+	 * check the current transaction state
+	 */
+	if (s->state != TRANS_INPROGRESS)
+		elog(WARNING, "PrepareTransaction while in %s state",
+			 TransStateAsString(s->state));
+	Assert(s->parent == NULL);	/* must be at the top transaction level */
+
+	/*
+	 * Do pre-commit processing (most of this stuff requires database
+	 * access, and in fact could still cause an error...)
+	 *
+	 * It is possible for PrepareHoldablePortals to invoke functions that
+	 * queue deferred triggers, and it's also possible that triggers create
+	 * holdable cursors.  So we have to loop until there's nothing left to
+	 * do.
+	 */
+	for (;;)
+	{
+		/*
+		 * Fire all currently pending deferred triggers.
+		 */
+		AfterTriggerFireDeferred();
+
+		/*
+		 * Convert any open holdable cursors into static portals.  If there
+		 * weren't any, we are done ... otherwise loop back to check if they
+		 * queued deferred triggers.  Lather, rinse, repeat.
+		 */
+		if (!PrepareHoldablePortals())
+			break;
+	}
+
+	/* Now we can shut down the deferred-trigger manager */
+	AfterTriggerEndXact(true);
+
+	/* Close any open regular cursors */
+	AtCommit_Portals();
+
+	/*
+	 * Let ON COMMIT management do its thing (must happen after closing
+	 * cursors, to avoid dangling-reference problems)
+	 */
+	PreCommit_on_commit_actions();
+
+	/* close large objects before lower-level cleanup */
+	AtEOXact_LargeObject(true);
+
+	/* NOTIFY and flatfiles will be handled below */
+
+	/* Prevent cancel/die interrupt while cleaning up */
+	HOLD_INTERRUPTS();
+
+	/*
+	 * set the current transaction state information appropriately during
+	 * the processing
+	 */
+	s->state = TRANS_PREPARE;
+
+	/* Tell bufmgr and smgr to prepare for commit */
+	BufmgrCommit();
+
+	/*
+	 * Reserve the GID for this transaction. This could fail if the
+	 * requested GID is invalid or already in use.
+	 */
+	gxact = MarkAsPreparing(xid, MyDatabaseId, prepareGID, GetUserId());
+	prepareGID = NULL;			/* consumed; don't let it be used again */
+
+	/*
+	 * Collect data for the 2PC state file.  Note that in general, no actual
+	 * state change should happen in the called modules during this step,
+	 * since it's still possible to fail before commit, and in that case we
+	 * want transaction abort to be able to clean up.  (In particular, the
+	 * AtPrepare routines may error out if they find cases they cannot
+	 * handle.)  State cleanup should happen in the PostPrepare routines
+	 * below.  However, some modules can go ahead and clear state here
+	 * because they wouldn't do anything with it during abort anyway.
+	 *
+	 * Note: because the 2PC state file records will be replayed in the same
+	 * order they are made, the order of these calls has to match the order
+	 * in which we want things to happen during COMMIT PREPARED or
+	 * ROLLBACK PREPARED; in particular, pay attention to whether things
+	 * should happen before or after releasing the transaction's locks.
+	 */
+	StartPrepare(gxact);
+
+	AtPrepare_Notify();
+	AtPrepare_UpdateFlatFiles();
+	AtPrepare_Inval();
+	AtPrepare_Locks();
+
+	/*
+	 * Here is where we really truly prepare.
+	 *
+	 * We have to record transaction prepares even if we didn't
+	 * make any updates, because the transaction manager might
+	 * get confused if we lose a global transaction.
+	 */
+	EndPrepare(gxact);
+
+	/*
+	 * Mark the prepared transaction as valid.  As soon as we mark ourselves
+	 * not running in MyProc below, others can commit/rollback the xact.
+	 *
+	 * NB: a side effect of this is to make a dummy ProcArray entry for the
+	 * prepared XID.  This must happen before we clear the XID from MyProc,
+	 * else there is a window where the XID is not running according to
+	 * TransactionIdIsInProgress, and onlookers would be entitled to assume
+	 * the xact crashed.  Instead we have a window where the same XID
+	 * appears twice in ProcArray, which is OK.
+	 */
+	MarkAsPrepared(gxact);
+
+	/*
+	 * Now we clean up backend-internal state and release internal
+	 * resources.
+	 */
+
+	/* Break the chain of back-links in the XLOG records I output */
+	MyLastRecPtr.xrecoff = 0;
+	MyXactMadeXLogEntry = false;
+	MyXactMadeTempRelUpdate = false;
+
+	/*
+	 * Let others know about no transaction in progress by me.  This has
+	 * to be done *after* the prepared transaction has been marked valid,
+	 * else someone may think it is unlocked and recyclable.
+	 */
+
+	/* Lock ProcArrayLock because that's what GetSnapshotData uses. */
+	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+	MyProc->xid = InvalidTransactionId;
+	MyProc->xmin = InvalidTransactionId;
+
+	/* Clear the subtransaction-XID cache too while holding the lock */
+	MyProc->subxids.nxids = 0;
+	MyProc->subxids.overflowed = false;
+
+	LWLockRelease(ProcArrayLock);
+
+	/*
+	 * This is all post-transaction cleanup.  Note that if an error is raised
+	 * here, it's too late to abort the transaction.  This should be just
+	 * noncritical resource releasing.  See notes in CommitTransaction.
+	 */
+
+	CallXactCallbacks(XACT_EVENT_PREPARE);
+
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_BEFORE_LOCKS,
+						 true, true);
+
+	/* Check we've released all buffer pins */
+	AtEOXact_Buffers(true);
+
+	/* notify and flatfiles don't need a postprepare call */
+
+	PostPrepare_Inval();
+
+	PostPrepare_smgr();
+
+	AtEOXact_MultiXact();
+
+	PostPrepare_Locks(xid);
+
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_LOCKS,
+						 true, true);
+	ResourceOwnerRelease(TopTransactionResourceOwner,
+						 RESOURCE_RELEASE_AFTER_LOCKS,
+						 true, true);
+
+	/* PREPARE acts the same as COMMIT as far as GUC is concerned */
+	AtEOXact_GUC(true, false);
+	AtEOXact_SPI(true);
+	AtEOXact_on_commit_actions(true);
+	AtEOXact_Namespace(true);
+	/* smgrcommit already done */
+	AtEOXact_Files();
+
+	/* Drop the top-level resource owner and every pointer that refers to it */
+	CurrentResourceOwner = NULL;
+	ResourceOwnerDelete(TopTransactionResourceOwner);
+	s->curTransactionOwner = NULL;
+	CurTransactionResourceOwner = NULL;
+	TopTransactionResourceOwner = NULL;
+
+	AtCommit_Memory();
+
+	/* Reset the transaction state block to its no-transaction values */
+	s->transactionId = InvalidTransactionId;
+	s->subTransactionId = InvalidSubTransactionId;
+	s->nestingLevel = 0;
+	s->childXids = NIL;
+
+	/*
+	 * done with 1st phase commit processing, set current transaction
+	 * state back to default
+	 */
+	s->state = TRANS_DEFAULT;
+
+	RESUME_INTERRUPTS();
+}
+
+
 /*
  *	AbortTransaction
  */
@@ -1640,7 +1869,7 @@ AbortTransaction(void)
 	/*
 	 * check the current transaction state
 	 */
-	if (s->state != TRANS_INPROGRESS)
+	if (s->state != TRANS_INPROGRESS && s->state != TRANS_PREPARE)
 		elog(WARNING, "AbortTransaction while in %s state",
 			 TransStateAsString(s->state));
 	Assert(s->parent == NULL);
@@ -1833,6 +2062,7 @@ StartTransactionCommand(void)
 		case TBLOCK_SUBABORT_PENDING:
 		case TBLOCK_SUBRESTART:
 		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
 			elog(ERROR, "StartTransactionCommand: unexpected state %s",
 				 BlockStateAsString(s->blockState));
 			break;
@@ -1934,6 +2164,15 @@ CommitTransactionCommand(void)
 			s->blockState = TBLOCK_DEFAULT;
 			break;
 
+			/*
+			 * We are completing a "PREPARE TRANSACTION" command.  Do it and
+			 * return to the idle state.
+			 */
+		case TBLOCK_PREPARE:
+			PrepareTransaction();
+			s->blockState = TBLOCK_DEFAULT;
+			break;
+
 			/*
 			 * We were just issued a SAVEPOINT inside a transaction block.
 			 * Start a subtransaction.	(DefineSavepoint already did
@@ -1964,6 +2203,12 @@ CommitTransactionCommand(void)
 				CommitTransaction();
 				s->blockState = TBLOCK_DEFAULT;
 			}
+			else if (s->blockState == TBLOCK_PREPARE)
+			{
+				Assert(s->parent == NULL);
+				PrepareTransaction();
+				s->blockState = TBLOCK_DEFAULT;
+			}
 			else
 			{
 				Assert(s->blockState == TBLOCK_INPROGRESS ||
@@ -2155,6 +2400,17 @@ AbortCurrentTransaction(void)
 			s->blockState = TBLOCK_DEFAULT;
 			break;
 
+			/*
+			 * Here, we failed while trying to PREPARE.  Clean up the
+			 * transaction and return to idle state (we do not want to
+			 * stay in the transaction).
+			 */
+		case TBLOCK_PREPARE:
+			AbortTransaction();
+			CleanupTransaction();
+			s->blockState = TBLOCK_DEFAULT;
+			break;
+
 			/*
 			 * We got an error inside a subtransaction.  Abort just the
 			 * subtransaction, and go to the persistent SUBABORT state
@@ -2487,12 +2743,64 @@ BeginTransactionBlock(void)
 		case TBLOCK_SUBABORT_PENDING:
 		case TBLOCK_SUBRESTART:
 		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
 			elog(FATAL, "BeginTransactionBlock: unexpected state %s",
 				 BlockStateAsString(s->blockState));
 			break;
 	}
 }
 
+/*
+ *	PrepareTransactionBlock
+ *		This executes a PREPARE command.
+ *
+ * The result is TRUE if the transaction is now slated to be prepared, and
+ * FALSE if PREPARE amounted to a ROLLBACK (or there was no transaction).
+ *
+ * All that happens here is a change of blockState plus stashing the GID;
+ * the upcoming PrepareTransaction() does the real work.  Things are split
+ * this way because it's not convenient to change memory context, resource
+ * owner, etc while executing inside a Portal.
+ */
+bool
+PrepareTransactionBlock(char *gid)
+{
+	TransactionState top;
+
+	/* Set up to commit the current transaction; done if that didn't work */
+	if (!EndTransactionBlock())
+		return false;
+
+	/* Climb to the outermost level of the transaction state stack */
+	top = CurrentTransactionState;
+	while (top->parent != NULL)
+		top = top->parent;
+
+	/*
+	 * Only a transaction block that EndTransactionBlock left in TBLOCK_END
+	 * can be redirected into a PREPARE.
+	 */
+	if (top->blockState != TBLOCK_END)
+	{
+		/*
+		 * ignore case where we are not in a transaction;
+		 * EndTransactionBlock already issued a warning.
+		 */
+		Assert(top->blockState == TBLOCK_STARTED);
+
+		/* Don't send back a PREPARE result tag... */
+		return false;
+	}
+
+	/* Save GID where PrepareTransaction can find it again */
+	prepareGID = MemoryContextStrdup(TopTransactionContext, gid);
+
+	/* From here on the outer block state says PREPARE rather than COMMIT */
+	top->blockState = TBLOCK_PREPARE;
+
+	return true;
+}
+
 /*
  *	EndTransactionBlock
  *		This executes a COMMIT command.
@@ -2603,6 +2911,7 @@ EndTransactionBlock(void)
 		case TBLOCK_SUBABORT_PENDING:
 		case TBLOCK_SUBRESTART:
 		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
 			elog(FATAL, "EndTransactionBlock: unexpected state %s",
 				 BlockStateAsString(s->blockState));
 			break;
@@ -2694,6 +3003,7 @@ UserAbortTransactionBlock(void)
 		case TBLOCK_SUBABORT_PENDING:
 		case TBLOCK_SUBRESTART:
 		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
 			elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
 				 BlockStateAsString(s->blockState));
 			break;
@@ -2740,6 +3050,7 @@ DefineSavepoint(char *name)
 		case TBLOCK_SUBABORT_PENDING:
 		case TBLOCK_SUBRESTART:
 		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
 			elog(FATAL, "DefineSavepoint: unexpected state %s",
 				 BlockStateAsString(s->blockState));
 			break;
@@ -2795,6 +3106,7 @@ ReleaseSavepoint(List *options)
 		case TBLOCK_SUBABORT_PENDING:
 		case TBLOCK_SUBRESTART:
 		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
 			elog(FATAL, "ReleaseSavepoint: unexpected state %s",
 				 BlockStateAsString(s->blockState));
 			break;
@@ -2892,6 +3204,7 @@ RollbackToSavepoint(List *options)
 		case TBLOCK_SUBABORT_PENDING:
 		case TBLOCK_SUBRESTART:
 		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
 			elog(FATAL, "RollbackToSavepoint: unexpected state %s",
 				 BlockStateAsString(s->blockState));
 			break;
@@ -2999,6 +3312,7 @@ BeginInternalSubTransaction(char *name)
 		case TBLOCK_SUBABORT_PENDING:
 		case TBLOCK_SUBRESTART:
 		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
 			elog(FATAL, "BeginInternalSubTransaction: unexpected state %s",
 				 BlockStateAsString(s->blockState));
 			break;
@@ -3064,6 +3378,7 @@ RollbackAndReleaseCurrentSubTransaction(void)
 		case TBLOCK_SUBABORT_PENDING:
 		case TBLOCK_SUBRESTART:
 		case TBLOCK_SUBABORT_RESTART:
+		case TBLOCK_PREPARE:
 			elog(FATAL, "RollbackAndReleaseCurrentSubTransaction: unexpected state %s",
 				 BlockStateAsString(s->blockState));
 			break;
@@ -3111,6 +3426,7 @@ AbortOutOfAnyTransaction(void)
 			case TBLOCK_INPROGRESS:
 			case TBLOCK_END:
 			case TBLOCK_ABORT_PENDING:
+			case TBLOCK_PREPARE:
 				/* In a transaction, so clean up */
 				AbortTransaction();
 				CleanupTransaction();
@@ -3202,6 +3518,7 @@ TransactionBlockStatusCode(void)
 		case TBLOCK_SUBINPROGRESS:
 		case TBLOCK_END:
 		case TBLOCK_SUBEND:
+		case TBLOCK_PREPARE:
 			return 'T';			/* in transaction */
 		case TBLOCK_ABORT:
 		case TBLOCK_SUBABORT:
@@ -3684,6 +4001,8 @@ BlockStateAsString(TBlockState blockState)
 			return "ABORT END";
 		case TBLOCK_ABORT_PENDING:
 			return "ABORT PEND";
+		case TBLOCK_PREPARE:
+			return "PREPARE";
 		case TBLOCK_SUBBEGIN:
 			return "SUB BEGIN";
 		case TBLOCK_SUBINPROGRESS:
@@ -3717,12 +4036,14 @@ TransStateAsString(TransState state)
 			return "DEFAULT";
 		case TRANS_START:
 			return "START";
+		case TRANS_INPROGRESS:
+			return "INPROGR";
 		case TRANS_COMMIT:
 			return "COMMIT";
 		case TRANS_ABORT:
 			return "ABORT";
-		case TRANS_INPROGRESS:
-			return "INPROGR";
+		case TRANS_PREPARE:
+			return "PREPARE";
 	}
 	return "UNRECOGNIZED";
 }
@@ -3767,6 +4088,76 @@ xactGetCommittedChildren(TransactionId **ptr)
  *	XLOG support routines
  */
 
+static void		/* redo a commit record; used for COMMIT and COMMIT PREPARED */
+xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid)
+{
+	TransactionId *sub_xids;	/* subxact XIDs, stored right after xnodes[] */
+	TransactionId max_xid;		/* latest XID mentioned in the record */
+	int			i;
+
+	TransactionIdCommit(xid);	/* the committing transaction itself */
+
+	/* Mark committed subtransactions as committed */
+	sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
+	TransactionIdCommitTree(xlrec->nsubxacts, sub_xids);
+
+	/* Make sure nextXid is beyond any XID mentioned in the record */
+	max_xid = xid;
+	for (i = 0; i < xlrec->nsubxacts; i++)
+	{
+		if (TransactionIdPrecedes(max_xid, sub_xids[i]))
+			max_xid = sub_xids[i];
+	}
+	if (TransactionIdFollowsOrEquals(max_xid,
+									 ShmemVariableCache->nextXid))
+	{
+		ShmemVariableCache->nextXid = max_xid;
+		TransactionIdAdvance(ShmemVariableCache->nextXid);
+	}
+
+	/* Make sure files supposed to be dropped are dropped (xnodes[0..nrels-1]) */
+	for (i = 0; i < xlrec->nrels; i++)
+	{
+		XLogCloseRelation(xlrec->xnodes[i]);
+		smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
+	}
+}
+
+static void		/* redo an abort record; used for ABORT and ABORT PREPARED */
+xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
+{
+	TransactionId *sub_xids;	/* subxact XIDs, stored right after xnodes[] */
+	TransactionId max_xid;		/* latest XID mentioned in the record */
+	int			i;
+
+	TransactionIdAbort(xid);	/* the aborting transaction itself */
+
+	/* Mark subtransactions as aborted */
+	sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
+	TransactionIdAbortTree(xlrec->nsubxacts, sub_xids);
+
+	/* Make sure nextXid is beyond any XID mentioned in the record */
+	max_xid = xid;
+	for (i = 0; i < xlrec->nsubxacts; i++)
+	{
+		if (TransactionIdPrecedes(max_xid, sub_xids[i]))
+			max_xid = sub_xids[i];
+	}
+	if (TransactionIdFollowsOrEquals(max_xid,
+									 ShmemVariableCache->nextXid))
+	{
+		ShmemVariableCache->nextXid = max_xid;
+		TransactionIdAdvance(ShmemVariableCache->nextXid);
+	}
+
+	/* Make sure files supposed to be dropped are dropped (xnodes[0..nrels-1]) */
+	for (i = 0; i < xlrec->nrels; i++)
+	{
+		XLogCloseRelation(xlrec->xnodes[i]);
+		smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
+	}
+}
+
 void
 xact_redo(XLogRecPtr lsn, XLogRecord *record)
 {
@@ -3775,138 +4166,137 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record)
 	if (info == XLOG_XACT_COMMIT)
 	{
 		xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
-		TransactionId *sub_xids;
-		TransactionId max_xid;
-		int			i;
 
-		TransactionIdCommit(record->xl_xid);
+		xact_redo_commit(xlrec, record->xl_xid);
+	}
+	else if (info == XLOG_XACT_ABORT)
+	{
+		xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
 
-		/* Mark committed subtransactions as committed */
-		sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
-		TransactionIdCommitTree(xlrec->nsubxacts, sub_xids);
+		xact_redo_abort(xlrec, record->xl_xid);
+	}
+	else if (info == XLOG_XACT_PREPARE)
+	{
+		/* the record contents are exactly the 2PC file */
+		RecreateTwoPhaseFile(record->xl_xid,
+							 XLogRecGetData(record), record->xl_len);
+	}
+	else if (info == XLOG_XACT_COMMIT_PREPARED)
+	{
+		xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) XLogRecGetData(record);
 
-		/* Make sure nextXid is beyond any XID mentioned in the record */
-		max_xid = record->xl_xid;
-		for (i = 0; i < xlrec->nsubxacts; i++)
-		{
-			if (TransactionIdPrecedes(max_xid, sub_xids[i]))
-				max_xid = sub_xids[i];
-		}
-		if (TransactionIdFollowsOrEquals(max_xid,
-										 ShmemVariableCache->nextXid))
-		{
-			ShmemVariableCache->nextXid = max_xid;
-			TransactionIdAdvance(ShmemVariableCache->nextXid);
-		}
+		xact_redo_commit(&xlrec->crec, xlrec->xid);
+		RemoveTwoPhaseFile(xlrec->xid, false);
+	}
+	else if (info == XLOG_XACT_ABORT_PREPARED)
+	{
+		xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) XLogRecGetData(record);
 
-		/* Make sure files supposed to be dropped are dropped */
+		xact_redo_abort(&xlrec->arec, xlrec->xid);
+		RemoveTwoPhaseFile(xlrec->xid, false);
+	}
+	else
+		elog(PANIC, "xact_redo: unknown op code %u", info);
+}
+
+static void
+xact_desc_commit(char *buf, xl_xact_commit *xlrec)
+{
+	struct tm  *tm = localtime(&xlrec->xtime);
+	int			i;
+
+	sprintf(buf + strlen(buf), "%04u-%02u-%02u %02u:%02u:%02u",
+			tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
+			tm->tm_hour, tm->tm_min, tm->tm_sec);
+	if (xlrec->nrels > 0)
+	{
+		sprintf(buf + strlen(buf), "; rels:");
 		for (i = 0; i < xlrec->nrels; i++)
 		{
-			XLogCloseRelation(xlrec->xnodes[i]);
-			smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
+			RelFileNode rnode = xlrec->xnodes[i];
+
+			sprintf(buf + strlen(buf), " %u/%u/%u",
+					rnode.spcNode, rnode.dbNode, rnode.relNode);
 		}
 	}
-	else if (info == XLOG_XACT_ABORT)
+	if (xlrec->nsubxacts > 0)
 	{
-		xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
-		TransactionId *sub_xids;
-		TransactionId max_xid;
-		int			i;
-
-		TransactionIdAbort(record->xl_xid);
-
-		/* Mark subtransactions as aborted */
-		sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
-		TransactionIdAbortTree(xlrec->nsubxacts, sub_xids);
+		TransactionId *xacts = (TransactionId *)
+			&xlrec->xnodes[xlrec->nrels];
 
-		/* Make sure nextXid is beyond any XID mentioned in the record */
-		max_xid = record->xl_xid;
+		sprintf(buf + strlen(buf), "; subxacts:");
 		for (i = 0; i < xlrec->nsubxacts; i++)
-		{
-			if (TransactionIdPrecedes(max_xid, sub_xids[i]))
-				max_xid = sub_xids[i];
-		}
-		if (TransactionIdFollowsOrEquals(max_xid,
-										 ShmemVariableCache->nextXid))
-		{
-			ShmemVariableCache->nextXid = max_xid;
-			TransactionIdAdvance(ShmemVariableCache->nextXid);
-		}
+			sprintf(buf + strlen(buf), " %u", xacts[i]);
+	}
+}
+
+static void
+xact_desc_abort(char *buf, xl_xact_abort *xlrec)
+{
+	struct tm  *tm = localtime(&xlrec->xtime);
+	int			i;
 
-		/* Make sure files supposed to be dropped are dropped */
+	sprintf(buf + strlen(buf), "%04u-%02u-%02u %02u:%02u:%02u",
+			tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
+			tm->tm_hour, tm->tm_min, tm->tm_sec);
+	if (xlrec->nrels > 0)
+	{
+		sprintf(buf + strlen(buf), "; rels:");
 		for (i = 0; i < xlrec->nrels; i++)
 		{
-			XLogCloseRelation(xlrec->xnodes[i]);
-			smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
+			RelFileNode rnode = xlrec->xnodes[i];
+
+			sprintf(buf + strlen(buf), " %u/%u/%u",
+					rnode.spcNode, rnode.dbNode, rnode.relNode);
 		}
 	}
-	else
-		elog(PANIC, "xact_redo: unknown op code %u", info);
+	if (xlrec->nsubxacts > 0)
+	{
+		TransactionId *xacts = (TransactionId *)
+			&xlrec->xnodes[xlrec->nrels];
+
+		sprintf(buf + strlen(buf), "; subxacts:");
+		for (i = 0; i < xlrec->nsubxacts; i++)
+			sprintf(buf + strlen(buf), " %u", xacts[i]);
+	}
 }
 
 void
 xact_desc(char *buf, uint8 xl_info, char *rec)
 {
 	uint8		info = xl_info & ~XLR_INFO_MASK;
-	int			i;
 
 	if (info == XLOG_XACT_COMMIT)
 	{
 		xl_xact_commit *xlrec = (xl_xact_commit *) rec;
-		struct tm  *tm = localtime(&xlrec->xtime);
-
-		sprintf(buf + strlen(buf), "commit: %04u-%02u-%02u %02u:%02u:%02u",
-				tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
-				tm->tm_hour, tm->tm_min, tm->tm_sec);
-		if (xlrec->nrels > 0)
-		{
-			sprintf(buf + strlen(buf), "; rels:");
-			for (i = 0; i < xlrec->nrels; i++)
-			{
-				RelFileNode rnode = xlrec->xnodes[i];
 
-				sprintf(buf + strlen(buf), " %u/%u/%u",
-						rnode.spcNode, rnode.dbNode, rnode.relNode);
-			}
-		}
-		if (xlrec->nsubxacts > 0)
-		{
-			TransactionId *xacts = (TransactionId *)
-			&xlrec->xnodes[xlrec->nrels];
-
-			sprintf(buf + strlen(buf), "; subxacts:");
-			for (i = 0; i < xlrec->nsubxacts; i++)
-				sprintf(buf + strlen(buf), " %u", xacts[i]);
-		}
+		strcat(buf, "commit: ");
+		xact_desc_commit(buf, xlrec);
 	}
 	else if (info == XLOG_XACT_ABORT)
 	{
 		xl_xact_abort *xlrec = (xl_xact_abort *) rec;
-		struct tm  *tm = localtime(&xlrec->xtime);
 
-		sprintf(buf + strlen(buf), "abort: %04u-%02u-%02u %02u:%02u:%02u",
-				tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
-				tm->tm_hour, tm->tm_min, tm->tm_sec);
-		if (xlrec->nrels > 0)
-		{
-			sprintf(buf + strlen(buf), "; rels:");
-			for (i = 0; i < xlrec->nrels; i++)
-			{
-				RelFileNode rnode = xlrec->xnodes[i];
+		strcat(buf, "abort: ");
+		xact_desc_abort(buf, xlrec);
+	}
+	else if (info == XLOG_XACT_PREPARE)
+	{
+		strcat(buf, "prepare");
+	}
+	else if (info == XLOG_XACT_COMMIT_PREPARED)
+	{
+		xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) rec;
 
-				sprintf(buf + strlen(buf), " %u/%u/%u",
-						rnode.spcNode, rnode.dbNode, rnode.relNode);
-			}
-		}
-		if (xlrec->nsubxacts > 0)
-		{
-			TransactionId *xacts = (TransactionId *)
-			&xlrec->xnodes[xlrec->nrels];
+		sprintf(buf + strlen(buf), "commit %u: ", xlrec->xid);
+		xact_desc_commit(buf, &xlrec->crec);
+	}
+	else if (info == XLOG_XACT_ABORT_PREPARED)
+	{
+		xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) rec;
 
-			sprintf(buf + strlen(buf), "; subxacts:");
-			for (i = 0; i < xlrec->nsubxacts; i++)
-				sprintf(buf + strlen(buf), " %u", xacts[i]);
-		}
+		sprintf(buf + strlen(buf), "abort %u: ", xlrec->xid);
+		xact_desc_abort(buf, &xlrec->arec);
 	}
 	else
 		strcat(buf, "UNKNOWN");
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index c5469d174f2ad38640c7bb85d7b27a08c33f75ed..15b82ee9be8fdf6d4fb78b5a08ae523080796257 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.200 2005/06/15 01:36:08 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.201 2005/06/17 22:32:43 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -25,6 +25,7 @@
 #include "access/clog.h"
 #include "access/multixact.h"
 #include "access/subtrans.h"
+#include "access/twophase.h"
 #include "access/xact.h"
 #include "access/xlog.h"
 #include "access/xlog_internal.h"
@@ -814,18 +815,6 @@ begin:;
 	/* Compute record's XLOG location */
 	INSERT_RECPTR(RecPtr, Insert, curridx);
 
-	/* If first XLOG record of transaction, save it in PGPROC array */
-	if (MyLastRecPtr.xrecoff == 0 && !no_tran)
-	{
-		/*
-		 * We do not acquire ProcArrayLock here because of possible deadlock.
-		 * Anyone who wants to inspect other procs' logRec must acquire
-		 * WALInsertLock, instead.	A better solution would be a per-PROC
-		 * spinlock, but no time for that before 7.2 --- tgl 12/19/01.
-		 */
-		MyProc->logRec = RecPtr;
-	}
-
 #ifdef WAL_DEBUG
 	if (XLOG_DEBUG)
 	{
@@ -3827,6 +3816,7 @@ BootStrapXLOG(void)
 	BootStrapCLOG();
 	BootStrapSUBTRANS();
 	BootStrapMultiXact();
+
 	free(buffer);
 }
 
@@ -4268,6 +4258,7 @@ StartupXLOG(void)
 	uint32		endLogSeg;
 	XLogRecord *record;
 	uint32		freespace;
+	TransactionId oldestActiveXID;
 
 	CritSectionCount++;
 
@@ -4678,33 +4669,8 @@ StartupXLOG(void)
 		XLogCtl->Write.curridx = NextBufIdx(0);
 	}
 
-#ifdef NOT_USED
-	/* UNDO */
-	if (InRecovery)
-	{
-		RecPtr = ReadRecPtr;
-		if (XLByteLT(checkPoint.undo, RecPtr))
-		{
-			ereport(LOG,
-					(errmsg("undo starts at %X/%X",
-							RecPtr.xlogid, RecPtr.xrecoff)));
-			do
-			{
-				record = ReadRecord(&RecPtr, PANIC);
-				if (TransactionIdIsValid(record->xl_xid) &&
-					!TransactionIdDidCommit(record->xl_xid))
-					RmgrTable[record->xl_rmid].rm_undo(EndRecPtr, record);
-				RecPtr = record->xl_prev;
-			} while (XLByteLE(checkPoint.undo, RecPtr));
-			ereport(LOG,
-					(errmsg("undo done at %X/%X",
-							ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
-		}
-		else
-			ereport(LOG,
-					(errmsg("undo is not required")));
-	}
-#endif
+	/* Pre-scan prepared transactions to find out the range of XIDs present */
+	oldestActiveXID = PrescanPreparedTransactions();
 
 	if (InRecovery)
 	{
@@ -4767,9 +4733,12 @@ StartupXLOG(void)
 
 	/* Start up the commit log and related stuff, too */
 	StartupCLOG();
-	StartupSUBTRANS();
+	StartupSUBTRANS(oldestActiveXID);
 	StartupMultiXact();
 
+	/* Reload shared-memory state for prepared transactions */
+	RecoverPreparedTransactions();
+
 	ereport(LOG,
 			(errmsg("database system is ready")));
 	CritSectionCount--;
@@ -5095,31 +5064,6 @@ CreateCheckPoint(bool shutdown, bool force)
 		SpinLockRelease_NoHoldoff(&xlogctl->info_lck);
 	}
 
-	/*
-	 * Get UNDO record ptr - this is oldest of PGPROC->logRec values. We
-	 * do this while holding insert lock to ensure that we won't miss any
-	 * about-to-commit transactions (UNDO must include all xacts that have
-	 * commits after REDO point).
-	 *
-	 * XXX temporarily ifdef'd out to avoid three-way deadlock condition:
-	 * GetUndoRecPtr needs to grab ProcArrayLock to ensure that it is looking
-	 * at a stable set of proc records, but grabbing ProcArrayLock while
-	 * holding WALInsertLock is no good.  GetNewTransactionId may cause a
-	 * WAL record to be written while holding XidGenLock, and
-	 * GetSnapshotData needs to get XidGenLock while holding ProcArrayLock,
-	 * so there's a risk of deadlock. Need to find a better solution.  See
-	 * pgsql-hackers discussion of 17-Dec-01.
-	 *
-	 * XXX actually, the whole UNDO code is dead code and unlikely to ever be
-	 * revived, so the lack of a good solution here is not troubling.
-	 */
-#ifdef NOT_USED
-	checkPoint.undo = GetUndoRecPtr();
-
-	if (shutdown && checkPoint.undo.xrecoff != 0)
-		elog(PANIC, "active transaction while database system is shutting down");
-#endif
-
 	/*
 	 * Now we can release insert lock and checkpoint start lock, allowing
 	 * other xacts to proceed even while we are flushing disk buffers.
@@ -5195,22 +5139,8 @@ CreateCheckPoint(bool shutdown, bool force)
 	/*
 	 * Select point at which we can truncate the log, which we base on the
 	 * prior checkpoint's earliest info.
-	 *
-	 * With UNDO support: oldest item is redo or undo, whichever is older;
-	 * but watch out for case that undo = 0.
-	 *
-	 * Without UNDO support: just use the redo pointer.  This allows xlog
-	 * space to be freed much faster when there are long-running
-	 * transactions.
 	 */
-#ifdef NOT_USED
-	if (ControlFile->checkPointCopy.undo.xrecoff != 0 &&
-		XLByteLT(ControlFile->checkPointCopy.undo,
-				 ControlFile->checkPointCopy.redo))
-		XLByteToSeg(ControlFile->checkPointCopy.undo, _logId, _logSeg);
-	else
-#endif
-		XLByteToSeg(ControlFile->checkPointCopy.redo, _logId, _logSeg);
+	XLByteToSeg(ControlFile->checkPointCopy.redo, _logId, _logSeg);
 
 	/*
 	 * Update the control file.
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index d66bb780b4fce15f085f9d813e849f92e7f59468..2e9a3ecf7c704e16c827bd24c259eac52dd20811 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1996-2005, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.13 2005/05/17 21:46:09 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.14 2005/06/17 22:32:43 tgl Exp $
  */
 
 CREATE VIEW pg_user AS 
@@ -102,6 +102,39 @@ CREATE VIEW pg_stats AS
 
 REVOKE ALL on pg_statistic FROM public;
 
+CREATE VIEW pg_locks AS 
+    SELECT * 
+    FROM pg_lock_status() AS L
+    (locktype text, database oid, relation oid, page int4, tuple int2,
+     transaction xid, classid oid, objid oid, objsubid int2,
+     pid int4, mode text, granted boolean);
+
+CREATE VIEW pg_prepared_xacts AS  -- pg_prepared_xact() rows with owner/database ids resolved to names
+    SELECT P.transaction, P.gid, U.usename AS owner, D.datname AS database
+    FROM pg_prepared_xact() AS P
+    (transaction xid, gid text, ownerid int4, dbid oid)  -- column names/types assigned to the function's result
+         LEFT JOIN pg_database D ON P.dbid = D.oid  -- LEFT JOINs: database/owner are NULL when no row matches
+         LEFT JOIN pg_shadow U ON P.ownerid = U.usesysid;
+
+CREATE VIEW pg_settings AS 
+    SELECT * 
+    FROM pg_show_all_settings() AS A 
+    (name text, setting text, category text, short_desc text, extra_desc text,
+     context text, vartype text, source text, min_val text, max_val text);
+
+CREATE RULE pg_settings_u AS 
+    ON UPDATE TO pg_settings 
+    WHERE new.name = old.name DO 
+    SELECT set_config(old.name, new.setting, 'f');
+
+CREATE RULE pg_settings_n AS 
+    ON UPDATE TO pg_settings 
+    DO INSTEAD NOTHING;
+
+GRANT SELECT, UPDATE ON pg_settings TO PUBLIC;
+
+-- Statistics views
+
 CREATE VIEW pg_stat_all_tables AS 
     SELECT 
             C.oid AS relid, 
@@ -258,27 +291,3 @@ CREATE VIEW pg_stat_database AS
                     pg_stat_get_db_blocks_hit(D.oid) AS blks_read, 
             pg_stat_get_db_blocks_hit(D.oid) AS blks_hit 
     FROM pg_database D;
-
-CREATE VIEW pg_locks AS 
-    SELECT * 
-    FROM pg_lock_status() AS L
-    (locktype text, database oid, relation oid, page int4, tuple int2,
-     transaction xid, classid oid, objid oid, objsubid int2,
-     pid int4, mode text, granted boolean);
-
-CREATE VIEW pg_settings AS 
-    SELECT * 
-    FROM pg_show_all_settings() AS A 
-    (name text, setting text, category text, short_desc text, extra_desc text,
-     context text, vartype text, source text, min_val text, max_val text);
-
-CREATE RULE pg_settings_u AS 
-    ON UPDATE TO pg_settings 
-    WHERE new.name = old.name DO 
-    SELECT set_config(old.name, new.setting, 'f');
-
-CREATE RULE pg_settings_n AS 
-    ON UPDATE TO pg_settings 
-    DO INSTEAD NOTHING;
-
-GRANT SELECT, UPDATE ON pg_settings TO PUBLIC;
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 9914c724b710aadcdba3f683097806042a42b1bc..142b02dfaf88d7cf9eabe16e022c59379ab0b5f5 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/async.c,v 1.122 2005/05/06 17:24:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/async.c,v 1.123 2005/06/17 22:32:43 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -78,6 +78,7 @@
 #include <netinet/in.h>
 
 #include "access/heapam.h"
+#include "access/twophase_rmgr.h"
 #include "catalog/pg_listener.h"
 #include "commands/async.h"
 #include "libpq/libpq.h"
@@ -407,6 +408,36 @@ Async_UnlistenOnExit(int code, Datum arg)
 	CommitTransactionCommand();
 }
 
+
+/*
+ *--------------------------------------------------------------
+ * AtPrepare_Notify
+ *
+ *		Called at the prepare phase of a two-phase transaction.
+ *		Saves each pending notify name as a two-phase state record.
+ *--------------------------------------------------------------
+ */
+void
+AtPrepare_Notify(void)
+{
+	ListCell *p;
+
+	foreach(p, pendingNotifies)
+	{
+		const char *relname = (const char *) lfirst(p);
+
+		RegisterTwoPhaseRecord(TWOPHASE_RM_NOTIFY_ID, 0,
+							   relname, strlen(relname) + 1);	/* +1 keeps the NUL */
+	}
+
+	/*
+	 * We can clear the state immediately, rather than needing a separate
+	 * PostPrepare call, because if the transaction fails we'd just
+	 * discard the state anyway.
+	 */
+	ClearPendingNotifies();
+}
+
 /*
  *--------------------------------------------------------------
  * AtCommit_Notify
@@ -1016,8 +1047,9 @@ AsyncExistsPendingNotify(const char *relname)
 
 	foreach(p, pendingNotifies)
 	{
-		/* Use NAMEDATALEN for relname comparison.	  DZ - 26-08-1996 */
-		if (strncmp((const char *) lfirst(p), relname, NAMEDATALEN) == 0)
+		const char *prelname = (const char *) lfirst(p);
+
+		if (strcmp(prelname, relname) == 0)
 			return true;
 	}
 
@@ -1037,3 +1069,22 @@ ClearPendingNotifies(void)
 	 */
 	pendingNotifies = NIL;
 }
+
+/*
+ * 2PC processing routine for COMMIT PREPARED case.  recdata is passed to
+ * Async_Notify as a C string (presumably the name AtPrepare_Notify saved --
+ * confirm against the 2PC callback table); xid, info and len are unused.
+ * (We don't have to do anything for ROLLBACK PREPARED.)
+ */
+void
+notify_twophase_postcommit(TransactionId xid, uint16 info,
+						   void *recdata, uint32 len)
+{
+	/*
+	 * Set up to issue the NOTIFY at the end of my own
+	 * current transaction.  (XXX this has some issues if my own
+	 * transaction later rolls back, or if there is any significant
+	 * delay before I commit.  OK for now because we disallow
+	 * COMMIT PREPARED inside a transaction block.)
+	 */
+	Async_Notify((char *) recdata);
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 25a7056500b34117b5b5289b2dd0aae0c1920fd2..2e0d8dbc1a2124790932ee886610182c81bc479b 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.306 2005/06/09 04:18:58 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.307 2005/06/17 22:32:43 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2085,6 +2085,7 @@ _copyTransactionStmt(TransactionStmt *from)
 
 	COPY_SCALAR_FIELD(kind);
 	COPY_NODE_FIELD(options);
+	COPY_STRING_FIELD(gid);
 
 	return newnode;
 }
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index e625ca7f32c325b1bcfb18955dd1dfce46d6adab..a991cf5eed3b98fdebe5360ef530f32e5d44a5b0 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -18,7 +18,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.243 2005/06/09 04:18:58 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.244 2005/06/17 22:32:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1053,6 +1053,7 @@ _equalTransactionStmt(TransactionStmt *a, TransactionStmt *b)
 {
 	COMPARE_SCALAR_FIELD(kind);
 	COMPARE_NODE_FIELD(options);
+	COMPARE_STRING_FIELD(gid);
 
 	return true;
 }
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 6ee5332786577c67dd4f695e844efdf94ec5a73c..d12fa9fa0527c3df0e8b707232a6d0dff8f6837c 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.494 2005/06/15 19:44:05 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.495 2005/06/17 22:32:44 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -387,7 +387,7 @@ static void doNegateFloat(Value *v);
 	ORDER OUT_P OUTER_P OVERLAPS OVERLAY OWNER
 
 	PARTIAL PASSWORD PLACING POSITION
-	PRECISION PRESERVE PREPARE PRIMARY
+	PRECISION PRESERVE PREPARE PREPARED PRIMARY
 	PRIOR PRIVILEGES PROCEDURAL PROCEDURE
 
 	QUOTE
@@ -4121,6 +4121,27 @@ TransactionStmt:
 														(Node *)makeString($4)));
 					$$ = (Node *)n;
 				}
+			| PREPARE TRANSACTION Sconst
+				{
+					TransactionStmt *n = makeNode(TransactionStmt);
+					n->kind = TRANS_STMT_PREPARE;
+					n->gid = $3;
+					$$ = (Node *)n;
+				}
+			| COMMIT PREPARED Sconst
+				{
+					TransactionStmt *n = makeNode(TransactionStmt);
+					n->kind = TRANS_STMT_COMMIT_PREPARED;
+					n->gid = $3;
+					$$ = (Node *)n;
+				}
+			| ROLLBACK PREPARED Sconst
+				{
+					TransactionStmt *n = makeNode(TransactionStmt);
+					n->kind = TRANS_STMT_ROLLBACK_PREPARED;
+					n->gid = $3;
+					$$ = (Node *)n;
+				}
 		;
 
 opt_transaction:	WORK							{}
@@ -6334,19 +6355,18 @@ a_expr:		c_expr									{ $$ = $1; }
 				{
 					$$ = (Node *) makeSimpleA_Expr(AEXPR_OF, "!=", $1, (Node *) $6);
 				}
-			| a_expr BETWEEN opt_asymmetric b_expr AND b_expr			%prec BETWEEN
+			| a_expr BETWEEN opt_asymmetric b_expr AND b_expr		%prec BETWEEN
 				{
 					$$ = (Node *) makeA_Expr(AEXPR_AND, NIL,
 						(Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $4),
 						(Node *) makeSimpleA_Expr(AEXPR_OP, "<=", $1, $6));
 				}
-			| a_expr NOT BETWEEN opt_asymmetric b_expr AND b_expr		%prec BETWEEN
+			| a_expr NOT BETWEEN opt_asymmetric b_expr AND b_expr	%prec BETWEEN
 				{
 					$$ = (Node *) makeA_Expr(AEXPR_OR, NIL,
 						(Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $5),
 						(Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $7));
 				}
-				
 			| a_expr BETWEEN SYMMETRIC b_expr AND b_expr			%prec BETWEEN
 				{
 					$$ = (Node *) makeA_Expr(AEXPR_OR, NIL,
@@ -6367,8 +6387,6 @@ a_expr:		c_expr									{ $$ = $1; }
 						    (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $7),
 						    (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $5)));					
 				}
-				
-				
 			| a_expr IN_P in_expr
 				{
 					/* in_expr returns a SubLink or a list of a_exprs */
@@ -6467,11 +6485,6 @@ a_expr:		c_expr									{ $$ = $1; }
 				}
 		;
 
-opt_asymmetric:		ASYMMETRIC									{}
-			    | /*EMPTY*/								{}
-		;
-
-
 /*
  * Restricted expressions
  *
@@ -7401,6 +7414,10 @@ opt_indirection:
 			| opt_indirection indirection_el		{ $$ = lappend($1, $2); }
 		;
 
+opt_asymmetric: ASYMMETRIC
+			| /*EMPTY*/
+		;
+
 
 /*****************************************************************************
  *
@@ -7855,6 +7872,7 @@ unreserved_keyword:
 			| PARTIAL
 			| PASSWORD
 			| PREPARE
+			| PREPARED
 			| PRESERVE
 			| PRIOR
 			| PRIVILEGES
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c
index 9296067dfaef3376436d993ab7df48f879eec96c..1e0c63a9ce2830b7c219e7f930c2b8b586f74125 100644
--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.156 2005/06/14 23:47:39 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.157 2005/06/17 22:32:44 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -243,6 +243,7 @@ static const ScanKeyword ScanKeywords[] = {
 	{"position", POSITION},
 	{"precision", PRECISION},
 	{"prepare", PREPARE},
+	{"prepared", PREPARED},
 	{"preserve", PRESERVE},
 	{"primary", PRIMARY},
 	{"prior", PRIOR},
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 03057dbcb210765fbb25011ff41d0a8ef58ab358..6ca13944f8aca04833869659a80a9a437cdf0710 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.453 2005/06/14 21:04:39 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.454 2005/06/17 22:32:44 tgl Exp $
  *
  * NOTES
  *
@@ -252,7 +252,7 @@ static void reg_reply(DNSServiceRegistrationReplyErrorType errorCode,
 static void pmdaemonize(void);
 static Port *ConnCreate(int serverFd);
 static void ConnFree(Port *port);
-static void reset_shared(unsigned short port);
+static void reset_shared(int port);
 static void SIGHUP_handler(SIGNAL_ARGS);
 static void pmdie(SIGNAL_ARGS);
 static void reaper(SIGNAL_ARGS);
@@ -1783,7 +1783,7 @@ ClosePostmasterPorts(bool am_syslogger)
  * reset_shared -- reset shared memory and semaphores
  */
 static void
-reset_shared(unsigned short port)
+reset_shared(int port)
 {
 	/*
 	 * Create or re-create shared memory and semaphores.
@@ -1793,7 +1793,7 @@ reset_shared(unsigned short port)
 	 * used to determine IPC keys.	This helps ensure that we will clean
 	 * up dead IPC objects if the postmaster crashes and is restarted.
 	 */
-	CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
+	CreateSharedMemoryAndSemaphores(false, port);
 }
 
 
@@ -3182,7 +3182,7 @@ SubPostmasterMain(int argc, char *argv[])
 		/* BackendRun will close sockets */
 
 		/* Attach process to shared data structures */
-		CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
+		CreateSharedMemoryAndSemaphores(false, 0);
 
 #ifdef USE_SSL
 		/*
@@ -3203,7 +3203,7 @@ SubPostmasterMain(int argc, char *argv[])
 		ClosePostmasterPorts(false);
 
 		/* Attach process to shared data structures */
-		CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
+		CreateSharedMemoryAndSemaphores(false, 0);
 
 		BootstrapMain(argc - 2, argv + 2);
 		proc_exit(0);
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 22333a1f558a07c68ea0e8c4a1099e6d39d18987..0761a8fdf51197491c88393eec75024e92142a42 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.76 2005/05/19 21:35:46 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.77 2005/06/17 22:32:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,6 +17,7 @@
 #include "access/clog.h"
 #include "access/multixact.h"
 #include "access/subtrans.h"
+#include "access/twophase.h"
 #include "access/xlog.h"
 #include "miscadmin.h"
 #include "postmaster/bgwriter.h"
@@ -54,9 +55,7 @@
  * memory.	This is true for a standalone backend, false for a postmaster.
  */
 void
-CreateSharedMemoryAndSemaphores(bool makePrivate,
-								int maxBackends,
-								int port)
+CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
 {
 	PGShmemHeader *seghdr = NULL;
 
@@ -72,15 +71,16 @@ CreateSharedMemoryAndSemaphores(bool makePrivate,
 		 */
 		size = hash_estimate_size(SHMEM_INDEX_SIZE, sizeof(ShmemIndexEnt));
 		size += BufferShmemSize();
-		size += LockShmemSize(maxBackends);
-		size += ProcGlobalShmemSize(maxBackends);
+		size += LockShmemSize();
+		size += ProcGlobalShmemSize();
 		size += XLOGShmemSize();
 		size += CLOGShmemSize();
 		size += SUBTRANSShmemSize();
+		size += TwoPhaseShmemSize();
 		size += MultiXactShmemSize();
 		size += LWLockShmemSize();
-		size += ProcArrayShmemSize(maxBackends);
-		size += SInvalShmemSize(maxBackends);
+		size += ProcArrayShmemSize();
+		size += SInvalShmemSize(MaxBackends);
 		size += FreeSpaceShmemSize();
 		size += BgWriterShmemSize();
 #ifdef EXEC_BACKEND
@@ -100,7 +100,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate,
 		/*
 		 * Create semaphores
 		 */
-		numSemas = ProcGlobalSemas(maxBackends);
+		numSemas = ProcGlobalSemas();
 		numSemas += SpinlockSemas();
 		PGReserveSemaphores(numSemas, port);
 	}
@@ -144,6 +144,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate,
 	XLOGShmemInit();
 	CLOGShmemInit();
 	SUBTRANSShmemInit();
+	TwoPhaseShmemInit();
 	MultiXactShmemInit();
 	InitBufferPool();
 
@@ -151,18 +152,18 @@ CreateSharedMemoryAndSemaphores(bool makePrivate,
 	 * Set up lock manager
 	 */
 	InitLocks();
-	InitLockTable(maxBackends);
+	InitLockTable();
 
 	/*
 	 * Set up process table
 	 */
-	InitProcGlobal(maxBackends);
-	CreateSharedProcArray(maxBackends);
+	InitProcGlobal();
+	CreateSharedProcArray();
 
 	/*
 	 * Set up shared-inval messaging
 	 */
-	CreateSharedInvalidationState(maxBackends);
+	CreateSharedInvalidationState(MaxBackends);
 
 	/*
 	 * Set up free-space map
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 23b198b1497843ad2d3781f00910d39eebcf3219..7c2766e6285bc3a53783c2f39dddac62a1e58e77 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -11,6 +11,11 @@
  * Because of various subtle race conditions it is critical that a backend
  * hold the correct locks while setting or clearing its MyProc->xid field.
  * See notes in GetSnapshotData.
+ *
+ * The process array now also includes PGPROC structures representing
+ * prepared transactions.  The xid and subxids fields of these are valid,
+ * as is the procLocks list.  They can be distinguished from regular backend
+ * PGPROCs at need by checking for pid == 0.
  * 
  *
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
@@ -18,13 +23,14 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.2 2005/05/19 23:57:11 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.3 2005/06/17 22:32:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
 #include "access/subtrans.h"
+#include "access/twophase.h"
 #include "miscadmin.h"
 #include "storage/proc.h"
 #include "storage/procarray.h"
@@ -76,25 +82,23 @@ static void DisplayXidCache(void);
  * Report shared-memory space needed by CreateSharedProcArray.
  */
 int
-ProcArrayShmemSize(int maxBackends)
+ProcArrayShmemSize(void)
 {
-	/* sizeof(ProcArrayStruct) includes the first array element */
-	return MAXALIGN(sizeof(ProcArrayStruct) +
-					(maxBackends - 1) * sizeof(PGPROC *));
+	return MAXALIGN(offsetof(ProcArrayStruct, procs) +
+					(MaxBackends + max_prepared_xacts) * sizeof(PGPROC *));
 }
 
 /*
  * Initialize the shared PGPROC array during postmaster startup.
  */
 void
-CreateSharedProcArray(int maxBackends)
+CreateSharedProcArray(void)
 {
 	bool		found;
 
 	/* Create or attach to the ProcArray shared structure */
 	procArray = (ProcArrayStruct *)
-		ShmemInitStruct("Proc Array", ProcArrayShmemSize(maxBackends),
-						&found);
+		ShmemInitStruct("Proc Array", ProcArrayShmemSize(), &found);
 
 	if (!found)
 	{
@@ -102,18 +106,15 @@ CreateSharedProcArray(int maxBackends)
 		 * We're the first - initialize.
 		 */
 		procArray->numProcs = 0;
-		procArray->maxProcs = maxBackends;
+		procArray->maxProcs = MaxBackends + max_prepared_xacts;
 	}
 }
 
 /*
- * Add my own PGPROC (found in the global MyProc) to the shared array.
- *
- * This must be called during backend startup, after fully initializing
- * the contents of MyProc.
+ * Add the specified PGPROC to the shared array.
  */
 void
-ProcArrayAddMyself(void)
+ProcArrayAdd(PGPROC *proc)
 {
 	ProcArrayStruct *arrayP = procArray;
 
@@ -132,32 +133,32 @@ ProcArrayAddMyself(void)
 				 errmsg("sorry, too many clients already")));
 	}
 
-	arrayP->procs[arrayP->numProcs] = MyProc;
+	arrayP->procs[arrayP->numProcs] = proc;
 	arrayP->numProcs++;
 
 	LWLockRelease(ProcArrayLock);
 }
 
 /*
- * Remove my own PGPROC (found in the global MyProc) from the shared array.
- *
- * This must be called during backend shutdown.
+ * Remove the specified PGPROC from the shared array.
  */
 void
-ProcArrayRemoveMyself(void)
+ProcArrayRemove(PGPROC *proc)
 {
 	ProcArrayStruct *arrayP = procArray;
 	int			index;
 
 #ifdef XIDCACHE_DEBUG
-	DisplayXidCache();
+	/* dump stats at backend shutdown, but not prepared-xact end */
+	if (proc->pid != 0)
+		DisplayXidCache();
 #endif
 
 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
 
 	for (index = 0; index < arrayP->numProcs; index++)
 	{
-		if (arrayP->procs[index] == MyProc)
+		if (arrayP->procs[index] == proc)
 		{
 			arrayP->procs[index] = arrayP->procs[arrayP->numProcs - 1];
 			arrayP->numProcs--;
@@ -169,7 +170,7 @@ ProcArrayRemoveMyself(void)
 	/* Ooops */
 	LWLockRelease(ProcArrayLock);
 
-	elog(LOG, "failed to find my own proc %p in ProcArray", MyProc);
+	elog(LOG, "failed to find proc %p in ProcArray", proc);
 }
 
 
@@ -329,6 +330,55 @@ result_known:
 	return result;
 }
 
+/*
+ * TransactionIdIsActive -- is xid the top-level XID of an active backend?
+ *
+ * Returns true only if a PGPROC with nonzero pid has xid as its top-level
+ * XID.  Unlike TransactionIdIsInProgress, this ignores prepared transactions;
+ * subtransaction XIDs are not examined since current uses don't need that.
+ */
+bool
+TransactionIdIsActive(TransactionId xid)
+{
+	bool		result = false;
+	ProcArrayStruct *arrayP = procArray;
+	int			i;
+
+	/*
+	 * Don't bother checking a transaction older than RecentXmin; it
+	 * could not possibly still be running.
+	 */
+	if (TransactionIdPrecedes(xid, RecentXmin))
+		return false;
+
+	LWLockAcquire(ProcArrayLock, LW_SHARED);
+
+	for (i = 0; i < arrayP->numProcs; i++)
+	{
+		PGPROC	   *proc = arrayP->procs[i];
+
+		/* Fetch xid just once - see GetNewTransactionId */
+		TransactionId pxid = proc->xid;
+
+		if (!TransactionIdIsValid(pxid))
+			continue;			/* no XID assigned in this PGPROC */
+
+		if (proc->pid == 0)
+			continue;			/* ignore prepared transactions */
+
+		if (TransactionIdEquals(pxid, xid))
+		{
+			result = true;
+			break;
+		}
+	}
+
+	LWLockRelease(ProcArrayLock);
+
+	return result;
+}
+
+
 /*
  * GetOldestXmin -- returns oldest transaction that was running
  *					when any current transaction was started.
@@ -441,12 +491,12 @@ GetSnapshotData(Snapshot snapshot, bool serializable)
 		   TransactionIdIsValid(MyProc->xmin));
 
 	/*
-	 * Allocating space for MaxBackends xids is usually overkill;
+	 * Allocating space for maxProcs xids is usually overkill;
 	 * numProcs would be sufficient.  But it seems better to do the
 	 * malloc while not holding the lock, so we can't look at numProcs.
 	 *
 	 * This does open a possibility for avoiding repeated malloc/free: since
-	 * MaxBackends does not change at runtime, we can simply reuse the
+	 * maxProcs does not change at runtime, we can simply reuse the
 	 * previous xip array if any.  (This relies on the fact that all
 	 * callers pass static SnapshotData structs.)
 	 */
@@ -456,7 +506,7 @@ GetSnapshotData(Snapshot snapshot, bool serializable)
 		 * First call for this snapshot
 		 */
 		snapshot->xip = (TransactionId *)
-			malloc(MaxBackends * sizeof(TransactionId));
+			malloc(arrayP->maxProcs * sizeof(TransactionId));
 		if (snapshot->xip == NULL)
 			ereport(ERROR,
 					(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -602,14 +652,21 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself)
 
 /*
  * BackendPidGetProc -- get a backend's PGPROC given its PID
+ *
+ * Returns NULL if not found.  Note that it is up to the caller to be
+ * sure that the question remains meaningful for long enough for the
+ * answer to be used ...
  */
-struct PGPROC *
+PGPROC *
 BackendPidGetProc(int pid)
 {
 	PGPROC	   *result = NULL;
 	ProcArrayStruct *arrayP = procArray;
 	int			index;
 
+	if (pid == 0)				/* never match dummy PGPROCs */
+		return NULL;
+
 	LWLockAcquire(ProcArrayLock, LW_SHARED);
 
 	for (index = 0; index < arrayP->numProcs; index++)
@@ -642,10 +699,8 @@ IsBackendPid(int pid)
  *		active transactions.  This is used as a heuristic to decide if
  *		a pre-XLOG-flush delay is worthwhile during commit.
  *
- * An active transaction is something that has written at least one XLOG
- * record; read-only transactions don't count.  Also, do not count backends
- * that are blocked waiting for locks, since they are not going to get to
- * run until someone else commits.
+ * Do not count backends that are blocked waiting for locks, since they are
+ * not going to get to run until someone else commits.
  */
 int
 CountActiveBackends(void)
@@ -656,7 +711,7 @@ CountActiveBackends(void)
 
 	/*
 	 * Note: for speed, we don't acquire ProcArrayLock.  This is a little bit
-	 * bogus, but since we are only testing xrecoff for zero or nonzero,
+	 * bogus, but since we are only testing fields for zero or nonzero,
 	 * it should be OK.  The result is only used for heuristic purposes
 	 * anyway...
 	 */
@@ -666,7 +721,9 @@ CountActiveBackends(void)
 
 		if (proc == MyProc)
 			continue;			/* do not count myself */
-		if (proc->logRec.xrecoff == 0)
+		if (proc->pid == 0)
+			continue;			/* do not count prepared xacts */
+		if (proc->xid == InvalidTransactionId)
 			continue;			/* do not count if not in a transaction */
 		if (proc->waitLock != NULL)
 			continue;			/* do not count if blocked on a lock */
@@ -676,25 +733,6 @@ CountActiveBackends(void)
 	return count;
 }
 
-/*
- * CountEmptyBackendSlots - count empty slots in backend process table
- *
- * Acquiring the lock here is almost certainly overkill, but just in
- * case fetching an int is not atomic on your machine ...
- */
-int
-CountEmptyBackendSlots(void)
-{
-	int			count;
-
-	LWLockAcquire(ProcArrayLock, LW_SHARED);
-
-	count = procArray->maxProcs - procArray->numProcs;
-
-	LWLockRelease(ProcArrayLock);
-
-	return count;
-}
 
 #define XidCacheRemove(i) \
 	do { \
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index 351ef27bee7d263074e361f05f4b1b066c0132eb..91afd4ed9a5ee5767b30588eb7ccbe299d83cd21 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.76 2005/06/14 22:15:32 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.77 2005/06/17 22:32:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -77,7 +77,7 @@ static LOCKMETHODID LockTableId = INVALID_LOCKMETHOD;
  * Create the lock table described by LockConflicts
  */
 void
-InitLockTable(int maxBackends)
+InitLockTable(void)
 {
 	LOCKMETHODID LongTermTableId;
 
@@ -91,8 +91,7 @@ InitLockTable(int maxBackends)
 	/* number of lock modes is lengthof()-1 because of dummy zero */
 	LockTableId = LockMethodTableInit("LockTable",
 									  LockConflicts,
-									  lengthof(LockConflicts) - 1,
-									  maxBackends);
+									  lengthof(LockConflicts) - 1);
 	if (!LockMethodIsValid(LockTableId))
 		elog(ERROR, "could not initialize lock table");
 	Assert(LockTableId == DEFAULT_LOCKMETHOD);
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 08e579486e0a983b9c31cc141857816fadd19cdc..22d389488d851ef03652a5cd491bb92a2da4a015 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.155 2005/06/14 22:15:32 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.156 2005/06/17 22:32:45 tgl Exp $
  *
  * NOTES
  *	  Outside modules can create a lock table and acquire/release
@@ -33,6 +33,8 @@
 #include <signal.h>
 #include <unistd.h>
 
+#include "access/twophase.h"
+#include "access/twophase_rmgr.h"
 #include "access/xact.h"
 #include "miscadmin.h"
 #include "storage/proc.h"
@@ -44,7 +46,15 @@
 /* This configuration variable is used to set the lock table size */
 int			max_locks_per_xact; /* set by guc.c */
 
-#define NLOCKENTS(maxBackends)	(max_locks_per_xact * (maxBackends))
+#define NLOCKENTS()	(max_locks_per_xact * (MaxBackends + max_prepared_xacts))
+
+
+/* Record that's written to 2PC state file when a lock is persisted */
+typedef struct TwoPhaseLockRecord
+{
+	LOCKTAG		locktag;	/* tag of the lock being persisted */
+	LOCKMODE	lockmode;	/* mode in which that lock is held */
+} TwoPhaseLockRecord;
 
 
 /*
@@ -168,8 +178,7 @@ static void CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock,
 
 
 /*
- * InitLocks -- Init the lock module.  Create a private data
- *		structure for constructing conflict masks.
+ * InitLocks -- Init the lock module.  Nothing to do here at present.
  */
 void
 InitLocks(void)
@@ -222,8 +231,7 @@ LockMethodInit(LockMethod lockMethodTable,
 LOCKMETHODID
 LockMethodTableInit(const char *tabName,
 					const LOCKMASK *conflictsP,
-					int numModes,
-					int maxBackends)
+					int numModes)
 {
 	LockMethod	newLockMethod;
 	LOCKMETHODID lockmethodid;
@@ -239,7 +247,7 @@ LockMethodTableInit(const char *tabName,
 			 numModes, MAX_LOCKMODES - 1);
 
 	/* Compute init/max size to request for lock hashtables */
-	max_table_size = NLOCKENTS(maxBackends);
+	max_table_size = NLOCKENTS();
 	init_table_size = max_table_size / 2;
 
 	/* Allocate a string for the shmem index table lookups. */
@@ -1418,10 +1426,10 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
 	while (proclock)
 	{
 		bool		wakeupNeeded = false;
-		PROCLOCK   *nextHolder;
+		PROCLOCK   *nextplock;
 
 		/* Get link first, since we may unlink/delete this proclock */
-		nextHolder = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
+		nextplock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
 										   offsetof(PROCLOCK, procLink));
 
 		Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
@@ -1474,7 +1482,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
 		CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded);
 
 next_item:
-		proclock = nextHolder;
+		proclock = nextplock;
 	}
 
 	LWLockRelease(masterLock);
@@ -1605,14 +1613,262 @@ LockReassignCurrentOwner(void)
 }
 
 
+/*
+ * AtPrepare_Locks
+ *		Do the preparatory work for a PREPARE: make 2PC state file records
+ *		for all locks currently held.
+ *
+ * User locks are non-transactional and are therefore ignored.
+ *
+ * There are some special cases that we error out on: we can't be holding
+ * any session locks (should be OK since only VACUUM uses those) and we
+ * can't be holding any locks on temporary objects (since that would mess
+ * up the current backend if it tries to exit before the prepared xact is
+ * committed).
+ */
+void
+AtPrepare_Locks(void)
+{
+	LOCKMETHODID lockmethodid = DEFAULT_LOCKMETHOD;
+	HASH_SEQ_STATUS status;
+	LOCALLOCK  *locallock;
+
+	/*
+	 * We don't need to touch shared memory for this --- all the necessary
+	 * state information is in the locallock table.
+	 */
+	hash_seq_init(&status, LockMethodLocalHash[lockmethodid]);
+
+	while ((locallock = (LOCALLOCK *) hash_seq_search(&status)) != NULL)
+	{
+		TwoPhaseLockRecord record;
+		LOCALLOCKOWNER *lockOwners = locallock->lockOwners;
+		int		i;
+
+		/* Ignore items that are not of the lockmethod to be processed */
+		if (LOCALLOCK_LOCKMETHOD(*locallock) != lockmethodid)
+			continue;
+
+		/* Ignore it if we don't actually hold the lock */
+		if (locallock->nLocks <= 0)
+			continue;
+
+		/* Verify there are no session locks (owner entries with NULL owner) */
+		for (i = locallock->numLockOwners - 1; i >= 0; i--)
+		{
+			/* elog not ereport since this should not happen */
+			if (lockOwners[i].owner == NULL)
+				elog(ERROR, "cannot PREPARE when session locks exist");
+		}
+
+		/* Can't handle it if the lock is on a temporary object */
+		if (locallock->isTempObject)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("cannot PREPARE a transaction that has operated on temporary tables")));
+
+		/*
+		 * Emit one 2PC record carrying this lock's tag and mode (info = 0).
+		 */
+		memcpy(&(record.locktag), &(locallock->tag.lock), sizeof(LOCKTAG));
+		record.lockmode = locallock->tag.mode;
+
+		RegisterTwoPhaseRecord(TWOPHASE_RM_LOCK_ID, 0,
+							   &record, sizeof(TwoPhaseLockRecord));
+	}
+}
+
+/*
+ * PostPrepare_Locks
+ *		Clean up after successful PREPARE
+ *
+ * Here, we want to transfer ownership of our locks to the dummy PGPROC
+ * that TwoPhaseGetDummyProc returns for prepared transaction xid, and we
+ * want to clean out the corresponding entries in the LOCALLOCK table.
+ *
+ * Note: by removing the LOCALLOCK entries, we are leaving dangling
+ * pointers in the transaction's resource owner.  This is OK at the
+ * moment since resowner.c doesn't try to free locks retail at a toplevel
+ * transaction commit or abort.  We could alternatively zero out nLocks
+ * and leave the LOCALLOCK entries to be garbage-collected by LockReleaseAll,
+ * but that probably costs more cycles.
+ */
+void
+PostPrepare_Locks(TransactionId xid)
+{
+	PGPROC	   *newproc = TwoPhaseGetDummyProc(xid);
+	LOCKMETHODID lockmethodid = DEFAULT_LOCKMETHOD;
+	HASH_SEQ_STATUS status;
+	SHM_QUEUE  *procLocks = &(MyProc->procLocks);
+	LWLockId	masterLock;
+	LockMethod	lockMethodTable;
+	int			numLockModes;
+	LOCALLOCK  *locallock;
+	PROCLOCK   *proclock;
+	PROCLOCKTAG proclocktag;
+	bool		found;
+	LOCK	   *lock;
+
+	/* This is a critical section: any error means big trouble */
+	START_CRIT_SECTION();
+
+	lockMethodTable = LockMethods[lockmethodid];
+	if (!lockMethodTable)
+		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
+
+	numLockModes = lockMethodTable->numLockModes;
+	masterLock = lockMethodTable->masterLock;
+
+	/*
+	 * First we run through the locallock table and get rid of unwanted
+	 * entries, then we scan the process's proclocks and transfer them
+	 * to the target proc.
+	 *
+	 * We do this separately because we may have multiple locallock
+	 * entries pointing to the same proclock, and we daren't end up with
+	 * any dangling pointers.
+	 */
+	hash_seq_init(&status, LockMethodLocalHash[lockmethodid]);
+
+	while ((locallock = (LOCALLOCK *) hash_seq_search(&status)) != NULL)
+	{
+		if (locallock->proclock == NULL || locallock->lock == NULL)
+		{
+			/*
+			 * We must've run out of shared memory while trying to set up
+			 * this lock.  Just forget the local entry.
+			 */
+			Assert(locallock->nLocks == 0);
+			RemoveLocalLock(locallock);
+			continue;
+		}
+
+		/* Ignore items that are not of the lockmethod to be removed */
+		if (LOCALLOCK_LOCKMETHOD(*locallock) != lockmethodid)
+			continue;
+
+		/* No session locks can be present (see check in AtPrepare_Locks) */
+
+		/* Mark the proclock to show we need to release this lockmode */
+		if (locallock->nLocks > 0)
+			locallock->proclock->releaseMask |= LOCKBIT_ON(locallock->tag.mode);
+
+		/* And remove the locallock hashtable entry */
+		RemoveLocalLock(locallock);
+	}
+
+	LWLockAcquire(masterLock, LW_EXCLUSIVE);
+
+	proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
+										 offsetof(PROCLOCK, procLink));
+
+	while (proclock)
+	{
+		PROCLOCK   *nextplock;
+		LOCKMASK	holdMask;
+		PROCLOCK   *newproclock;
+
+		/* Get link first, since we may unlink/delete this proclock */
+		nextplock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
+										   offsetof(PROCLOCK, procLink));
+
+		Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
+
+		lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+
+		/* Ignore items that are not of the lockmethod to be removed */
+		if (LOCK_LOCKMETHOD(*lock) != lockmethodid)
+			goto next_item;
+
+		PROCLOCK_PRINT("PostPrepare_Locks", proclock);
+		LOCK_PRINT("PostPrepare_Locks", lock, 0);
+		Assert(lock->nRequested >= 0);
+		Assert(lock->nGranted >= 0);
+		Assert(lock->nGranted <= lock->nRequested);
+		Assert((proclock->holdMask & ~lock->grantMask) == 0);
+
+		/*
+		 * Since there were no session locks, we should be releasing all locks
+		 */
+		if (proclock->releaseMask != proclock->holdMask)
+			elog(PANIC, "we seem to have dropped a bit somewhere");
+
+		holdMask = proclock->holdMask;
+
+		/*
+		 * We cannot simply modify proclock->tag.proc to reassign ownership
+		 * of the lock, because that's part of the hash key and the proclock
+		 * would then be in the wrong hash chain.  So, unlink and delete the
+		 * old proclock; create a new one with the right contents; and link
+		 * it into place.  We do it in this order to be certain we won't
+		 * run out of shared memory (the way dynahash.c works, the deleted
+		 * object is certain to be available for reallocation).
+		 */
+		SHMQueueDelete(&proclock->lockLink);
+		SHMQueueDelete(&proclock->procLink);
+		if (!hash_search(LockMethodProcLockHash[lockmethodid],
+						 (void *) &(proclock->tag),
+						 HASH_REMOVE, NULL))
+			elog(PANIC, "proclock table corrupted");
+
+		/*
+		 * Create the hash key for the new proclock entry (lock + newproc).
+		 */
+		MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG));	/* must clear padding */
+		proclocktag.lock = MAKE_OFFSET(lock);
+		proclocktag.proc = MAKE_OFFSET(newproc);
+
+		newproclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[lockmethodid],
+											   (void *) &proclocktag,
+											   HASH_ENTER_NULL, &found);
+		if (!newproclock)
+		    ereport(PANIC,		/* should not happen */
+					(errcode(ERRCODE_OUT_OF_MEMORY),
+					 errmsg("out of shared memory"),
+					 errdetail("Not enough memory for reassigning the prepared transaction's locks.")));
+
+		/*
+		 * If new, initialize the new entry
+		 */
+		if (!found)
+		{
+			newproclock->holdMask = 0;
+			newproclock->releaseMask = 0;
+			/* Add new proclock to appropriate lists */
+			SHMQueueInsertBefore(&lock->procLocks, &newproclock->lockLink);
+			SHMQueueInsertBefore(&newproc->procLocks, &newproclock->procLink);
+			PROCLOCK_PRINT("PostPrepare_Locks: new", newproclock);
+		}
+		else
+		{
+			PROCLOCK_PRINT("PostPrepare_Locks: found", newproclock);
+			Assert((newproclock->holdMask & ~lock->grantMask) == 0);
+		}
+
+		/*
+		 * Pass over the identified lock ownership.
+		 */
+		Assert((newproclock->holdMask & holdMask) == 0);
+		newproclock->holdMask |= holdMask;
+
+next_item:
+		proclock = nextplock;
+	}
+
+	LWLockRelease(masterLock);
+
+	END_CRIT_SECTION();
+}
+
+
 /*
  * Estimate shared-memory space used for lock tables
  */
 int
-LockShmemSize(int maxBackends)
+LockShmemSize(void)
 {
 	int			size = 0;
-	long		max_table_size = NLOCKENTS(maxBackends);
+	long		max_table_size = NLOCKENTS();
 
 	/* lock method headers */
 	size += MAX_LOCK_METHODS * MAXALIGN(sizeof(LockMethodData));
@@ -1704,21 +1960,19 @@ GetLockmodeName(LOCKMODE mode)
 
 #ifdef LOCK_DEBUG
 /*
- * Dump all locks in the MyProc->procLocks list.
+ * Dump all locks in the given proc's procLocks list.
  *
  * Must have already acquired the masterLock.
  */
 void
-DumpLocks(void)
+DumpLocks(PGPROC *proc)
 {
-	PGPROC	   *proc;
 	SHM_QUEUE  *procLocks;
 	PROCLOCK   *proclock;
 	LOCK	   *lock;
 	int			lockmethodid = DEFAULT_LOCKMETHOD;
 	LockMethod	lockMethodTable;
 
-	proc = MyProc;
 	if (proc == NULL)
 		return;
 
@@ -1793,3 +2047,254 @@ DumpAllLocks(void)
 }
 
 #endif   /* LOCK_DEBUG */
+
+/*
+ * LOCK 2PC resource manager's routines
+ */
+
+/*
+ * Re-acquire a lock belonging to a transaction that was prepared.
+ *
+ * recdata is one TwoPhaseLockRecord (info is unused).  This runs at db
+ * startup, so there are no running transactions to conflict with; we assume
+ * the lock state in the stored 2PC files is legal and grant unconditionally.
+ */
+void
+lock_twophase_recover(TransactionId xid, uint16 info,
+					  void *recdata, uint32 len)
+{
+	TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata;
+	PGPROC	   *proc = TwoPhaseGetDummyProc(xid);
+	LOCKTAG	   *locktag;
+	LOCKMODE	lockmode;
+	LOCKMETHODID lockmethodid;
+	LOCK	   *lock;
+	PROCLOCK   *proclock;
+	PROCLOCKTAG proclocktag;
+	bool		found;
+	LWLockId	masterLock;
+	LockMethod	lockMethodTable;
+
+	Assert(len == sizeof(TwoPhaseLockRecord));
+	locktag = &rec->locktag;
+	lockmode = rec->lockmode;
+	lockmethodid = locktag->locktag_lockmethodid;
+
+	/* recdata was read from a 2PC file: range-check even without asserts */
+	if (lockmethodid >= NumLockMethods || !LockMethods[lockmethodid])
+		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
+	lockMethodTable = LockMethods[lockmethodid];
+
+	masterLock = lockMethodTable->masterLock;
+
+	LWLockAcquire(masterLock, LW_EXCLUSIVE);
+
+	/*
+	 * Find or create a lock with this tag.
+	 */
+	lock = (LOCK *) hash_search(LockMethodLockHash[lockmethodid],
+								(void *) locktag,
+								HASH_ENTER_NULL, &found);
+	if (!lock)
+	{
+		LWLockRelease(masterLock);
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of shared memory"),
+		errhint("You may need to increase max_locks_per_transaction.")));
+	}
+
+	/*
+	 * if it's a new lock object, initialize it
+	 */
+	if (!found)
+	{
+		lock->grantMask = 0;
+		lock->waitMask = 0;
+		SHMQueueInit(&(lock->procLocks));
+		ProcQueueInit(&(lock->waitProcs));
+		lock->nRequested = 0;
+		lock->nGranted = 0;
+		MemSet(lock->requested, 0, sizeof(int) * MAX_LOCKMODES);
+		MemSet(lock->granted, 0, sizeof(int) * MAX_LOCKMODES);
+		LOCK_PRINT("lock_twophase_recover: new", lock, lockmode);
+	}
+	else
+	{
+		LOCK_PRINT("lock_twophase_recover: found", lock, lockmode);
+		Assert((lock->nRequested >= 0) && (lock->requested[lockmode] >= 0));
+		Assert((lock->nGranted >= 0) && (lock->granted[lockmode] >= 0));
+		Assert(lock->nGranted <= lock->nRequested);
+	}
+
+	/*
+	 * Create the hash key for the proclock table.
+	 */
+	MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG));	/* must clear padding */
+	proclocktag.lock = MAKE_OFFSET(lock);
+	proclocktag.proc = MAKE_OFFSET(proc);
+
+	/*
+	 * Find or create a proclock entry with this tag
+	 */
+	proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[lockmethodid],
+										(void *) &proclocktag,
+										HASH_ENTER_NULL, &found);
+	if (!proclock)
+	{
+		/* Ooops, not enough shmem for the proclock */
+		if (lock->nRequested == 0)
+		{
+			/*
+			 * There are no other requestors of this lock, so garbage-collect
+			 * the lock object.  We *must* do this to avoid a permanent leak
+			 * of shared memory, because there won't be anything to cause
+			 * anyone to release the lock object later.
+			 */
+			Assert(SHMQueueEmpty(&(lock->procLocks)));
+			if (!hash_search(LockMethodLockHash[lockmethodid],
+							 (void *) &(lock->tag),
+							 HASH_REMOVE, NULL))
+				elog(PANIC, "lock table corrupted");
+		}
+		LWLockRelease(masterLock);
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of shared memory"),
+		errhint("You may need to increase max_locks_per_transaction.")));
+	}
+
+	/*
+	 * If new, initialize the new entry
+	 */
+	if (!found)
+	{
+		proclock->holdMask = 0;
+		proclock->releaseMask = 0;
+		/* Add proclock to appropriate lists */
+		SHMQueueInsertBefore(&lock->procLocks, &proclock->lockLink);
+		SHMQueueInsertBefore(&proc->procLocks, &proclock->procLink);
+		PROCLOCK_PRINT("lock_twophase_recover: new", proclock);
+	}
+	else
+	{
+		PROCLOCK_PRINT("lock_twophase_recover: found", proclock);
+		Assert((proclock->holdMask & ~lock->grantMask) == 0);
+	}
+
+	/*
+	 * lock->nRequested and lock->requested[] count the total number of
+	 * requests, whether granted or waiting, so increment those
+	 * immediately.
+	 */
+	lock->nRequested++;
+	lock->requested[lockmode]++;
+	Assert((lock->nRequested > 0) && (lock->requested[lockmode] > 0));
+
+	/*
+	 * We shouldn't already hold the desired lock.
+	 */
+	if (proclock->holdMask & LOCKBIT_ON(lockmode))
+		elog(ERROR, "lock %s on object %u/%u/%u is already held",
+			 lock_mode_names[lockmode],
+			 lock->tag.locktag_field1, lock->tag.locktag_field2,
+			 lock->tag.locktag_field3);
+
+	/*
+	 * We ignore any possible conflicts and just grant ourselves the lock.
+	 */
+	GrantLock(lock, proclock, lockmode);
+
+	LWLockRelease(masterLock);
+}
+
+/*
+ * 2PC processing routine for COMMIT PREPARED case: release the lock named by
+ * recdata (a TwoPhaseLockRecord) that is held by xid's dummy PGPROC.  Only
+ * warns and returns if that lock mode turns out not to be held.
+ */
+void
+lock_twophase_postcommit(TransactionId xid, uint16 info,
+						 void *recdata, uint32 len)
+{
+	TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata;
+	PGPROC	   *proc = TwoPhaseGetDummyProc(xid);
+	LOCKTAG	   *locktag;
+	LOCKMODE	lockmode;
+	LOCKMETHODID lockmethodid;
+	PROCLOCKTAG proclocktag;
+	LOCK	   *lock;
+	PROCLOCK   *proclock;
+	LWLockId	masterLock;
+	LockMethod	lockMethodTable;
+	bool		wakeupNeeded;
+
+	Assert(len == sizeof(TwoPhaseLockRecord));
+	locktag = &rec->locktag;
+	lockmode = rec->lockmode;
+	lockmethodid = locktag->locktag_lockmethodid;
+
+	/* recdata is stored 2PC data: range-check even without asserts */
+	if (lockmethodid >= NumLockMethods || !LockMethods[lockmethodid])
+		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
+	lockMethodTable = LockMethods[lockmethodid];
+
+	masterLock = lockMethodTable->masterLock;
+
+	LWLockAcquire(masterLock, LW_EXCLUSIVE);
+
+	/*
+	 * Re-find the lock object (it had better be there).
+	 */
+	lock = (LOCK *) hash_search(LockMethodLockHash[lockmethodid],
+								(void *) locktag,
+								HASH_FIND, NULL);
+	if (!lock)
+		elog(PANIC, "failed to re-find shared lock object");
+
+	/*
+	 * Re-find the proclock object (ditto).
+	 */
+	MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG));	/* must clear padding */
+	proclocktag.lock = MAKE_OFFSET(lock);
+	proclocktag.proc = MAKE_OFFSET(proc);
+	proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[lockmethodid],
+										(void *) &proclocktag,
+										HASH_FIND, NULL);
+	if (!proclock)
+		elog(PANIC, "failed to re-find shared proclock object");
+
+	/*
+	 * Double-check that we are actually holding a lock of the type we
+	 * want to release.
+	 */
+	if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
+	{
+		PROCLOCK_PRINT("lock_twophase_postcommit: WRONGTYPE", proclock);
+		LWLockRelease(masterLock);
+		elog(WARNING, "you don't own a lock of type %s",
+			 lock_mode_names[lockmode]);
+		return;
+	}
+
+	/*
+	 * Do the releasing.  CleanUpLock will waken any now-wakable waiters.
+	 */
+	wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable);
+
+	CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded);
+
+	LWLockRelease(masterLock);
+}
+
+/*
+ * 2PC processing routine for ROLLBACK PREPARED case.
+ *
+ * Same as the COMMIT case: simply delegates to lock_twophase_postcommit.
+ */
+void
+lock_twophase_postabort(TransactionId xid, uint16 info,
+						void *recdata, uint32 len)
+{
+	lock_twophase_postcommit(xid, info, recdata, len);
+}
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index b62283cffb4aecacb05f3c575a342c38fc430d62..227c3694788e9f94d323424714583ca5a34fc9d9 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.159 2005/06/14 22:15:32 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.160 2005/06/17 22:32:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -92,13 +92,13 @@ static bool CheckStatementTimeout(void);
  * Report shared-memory space needed by InitProcGlobal.
  */
 int
-ProcGlobalShmemSize(int maxBackends)
+ProcGlobalShmemSize(void)
 {
 	int			size = 0;
 
 	size += MAXALIGN(sizeof(PROC_HDR)); /* ProcGlobal */
 	size += MAXALIGN(NUM_DUMMY_PROCS * sizeof(PGPROC));	/* DummyProcs */
-	size += MAXALIGN(maxBackends * sizeof(PGPROC));		/* MyProcs */
+	size += MAXALIGN(MaxBackends * sizeof(PGPROC));		/* MyProcs */
 	size += MAXALIGN(sizeof(slock_t)); /* ProcStructLock */
 
 	return size;
@@ -108,10 +108,10 @@ ProcGlobalShmemSize(int maxBackends)
  * Report number of semaphores needed by InitProcGlobal.
  */
 int
-ProcGlobalSemas(int maxBackends)
+ProcGlobalSemas(void)
 {
 	/* We need a sema per backend, plus one for each dummy process. */
-	return maxBackends + NUM_DUMMY_PROCS;
+	return MaxBackends + NUM_DUMMY_PROCS;
 }
 
 /*
@@ -134,7 +134,7 @@ ProcGlobalSemas(int maxBackends)
  *	  postmaster, not in backends.
  */
 void
-InitProcGlobal(int maxBackends)
+InitProcGlobal(void)
 {
 	bool		foundProcGlobal,
 				foundDummy;
@@ -170,13 +170,13 @@ InitProcGlobal(int maxBackends)
 		 * Pre-create the PGPROC structures and create a semaphore for
 		 * each.
 		 */
-		procs = (PGPROC *) ShmemAlloc(maxBackends * sizeof(PGPROC));
+		procs = (PGPROC *) ShmemAlloc(MaxBackends * sizeof(PGPROC));
 		if (!procs)
 			ereport(FATAL,
 					(errcode(ERRCODE_OUT_OF_MEMORY),
 					 errmsg("out of shared memory")));
-		MemSet(procs, 0, maxBackends * sizeof(PGPROC));
-		for (i = 0; i < maxBackends; i++)
+		MemSet(procs, 0, MaxBackends * sizeof(PGPROC));
+		for (i = 0; i < MaxBackends; i++)
 		{
 			PGSemaphoreCreate(&(procs[i].sem));
 			procs[i].links.next = ProcGlobal->freeProcs;
@@ -254,7 +254,6 @@ InitProcess(void)
 	MyProc->xmin = InvalidTransactionId;
 	MyProc->pid = MyProcPid;
 	MyProc->databaseId = MyDatabaseId;
-	MyProc->logRec.xrecoff = 0;
 	MyProc->lwWaiting = false;
 	MyProc->lwExclusive = false;
 	MyProc->lwWaitLink = NULL;
@@ -265,7 +264,7 @@ InitProcess(void)
 	/*
 	 * Add our PGPROC to the PGPROC array in shared memory.
 	 */
-	ProcArrayAddMyself();
+	ProcArrayAdd(MyProc);
 
 	/*
 	 * Arrange to clean up at backend exit.
@@ -332,7 +331,6 @@ InitDummyProcess(int proctype)
 	MyProc->xid = InvalidTransactionId;
 	MyProc->xmin = InvalidTransactionId;
 	MyProc->databaseId = MyDatabaseId;
-	MyProc->logRec.xrecoff = 0;
 	MyProc->lwWaiting = false;
 	MyProc->lwExclusive = false;
 	MyProc->lwWaitLink = NULL;
@@ -352,6 +350,35 @@ InitDummyProcess(int proctype)
 	PGSemaphoreReset(&MyProc->sem);
 }
 
+/*
+ * Check whether there are at least N free PGPROC objects.
+ *
+ * Note: the free list is walked while holding ProcStructLock; keep N small.
+ */
+bool
+HaveNFreeProcs(int n)
+{
+	SHMEM_OFFSET offset;
+	PGPROC	   *proc;
+	/* use volatile pointer to prevent code rearrangement */
+	volatile PROC_HDR *procglobal = ProcGlobal;
+
+	SpinLockAcquire(ProcStructLock);
+
+	offset = procglobal->freeProcs;
+
+	while (n > 0 && offset != INVALID_OFFSET)
+	{
+		proc = (PGPROC *) MAKE_PTR(offset);
+		offset = proc->links.next;
+		n--;					/* one more free entry accounted for */
+	}
+
+	SpinLockRelease(ProcStructLock);
+
+	return (n <= 0);			/* true if the walk found N entries */
+}
+
 /*
  * Cancel any pending wait for lock, when aborting a transaction.
  *
@@ -478,7 +505,7 @@ ProcKill(int code, Datum arg)
 #endif
 
 	/* Remove our PGPROC from the PGPROC array in shared memory */
-	ProcArrayRemoveMyself();
+	ProcArrayRemove(MyProc);
 
 	SpinLockAcquire(ProcStructLock);
 
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 13ad72a3755da61645e118f4a8d7b6d4fdc4f96f..2c8cf07eec83993d063ad7fffb6b3ca90c3053ce 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.89 2005/06/06 20:22:58 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.90 2005/06/17 22:32:46 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -434,7 +434,7 @@ smgrscheduleunlink(SMgrRelation reln, bool isTemp)
  *		during transactional operations, since it can't be undone.
  *
  *		If isRedo is true, it is okay for the underlying file to be gone
- *		already.  (In practice isRedo will always be true.)
+ *		already.
  *
  * This also implies smgrclose() on the SMgrRelation object.
  */
@@ -677,6 +677,30 @@ smgrimmedsync(SMgrRelation reln)
 						reln->smgr_rnode.relNode)));
 }
 
+
+/*
+ *	PostPrepare_smgr -- Clean up after a successful PREPARE
+ *
+ * Discard the in-memory list of pending relation deletes.  That
+ * information has been recorded in the 2PC state file, so smgr is no
+ * longer responsible for it.
+ */
+void
+PostPrepare_smgr(void)
+{
+	PendingRelDelete *entry;
+
+	/* pop entries off the head of the list until it is empty */
+	while ((entry = pendingDeletes) != NULL)
+	{
+		pendingDeletes = entry->next;
+
+		/* each list entry has to be released by hand */
+		pfree(entry);
+	}
+}
+
+
 /*
  *	smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
  *
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index f1c65bacc43cf4cfef5996c98c80930b739d6897..454bc2577e1cbf2074e0742e007f5ed2d7567074 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.448 2005/06/14 21:04:40 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.449 2005/06/17 22:32:46 tgl Exp $
  *
  * NOTES
  *	  this is the "main" module of the postgres backend and
@@ -930,6 +930,7 @@ exec_simple_query(const char *query_string)
 				TransactionStmt *stmt = (TransactionStmt *) parsetree;
 
 				if (stmt->kind == TRANS_STMT_COMMIT ||
+					stmt->kind == TRANS_STMT_PREPARE ||
 					stmt->kind == TRANS_STMT_ROLLBACK ||
 					stmt->kind == TRANS_STMT_ROLLBACK_TO)
 					allowit = true;
@@ -1261,6 +1262,7 @@ exec_parse_message(const char *query_string,	/* string to execute */
 				TransactionStmt *stmt = (TransactionStmt *) parsetree;
 
 				if (stmt->kind == TRANS_STMT_COMMIT ||
+					stmt->kind == TRANS_STMT_PREPARE ||
 					stmt->kind == TRANS_STMT_ROLLBACK ||
 					stmt->kind == TRANS_STMT_ROLLBACK_TO)
 					allowit = true;
@@ -1751,6 +1753,7 @@ exec_execute_message(const char *portal_name, long max_rows)
 
 			is_trans_stmt = true;
 			if (stmt->kind == TRANS_STMT_COMMIT ||
+				stmt->kind == TRANS_STMT_PREPARE ||
 				stmt->kind == TRANS_STMT_ROLLBACK ||
 				stmt->kind == TRANS_STMT_ROLLBACK_TO)
 				is_trans_exit = true;
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 73be822a83e7f1b92e86fc362820c366992e3855..f948b0f854ed0c9b73c341ca81ed32c8b4a7d496 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -10,13 +10,14 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.236 2005/04/28 21:47:15 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.237 2005/06/17 22:32:46 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
 #include "access/heapam.h"
+#include "access/twophase.h"
 #include "catalog/catalog.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_shadow.h"
@@ -383,11 +384,11 @@ ProcessUtility(Node *parsetree,
 								if (strcmp(item->defname, "transaction_isolation") == 0)
 									SetPGVariable("transaction_isolation",
 												  list_make1(item->arg),
-												  false);
+												  true);
 								else if (strcmp(item->defname, "transaction_read_only") == 0)
 									SetPGVariable("transaction_read_only",
 												  list_make1(item->arg),
-												  false);
+												  true);
 							}
 						}
 						break;
@@ -401,6 +402,25 @@ ProcessUtility(Node *parsetree,
 						}
 						break;
 
+					case TRANS_STMT_PREPARE:
+						if (!PrepareTransactionBlock(stmt->gid))
+						{
+							/* report unsuccessful commit in completionTag */
+							if (completionTag)
+								strcpy(completionTag, "ROLLBACK");
+						}
+						break;
+
+					case TRANS_STMT_COMMIT_PREPARED:
+						PreventTransactionChain(stmt, "COMMIT PREPARED");
+						FinishPreparedTransaction(stmt->gid, true);
+						break;
+
+					case TRANS_STMT_ROLLBACK_PREPARED:
+						PreventTransactionChain(stmt, "ROLLBACK PREPARED");
+						FinishPreparedTransaction(stmt->gid, false);
+						break;
+
 					case TRANS_STMT_ROLLBACK:
 						UserAbortTransactionBlock();
 						break;
@@ -1215,6 +1235,18 @@ CreateCommandTag(Node *parsetree)
 						tag = "RELEASE";
 						break;
 
+					case TRANS_STMT_PREPARE:
+						tag = "PREPARE TRANSACTION";
+						break;
+
+					case TRANS_STMT_COMMIT_PREPARED:
+						tag = "COMMIT PREPARED";
+						break;
+
+					case TRANS_STMT_ROLLBACK_PREPARED:
+						tag = "ROLLBACK PREPARED";
+						break;
+
 					default:
 						tag = "???";
 						break;
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 61b7522f8c92c5b86fb55b038075ea1d67f1f37b..da0ffad16b2c094519185207cf22933897acc76b 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -80,12 +80,13 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.71 2005/04/14 01:38:19 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.72 2005/06/17 22:32:46 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
+#include "access/twophase_rmgr.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
 #include "miscadmin.h"
@@ -171,6 +172,13 @@ static struct CACHECALLBACK
 
 static int	cache_callback_count = 0;
 
+/* info values for 2PC callback */
+#define TWOPHASE_INFO_MSG			0		/* SharedInvalidationMessage */
+#define TWOPHASE_INFO_FILE_BEFORE	1		/* relcache file inval */
+#define TWOPHASE_INFO_FILE_AFTER	2		/* relcache file inval */
+
+static void PersistInvalidationMessage(SharedInvalidationMessage *msg);
+
 
 /* ----------------------------------------------------------------
  *				Invalidation list support functions
@@ -636,6 +644,56 @@ AtStart_Inval(void)
 	transInvalInfo->my_level = GetCurrentTransactionNestLevel();
 }
 
+/*
+ * AtPrepare_Inval
+ *		Save the inval lists state at 2PC transaction prepare.
+ *
+ * Nothing is sent from here: all pending invalidation work is merely
+ * turned into 2PC records (see the registration calls below).
+ */
+void
+AtPrepare_Inval(void)
+{
+	/* Must be at top of stack */
+	Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);
+
+	/*
+	 * Relcache init file invalidation requires processing both before
+	 * and after the SI messages, so bracket them with two marker records.
+	 */
+	if (transInvalInfo->RelcacheInitFileInval)
+		RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_FILE_BEFORE,
+							   NULL, 0);
+
+	AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
+							   &transInvalInfo->CurrentCmdInvalidMsgs);
+
+	ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
+								PersistInvalidationMessage);
+
+	if (transInvalInfo->RelcacheInitFileInval)
+		RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_FILE_AFTER,
+							   NULL, 0);
+}
+
+/*
+ * PostPrepare_Inval
+ *		Clean up after successful PREPARE.
+ *
+ * Here, we want to act as though the transaction aborted, so that we will
+ * undo any syscache changes it made, thereby bringing us into sync with the
+ * outside world, which does not yet consider the transaction committed.
+ *
+ * If the prepared transaction is later aborted, there is nothing more to
+ * do; if it commits, we will receive the consequent inval messages just
+ * like everyone else.
+ */
+void
+PostPrepare_Inval(void)
+{
+	AtEOXact_Inval(false);		/* false: take the abort path, per above */
+}
+
 /*
  * AtSubStart_Inval
  *		Initialize inval lists at start of a subtransaction.
@@ -654,6 +712,47 @@ AtSubStart_Inval(void)
 	transInvalInfo = myInfo;
 }
 
+/*
+ * PersistInvalidationMessage
+ *		Register one invalidation message as a TWOPHASE_INFO_MSG 2PC record.
+ */
+static void
+PersistInvalidationMessage(SharedInvalidationMessage *msg)
+{
+	RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_MSG,
+						   msg, sizeof(SharedInvalidationMessage));
+}
+
+/*
+ * inval_twophase_postcommit
+ *		Replay one invalidation record read from the 2PC state file.
+ */
+void
+inval_twophase_postcommit(TransactionId xid, uint16 info,
+						  void *recdata, uint32 len)
+{
+	if (info == TWOPHASE_INFO_MSG)
+	{
+		SharedInvalidationMessage *msg = (SharedInvalidationMessage *) recdata;
+
+		Assert(len == sizeof(SharedInvalidationMessage));
+		SendSharedInvalidMessage(msg);
+	}
+	else if (info == TWOPHASE_INFO_FILE_BEFORE)
+	{
+		RelationCacheInitFileInvalidate(true);
+	}
+	else if (info == TWOPHASE_INFO_FILE_AFTER)
+	{
+		RelationCacheInitFileInvalidate(false);
+	}
+	else
+	{
+		Assert(false);			/* unrecognized info code */
+	}
+}
+
+
 /*
  * AtEOXact_Inval
  *		Process queued-up invalidation messages at end of main transaction.
diff --git a/src/backend/utils/init/flatfiles.c b/src/backend/utils/init/flatfiles.c
index 49495abfdd6920d519a1f85607d1a3677bc8e613..fcbc99189ffd7768b2f7f5341165cc9f8ee76371 100644
--- a/src/backend/utils/init/flatfiles.c
+++ b/src/backend/utils/init/flatfiles.c
@@ -22,7 +22,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/utils/init/flatfiles.c,v 1.7 2005/06/06 17:01:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/init/flatfiles.c,v 1.8 2005/06/17 22:32:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -32,6 +32,7 @@
 #include <unistd.h>
 
 #include "access/heapam.h"
+#include "access/twophase_rmgr.h"
 #include "catalog/pg_database.h"
 #include "catalog/pg_group.h"
 #include "catalog/pg_namespace.h"
@@ -48,10 +49,16 @@
 #include "utils/syscache.h"
 
 
+/* Actual names of the flat files (within $PGDATA/global/) */
 #define DATABASE_FLAT_FILE	"pg_database"
 #define GROUP_FLAT_FILE		"pg_group"
 #define USER_FLAT_FILE		"pg_pwd"
 
+/* Info bits in a flatfiles 2PC record */
+#define FF_BIT_DATABASE	1
+#define FF_BIT_GROUP	2
+#define FF_BIT_USER		4
+
 
 /*
  * The need-to-update-files flags are SubTransactionIds that show
@@ -757,6 +764,43 @@ AtEOXact_UpdateFlatFiles(bool isCommit)
 	SendPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE);
 }
 
+
+/*
+ * This routine is called during transaction prepare.
+ *
+ * Emit a 2PC record noting which flat files must be refreshed if this
+ * transaction later commits.
+ *
+ * Note: the request flags can be cleared right away.  If the PREPARE fails
+ * further on, the flags would only be reset anyway, so no separate
+ * PostPrepare call is needed.
+ */
+void
+AtPrepare_UpdateFlatFiles(void)
+{
+	uint16		filebits = 0;
+
+	/* Collect one bit per flat file that has a pending update request */
+	if (database_file_update_subid != InvalidSubTransactionId)
+		filebits |= FF_BIT_DATABASE;
+	if (group_file_update_subid != InvalidSubTransactionId)
+		filebits |= FF_BIT_GROUP;
+	if (user_file_update_subid != InvalidSubTransactionId)
+		filebits |= FF_BIT_USER;
+
+	/*
+	 * Clear all the request flags; resetting one that was not set is a no-op.
+	 */
+	database_file_update_subid = InvalidSubTransactionId;
+	group_file_update_subid = InvalidSubTransactionId;
+	user_file_update_subid = InvalidSubTransactionId;
+
+	if (filebits != 0)
+		RegisterTwoPhaseRecord(TWOPHASE_RM_FLATFILES_ID, filebits,
+							   NULL, 0);
+}
+
+
 /*
  * AtEOSubXact_UpdateFlatFiles
  *
@@ -831,3 +875,28 @@ flatfile_update_trigger(PG_FUNCTION_ARGS)
 
 	return PointerGetDatum(NULL);
 }
+
+
+/*
+ * 2PC processing routine for the COMMIT PREPARED case.  "info" is the
+ * FF_BIT_* mask recorded at prepare time; xid, recdata and len are unused.
+ * (We don't have to do anything for ROLLBACK PREPARED.)
+ */
+void
+flatfile_twophase_postcommit(TransactionId xid, uint16 info,
+							 void *recdata, uint32 len)
+{
+	/*
+	 * Set flags to do the needed file updates at the end of the
+	 * transaction running this routine.  (XXX this has some issues if
+	 * that transaction later rolls back, or if there is any significant
+	 * delay before it commits.  OK for now because we disallow
+	 * COMMIT PREPARED inside a transaction block.)
+	 */
+	if (info & FF_BIT_DATABASE)
+		database_file_update_needed();
+	if (info & FF_BIT_GROUP)
+		group_file_update_needed();
+	if (info & FF_BIT_USER)
+		user_file_update_needed();
+}
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 7130365d4fd061c3b963615831205046240a023e..72fba39df4640604fe9d4c57f3e7a1055e61894f 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/init/postinit.c,v 1.147 2005/05/19 21:35:47 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/init/postinit.c,v 1.148 2005/06/17 22:32:47 tgl Exp $
  *
  *
  *-------------------------------------------------------------------------
@@ -232,7 +232,7 @@ InitCommunication(void)
 		 * We're running a postgres bootstrap process or a standalone
 		 * backend. Create private "shmem" and semaphores.
 		 */
-		CreateSharedMemoryAndSemaphores(true, MaxBackends, 0);
+		CreateSharedMemoryAndSemaphores(true, 0);
 	}
 }
 
@@ -456,7 +456,7 @@ InitPostgres(const char *dbname, const char *username)
 	 */
 	if (!am_superuser &&
 		ReservedBackends > 0 &&
-		CountEmptyBackendSlots() < ReservedBackends)
+		!HaveNFreeProcs(ReservedBackends))
 		ereport(FATAL,
 				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
 				 errmsg("connection limit exceeded for non-superusers")));
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8ee03e4d5e33647a616ac1cc35c0069d0ebfa877..05ce93cf58ff2b5d0bd3f07856d557087db428dc 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.267 2005/06/16 20:47:20 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.268 2005/06/17 22:32:47 tgl Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -25,6 +25,7 @@
 #include "utils/guc.h"
 #include "utils/guc_tables.h"
 
+#include "access/twophase.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_type.h"
 #include "commands/async.h"
@@ -1102,6 +1103,15 @@ static struct config_int ConfigureNamesInt[] =
 		1000, 25, INT_MAX, NULL, NULL
 	},
 
+	{
+		{"max_prepared_transactions", PGC_POSTMASTER, RESOURCES,
+			gettext_noop("Sets the maximum number of simultaneously prepared transactions."),
+			NULL
+		},
+		&max_prepared_xacts,
+		50, 0, 10000, NULL, NULL
+	},
+
 #ifdef LOCK_DEBUG
 	{
 		{"trace_lock_oidmin", PGC_SUSET, DEVELOPER_OPTIONS,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 5d957ee247d31313c982a650e4ad8d2d337a7097..4c5a1ed59e31d6eeccdc7a872e8734affea306d1 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -79,6 +79,7 @@
 
 #shared_buffers = 1000		# min 16, at least max_connections*2, 8KB each
 #temp_buffers = 1000		# min 100, 8KB each
+#max_prepared_transactions = 50 # 0-10000
 #work_mem = 1024		# min 64, size in KB
 #maintenance_work_mem = 16384	# min 1024, size in KB
 #max_stack_depth = 2048		# min 100, size in KB
diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c
index 26c5dd02a313c7d079bfcc6560def8afc0e3cbbb..b55a3430256525c3aab2445ca84cd6c17e5fee5c 100644
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.80 2005/05/29 04:23:06 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.81 2005/06/17 22:32:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -466,6 +466,48 @@ CommitHoldablePortals(void)
 	return result;
 }
 
+/*
+ * Pre-prepare processing for portals.
+ *
+ * For now, PREPARE is simply refused when the transaction has created a
+ * holdable cursor, because it is unclear what should be done with one.
+ * The API nevertheless matches CommitHoldablePortals, and xact.c calls it
+ * the same way, so that real processing can be slotted in here if someone
+ * works out a sensible behavior.
+ *
+ * Returns TRUE if any holdable cursors were processed, FALSE if not.
+ */
+bool
+PrepareHoldablePortals(void)
+{
+	HASH_SEQ_STATUS scan;
+	PortalHashEnt *entry;
+
+	hash_seq_init(&scan, PortalHashTable);
+
+	while ((entry = (PortalHashEnt *) hash_seq_search(&scan)) != NULL)
+	{
+		Portal		portal = entry->portal;
+
+		/* Skip everything except ready holdable portals of this xact */
+		if (!(portal->cursorOptions & CURSOR_OPT_HOLD))
+			continue;
+		if (portal->createSubid == InvalidSubTransactionId)
+			continue;
+		if (portal->status != PORTAL_READY)
+			continue;
+
+		/*
+		 * The creating transaction is being exited; PREPARE can't proceed.
+		 */
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot PREPARE a transaction that has created a cursor WITH HOLD")));
+	}
+
+	return false;				/* nothing is ever processed at present */
+}
+
 /*
  * Pre-commit processing for portals.
  *
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index f4951c3f1bbdc6d44cfa81ab694820d76aeeaa06..9e42c902f1cb06dbfb8e38e831702bf12e170b84 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -39,7 +39,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  * Portions taken from FreeBSD.
  *
- * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.83 2005/04/30 08:08:51 neilc Exp $
+ * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.84 2005/06/17 22:32:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2124,6 +2124,7 @@ main(int argc, char *argv[])
 		"pg_xlog/archive_status",
 		"pg_clog",
 		"pg_subtrans",
+		"pg_twophase",
 		"pg_multixact/members",
 		"pg_multixact/offsets",
 		"base",
diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c
index df17d0404bc9e4bd5045b3e8f280e6bbfc57bbb7..fed2275f46919ad44565a0f4b5ed6cbc77bc6794 100644
--- a/src/bin/psql/common.c
+++ b/src/bin/psql/common.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2005, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.102 2005/06/14 02:57:41 momjian Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.103 2005/06/17 22:32:47 tgl Exp $
  */
 #include "postgres_fe.h"
 #include "common.h"
@@ -1216,6 +1216,21 @@ command_no_begin(const char *query)
 		return true;
 	if (wordlen == 8 && pg_strncasecmp(query, "rollback", 8) == 0)
 		return true;
+	if (wordlen == 7 && pg_strncasecmp(query, "prepare", 7) == 0)
+	{
+		/* PREPARE TRANSACTION is a TC command, PREPARE foo is not */
+		query += wordlen;
+
+		query = skip_white_space(query);
+
+		wordlen = 0;
+		while (isalpha((unsigned char) query[wordlen]))
+			wordlen += PQmblen(&query[wordlen], pset.encoding);
+
+		if (wordlen == 11 && pg_strncasecmp(query, "transaction", 11) == 0)
+			return true;
+		return false;
+	}
 
 	/*
 	 * Commands not allowed within transactions.  The statements checked
diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h
index ac884775a09cfeef5dd9611aa3b47f7cda3f2b9b..2810e438d54c203c08480105f3e33ed0d4f04bb2 100644
--- a/src/include/access/subtrans.h
+++ b/src/include/access/subtrans.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/subtrans.h,v 1.5 2004/12/31 22:03:21 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/access/subtrans.h,v 1.6 2005/06/17 22:32:48 tgl Exp $
  */
 #ifndef SUBTRANS_H
 #define SUBTRANS_H
@@ -18,7 +18,7 @@ extern TransactionId SubTransGetTopmostTransaction(TransactionId xid);
 extern int	SUBTRANSShmemSize(void);
 extern void SUBTRANSShmemInit(void);
 extern void BootStrapSUBTRANS(void);
-extern void StartupSUBTRANS(void);
+extern void StartupSUBTRANS(TransactionId oldestActiveXID);
 extern void ShutdownSUBTRANS(void);
 extern void CheckPointSUBTRANS(void);
 extern void ExtendSUBTRANS(TransactionId newestXact);
diff --git a/src/include/access/twophase.h b/src/include/access/twophase.h
new file mode 100644
index 0000000000000000000000000000000000000000..ac2e05f33ce68223b2a7825c4972c3398ee3e261
--- /dev/null
+++ b/src/include/access/twophase.h
@@ -0,0 +1,49 @@
+/*-------------------------------------------------------------------------
+ *
+ * twophase.h
+ *	  Two-phase-commit related declarations.
+ *
+ *
+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL: pgsql/src/include/access/twophase.h,v 1.1 2005/06/17 22:32:48 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TWOPHASE_H
+#define TWOPHASE_H
+
+#include "storage/lock.h"
+
+
+/*
+ * GlobalTransactionData is defined in twophase.c; other modules handle it
+ * only through this opaque pointer type.
+ */
+typedef struct GlobalTransactionData *GlobalTransaction;
+
+/* GUC variable: max_prepared_transactions */
+extern int max_prepared_xacts;
+
+extern int	TwoPhaseShmemSize(void);
+extern void TwoPhaseShmemInit(void);
+
+extern PGPROC *TwoPhaseGetDummyProc(TransactionId xid);
+
+extern GlobalTransaction MarkAsPreparing(TransactionId xid, Oid databaseid,
+										 char *gid, AclId owner);
+extern void MarkAsPrepared(GlobalTransaction gxact);
+
+extern void StartPrepare(GlobalTransaction gxact);
+extern void EndPrepare(GlobalTransaction gxact);
+
+extern TransactionId PrescanPreparedTransactions(void);
+extern void RecoverPreparedTransactions(void);
+
+extern void RecreateTwoPhaseFile(TransactionId xid, void *content, int len);
+extern void RemoveTwoPhaseFile(TransactionId xid, bool giveWarning);
+/* isCommit: true for COMMIT PREPARED, false for ROLLBACK PREPARED */
+extern void FinishPreparedTransaction(char *gid, bool isCommit);
+
+#endif   /* TWOPHASE_H */
diff --git a/src/include/access/twophase_rmgr.h b/src/include/access/twophase_rmgr.h
new file mode 100644
index 0000000000000000000000000000000000000000..f15233ba2f63318fe6d051458547664069ccaa50
--- /dev/null
+++ b/src/include/access/twophase_rmgr.h
@@ -0,0 +1,39 @@
+/*-------------------------------------------------------------------------
+ *
+ * twophase_rmgr.h
+ *	  Two-phase-commit resource managers definition
+ *
+ *
+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL: pgsql/src/include/access/twophase_rmgr.h,v 1.1 2005/06/17 22:32:48 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TWOPHASE_RMGR_H
+#define TWOPHASE_RMGR_H
+
+typedef void (*TwoPhaseCallback) (TransactionId xid, uint16 info,
+								  void *recdata, uint32 len);
+typedef uint8 TwoPhaseRmgrId;	/* one of the TWOPHASE_RM_*_ID values */
+
+/*
+ * Built-in resource managers (values for TwoPhaseRmgrId)
+ */
+#define TWOPHASE_RM_END_ID			0
+#define TWOPHASE_RM_LOCK_ID			1
+#define TWOPHASE_RM_INVAL_ID		2
+#define TWOPHASE_RM_FLATFILES_ID   	3
+#define TWOPHASE_RM_NOTIFY_ID   	4
+#define TWOPHASE_RM_MAX_ID			TWOPHASE_RM_NOTIFY_ID
+
+extern const TwoPhaseCallback twophase_recover_callbacks[];
+extern const TwoPhaseCallback twophase_postcommit_callbacks[];
+extern const TwoPhaseCallback twophase_postabort_callbacks[];
+
+/* Called at prepare time to add one record for resource manager "rmid" */
+extern void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info,
+								   const void *data, uint32 len);
+
+#endif   /* TWOPHASE_RMGR_H */
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 7c949899ac4cc7b456c87b5132b9c45fda004dae..0c6b5580688e666b3e308a025d1101b6e90f423d 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.76 2005/06/06 17:01:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.77 2005/06/17 22:32:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -47,7 +47,8 @@ extern bool XactReadOnly;
 typedef enum
 {
 	XACT_EVENT_COMMIT,
-	XACT_EVENT_ABORT
+	XACT_EVENT_ABORT,
+	XACT_EVENT_PREPARE
 } XactEvent;
 
 typedef void (*XactCallback) (XactEvent event, void *arg);
@@ -72,8 +73,11 @@ typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid,
  * XLOG allows to store some information in high 4 bits of log
  * record xl_info field
  */
-#define XLOG_XACT_COMMIT	0x00
-#define XLOG_XACT_ABORT		0x20
+#define XLOG_XACT_COMMIT			0x00
+#define XLOG_XACT_PREPARE			0x10
+#define XLOG_XACT_ABORT				0x20
+#define XLOG_XACT_COMMIT_PREPARED	0x30
+#define XLOG_XACT_ABORT_PREPARED	0x40
 
 typedef struct xl_xact_commit
 {
@@ -99,6 +103,31 @@ typedef struct xl_xact_abort
 
 #define MinSizeOfXactAbort offsetof(xl_xact_abort, xnodes)
 
+/*
+ * COMMIT_PREPARED and ABORT_PREPARED are identical to COMMIT/ABORT records
+ * except that we have to store the XID of the prepared transaction explicitly
+ * --- the XID in the record header will be for the transaction doing the
+ * COMMIT PREPARED or ROLLBACK PREPARED command.
+ */
+
+typedef struct xl_xact_commit_prepared
+{
+	TransactionId xid;			/* XID of prepared xact */
+	xl_xact_commit crec;		/* COMMIT record */
+	/* MORE DATA FOLLOWS AT END OF STRUCT */
+} xl_xact_commit_prepared;
+/* Size of the fixed part: everything that precedes crec.xnodes */
+#define MinSizeOfXactCommitPrepared offsetof(xl_xact_commit_prepared, crec.xnodes)
+
+typedef struct xl_xact_abort_prepared
+{
+	TransactionId xid;			/* XID of prepared xact */
+	xl_xact_abort arec;			/* ABORT record */
+	/* MORE DATA FOLLOWS AT END OF STRUCT */
+} xl_xact_abort_prepared;
+/* Size of the fixed part: everything that precedes arec.xnodes */
+#define MinSizeOfXactAbortPrepared offsetof(xl_xact_abort_prepared, arec.xnodes)
+
 
 /* ----------------
  *		extern definitions
@@ -121,6 +150,7 @@ extern void CommitTransactionCommand(void);
 extern void AbortCurrentTransaction(void);
 extern void BeginTransactionBlock(void);
 extern bool EndTransactionBlock(void);
+extern bool PrepareTransactionBlock(char *gid);
 extern void UserAbortTransactionBlock(void);
 extern void ReleaseSavepoint(List *options);
 extern void DefineSavepoint(char *name);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 535e5bb01ba3cb60db0256111198d6d97a943041..de422a41a9f3ddb0bac5cc53cc2dac5a704f7fdd 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.276 2005/06/15 12:56:35 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.277 2005/06/17 22:32:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200506151
+#define CATALOG_VERSION_NO	200506171
 
 #endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index c06c4a7d41f1adced6503050f1dc5e9256cf7b64..a05a4f3a62cf8e728606b008db8f3f7260f6c9b5 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.367 2005/06/14 21:04:41 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.368 2005/06/17 22:32:48 tgl Exp $
  *
  * NOTES
  *	  The script catalog/genbki.sh reads this file and generates .bki
@@ -3005,6 +3005,8 @@ DATA(insert OID = 2084 (  pg_show_all_settings	PGNSP PGUID 12 f f t t s 0 2249 "
 DESCR("SHOW ALL as a function");
 DATA(insert OID = 1371 (  pg_lock_status   PGNSP PGUID 12 f f t t v 0 2249 "" _null_ _null_ _null_ pg_lock_status - _null_ ));
 DESCR("view system lock information");
+DATA(insert OID = 1065 (  pg_prepared_xact PGNSP PGUID 12 f f t t v 0 2249 "" _null_ _null_ _null_ pg_prepared_xact - _null_ ));
+DESCR("view two-phase transactions");
 
 DATA(insert OID = 2079 (  pg_table_is_visible		PGNSP PGUID 12 f f t f s 1 16 "26" _null_ _null_ _null_	pg_table_is_visible - _null_ ));
 DESCR("is table visible in search path?");
diff --git a/src/include/commands/async.h b/src/include/commands/async.h
index 08855ea540380d2721ce28c825146b2864591e7c..b893771b0f709d6933b223e05e7aa1329a2ca6e8 100644
--- a/src/include/commands/async.h
+++ b/src/include/commands/async.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/commands/async.h,v 1.27 2004/12/31 22:03:28 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/commands/async.h,v 1.28 2005/06/17 22:32:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -26,6 +26,7 @@ extern void AtAbort_Notify(void);
 extern void AtSubStart_Notify(void);
 extern void AtSubCommit_Notify(void);
 extern void AtSubAbort_Notify(void);
+extern void AtPrepare_Notify(void);
 
 /* signal handler for inbound notifies (SIGUSR2) */
 extern void NotifyInterruptHandler(SIGNAL_ARGS);
@@ -38,4 +39,7 @@ extern void NotifyInterruptHandler(SIGNAL_ARGS);
 extern void EnableNotifyInterrupt(void);
 extern bool DisableNotifyInterrupt(void);
 
+extern void notify_twophase_postcommit(TransactionId xid, uint16 info,
+									   void *recdata, uint32 len);
+
 #endif   /* ASYNC_H */
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 04b32082ebf59fcd9c62b7f49215274a4827a542..993a240faa1b0f9ceaa36417c30697e062928d41 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.281 2005/06/05 22:32:57 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.282 2005/06/17 22:32:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1556,7 +1556,10 @@ typedef enum TransactionStmtKind
 	TRANS_STMT_ROLLBACK,
 	TRANS_STMT_SAVEPOINT,
 	TRANS_STMT_RELEASE,
-	TRANS_STMT_ROLLBACK_TO
+	TRANS_STMT_ROLLBACK_TO,
+	TRANS_STMT_PREPARE,
+	TRANS_STMT_COMMIT_PREPARED,
+	TRANS_STMT_ROLLBACK_PREPARED
 } TransactionStmtKind;
 
 typedef struct TransactionStmt
@@ -1564,6 +1567,7 @@ typedef struct TransactionStmt
 	NodeTag		type;
 	TransactionStmtKind kind;	/* see above */
 	List	   *options;		/* for BEGIN/START and savepoint commands */
+	char       *gid;			/* for two-phase-commit related commands */
 } TransactionStmt;
 
 /* ----------------------
diff --git a/src/include/storage/ipc.h b/src/include/storage/ipc.h
index d08d0f10be0896952e158b43e5bee6dec4fab509..3d325b10af6431f661e53286b647a289453e022f 100644
--- a/src/include/storage/ipc.h
+++ b/src/include/storage/ipc.h
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/ipc.h,v 1.70 2004/12/31 22:03:42 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/storage/ipc.h,v 1.71 2005/06/17 22:32:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -29,8 +29,6 @@ extern void on_shmem_exit(void (*function) (int code, Datum arg), Datum arg);
 extern void on_exit_reset(void);
 
 /* ipci.c */
-extern void CreateSharedMemoryAndSemaphores(bool makePrivate,
-								int maxBackends,
-								int port);
+extern void CreateSharedMemoryAndSemaphores(bool makePrivate, int port);
 
 #endif   /* IPC_H */
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h
index cdbf8ad406c1fd65c0ce6fd496e31cfb0df462c9..3a83c26c7df7bd4efe12b2fe52f4b57be76eddf7 100644
--- a/src/include/storage/lmgr.h
+++ b/src/include/storage/lmgr.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/lmgr.h,v 1.49 2005/06/14 22:15:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lmgr.h,v 1.50 2005/06/17 22:32:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -41,7 +41,7 @@
  * so increase that if you want to add more modes.
  */
 
-extern void InitLockTable(int maxBackends);
+extern void InitLockTable(void);
 extern void RelationInitLockInfo(Relation relation);
 
 /* Lock a relation */
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index a471ff32a3355d85e393bc6f4981e9f597abf0ed..e434683d0e5ea5ead215a5dda299503c2025d12b 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.88 2005/06/14 22:15:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.89 2005/06/17 22:32:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -370,7 +370,7 @@ extern void InitLocks(void);
 extern LockMethod GetLocksMethodTable(LOCK *lock);
 extern LOCKMETHODID LockMethodTableInit(const char *tabName,
 					const LOCKMASK *conflictsP,
-					int numModes, int maxBackends);
+					int numModes);
 extern LOCKMETHODID LockMethodTableRename(LOCKMETHODID lockmethodid);
 extern LockAcquireResult LockAcquire(LOCKMETHODID lockmethodid,
 									 LOCKTAG *locktag,
@@ -383,13 +383,15 @@ extern bool LockRelease(LOCKMETHODID lockmethodid, LOCKTAG *locktag,
 extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks);
 extern void LockReleaseCurrentOwner(void);
 extern void LockReassignCurrentOwner(void);
+extern void AtPrepare_Locks(void);
+extern void PostPrepare_Locks(TransactionId xid);
 extern int LockCheckConflicts(LockMethod lockMethodTable,
 				   LOCKMODE lockmode,
 				   LOCK *lock, PROCLOCK *proclock, PGPROC *proc);
 extern void GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode);
 extern void GrantAwaitedLock(void);
 extern void RemoveFromWaitQueue(PGPROC *proc);
-extern int	LockShmemSize(int maxBackends);
+extern int	LockShmemSize(void);
 extern bool DeadLockCheck(PGPROC *proc);
 extern void DeadLockReport(void);
 extern void RememberSimpleDeadLock(PGPROC *proc1,
@@ -400,8 +402,15 @@ extern void InitDeadLockChecking(void);
 extern LockData *GetLockStatusData(void);
 extern const char *GetLockmodeName(LOCKMODE mode);
 
+extern void lock_twophase_recover(TransactionId xid, uint16 info,
+								  void *recdata, uint32 len);
+extern void lock_twophase_postcommit(TransactionId xid, uint16 info,
+									 void *recdata, uint32 len);
+extern void lock_twophase_postabort(TransactionId xid, uint16 info,
+									void *recdata, uint32 len);
+
 #ifdef LOCK_DEBUG
-extern void DumpLocks(void);
+extern void DumpLocks(PGPROC *proc);
 extern void DumpAllLocks(void);
 #endif
 
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index eb45f3e39aef8c5b1003edacb8ea3937ea1ce9e4..b4ebdb85d5961d8cbf6fe5f6718e59a6db2ff1e2 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.19 2005/05/19 21:35:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.20 2005/06/17 22:32:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,6 +46,7 @@ typedef enum LWLockId
 	MultiXactMemberControlLock,
 	RelCacheInitLock,
 	BgWriterCommLock,
+	TwoPhaseStateLock,
 
 	NumFixedLWLocks,			/* must be last except for
 								 * MaxDynamicLWLock */
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index f771d71933c3c1ab37cb0ba05f86defbeb20371a..ece321028fce8841065137f628c27461ecb64c48 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.78 2005/05/19 21:35:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.79 2005/06/17 22:32:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,6 +46,13 @@ struct XidCache
  * links: list link for any list the PGPROC is in.	When waiting for a lock,
  * the PGPROC is linked into that lock's waitProcs queue.  A recycled PGPROC
  * is linked into ProcGlobal's freeProcs list.
+ *
+ * Note: twophase.c also sets up a dummy PGPROC struct for each currently
+ * prepared transaction.  These PGPROCs appear in the ProcArray data structure
+ * so that the prepared transactions appear to be still running and are
+ * correctly shown as holding locks.  A prepared transaction PGPROC can be
+ * distinguished from a real one at need by the fact that it has pid == 0.
+ * The semaphore and lock-wait fields in a prepared-xact PGPROC are unused.
  */
 struct PGPROC
 {
@@ -62,16 +69,9 @@ struct PGPROC
 								 * were starting our xact: vacuum must not
 								 * remove tuples deleted by xid >= xmin ! */
 
-	int			pid;			/* This backend's process id */
+	int			pid;			/* Backend's process id; 0 if prepared xact */
 	Oid			databaseId;		/* OID of database this backend is using */
 
-	/*
-	 * XLOG location of first XLOG record written by this backend's
-	 * current transaction.  If backend is not in a transaction or hasn't
-	 * yet modified anything, logRec.xrecoff is zero.
-	 */
-	XLogRecPtr	logRec;
-
 	/* Info about LWLock the process is currently waiting for, if any. */
 	bool		lwWaiting;		/* true if waiting for an LW lock */
 	bool		lwExclusive;	/* true if waiting for exclusive access */
@@ -120,11 +120,12 @@ extern int	StatementTimeout;
 /*
  * Function Prototypes
  */
-extern int	ProcGlobalSemas(int maxBackends);
-extern int	ProcGlobalShmemSize(int maxBackends);
-extern void InitProcGlobal(int maxBackends);
+extern int	ProcGlobalSemas(void);
+extern int	ProcGlobalShmemSize(void);
+extern void InitProcGlobal(void);
 extern void InitProcess(void);
 extern void InitDummyProcess(int proctype);
+extern bool HaveNFreeProcs(int n);
 extern void ProcReleaseLocks(bool isCommit);
 
 extern void ProcQueueInit(PROC_QUEUE *queue);
diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h
index 437ac306e055439068a4fa163aa1ad454c90c467..d1780bcca185e13ee205990ee8e3450928e621a8 100644
--- a/src/include/storage/procarray.h
+++ b/src/include/storage/procarray.h
@@ -7,28 +7,30 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.1 2005/05/19 21:35:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.2 2005/06/17 22:32:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef PROCARRAY_H
 #define PROCARRAY_H
 
-extern int	ProcArrayShmemSize(int maxBackends);
-extern void CreateSharedProcArray(int maxBackends);
-extern void ProcArrayAddMyself(void);
-extern void ProcArrayRemoveMyself(void);
+#include "storage/lock.h"
+
+
+extern int	ProcArrayShmemSize(void);
+extern void CreateSharedProcArray(void);
+extern void ProcArrayAdd(PGPROC *proc);
+extern void ProcArrayRemove(PGPROC *proc);
 
 extern bool TransactionIdIsInProgress(TransactionId xid);
+extern bool TransactionIdIsActive(TransactionId xid);
 extern TransactionId GetOldestXmin(bool allDbs);
 
-/* Use "struct PGPROC", not PGPROC, to avoid including proc.h here */
-extern struct PGPROC *BackendPidGetProc(int pid);
+extern PGPROC *BackendPidGetProc(int pid);
 extern bool IsBackendPid(int pid);
 extern bool DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself);
 
 extern int	CountActiveBackends(void);
-extern int	CountEmptyBackendSlots(void);
 
 extern void XidCacheRemoveRunningXids(TransactionId xid,
 						  int nxids, TransactionId *xids);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index f00a8aeb83310640b4670c4a4b6636bf7c855303..ab3c39fd1c6e9e220523ff522b86a4349ca05f48 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.51 2005/06/06 17:01:25 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.52 2005/06/17 22:32:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -79,6 +79,7 @@ extern void smgrDoPendingDeletes(bool isCommit);
 extern int	smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr);
 extern void AtSubCommit_smgr(void);
 extern void AtSubAbort_smgr(void);
+extern void PostPrepare_smgr(void);
 extern void smgrcommit(void);
 extern void smgrabort(void);
 extern void smgrsync(void);
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index b781a270c60778d51fb9f70d9498961cebf3a5b8..37423dc73c2e14976d009ef45e6589bcd923f55f 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.257 2005/05/27 00:57:49 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.258 2005/06/17 22:32:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -825,6 +825,9 @@ extern Datum show_all_settings(PG_FUNCTION_ARGS);
 /* lockfuncs.c */
 extern Datum pg_lock_status(PG_FUNCTION_ARGS);
 
+/* access/transam/twophase.c */
+extern Datum pg_prepared_xact(PG_FUNCTION_ARGS);
+
 /* catalog/pg_conversion.c */
 extern Datum pg_convert_using(PG_FUNCTION_ARGS);
 
diff --git a/src/include/utils/flatfiles.h b/src/include/utils/flatfiles.h
index 04a901ab008412e6c4014db6551f184f7d3e0435..939239aa1b94c45c5912ca9a99abad9d48e43552 100644
--- a/src/include/utils/flatfiles.h
+++ b/src/include/utils/flatfiles.h
@@ -4,7 +4,7 @@
  *	  Routines for maintaining "flat file" images of the shared catalogs.
  *
  *
- * $PostgreSQL: pgsql/src/include/utils/flatfiles.h,v 1.3 2005/05/10 22:27:30 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/flatfiles.h,v 1.4 2005/06/17 22:32:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -23,6 +23,7 @@ extern char *user_getflatfilename(void);
 
 extern void BuildFlatFiles(bool database_only);
 
+extern void AtPrepare_UpdateFlatFiles(void);
 extern void AtEOXact_UpdateFlatFiles(bool isCommit);
 extern void AtEOSubXact_UpdateFlatFiles(bool isCommit,
 										SubTransactionId mySubid,
@@ -30,4 +31,7 @@ extern void AtEOSubXact_UpdateFlatFiles(bool isCommit,
 
 extern Datum flatfile_update_trigger(PG_FUNCTION_ARGS);
 
+extern void flatfile_twophase_postcommit(TransactionId xid, uint16 info,
+										 void *recdata, uint32 len);
+
 #endif   /* FLATFILES_H */
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index ac486e53ea0f4ad2aefa913755d60a443410037d..372d34a22c34f7b814fd48de799f3dbdae7f6135 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.35 2004/12/31 22:03:46 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.36 2005/06/17 22:32:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -30,6 +30,10 @@ extern void AtEOXact_Inval(bool isCommit);
 
 extern void AtEOSubXact_Inval(bool isCommit);
 
+extern void AtPrepare_Inval(void);
+
+extern void PostPrepare_Inval(void);
+
 extern void CommandEndInvalidationMessages(void);
 
 extern void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple);
@@ -47,4 +51,7 @@ extern void CacheRegisterSyscacheCallback(int cacheid,
 extern void CacheRegisterRelcacheCallback(CacheCallbackFunction func,
 							  Datum arg);
 
+extern void inval_twophase_postcommit(TransactionId xid, uint16 info,
+									  void *recdata, uint32 len);
+
 #endif   /* INVAL_H */
diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h
index b8bcc33f583f6fa152b6ad2ba889914594a2b5a6..33de53eee86ab7101f8c871ad669e63c3837d809 100644
--- a/src/include/utils/portal.h
+++ b/src/include/utils/portal.h
@@ -39,7 +39,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/portal.h,v 1.55 2005/04/11 19:51:16 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/portal.h,v 1.56 2005/06/17 22:32:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -183,6 +183,7 @@ typedef struct PortalData
 /* Prototypes for functions in utils/mmgr/portalmem.c */
 extern void EnablePortalManager(void);
 extern bool CommitHoldablePortals(void);
+extern bool PrepareHoldablePortals(void);
 extern void AtCommit_Portals(void);
 extern void AtAbort_Portals(void);
 extern void AtCleanup_Portals(void);
diff --git a/src/test/regress/expected/prepared_xacts.out b/src/test/regress/expected/prepared_xacts.out
new file mode 100644
index 0000000000000000000000000000000000000000..d6a165e94e41528b386ba5f3cfa58abed63bbe82
--- /dev/null
+++ b/src/test/regress/expected/prepared_xacts.out
@@ -0,0 +1,213 @@
+--
+-- PREPARED TRANSACTIONS (two-phase commit)
+--
+-- We can't readily test persistence of prepared xacts within the
+-- regression script framework, unfortunately.  Note that a crash
+-- isn't really needed ... stopping and starting the postmaster would
+-- be enough, but we can't even do that here.
+-- create a simple table that we'll use in the tests
+CREATE TABLE pxtest1 (foobar VARCHAR(10));
+INSERT INTO pxtest1 VALUES ('aaa');
+-- Test PREPARE TRANSACTION
+BEGIN;
+UPDATE pxtest1 SET foobar = 'bbb' WHERE foobar = 'aaa';
+SELECT * FROM pxtest1;
+ foobar 
+--------
+ bbb
+(1 row)
+
+PREPARE TRANSACTION 'foo1';
+SELECT * FROM pxtest1;
+ foobar 
+--------
+ aaa
+(1 row)
+
+-- Test pg_prepared_xacts system view
+SELECT gid FROM pg_prepared_xacts;
+ gid  
+------
+ foo1
+(1 row)
+
+-- Test ROLLBACK PREPARED
+ROLLBACK PREPARED 'foo1';
+SELECT * FROM pxtest1;
+ foobar 
+--------
+ aaa
+(1 row)
+
+SELECT gid FROM pg_prepared_xacts;
+ gid 
+-----
+(0 rows)
+
+-- Test COMMIT PREPARED
+BEGIN;
+INSERT INTO pxtest1 VALUES ('ddd');
+SELECT * FROM pxtest1;
+ foobar 
+--------
+ aaa
+ ddd
+(2 rows)
+
+PREPARE TRANSACTION 'foo2';
+SELECT * FROM pxtest1;
+ foobar 
+--------
+ aaa
+(1 row)
+
+COMMIT PREPARED 'foo2';
+SELECT * FROM pxtest1;
+ foobar 
+--------
+ aaa
+ ddd
+(2 rows)
+
+-- Test duplicate gids
+BEGIN;
+UPDATE pxtest1 SET foobar = 'eee' WHERE foobar = 'ddd';
+SELECT * FROM pxtest1;
+ foobar 
+--------
+ aaa
+ eee
+(2 rows)
+
+PREPARE TRANSACTION 'foo3';
+SELECT gid FROM pg_prepared_xacts;
+ gid  
+------
+ foo3
+(1 row)
+
+BEGIN;
+INSERT INTO pxtest1 VALUES ('fff');
+SELECT * FROM pxtest1;
+ foobar 
+--------
+ aaa
+ ddd
+ fff
+(3 rows)
+
+-- This should fail, because the gid foo3 is already in use
+PREPARE TRANSACTION 'foo3';
+ERROR:  global transaction identifier "foo3" is already in use
+SELECT * FROM pxtest1;
+ foobar 
+--------
+ aaa
+ ddd
+(2 rows)
+
+ROLLBACK PREPARED 'foo3';
+SELECT * FROM pxtest1;
+ foobar 
+--------
+ aaa
+ ddd
+(2 rows)
+
+-- Clean up
+DROP TABLE pxtest1;
+-- Test subtransactions
+BEGIN;
+  CREATE TABLE pxtest2 (a int);
+  INSERT INTO pxtest2 VALUES (1);
+  SAVEPOINT a;
+    INSERT INTO pxtest2 VALUES (2);
+  ROLLBACK TO a;
+  SAVEPOINT b;
+  INSERT INTO pxtest2 VALUES (3);
+PREPARE TRANSACTION 'regress-one';
+CREATE TABLE pxtest3(fff int);
+-- Test shared invalidation
+BEGIN;
+  DROP TABLE pxtest3;
+  CREATE TABLE pxtest4 (a int);
+  INSERT INTO pxtest4 VALUES (1);
+  INSERT INTO pxtest4 VALUES (2);
+  DECLARE foo CURSOR FOR SELECT * FROM pxtest4;
+  -- Fetch 1 tuple, keeping the cursor open
+  FETCH 1 FROM foo;
+ a 
+---
+ 1
+(1 row)
+
+PREPARE TRANSACTION 'regress-two';
+-- No such cursor
+FETCH 1 FROM foo;
+ERROR:  cursor "foo" does not exist
+-- Table doesn't exist, the creation hasn't been committed yet
+SELECT * FROM pxtest2;
+ERROR:  relation "pxtest2" does not exist
+-- There should be two prepared transactions
+SELECT gid FROM pg_prepared_xacts;
+     gid     
+-------------
+ regress-one
+ regress-two
+(2 rows)
+
+-- pxtest3 should be locked because of the pending DROP
+set statement_timeout to 1000;
+SELECT * FROM pxtest3;
+ERROR:  canceling query due to user request
+reset statement_timeout;
+-- Disconnect, we will continue testing in a different backend
+\c -
+-- There should still be two prepared transactions
+SELECT gid FROM pg_prepared_xacts;
+     gid     
+-------------
+ regress-one
+ regress-two
+(2 rows)
+
+-- pxtest3 should still be locked because of the pending DROP
+set statement_timeout to 1000;
+SELECT * FROM pxtest3;
+ERROR:  canceling query due to user request
+reset statement_timeout;
+-- Commit table creation
+COMMIT PREPARED 'regress-one';
+\d pxtest2
+    Table "public.pxtest2"
+ Column |  Type   | Modifiers 
+--------+---------+-----------
+ a      | integer | 
+
+SELECT * FROM pxtest2;
+ a 
+---
+ 1
+ 3
+(2 rows)
+
+-- There should be one prepared transaction
+SELECT gid FROM pg_prepared_xacts;
+     gid     
+-------------
+ regress-two
+(1 row)
+
+-- Commit table drop
+COMMIT PREPARED 'regress-two';
+SELECT * FROM pxtest3;
+ERROR:  relation "pxtest3" does not exist
+-- There should be no prepared transactions
+SELECT gid FROM pg_prepared_xacts;
+ gid 
+-----
+(0 rows)
+
+-- Clean up
+DROP TABLE pxtest2;
+DROP TABLE pxtest4;
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 5eccf2d44a0f03082efeae40015428ec1c66e984..acd2d25b35de296439accebd28eb28b9d5831baa 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1279,6 +1279,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
  iexit                    | SELECT ih.name, ih.thepath, interpt_pp(ih.thepath, r.thepath) AS exit FROM ihighway ih, ramp r WHERE (ih.thepath ## r.thepath);
  pg_indexes               | SELECT n.nspname AS schemaname, c.relname AS tablename, i.relname AS indexname, t.spcname AS "tablespace", pg_get_indexdef(i.oid) AS indexdef FROM ((((pg_index x JOIN pg_class c ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) LEFT JOIN pg_tablespace t ON ((t.oid = i.reltablespace))) WHERE ((c.relkind = 'r'::"char") AND (i.relkind = 'i'::"char"));
  pg_locks                 | SELECT l.locktype, l."database", l.relation, l.page, l.tuple, l."transaction", l.classid, l.objid, l.objsubid, l.pid, l."mode", l.granted FROM pg_lock_status() l(locktype text, "database" oid, relation oid, page integer, tuple smallint, "transaction" xid, classid oid, objid oid, objsubid smallint, pid integer, "mode" text, granted boolean);
+ pg_prepared_xacts        | SELECT p."transaction", p.gid, u.usename AS "owner", d.datname AS "database" FROM ((pg_prepared_xact() p("transaction" xid, gid text, ownerid integer, dbid oid) LEFT JOIN pg_database d ON ((p.dbid = d.oid))) LEFT JOIN pg_shadow u ON ((p.ownerid = u.usesysid)));
  pg_rules                 | SELECT n.nspname AS schemaname, c.relname AS tablename, r.rulename, pg_get_ruledef(r.oid) AS definition FROM ((pg_rewrite r JOIN pg_class c ON ((c.oid = r.ev_class))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (r.rulename <> '_RETURN'::name);
  pg_settings              | SELECT a.name, a.setting, a.category, a.short_desc, a.extra_desc, a.context, a.vartype, a.source, a.min_val, a.max_val FROM pg_show_all_settings() a(name text, setting text, category text, short_desc text, extra_desc text, context text, vartype text, source text, min_val text, max_val text);
  pg_stat_activity         | SELECT d.oid AS datid, d.datname, pg_stat_get_backend_pid(s.backendid) AS procpid, pg_stat_get_backend_userid(s.backendid) AS usesysid, u.usename, pg_stat_get_backend_activity(s.backendid) AS current_query, pg_stat_get_backend_activity_start(s.backendid) AS query_start, pg_stat_get_backend_start(s.backendid) AS backend_start, pg_stat_get_backend_client_addr(s.backendid) AS client_addr, pg_stat_get_backend_client_port(s.backendid) AS client_port FROM pg_database d, (SELECT pg_stat_get_backend_idset() AS backendid) s, pg_shadow u WHERE ((pg_stat_get_backend_dbid(s.backendid) = d.oid) AND (pg_stat_get_backend_userid(s.backendid) = u.usesysid));
@@ -1316,7 +1317,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
  shoelace_obsolete        | SELECT shoelace.sl_name, shoelace.sl_avail, shoelace.sl_color, shoelace.sl_len, shoelace.sl_unit, shoelace.sl_len_cm FROM shoelace WHERE (NOT (EXISTS (SELECT shoe.shoename FROM shoe WHERE (shoe.slcolor = shoelace.sl_color))));
  street                   | SELECT r.name, r.thepath, c.cname FROM ONLY road r, real_city c WHERE (c.outline ## r.thepath);
  toyemp                   | SELECT emp.name, emp.age, emp."location", (12 * emp.salary) AS annualsal FROM emp;
-(40 rows)
+(41 rows)
 
 SELECT tablename, rulename, definition FROM pg_rules 
 	ORDER BY tablename, rulename;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index b3a4de3b56ff3b2ee792e87c38d6a6ca7cc16bad..9a3f7927328c5ca8a7e127b061753f47da8cee39 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -60,7 +60,7 @@ ignore: random
 # ----------
 # The fourth group of parallel test
 # ----------
-test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update namespace
+test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update namespace prepared_xacts
 
 test: privileges
 test: misc
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index f8621b404acf4a75eb0e13adc82a10b9cb22f2e5..bb60dc0a10573641229a003d5e4c1578fc97ab1b 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.26 2004/06/18 06:14:25 tgl Exp $
+# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.27 2005/06/17 22:32:50 tgl Exp $
 # This should probably be in an order similar to parallel_schedule.
 test: boolean
 test: char
@@ -74,6 +74,7 @@ test: btree_index
 test: hash_index
 test: update
 test: namespace
+test: prepared_xacts
 test: privileges
 test: misc
 test: select_views
diff --git a/src/test/regress/sql/prepared_xacts.sql b/src/test/regress/sql/prepared_xacts.sql
new file mode 100644
index 0000000000000000000000000000000000000000..39de88faab6f735c60fa342b622cd299a4342441
--- /dev/null
+++ b/src/test/regress/sql/prepared_xacts.sql
@@ -0,0 +1,137 @@
+--
+-- PREPARED TRANSACTIONS (two-phase commit)
+--
+-- We can't readily test persistence of prepared xacts within the
+-- regression script framework, unfortunately.  Note that a crash
+-- isn't really needed ... stopping and starting the postmaster would
+-- be enough, but we can't even do that here.
+
+
+-- create a simple table that we'll use in the tests
+CREATE TABLE pxtest1 (foobar VARCHAR(10));
+
+INSERT INTO pxtest1 VALUES ('aaa');
+
+
+-- Test PREPARE TRANSACTION
+BEGIN;
+UPDATE pxtest1 SET foobar = 'bbb' WHERE foobar = 'aaa';
+SELECT * FROM pxtest1;
+PREPARE TRANSACTION 'foo1';
+
+SELECT * FROM pxtest1;
+
+-- Test pg_prepared_xacts system view
+SELECT gid FROM pg_prepared_xacts;
+
+-- Test ROLLBACK PREPARED
+ROLLBACK PREPARED 'foo1';
+
+SELECT * FROM pxtest1;
+
+SELECT gid FROM pg_prepared_xacts;
+
+
+-- Test COMMIT PREPARED
+BEGIN;
+INSERT INTO pxtest1 VALUES ('ddd');
+SELECT * FROM pxtest1;
+PREPARE TRANSACTION 'foo2';
+
+SELECT * FROM pxtest1;
+
+COMMIT PREPARED 'foo2';
+
+SELECT * FROM pxtest1;
+
+-- Test duplicate gids
+BEGIN;
+UPDATE pxtest1 SET foobar = 'eee' WHERE foobar = 'ddd';
+SELECT * FROM pxtest1;
+PREPARE TRANSACTION 'foo3';
+
+SELECT gid FROM pg_prepared_xacts;
+
+BEGIN;
+INSERT INTO pxtest1 VALUES ('fff');
+SELECT * FROM pxtest1;
+
+-- This should fail, because the gid foo3 is already in use
+PREPARE TRANSACTION 'foo3';
+
+SELECT * FROM pxtest1;
+
+ROLLBACK PREPARED 'foo3';
+
+SELECT * FROM pxtest1;
+
+-- Clean up
+DROP TABLE pxtest1;
+
+-- Test subtransactions
+BEGIN;
+  CREATE TABLE pxtest2 (a int);
+  INSERT INTO pxtest2 VALUES (1);
+  SAVEPOINT a;
+    INSERT INTO pxtest2 VALUES (2);
+  ROLLBACK TO a;
+  SAVEPOINT b;
+  INSERT INTO pxtest2 VALUES (3);
+PREPARE TRANSACTION 'regress-one';
+
+CREATE TABLE pxtest3(fff int);
+
+-- Test shared invalidation
+BEGIN;
+  DROP TABLE pxtest3;
+  CREATE TABLE pxtest4 (a int);
+  INSERT INTO pxtest4 VALUES (1);
+  INSERT INTO pxtest4 VALUES (2);
+  DECLARE foo CURSOR FOR SELECT * FROM pxtest4;
+  -- Fetch 1 tuple, keeping the cursor open
+  FETCH 1 FROM foo;
+PREPARE TRANSACTION 'regress-two';
+
+-- No such cursor
+FETCH 1 FROM foo;
+
+-- Table doesn't exist, the creation hasn't been committed yet
+SELECT * FROM pxtest2;
+
+-- There should be two prepared transactions
+SELECT gid FROM pg_prepared_xacts;
+
+-- pxtest3 should be locked because of the pending DROP
+set statement_timeout to 1000;
+SELECT * FROM pxtest3;
+reset statement_timeout;
+
+-- Disconnect, we will continue testing in a different backend
+\c -
+
+-- There should still be two prepared transactions
+SELECT gid FROM pg_prepared_xacts;
+
+-- pxtest3 should still be locked because of the pending DROP
+set statement_timeout to 1000;
+SELECT * FROM pxtest3;
+reset statement_timeout;
+
+-- Commit table creation
+COMMIT PREPARED 'regress-one';
+\d pxtest2
+SELECT * FROM pxtest2;
+
+-- There should be one prepared transaction
+SELECT gid FROM pg_prepared_xacts;
+
+-- Commit table drop
+COMMIT PREPARED 'regress-two';
+SELECT * FROM pxtest3;
+
+-- There should be no prepared transactions
+SELECT gid FROM pg_prepared_xacts;
+
+-- Clean up
+DROP TABLE pxtest2;
+DROP TABLE pxtest4;