From d583f10b7e0b9e1ed18f339f3177ed42ac2f7570 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 2 Dec 2010 20:50:48 -0500
Subject: [PATCH] Create core infrastructure for KNNGIST.

This is a heavily revised version of builtin_knngist_core-0.9.  The
ordering operators are no longer mixed in with actual quals, which would
have confused not only humans but significant parts of the planner.
Instead, ordering operators are carried separately throughout planning and
execution.

Since the API for ambeginscan and amrescan functions had to be changed
anyway, this commit takes the opportunity to rationalize that a bit.
RelationGetIndexScan no longer forces a premature index_rescan call;
instead, callers of index_beginscan must call index_rescan too.  Aside from
making the AM-side initialization logic a bit less peculiar, this has the
advantage that we do not make a useless extra amrescan call when there are
runtime key values.  AMs formerly could not assume that the key values
passed to amrescan were actually valid; now they can.

Teodor Sigaev and Tom Lane
---
 doc/src/sgml/catalogs.sgml                 |   6 +-
 doc/src/sgml/indexam.sgml                  |  69 ++++---
 src/backend/access/gin/ginscan.c           |  55 +++---
 src/backend/access/gist/gistscan.c         |  74 ++++----
 src/backend/access/hash/hash.c             |  44 ++---
 src/backend/access/index/genam.c           |  33 ++--
 src/backend/access/index/indexam.c         |  42 +++--
 src/backend/access/nbtree/nbtree.c         |  39 ++--
 src/backend/commands/cluster.c             |   4 +-
 src/backend/commands/explain.c             |   2 +
 src/backend/executor/execQual.c            |   2 +-
 src/backend/executor/execUtils.c           |   4 +-
 src/backend/executor/nodeBitmapIndexscan.c |  24 ++-
 src/backend/executor/nodeIndexscan.c       | 160 ++++++++++++-----
 src/backend/executor/nodeMergejoin.c       |   2 +-
 src/backend/nodes/copyfuncs.c              |   2 +
 src/backend/nodes/outfuncs.c               |   3 +
 src/backend/optimizer/path/costsize.c      |  11 +-
 src/backend/optimizer/path/indxpath.c      | 198 ++++++++++++++++++++-
 src/backend/optimizer/plan/createplan.c    |  86 ++++++++-
 src/backend/optimizer/plan/planner.c       |   2 +-
 src/backend/optimizer/plan/setrefs.c       |   4 +
 src/backend/optimizer/plan/subselect.c     |   5 +-
 src/backend/optimizer/util/pathnode.c      |   6 +-
 src/backend/utils/adt/selfuncs.c           |  93 ++++++----
 src/backend/utils/cache/lsyscache.c        |  31 +++-
 src/include/access/genam.h                 |  10 +-
 src/include/access/relscan.h               |   6 +-
 src/include/access/skey.h                  |   6 +
 src/include/catalog/catversion.h           |   2 +-
 src/include/catalog/pg_am.h                |   6 +-
 src/include/catalog/pg_proc.h              |  16 +-
 src/include/executor/nodeIndexscan.h       |   4 +-
 src/include/nodes/execnodes.h              |  18 +-
 src/include/nodes/plannodes.h              |  19 +-
 src/include/nodes/relation.h               |   8 +
 src/include/optimizer/cost.h               |   2 +-
 src/include/optimizer/pathnode.h           |   1 +
 src/include/pg_config_manual.h             |   2 +-
 src/include/utils/lsyscache.h              |   3 +-
 40 files changed, 790 insertions(+), 314 deletions(-)

diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 54a6dcc102c..217a04e4998 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -510,7 +510,7 @@
       <entry><structfield>ambeginscan</structfield></entry>
       <entry><type>regproc</type></entry>
       <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Start new scan</quote> function</entry>
+      <entry><quote>Prepare for index scan</quote> function</entry>
      </row>
 
      <row>
@@ -531,14 +531,14 @@
       <entry><structfield>amrescan</structfield></entry>
       <entry><type>regproc</type></entry>
       <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Restart this scan</quote> function</entry>
+      <entry><quote>(Re)start index scan</quote> function</entry>
      </row>
 
      <row>
       <entry><structfield>amendscan</structfield></entry>
       <entry><type>regproc</type></entry>
       <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>End this scan</quote> function</entry>
+      <entry><quote>Clean up after index scan</quote> function</entry>
      </row>
 
      <row>
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
index 925aac4571f..d0905eb3e28 100644
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@@ -268,6 +268,7 @@ void
 amcostestimate (PlannerInfo *root,
                 IndexOptInfo *index,
                 List *indexQuals,
+                List *indexOrderBys,
                 RelOptInfo *outer_rel,
                 Cost *indexStartupCost,
                 Cost *indexTotalCost,
@@ -318,19 +319,42 @@ amoptions (ArrayType *reloptions,
 IndexScanDesc
 ambeginscan (Relation indexRelation,
              int nkeys,
-             ScanKey key);
+             int norderbys);
 </programlisting>
-   Begin a new scan.  The <literal>key</> array (of length <literal>nkeys</>)
-   describes the scan key(s) for the index scan.  The result must be a
-   palloc'd struct. For implementation reasons the index access method
+   Prepare for an index scan.  The <literal>nkeys</> and <literal>norderbys</>
+   parameters indicate the number of quals and ordering operators that will be
+   used in the scan; these may be useful for space allocation purposes.
+   Note that the actual values of the scan keys aren't provided yet.
+   The result must be a palloc'd struct.
+   For implementation reasons the index access method
    <emphasis>must</> create this struct by calling
    <function>RelationGetIndexScan()</>.  In most cases
-   <function>ambeginscan</> itself does little beyond making that call;
+   <function>ambeginscan</> does little beyond making that call and perhaps
+   acquiring locks;
    the interesting parts of index-scan startup are in <function>amrescan</>.
   </para>
 
   <para>
 <programlisting>
+void
+amrescan (IndexScanDesc scan,
+          ScanKey keys,
+          int nkeys,
+          ScanKey orderbys,
+          int norderbys);
+</programlisting>
+   Start or restart an indexscan, possibly with new scan keys.  (To restart
+   using previously-passed keys, NULL is passed for <literal>keys</> and/or
+   <literal>orderbys</>.)  Note that it is not allowed for
+   the number of keys or order-by operators to be larger than
+   what was passed to <function>ambeginscan</>.  In practice the restart
+   feature is used when a new outer tuple is selected by a nested-loop join
+   and so a new key comparison value is needed, but the scan key structure
+   remains the same.
+  </para>
+
+  <para>
+<programlisting>
 boolean
 amgettuple (IndexScanDesc scan,
             ScanDirection direction);
@@ -393,22 +417,6 @@ amgetbitmap (IndexScanDesc scan,
   <para>
 <programlisting>
 void
-amrescan (IndexScanDesc scan,
-          ScanKey key);
-</programlisting>
-   Restart the given scan, possibly with new scan keys (to continue using
-   the old keys, NULL is passed for <literal>key</>).  Note that it is not
-   possible for the number of keys to be changed.  In practice the restart
-   feature is used when a new outer tuple is selected by a nested-loop join
-   and so a new key comparison value is needed, but the scan key structure
-   remains the same.  This function is also called by
-   <function>RelationGetIndexScan()</>, so it is used for initial setup
-   of an index scan as well as rescanning.
-  </para>
-
-  <para>
-<programlisting>
-void
 amendscan (IndexScanDesc scan);
 </programlisting>
    End a scan and release resources.  The <literal>scan</> struct itself
@@ -820,8 +828,9 @@ amrestrpos (IndexScanDesc scan);
   <title>Index Cost Estimation Functions</title>
 
   <para>
-   The <function>amcostestimate</> function is given a list of WHERE clauses that have
-   been determined to be usable with the index.  It must return estimates
+   The <function>amcostestimate</> function is given information describing
+   a possible index scan, including lists of WHERE and ORDER BY clauses that
+   have been determined to be usable with the index.  It must return estimates
    of the cost of accessing the index and the selectivity of the WHERE
    clauses (that is, the fraction of parent-table rows that will be
    retrieved during the index scan).  For simple cases, nearly all the
@@ -839,6 +848,7 @@ void
 amcostestimate (PlannerInfo *root,
                 IndexOptInfo *index,
                 List *indexQuals,
+                List *indexOrderBys,
                 RelOptInfo *outer_rel,
                 Cost *indexStartupCost,
                 Cost *indexTotalCost,
@@ -846,7 +856,7 @@ amcostestimate (PlannerInfo *root,
                 double *indexCorrelation);
 </programlisting>
 
-   The first four parameters are inputs:
+   The first five parameters are inputs:
 
    <variablelist>
     <varlistentry>
@@ -873,6 +883,17 @@ amcostestimate (PlannerInfo *root,
       <para>
        List of index qual clauses (implicitly ANDed);
        a <symbol>NIL</> list indicates no qualifiers are available.
+       Note that the list contains expression trees with RestrictInfo nodes
+       at the top, not ScanKeys.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term><parameter>indexOrderBys</></term>
+     <listitem>
+      <para>
+       List of indexable ORDER BY operators, or <symbol>NIL</> if none.
        Note that the list contains expression trees, not ScanKeys.
       </para>
      </listitem>
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c
index a6604c4c934..3a5e52dc383 100644
--- a/src/backend/access/gin/ginscan.c
+++ b/src/backend/access/gin/ginscan.c
@@ -26,11 +26,28 @@ Datum
 ginbeginscan(PG_FUNCTION_ARGS)
 {
 	Relation	rel = (Relation) PG_GETARG_POINTER(0);
-	int			keysz = PG_GETARG_INT32(1);
-	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(2);
+	int			nkeys = PG_GETARG_INT32(1);
+	int			norderbys = PG_GETARG_INT32(2);
 	IndexScanDesc scan;
+	GinScanOpaque so;
+
+	/* no order by operators allowed */
+	Assert(norderbys == 0);
+
+	scan = RelationGetIndexScan(rel, nkeys, norderbys);
+
+	/* allocate private workspace */
+	so = (GinScanOpaque) palloc(sizeof(GinScanOpaqueData));
+	so->keys = NULL;
+	so->nkeys = 0;
+	so->tempCtx = AllocSetContextCreate(CurrentMemoryContext,
+										"Gin scan temporary context",
+										ALLOCSET_DEFAULT_MINSIZE,
+										ALLOCSET_DEFAULT_INITSIZE,
+										ALLOCSET_DEFAULT_MAXSIZE);
+	initGinState(&so->ginstate, scan->indexRelation);
 
-	scan = RelationGetIndexScan(rel, keysz, scankey);
+	scan->opaque = so;
 
 	PG_RETURN_POINTER(scan);
 }
@@ -241,27 +258,10 @@ ginrescan(PG_FUNCTION_ARGS)
 {
 	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
 	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(1);
-	GinScanOpaque so;
-
-	so = (GinScanOpaque) scan->opaque;
-
-	if (so == NULL)
-	{
-		/* if called from ginbeginscan */
-		so = (GinScanOpaque) palloc(sizeof(GinScanOpaqueData));
-		so->tempCtx = AllocSetContextCreate(CurrentMemoryContext,
-											"Gin scan temporary context",
-											ALLOCSET_DEFAULT_MINSIZE,
-											ALLOCSET_DEFAULT_INITSIZE,
-											ALLOCSET_DEFAULT_MAXSIZE);
-		initGinState(&so->ginstate, scan->indexRelation);
-		scan->opaque = so;
-	}
-	else
-	{
-		freeScanKeys(so->keys, so->nkeys);
-	}
+	/* remaining arguments are ignored */
+	GinScanOpaque so = (GinScanOpaque) scan->opaque;
 
+	freeScanKeys(so->keys, so->nkeys);
 	so->keys = NULL;
 
 	if (scankey && scan->numberOfKeys > 0)
@@ -280,14 +280,11 @@ ginendscan(PG_FUNCTION_ARGS)
 	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
 	GinScanOpaque so = (GinScanOpaque) scan->opaque;
 
-	if (so != NULL)
-	{
-		freeScanKeys(so->keys, so->nkeys);
+	freeScanKeys(so->keys, so->nkeys);
 
-		MemoryContextDelete(so->tempCtx);
+	MemoryContextDelete(so->tempCtx);
 
-		pfree(so);
-	}
+	pfree(so);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c
index 21f4ea54b7d..106714511a8 100644
--- a/src/backend/access/gist/gistscan.c
+++ b/src/backend/access/gist/gistscan.c
@@ -28,10 +28,24 @@ gistbeginscan(PG_FUNCTION_ARGS)
 {
 	Relation	r = (Relation) PG_GETARG_POINTER(0);
 	int			nkeys = PG_GETARG_INT32(1);
-	ScanKey		key = (ScanKey) PG_GETARG_POINTER(2);
+	int			norderbys = PG_GETARG_INT32(2);
 	IndexScanDesc scan;
+	GISTScanOpaque so;
+
+	/* no order by operators allowed */
+	Assert(norderbys == 0);
+
+	scan = RelationGetIndexScan(r, nkeys, norderbys);
+
+	/* initialize opaque data */
+	so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData));
+	so->stack = NULL;
+	so->tempCxt = createTempGistContext();
+	so->curbuf = InvalidBuffer;
+	so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
+	initGISTstate(so->giststate, scan->indexRelation);
 
-	scan = RelationGetIndexScan(r, nkeys, key);
+	scan->opaque = so;
 
 	PG_RETURN_POINTER(scan);
 }
@@ -41,33 +55,18 @@ gistrescan(PG_FUNCTION_ARGS)
 {
 	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
 	ScanKey		key = (ScanKey) PG_GETARG_POINTER(1);
-	GISTScanOpaque so;
+	/* remaining arguments are ignored */
+	GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
 	int			i;
 
-	so = (GISTScanOpaque) scan->opaque;
-	if (so != NULL)
+	/* rescan an existing indexscan --- reset state */
+	gistfreestack(so->stack);
+	so->stack = NULL;
+	/* drop pins on buffers -- no locks held */
+	if (BufferIsValid(so->curbuf))
 	{
-		/* rescan an existing indexscan --- reset state */
-		gistfreestack(so->stack);
-		so->stack = NULL;
-		/* drop pins on buffers -- no locks held */
-		if (BufferIsValid(so->curbuf))
-		{
-			ReleaseBuffer(so->curbuf);
-			so->curbuf = InvalidBuffer;
-		}
-	}
-	else
-	{
-		/* initialize opaque data */
-		so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData));
-		so->stack = NULL;
-		so->tempCxt = createTempGistContext();
+		ReleaseBuffer(so->curbuf);
 		so->curbuf = InvalidBuffer;
-		so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
-		initGISTstate(so->giststate, scan->indexRelation);
-
-		scan->opaque = so;
 	}
 
 	/*
@@ -130,21 +129,16 @@ Datum
 gistendscan(PG_FUNCTION_ARGS)
 {
 	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
-	GISTScanOpaque so;
-
-	so = (GISTScanOpaque) scan->opaque;
-
-	if (so != NULL)
-	{
-		gistfreestack(so->stack);
-		if (so->giststate != NULL)
-			freeGISTstate(so->giststate);
-		/* drop pins on buffers -- we aren't holding any locks */
-		if (BufferIsValid(so->curbuf))
-			ReleaseBuffer(so->curbuf);
-		MemoryContextDelete(so->tempCxt);
-		pfree(scan->opaque);
-	}
+	GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
+
+	gistfreestack(so->stack);
+	if (so->giststate != NULL)
+		freeGISTstate(so->giststate);
+	/* drop pins on buffers -- we aren't holding any locks */
+	if (BufferIsValid(so->curbuf))
+		ReleaseBuffer(so->curbuf);
+	MemoryContextDelete(so->tempCxt);
+	pfree(so);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index bb46446d713..e53ec3d5eaa 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -366,12 +366,16 @@ Datum
 hashbeginscan(PG_FUNCTION_ARGS)
 {
 	Relation	rel = (Relation) PG_GETARG_POINTER(0);
-	int			keysz = PG_GETARG_INT32(1);
-	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(2);
+	int			nkeys = PG_GETARG_INT32(1);
+	int			norderbys = PG_GETARG_INT32(2);
 	IndexScanDesc scan;
 	HashScanOpaque so;
 
-	scan = RelationGetIndexScan(rel, keysz, scankey);
+	/* no order by operators allowed */
+	Assert(norderbys == 0);
+
+	scan = RelationGetIndexScan(rel, nkeys, norderbys);
+
 	so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
 	so->hashso_bucket_valid = false;
 	so->hashso_bucket_blkno = 0;
@@ -396,26 +400,23 @@ hashrescan(PG_FUNCTION_ARGS)
 {
 	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
 	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(1);
+	/* remaining arguments are ignored */
 	HashScanOpaque so = (HashScanOpaque) scan->opaque;
 	Relation	rel = scan->indexRelation;
 
-	/* if we are called from beginscan, so is still NULL */
-	if (so)
-	{
-		/* release any pin we still hold */
-		if (BufferIsValid(so->hashso_curbuf))
-			_hash_dropbuf(rel, so->hashso_curbuf);
-		so->hashso_curbuf = InvalidBuffer;
-
-		/* release lock on bucket, too */
-		if (so->hashso_bucket_blkno)
-			_hash_droplock(rel, so->hashso_bucket_blkno, HASH_SHARE);
-		so->hashso_bucket_blkno = 0;
-
-		/* set position invalid (this will cause _hash_first call) */
-		ItemPointerSetInvalid(&(so->hashso_curpos));
-		ItemPointerSetInvalid(&(so->hashso_heappos));
-	}
+	/* release any pin we still hold */
+	if (BufferIsValid(so->hashso_curbuf))
+		_hash_dropbuf(rel, so->hashso_curbuf);
+	so->hashso_curbuf = InvalidBuffer;
+
+	/* release lock on bucket, too */
+	if (so->hashso_bucket_blkno)
+		_hash_droplock(rel, so->hashso_bucket_blkno, HASH_SHARE);
+	so->hashso_bucket_blkno = 0;
+
+	/* set position invalid (this will cause _hash_first call) */
+	ItemPointerSetInvalid(&(so->hashso_curpos));
+	ItemPointerSetInvalid(&(so->hashso_heappos));
 
 	/* Update scan key, if a new one is given */
 	if (scankey && scan->numberOfKeys > 0)
@@ -423,8 +424,7 @@ hashrescan(PG_FUNCTION_ARGS)
 		memmove(scan->keyData,
 				scankey,
 				scan->numberOfKeys * sizeof(ScanKeyData));
-		if (so)
-			so->hashso_bucket_valid = false;
+		so->hashso_bucket_valid = false;
 	}
 
 	PG_RETURN_VOID();
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index cd0212aa94d..d0eaa36b3b5 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -57,22 +57,20 @@
 /* ----------------
  *	RelationGetIndexScan -- Create and fill an IndexScanDesc.
  *
- *		This routine creates an index scan structure and sets its contents
- *		up correctly. This routine calls AMrescan to set up the scan with
- *		the passed key.
+ *		This routine creates an index scan structure and sets up initial
+ *		contents for it.
  *
  *		Parameters:
  *				indexRelation -- index relation for scan.
- *				nkeys -- count of scan keys.
- *				key -- array of scan keys to restrict the index scan.
+ *				nkeys -- count of scan keys (index qual conditions).
+ *				norderbys -- count of index order-by operators.
  *
  *		Returns:
  *				An initialized IndexScanDesc.
  * ----------------
  */
 IndexScanDesc
-RelationGetIndexScan(Relation indexRelation,
-					 int nkeys, ScanKey key)
+RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
 {
 	IndexScanDesc scan;
 
@@ -82,15 +80,19 @@ RelationGetIndexScan(Relation indexRelation,
 	scan->indexRelation = indexRelation;
 	scan->xs_snapshot = SnapshotNow;	/* may be set later */
 	scan->numberOfKeys = nkeys;
+	scan->numberOfOrderBys = norderbys;
 
 	/*
-	 * We allocate the key space here, but the AM is responsible for actually
-	 * filling it from the passed key array.
+	 * We allocate key workspace here, but it won't get filled until amrescan.
 	 */
 	if (nkeys > 0)
 		scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
 	else
 		scan->keyData = NULL;
+	if (norderbys > 0)
+		scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys);
+	else
+		scan->orderByData = NULL;
 
 	/*
 	 * During recovery we ignore killed tuples and don't bother to kill them
@@ -115,11 +117,6 @@ RelationGetIndexScan(Relation indexRelation,
 	scan->xs_next_hot = InvalidOffsetNumber;
 	scan->xs_prev_xmax = InvalidTransactionId;
 
-	/*
-	 * Let the AM fill in the key and any opaque data it wants.
-	 */
-	index_rescan(scan, key);
-
 	return scan;
 }
 
@@ -140,6 +137,8 @@ IndexScanEnd(IndexScanDesc scan)
 {
 	if (scan->keyData != NULL)
 		pfree(scan->keyData);
+	if (scan->orderByData != NULL)
+		pfree(scan->orderByData);
 
 	pfree(scan);
 }
@@ -286,7 +285,8 @@ systable_beginscan(Relation heapRelation,
 		}
 
 		sysscan->iscan = index_beginscan(heapRelation, irel,
-										 snapshot, nkeys, key);
+										 snapshot, nkeys, 0);
+		index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
 		sysscan->scan = NULL;
 	}
 	else
@@ -450,7 +450,8 @@ systable_beginscan_ordered(Relation heapRelation,
 	}
 
 	sysscan->iscan = index_beginscan(heapRelation, indexRelation,
-									 snapshot, nkeys, key);
+									 snapshot, nkeys, 0);
+	index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
 	sysscan->scan = NULL;
 
 	return sysscan;
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index d151ffda8c0..8c79c6149b6 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -114,7 +114,7 @@ do { \
 } while(0)
 
 static IndexScanDesc index_beginscan_internal(Relation indexRelation,
-						 int nkeys, ScanKey key);
+						 int nkeys, int norderbys);
 
 
 /* ----------------------------------------------------------------
@@ -213,11 +213,11 @@ IndexScanDesc
 index_beginscan(Relation heapRelation,
 				Relation indexRelation,
 				Snapshot snapshot,
-				int nkeys, ScanKey key)
+				int nkeys, int norderbys)
 {
 	IndexScanDesc scan;
 
-	scan = index_beginscan_internal(indexRelation, nkeys, key);
+	scan = index_beginscan_internal(indexRelation, nkeys, norderbys);
 
 	/*
 	 * Save additional parameters into the scandesc.  Everything else was set
@@ -238,11 +238,11 @@ index_beginscan(Relation heapRelation,
 IndexScanDesc
 index_beginscan_bitmap(Relation indexRelation,
 					   Snapshot snapshot,
-					   int nkeys, ScanKey key)
+					   int nkeys)
 {
 	IndexScanDesc scan;
 
-	scan = index_beginscan_internal(indexRelation, nkeys, key);
+	scan = index_beginscan_internal(indexRelation, nkeys, 0);
 
 	/*
 	 * Save additional parameters into the scandesc.  Everything else was set
@@ -258,7 +258,7 @@ index_beginscan_bitmap(Relation indexRelation,
  */
 static IndexScanDesc
 index_beginscan_internal(Relation indexRelation,
-						 int nkeys, ScanKey key)
+						 int nkeys, int norderbys)
 {
 	IndexScanDesc scan;
 	FmgrInfo   *procedure;
@@ -278,7 +278,7 @@ index_beginscan_internal(Relation indexRelation,
 		DatumGetPointer(FunctionCall3(procedure,
 									  PointerGetDatum(indexRelation),
 									  Int32GetDatum(nkeys),
-									  PointerGetDatum(key)));
+									  Int32GetDatum(norderbys)));
 
 	return scan;
 }
@@ -286,23 +286,28 @@ index_beginscan_internal(Relation indexRelation,
 /* ----------------
  *		index_rescan  - (re)start a scan of an index
  *
- * The caller may specify a new set of scankeys (but the number of keys
- * cannot change).	To restart the scan without changing keys, pass NULL
- * for the key array.
- *
- * Note that this is also called when first starting an indexscan;
- * see RelationGetIndexScan.  Keys *must* be passed in that case,
- * unless scan->numberOfKeys is zero.
+ * During a restart, the caller may specify a new set of scankeys and/or
+ * orderbykeys; but the number of keys cannot differ from what index_beginscan
+ * was told.  (Later we might relax that to "must not exceed", but currently
+ * the index AMs tend to assume that scan->numberOfKeys is what to believe.)
+ * To restart the scan without changing keys, pass NULL for the key arrays.
+ * (Of course, keys *must* be passed on the first call, unless
+ * scan->numberOfKeys is zero.)
  * ----------------
  */
 void
-index_rescan(IndexScanDesc scan, ScanKey key)
+index_rescan(IndexScanDesc scan,
+			 ScanKey keys, int nkeys,
+			 ScanKey orderbys, int norderbys)
 {
 	FmgrInfo   *procedure;
 
 	SCAN_CHECKS;
 	GET_SCAN_PROCEDURE(amrescan);
 
+	Assert(nkeys == scan->numberOfKeys);
+	Assert(norderbys == scan->numberOfOrderBys);
+
 	/* Release any held pin on a heap page */
 	if (BufferIsValid(scan->xs_cbuf))
 	{
@@ -314,9 +319,12 @@ index_rescan(IndexScanDesc scan, ScanKey key)
 
 	scan->kill_prior_tuple = false;		/* for safety */
 
-	FunctionCall2(procedure,
+	FunctionCall5(procedure,
 				  PointerGetDatum(scan),
-				  PointerGetDatum(key));
+				  PointerGetDatum(keys),
+				  Int32GetDatum(nkeys),
+				  PointerGetDatum(orderbys),
+				  Int32GetDatum(norderbys));
 }
 
 /* ----------------
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 46aeb9e6adb..655a40090e9 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -337,12 +337,27 @@ Datum
 btbeginscan(PG_FUNCTION_ARGS)
 {
 	Relation	rel = (Relation) PG_GETARG_POINTER(0);
-	int			keysz = PG_GETARG_INT32(1);
-	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(2);
+	int			nkeys = PG_GETARG_INT32(1);
+	int			norderbys = PG_GETARG_INT32(2);
 	IndexScanDesc scan;
+	BTScanOpaque so;
+
+	/* no order by operators allowed */
+	Assert(norderbys == 0);
 
 	/* get the scan */
-	scan = RelationGetIndexScan(rel, keysz, scankey);
+	scan = RelationGetIndexScan(rel, nkeys, norderbys);
+
+	/* allocate private workspace */
+	so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
+	so->currPos.buf = so->markPos.buf = InvalidBuffer;
+	if (scan->numberOfKeys > 0)
+		so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
+	else
+		so->keyData = NULL;
+	so->killedItems = NULL;		/* until needed */
+	so->numKilled = 0;
+	scan->opaque = so;
 
 	PG_RETURN_POINTER(scan);
 }
@@ -355,22 +370,8 @@ btrescan(PG_FUNCTION_ARGS)
 {
 	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
 	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(1);
-	BTScanOpaque so;
-
-	so = (BTScanOpaque) scan->opaque;
-
-	if (so == NULL)				/* if called from btbeginscan */
-	{
-		so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
-		so->currPos.buf = so->markPos.buf = InvalidBuffer;
-		if (scan->numberOfKeys > 0)
-			so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
-		else
-			so->keyData = NULL;
-		so->killedItems = NULL; /* until needed */
-		so->numKilled = 0;
-		scan->opaque = so;
-	}
+	/* remaining arguments are ignored */
+	BTScanOpaque so = (BTScanOpaque) scan->opaque;
 
 	/* we aren't holding any read locks, but gotta drop the pins */
 	if (BTScanPosIsValid(so->currPos))
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index bb7cd746b1b..e1dbd6d985b 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -875,8 +875,8 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 	if (OldIndex != NULL && !use_sort)
 	{
 		heapScan = NULL;
-		indexScan = index_beginscan(OldHeap, OldIndex,
-									SnapshotAny, 0, (ScanKey) NULL);
+		indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
+		index_rescan(indexScan, NULL, 0, NULL, 0);
 	}
 	else
 	{
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index a5e44c046f7..81885b4fb74 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1017,6 +1017,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		case T_IndexScan:
 			show_scan_qual(((IndexScan *) plan)->indexqualorig,
 						   "Index Cond", planstate, ancestors, es);
+			show_scan_qual(((IndexScan *) plan)->indexorderbyorig,
+						   "Order By", planstate, ancestors, es);
 			show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 			break;
 		case T_BitmapIndexScan:
diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c
index 27ea91c0140..6bac6d06236 100644
--- a/src/backend/executor/execQual.c
+++ b/src/backend/executor/execQual.c
@@ -4694,7 +4694,7 @@ ExecInitExpr(Expr *node, PlanState *parent)
 					Oid			righttype;
 					Oid			proc;
 
-					get_op_opfamily_properties(opno, opfamily,
+					get_op_opfamily_properties(opno, opfamily, false,
 											   &strategy,
 											   &lefttype,
 											   &righttype);
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 57806ca8f0f..6ad0f1e52ad 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -1211,8 +1211,8 @@ check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo,
 retry:
 	conflict = false;
 	found_self = false;
-	index_scan = index_beginscan(heap, index, &DirtySnapshot,
-								 index_natts, scankeys);
+	index_scan = index_beginscan(heap, index, &DirtySnapshot, index_natts, 0);
+	index_rescan(index_scan, scankeys, index_natts, NULL, 0);
 
 	while ((tup = index_getnext(index_scan,
 								ForwardScanDirection)) != NULL)
diff --git a/src/backend/executor/nodeBitmapIndexscan.c b/src/backend/executor/nodeBitmapIndexscan.c
index 97ce0dde294..573e294882c 100644
--- a/src/backend/executor/nodeBitmapIndexscan.c
+++ b/src/backend/executor/nodeBitmapIndexscan.c
@@ -95,7 +95,9 @@ MultiExecBitmapIndexScan(BitmapIndexScanState *node)
 		doscan = ExecIndexAdvanceArrayKeys(node->biss_ArrayKeys,
 										   node->biss_NumArrayKeys);
 		if (doscan)				/* reset index scan */
-			index_rescan(node->biss_ScanDesc, node->biss_ScanKeys);
+			index_rescan(node->biss_ScanDesc,
+						 node->biss_ScanKeys, node->biss_NumScanKeys,
+						 NULL, 0);
 	}
 
 	/* must provide our own instrumentation support */
@@ -147,7 +149,9 @@ ExecReScanBitmapIndexScan(BitmapIndexScanState *node)
 
 	/* reset index scan */
 	if (node->biss_RuntimeKeysReady)
-		index_rescan(node->biss_ScanDesc, node->biss_ScanKeys);
+		index_rescan(node->biss_ScanDesc,
+					 node->biss_ScanKeys, node->biss_NumScanKeys,
+					 NULL, 0);
 }
 
 /* ----------------------------------------------------------------
@@ -256,6 +260,8 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
 	 * Initialize index-specific scan state
 	 */
 	indexstate->biss_RuntimeKeysReady = false;
+	indexstate->biss_RuntimeKeys = NULL;
+	indexstate->biss_NumRuntimeKeys = 0;
 
 	/*
 	 * build the index scan keys from the index qualification
@@ -264,6 +270,7 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
 						   indexstate->biss_RelationDesc,
 						   node->scan.scanrelid,
 						   node->indexqual,
+						   false,
 						   &indexstate->biss_ScanKeys,
 						   &indexstate->biss_NumScanKeys,
 						   &indexstate->biss_RuntimeKeys,
@@ -297,8 +304,17 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
 	indexstate->biss_ScanDesc =
 		index_beginscan_bitmap(indexstate->biss_RelationDesc,
 							   estate->es_snapshot,
-							   indexstate->biss_NumScanKeys,
-							   indexstate->biss_ScanKeys);
+							   indexstate->biss_NumScanKeys);
+
+	/*
+	 * If no run-time keys to calculate, go ahead and pass the scankeys to
+	 * the index AM.
+	 */
+	if (indexstate->biss_NumRuntimeKeys == 0 &&
+		indexstate->biss_NumArrayKeys == 0)
+		index_rescan(indexstate->biss_ScanDesc,
+					 indexstate->biss_ScanKeys, indexstate->biss_NumScanKeys,
+					 NULL, 0);
 
 	/*
 	 * all done.
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index ee5fc72c209..3aed2960d3f 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -181,7 +181,9 @@ ExecReScanIndexScan(IndexScanState *node)
 	node->iss_RuntimeKeysReady = true;
 
 	/* reset index scan */
-	index_rescan(node->iss_ScanDesc, node->iss_ScanKeys);
+	index_rescan(node->iss_ScanDesc,
+				 node->iss_ScanKeys, node->iss_NumScanKeys,
+				 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
 
 	ExecScanReScan(&node->ss);
 }
@@ -480,10 +482,11 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 	 * initialize child expressions
 	 *
 	 * Note: we don't initialize all of the indexqual expression, only the
-	 * sub-parts corresponding to runtime keys (see below).  The indexqualorig
-	 * expression is always initialized even though it will only be used in
-	 * some uncommon cases --- would be nice to improve that.  (Problem is
-	 * that any SubPlans present in the expression must be found now...)
+	 * sub-parts corresponding to runtime keys (see below).  Likewise for
+	 * indexorderby, if any.  But the indexqualorig expression is always
+	 * initialized even though it will only be used in some uncommon cases ---
+	 * would be nice to improve that.  (Problem is that any SubPlans present
+	 * in the expression must be found now...)
 	 */
 	indexstate->ss.ps.targetlist = (List *)
 		ExecInitExpr((Expr *) node->scan.plan.targetlist,
@@ -543,6 +546,8 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 	 * Initialize index-specific scan state
 	 */
 	indexstate->iss_RuntimeKeysReady = false;
+	indexstate->iss_RuntimeKeys = NULL;
+	indexstate->iss_NumRuntimeKeys = 0;
 
 	/*
 	 * build the index scan keys from the index qualification
@@ -551,6 +556,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 						   indexstate->iss_RelationDesc,
 						   node->scan.scanrelid,
 						   node->indexqual,
+						   false,
 						   &indexstate->iss_ScanKeys,
 						   &indexstate->iss_NumScanKeys,
 						   &indexstate->iss_RuntimeKeys,
@@ -558,6 +564,21 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 						   NULL,	/* no ArrayKeys */
 						   NULL);
 
+	/*
+	 * any ORDER BY exprs have to be turned into scankeys in the same way
+	 */
+	ExecIndexBuildScanKeys((PlanState *) indexstate,
+						   indexstate->iss_RelationDesc,
+						   node->scan.scanrelid,
+						   node->indexorderby,
+						   true,
+						   &indexstate->iss_OrderByKeys,
+						   &indexstate->iss_NumOrderByKeys,
+						   &indexstate->iss_RuntimeKeys,
+						   &indexstate->iss_NumRuntimeKeys,
+						   NULL,	/* no ArrayKeys */
+						   NULL);
+
 	/*
 	 * If we have runtime keys, we need an ExprContext to evaluate them. The
 	 * node's standard context won't do because we want to reset that context
@@ -584,7 +605,16 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 											   indexstate->iss_RelationDesc,
 											   estate->es_snapshot,
 											   indexstate->iss_NumScanKeys,
-											   indexstate->iss_ScanKeys);
+											   indexstate->iss_NumOrderByKeys);
+
+	/*
+	 * If no run-time keys to calculate, go ahead and pass the scankeys to
+	 * the index AM.
+	 */
+	if (indexstate->iss_NumRuntimeKeys == 0)
+		index_rescan(indexstate->iss_ScanDesc,
+					 indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys,
+					 indexstate->iss_OrderByKeys, indexstate->iss_NumOrderByKeys);
 
 	/*
 	 * all done.
@@ -624,12 +654,20 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
  * 5. NullTest ("indexkey IS NULL/IS NOT NULL").  We just fill in the
  * ScanKey properly.
  *
+ * This code is also used to prepare ORDER BY expressions for amcanorderbyop
+ * indexes.  The behavior is exactly the same, except that we have to look up
+ * the operator differently.  Note that only cases 1 and 2 are currently
+ * possible for ORDER BY.
+ *
  * Input params are:
  *
  * planstate: executor state node we are working for
  * index: the index we are building scan keys for
  * scanrelid: varno of the index's relation within current query
- * quals: indexquals expressions
+ * quals: indexquals (or indexorderbys) expressions
+ * isorderby: true if processing ORDER BY exprs, false if processing quals
+ * *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none
+ * *numRuntimeKeys: number of pre-existing runtime keys
  *
  * Output params are:
  *
@@ -645,7 +683,8 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
  */
 void
 ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
-					   List *quals, ScanKey *scanKeys, int *numScanKeys,
+					   List *quals, bool isorderby,
+					   ScanKey *scanKeys, int *numScanKeys,
 					   IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys,
 					   IndexArrayKeyInfo **arrayKeys, int *numArrayKeys)
 {
@@ -654,42 +693,30 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
 	IndexRuntimeKeyInfo *runtime_keys;
 	IndexArrayKeyInfo *array_keys;
 	int			n_scan_keys;
-	int			extra_scan_keys;
 	int			n_runtime_keys;
+	int			max_runtime_keys;
 	int			n_array_keys;
 	int			j;
 
+	/* Allocate array for ScanKey structs: one per qual */
+	n_scan_keys = list_length(quals);
+	scan_keys = (ScanKey) palloc(n_scan_keys * sizeof(ScanKeyData));
+
 	/*
-	 * If there are any RowCompareExpr quals, we need extra ScanKey entries
-	 * for them, and possibly extra runtime-key entries.  Count up what's
-	 * needed.	(The subsidiary ScanKey arrays for the RowCompareExprs could
-	 * be allocated as separate chunks, but we have to count anyway to make
-	 * runtime_keys large enough, so might as well just do one palloc.)
+	 * runtime_keys array is dynamically resized as needed.  We handle it
+	 * this way so that the same runtime keys array can be shared between
+	 * indexquals and indexorderbys, which will be processed in separate
+	 * calls of this function.  Caller must be sure to pass in NULL/0 for
+	 * first call.
 	 */
-	n_scan_keys = list_length(quals);
-	extra_scan_keys = 0;
-	foreach(qual_cell, quals)
-	{
-		if (IsA(lfirst(qual_cell), RowCompareExpr))
-			extra_scan_keys +=
-				list_length(((RowCompareExpr *) lfirst(qual_cell))->opnos);
-	}
-	scan_keys = (ScanKey)
-		palloc((n_scan_keys + extra_scan_keys) * sizeof(ScanKeyData));
-	/* Allocate these arrays as large as they could possibly need to be */
-	runtime_keys = (IndexRuntimeKeyInfo *)
-		palloc((n_scan_keys + extra_scan_keys) * sizeof(IndexRuntimeKeyInfo));
+	runtime_keys = *runtimeKeys;
+	n_runtime_keys = max_runtime_keys = *numRuntimeKeys;
+
+	/* Allocate array_keys as large as it could possibly need to be */
 	array_keys = (IndexArrayKeyInfo *)
 		palloc0(n_scan_keys * sizeof(IndexArrayKeyInfo));
-	n_runtime_keys = 0;
 	n_array_keys = 0;
 
-	/*
-	 * Below here, extra_scan_keys is index of first cell to use for next
-	 * RowCompareExpr
-	 */
-	extra_scan_keys = n_scan_keys;
-
 	/*
 	 * for each opclause in the given qual, convert the opclause into a single
 	 * scan key
@@ -742,11 +769,14 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
 			 */
 			opfamily = index->rd_opfamily[varattno - 1];
 
-			get_op_opfamily_properties(opno, opfamily,
+			get_op_opfamily_properties(opno, opfamily, isorderby,
 									   &op_strategy,
 									   &op_lefttype,
 									   &op_righttype);
 
+			if (isorderby)
+				flags |= SK_ORDER_BY;
+
 			/*
 			 * rightop is the constant or variable comparison value
 			 */
@@ -767,6 +797,21 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
 			else
 			{
 				/* Need to treat this one as a runtime key */
+				if (n_runtime_keys >= max_runtime_keys)
+				{
+					if (max_runtime_keys == 0)
+					{
+						max_runtime_keys = 8;
+						runtime_keys = (IndexRuntimeKeyInfo *)
+							palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
+					}
+					else
+					{
+						max_runtime_keys *= 2;
+						runtime_keys = (IndexRuntimeKeyInfo *)
+							repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
+					}
+				}
 				runtime_keys[n_runtime_keys].scan_key = this_scan_key;
 				runtime_keys[n_runtime_keys].key_expr =
 					ExecInitExpr(rightop, planstate);
@@ -794,12 +839,19 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
 			ListCell   *largs_cell = list_head(rc->largs);
 			ListCell   *rargs_cell = list_head(rc->rargs);
 			ListCell   *opnos_cell = list_head(rc->opnos);
-			ScanKey		first_sub_key = &scan_keys[extra_scan_keys];
+			ScanKey		first_sub_key;
+			int			n_sub_key;
+
+			Assert(!isorderby);
+
+			first_sub_key = (ScanKey)
+				palloc(list_length(rc->opnos) * sizeof(ScanKeyData));
+			n_sub_key = 0;
 
 			/* Scan RowCompare columns and generate subsidiary ScanKey items */
 			while (opnos_cell != NULL)
 			{
-				ScanKey		this_sub_key = &scan_keys[extra_scan_keys];
+				ScanKey		this_sub_key = &first_sub_key[n_sub_key];
 				int			flags = SK_ROW_MEMBER;
 				Datum		scanvalue;
 
@@ -832,7 +884,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
 					elog(ERROR, "bogus RowCompare index qualification");
 				opfamily = index->rd_opfamily[varattno - 1];
 
-				get_op_opfamily_properties(opno, opfamily,
+				get_op_opfamily_properties(opno, opfamily, isorderby,
 										   &op_strategy,
 										   &op_lefttype,
 										   &op_righttype);
@@ -866,6 +918,21 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
 				else
 				{
 					/* Need to treat this one as a runtime key */
+					if (n_runtime_keys >= max_runtime_keys)
+					{
+						if (max_runtime_keys == 0)
+						{
+							max_runtime_keys = 8;
+							runtime_keys = (IndexRuntimeKeyInfo *)
+								palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
+						}
+						else
+						{
+							max_runtime_keys *= 2;
+							runtime_keys = (IndexRuntimeKeyInfo *)
+								repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
+						}
+					}
 					runtime_keys[n_runtime_keys].scan_key = this_sub_key;
 					runtime_keys[n_runtime_keys].key_expr =
 						ExecInitExpr(rightop, planstate);
@@ -885,11 +952,11 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
 									   op_righttype,	/* strategy subtype */
 									   opfuncid,		/* reg proc to use */
 									   scanvalue);		/* constant */
-				extra_scan_keys++;
+				n_sub_key++;
 			}
 
 			/* Mark the last subsidiary scankey correctly */
-			scan_keys[extra_scan_keys - 1].sk_flags |= SK_ROW_END;
+			first_sub_key[n_sub_key - 1].sk_flags |= SK_ROW_END;
 
 			/*
 			 * We don't use ScanKeyEntryInitialize for the header because it
@@ -907,6 +974,8 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
 			/* indexkey op ANY (array-expression) */
 			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
 
+			Assert(!isorderby);
+
 			Assert(saop->useOr);
 			opno = saop->opno;
 			opfuncid = saop->opfuncid;
@@ -935,7 +1004,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
 			 */
 			opfamily = index->rd_opfamily[varattno - 1];
 
-			get_op_opfamily_properties(opno, opfamily,
+			get_op_opfamily_properties(opno, opfamily, isorderby,
 									   &op_strategy,
 									   &op_lefttype,
 									   &op_righttype);
@@ -973,6 +1042,8 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
 			NullTest   *ntest = (NullTest *) clause;
 			int			flags;
 
+			Assert(!isorderby);
+
 			/*
 			 * argument should be the index key Var, possibly relabeled
 			 */
@@ -1020,12 +1091,9 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
 				 (int) nodeTag(clause));
 	}
 
+	Assert(n_runtime_keys <= max_runtime_keys);
+
 	/* Get rid of any unused arrays */
-	if (n_runtime_keys == 0)
-	{
-		pfree(runtime_keys);
-		runtime_keys = NULL;
-	}
 	if (n_array_keys == 0)
 	{
 		pfree(array_keys);
diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c
index e8ce5bc02b3..98d1615514b 100644
--- a/src/backend/executor/nodeMergejoin.c
+++ b/src/backend/executor/nodeMergejoin.c
@@ -201,7 +201,7 @@ MJExamineQuals(List *mergeclauses,
 		clause->rexpr = ExecInitExpr((Expr *) lsecond(qual->args), parent);
 
 		/* Extract the operator's declared left/right datatypes */
-		get_op_opfamily_properties(qual->opno, opfamily,
+		get_op_opfamily_properties(qual->opno, opfamily, false,
 								   &op_strategy,
 								   &op_lefttype,
 								   &op_righttype);
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 0e0b4dc598a..4506518768d 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -363,6 +363,8 @@ _copyIndexScan(IndexScan *from)
 	COPY_SCALAR_FIELD(indexid);
 	COPY_NODE_FIELD(indexqual);
 	COPY_NODE_FIELD(indexqualorig);
+	COPY_NODE_FIELD(indexorderby);
+	COPY_NODE_FIELD(indexorderbyorig);
 	COPY_SCALAR_FIELD(indexorderdir);
 
 	return newnode;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index afbfccabda5..5d09e16477d 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -439,6 +439,8 @@ _outIndexScan(StringInfo str, IndexScan *node)
 	WRITE_OID_FIELD(indexid);
 	WRITE_NODE_FIELD(indexqual);
 	WRITE_NODE_FIELD(indexqualorig);
+	WRITE_NODE_FIELD(indexorderby);
+	WRITE_NODE_FIELD(indexorderbyorig);
 	WRITE_ENUM_FIELD(indexorderdir, ScanDirection);
 }
 
@@ -1424,6 +1426,7 @@ _outIndexPath(StringInfo str, IndexPath *node)
 	WRITE_NODE_FIELD(indexinfo);
 	WRITE_NODE_FIELD(indexclauses);
 	WRITE_NODE_FIELD(indexquals);
+	WRITE_NODE_FIELD(indexorderbys);
 	WRITE_BOOL_FIELD(isjoininner);
 	WRITE_ENUM_FIELD(indexscandir, ScanDirection);
 	WRITE_FLOAT_FIELD(indextotalcost, "%.2f");
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 0724f9a6c9c..e6edbdb1e84 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -209,6 +209,7 @@ cost_seqscan(Path *path, PlannerInfo *root,
  *
  * 'index' is the index to be used
  * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
+ * 'indexOrderBys' is the list of ORDER BY operators for amcanorderbyop indexes
  * 'outer_rel' is the outer relation when we are considering using the index
  *		scan as the inside of a nestloop join (hence, some of the indexQuals
  *		are join clauses, and we should expect repeated scans of the index);
@@ -218,18 +219,19 @@ cost_seqscan(Path *path, PlannerInfo *root,
  * additional fields of the IndexPath besides startup_cost and total_cost.
  * These fields are needed if the IndexPath is used in a BitmapIndexScan.
  *
+ * indexQuals is a list of RestrictInfo nodes, but indexOrderBys is a list of
+ * bare expressions.
+ *
  * NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
  * Any additional quals evaluated as qpquals may reduce the number of returned
  * tuples, but they won't reduce the number of tuples we have to fetch from
  * the table, so they don't reduce the scan cost.
- *
- * NOTE: as of 8.0, indexQuals is a list of RestrictInfo nodes, where formerly
- * it was a list of bare clause expressions.
  */
 void
 cost_index(IndexPath *path, PlannerInfo *root,
 		   IndexOptInfo *index,
 		   List *indexQuals,
+		   List *indexOrderBys,
 		   RelOptInfo *outer_rel)
 {
 	RelOptInfo *baserel = index->rel;
@@ -263,10 +265,11 @@ cost_index(IndexPath *path, PlannerInfo *root,
 	 * the fraction of main-table tuples we will have to retrieve) and its
 	 * correlation to the main-table tuple order.
 	 */
-	OidFunctionCall8(index->amcostestimate,
+	OidFunctionCall9(index->amcostestimate,
 					 PointerGetDatum(root),
 					 PointerGetDatum(index),
 					 PointerGetDatum(indexQuals),
+					 PointerGetDatum(indexOrderBys),
 					 PointerGetDatum(outer_rel),
 					 PointerGetDatum(&indexStartupCost),
 					 PointerGetDatum(&indexTotalCost),
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index f73e0e6dc60..90ccb3928b9 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -89,6 +89,9 @@ static bool match_rowcompare_to_indexcol(IndexOptInfo *index,
 							 Oid opfamily,
 							 RowCompareExpr *clause,
 							 Relids outer_relids);
+static List *match_index_to_pathkeys(IndexOptInfo *index, List *pathkeys);
+static Expr *match_clause_to_ordering_op(IndexOptInfo *index,
+							int indexcol, Expr *clause, Oid pk_opfamily);
 static Relids indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel);
 static bool matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel,
 				  Relids outer_relids);
@@ -286,6 +289,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
 		IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
 		IndexPath  *ipath;
 		List	   *restrictclauses;
+		List	   *orderbyclauses;
 		List	   *index_pathkeys;
 		List	   *useful_pathkeys;
 		bool		useful_predicate;
@@ -388,9 +392,24 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
 												  ForwardScanDirection);
 			useful_pathkeys = truncate_useless_pathkeys(root, rel,
 														index_pathkeys);
+			orderbyclauses = NIL;
+		}
+		else if (index->amcanorderbyop && possibly_useful_pathkeys &&
+				 istoplevel && outer_rel == NULL && scantype != ST_BITMAPSCAN)
+		{
+			/* see if we can generate ordering operators for query_pathkeys */
+			orderbyclauses = match_index_to_pathkeys(index,
+													 root->query_pathkeys);
+			if (orderbyclauses)
+				useful_pathkeys = root->query_pathkeys;
+			else
+				useful_pathkeys = NIL;
 		}
 		else
+		{
 			useful_pathkeys = NIL;
+			orderbyclauses = NIL;
+		}
 
 		/*
 		 * 3. Generate an indexscan path if there are relevant restriction
@@ -402,6 +421,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
 		{
 			ipath = create_index_path(root, index,
 									  restrictclauses,
+									  orderbyclauses,
 									  useful_pathkeys,
 									  index_is_ordered ?
 									  ForwardScanDirection :
@@ -425,6 +445,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
 			{
 				ipath = create_index_path(root, index,
 										  restrictclauses,
+										  NIL,
 										  useful_pathkeys,
 										  BackwardScanDirection,
 										  outer_rel);
@@ -1384,6 +1405,179 @@ match_rowcompare_to_indexcol(IndexOptInfo *index,
 }
 
 
+/****************************************************************************
+ *				----  ROUTINES TO CHECK ORDERING OPERATORS  ----
+ ****************************************************************************/
+
+/*
+ * match_index_to_pathkeys
+ *		Test whether an index can produce output ordered according to the
+ *		given pathkeys using "ordering operators".
+ *
+ * If it can, return one ORDER BY expression per pathkey (in pathkey order),
+ * each of the form "indexedcol operator pseudoconstant".  If not, return NIL.
+ */
+static List *
+match_index_to_pathkeys(IndexOptInfo *index, List *pathkeys)
+{
+	List	   *orderbyexprs = NIL;
+	ListCell   *lc1;
+
+	/* Only indexes with the amcanorderbyop property are interesting here */
+	if (!index->amcanorderbyop)
+		return NIL;
+
+	foreach(lc1, pathkeys)
+	{
+		PathKey	   *pathkey = (PathKey *) lfirst(lc1);
+		bool		found = false;
+		ListCell   *lc2;
+
+		/*
+		 * Note: for any failure to match, we just return NIL immediately.
+		 * There is no value in matching just some of the pathkeys.
+		 */
+
+		/* Must request the default order: BTLess strategy, nulls last */
+		if (pathkey->pk_strategy != BTLessStrategyNumber ||
+			pathkey->pk_nulls_first)
+			return NIL;
+
+		/* If eclass is volatile, no hope of using an indexscan */
+		if (pathkey->pk_eclass->ec_has_volatile)
+			return NIL;
+
+		/* Try each eclass member against each index column; first hit wins */
+		foreach(lc2, pathkey->pk_eclass->ec_members)
+		{
+			EquivalenceMember *member = (EquivalenceMember *) lfirst(lc2);
+			int		indexcol;
+
+			/* Skip members whose relids aren't exactly the index's rel */
+			if (!bms_equal(member->em_relids, index->rel->relids))
+				continue;
+
+			for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+			{
+				Expr   *expr;
+
+				expr = match_clause_to_ordering_op(index,
+												   indexcol,
+												   member->em_expr,
+												   pathkey->pk_opfamily);
+				if (expr)
+				{
+					orderbyexprs = lappend(orderbyexprs, expr);
+					found = true;
+					break;
+				}
+			}
+
+			if (found)			/* don't want to look at remaining members */
+				break;
+		}
+
+		if (!found)				/* fail if no match for this pathkey */
+			return NIL;
+	}
+
+	return orderbyexprs;		/* success (still NIL if pathkeys was empty) */
+}
+
+/*
+ * match_clause_to_ordering_op
+ *	  Determines whether an ordering operator expression matches an
+ *	  index column.
+ *
+ *	  This is similar to, but simpler than, match_clause_to_indexcol: only
+ *	  simple OpExprs are handled.  The input is the bare expression being
+ *	  ordered by, of the form (indexkey op const) or (const op indexkey),
+ *	  where "const" is Var-free and non-volatile and op (its commutator, in
+ *	  the second form) is an ordering operator for the column's opfamily.
+ *
+ * 'index' is the index of interest.
+ * 'indexcol' is a column number of 'index' (counting from 0).
+ * 'clause' is the ordering expression to be tested.
+ * 'pk_opfamily' is the btree opfamily describing the required sort order.
+ *
+ * If successful, return 'clause' as-is if the indexkey is on the left, else a
+ * commuted flat copy sharing the operand nodes.  If no match, return NULL.
+ */
+static Expr *
+match_clause_to_ordering_op(IndexOptInfo *index,
+							int indexcol,
+							Expr *clause,
+							Oid pk_opfamily)
+{
+	Oid			opfamily = index->opfamily[indexcol];
+	Node	   *leftop,
+			   *rightop;
+	Oid			expr_op;
+	Oid			sortfamily;
+	bool		commuted;
+
+	/*
+	 * Clause must be a binary opclause.
+	 */
+	if (!is_opclause(clause))
+		return NULL;
+	leftop = get_leftop(clause);
+	rightop = get_rightop(clause);
+	if (!leftop || !rightop)
+		return NULL;
+	expr_op = ((OpExpr *) clause)->opno;
+
+	/*
+	 * Check for clauses of the form: (indexkey operator constant) or
+	 * (constant operator indexkey).
+	 */
+	if (match_index_to_operand(leftop, indexcol, index) &&
+		!contain_var_clause(rightop) &&
+		!contain_volatile_functions(rightop))
+	{
+		commuted = false;
+	}
+	else if (match_index_to_operand(rightop, indexcol, index) &&
+			 !contain_var_clause(leftop) &&
+			 !contain_volatile_functions(leftop))
+	{
+		/* Might match, but we need a commuted operator */
+		expr_op = get_commutator(expr_op);
+		if (expr_op == InvalidOid)
+			return NULL;
+		commuted = true;
+	}
+	else
+		return NULL;
+
+	/*
+	 * Is the (commuted) operator an ordering operator for the opfamily?
+	 * And if so, does it yield the right sorting semantics?
+	 */
+	sortfamily = get_op_opfamily_sortfamily(expr_op, opfamily);
+	if (sortfamily != pk_opfamily)
+		return NULL;
+
+	/* We have a match.  Return clause or a commuted version thereof. */
+	if (commuted)
+	{
+		OpExpr	   *newclause = makeNode(OpExpr);
+
+		/* flat-copy all the fields of clause */
+		memcpy(newclause, clause, sizeof(OpExpr));
+
+		/* commute it; opfuncid was copied for the old opno, so clear it */
+		newclause->opno = expr_op;
+		newclause->opfuncid = InvalidOid;
+		newclause->args = list_make2(rightop, leftop);
+
+		clause = (Expr *) newclause;
+	}
+
+	return clause;
+}
+
+
 /****************************************************************************
  *				----  ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS	----
  ****************************************************************************/
@@ -2630,7 +2824,7 @@ expand_indexqual_rowcompare(RestrictInfo *rinfo,
 	expr_op = linitial_oid(clause->opnos);
 	if (!var_on_left)
 		expr_op = get_commutator(expr_op);
-	get_op_opfamily_properties(expr_op, index->opfamily[indexcol],
+	get_op_opfamily_properties(expr_op, index->opfamily[indexcol], false,
 							   &op_strategy,
 							   &op_lefttype,
 							   &op_righttype);
@@ -2698,7 +2892,7 @@ expand_indexqual_rowcompare(RestrictInfo *rinfo,
 			break;
 
 		/* Add opfamily and datatypes to lists */
-		get_op_opfamily_properties(expr_op, index->opfamily[i],
+		get_op_opfamily_properties(expr_op, index->opfamily[i], false,
 								   &op_strategy,
 								   &op_lefttype,
 								   &op_righttype);
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 41ad512a296..1bbf35ed74d 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -81,6 +81,8 @@ static Node *replace_nestloop_params(PlannerInfo *root, Node *expr);
 static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root);
 static List *fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
 						 List *indexquals);
+static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path,
+							List *indexorderbys);
 static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index);
 static List *get_switched_clauses(List *clauses, Relids outerrelids);
 static List *order_qual_clauses(PlannerInfo *root, List *clauses);
@@ -89,6 +91,7 @@ static void copy_plan_costsize(Plan *dest, Plan *src);
 static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
 static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
 			   Oid indexid, List *indexqual, List *indexqualorig,
+			   List *indexorderby, List *indexorderbyorig,
 			   ScanDirection indexscandir);
 static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid,
 					  List *indexqual,
@@ -1028,11 +1031,13 @@ create_indexscan_plan(PlannerInfo *root,
 					  List *scan_clauses)
 {
 	List	   *indexquals = best_path->indexquals;
+	List	   *indexorderbys = best_path->indexorderbys;
 	Index		baserelid = best_path->path.parent->relid;
 	Oid			indexoid = best_path->indexinfo->indexoid;
 	List	   *qpqual;
 	List	   *stripped_indexquals;
 	List	   *fixed_indexquals;
+	List	   *fixed_indexorderbys;
 	ListCell   *l;
 	IndexScan  *scan_plan;
 
@@ -1052,6 +1057,11 @@ create_indexscan_plan(PlannerInfo *root,
 	 */
 	fixed_indexquals = fix_indexqual_references(root, best_path, indexquals);
 
+	/*
+	 * Likewise fix up index attr references in the ORDER BY expressions.
+	 */
+	fixed_indexorderbys = fix_indexorderby_references(root, best_path, indexorderbys);
+
 	/*
 	 * If this is an innerjoin scan, the indexclauses will contain join
 	 * clauses that are not present in scan_clauses (since the passed-in value
@@ -1123,11 +1133,12 @@ create_indexscan_plan(PlannerInfo *root,
 
 	/*
 	 * We have to replace any outer-relation variables with nestloop params
-	 * in the indexqualorig and qpqual expressions.  A bit annoying to have to
-	 * do this separately from the processing in fix_indexqual_references ---
-	 * rethink this when generalizing the inner indexscan support.  But note
-	 * we can't really do this earlier because it'd break the comparisons to
-	 * predicates above ... (or would it?  Those wouldn't have outer refs)
+	 * in the indexqualorig, qpqual, and indexorderbyorig expressions.  A bit
+	 * annoying to have to do this separately from the processing in
+	 * fix_indexqual_references --- rethink this when generalizing the inner
+	 * indexscan support.  But note we can't really do this earlier because
+	 * it'd break the comparisons to predicates above ... (or would it?  Those
+	 * wouldn't have outer refs)
 	 */
 	if (best_path->isjoininner)
 	{
@@ -1135,6 +1146,8 @@ create_indexscan_plan(PlannerInfo *root,
 			replace_nestloop_params(root, (Node *) stripped_indexquals);
 		qpqual = (List *)
 			replace_nestloop_params(root, (Node *) qpqual);
+		indexorderbys = (List *)
+			replace_nestloop_params(root, (Node *) indexorderbys);
 	}
 
 	/* Finally ready to build the plan node */
@@ -1144,6 +1157,8 @@ create_indexscan_plan(PlannerInfo *root,
 							   indexoid,
 							   fixed_indexquals,
 							   stripped_indexquals,
+							   fixed_indexorderbys,
+							   indexorderbys,
 							   best_path->indexscandir);
 
 	copy_path_costsize(&scan_plan->scan.plan, &best_path->path);
@@ -2394,6 +2409,63 @@ fix_indexqual_references(PlannerInfo *root, IndexPath *index_path,
 	return fixed_indexquals;
 }
 
+/*
+ * fix_indexorderby_references
+ *	  Adjust indexorderby clauses to the form the executor's index
+ *	  machinery needs.
+ *
+ * This is a simplified version of fix_indexqual_references.  The input does
+ * not have RestrictInfo nodes, and we assume that indxpath.c already
+ * commuted the clauses to put the index keys on the left.  Also, we don't
+ * bother to support any cases except simple binary OpExprs (anything else
+ * draws an elog), since nothing else is allowed for ordering operators.
+ */
+static List *
+fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path,
+							List *indexorderbys)
+{
+	IndexOptInfo *index = index_path->indexinfo;
+	List	   *fixed_indexorderbys;
+	ListCell   *l;
+
+	fixed_indexorderbys = NIL;
+
+	foreach(l, indexorderbys)
+	{
+		Node	   *clause = (Node *) lfirst(l);
+
+		/*
+		 * Replace any outer-relation variables with nestloop params.
+		 *
+		 * This also makes a copy of the clause, so it's safe to modify it
+		 * in-place below.
+		 */
+		clause = replace_nestloop_params(root, clause);
+
+		if (IsA(clause, OpExpr))
+		{
+			OpExpr	   *op = (OpExpr *) clause;
+
+			if (list_length(op->args) != 2)
+				elog(ERROR, "indexorderby clause is not binary opclause");
+
+			/*
+			 * Now, determine which index attribute this is and change the
+			 * indexkey operand (always the left one here) as needed.
+			 */
+			linitial(op->args) = fix_indexqual_operand(linitial(op->args),
+													   index);
+		}
+		else
+			elog(ERROR, "unsupported indexorderby type: %d",
+				 (int) nodeTag(clause));
+
+		fixed_indexorderbys = lappend(fixed_indexorderbys, clause);
+	}
+
+	return fixed_indexorderbys;
+}
+
 /*
  * fix_indexqual_operand
  *	  Convert an indexqual expression to a Var referencing the index column.
@@ -2685,6 +2757,8 @@ make_indexscan(List *qptlist,
 			   Oid indexid,
 			   List *indexqual,
 			   List *indexqualorig,
+			   List *indexorderby,
+			   List *indexorderbyorig,
 			   ScanDirection indexscandir)
 {
 	IndexScan  *node = makeNode(IndexScan);
@@ -2699,6 +2773,8 @@ make_indexscan(List *qptlist,
 	node->indexid = indexid;
 	node->indexqual = indexqual;
 	node->indexqualorig = indexqualorig;
+	node->indexorderby = indexorderby;
+	node->indexorderbyorig = indexorderbyorig;
 	node->indexorderdir = indexscandir;
 
 	return node;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index a1e59005921..6d0b3dbce95 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3135,7 +3135,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
 
 	/* Estimate the cost of index scan */
 	indexScanPath = create_index_path(root, indexInfo,
-									  NIL, NIL,
+									  NIL, NIL, NIL,
 									  ForwardScanDirection, NULL);
 
 	return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 9aef7fc35a2..0074679207a 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -301,6 +301,10 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, int rtoffset)
 					fix_scan_list(glob, splan->indexqual, rtoffset);
 				splan->indexqualorig =
 					fix_scan_list(glob, splan->indexqualorig, rtoffset);
+				splan->indexorderby =
+					fix_scan_list(glob, splan->indexorderby, rtoffset);
+				splan->indexorderbyorig =
+					fix_scan_list(glob, splan->indexorderbyorig, rtoffset);
 			}
 			break;
 		case T_BitmapIndexScan:
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 754753cc12d..39ef420284d 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1942,10 +1942,13 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
 		case T_IndexScan:
 			finalize_primnode((Node *) ((IndexScan *) plan)->indexqual,
 							  &context);
+			finalize_primnode((Node *) ((IndexScan *) plan)->indexorderby,
+							  &context);
 
 			/*
 			 * we need not look at indexqualorig, since it will have the same
-			 * param references as indexqual.
+			 * param references as indexqual.  Likewise, we can ignore
+			 * indexorderbyorig.
 			 */
 			context.paramids = bms_add_members(context.paramids, scan_params);
 			break;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 231d221b21e..2439d814ce8 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -414,6 +414,8 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel)
  * 'index' is a usable index.
  * 'clause_groups' is a list of lists of RestrictInfo nodes
  *			to be used as index qual conditions in the scan.
+ * 'indexorderbys' is a list of bare expressions (no RestrictInfos)
+ *			to be used as index ordering operators in the scan.
  * 'pathkeys' describes the ordering of the path.
  * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
  *			for an ordered index, or NoMovementScanDirection for
@@ -427,6 +429,7 @@ IndexPath *
 create_index_path(PlannerInfo *root,
 				  IndexOptInfo *index,
 				  List *clause_groups,
+				  List *indexorderbys,
 				  List *pathkeys,
 				  ScanDirection indexscandir,
 				  RelOptInfo *outer_rel)
@@ -463,6 +466,7 @@ create_index_path(PlannerInfo *root,
 	pathnode->indexinfo = index;
 	pathnode->indexclauses = allclauses;
 	pathnode->indexquals = indexquals;
+	pathnode->indexorderbys = indexorderbys;
 
 	pathnode->isjoininner = (outer_rel != NULL);
 	pathnode->indexscandir = indexscandir;
@@ -504,7 +508,7 @@ create_index_path(PlannerInfo *root,
 		pathnode->rows = rel->rows;
 	}
 
-	cost_index(pathnode, root, index, indexquals, outer_rel);
+	cost_index(pathnode, root, index, indexquals, indexorderbys, outer_rel);
 
 	return pathnode;
 }
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 95397aa7cee..ef87f724ae9 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2631,7 +2631,7 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
 	examine_variable(root, right, 0, &rightvar);
 
 	/* Extract the operator's declared left/right datatypes */
-	get_op_opfamily_properties(opno, opfamily,
+	get_op_opfamily_properties(opno, opfamily, false,
 							   &op_strategy,
 							   &op_lefttype,
 							   &op_righttype);
@@ -4646,7 +4646,8 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
 			if (min)
 			{
 				index_scan = index_beginscan(heapRel, indexRel, SnapshotNow,
-											 1, scankeys);
+											 1, 0);
+				index_rescan(index_scan, scankeys, 1, NULL, 0);
 
 				/* Fetch first tuple in sortop's direction */
 				if ((tup = index_getnext(index_scan,
@@ -4677,7 +4678,8 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
 			if (max && have_data)
 			{
 				index_scan = index_beginscan(heapRel, indexRel, SnapshotNow,
-											 1, scankeys);
+											 1, 0);
+				index_rescan(index_scan, scankeys, 1, NULL, 0);
 
 				/* Fetch first tuple in reverse direction */
 				if ((tup = index_getnext(index_scan,
@@ -5644,7 +5646,9 @@ string_to_bytea_const(const char *str, size_t str_len)
 
 static void
 genericcostestimate(PlannerInfo *root,
-					IndexOptInfo *index, List *indexQuals,
+					IndexOptInfo *index,
+					List *indexQuals,
+					List *indexOrderBys,
 					RelOptInfo *outer_rel,
 					double numIndexTuples,
 					Cost *indexStartupCost,
@@ -5856,7 +5860,8 @@ genericcostestimate(PlannerInfo *root,
 	 * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
 	 * indexqual operator.	Because we have numIndexTuples as a per-scan
 	 * number, we have to multiply by num_sa_scans to get the correct result
-	 * for ScalarArrayOpExpr cases.
+	 * for ScalarArrayOpExpr cases.  Similarly add in costs for any index
+	 * ORDER BY expressions.
 	 *
 	 * Note: this neglects the possible costs of rechecking lossy operators
 	 * and OR-clause expressions.  Detecting that that might be needed seems
@@ -5864,11 +5869,15 @@ genericcostestimate(PlannerInfo *root,
 	 * inaccuracies here ...
 	 */
 	cost_qual_eval(&index_qual_cost, indexQuals, root);
-	qual_op_cost = cpu_operator_cost * list_length(indexQuals);
-	qual_arg_cost = index_qual_cost.startup +
-		index_qual_cost.per_tuple - qual_op_cost;
+	qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
+	cost_qual_eval(&index_qual_cost, indexOrderBys, root);
+	qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+	qual_op_cost = cpu_operator_cost *
+		(list_length(indexQuals) + list_length(indexOrderBys));
+	qual_arg_cost -= qual_op_cost;
 	if (qual_arg_cost < 0)		/* just in case... */
 		qual_arg_cost = 0;
+
 	*indexStartupCost = qual_arg_cost;
 	*indexTotalCost += qual_arg_cost;
 	*indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
@@ -5901,11 +5910,12 @@ btcostestimate(PG_FUNCTION_ARGS)
 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
 	IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
 	List	   *indexQuals = (List *) PG_GETARG_POINTER(2);
-	RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3);
-	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(7);
+	List	   *indexOrderBys = (List *) PG_GETARG_POINTER(3);
+	RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
+	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
+	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
+	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
+	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(8);
 	Oid			relid;
 	AttrNumber	colnum;
 	VariableStatData vardata;
@@ -6082,7 +6092,8 @@ btcostestimate(PG_FUNCTION_ARGS)
 		numIndexTuples = rint(numIndexTuples / num_sa_scans);
 	}
 
-	genericcostestimate(root, index, indexQuals, outer_rel, numIndexTuples,
+	genericcostestimate(root, index, indexQuals, indexOrderBys,
+						outer_rel, numIndexTuples,
 						indexStartupCost, indexTotalCost,
 						indexSelectivity, indexCorrelation);
 
@@ -6206,13 +6217,14 @@ hashcostestimate(PG_FUNCTION_ARGS)
 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
 	IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
 	List	   *indexQuals = (List *) PG_GETARG_POINTER(2);
-	RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3);
-	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(7);
-
-	genericcostestimate(root, index, indexQuals, outer_rel, 0.0,
+	List	   *indexOrderBys = (List *) PG_GETARG_POINTER(3);
+	RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
+	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
+	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
+	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
+	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(8);
+
+	genericcostestimate(root, index, indexQuals, indexOrderBys, outer_rel, 0.0,
 						indexStartupCost, indexTotalCost,
 						indexSelectivity, indexCorrelation);
 
@@ -6225,13 +6237,14 @@ gistcostestimate(PG_FUNCTION_ARGS)
 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
 	IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
 	List	   *indexQuals = (List *) PG_GETARG_POINTER(2);
-	RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3);
-	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(7);
-
-	genericcostestimate(root, index, indexQuals, outer_rel, 0.0,
+	List	   *indexOrderBys = (List *) PG_GETARG_POINTER(3);
+	RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
+	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
+	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
+	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
+	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(8);
+
+	genericcostestimate(root, index, indexQuals, indexOrderBys, outer_rel, 0.0,
 						indexStartupCost, indexTotalCost,
 						indexSelectivity, indexCorrelation);
 
@@ -6262,11 +6275,12 @@ gincostestimate(PG_FUNCTION_ARGS)
 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
 	IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
 	List	   *indexQuals = (List *) PG_GETARG_POINTER(2);
-	RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3);
-	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
-	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(7);
+	List	   *indexOrderBys = (List *) PG_GETARG_POINTER(3);
+	RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(4);
+	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(5);
+	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(6);
+	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(7);
+	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(8);
 	ListCell	   *l;
 	int32		   nfullscan = 0;
 	List		   *selectivityQuals;
@@ -6432,7 +6446,7 @@ gincostestimate(PG_FUNCTION_ARGS)
 		 * Get the operator's strategy number and declared input data types
 		 * within the index opfamily.
 		 */
-		get_op_opfamily_properties(clause_op, index->opfamily[indexcol],
+		get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
 								   &strategy_op, &lefttype, &righttype);
 
 		/*
@@ -6581,15 +6595,18 @@ gincostestimate(PG_FUNCTION_ARGS)
 	 * Add on index qual eval costs, much as in genericcostestimate
 	 */
 	cost_qual_eval(&index_qual_cost, indexQuals, root);
-	qual_op_cost = cpu_operator_cost * list_length(indexQuals);
-	qual_arg_cost = index_qual_cost.startup +
-		index_qual_cost.per_tuple - qual_op_cost;
+	qual_arg_cost = index_qual_cost.startup + index_qual_cost.per_tuple;
+	cost_qual_eval(&index_qual_cost, indexOrderBys, root);
+	qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+	qual_op_cost = cpu_operator_cost *
+		(list_length(indexQuals) + list_length(indexOrderBys));
+	qual_arg_cost -= qual_op_cost;
 	if (qual_arg_cost < 0)      /* just in case... */
 		qual_arg_cost = 0;
 
 	*indexStartupCost += qual_arg_cost;
 	*indexTotalCost += qual_arg_cost;
-	*indexTotalCost += ( numTuples * *indexSelectivity ) * (cpu_index_tuple_cost + qual_op_cost);
+	*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 9beae0d9ef1..cbdfe05031f 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -85,19 +85,42 @@ get_op_opfamily_strategy(Oid opno, Oid opfamily)
 	return result;
 }
 
+/*
+ * get_op_opfamily_sortfamily
+ *
+ *		If the operator is an ordering operator within the specified opfamily,
+ *		return its amopsortfamily OID; else return InvalidOid.
+ */
+Oid
+get_op_opfamily_sortfamily(Oid opno, Oid opfamily)
+{
+	HeapTuple	tp;
+	Form_pg_amop amop_tup;
+	Oid			result;
+
+	tp = SearchSysCache3(AMOPOPID,
+						 ObjectIdGetDatum(opno),
+						 CharGetDatum(AMOP_ORDER),	/* look up ordering-op entries only */
+						 ObjectIdGetDatum(opfamily));
+	if (!HeapTupleIsValid(tp))
+		return InvalidOid;		/* no AMOP_ORDER entry for this opno/opfamily */
+	amop_tup = (Form_pg_amop) GETSTRUCT(tp);
+	result = amop_tup->amopsortfamily;	/* copy out before releasing the tuple */
+	ReleaseSysCache(tp);
+	return result;
+}
+
 /*
  * get_op_opfamily_properties
  *
  *		Get the operator's strategy number and declared input data types
  *		within the specified opfamily.
  *
- * This function only considers search operators, not ordering operators.
- *
  * Caller should already have verified that opno is a member of opfamily,
  * therefore we raise an error if the tuple is not found.
  */
 void
-get_op_opfamily_properties(Oid opno, Oid opfamily,
+get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op,
 						   int *strategy,
 						   Oid *lefttype,
 						   Oid *righttype)
@@ -107,7 +130,7 @@ get_op_opfamily_properties(Oid opno, Oid opfamily,
 
 	tp = SearchSysCache3(AMOPOPID,
 						 ObjectIdGetDatum(opno),
-						 CharGetDatum(AMOP_SEARCH),
+						 CharGetDatum(ordering_op ? AMOP_ORDER : AMOP_SEARCH),
 						 ObjectIdGetDatum(opfamily));
 	if (!HeapTupleIsValid(tp))
 		elog(ERROR, "operator %u is not a member of opfamily %u",
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index 48380ef32fe..896fb75fddb 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -135,11 +135,13 @@ extern bool index_insert(Relation indexRelation,
 extern IndexScanDesc index_beginscan(Relation heapRelation,
 				Relation indexRelation,
 				Snapshot snapshot,
-				int nkeys, ScanKey key);
+				int nkeys, int norderbys);
 extern IndexScanDesc index_beginscan_bitmap(Relation indexRelation,
 					   Snapshot snapshot,
-					   int nkeys, ScanKey key);
-extern void index_rescan(IndexScanDesc scan, ScanKey key);
+					   int nkeys);
+extern void index_rescan(IndexScanDesc scan,
+			 ScanKey keys, int nkeys,
+			 ScanKey orderbys, int norderbys);
 extern void index_endscan(IndexScanDesc scan);
 extern void index_markpos(IndexScanDesc scan);
 extern void index_restrpos(IndexScanDesc scan);
@@ -161,7 +163,7 @@ extern FmgrInfo *index_getprocinfo(Relation irel, AttrNumber attnum,
  * index access method support routines (in genam.c)
  */
 extern IndexScanDesc RelationGetIndexScan(Relation indexRelation,
-					 int nkeys, ScanKey key);
+					 int nkeys, int norderbys);
 extern void IndexScanEnd(IndexScanDesc scan);
 extern char *BuildIndexValueDescription(Relation indexRelation,
 						   Datum *values, bool *isnull);
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index b4ec01ed12d..f412fc38444 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -62,8 +62,10 @@ typedef struct IndexScanDescData
 	Relation	heapRelation;	/* heap relation descriptor, or NULL */
 	Relation	indexRelation;	/* index relation descriptor */
 	Snapshot	xs_snapshot;	/* snapshot to see */
-	int			numberOfKeys;	/* number of scan keys */
-	ScanKey		keyData;		/* array of scan key descriptors */
+	int			numberOfKeys;	/* number of index qualifier conditions */
+	int			numberOfOrderBys;	/* number of ordering operators */
+	ScanKey		keyData;			/* array of index qualifier descriptors */
+	ScanKey		orderByData;		/* array of ordering op descriptors */
 
 	/* signaling to index AM about killing index tuples */
 	bool		kill_prior_tuple;		/* last-returned tuple is dead */
diff --git a/src/include/access/skey.h b/src/include/access/skey.h
index fcf81ba6abf..c30a44bde37 100644
--- a/src/include/access/skey.h
+++ b/src/include/access/skey.h
@@ -60,6 +60,11 @@ typedef uint16 StrategyNumber;
  * supported only for index scans, not heap scans; and not all index AMs
  * support them.
  *
+ * A ScanKey can also represent an ordering operator invocation, that is
+ * an ordering requirement "ORDER BY indexedcol op constant".  This looks
+ * the same as a comparison operator, except that the operator doesn't
+ * (usually) yield boolean.  We mark such ScanKeys with SK_ORDER_BY.
+ *
  * Note: in some places, ScanKeys are used as a convenient representation
  * for the invocation of an access method support procedure.  In this case
  * sk_strategy/sk_subtype are not meaningful, and sk_func may refer to a
@@ -122,6 +127,7 @@ typedef ScanKeyData *ScanKey;
 #define SK_SEARCHNULL		0x0020		/* scankey represents "col IS NULL" */
 #define SK_SEARCHNOTNULL	0x0040		/* scankey represents "col IS NOT
 										 * NULL" */
+#define SK_ORDER_BY			0x0080		/* scankey is for ORDER BY op */
 
 
 /*
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 8698a43371f..f28162b439d 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	201011241
+#define CATALOG_VERSION_NO	201012021
 
 #endif
diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h
index 5a18dee0bdb..a729690affe 100644
--- a/src/include/catalog/pg_am.h
+++ b/src/include/catalog/pg_am.h
@@ -52,11 +52,11 @@ CATALOG(pg_am,2601)
 	bool		amclusterable;	/* does AM support cluster command? */
 	Oid			amkeytype;		/* type of data in index, or InvalidOid */
 	regproc		aminsert;		/* "insert this tuple" function */
-	regproc		ambeginscan;	/* "start new scan" function */
+	regproc		ambeginscan;	/* "prepare for index scan" function */
 	regproc		amgettuple;		/* "next valid tuple" function, or 0 */
 	regproc		amgetbitmap;	/* "fetch all valid tuples" function, or 0 */
-	regproc		amrescan;		/* "restart this scan" function */
-	regproc		amendscan;		/* "end this scan" function */
+	regproc		amrescan;		/* "(re)start index scan" function */
+	regproc		amendscan;		/* "end index scan" function */
 	regproc		ammarkpos;		/* "mark current scan position" function */
 	regproc		amrestrpos;		/* "restore marked scan position" function */
 	regproc		ambuild;		/* "build new index" function */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 25a391282a3..611adef83c2 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -679,7 +679,7 @@ DATA(insert OID = 331 (  btinsert		   PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16 "2
 DESCR("btree(internal)");
 DATA(insert OID = 333 (  btbeginscan	   PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_	btbeginscan _null_ _null_ _null_ ));
 DESCR("btree(internal)");
-DATA(insert OID = 334 (  btrescan		   PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ btrescan _null_ _null_ _null_ ));
+DATA(insert OID = 334 (  btrescan		   PGNSP PGUID 12 1 0 0 f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ btrescan _null_ _null_ _null_ ));
 DESCR("btree(internal)");
 DATA(insert OID = 335 (  btendscan		   PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ btendscan _null_ _null_ _null_ ));
 DESCR("btree(internal)");
@@ -693,7 +693,7 @@ DATA(insert OID = 332 (  btbulkdelete	   PGNSP PGUID 12 1 0 0 f f f t f v 4 0 22
 DESCR("btree(internal)");
 DATA(insert OID = 972 (  btvacuumcleanup   PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ btvacuumcleanup _null_ _null_ _null_ ));
 DESCR("btree(internal)");
-DATA(insert OID = 1268 (  btcostestimate   PGNSP PGUID 12 1 0 0 f f f t f v 8 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_	btcostestimate _null_ _null_ _null_ ));
+DATA(insert OID = 1268 (  btcostestimate   PGNSP PGUID 12 1 0 0 f f f t f v 9 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_	btcostestimate _null_ _null_ _null_ ));
 DESCR("btree(internal)");
 DATA(insert OID = 2785 (  btoptions		   PGNSP PGUID 12 1 0 0 f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_  btoptions _null_ _null_ _null_ ));
 DESCR("btree(internal)");
@@ -798,7 +798,7 @@ DATA(insert OID = 441 (  hashinsert		   PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16
 DESCR("hash(internal)");
 DATA(insert OID = 443 (  hashbeginscan	   PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_	hashbeginscan _null_ _null_ _null_ ));
 DESCR("hash(internal)");
-DATA(insert OID = 444 (  hashrescan		   PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ hashrescan _null_ _null_ _null_ ));
+DATA(insert OID = 444 (  hashrescan		   PGNSP PGUID 12 1 0 0 f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ hashrescan _null_ _null_ _null_ ));
 DESCR("hash(internal)");
 DATA(insert OID = 445 (  hashendscan	   PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ hashendscan _null_ _null_ _null_ ));
 DESCR("hash(internal)");
@@ -812,7 +812,7 @@ DATA(insert OID = 442 (  hashbulkdelete    PGNSP PGUID 12 1 0 0 f f f t f v 4 0
 DESCR("hash(internal)");
 DATA(insert OID = 425 (  hashvacuumcleanup PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ hashvacuumcleanup _null_ _null_ _null_ ));
 DESCR("hash(internal)");
-DATA(insert OID = 438 (  hashcostestimate  PGNSP PGUID 12 1 0 0 f f f t f v 8 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_	hashcostestimate _null_ _null_ _null_ ));
+DATA(insert OID = 438 (  hashcostestimate  PGNSP PGUID 12 1 0 0 f f f t f v 9 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_	hashcostestimate _null_ _null_ _null_ ));
 DESCR("hash(internal)");
 DATA(insert OID = 2786 (  hashoptions	   PGNSP PGUID 12 1 0 0 f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_  hashoptions _null_ _null_ _null_ ));
 DESCR("hash(internal)");
@@ -1094,7 +1094,7 @@ DATA(insert OID = 775 (  gistinsert		   PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16
 DESCR("gist(internal)");
 DATA(insert OID = 777 (  gistbeginscan	   PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_	gistbeginscan _null_ _null_ _null_ ));
 DESCR("gist(internal)");
-DATA(insert OID = 778 (  gistrescan		   PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ gistrescan _null_ _null_ _null_ ));
+DATA(insert OID = 778 (  gistrescan		   PGNSP PGUID 12 1 0 0 f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gistrescan _null_ _null_ _null_ ));
 DESCR("gist(internal)");
 DATA(insert OID = 779 (  gistendscan	   PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ gistendscan _null_ _null_ _null_ ));
 DESCR("gist(internal)");
@@ -1108,7 +1108,7 @@ DATA(insert OID = 776 (  gistbulkdelete    PGNSP PGUID 12 1 0 0 f f f t f v 4 0
 DESCR("gist(internal)");
 DATA(insert OID = 2561 (  gistvacuumcleanup   PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ gistvacuumcleanup _null_ _null_ _null_ ));
 DESCR("gist(internal)");
-DATA(insert OID = 772 (  gistcostestimate  PGNSP PGUID 12 1 0 0 f f f t f v 8 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_	gistcostestimate _null_ _null_ _null_ ));
+DATA(insert OID = 772 (  gistcostestimate  PGNSP PGUID 12 1 0 0 f f f t f v 9 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_	gistcostestimate _null_ _null_ _null_ ));
 DESCR("gist(internal)");
 DATA(insert OID = 2787 (  gistoptions	   PGNSP PGUID 12 1 0 0 f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_  gistoptions _null_ _null_ _null_ ));
 DESCR("gist(internal)");
@@ -4335,7 +4335,7 @@ DATA(insert OID = 2732 (  gininsert		   PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16
 DESCR("gin(internal)");
 DATA(insert OID = 2733 (  ginbeginscan	   PGNSP PGUID 12 1 0 0 f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_	ginbeginscan _null_ _null_ _null_ ));
 DESCR("gin(internal)");
-DATA(insert OID = 2734 (  ginrescan		   PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2278 "2281 2281" _null_ _null_ _null_ _null_ ginrescan _null_ _null_ _null_ ));
+DATA(insert OID = 2734 (  ginrescan		   PGNSP PGUID 12 1 0 0 f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ ginrescan _null_ _null_ _null_ ));
 DESCR("gin(internal)");
 DATA(insert OID = 2735 (  ginendscan	   PGNSP PGUID 12 1 0 0 f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ ginendscan _null_ _null_ _null_ ));
 DESCR("gin(internal)");
@@ -4349,7 +4349,7 @@ DATA(insert OID = 2739 (  ginbulkdelete    PGNSP PGUID 12 1 0 0 f f f t f v 4 0
 DESCR("gin(internal)");
 DATA(insert OID = 2740 (  ginvacuumcleanup PGNSP PGUID 12 1 0 0 f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ ginvacuumcleanup _null_ _null_ _null_ ));
 DESCR("gin(internal)");
-DATA(insert OID = 2741 (  gincostestimate  PGNSP PGUID 12 1 0 0 f f f t f v 8 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_	gincostestimate _null_ _null_ _null_ ));
+DATA(insert OID = 2741 (  gincostestimate  PGNSP PGUID 12 1 0 0 f f f t f v 9 0 2278 "2281 2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_	gincostestimate _null_ _null_ _null_ ));
 DESCR("gin(internal)");
 DATA(insert OID = 2788 (  ginoptions	   PGNSP PGUID 12 1 0 0 f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_  ginoptions _null_ _null_ _null_ ));
 DESCR("gin(internal)");
diff --git a/src/include/executor/nodeIndexscan.h b/src/include/executor/nodeIndexscan.h
index 48d35e4a48a..d1e0f380c0a 100644
--- a/src/include/executor/nodeIndexscan.h
+++ b/src/include/executor/nodeIndexscan.h
@@ -25,8 +25,8 @@ extern void ExecReScanIndexScan(IndexScanState *node);
 
 /* routines exported to share code with nodeBitmapIndexscan.c */
 extern void ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
-					   Index scanrelid,
-					   List *quals, ScanKey *scanKeys, int *numScanKeys,
+					   Index scanrelid, List *quals, bool isorderby,
+					   ScanKey *scanKeys, int *numScanKeys,
 					   IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys,
 					   IndexArrayKeyInfo **arrayKeys, int *numArrayKeys);
 extern void ExecIndexEvalRuntimeKeys(ExprContext *econtext,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 89f8e202e35..d669c24b981 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1182,10 +1182,12 @@ typedef struct
  *	 IndexScanState information
  *
  *		indexqualorig	   execution state for indexqualorig expressions
- *		ScanKeys		   Skey structures to scan index rel
- *		NumScanKeys		   number of Skey structs
+ *		ScanKeys		   Skey structures for index quals
+ *		NumScanKeys		   number of ScanKeys
+ *		OrderByKeys		   Skey structures for index ordering operators
+ *		NumOrderByKeys	   number of OrderByKeys
  *		RuntimeKeys		   info about Skeys that must be evaluated at runtime
- *		NumRuntimeKeys	   number of RuntimeKeys structs
+ *		NumRuntimeKeys	   number of RuntimeKeys
  *		RuntimeKeysReady   true if runtime Skeys have been computed
  *		RuntimeContext	   expr context for evaling runtime Skeys
  *		RelationDesc	   index relation descriptor
@@ -1198,6 +1200,8 @@ typedef struct IndexScanState
 	List	   *indexqualorig;
 	ScanKey		iss_ScanKeys;
 	int			iss_NumScanKeys;
+	ScanKey		iss_OrderByKeys;
+	int			iss_NumOrderByKeys;
 	IndexRuntimeKeyInfo *iss_RuntimeKeys;
 	int			iss_NumRuntimeKeys;
 	bool		iss_RuntimeKeysReady;
@@ -1210,12 +1214,12 @@ typedef struct IndexScanState
  *	 BitmapIndexScanState information
  *
  *		result			   bitmap to return output into, or NULL
- *		ScanKeys		   Skey structures to scan index rel
- *		NumScanKeys		   number of Skey structs
+ *		ScanKeys		   Skey structures for index quals
+ *		NumScanKeys		   number of ScanKeys
  *		RuntimeKeys		   info about Skeys that must be evaluated at runtime
- *		NumRuntimeKeys	   number of RuntimeKeys structs
+ *		NumRuntimeKeys	   number of RuntimeKeys
  *		ArrayKeys		   info about Skeys that come from ScalarArrayOpExprs
- *		NumArrayKeys	   number of ArrayKeys structs
+ *		NumArrayKeys	   number of ArrayKeys
  *		RuntimeKeysReady   true if runtime Skeys have been computed
  *		RuntimeContext	   expr context for evaling runtime Skeys
  *		RelationDesc	   index relation descriptor
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index fec4acea342..b89eb55ad76 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -271,7 +271,10 @@ typedef Scan SeqScan;
  * be of the form (indexkey OP comparisonval) or (comparisonval OP indexkey).
  * The indexkey is a Var or expression referencing column(s) of the index's
  * base table.	The comparisonval might be any expression, but it won't use
- * any columns of the base table.
+ * any columns of the base table.  The expressions are ordered by index
+ * column position (but items referencing the same index column can appear
+ * in any order).  indexqualorig is used at runtime only if we have to recheck
+ * a lossy indexqual.
  *
  * indexqual has the same form, but the expressions have been commuted if
  * necessary to put the indexkeys on the left, and the indexkeys are replaced
@@ -280,14 +283,26 @@ typedef Scan SeqScan;
  * table).	This is a bit hokey ... would be cleaner to use a special-purpose
  * node type that could not be mistaken for a regular Var.	But it will do
  * for now.
+ *
+ * indexorderbyorig is similarly the original form of any ORDER BY expressions
+ * that are being implemented by the index, while indexorderby is modified to
+ * have index column Vars on the left-hand side.  Here, multiple expressions
+ * must appear in exactly the ORDER BY order, and this is not necessarily the
+ * index column order.  Only the expressions are provided, not the auxiliary
+ * sort-order information from the ORDER BY SortGroupClauses; it's assumed
+ * that the sort ordering is fully determinable from the top-level operators.
+ * indexorderbyorig is unused at run time, but is needed for EXPLAIN.
+ * (Note these fields are used for amcanorderbyop cases, not amcanorder cases.)
  * ----------------
  */
 typedef struct IndexScan
 {
 	Scan		scan;
 	Oid			indexid;		/* OID of index to scan */
-	List	   *indexqual;		/* list of index quals (OpExprs) */
+	List	   *indexqual;		/* list of index quals (usually OpExprs) */
 	List	   *indexqualorig;	/* the same in original form */
+	List	   *indexorderby;		/* list of index ORDER BY exprs */
+	List	   *indexorderbyorig;	/* the same in original form */
 	ScanDirection indexorderdir;	/* forward or backward or don't care */
 } IndexScan;
 
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index d084338f356..e7ebcfcc81a 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -631,6 +631,13 @@ typedef struct Path
  * indexable operators appear in 'indexclauses', they are replaced by the
  * derived indexscannable conditions in 'indexquals'.
  *
+ * 'indexorderbys', if not NIL, is a list of ORDER BY expressions that have
+ * been found to be usable as ordering operators for an amcanorderbyop index.
+ * Note that these are not RestrictInfos, just bare expressions, since they
+ * generally won't yield booleans.  The list will match the path's pathkeys.
+ * Also, unlike the case for quals, it's guaranteed that each expression has
+ * the index key on the left side of the operator.
+ *
  * 'isjoininner' is TRUE if the path is a nestloop inner scan (that is,
  * some of the index conditions are join rather than restriction clauses).
  * Note that the path costs will be calculated differently from a plain
@@ -663,6 +670,7 @@ typedef struct IndexPath
 	IndexOptInfo *indexinfo;
 	List	   *indexclauses;
 	List	   *indexquals;
+	List	   *indexorderbys;
 	bool		isjoininner;
 	ScanDirection indexscandir;
 	Cost		indextotalcost;
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 8df1b95abe7..48de2a989f0 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -67,7 +67,7 @@ extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
 					double index_pages, PlannerInfo *root);
 extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
 extern void cost_index(IndexPath *path, PlannerInfo *root, IndexOptInfo *index,
-		   List *indexQuals, RelOptInfo *outer_rel);
+		   List *indexQuals, List *indexOrderBys, RelOptInfo *outer_rel);
 extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
 					  Path *bitmapqual, RelOptInfo *outer_rel);
 extern void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root);
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 53ebe5756b7..2dde5e07ef5 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -31,6 +31,7 @@ extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel);
 extern IndexPath *create_index_path(PlannerInfo *root,
 				  IndexOptInfo *index,
 				  List *clause_groups,
+				  List *indexorderbys,
 				  List *pathkeys,
 				  ScanDirection indexscandir,
 				  RelOptInfo *outer_rel);
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index 62d15cca366..5f41adfcc22 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -22,7 +22,7 @@
 /*
  * Maximum number of arguments to a function.
  *
- * The minimum value is 8 (index cost estimation uses 8-argument functions).
+ * The minimum value is 9 (index cost estimation uses 9-argument functions).
  * The maximum possible value is around 600 (limited by index tuple size in
  * pg_proc's index; BLCKSZ larger than 8K would allow more).  Values larger
  * than needed will waste memory and processing time, but do not directly
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index b6104d7deca..7bf3f360f5b 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -32,7 +32,8 @@ extern PGDLLIMPORT get_attavgwidth_hook_type get_attavgwidth_hook;
 
 extern bool op_in_opfamily(Oid opno, Oid opfamily);
 extern int	get_op_opfamily_strategy(Oid opno, Oid opfamily);
-extern void get_op_opfamily_properties(Oid opno, Oid opfamily,
+extern Oid	get_op_opfamily_sortfamily(Oid opno, Oid opfamily);
+extern void get_op_opfamily_properties(Oid opno, Oid opfamily, bool ordering_op,
 						   int *strategy,
 						   Oid *lefttype,
 						   Oid *righttype);
-- 
GitLab