diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 0eb991cdf0e86f50e182ef07b7bd2a22f7a5b0b6..59b8a2e2b3d9cd99ae6eb38746a85de44a74862d 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2297,6 +2297,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
 			case RTE_RELATION:
+				JumbleExpr(jstate, (Node *) rte->tablesample);
 			case RTE_SUBQUERY:
 				JumbleQuery(jstate, rte->subquery);
@@ -2767,6 +2768,15 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
 				JumbleExpr(jstate, rtfunc->funcexpr);
+		case T_TableSampleClause:
+			{
+				TableSampleClause *tsc = (TableSampleClause *) node;
+				APP_JUMB(tsc->tsmhandler);
+				JumbleExpr(jstate, (Node *) tsc->args);
+				JumbleExpr(jstate, (Node *) tsc->repeatable);
+			}
+			break;
 			/* Only a warning, since we can stumble along anyway */
 			elog(WARNING, "unrecognized node type: %d",
diff --git a/contrib/tsm_system_rows/Makefile b/contrib/tsm_system_rows/Makefile
index 700ab276db2e95b546dee914751387ce3bb940b6..609af463c5c2438b340c8ec6c32fdcddb7d627a1 100644
--- a/contrib/tsm_system_rows/Makefile
+++ b/contrib/tsm_system_rows/Makefile
@@ -1,8 +1,8 @@
-# src/test/modules/tsm_system_rows/Makefile
+# contrib/tsm_system_rows/Makefile
 MODULE_big = tsm_system_rows
 OBJS = tsm_system_rows.o $(WIN32RES)
-PGFILEDESC = "tsm_system_rows - SYSTEM TABLESAMPLE method which accepts number of rows as a limit"
+PGFILEDESC = "tsm_system_rows - TABLESAMPLE method which accepts number of rows as a limit"
 EXTENSION = tsm_system_rows
 DATA = tsm_system_rows--1.0.sql
diff --git a/contrib/tsm_system_rows/expected/tsm_system_rows.out b/contrib/tsm_system_rows/expected/tsm_system_rows.out
index 7e0f72b02b7df38392293f314341058ceb293d06..87b4a8fc64bd222438b14dbce830923f862b0c6a 100644
--- a/contrib/tsm_system_rows/expected/tsm_system_rows.out
+++ b/contrib/tsm_system_rows/expected/tsm_system_rows.out
@@ -1,31 +1,83 @@
 CREATE EXTENSION tsm_system_rows;
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+  FROM generate_series(0, 30) s(i);
 ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1000);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (0);
+ count 
+     0
+(1 row)
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1);
+ count 
+     1
+(1 row)
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (10);
+ count 
+    10
+(1 row)
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (100);
 (1 row)
-SELECT id FROM test_tablesample TABLESAMPLE system_rows (8) REPEATABLE (5432);
- id 
-  7
- 14
- 21
- 28
-  4
- 11
- 18
- 25
-(8 rows)
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_rows (20) REPEATABLE (10);
-                                    QUERY PLAN                                     
- Sample Scan (system_rows) on test_tablesample  (cost=0.00..80.20 rows=20 width=4)
+-- bad parameters should get through planning, but not execution:
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+               QUERY PLAN               
+ Sample Scan on test_tablesample
+   Sampling: system_rows ('-1'::bigint)
+(2 rows)
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+ERROR:  sample size must not be negative
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) REPEATABLE (0);
+ERROR:  tablesample method system_rows does not support REPEATABLE
+LINE 1: SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) ...
+                                                   ^
+-- but a join should be allowed:
+  (VALUES (0),(10),(100)) v(nrows),
+  LATERAL (SELECT count(*) FROM test_tablesample
+           TABLESAMPLE system_rows (nrows)) ss;
+                        QUERY PLAN                        
+ Nested Loop
+   ->  Values Scan on "*VALUES*"
+   ->  Aggregate
+         ->  Sample Scan on test_tablesample
+               Sampling: system_rows ("*VALUES*".column1)
+(5 rows)
+  (VALUES (0),(10),(100)) v(nrows),
+  LATERAL (SELECT count(*) FROM test_tablesample
+           TABLESAMPLE system_rows (nrows)) ss;
+ nrows | count 
+     0 |     0
+    10 |    10
+   100 |    31
+(3 rows)
+  SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (20);
+ count 
+    20
 (1 row)
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_rows;  -- fail, view depends on extension
+ERROR:  cannot drop extension tsm_system_rows because other objects depend on it
+DETAIL:  view vv depends on function system_rows(internal)
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
diff --git a/contrib/tsm_system_rows/sql/tsm_system_rows.sql b/contrib/tsm_system_rows/sql/tsm_system_rows.sql
index bd812220ed98dcab5f8e51128062fc65a50b6f95..e3ab4204eea5ae601aaf9e43c1edec6d0bda29d5 100644
--- a/contrib/tsm_system_rows/sql/tsm_system_rows.sql
+++ b/contrib/tsm_system_rows/sql/tsm_system_rows.sql
@@ -1,14 +1,39 @@
 CREATE EXTENSION tsm_system_rows;
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+  FROM generate_series(0, 30) s(i);
 ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1000);
-SELECT id FROM test_tablesample TABLESAMPLE system_rows (8) REPEATABLE (5432);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (0);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (1);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (10);
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (100);
+-- bad parameters should get through planning, but not execution:
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+SELECT id FROM test_tablesample TABLESAMPLE system_rows (-1);
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_rows (10) REPEATABLE (0);
+-- but a join should be allowed:
+  (VALUES (0),(10),(100)) v(nrows),
+  LATERAL (SELECT count(*) FROM test_tablesample
+           TABLESAMPLE system_rows (nrows)) ss;
+  (VALUES (0),(10),(100)) v(nrows),
+  LATERAL (SELECT count(*) FROM test_tablesample
+           TABLESAMPLE system_rows (nrows)) ss;
+  SELECT count(*) FROM test_tablesample TABLESAMPLE system_rows (20);
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_rows (20) REPEATABLE (10);
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_rows;  -- fail, view depends on extension
diff --git a/contrib/tsm_system_rows/tsm_system_rows--1.0.sql b/contrib/tsm_system_rows/tsm_system_rows--1.0.sql
index 1a29c584b5a8386180083120e25fb80edd8004e7..de508ed72675fe3cb51fd1dce770227b97d31acc 100644
--- a/contrib/tsm_system_rows/tsm_system_rows--1.0.sql
+++ b/contrib/tsm_system_rows/tsm_system_rows--1.0.sql
@@ -1,44 +1,9 @@
-/* src/test/modules/tablesample/tsm_system_rows--1.0.sql */
+/* contrib/tsm_system_rows/tsm_system_rows--1.0.sql */
 -- complain if script is sourced in psql, rather than via CREATE EXTENSION
 \echo Use "CREATE EXTENSION tsm_system_rows" to load this file. \quit
-CREATE FUNCTION tsm_system_rows_init(internal, int4, int4)
+CREATE FUNCTION system_rows(internal)
+RETURNS tsm_handler
+AS 'MODULE_PATHNAME', 'tsm_system_rows_handler'
-CREATE FUNCTION tsm_system_rows_nextblock(internal)
-CREATE FUNCTION tsm_system_rows_nexttuple(internal, int4, int2)
-CREATE FUNCTION tsm_system_rows_examinetuple(internal, int4, internal, bool)
-CREATE FUNCTION tsm_system_rows_end(internal)
-CREATE FUNCTION tsm_system_rows_reset(internal)
-CREATE FUNCTION tsm_system_rows_cost(internal, internal, internal, internal, internal, internal, internal)
-INSERT INTO pg_tablesample_method VALUES('system_rows', false, true,
-	'tsm_system_rows_init', 'tsm_system_rows_nextblock',
-	'tsm_system_rows_nexttuple', 'tsm_system_rows_examinetuple',
-	'tsm_system_rows_end', 'tsm_system_rows_reset', 'tsm_system_rows_cost');
diff --git a/contrib/tsm_system_rows/tsm_system_rows.c b/contrib/tsm_system_rows/tsm_system_rows.c
index e325eaff498972b46595b3be60b2f5ce92c8ed6d..f251e3e5e06dd9416d5a8bac14417cfa70311ca2 100644
--- a/contrib/tsm_system_rows/tsm_system_rows.c
+++ b/contrib/tsm_system_rows/tsm_system_rows.c
@@ -1,240 +1,356 @@
  * tsm_system_rows.c
- *	  interface routines for system_rows tablesample method
+ *	  support routines for SYSTEM_ROWS tablesample method
+ * The goal here is to produce a random sample with a given number of rows
+ * (or the whole relation, if that is fewer rows).  We use a block-sampling
+ * approach.  To ensure that the whole relation will be visited if necessary,
+ * we start at a randomly chosen block and then advance with a stride that
+ * is randomly chosen but is relatively prime to the relation's nblocks.
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Because of the dependence on nblocks, this method cannot be repeatable
+ * across queries.  (Even if the user hasn't explicitly changed the relation,
+ * maintenance activities such as autovacuum might change nblocks.)  However,
+ * we can at least make it repeatable across scans, by determining the
+ * sampling pattern only once on the first scan.  This means that rescans
+ * won't visit blocks added after the first scan, but that is fine since
+ * such blocks shouldn't contain any visible tuples anyway.
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
- *	  contrib/tsm_system_rows_rowlimit/tsm_system_rows.c
+ *	  contrib/tsm_system_rows/tsm_system_rows.c
 #include "postgres.h"
-#include "fmgr.h"
-#include "access/tablesample.h"
 #include "access/relscan.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
 #include "miscadmin.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
 #include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
+#include "optimizer/cost.h"
 #include "utils/sampling.h"
- * State
- */
+/* Private state */
 typedef struct
-	SamplerRandomState randstate;
 	uint32		seed;			/* random seed */
-	BlockNumber nblocks;		/* number of block in relation */
-	int32		ntuples;		/* number of tuples to return */
-	int32		donetuples;		/* tuples already returned */
+	int64		ntuples;		/* number of tuples to return */
+	int64		donetuples;		/* number of tuples already returned */
 	OffsetNumber lt;			/* last tuple returned from current block */
-	BlockNumber step;			/* step size */
+	BlockNumber doneblocks;		/* number of already-scanned blocks */
 	BlockNumber lb;				/* last block visited */
-	BlockNumber doneblocks;		/* number of already returned blocks */
-} SystemSamplerData;
+	/* these three values are not changed during a rescan: */
+	BlockNumber nblocks;		/* number of blocks in relation */
+	BlockNumber firstblock;		/* first block to sample from */
+	BlockNumber step;			/* step size, or 0 if not set yet */
+} SystemRowsSamplerData;
+static void system_rows_samplescangetsamplesize(PlannerInfo *root,
+									RelOptInfo *baserel,
+									List *paramexprs,
+									BlockNumber *pages,
+									double *tuples);
+static void system_rows_initsamplescan(SampleScanState *node,
+						   int eflags);
+static void system_rows_beginsamplescan(SampleScanState *node,
+							Datum *params,
+							int nparams,
+							uint32 seed);
+static BlockNumber system_rows_nextsampleblock(SampleScanState *node);
+static OffsetNumber system_rows_nextsampletuple(SampleScanState *node,
+							BlockNumber blockno,
+							OffsetNumber maxoffset);
+static bool SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan);
 static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate);
- * Initializes the state.
+ * Create a TsmRoutine descriptor for the SYSTEM_ROWS method.
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	uint32		seed = PG_GETARG_UINT32(1);
-	int32		ntuples = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2);
-	HeapScanDesc scan = tsdesc->heapScan;
-	SystemSamplerData *sampler;
+	TsmRoutine *tsm = makeNode(TsmRoutine);
-	if (ntuples < 1)
-		ereport(ERROR,
-				 errmsg("invalid sample size"),
-				 errhint("Sample size must be positive integer value.")));
+	tsm->parameterTypes = list_make1_oid(INT8OID);
-	sampler = palloc0(sizeof(SystemSamplerData));
+	/* See notes at head of file */
+	tsm->repeatable_across_queries = false;
+	tsm->repeatable_across_scans = true;
-	/* Remember initial values for reinit */
-	sampler->seed = seed;
-	sampler->nblocks = scan->rs_nblocks;
-	sampler->ntuples = ntuples;
-	sampler->donetuples = 0;
-	sampler->lt = InvalidOffsetNumber;
-	sampler->doneblocks = 0;
-	sampler_random_init_state(sampler->seed, sampler->randstate);
-	/* Find relative prime as step size for linear probing. */
-	sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-	/*
-	 * Randomize start position so that blocks close to step size don't have
-	 * higher probability of being chosen on very short scan.
-	 */
-	sampler->lb = sampler_random_fract(sampler->randstate) *
-		(sampler->nblocks / sampler->step);
+	tsm->SampleScanGetSampleSize = system_rows_samplescangetsamplesize;
+	tsm->InitSampleScan = system_rows_initsamplescan;
+	tsm->BeginSampleScan = system_rows_beginsamplescan;
+	tsm->NextSampleBlock = system_rows_nextsampleblock;
+	tsm->NextSampleTuple = system_rows_nextsampletuple;
+	tsm->EndSampleScan = NULL;
-	tsdesc->tsmdata = (void *) sampler;
- * Get next block number or InvalidBlockNumber when we're done.
- *
- * Uses linear probing algorithm for picking next block.
+ * Sample size estimation.
+static void
+system_rows_samplescangetsamplesize(PlannerInfo *root,
+									RelOptInfo *baserel,
+									List *paramexprs,
+									BlockNumber *pages,
+									double *tuples)
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+	Node	   *limitnode;
+	int64		ntuples;
+	double		npages;
-	sampler->lb = (sampler->lb + sampler->step) % sampler->nblocks;
-	sampler->doneblocks++;
+	/* Try to extract an estimate for the limit rowcount */
+	limitnode = (Node *) linitial(paramexprs);
+	limitnode = estimate_expression_value(root, limitnode);
-	/* All blocks have been read, we're done */
-	if (sampler->doneblocks > sampler->nblocks ||
-		sampler->donetuples >= sampler->ntuples)
-		PG_RETURN_UINT32(InvalidBlockNumber);
+	if (IsA(limitnode, Const) &&
+		!((Const *) limitnode)->constisnull)
+	{
+		ntuples = DatumGetInt64(((Const *) limitnode)->constvalue);
+		if (ntuples < 0)
+		{
+			/* Default ntuples if the value is bogus */
+			ntuples = 1000;
+		}
+	}
+	else
+	{
+		/* Default ntuples if we didn't obtain a non-null Const */
+		ntuples = 1000;
+	}
-	PG_RETURN_UINT32(sampler->lb);
+	/* Clamp to the estimated relation size */
+	if (ntuples > baserel->tuples)
+		ntuples = (int64) baserel->tuples;
+	ntuples = clamp_row_est(ntuples);
- * Get next tuple offset in current block or InvalidOffsetNumber if we are done
- * with this block.
- */
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	OffsetNumber maxoffset = PG_GETARG_UINT16(2);
-	SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-	OffsetNumber tupoffset = sampler->lt;
+	if (baserel->tuples > 0 && baserel->pages > 0)
+	{
+		/* Estimate number of pages visited based on tuple density */
+		double		density = baserel->tuples / (double) baserel->pages;
-	if (tupoffset == InvalidOffsetNumber)
-		tupoffset = FirstOffsetNumber;
+		npages = ntuples / density;
+	}
-		tupoffset++;
-	if (tupoffset > maxoffset ||
-		sampler->donetuples >= sampler->ntuples)
-		tupoffset = InvalidOffsetNumber;
+	{
+		/* For lack of data, assume one tuple per page */
+		npages = ntuples;
+	}
-	sampler->lt = tupoffset;
+	/* Clamp to sane value */
+	npages = clamp_row_est(Min((double) baserel->pages, npages));
-	PG_RETURN_UINT16(tupoffset);
+	*pages = npages;
+	*tuples = ntuples;
- * Examine tuple and decide if it should be returned.
+ * Initialize during executor setup.
+static void
+system_rows_initsamplescan(SampleScanState *node, int eflags)
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	bool		visible = PG_GETARG_BOOL(3);
-	SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-	if (!visible)
-		PG_RETURN_BOOL(false);
-	sampler->donetuples++;
+	node->tsm_state = palloc0(sizeof(SystemRowsSamplerData));
+	/* Note the above leaves tsm_state->step equal to zero */
- * Cleanup method.
+ * Examine parameters and prepare for a sample scan.
+static void
+system_rows_beginsamplescan(SampleScanState *node,
+							Datum *params,
+							int nparams,
+							uint32 seed)
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+	SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
+	int64		ntuples = DatumGetInt64(params[0]);
+	if (ntuples < 0)
+		ereport(ERROR,
+				 errmsg("sample size must not be negative")));
-	pfree(tsdesc->tsmdata);
+	sampler->seed = seed;
+	sampler->ntuples = ntuples;
+	sampler->donetuples = 0;
+	sampler->lt = InvalidOffsetNumber;
+	sampler->doneblocks = 0;
+	/* lb will be initialized during first NextSampleBlock call */
+	/* we intentionally do not change nblocks/firstblock/step here */
+	/*
+	 * We *must* use pagemode visibility checking in this module, so force
+	 * that even though it's currently default.
+	 */
+	node->use_pagemode = true;
- * Reset state (called by ReScan).
+ * Select next block to sample.
+ *
+ * Uses linear probing algorithm for picking next block.
+static BlockNumber
+system_rows_nextsampleblock(SampleScanState *node)
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+	SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
+	HeapScanDesc scan = node->ss.ss_currentScanDesc;
-	sampler->lt = InvalidOffsetNumber;
-	sampler->donetuples = 0;
-	sampler->doneblocks = 0;
+	/* First call within scan? */
+	if (sampler->doneblocks == 0)
+	{
+		/* First scan within query? */
+		if (sampler->step == 0)
+		{
+			/* Initialize now that we have scan descriptor */
+			SamplerRandomState randstate;
+			/* If relation is empty, there's nothing to scan */
+			if (scan->rs_nblocks == 0)
+				return InvalidBlockNumber;
+			/* We only need an RNG during this setup step */
+			sampler_random_init_state(sampler->seed, randstate);
+			/* Compute nblocks/firstblock/step only once per query */
+			sampler->nblocks = scan->rs_nblocks;
-	sampler_random_init_state(sampler->seed, sampler->randstate);
-	sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-	sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
+			/* Choose random starting block within the relation */
+			/* (Actually this is the predecessor of the first block visited) */
+			sampler->firstblock = sampler_random_fract(randstate) *
+				sampler->nblocks;
+			/* Find relative prime as step size for linear probing */
+			sampler->step = random_relative_prime(sampler->nblocks, randstate);
+		}
+		/* Reinitialize lb */
+		sampler->lb = sampler->firstblock;
+	}
+	/* If we've read all blocks or returned all needed tuples, we're done */
+	if (++sampler->doneblocks > sampler->nblocks ||
+		sampler->donetuples >= sampler->ntuples)
+		return InvalidBlockNumber;
+	/*
+	 * It's probably impossible for scan->rs_nblocks to decrease between scans
+	 * within a query; but just in case, loop until we select a block number
+	 * less than scan->rs_nblocks.  We don't care if scan->rs_nblocks has
+	 * increased since the first scan.
+	 */
+	do
+	{
+		/* Advance lb, using uint64 arithmetic to forestall overflow */
+		sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
+	} while (sampler->lb >= scan->rs_nblocks);
+	return sampler->lb;
- * Costing function.
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * When we reach end of the block, return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+static OffsetNumber
+system_rows_nextsampletuple(SampleScanState *node,
+							BlockNumber blockno,
+							OffsetNumber maxoffset)
-	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-	Path	   *path = (Path *) PG_GETARG_POINTER(1);
-	RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
-	List	   *args = (List *) PG_GETARG_POINTER(3);
-	BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
-	double	   *tuples = (double *) PG_GETARG_POINTER(5);
-	Node	   *limitnode;
-	int32		ntuples;
+	SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
+	HeapScanDesc scan = node->ss.ss_currentScanDesc;
+	OffsetNumber tupoffset = sampler->lt;
-	limitnode = linitial(args);
-	limitnode = estimate_expression_value(root, limitnode);
+	/* Quit if we've returned all needed tuples */
+	if (sampler->donetuples >= sampler->ntuples)
+		return InvalidOffsetNumber;
-	if (IsA(limitnode, RelabelType))
-		limitnode = (Node *) ((RelabelType *) limitnode)->arg;
+	/*
+	 * Because we should only count visible tuples as being returned, we need
+	 * to search for a visible tuple rather than just let the core code do it.
+	 */
-	if (IsA(limitnode, Const))
-		ntuples = DatumGetInt32(((Const *) limitnode)->constvalue);
-	else
+	/* We rely on the data accumulated in pagemode access */
+	Assert(scan->rs_pageatatime);
+	for (;;)
-		/* Default ntuples if the estimation didn't return Const. */
-		ntuples = 1000;
+		/* Advance to next possible offset on page */
+		if (tupoffset == InvalidOffsetNumber)
+			tupoffset = FirstOffsetNumber;
+		else
+			tupoffset++;
+		/* Done? */
+		if (tupoffset > maxoffset)
+		{
+			tupoffset = InvalidOffsetNumber;
+			break;
+		}
+		/* Found a candidate? */
+		if (SampleOffsetVisible(tupoffset, scan))
+		{
+			sampler->donetuples++;
+			break;
+		}
-	*pages = Min(baserel->pages, ntuples);
-	*tuples = ntuples;
-	path->rows = *tuples;
+	sampler->lt = tupoffset;
+	return tupoffset;
+ * Check whether the tuple at the given offset is visible
+ *
+ * In pageatatime mode, heapgetpage() already did visibility checks,
+ * so just look at the info it left in rs_vistuples[].
+ */
+static bool
+SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan)
+	int			start = 0,
+				end = scan->rs_ntuples - 1;
+	while (start <= end)
+	{
+		int			mid = (start + end) / 2;
+		OffsetNumber curoffset = scan->rs_vistuples[mid];
+		if (tupoffset == curoffset)
+			return true;
+		else if (tupoffset < curoffset)
+			end = mid - 1;
+		else
+			start = mid + 1;
+	}
+	return false;
+ * Compute greatest common divisor of two uint32's.
+ */
 static uint32
 gcd(uint32 a, uint32 b)
@@ -250,22 +366,29 @@ gcd(uint32 a, uint32 b)
 	return b;
+ * Pick a random value less than and relatively prime to n, if possible
+ * (else return 1).
+ */
 static uint32
 random_relative_prime(uint32 n, SamplerRandomState randstate)
-	/* Pick random starting number, with some limits on what it can be. */
-	uint32		r = (uint32) sampler_random_fract(randstate) * n / 2 + n / 4,
-				t;
+	uint32		r;
+	/* Safety check to avoid infinite loop or zero result for small n. */
+	if (n <= 1)
+		return 1;
 	 * This should only take 2 or 3 iterations as the probability of 2 numbers
-	 * being relatively prime is ~61%.
+	 * being relatively prime is ~61%; but just in case, we'll include a
+	 * CHECK_FOR_INTERRUPTS in the loop.
-	while ((t = gcd(r, n)) > 1)
+	do
-		r /= t;
-	}
+		r = (uint32) (sampler_random_fract(randstate) * n);
+	} while (r == 0 || gcd(r, n) > 1);
 	return r;
diff --git a/contrib/tsm_system_rows/tsm_system_rows.control b/contrib/tsm_system_rows/tsm_system_rows.control
index 84ea7adb49a261247b936ba4c884b2edb165529a..4bd0232f97215933516bf5ea801a49f43f8c0d6c 100644
--- a/contrib/tsm_system_rows/tsm_system_rows.control
+++ b/contrib/tsm_system_rows/tsm_system_rows.control
@@ -1,5 +1,5 @@
 # tsm_system_rows extension
-comment = 'SYSTEM TABLESAMPLE method which accepts number rows as a limit'
+comment = 'TABLESAMPLE method which accepts number of rows as a limit'
 default_version = '1.0'
 module_pathname = '$libdir/tsm_system_rows'
 relocatable = true
diff --git a/contrib/tsm_system_time/Makefile b/contrib/tsm_system_time/Makefile
index c42c1c6bb61f22f1e01925c02e41ecba53f184e6..168becf54e2ff225a583437d87d590602b6677e4 100644
--- a/contrib/tsm_system_time/Makefile
+++ b/contrib/tsm_system_time/Makefile
@@ -1,8 +1,8 @@
-# src/test/modules/tsm_system_time/Makefile
+# contrib/tsm_system_time/Makefile
 MODULE_big = tsm_system_time
 OBJS = tsm_system_time.o $(WIN32RES)
-PGFILEDESC = "tsm_system_time - SYSTEM TABLESAMPLE method which accepts number rows of as a limit"
+PGFILEDESC = "tsm_system_time - TABLESAMPLE method which accepts time in milliseconds as a limit"
 EXTENSION = tsm_system_time
 DATA = tsm_system_time--1.0.sql
diff --git a/contrib/tsm_system_time/expected/tsm_system_time.out b/contrib/tsm_system_time/expected/tsm_system_time.out
index 32ad03c4bdcef47e9bfce5ed7ab2060b528e77bc..ac44f30be90386407273b213239aa234a5dd0bd1 100644
--- a/contrib/tsm_system_time/expected/tsm_system_time.out
+++ b/contrib/tsm_system_time/expected/tsm_system_time.out
@@ -1,54 +1,100 @@
 CREATE EXTENSION tsm_system_time;
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+  FROM generate_series(0, 30) s(i);
 ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000);
+-- It's a bit tricky to test SYSTEM_TIME in a platform-independent way.
+-- We can test the zero-time corner case ...
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (0);
-    31
+     0
 (1 row)
-SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432);
- id 
-  7
- 14
- 21
- 28
-  4
- 11
- 18
- 25
-  1
-  8
- 15
- 22
- 29
-  5
- 12
- 19
- 26
-  2
-  9
- 16
- 23
- 30
-  6
- 13
- 20
- 27
-  3
- 10
- 17
- 24
-  0
-(31 rows)
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10);
-                                     QUERY PLAN                                     
- Sample Scan (system_time) on test_tablesample  (cost=0.00..100.25 rows=25 width=4)
+-- ... and we assume that this will finish before running out of time:
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (100000);
+ count 
+    31
 (1 row)
--- done
-DROP TABLE test_tablesample CASCADE;
+-- bad parameters should get through planning, but not execution:
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+                    QUERY PLAN                    
+ Sample Scan on test_tablesample
+   Sampling: system_time ('-1'::double precision)
+(2 rows)
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+ERROR:  sample collection time must not be negative
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_time (10) REPEATABLE (0);
+ERROR:  tablesample method system_time does not support REPEATABLE
+LINE 1: SELECT * FROM test_tablesample TABLESAMPLE system_time (10) ...
+                                                   ^
+-- since it's not repeatable, we expect a Materialize node in these plans:
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (100000)) ss;
+                               QUERY PLAN                               
+ Nested Loop
+   ->  Aggregate
+         ->  Materialize
+               ->  Sample Scan on test_tablesample
+                     Sampling: system_time ('100000'::double precision)
+   ->  Values Scan on "*VALUES*"
+(6 rows)
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (100000)) ss;
+  time  | count 
+      0 |    31
+ 100000 |    31
+(2 rows)
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (time)) ss;
+                           QUERY PLAN                           
+ Nested Loop
+   ->  Values Scan on "*VALUES*"
+   ->  Aggregate
+         ->  Materialize
+               ->  Sample Scan on test_tablesample
+                     Sampling: system_time ("*VALUES*".column1)
+(6 rows)
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (time)) ss;
+  time  | count 
+      0 |     0
+ 100000 |    31
+(2 rows)
+  SELECT * FROM test_tablesample TABLESAMPLE system_time (20);
+                    QUERY PLAN                    
+ Sample Scan on test_tablesample
+   Sampling: system_time ('20'::double precision)
+(2 rows)
+DROP EXTENSION tsm_system_time;  -- fail, view depends on extension
+ERROR:  cannot drop extension tsm_system_time because other objects depend on it
+DETAIL:  view vv depends on function system_time(internal)
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
diff --git a/contrib/tsm_system_time/sql/tsm_system_time.sql b/contrib/tsm_system_time/sql/tsm_system_time.sql
index 68dbbf98afd2df5f66e0aa5258c4e00166613d33..117de163d85059f362cda878ef94f5f6b43aa65e 100644
--- a/contrib/tsm_system_time/sql/tsm_system_time.sql
+++ b/contrib/tsm_system_time/sql/tsm_system_time.sql
@@ -1,14 +1,51 @@
 CREATE EXTENSION tsm_system_time;
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000) FROM generate_series(0, 30) s(i) ORDER BY i;
+CREATE TABLE test_tablesample (id int, name text);
+INSERT INTO test_tablesample SELECT i, repeat(i::text, 1000)
+  FROM generate_series(0, 30) s(i);
 ANALYZE test_tablesample;
-SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (1000);
-SELECT id FROM test_tablesample TABLESAMPLE system_time (1000) REPEATABLE (5432);
+-- It's a bit tricky to test SYSTEM_TIME in a platform-independent way.
+-- We can test the zero-time corner case ...
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (0);
+-- ... and we assume that this will finish before running out of time:
+SELECT count(*) FROM test_tablesample TABLESAMPLE system_time (100000);
+-- bad parameters should get through planning, but not execution:
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+SELECT id FROM test_tablesample TABLESAMPLE system_time (-1);
+-- fail, this method is not repeatable:
+SELECT * FROM test_tablesample TABLESAMPLE system_time (10) REPEATABLE (0);
+-- since it's not repeatable, we expect a Materialize node in these plans:
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (100000)) ss;
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (100000)) ss;
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (time)) ss;
+  (VALUES (0),(100000)) v(time),
+  LATERAL (SELECT COUNT(*) FROM test_tablesample
+           TABLESAMPLE system_time (time)) ss;
+  SELECT * FROM test_tablesample TABLESAMPLE system_time (20);
-EXPLAIN SELECT id FROM test_tablesample TABLESAMPLE system_time (100) REPEATABLE (10);
--- done
-DROP TABLE test_tablesample CASCADE;
+DROP EXTENSION tsm_system_time;  -- fail, view depends on extension
diff --git a/contrib/tsm_system_time/tsm_system_time--1.0.sql b/contrib/tsm_system_time/tsm_system_time--1.0.sql
index 1f390d6ed7acac601567e67a42fdbc2220802ac8..c59d2e84efdabfa9e9163c036eb702e8ad981d26 100644
--- a/contrib/tsm_system_time/tsm_system_time--1.0.sql
+++ b/contrib/tsm_system_time/tsm_system_time--1.0.sql
@@ -1,39 +1,9 @@
-/* src/test/modules/tablesample/tsm_system_time--1.0.sql */
+/* contrib/tsm_system_time/tsm_system_time--1.0.sql */
 -- complain if script is sourced in psql, rather than via CREATE EXTENSION
 \echo Use "CREATE EXTENSION tsm_system_time" to load this file. \quit
-CREATE FUNCTION tsm_system_time_init(internal, int4, int4)
+CREATE FUNCTION system_time(internal)
+RETURNS tsm_handler
+AS 'MODULE_PATHNAME', 'tsm_system_time_handler'
-CREATE FUNCTION tsm_system_time_nextblock(internal)
-CREATE FUNCTION tsm_system_time_nexttuple(internal, int4, int2)
-CREATE FUNCTION tsm_system_time_end(internal)
-CREATE FUNCTION tsm_system_time_reset(internal)
-CREATE FUNCTION tsm_system_time_cost(internal, internal, internal, internal, internal, internal, internal)
-INSERT INTO pg_tablesample_method VALUES('system_time', false, true,
-	'tsm_system_time_init', 'tsm_system_time_nextblock',
-	'tsm_system_time_nexttuple', '-', 'tsm_system_time_end',
-	'tsm_system_time_reset', 'tsm_system_time_cost');
diff --git a/contrib/tsm_system_time/tsm_system_time.c b/contrib/tsm_system_time/tsm_system_time.c
index 7708fc07617488e9a57128a72eba9707004dc9f3..83f1455c5fa248b3028e095acd8ceedd6ae4c9e1 100644
--- a/contrib/tsm_system_time/tsm_system_time.c
+++ b/contrib/tsm_system_time/tsm_system_time.c
@@ -1,286 +1,320 @@
  * tsm_system_time.c
- *	  interface routines for system_time tablesample method
+ *	  support routines for SYSTEM_TIME tablesample method
+ * The desire here is to produce a random sample with as many rows as possible
+ * in no more than the specified amount of time.  We use a block-sampling
+ * approach.  To ensure that the whole relation will be visited if necessary,
+ * we start at a randomly chosen block and then advance with a stride that
+ * is randomly chosen but is relatively prime to the relation's nblocks.
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Because of the time dependence, this method is necessarily unrepeatable.
+ * However, we do what we can to reduce surprising behavior by selecting
+ * the sampling pattern just once per query, much as in tsm_system_rows.
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
- *	  contrib/tsm_system_time_rowlimit/tsm_system_time.c
+ *	  contrib/tsm_system_time/tsm_system_time.c
 #include "postgres.h"
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h>				/* for _isnan */
+#include <math.h>
-#include "access/tablesample.h"
 #include "access/relscan.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
 #include "miscadmin.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
 #include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
+#include "optimizer/cost.h"
 #include "utils/sampling.h"
 #include "utils/spccache.h"
-#include "utils/timestamp.h"
- * State
- */
+/* Private state */
 typedef struct
-	SamplerRandomState randstate;
 	uint32		seed;			/* random seed */
-	BlockNumber nblocks;		/* number of block in relation */
-	int32		time;			/* time limit for sampling */
-	TimestampTz start_time;		/* start time of sampling */
-	TimestampTz end_time;		/* end time of sampling */
+	double		millis;			/* time limit for sampling */
+	instr_time	start_time;		/* scan start time */
 	OffsetNumber lt;			/* last tuple returned from current block */
-	BlockNumber step;			/* step size */
+	BlockNumber doneblocks;		/* number of already-scanned blocks */
 	BlockNumber lb;				/* last block visited */
-	BlockNumber estblocks;		/* estimated number of returned blocks
-								 * (moving) */
-	BlockNumber doneblocks;		/* number of already returned blocks */
-} SystemSamplerData;
+	/* these three values are not changed during a rescan: */
+	BlockNumber nblocks;		/* number of blocks in relation */
+	BlockNumber firstblock;		/* first block to sample from */
+	BlockNumber step;			/* step size, or 0 if not set yet */
+} SystemTimeSamplerData;
+static void system_time_samplescangetsamplesize(PlannerInfo *root,
+									RelOptInfo *baserel,
+									List *paramexprs,
+									BlockNumber *pages,
+									double *tuples);
+static void system_time_initsamplescan(SampleScanState *node,
+						   int eflags);
+static void system_time_beginsamplescan(SampleScanState *node,
+							Datum *params,
+							int nparams,
+							uint32 seed);
+static BlockNumber system_time_nextsampleblock(SampleScanState *node);
+static OffsetNumber system_time_nextsampletuple(SampleScanState *node,
+							BlockNumber blockno,
+							OffsetNumber maxoffset);
 static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate);
- * Initializes the state.
+ * Create a TsmRoutine descriptor for the SYSTEM_TIME method.
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	uint32		seed = PG_GETARG_UINT32(1);
-	int32		time = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2);
-	HeapScanDesc scan = tsdesc->heapScan;
-	SystemSamplerData *sampler;
-	if (time < 1)
-		ereport(ERROR,
-				 errmsg("invalid time limit"),
-				 errhint("Time limit must be positive integer value.")));
+	TsmRoutine *tsm = makeNode(TsmRoutine);
-	sampler = palloc0(sizeof(SystemSamplerData));
+	tsm->parameterTypes = list_make1_oid(FLOAT8OID);
-	/* Remember initial values for reinit */
-	sampler->seed = seed;
-	sampler->nblocks = scan->rs_nblocks;
-	sampler->lt = InvalidOffsetNumber;
-	sampler->estblocks = 2;
-	sampler->doneblocks = 0;
-	sampler->time = time;
-	sampler->start_time = GetCurrentTimestamp();
-	sampler->end_time = TimestampTzPlusMilliseconds(sampler->start_time,
-													sampler->time);
+	/* See notes at head of file */
+	tsm->repeatable_across_queries = false;
+	tsm->repeatable_across_scans = false;
-	sampler_random_init_state(sampler->seed, sampler->randstate);
+	tsm->SampleScanGetSampleSize = system_time_samplescangetsamplesize;
+	tsm->InitSampleScan = system_time_initsamplescan;
+	tsm->BeginSampleScan = system_time_beginsamplescan;
+	tsm->NextSampleBlock = system_time_nextsampleblock;
+	tsm->NextSampleTuple = system_time_nextsampletuple;
+	tsm->EndSampleScan = NULL;
-	/* Find relative prime as step size for linear probing. */
-	sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-	/*
-	 * Randomize start position so that blocks close to step size don't have
-	 * higher probability of being chosen on very short scan.
-	 */
-	sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
-	tsdesc->tsmdata = (void *) sampler;
- * Get next block number or InvalidBlockNumber when we're done.
- *
- * Uses linear probing algorithm for picking next block.
+ * Sample size estimation.
+static void
+system_time_samplescangetsamplesize(PlannerInfo *root,
+									RelOptInfo *baserel,
+									List *paramexprs,
+									BlockNumber *pages,
+									double *tuples)
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-	sampler->lb = (sampler->lb + sampler->step) % sampler->nblocks;
-	sampler->doneblocks++;
+	Node	   *limitnode;
+	double		millis;
+	double		spc_random_page_cost;
+	double		npages;
+	double		ntuples;
-	/* All blocks have been read, we're done */
-	if (sampler->doneblocks > sampler->nblocks)
-		PG_RETURN_UINT32(InvalidBlockNumber);
+	/* Try to extract an estimate for the limit time spec */
+	limitnode = (Node *) linitial(paramexprs);
+	limitnode = estimate_expression_value(root, limitnode);
-	/*
-	 * Update the estimations for time limit at least 10 times per estimated
-	 * number of returned blocks to handle variations in block read speed.
-	 */
-	if (sampler->doneblocks % Max(sampler->estblocks / 10, 1) == 0)
+	if (IsA(limitnode, Const) &&
+		!((Const *) limitnode)->constisnull)
+	{
+		millis = DatumGetFloat8(((Const *) limitnode)->constvalue);
+		if (millis < 0 || isnan(millis))
+		{
+			/* Default millis if the value is bogus */
+			millis = 1000;
+		}
+	}
+	else
-		TimestampTz now = GetCurrentTimestamp();
-		long		secs;
-		int			usecs;
-		int			usecs_remaining;
-		int			time_per_block;
+		/* Default millis if we didn't obtain a non-null Const */
+		millis = 1000;
+	}
-		TimestampDifference(sampler->start_time, now, &secs, &usecs);
-		usecs += (int) secs *1000000;
+	/* Get the planner's idea of cost per page read */
+	get_tablespace_page_costs(baserel->reltablespace,
+							  &spc_random_page_cost,
+							  NULL);
-		time_per_block = usecs / sampler->doneblocks;
+	/*
+	 * Estimate the number of pages we can read by assuming that the cost
+	 * figure is expressed in milliseconds.  This is completely, unmistakably
+	 * bogus, but we have to do something to produce an estimate and there's
+	 * no better answer.
+	 */
+	if (spc_random_page_cost > 0)
+		npages = millis / spc_random_page_cost;
+	else
+		npages = millis;		/* even more bogus, but whatcha gonna do? */
-		/* No time left, end. */
-		TimestampDifference(now, sampler->end_time, &secs, &usecs);
-		if (secs <= 0 && usecs <= 0)
-			PG_RETURN_UINT32(InvalidBlockNumber);
+	/* Clamp to sane value */
+	npages = clamp_row_est(Min((double) baserel->pages, npages));
-		/* Remaining microseconds */
-		usecs_remaining = usecs + (int) secs *1000000;
+	if (baserel->tuples > 0 && baserel->pages > 0)
+	{
+		/* Estimate number of tuples returned based on tuple density */
+		double		density = baserel->tuples / (double) baserel->pages;
-		/* Recalculate estimated returned number of blocks */
-		if (time_per_block < usecs_remaining && time_per_block > 0)
-			sampler->estblocks = sampler->time * time_per_block;
+		ntuples = npages * density;
-	PG_RETURN_UINT32(sampler->lb);
- * Get next tuple offset in current block or InvalidOffsetNumber if we are done
- * with this block.
- */
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	OffsetNumber maxoffset = PG_GETARG_UINT16(2);
-	SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-	OffsetNumber tupoffset = sampler->lt;
-	if (tupoffset == InvalidOffsetNumber)
-		tupoffset = FirstOffsetNumber;
-		tupoffset++;
-	if (tupoffset > maxoffset)
-		tupoffset = InvalidOffsetNumber;
+	{
+		/* For lack of data, assume one tuple per page */
+		ntuples = npages;
+	}
-	sampler->lt = tupoffset;
+	/* Clamp to the estimated relation size */
+	ntuples = clamp_row_est(Min(baserel->tuples, ntuples));
-	PG_RETURN_UINT16(tupoffset);
+	*pages = npages;
+	*tuples = ntuples;
- * Cleanup method.
+ * Initialize during executor setup.
+static void
+system_time_initsamplescan(SampleScanState *node, int eflags)
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	pfree(tsdesc->tsmdata);
+	node->tsm_state = palloc0(sizeof(SystemTimeSamplerData));
+	/* Note the above leaves tsm_state->step equal to zero */
- * Reset state (called by ReScan).
+ * Examine parameters and prepare for a sample scan.
+static void
+system_time_beginsamplescan(SampleScanState *node,
+							Datum *params,
+							int nparams,
+							uint32 seed)
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+	SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
+	double		millis = DatumGetFloat8(params[0]);
+	if (millis < 0 || isnan(millis))
+		ereport(ERROR,
+				 errmsg("sample collection time must not be negative")));
+	sampler->seed = seed;
+	sampler->millis = millis;
 	sampler->lt = InvalidOffsetNumber;
-	sampler->start_time = GetCurrentTimestamp();
-	sampler->end_time = TimestampTzPlusMilliseconds(sampler->start_time,
-													sampler->time);
-	sampler->estblocks = 2;
 	sampler->doneblocks = 0;
-	sampler_random_init_state(sampler->seed, sampler->randstate);
-	sampler->step = random_relative_prime(sampler->nblocks, sampler->randstate);
-	sampler->lb = sampler_random_fract(sampler->randstate) * (sampler->nblocks / sampler->step);
+	/* start_time, lb will be initialized during first NextSampleBlock call */
+	/* we intentionally do not change nblocks/firstblock/step here */
- * Costing function.
+ * Select next block to sample.
+ *
+ * Uses linear probing algorithm for picking next block.
+static BlockNumber
+system_time_nextsampleblock(SampleScanState *node)
-	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-	Path	   *path = (Path *) PG_GETARG_POINTER(1);
-	RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
-	List	   *args = (List *) PG_GETARG_POINTER(3);
-	BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
-	double	   *tuples = (double *) PG_GETARG_POINTER(5);
-	Node	   *limitnode;
-	int32		time;
-	BlockNumber relpages;
-	double		reltuples;
-	double		density;
-	double		spc_random_page_cost;
-	limitnode = linitial(args);
-	limitnode = estimate_expression_value(root, limitnode);
-	if (IsA(limitnode, RelabelType))
-		limitnode = (Node *) ((RelabelType *) limitnode)->arg;
+	SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
+	HeapScanDesc scan = node->ss.ss_currentScanDesc;
+	instr_time	cur_time;
-	if (IsA(limitnode, Const))
-		time = DatumGetInt32(((Const *) limitnode)->constvalue);
-	else
+	/* First call within scan? */
+	if (sampler->doneblocks == 0)
-		/* Default time (1s) if the estimation didn't return Const. */
-		time = 1000;
+		/* First scan within query? */
+		if (sampler->step == 0)
+		{
+			/* Initialize now that we have scan descriptor */
+			SamplerRandomState randstate;
+			/* If relation is empty, there's nothing to scan */
+			if (scan->rs_nblocks == 0)
+				return InvalidBlockNumber;
+			/* We only need an RNG during this setup step */
+			sampler_random_init_state(sampler->seed, randstate);
+			/* Compute nblocks/firstblock/step only once per query */
+			sampler->nblocks = scan->rs_nblocks;
+			/* Choose random starting block within the relation */
+			/* (Actually this is the predecessor of the first block visited) */
+			sampler->firstblock = sampler_random_fract(randstate) *
+				sampler->nblocks;
+			/* Find relative prime as step size for linear probing */
+			sampler->step = random_relative_prime(sampler->nblocks, randstate);
+		}
+		/* Reinitialize lb and start_time */
+		sampler->lb = sampler->firstblock;
+		INSTR_TIME_SET_CURRENT(sampler->start_time);
-	relpages = baserel->pages;
-	reltuples = baserel->tuples;
+	/* If we've read all blocks in relation, we're done */
+	if (++sampler->doneblocks > sampler->nblocks)
+		return InvalidBlockNumber;
-	/* estimate the tuple density */
-	if (relpages > 0)
-		density = reltuples / (double) relpages;
-	else
-		density = (BLCKSZ - SizeOfPageHeaderData) / baserel->width;
+	/*
+	 * If we've used up all the allotted time, we're done.  cur_time has to
+	 * be loaded with the current time before subtracting start_time from it;
+	 * otherwise the elapsed-time test below reads an uninitialized value.
+	 */
+	INSTR_TIME_SET_CURRENT(cur_time);
+	INSTR_TIME_SUBTRACT(cur_time, sampler->start_time);
+	if (INSTR_TIME_GET_MILLISEC(cur_time) >= sampler->millis)
+		return InvalidBlockNumber;
-	 * We equal random page cost value to number of ms it takes to read the
-	 * random page here which is far from accurate but we don't have anything
-	 * better to base our predicted page reads.
+	 * It's probably impossible for scan->rs_nblocks to decrease between scans
+	 * within a query; but just in case, loop until we select a block number
+	 * less than scan->rs_nblocks.  We don't care if scan->rs_nblocks has
+	 * increased since the first scan.
-	get_tablespace_page_costs(baserel->reltablespace,
-							  &spc_random_page_cost,
-							  NULL);
+	do
+	{
+		/* Advance lb, using uint64 arithmetic to forestall overflow */
+		sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
+	} while (sampler->lb >= scan->rs_nblocks);
-	/*
-	 * Assumption here is that we'll never read less than 1% of table pages,
-	 * this is here mainly because it is much less bad to overestimate than
-	 * underestimate and using just spc_random_page_cost will probably lead to
-	 * underestimations in general.
-	 */
-	*pages = Min(baserel->pages, Max(time / spc_random_page_cost, baserel->pages / 100));
-	*tuples = rint(density * (double) *pages * path->rows / baserel->tuples);
-	path->rows = *tuples;
+	return sampler->lb;
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * When we reach end of the block, return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+ */
+static OffsetNumber
+system_time_nextsampletuple(SampleScanState *node,
+							BlockNumber blockno,
+							OffsetNumber maxoffset)
+	SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
+	OffsetNumber tupoffset = sampler->lt;
+	/* Advance to next possible offset on page */
+	if (tupoffset == InvalidOffsetNumber)
+		tupoffset = FirstOffsetNumber;
+	else
+		tupoffset++;
+	/* Done? */
+	if (tupoffset > maxoffset)
+		tupoffset = InvalidOffsetNumber;
+	sampler->lt = tupoffset;
+	return tupoffset;
+ * Compute greatest common divisor of two uint32's.
+ */
 static uint32
 gcd(uint32 a, uint32 b)
@@ -296,22 +330,29 @@ gcd(uint32 a, uint32 b)
 	return b;
+ * Pick a random value less than and relatively prime to n, if possible
+ * (else return 1).
+ */
 static uint32
 random_relative_prime(uint32 n, SamplerRandomState randstate)
-	/* Pick random starting number, with some limits on what it can be. */
-	uint32		r = (uint32) sampler_random_fract(randstate) * n / 2 + n / 4,
-				t;
+	uint32		r;
+	/* Safety check to avoid infinite loop or zero result for small n. */
+	if (n <= 1)
+		return 1;
 	 * This should only take 2 or 3 iterations as the probability of 2 numbers
-	 * being relatively prime is ~61%.
+	 * being relatively prime is ~61%; but just in case, we'll include a
+	 * CHECK_FOR_INTERRUPTS in the loop.
-	while ((t = gcd(r, n)) > 1)
+	do
-		r /= t;
-	}
+		r = (uint32) (sampler_random_fract(randstate) * n);
+	} while (r == 0 || gcd(r, n) > 1);
 	return r;
diff --git a/contrib/tsm_system_time/tsm_system_time.control b/contrib/tsm_system_time/tsm_system_time.control
index ebcee19d23a0db519f1597335e80588d7a56b59d..c247987c66d14b9a2cb75fb07bbe623366d4b458 100644
--- a/contrib/tsm_system_time/tsm_system_time.control
+++ b/contrib/tsm_system_time/tsm_system_time.control
@@ -1,5 +1,5 @@
 # tsm_system_time extension
-comment = 'SYSTEM TABLESAMPLE method which accepts time in milliseconds as a limit'
+comment = 'TABLESAMPLE method which accepts time in milliseconds as a limit'
 default_version = '1.0'
 module_pathname = '$libdir/tsm_system_time'
 relocatable = true
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 2c2190f13d373e0ff0567f7052bf73838cc3d770..9096ee5d517de88aff7d8cd0c233cc8862c13b3c 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -278,11 +278,6 @@
       <entry>planner statistics</entry>
-     <row>
-      <entry><link linkend="catalog-pg-tablesample-method"><structname>pg_tablesample_method</structname></link></entry>
-      <entry>table sampling methods</entry>
-     </row>
       <entry><link linkend="catalog-pg-tablespace"><structname>pg_tablespace</structname></link></entry>
       <entry>tablespaces within this database cluster</entry>
@@ -6132,121 +6127,6 @@
- <sect1 id="catalog-pg-tablesample-method">
-  <title><structname>pg_tabesample_method</structname></title>
-  <indexterm zone="catalog-pg-tablesample-method">
-   <primary>pg_am</primary>
-  </indexterm>
-  <para>
-   The catalog <structname>pg_tablesample_method</structname> stores
-   information about table sampling methods which can be used in
-   <command>TABLESAMPLE</command> clause of a <command>SELECT</command>
-   statement.
-  </para>
-  <table>
-   <title><structname>pg_tablesample_method</> Columns</title>
-   <tgroup cols="4">
-    <thead>
-     <row>
-      <entry>Name</entry>
-      <entry>Type</entry>
-      <entry>References</entry>
-      <entry>Description</entry>
-     </row>
-    </thead>
-    <tbody>
-     <row>
-      <entry><structfield>oid</structfield></entry>
-      <entry><type>oid</type></entry>
-      <entry></entry>
-      <entry>Row identifier (hidden attribute; must be explicitly selected)</entry>
-     </row>
-     <row>
-      <entry><structfield>tsmname</structfield></entry>
-      <entry><type>name</type></entry>
-      <entry></entry>
-      <entry>Name of the sampling method</entry>
-     </row>
-     <row>
-      <entry><structfield>tsmseqscan</structfield></entry>
-      <entry><type>bool</type></entry>
-      <entry></entry>
-      <entry>If true, the sampling method scans the whole table sequentially.
-      </entry>
-     </row>
-     <row>
-      <entry><structfield>tsmpagemode</structfield></entry>
-      <entry><type>bool</type></entry>
-      <entry></entry>
-      <entry>If true, the sampling method always reads the pages completely.
-      </entry>
-     </row>
-     <row>
-      <entry><structfield>tsminit</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Initialize the sampling scan</quote> function</entry>
-     </row>
-     <row>
-      <entry><structfield>tsmnextblock</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Get next block number</quote> function</entry>
-     </row>
-     <row>
-      <entry><structfield>tsmnexttuple</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Get next tuple offset</quote> function</entry>
-     </row>
-     <row>
-      <entry><structfield>tsmexaminetuple</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry>Function which examines the tuple contents and decides if to
-        return it, or zero if none</entry>
-     </row>
-     <row>
-      <entry><structfield>tsmend</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>End the sampling scan</quote> function</entry>
-     </row>
-     <row>
-      <entry><structfield>tsmreset</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry><quote>Restart the state of sampling scan</quote> function</entry>
-     </row>
-     <row>
-      <entry><structfield>tsmcost</structfield></entry>
-      <entry><type>regproc</type></entry>
-      <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
-      <entry>Costing function</entry>
-     </row>
-    </tbody>
-   </tgroup>
-  </table>
- </sect1>
  <sect1 id="catalog-pg-tablespace">
diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 8e13555a3aa5518d11eac71c1536bb6acf4032be..8113ddf8179f10e1b0f031d2d4106837656e23e3 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4346,7 +4346,7 @@ SET xmloption TO { DOCUMENT | CONTENT };
     an object identifier.  There are also several alias types for
     <type>oid</>: <type>regproc</>, <type>regprocedure</>,
     <type>regoper</>, <type>regoperator</>, <type>regclass</>,
-    <type>regtype</>, <type>regrole</>, <type>regnamespace</>, 
+    <type>regtype</>, <type>regrole</>, <type>regnamespace</>,
     <type>regconfig</>, and <type>regdictionary</>.
     <xref linkend="datatype-oid-table"> shows an overview.
@@ -4622,6 +4622,10 @@ SELECT * FROM pg_attribute
+   <indexterm zone="datatype-pseudo">
+    <primary>tsm_handler</primary>
+   </indexterm>
    <indexterm zone="datatype-pseudo">
@@ -4716,6 +4720,11 @@ SELECT * FROM pg_attribute
         <entry>A foreign-data wrapper handler is declared to return <type>fdw_handler</>.</entry>
+       <row>
+        <entry><type>tsm_handler</></entry>
+        <entry>A tablesample method handler is declared to return <type>tsm_handler</>.</entry>
+       </row>
         <entry>Identifies a function returning an unspecified row type.</entry>
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml
index d1703e9c01ff87eb99f48e8e00b3bd5ff0db187f..7e82cdc3b124b870f39ac6d4a8077372d1cd8cd8 100644
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -243,6 +243,7 @@
+  &tablesample-method;
@@ -250,7 +251,6 @@
-  &tablesample-method;
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml
index 632d7935cb41fe946cbbd6d356ba927af6c1cf27..44810f4909c06cdfaac8db5a9cf2a0ad5746db50 100644
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -49,7 +49,8 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac
 <phrase>where <replaceable class="parameter">from_item</replaceable> can be one of:</phrase>
-    [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ] [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ] [ TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ] ]
+    [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ] [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ]
+                [ TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ] ]
     [ LATERAL ] ( <replaceable class="parameter">select</replaceable> ) [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ]
     <replaceable class="parameter">with_query_name</replaceable> [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ]
     [ LATERAL ] <replaceable class="parameter">function_name</replaceable> ( [ <replaceable class="parameter">argument</replaceable> [, ...] ] )
@@ -325,50 +326,6 @@ TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ]
-     <varlistentry>
-      <term>TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ]</term>
-      <listitem>
-       <para>
-        Table sample clause after
-        <replaceable class="parameter">table_name</replaceable> indicates that
-        a <replaceable class="parameter">sampling_method</replaceable> should
-        be used to retrieve subset of rows in the table.
-        The <replaceable class="parameter">sampling_method</replaceable> can be
-        any sampling method installed in the database. There are currently two
-        sampling methods available in the standard
-        <productname>PostgreSQL</productname> distribution:
-        <itemizedlist>
-         <listitem>
-          <para><literal>SYSTEM</literal></para>
-         </listitem>
-         <listitem>
-          <para><literal>BERNOULLI</literal></para>
-         </listitem>
-        </itemizedlist>
-        Both of these sampling methods currently accept only single argument
-        which is the percent (floating point from 0 to 100) of the rows to
-        be returned.
-        The <literal>SYSTEM</literal> sampling method does block level
-        sampling with each block having the same chance of being selected and
-        returns all rows from each selected block.
-        The <literal>BERNOULLI</literal> scans whole table and returns
-        individual rows with equal probability. Additional sampling methods
-        may be installed in the database via extensions.
-       </para>
-       <para>
-        The optional parameter <literal>REPEATABLE</literal> uses the seed
-        parameter, which can be a number or expression producing a number, as
-        a random seed for sampling. Note that subsequent commands may return
-        different results even if same <literal>REPEATABLE</literal> clause was
-        specified. This happens because <acronym>DML</acronym> statements and
-        maintenance operations such as <command>VACUUM</> may affect physical
-        distribution of data. The <function>setseed()</> function will not
-        affect the sampling result when the <literal>REPEATABLE</literal>
-        parameter is used.
-       </para>
-      </listitem>
-     </varlistentry>
       <term><replaceable class="parameter">alias</replaceable></term>
@@ -387,6 +344,61 @@ TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ]
+     <varlistentry>
+      <term><literal>TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ]</literal></term>
+      <listitem>
+       <para>
+        A <literal>TABLESAMPLE</> clause after
+        a <replaceable class="parameter">table_name</> indicates that the
+        specified <replaceable class="parameter">sampling_method</replaceable>
+        should be used to retrieve a subset of the rows in that table.
+        This sampling precedes the application of any other filters such
+        as <literal>WHERE</> clauses.
+        The standard <productname>PostgreSQL</productname> distribution
+        includes two sampling methods, <literal>BERNOULLI</literal>
+        and <literal>SYSTEM</literal>, and other sampling methods can be
+        installed in the database via extensions.
+       </para>
+       <para>
+        The <literal>BERNOULLI</> and <literal>SYSTEM</> sampling methods
+        each accept a single <replaceable class="parameter">argument</>
+        which is the fraction of the table to sample, expressed as a
+        percentage between 0 and 100.  This argument can be
+        any <type>real</>-valued expression.  (Other sampling methods might
+        accept more or different arguments.)  These two methods each return
+        a randomly-chosen sample of the table that will contain
+        approximately the specified percentage of the table's rows.
+        The <literal>BERNOULLI</literal> method scans the whole table and
+        selects or ignores individual rows independently with the specified
+        probability.
+        The <literal>SYSTEM</literal> method does block-level sampling with
+        each block having the specified chance of being selected; all rows
+        in each selected block are returned.
+        The <literal>SYSTEM</literal> method is significantly faster than
+        the <literal>BERNOULLI</literal> method when small sampling
+        percentages are specified, but it may return a less-random sample of
+        the table as a result of clustering effects.
+       </para>
+       <para>
+        The optional <literal>REPEATABLE</literal> clause specifies
+        a <replaceable class="parameter">seed</> number or expression to use
+        for generating random numbers within the sampling method.  The seed
+        value can be any non-null floating-point value.  Two queries that
+        specify the same seed and <replaceable class="parameter">argument</>
+        values will select the same sample of the table, if the table has
+        not been changed meanwhile.  But different seed values will usually
+        produce different samples.
+        If <literal>REPEATABLE</literal> is not given then a new random
+        sample is selected for each query.
+        Note that some add-on sampling methods do not
+        accept <literal>REPEATABLE</literal>, and will always produce new
+        samples on each use.
+       </para>
+      </listitem>
+     </varlistentry>
       <term><replaceable class="parameter">select</replaceable></term>
@@ -1870,6 +1882,16 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
+  <refsect2>
+   <title><literal>TABLESAMPLE</literal> Clause Restrictions</title>
+   <para>
+    The <literal>TABLESAMPLE</> clause is currently accepted only on
+    regular tables and materialized views.  According to the SQL standard
+    it should be possible to apply it to any <literal>FROM</> item.
+   </para>
+  </refsect2>
    <title>Function Calls in <literal>FROM</literal></title>
@@ -1993,19 +2015,5 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
-  <refsect2>
-   <title><literal>TABLESAMPLE</literal> clause</title>
-   <para>
-    The <literal>TABLESAMPLE</> clause is currently accepted only on physical
-    relations and materialized views.
-   </para>
-   <para>
-    Additional modules allow you to install custom sampling methods and use
-    them instead of the SQL standard methods.
-   </para>
-  </refsect2>
diff --git a/doc/src/sgml/tablesample-method.sgml b/doc/src/sgml/tablesample-method.sgml
index 48eb7fe84ea93e61166d7b689582cce2b2a1720c..22f8bbe19aa4b4c5166a8de98e1c8b26624d0d44 100644
--- a/doc/src/sgml/tablesample-method.sgml
+++ b/doc/src/sgml/tablesample-method.sgml
@@ -1,139 +1,301 @@
 <!-- doc/src/sgml/tablesample-method.sgml -->
 <chapter id="tablesample-method">
- <title>Writing A TABLESAMPLE Sampling Method</title>
+ <title>Writing A Table Sampling Method</title>
  <indexterm zone="tablesample-method">
-  <primary>tablesample method</primary>
+  <primary>table sampling method</primary>
+ </indexterm>
+ <indexterm zone="tablesample-method">
+  <primary><literal>TABLESAMPLE</literal> method</primary>
-  The <command>TABLESAMPLE</command> clause implementation in
-  <productname>PostgreSQL</> supports creating a custom sampling methods.
-  These methods control what sample of the table will be returned when the
-  <command>TABLESAMPLE</command> clause is used.
+  <productname>PostgreSQL</>'s implementation of the <literal>TABLESAMPLE</>
+  clause supports custom table sampling methods, in addition to
+  the <literal>BERNOULLI</> and <literal>SYSTEM</> methods that are required
+  by the SQL standard.  The sampling method determines which rows of the
+  table will be selected when the <literal>TABLESAMPLE</> clause is used.
- <sect1 id="tablesample-method-functions">
-  <title>Tablesample Method Functions</title>
+ <para>
+  At the SQL level, a table sampling method is represented by a single SQL
+  function, typically implemented in C, having the signature
+method_name(internal) RETURNS tsm_handler
+  The name of the function is the same as the method name appearing in the
+  <literal>TABLESAMPLE</> clause.  The <type>internal</> argument is a dummy
+  (always having value zero) that simply serves to prevent this function from
+  being called directly from a SQL command.
+  The result of the function must be a palloc'd struct of
+  type <type>TsmRoutine</>, which contains pointers to support functions for
+  the sampling method.  These support functions are plain C functions and
+  are not visible or callable at the SQL level.  The support functions are
+  described in <xref linkend="tablesample-support-functions">.
+ </para>
+ <para>
+  In addition to function pointers, the <type>TsmRoutine</> struct must
+  provide these additional fields:
+ </para>
+ <variablelist>
+  <varlistentry>
+   <term><literal>List *parameterTypes</literal></term>
+   <listitem>
+    <para>
+     This is an OID list containing the data type OIDs of the parameter(s)
+     that will be accepted by the <literal>TABLESAMPLE</> clause when this
+     sampling method is used.  For example, for the built-in methods, this
+     list contains a single item with value <literal>FLOAT4OID</>, which
+     represents the sampling percentage.  Custom sampling methods can have
+     more or different parameters.
+    </para>
+   </listitem>
+  </varlistentry>
+  <varlistentry>
+   <term><literal>bool repeatable_across_queries</literal></term>
+   <listitem>
+    <para>
+     If <literal>true</>, the sampling method can deliver identical samples
+     across successive queries, if the same parameters
+     and <literal>REPEATABLE</> seed value are supplied each time and the
+     table contents have not changed.  When this is <literal>false</>,
+     the <literal>REPEATABLE</> clause is not accepted for use with the
+     sampling method.
+    </para>
+   </listitem>
+  </varlistentry>
+  <varlistentry>
+   <term><literal>bool repeatable_across_scans</literal></term>
+   <listitem>
+    <para>
+     If <literal>true</>, the sampling method can deliver identical samples
+     across successive scans in the same query (assuming unchanging
+     parameters, seed value, and snapshot).
+     When this is <literal>false</>, the planner will not select plans that
+     would require scanning the sampled table more than once, since that
+     might result in inconsistent query output.
+    </para>
+   </listitem>
+  </varlistentry>
+ </variablelist>
+ <para>
+  The <type>TsmRoutine</> struct type is declared
+  in <filename>src/include/access/tsmapi.h</>; see that file for additional
+  details.
+ </para>
+ <para>
+  The table sampling methods included in the standard distribution are good
+  references when trying to write your own.  Look into
+  the <filename>src/backend/access/tablesample</> subdirectory of the source
+  tree for the built-in sampling methods, and into the <filename>contrib</>
+  subdirectory for add-on methods.
+ </para>
+ <sect1 id="tablesample-support-functions">
+  <title>Sampling Method Support Functions</title>
-   The tablesample method must provide following set of functions:
+   The TSM handler function returns a palloc'd <type>TsmRoutine</> struct
+   containing pointers to the support functions described below.  Most of
+   the functions are required, but some are optional, and those pointers can
+   be NULL.
-tsm_init (TableSampleDesc *desc,
-         uint32 seed, ...);
+SampleScanGetSampleSize (PlannerInfo *root,
+                         RelOptInfo *baserel,
+                         List *paramexprs,
+                         BlockNumber *pages,
+                         double *tuples);
-   Initialize the tablesample scan. The function is called at the beginning
-   of each relation scan.
+   This function is called during planning.  It must estimate the number of
+   relation pages that will be read during a sample scan, and the number of
+   tuples that will be selected by the scan.  (For example, these might be
+   determined by estimating the sampling fraction, and then multiplying
+   the <literal>baserel-&gt;pages</> and <literal>baserel-&gt;tuples</>
+   numbers by that, being sure to round the results to integral values.)
+   The <literal>paramexprs</> list holds the expression(s) that are
+   parameters to the <literal>TABLESAMPLE</> clause.  It is recommended to
+   use <function>estimate_expression_value()</> to try to reduce these
+   expressions to constants, if their values are needed for estimation
+   purposes; but the function must provide size estimates even if they cannot
+   be reduced, and it should not fail even if the values appear invalid
+   (remember that they're only estimates of what the run-time values will be).
+   The <literal>pages</> and <literal>tuples</> parameters are outputs.
-   Note that the first two parameters are required but you can specify
-   additional parameters which then will be used by the <command>TABLESAMPLE</>
-   clause to determine the required user input in the query itself.
-   This means that if your function will specify additional float4 parameter
-   named percent, the user will have to call the tablesample method with
-   expression which evaluates (or can be coerced) to float4.
-   For example this definition:
-tsm_init (TableSampleDesc *desc,
-          uint32 seed, float4 pct);
-Will lead to SQL call like this:
-... TABLESAMPLE yourmethod(0.5) ...
+InitSampleScan (SampleScanState *node,
+                int eflags);
+   Initialize for execution of a SampleScan plan node.
+   This is called during executor startup.
+   It should perform any initialization needed before processing can start.
+   The <structname>SampleScanState</> node has already been created, but
+   its <structfield>tsm_state</> field is NULL.
+   The <function>InitSampleScan</> function can palloc whatever internal
+   state data is needed by the sampling method, and store a pointer to
+   it in <literal>node-&gt;tsm_state</>.
+   Information about the table to scan is accessible through other fields
+   of the <structname>SampleScanState</> node (but note that the
+   <literal>node-&gt;ss.ss_currentScanDesc</> scan descriptor is not set
+   up yet).
+   <literal>eflags</> contains flag bits describing the executor's
+   operating mode for this plan node.
-tsm_nextblock (TableSampleDesc *desc);
-   Returns the block number of next page to be scanned. InvalidBlockNumber
-   should be returned if the sampling has reached end of the relation.
+   When <literal>(eflags &amp; EXEC_FLAG_EXPLAIN_ONLY)</> is true,
+   the scan will not actually be performed, so this function should only do
+   the minimum required to make the node state valid for <command>EXPLAIN</>
+   and <function>EndSampleScan</>.
-tsm_nexttuple (TableSampleDesc *desc, BlockNumber blockno,
-               OffsetNumber maxoffset);
-   Return next tuple offset for the current page. InvalidOffsetNumber should
-   be returned if the sampling has reached end of the page.
+   This function can be omitted (set the pointer to NULL), in which case
+   <function>BeginSampleScan</> must perform all initialization needed
+   by the sampling method.
-tsm_end (TableSampleDesc *desc);
+BeginSampleScan (SampleScanState *node,
+                 Datum *params,
+                 int nparams,
+                 uint32 seed);
-   The scan has finished, cleanup any left over state.
+   Begin execution of a sampling scan.
+   This is called just before the first attempt to fetch a tuple, and
+   may be called again if the scan needs to be restarted.
+   Information about the table to scan is accessible through fields
+   of the <structname>SampleScanState</> node (but note that the
+   <literal>node-&gt;ss.ss_currentScanDesc</> scan descriptor is not set
+   up yet).
+   The <literal>params</> array, of length <literal>nparams</>, contains the
+   values of the parameters supplied in the <literal>TABLESAMPLE</> clause.
+   These will have the number and types specified in the sampling
+   method's <literal>parameterTypes</literal> list, and have been checked
+   to not be null.
+   <literal>seed</> contains a seed to use for any random numbers generated
+   within the sampling method; it is either a hash derived from the
+   <literal>REPEATABLE</> value if one was given, or the result
+   of <literal>random()</> if not.
-tsm_reset (TableSampleDesc *desc);
-   The scan needs to rescan the relation again, reset any tablesample method
-   state.
+   This function may adjust the fields <literal>node-&gt;use_bulkread</>
+   and <literal>node-&gt;use_pagemode</>.
+   If <literal>node-&gt;use_bulkread</> is <literal>true</>, which it is by
+   default, the scan will use a buffer access strategy that encourages
+   recycling buffers after use.  It might be reasonable to set this
+   to <literal>false</> if the scan will visit only a small fraction of the
+   table's pages.
+   If <literal>node-&gt;use_pagemode</> is <literal>true</>, which it is by
+   default, the scan will perform visibility checking in a single pass for
+   all tuples on each visited page.  It might be reasonable to set this
+   to <literal>false</> if the scan will select only a small fraction of the
+   tuples on each visited page.  That will result in fewer tuple visibility
+   checks being performed, though each one will be more expensive because it
+   will require more locking.
+  </para>
+  <para>
+   If the sampling method is
+   marked <literal>repeatable_across_scans</literal>, it must be able to
+   select the same set of tuples during a rescan as it did originally; that
+   is, a fresh call of <function>BeginSampleScan</> must lead to selecting the
+   same tuples as before (if the <literal>TABLESAMPLE</> parameters
+   and seed don't change).
-tsm_cost (PlannerInfo *root, Path *path, RelOptInfo *baserel,
-          List *args, BlockNumber *pages, double *tuples);
+NextSampleBlock (SampleScanState *node);
-   This function is used by optimizer to decide best plan and is also used
-   for output of <command>EXPLAIN</>.
+   Returns the block number of the next page to be scanned, or
+   <literal>InvalidBlockNumber</> if no pages remain to be scanned.
-   There is one more function which tablesampling method can implement in order
-   to gain more fine grained control over sampling. This function is optional:
+   This function can be omitted (set the pointer to NULL), in which case
+   the core code will perform a sequential scan of the entire relation.
+   Such a scan can use synchronized scanning, so the sampling method
+   cannot assume that the relation pages are visited in the same order on
+   each scan.
-tsm_examinetuple (TableSampleDesc *desc, BlockNumber blockno,
-                  HeapTuple tuple, bool visible);
+NextSampleTuple (SampleScanState *node,
+                 BlockNumber blockno,
+                 OffsetNumber maxoffset);
-   Function that enables the sampling method to examine contents of the tuple
-   (for example to collect some internal statistics). The return value of this
-   function is used to determine if the tuple should be returned to client.
-   Note that this function will receive even invisible tuples but it is not
-   allowed to return true for such tuple (if it does,
-   <productname>PostgreSQL</> will raise an error).
+   Returns the offset number of the next tuple to be sampled on the
+   specified page, or <literal>InvalidOffsetNumber</> if no tuples remain to
+   be sampled.  <literal>maxoffset</> is the largest offset number in use
+   on the page.
+  <note>
+   <para>
+    <function>NextSampleTuple</> is not explicitly told which of the offset
+    numbers in the range <literal>1 .. maxoffset</> actually contain valid
+    tuples.  This is not normally a problem since the core code ignores
+    requests to sample missing or invisible tuples; that should not result in
+    any bias in the sample.  However, if necessary, the function can
+    examine <literal>node-&gt;ss.ss_currentScanDesc-&gt;rs_vistuples[]</>
+    to identify which tuples are valid and visible.  (This
+    requires <literal>node-&gt;use_pagemode</> to be <literal>true</>.)
+   </para>
+  </note>
+  <note>
+   <para>
+    <function>NextSampleTuple</> must <emphasis>not</> assume
+    that <literal>blockno</> is the same page number returned by the most
+    recent <function>NextSampleBlock</> call.  It was returned by some
+    previous <function>NextSampleBlock</> call, but the core code is allowed
+    to call <function>NextSampleBlock</> in advance of actually scanning
+    pages, so as to support prefetching.  It is OK to assume that once
+    sampling of a given page begins, successive <function>NextSampleTuple</>
+    calls all refer to the same page until <literal>InvalidOffsetNumber</> is
+    returned.
+   </para>
+  </note>
-  As you can see most of the tablesample method interfaces get the
-  <structname>TableSampleDesc</> as a first parameter. This structure holds
-  state of the current scan and also provides storage for the tablesample
-  method's state. It is defined as following:
-typedef struct TableSampleDesc {
-    HeapScanDesc    heapScan;
-    TupleDesc       tupDesc;
-    void           *tsmdata;
-} TableSampleDesc;
+EndSampleScan (SampleScanState *node);
-  Where <structfield>heapScan</> is the descriptor of the physical table scan.
-  It's possible to get table size info from it. The <structfield>tupDesc</>
-  represents the tuple descriptor of the tuples returned by the scan and passed
-  to the <function>tsm_examinetuple()</> interface. The <structfield>tsmdata</>
-  can be used by tablesample method itself to store any state info it might
-  need during the scan. If used by the method, it should be <function>pfree</>d
-  in <function>tsm_end()</> function.
+   End the scan and release resources.  It is normally not important
+   to release palloc'd memory, but any externally-visible resources
+   should be cleaned up.
+   This function can be omitted (set the pointer to NULL) in the common
+   case where no such resources exist.
diff --git a/doc/src/sgml/tsm-system-rows.sgml b/doc/src/sgml/tsm-system-rows.sgml
index 0c2f1779c9ad8750db938a6edb38480ae25b57cf..93aa5366649bda2b93f19f141982a77b8f1e8a01 100644
--- a/doc/src/sgml/tsm-system-rows.sgml
+++ b/doc/src/sgml/tsm-system-rows.sgml
@@ -8,24 +8,37 @@
-  The <filename>tsm_system_rows</> module provides the tablesample method
-  <literal>SYSTEM_ROWS</literal>, which can be used inside the
-  <command>TABLESAMPLE</command> clause of a <command>SELECT</command>.
+  The <filename>tsm_system_rows</> module provides the table sampling method
+  <literal>SYSTEM_ROWS</literal>, which can be used in
+  the <literal>TABLESAMPLE</> clause of a <xref linkend="sql-select">
+  command.
-  This tablesample method uses a linear probing algorithm to read sample
-  of a table and uses actual number of rows as limit (unlike the
-  <literal>SYSTEM</literal> tablesample method which limits by percentage
-  of a table).
+  This table sampling method accepts a single integer argument that is the
+  maximum number of rows to read.  The resulting sample will always contain
+  exactly that many rows, unless the table does not contain enough rows, in
+  which case the whole table is selected.
+ </para>
+ <para>
+  Like the built-in <literal>SYSTEM</literal> sampling
+  method, <literal>SYSTEM_ROWS</literal> performs block-level sampling, so
+  that the sample is not completely random but may be subject to clustering
+  effects, especially if only a small number of rows are requested.
+ </para>
+ <para>
+  <literal>SYSTEM_ROWS</literal> does not support
+  the <literal>REPEATABLE</literal> clause.
-   Here is an example of selecting sample of a table with
-   <literal>SYSTEM_ROWS</>. First install the extension:
+   Here is an example of selecting a sample of a table with
+   <literal>SYSTEM_ROWS</>.  First install the extension:
@@ -33,8 +46,7 @@ CREATE EXTENSION tsm_system_rows;
-   Then you can use it in <command>SELECT</command> command same way as other
-   tablesample methods:
+   Then you can use it in a <command>SELECT</command> command, for instance:
@@ -42,8 +54,9 @@ SELECT * FROM my_table TABLESAMPLE SYSTEM_ROWS(100);
-   The above command will return a sample of 100 rows from the table my_table
-   (less if the table does not have 100 visible rows).
+   This command will return a sample of 100 rows from the
+   table <structname>my_table</> (unless the table does not have 100
+   visible rows, in which case all its rows are returned).
diff --git a/doc/src/sgml/tsm-system-time.sgml b/doc/src/sgml/tsm-system-time.sgml
index 2343ab16d4f2bf6e3cadd4f599822f27106c26ab..3f8ff1a026f2e1a719dccb2ab7cd29ed6f293139 100644
--- a/doc/src/sgml/tsm-system-time.sgml
+++ b/doc/src/sgml/tsm-system-time.sgml
@@ -8,25 +8,39 @@
-  The <filename>tsm_system_time</> module provides the tablesample method
-  <literal>SYSTEM_TIME</literal>, which can be used inside the
-  <command>TABLESAMPLE</command> clause of a <command>SELECT</command>.
+  The <filename>tsm_system_time</> module provides the table sampling method
+  <literal>SYSTEM_TIME</literal>, which can be used in
+  the <literal>TABLESAMPLE</> clause of a <xref linkend="sql-select">
+  command.
-  This tablesample method uses a linear probing algorithm to read sample
-  of a table and uses time in milliseconds as limit (unlike the
-  <literal>SYSTEM</literal> tablesample method which limits by percentage
-  of a table). This gives you some control over the length of execution
-  of your query.
+  This table sampling method accepts a single floating-point argument that
+  is the maximum number of milliseconds to spend reading the table.  This
+  gives you direct control over how long the query takes, at the price that
+  the size of the sample becomes hard to predict.  The resulting sample will
+  contain as many rows as could be read in the specified time, unless the
+  whole table has been read first.
+ </para>
+ <para>
+  Like the built-in <literal>SYSTEM</literal> sampling
+  method, <literal>SYSTEM_TIME</literal> performs block-level sampling, so
+  that the sample is not completely random but may be subject to clustering
+  effects, especially if only a small number of rows are selected.
+ </para>
+ <para>
+  <literal>SYSTEM_TIME</literal> does not support
+  the <literal>REPEATABLE</literal> clause.
-   Here is an example of selecting sample of a table with
-   <literal>SYSTEM_TIME</>. First install the extension:
+   Here is an example of selecting a sample of a table with
+   <literal>SYSTEM_TIME</>.  First install the extension:
@@ -34,8 +48,7 @@ CREATE EXTENSION tsm_system_time;
-   Then you can use it in a <command>SELECT</command> command the same way as
-   other tablesample methods:
+   Then you can use it in a <command>SELECT</command> command, for instance:
@@ -43,8 +56,9 @@ SELECT * FROM my_table TABLESAMPLE SYSTEM_TIME(1000);
-   The above command will return as large a sample of my_table as it can read in
-   1 second (or less if it reads whole table faster).
+   This command will return as large a sample of <structname>my_table</> as
+   it can read in 1 second (1000 milliseconds).  Of course, if the whole
+   table can be read in under 1 second, all its rows will be returned.
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 6f4ff2718fed8d224837d2aeb46da44cb5cadecd..050efdc4806a716df0f0515619fe7a04bd9577d5 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -80,8 +80,11 @@ bool		synchronize_seqscans = true;
 static HeapScanDesc heap_beginscan_internal(Relation relation,
 						Snapshot snapshot,
 						int nkeys, ScanKey key,
-					  bool allow_strat, bool allow_sync, bool allow_pagemode,
-						bool is_bitmapscan, bool is_samplescan,
+						bool allow_strat,
+						bool allow_sync,
+						bool allow_pagemode,
+						bool is_bitmapscan,
+						bool is_samplescan,
 						bool temp_snap);
 static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
 					TransactionId xid, CommandId cid, int options);
@@ -207,7 +210,7 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] =
  * ----------------
 static void
-initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
+initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
 	bool		allow_strat;
 	bool		allow_sync;
@@ -257,12 +260,12 @@ initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
 		scan->rs_strategy = NULL;
-	if (is_rescan)
+	if (keep_startblock)
-		 * If rescan, keep the previous startblock setting so that rewinding a
-		 * cursor doesn't generate surprising results.  Reset the syncscan
-		 * setting, though.
+		 * When rescanning, we want to keep the previous startblock setting,
+		 * so that rewinding a cursor doesn't generate surprising results.
+		 * Reset the active syncscan setting, though.
 		scan->rs_syncscan = (allow_sync && synchronize_seqscans);
@@ -1313,6 +1316,10 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
 /* ----------------
  *		heap_beginscan	- begin relation scan
+ * heap_beginscan is the "standard" case.
+ *
+ * heap_beginscan_catalog differs in setting up its own temporary snapshot.
+ *
  * heap_beginscan_strat offers an extended API that lets the caller control
  * whether a nondefault buffer access strategy can be used, and whether
  * syncscan can be chosen (possibly resulting in the scan not starting from
@@ -1323,8 +1330,11 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
  * really quite unlike a standard seqscan, there is just enough commonality
  * to make it worth using the same data structure.
- * heap_beginscan_samplingscan is alternate entry point for setting up a
- * HeapScanDesc for a TABLESAMPLE scan.
+ * heap_beginscan_sampling is an alternative entry point for setting up a
+ * HeapScanDesc for a TABLESAMPLE scan.  As with bitmap scans, it's worth
+ * using the same data structure although the behavior is rather different.
+ * In addition to the options offered by heap_beginscan_strat, this call
+ * also allows control of whether page-mode visibility checking is used.
  * ----------------
@@ -1366,18 +1376,22 @@ heap_beginscan_bm(Relation relation, Snapshot snapshot,
 heap_beginscan_sampling(Relation relation, Snapshot snapshot,
 						int nkeys, ScanKey key,
-						bool allow_strat, bool allow_pagemode)
+					  bool allow_strat, bool allow_sync, bool allow_pagemode)
 	return heap_beginscan_internal(relation, snapshot, nkeys, key,
-								   allow_strat, false, allow_pagemode,
+								   allow_strat, allow_sync, allow_pagemode,
 								   false, true, false);
 static HeapScanDesc
 heap_beginscan_internal(Relation relation, Snapshot snapshot,
 						int nkeys, ScanKey key,
-					  bool allow_strat, bool allow_sync, bool allow_pagemode,
-					  bool is_bitmapscan, bool is_samplescan, bool temp_snap)
+						bool allow_strat,
+						bool allow_sync,
+						bool allow_pagemode,
+						bool is_bitmapscan,
+						bool is_samplescan,
+						bool temp_snap)
 	HeapScanDesc scan;
@@ -1461,6 +1475,27 @@ heap_rescan(HeapScanDesc scan,
 	initscan(scan, key, true);
+/* ----------------
+ *		heap_rescan_set_params	- restart a relation scan after changing params
+ *
+ * This call allows changing the buffer strategy, syncscan, and pagemode
+ * options before starting a fresh scan.  Note that although the actual use
+ * of syncscan might change (effectively, enabling or disabling reporting),
+ * the previously selected startblock will be kept.
+ * ----------------
+ */
+heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
+					   bool allow_strat, bool allow_sync, bool allow_pagemode)
+	/* adjust parameters */
+	scan->rs_allow_strat = allow_strat;
+	scan->rs_allow_sync = allow_sync;
+	scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot);
+	/* ... and rescan */
+	heap_rescan(scan, key);
 /* ----------------
  *		heap_endscan	- end relation scan
diff --git a/src/backend/access/tablesample/Makefile b/src/backend/access/tablesample/Makefile
index 46eeb59f9c468075c53d241fcb529175461e7a64..68d9ab281472d976e41aea3350fa768b5c296160 100644
--- a/src/backend/access/tablesample/Makefile
+++ b/src/backend/access/tablesample/Makefile
@@ -1,10 +1,10 @@
 # Makefile--
-#    Makefile for utils/tablesample
+#    Makefile for access/tablesample
-#    src/backend/utils/tablesample/Makefile
+#    src/backend/access/tablesample/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/access/tablesample
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
-OBJS = tablesample.o system.o bernoulli.o
+OBJS = bernoulli.o system.o tablesample.o
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c
index 0a539008221a5592febbeb9cf1a652eb9da0a1d6..cf88f95e757b1754da8b4d074c9abfc367560208 100644
--- a/src/backend/access/tablesample/bernoulli.c
+++ b/src/backend/access/tablesample/bernoulli.c
@@ -1,233 +1,231 @@
  * bernoulli.c
- *	  interface routines for BERNOULLI tablesample method
+ *	  support routines for BERNOULLI tablesample method
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * To ensure repeatability of samples, it is necessary that selection of a
+ * given tuple be history-independent; otherwise syncscanning would break
+ * repeatability, to say nothing of logically-irrelevant maintenance such
+ * as physical extension or shortening of the relation.
+ *
+ * To achieve that, we proceed by hashing each candidate TID together with
+ * the active seed, and then selecting it if the hash is less than the
+ * cutoff value computed from the selection probability by BeginSampleScan.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
- *	  src/backend/utils/tablesample/bernoulli.c
+ *	  src/backend/access/tablesample/bernoulli.c
 #include "postgres.h"
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h>				/* for _isnan */
+#include <math.h>
-#include "access/tablesample.h"
-#include "access/relscan.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
+#include "access/hash.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
 #include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
-#include "utils/sampling.h"
+#include "optimizer/cost.h"
+#include "utils/builtins.h"
-/* tsdesc */
+/* Private state */
 typedef struct
+	uint64		cutoff;			/* select tuples with hash less than this */
 	uint32		seed;			/* random seed */
-	BlockNumber startblock;		/* starting block, we use ths for syncscan
-								 * support */
-	BlockNumber nblocks;		/* number of blocks */
-	BlockNumber blockno;		/* current block */
-	float4		probability;	/* probabilty that tuple will be returned
-								 * (0.0-1.0) */
 	OffsetNumber lt;			/* last tuple returned from current block */
-	SamplerRandomState randstate;		/* random generator tsdesc */
 } BernoulliSamplerData;
+static void bernoulli_samplescangetsamplesize(PlannerInfo *root,
+								  RelOptInfo *baserel,
+								  List *paramexprs,
+								  BlockNumber *pages,
+								  double *tuples);
+static void bernoulli_initsamplescan(SampleScanState *node,
+						 int eflags);
+static void bernoulli_beginsamplescan(SampleScanState *node,
+						  Datum *params,
+						  int nparams,
+						  uint32 seed);
+static OffsetNumber bernoulli_nextsampletuple(SampleScanState *node,
+						  BlockNumber blockno,
+						  OffsetNumber maxoffset);
- * Initialize the state.
+ * Create a TsmRoutine descriptor for the BERNOULLI method.
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	uint32		seed = PG_GETARG_UINT32(1);
-	float4		percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
-	HeapScanDesc scan = tsdesc->heapScan;
-	BernoulliSamplerData *sampler;
+	TsmRoutine *tsm = makeNode(TsmRoutine);
+	tsm->parameterTypes = list_make1_oid(FLOAT4OID);
+	tsm->repeatable_across_queries = true;
+	tsm->repeatable_across_scans = true;
+	tsm->SampleScanGetSampleSize = bernoulli_samplescangetsamplesize;
+	tsm->InitSampleScan = bernoulli_initsamplescan;
+	tsm->BeginSampleScan = bernoulli_beginsamplescan;
+	tsm->NextSampleBlock = NULL;
+	tsm->NextSampleTuple = bernoulli_nextsampletuple;
+	tsm->EndSampleScan = NULL;
-	if (percent < 0 || percent > 100)
-		ereport(ERROR,
-				 errmsg("invalid sample size"),
-				 errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
+ * Sample size estimation.
+ */
+static void
+bernoulli_samplescangetsamplesize(PlannerInfo *root,
+								  RelOptInfo *baserel,
+								  List *paramexprs,
+								  BlockNumber *pages,
+								  double *tuples)
+	Node	   *pctnode;
+	float4		samplefract;
-	sampler = palloc0(sizeof(BernoulliSamplerData));
+	/* Try to extract an estimate for the sample percentage */
+	pctnode = (Node *) linitial(paramexprs);
+	pctnode = estimate_expression_value(root, pctnode);
-	/* Remember initial values for reinit */
-	sampler->seed = seed;
-	sampler->startblock = scan->rs_startblock;
-	sampler->nblocks = scan->rs_nblocks;
-	sampler->blockno = InvalidBlockNumber;
-	sampler->probability = percent / 100;
-	sampler->lt = InvalidOffsetNumber;
-	sampler_random_init_state(sampler->seed, sampler->randstate);
+	if (IsA(pctnode, Const) &&
+		!((Const *) pctnode)->constisnull)
+	{
+		samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
+		if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
+			samplefract /= 100.0f;
+		else
+		{
+			/* Default samplefract if the value is bogus */
+			samplefract = 0.1f;
+		}
+	}
+	else
+	{
+		/* Default samplefract if we didn't obtain a non-null Const */
+		samplefract = 0.1f;
+	}
+	/* We'll visit all pages of the baserel */
+	*pages = baserel->pages;
-	tsdesc->tsmdata = (void *) sampler;
+	*tuples = clamp_row_est(baserel->tuples * samplefract);
+ * Initialize during executor setup.
+ */
+static void
+bernoulli_initsamplescan(SampleScanState *node, int eflags)
+	node->tsm_state = palloc0(sizeof(BernoulliSamplerData));
- * Get next block number to read or InvalidBlockNumber if we are at the
- * end of the relation.
+ * Examine parameters and prepare for a sample scan.
+static void
+bernoulli_beginsamplescan(SampleScanState *node,
+						  Datum *params,
+						  int nparams,
+						  uint32 seed)
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
+	BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
+	double		percent = DatumGetFloat4(params[0]);
+	if (percent < 0 || percent > 100 || isnan(percent))
+		ereport(ERROR,
+				 errmsg("sample percentage must be between 0 and 100")));
-	 * Bernoulli sampling scans all blocks on the table and supports syncscan
-	 * so loop from startblock to startblock instead of from 0 to nblocks.
+	 * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
+	 * store that as a uint64, of course.  Note that this gives strictly
+	 * correct behavior at the limits of zero or one probability.
-	if (sampler->blockno == InvalidBlockNumber)
-		sampler->blockno = sampler->startblock;
-	else
-	{
-		sampler->blockno++;
-		if (sampler->blockno >= sampler->nblocks)
-			sampler->blockno = 0;
-		if (sampler->blockno == sampler->startblock)
-			PG_RETURN_UINT32(InvalidBlockNumber);
-	}
+	sampler->cutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
+	sampler->seed = seed;
+	sampler->lt = InvalidOffsetNumber;
-	PG_RETURN_UINT32(sampler->blockno);
+	/*
+	 * Use bulkread, since we're scanning all pages.  But pagemode visibility
+	 * checking is a win only at larger sampling fractions.  The 25% cutoff
+	 * here is based on very limited experimentation.
+	 */
+	node->use_bulkread = true;
+	node->use_pagemode = (percent >= 25);
- * Get next tuple from current block.
- *
- * This method implements the main logic in bernoulli sampling.
- * The algorithm simply generates new random number (in 0.0-1.0 range) and if
- * it falls within user specified probability (in the same range) return the
- * tuple offset.
- *
- * It is ok here to return tuple offset without knowing if tuple is visible
- * and not check it via examinetuple. The reason for that is that we do the
- * coinflip (random number generation) for every tuple in the table. Since all
- * tuples have same probability of being returned the visible and invisible
- * tuples will be returned in same ratio as they have in the actual table.
- * This means that there is no skew towards either visible or invisible tuples
- * and the number of visible tuples returned from the executor node should
- * match the fraction of visible tuples which was specified by user.
+ * Select next sampled tuple in current block.
- * This is faster than doing the coinflip in examinetuple because we don't
- * have to do visibility checks on uninteresting tuples.
+ * It is OK here to return an offset without knowing if the tuple is visible
+ * (or even exists).  The reason is that we do the coinflip for every tuple
+ * offset in the table.  Since all tuples have the same probability of being
+ * returned, it doesn't matter if we do extra coinflips for invisible tuples.
- * If we reach end of the block return InvalidOffsetNumber which tells
+ * When we reach end of the block, return InvalidOffsetNumber which tells
  * SampleScan to go to next block.
+static OffsetNumber
+bernoulli_nextsampletuple(SampleScanState *node,
+						  BlockNumber blockno,
+						  OffsetNumber maxoffset)
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	OffsetNumber maxoffset = PG_GETARG_UINT16(2);
-	BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
+	BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
 	OffsetNumber tupoffset = sampler->lt;
-	float4		probability = sampler->probability;
+	uint32		hashinput[3];
+	/* Advance to first/next tuple in block */
 	if (tupoffset == InvalidOffsetNumber)
 		tupoffset = FirstOffsetNumber;
-	 * Loop over tuple offsets until the random generator returns value that
-	 * is within the probability of returning the tuple or until we reach end
-	 * of the block.
+	 * We compute the hash by applying hash_any to an array of 3 uint32's
+	 * containing the block, offset, and seed.  This is efficient to set up,
+	 * and with the current implementation of hash_any, it gives
+	 * machine-independent results, which is a nice property for regression
+	 * testing.
-	 * (This is our implementation of bernoulli trial)
+	 * These words in the hash input are the same throughout the block:
-	while (sampler_random_fract(sampler->randstate) > probability)
+	hashinput[0] = blockno;
+	hashinput[2] = sampler->seed;
+	/*
+	 * Loop over tuple offsets until finding suitable TID or reaching end of
+	 * block.
+	 */
+	for (; tupoffset <= maxoffset; tupoffset++)
-		tupoffset++;
+		uint32		hash;
-		if (tupoffset > maxoffset)
+		hashinput[1] = tupoffset;
+		hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
+									   (int) sizeof(hashinput)));
+		if (hash < sampler->cutoff)
 	if (tupoffset > maxoffset)
-		/* Tell SampleScan that we want next block. */
 		tupoffset = InvalidOffsetNumber;
 	sampler->lt = tupoffset;
-	PG_RETURN_UINT16(tupoffset);
- * Cleanup method.
- */
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	pfree(tsdesc->tsmdata);
- * Reset tsdesc (called by ReScan).
- */
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	BernoulliSamplerData *sampler = (BernoulliSamplerData *) tsdesc->tsmdata;
-	sampler->blockno = InvalidBlockNumber;
-	sampler->lt = InvalidOffsetNumber;
-	sampler_random_init_state(sampler->seed, sampler->randstate);
- * Costing function.
- */
-	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-	Path	   *path = (Path *) PG_GETARG_POINTER(1);
-	RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
-	List	   *args = (List *) PG_GETARG_POINTER(3);
-	BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
-	double	   *tuples = (double *) PG_GETARG_POINTER(5);
-	Node	   *pctnode;
-	float4		samplesize;
-	*pages = baserel->pages;
-	pctnode = linitial(args);
-	pctnode = estimate_expression_value(root, pctnode);
-	if (IsA(pctnode, RelabelType))
-		pctnode = (Node *) ((RelabelType *) pctnode)->arg;
-	if (IsA(pctnode, Const))
-	{
-		samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
-		samplesize /= 100.0;
-	}
-	else
-	{
-		/* Default samplesize if the estimation didn't return Const. */
-		samplesize = 0.1f;
-	}
-	*tuples = path->rows * samplesize;
-	path->rows = *tuples;
+	return tupoffset;
diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c
index 1d834369a4bd11fbf6127d9d8c8d7e3e4859ca01..43c5dab71619a7a6d8e2ee22bc306e56674191c0 100644
--- a/src/backend/access/tablesample/system.c
+++ b/src/backend/access/tablesample/system.c
@@ -1,186 +1,260 @@
  * system.c
- *	  interface routines for system tablesample method
+ *	  support routines for SYSTEM tablesample method
+ * To ensure repeatability of samples, it is necessary that selection of a
+ * given tuple be history-independent; otherwise syncscanning would break
+ * repeatability, to say nothing of logically-irrelevant maintenance such
+ * as physical extension or shortening of the relation.
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * To achieve that, we proceed by hashing each candidate block number together
+ * with the active seed, and then selecting it if the hash is less than the
+ * cutoff value computed from the selection probability by BeginSampleScan.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
- *	  src/backend/utils/tablesample/system.c
+ *	  src/backend/access/tablesample/system.c
 #include "postgres.h"
-#include "fmgr.h"
+#ifdef _MSC_VER
+#include <float.h>				/* for _isnan */
+#include <math.h>
-#include "access/tablesample.h"
+#include "access/hash.h"
 #include "access/relscan.h"
-#include "nodes/execnodes.h"
-#include "nodes/relation.h"
+#include "access/tsmapi.h"
+#include "catalog/pg_type.h"
 #include "optimizer/clauses.h"
-#include "storage/bufmgr.h"
-#include "utils/sampling.h"
+#include "optimizer/cost.h"
+#include "utils/builtins.h"
- * State
- */
+/* Private state */
 typedef struct
-	BlockSamplerData bs;
+	uint64		cutoff;			/* select blocks with hash less than this */
 	uint32		seed;			/* random seed */
-	BlockNumber nblocks;		/* number of block in relation */
-	int			samplesize;		/* number of blocks to return */
+	BlockNumber nextblock;		/* next block to consider sampling */
 	OffsetNumber lt;			/* last tuple returned from current block */
 } SystemSamplerData;
- * Initializes the state.
- */
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	uint32		seed = PG_GETARG_UINT32(1);
-	float4		percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
-	HeapScanDesc scan = tsdesc->heapScan;
-	SystemSamplerData *sampler;
+static void system_samplescangetsamplesize(PlannerInfo *root,
+							   RelOptInfo *baserel,
+							   List *paramexprs,
+							   BlockNumber *pages,
+							   double *tuples);
+static void system_initsamplescan(SampleScanState *node,
+					  int eflags);
+static void system_beginsamplescan(SampleScanState *node,
+					   Datum *params,
+					   int nparams,
+					   uint32 seed);
+static BlockNumber system_nextsampleblock(SampleScanState *node);
+static OffsetNumber system_nextsampletuple(SampleScanState *node,
+					   BlockNumber blockno,
+					   OffsetNumber maxoffset);
-	if (percent < 0 || percent > 100)
-		ereport(ERROR,
-				 errmsg("invalid sample size"),
-				 errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
-	sampler = palloc0(sizeof(SystemSamplerData));
-	/* Remember initial values for reinit */
-	sampler->seed = seed;
-	sampler->nblocks = scan->rs_nblocks;
-	sampler->samplesize = 1 + (int) (sampler->nblocks * (percent / 100.0));
-	sampler->lt = InvalidOffsetNumber;
-	BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
-					  sampler->seed);
-	tsdesc->tsmdata = (void *) sampler;
- * Get next block number or InvalidBlockNumber when we're done.
- *
- * Uses the same logic as ANALYZE for picking the random blocks.
+ * Create a TsmRoutine descriptor for the SYSTEM method.
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-	BlockNumber blockno;
-	if (!BlockSampler_HasMore(&sampler->bs))
-		PG_RETURN_UINT32(InvalidBlockNumber);
-	blockno = BlockSampler_Next(&sampler->bs);
-	PG_RETURN_UINT32(blockno);
+	TsmRoutine *tsm = makeNode(TsmRoutine);
+	tsm->parameterTypes = list_make1_oid(FLOAT4OID);
+	tsm->repeatable_across_queries = true;
+	tsm->repeatable_across_scans = true;
+	tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
+	tsm->InitSampleScan = system_initsamplescan;
+	tsm->BeginSampleScan = system_beginsamplescan;
+	tsm->NextSampleBlock = system_nextsampleblock;
+	tsm->NextSampleTuple = system_nextsampletuple;
+	tsm->EndSampleScan = NULL;
- * Get next tuple offset in current block or InvalidOffsetNumber if we are done
- * with this block.
+ * Sample size estimation.
+static void
+system_samplescangetsamplesize(PlannerInfo *root,
+							   RelOptInfo *baserel,
+							   List *paramexprs,
+							   BlockNumber *pages,
+							   double *tuples)
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	OffsetNumber maxoffset = PG_GETARG_UINT16(2);
-	SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
-	OffsetNumber tupoffset = sampler->lt;
+	Node	   *pctnode;
+	float4		samplefract;
-	if (tupoffset == InvalidOffsetNumber)
-		tupoffset = FirstOffsetNumber;
-	else
-		tupoffset++;
+	/* Try to extract an estimate for the sample percentage */
+	pctnode = (Node *) linitial(paramexprs);
+	pctnode = estimate_expression_value(root, pctnode);
-	if (tupoffset > maxoffset)
-		tupoffset = InvalidOffsetNumber;
+	if (IsA(pctnode, Const) &&
+		!((Const *) pctnode)->constisnull)
+	{
+		samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
+		if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
+			samplefract /= 100.0f;
+		else
+		{
+			/* Default samplefract if the value is bogus */
+			samplefract = 0.1f;
+		}
+	}
+	else
+	{
+		/* Default samplefract if we didn't obtain a non-null Const */
+		samplefract = 0.1f;
+	}
-	sampler->lt = tupoffset;
+	/* We'll visit a sample of the pages ... */
+	*pages = clamp_row_est(baserel->pages * samplefract);
-	PG_RETURN_UINT16(tupoffset);
+	/* ... and hopefully get a representative number of tuples from them */
+	*tuples = clamp_row_est(baserel->tuples * samplefract);
- * Cleanup method.
+ * Initialize during executor setup.
+static void
+system_initsamplescan(SampleScanState *node, int eflags)
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	pfree(tsdesc->tsmdata);
+	node->tsm_state = palloc0(sizeof(SystemSamplerData));
- * Reset state (called by ReScan).
+ * Examine parameters and prepare for a sample scan.
+static void
+system_beginsamplescan(SampleScanState *node,
+					   Datum *params,
+					   int nparams,
+					   uint32 seed)
-	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
-	SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+	SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+	double		percent = DatumGetFloat4(params[0]);
+	if (percent < 0 || percent > 100 || isnan(percent))
+		ereport(ERROR,
+				 errmsg("sample percentage must be between 0 and 100")));
+	/*
+	 * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
+	 * store that as a uint64, of course.  Note that this gives strictly
+	 * correct behavior at the limits of zero or one probability.
+	 */
+	sampler->cutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
+	sampler->seed = seed;
+	sampler->nextblock = 0;
 	sampler->lt = InvalidOffsetNumber;
-	BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
-					  sampler->seed);
+	/*
+	 * Bulkread buffer access strategy probably makes sense unless we're
+	 * scanning a very small fraction of the table.  The 1% cutoff here is a
+	 * guess.  We should use pagemode visibility checking, since we scan all
+	 * tuples on each selected page.
+	 */
+	node->use_bulkread = (percent >= 1);
+	node->use_pagemode = true;
- * Costing function.
+ * Select next block to sample.
+static BlockNumber
+system_nextsampleblock(SampleScanState *node)
-	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-	Path	   *path = (Path *) PG_GETARG_POINTER(1);
-	RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
-	List	   *args = (List *) PG_GETARG_POINTER(3);
-	BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
-	double	   *tuples = (double *) PG_GETARG_POINTER(5);
-	Node	   *pctnode;
-	float4		samplesize;
+	SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+	HeapScanDesc scan = node->ss.ss_currentScanDesc;
+	BlockNumber nextblock = sampler->nextblock;
+	uint32		hashinput[2];
+	/*
+	 * We compute the hash by applying hash_any to an array of 2 uint32's
+	 * containing the block number and seed.  This is efficient to set up, and
+	 * with the current implementation of hash_any, it gives
+	 * machine-independent results, which is a nice property for regression
+	 * testing.
+	 *
+	 * This word in the hash input is the same for every block in the scan:
+	 */
+	hashinput[1] = sampler->seed;
+	/*
+	 * Loop over block numbers until finding suitable block or reaching end of
+	 * relation.
+	 */
+	for (; nextblock < scan->rs_nblocks; nextblock++)
+	{
+		uint32		hash;
-	pctnode = linitial(args);
-	pctnode = estimate_expression_value(root, pctnode);
+		hashinput[0] = nextblock;
-	if (IsA(pctnode, RelabelType))
-		pctnode = (Node *) ((RelabelType *) pctnode)->arg;
+		hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
+									   (int) sizeof(hashinput)));
+		if (hash < sampler->cutoff)
+			break;
+	}
-	if (IsA(pctnode, Const))
+	if (nextblock < scan->rs_nblocks)
-		samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
-		samplesize /= 100.0;
+		/* Found a suitable block; remember where we should start next time */
+		sampler->nextblock = nextblock + 1;
+		return nextblock;
+	/* Done, but let's reset nextblock to 0 for safety. */
+	sampler->nextblock = 0;
+	return InvalidBlockNumber;
+ * Select next sampled tuple in current block.
+ *
+ * In block sampling, we just want to sample all the tuples in each selected
+ * block.
+ *
+ * It is OK here to return an offset without knowing if the tuple is visible
+ * (or even exists); nodeSamplescan.c will deal with that.
+ *
+ * When we reach the end of the block, return InvalidOffsetNumber, which
+ * tells SampleScan to go to the next block.
+ */
+static OffsetNumber
+system_nextsampletuple(SampleScanState *node,
+					   BlockNumber blockno,
+					   OffsetNumber maxoffset)
+	SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
+	OffsetNumber tupoffset = sampler->lt;
+	/* Advance to next possible offset on page */
+	if (tupoffset == InvalidOffsetNumber)
+		tupoffset = FirstOffsetNumber;
-	{
-		/* Default samplesize if the estimation didn't return Const. */
-		samplesize = 0.1f;
-	}
+		tupoffset++;
-	*pages = baserel->pages * samplesize;
-	*tuples = path->rows * samplesize;
-	path->rows = *tuples;
+	/* Done? */
+	if (tupoffset > maxoffset)
+		tupoffset = InvalidOffsetNumber;
+	sampler->lt = tupoffset;
+	return tupoffset;
diff --git a/src/backend/access/tablesample/tablesample.c b/src/backend/access/tablesample/tablesample.c
index f21d42c8e38ca04b82579967f54c613fd84290ee..b8ad7ced743cba99021c4752fc8131aa46c99789 100644
--- a/src/backend/access/tablesample/tablesample.c
+++ b/src/backend/access/tablesample/tablesample.c
@@ -1,7 +1,7 @@
  * tablesample.c
- *		  TABLESAMPLE internal API
+ *		  Support functions for TABLESAMPLE feature
  * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -10,356 +10,31 @@
  *		  src/backend/access/tablesample/tablesample.c
- * TABLESAMPLE is the SQL standard clause for sampling the relations.
- *
- * The API is interface between the Executor and the TABLESAMPLE Methods.
- *
- * TABLESAMPLE Methods are implementations of actual sampling algorithms which
- * can be used for returning a sample of the source relation.
- * Methods don't read the table directly but are asked for block number and
- * tuple offset which they want to examine (or return) and the tablesample
- * interface implemented here does the reading for them.
- *
- * We currently only support sampling of the physical relations, but in the
- * future we might extend the API to support subqueries as well.
- *
  * -------------------------------------------------------------------------
 #include "postgres.h"
-#include "access/tablesample.h"
-#include "catalog/pg_tablesample_method.h"
-#include "miscadmin.h"
-#include "pgstat.h"
-#include "storage/bufmgr.h"
-#include "storage/predicate.h"
-#include "utils/rel.h"
-#include "utils/tqual.h"
-static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan);
- * Initialize the TABLESAMPLE Descriptor and the TABLESAMPLE Method.
- */
-TableSampleDesc *
-tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample)
-	FunctionCallInfoData fcinfo;
-	int			i;
-	List	   *args = tablesample->args;
-	ListCell   *arg;
-	ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
-	TableSampleDesc *tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc));
-	/* Load functions */
-	fmgr_info(tablesample->tsminit, &(tsdesc->tsminit));
-	fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock));
-	fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple));
-	if (OidIsValid(tablesample->tsmexaminetuple))
-		fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple));
-	else
-		tsdesc->tsmexaminetuple.fn_oid = InvalidOid;
-	fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset));
-	fmgr_info(tablesample->tsmend, &(tsdesc->tsmend));
-	InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit,
-							 list_length(args) + 2,
-							 InvalidOid, NULL, NULL);
-	tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
-	tsdesc->heapScan = scanstate->ss.ss_currentScanDesc;
-	/* First argument for init function is always TableSampleDesc */
-	fcinfo.arg[0] = PointerGetDatum(tsdesc);
-	fcinfo.argnull[0] = false;
+#include "access/tsmapi.h"
-	/*
-	 * Second arg for init function is always REPEATABLE.
-	 *
-	 * If tablesample->repeatable is NULL then REPEATABLE clause was not
-	 * specified, and we insert a random value as default.
-	 *
-	 * When specified, the expression cannot evaluate to NULL.
-	 */
-	if (tablesample->repeatable)
-	{
-		ExprState  *argstate = ExecInitExpr((Expr *) tablesample->repeatable,
-											(PlanState *) scanstate);
-		fcinfo.arg[1] = ExecEvalExpr(argstate, econtext,
-									 &fcinfo.argnull[1], NULL);
-		if (fcinfo.argnull[1])
-			ereport(ERROR,
-				errmsg("REPEATABLE clause must be NOT NULL numeric value")));
-	}
-	else
-	{
-		fcinfo.arg[1] = UInt32GetDatum(random());
-		fcinfo.argnull[1] = false;
-	}
-	/* Rest of the arguments come from user. */
-	i = 2;
-	foreach(arg, args)
-	{
-		Expr	   *argexpr = (Expr *) lfirst(arg);
-		ExprState  *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate);
-		fcinfo.arg[i] = ExecEvalExpr(argstate, econtext,
-									 &fcinfo.argnull[i], NULL);
-		i++;
-	}
-	Assert(i == fcinfo.nargs);
-	(void) FunctionCallInvoke(&fcinfo);
-	return tsdesc;
- * Get next tuple from TABLESAMPLE Method.
- */
-tablesample_getnext(TableSampleDesc *desc)
-	HeapScanDesc scan = desc->heapScan;
-	HeapTuple	tuple = &(scan->rs_ctup);
-	bool		pagemode = scan->rs_pageatatime;
-	BlockNumber blockno;
-	Page		page;
-	bool		page_all_visible;
-	ItemId		itemid;
-	OffsetNumber tupoffset,
-				maxoffset;
-	if (!scan->rs_inited)
-	{
-		/*
-		 * return null immediately if relation is empty
-		 */
-		if (scan->rs_nblocks == 0)
-		{
-			Assert(!BufferIsValid(scan->rs_cbuf));
-			tuple->t_data = NULL;
-			return NULL;
-		}
-		blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
-											  PointerGetDatum(desc)));
-		if (!BlockNumberIsValid(blockno))
-		{
-			tuple->t_data = NULL;
-			return NULL;
-		}
-		heapgetpage(scan, blockno);
-		scan->rs_inited = true;
-	}
-	else
-	{
-		/* continue from previously returned page/tuple */
-		blockno = scan->rs_cblock;		/* current page */
-	}
-	/*
-	 * When pagemode is disabled, the scan will do visibility checks for each
-	 * tuple it finds so the buffer needs to be locked.
-	 */
-	if (!pagemode)
-		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-	page = (Page) BufferGetPage(scan->rs_cbuf);
-	page_all_visible = PageIsAllVisible(page);
-	maxoffset = PageGetMaxOffsetNumber(page);
-	for (;;)
-	{
-		tupoffset = DatumGetUInt16(FunctionCall3(&desc->tsmnexttuple,
-												 PointerGetDatum(desc),
-												 UInt32GetDatum(blockno),
-												 UInt16GetDatum(maxoffset)));
-		if (OffsetNumberIsValid(tupoffset))
-		{
-			bool		visible;
-			bool		found;
-			/* Skip invalid tuple pointers. */
-			itemid = PageGetItemId(page, tupoffset);
-			if (!ItemIdIsNormal(itemid))
-				continue;
-			tuple->t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid);
-			tuple->t_len = ItemIdGetLength(itemid);
-			ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
-			if (page_all_visible)
-				visible = true;
-			else
-				visible = SampleTupleVisible(tuple, tupoffset, scan);
-			/*
-			 * Let the sampling method examine the actual tuple and decide if
-			 * we should return it.
-			 *
-			 * Note that we let it examine even invisible tuples for
-			 * statistical purposes, but not return them since user should
-			 * never see invisible tuples.
-			 */
-			if (OidIsValid(desc->tsmexaminetuple.fn_oid))
-			{
-				found = DatumGetBool(FunctionCall4(&desc->tsmexaminetuple,
-												   PointerGetDatum(desc),
-												   UInt32GetDatum(blockno),
-												   PointerGetDatum(tuple),
-												   BoolGetDatum(visible)));
-				/* Should not happen if sampling method is well written. */
-				if (found && !visible)
-					elog(ERROR, "Sampling method wanted to return invisible tuple");
-			}
-			else
-				found = visible;
-			/* Found visible tuple, return it. */
-			if (found)
-			{
-				if (!pagemode)
-					LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-				break;
-			}
-			else
-			{
-				/* Try next tuple from same page. */
-				continue;
-			}
-		}
-		if (!pagemode)
-			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-		blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
-											  PointerGetDatum(desc)));
-		/*
-		 * Report our new scan position for synchronization purposes. We don't
-		 * do that when moving backwards, however. That would just mess up any
-		 * other forward-moving scanners.
-		 *
-		 * Note: we do this before checking for end of scan so that the final
-		 * state of the position hint is back at the start of the rel.  That's
-		 * not strictly necessary, but otherwise when you run the same query
-		 * multiple times the starting position would shift a little bit
-		 * backwards on every invocation, which is confusing. We don't
-		 * guarantee any specific ordering in general, though.
-		 */
-		if (scan->rs_syncscan)
-			ss_report_location(scan->rs_rd, BlockNumberIsValid(blockno) ?
-							   blockno : scan->rs_startblock);
-		/*
-		 * Reached end of scan.
-		 */
-		if (!BlockNumberIsValid(blockno))
-		{
-			if (BufferIsValid(scan->rs_cbuf))
-				ReleaseBuffer(scan->rs_cbuf);
-			scan->rs_cbuf = InvalidBuffer;
-			scan->rs_cblock = InvalidBlockNumber;
-			tuple->t_data = NULL;
-			scan->rs_inited = false;
-			return NULL;
-		}
-		heapgetpage(scan, blockno);
-		if (!pagemode)
-			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-		page = (Page) BufferGetPage(scan->rs_cbuf);
-		page_all_visible = PageIsAllVisible(page);
-		maxoffset = PageGetMaxOffsetNumber(page);
-	}
-	pgstat_count_heap_getnext(scan->rs_rd);
-	return &(scan->rs_ctup);
- * Reset the sampling to starting state
- */
-tablesample_reset(TableSampleDesc *desc)
-	(void) FunctionCall1(&desc->tsmreset, PointerGetDatum(desc));
- * Signal the sampling method that the scan has finished.
- */
-tablesample_end(TableSampleDesc *desc)
-	(void) FunctionCall1(&desc->tsmend, PointerGetDatum(desc));
- * Check visibility of the tuple.
+ * GetTsmRoutine --- get a TsmRoutine struct by invoking the handler.
+ *
+ * This is a convenience routine that's just meant to check for errors.
-static bool
-SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+TsmRoutine *
+GetTsmRoutine(Oid tsmhandler)
-	/*
-	 * If this scan is reading whole pages at a time, there is already
-	 * visibility info present in rs_vistuples so we can just search it for
-	 * the tupoffset.
-	 */
-	if (scan->rs_pageatatime)
-	{
-		int			start = 0,
-					end = scan->rs_ntuples - 1;
-		/*
-		 * Do the binary search over rs_vistuples, it's already sorted by
-		 * OffsetNumber so we don't need to do any sorting ourselves here.
-		 *
-		 * We could use bsearch() here but it's slower for integers because of
-		 * the function call overhead and because it needs boiler plate code
-		 * it would not save us anything code-wise anyway.
-		 */
-		while (start <= end)
-		{
-			int			mid = start + (end - start) / 2;
-			OffsetNumber curoffset = scan->rs_vistuples[mid];
-			if (curoffset == tupoffset)
-				return true;
-			else if (curoffset > tupoffset)
-				end = mid - 1;
-			else
-				start = mid + 1;
-		}
-		return false;
-	}
-	else
-	{
-		/* No pagemode, we have to check the tuple itself. */
-		Snapshot	snapshot = scan->rs_snapshot;
-		Buffer		buffer = scan->rs_cbuf;
+	Datum		datum;
+	TsmRoutine *routine;
-		bool		visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
+	datum = OidFunctionCall1(tsmhandler, PointerGetDatum(NULL));
+	routine = (TsmRoutine *) DatumGetPointer(datum);
-		CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, buffer,
-										snapshot);
+	if (routine == NULL || !IsA(routine, TsmRoutine))
+		elog(ERROR, "tablesample handler function %u did not return a TsmRoutine struct",
+			 tsmhandler);
-		return visible;
-	}
+	return routine;
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 3d1139b5ba0bfb7e41041fedc1adf42f022e41ed..25130ecf124805565f61c17045c9589445c10e8c 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -40,8 +40,9 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
 	pg_ts_parser.h pg_ts_template.h pg_extension.h \
 	pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \
 	pg_foreign_table.h pg_policy.h pg_replication_origin.h \
-	pg_tablesample_method.h pg_default_acl.h pg_seclabel.h pg_shseclabel.h \
-	pg_collation.h pg_range.h pg_transform.h toasting.h indexing.h \
+	pg_default_acl.h pg_seclabel.h pg_shseclabel.h \
+	pg_collation.h pg_range.h pg_transform.h \
+	toasting.h indexing.h \
 # location of Catalog.pm
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 5d7c441739cec7a45090bab0f331c0ad2fc130c5..90b1cd835f89edad6200872360d4dabee620ce37 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -1911,6 +1911,14 @@ find_expr_references_walker(Node *node,
+	else if (IsA(node, TableSampleClause))
+	{
+		TableSampleClause *tsc = (TableSampleClause *) node;
+		add_object_address(OCLASS_PROC, tsc->tsmhandler, 0,
+						   context->addrs);
+		/* fall through to examine arguments */
+	}
 	return expression_tree_walker(node, find_expr_references_walker,
 								  (void *) context);
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 0d1ecc2a3edbb85276c3e707ad7b90840e5fd35f..5d06fa4ea65c4a751c38daaefb05b032a0b7aaca 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -96,6 +96,8 @@ static void show_sort_group_keys(PlanState *planstate, const char *qlabel,
 					 List *ancestors, ExplainState *es);
 static void show_sortorder_options(StringInfo buf, Node *sortexpr,
 					   Oid sortOperator, Oid collation, bool nullsFirst);
+static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
+				 List *ancestors, ExplainState *es);
 static void show_sort_info(SortState *sortstate, ExplainState *es);
 static void show_hash_info(HashState *hashstate, ExplainState *es);
 static void show_tidbitmap_info(BitmapHeapScanState *planstate,
@@ -116,7 +118,7 @@ static void ExplainMemberNodes(List *plans, PlanState **planstates,
 static void ExplainSubPlans(List *plans, List *ancestors,
 				const char *relationship, ExplainState *es);
 static void ExplainCustomChildren(CustomScanState *css,
-								  List *ancestors, ExplainState *es);
+					  List *ancestors, ExplainState *es);
 static void ExplainProperty(const char *qlabel, const char *value,
 				bool numeric, ExplainState *es);
 static void ExplainOpenGroup(const char *objtype, const char *labelname,
@@ -730,6 +732,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
 	switch (nodeTag(plan))
 		case T_SeqScan:
+		case T_SampleScan:
 		case T_IndexScan:
 		case T_IndexOnlyScan:
 		case T_BitmapHeapScan:
@@ -739,7 +742,6 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
 		case T_ValuesScan:
 		case T_CteScan:
 		case T_WorkTableScan:
-		case T_SampleScan:
 			*rels_used = bms_add_member(*rels_used,
 										((Scan *) plan)->scanrelid);
@@ -935,6 +937,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		case T_SeqScan:
 			pname = sname = "Seq Scan";
+		case T_SampleScan:
+			pname = sname = "Sample Scan";
+			break;
 		case T_IndexScan:
 			pname = sname = "Index Scan";
@@ -976,23 +981,6 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				pname = sname;
-		case T_SampleScan:
-			{
-				/*
-				 * Fetch the tablesample method name from RTE.
-				 *
-				 * It would be nice to also show parameters, but since we
-				 * support arbitrary expressions as parameter it might get
-				 * quite messy.
-				 */
-				RangeTblEntry *rte;
-				rte = rt_fetch(((SampleScan *) plan)->scanrelid, es->rtable);
-				custom_name = get_tablesample_method_name(rte->tablesample->tsmid);
-				pname = psprintf("Sample Scan (%s)", custom_name);
-				sname = "Sample Scan";
-			}
-			break;
 		case T_Material:
 			pname = sname = "Materialize";
@@ -1101,6 +1089,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	switch (nodeTag(plan))
 		case T_SeqScan:
+		case T_SampleScan:
 		case T_BitmapHeapScan:
 		case T_TidScan:
 		case T_SubqueryScan:
@@ -1115,9 +1104,6 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (((Scan *) plan)->scanrelid > 0)
 				ExplainScanTarget((Scan *) plan, es);
-		case T_SampleScan:
-			ExplainScanTarget((Scan *) plan, es);
-			break;
 		case T_IndexScan:
 				IndexScan  *indexscan = (IndexScan *) plan;
@@ -1363,12 +1349,15 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (es->analyze)
 				show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
+		case T_SampleScan:
+			show_tablesample(((SampleScan *) plan)->tablesample,
+							 planstate, ancestors, es);
+			/* FALL THRU to print additional fields the same as SeqScan */
 		case T_SeqScan:
 		case T_ValuesScan:
 		case T_CteScan:
 		case T_WorkTableScan:
 		case T_SubqueryScan:
-		case T_SampleScan:
 			show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
@@ -2109,6 +2098,72 @@ show_sortorder_options(StringInfo buf, Node *sortexpr,
+ * Show TABLESAMPLE properties
+ */
+static void
+show_tablesample(TableSampleClause *tsc, PlanState *planstate,
+				 List *ancestors, ExplainState *es)
+	List	   *context;
+	bool		useprefix;
+	char	   *method_name;
+	List	   *params = NIL;
+	char	   *repeatable;
+	ListCell   *lc;
+	/* Set up deparsing context */
+	context = set_deparse_context_planstate(es->deparse_cxt,
+											(Node *) planstate,
+											ancestors);
+	useprefix = list_length(es->rtable) > 1;
+	/* Get the tablesample method name */
+	method_name = get_func_name(tsc->tsmhandler);
+	/* Deparse parameter expressions */
+	foreach(lc, tsc->args)
+	{
+		Node	   *arg = (Node *) lfirst(lc);
+		params = lappend(params,
+						 deparse_expression(arg, context,
+											useprefix, false));
+	}
+	if (tsc->repeatable)
+		repeatable = deparse_expression((Node *) tsc->repeatable, context,
+										useprefix, false);
+	else
+		repeatable = NULL;
+	/* Print results */
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+		bool		first = true;
+		appendStringInfoSpaces(es->str, es->indent * 2);
+		appendStringInfo(es->str, "Sampling: %s (", method_name);
+		foreach(lc, params)
+		{
+			if (!first)
+				appendStringInfoString(es->str, ", ");
+			appendStringInfoString(es->str, (const char *) lfirst(lc));
+			first = false;
+		}
+		appendStringInfoChar(es->str, ')');
+		if (repeatable)
+			appendStringInfo(es->str, " REPEATABLE (%s)", repeatable);
+		appendStringInfoChar(es->str, '\n');
+	}
+	else
+	{
+		ExplainPropertyText("Sampling Method", method_name, es);
+		ExplainPropertyList("Sampling Parameters", params, es);
+		if (repeatable)
+			ExplainPropertyText("Repeatable Seed", repeatable, es);
+	}
  * If it's EXPLAIN ANALYZE, show tuplesort stats for a sort node
@@ -2366,13 +2421,13 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
 	switch (nodeTag(plan))
 		case T_SeqScan:
+		case T_SampleScan:
 		case T_IndexScan:
 		case T_IndexOnlyScan:
 		case T_BitmapHeapScan:
 		case T_TidScan:
 		case T_ForeignScan:
 		case T_CustomScan:
-		case T_SampleScan:
 		case T_ModifyTable:
 			/* Assert it's on a real relation */
 			Assert(rte->rtekind == RTE_RELATION);
@@ -2663,9 +2718,9 @@ ExplainCustomChildren(CustomScanState *css, List *ancestors, ExplainState *es)
 	ListCell   *cell;
 	const char *label =
-		(list_length(css->custom_ps) != 1 ? "children" : "child");
+	(list_length(css->custom_ps) != 1 ? "children" : "child");
-	foreach (cell, css->custom_ps)
+	foreach(cell, css->custom_ps)
 		ExplainNode((PlanState *) lfirst(cell), ancestors, label, NULL, es);
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 04073d3f9f916f23a750ad0ce2c45e5b0169b802..93e1e9a691c507b08aa58beddbcb74c66a1a8501 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -463,6 +463,10 @@ ExecSupportsBackwardScan(Plan *node)
 		case T_CteScan:
 			return TargetListSupportsBackwardScan(node->targetlist);
+		case T_SampleScan:
+			/* Simplify life for tablesample methods by disallowing this */
+			return false;
 		case T_IndexScan:
 			return IndexSupportsBackwardScan(((IndexScan *) node)->indexid) &&
@@ -485,9 +489,6 @@ ExecSupportsBackwardScan(Plan *node)
 			return false;
-		case T_SampleScan:
-			return false;
 		case T_Material:
 		case T_Sort:
 			/* these don't evaluate tlist */
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
index 4c1c5237b7d203c5bd19f48375d87586980776d5..dbe84b0baa86886be548194b2630e6f39497293b 100644
--- a/src/backend/executor/nodeSamplescan.c
+++ b/src/backend/executor/nodeSamplescan.c
@@ -3,7 +3,7 @@
  * nodeSamplescan.c
  *	  Support routines for sample scans of relations (table sampling).
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -14,22 +14,23 @@
 #include "postgres.h"
-#include "access/tablesample.h"
+#include "access/hash.h"
+#include "access/relscan.h"
+#include "access/tsmapi.h"
 #include "executor/executor.h"
 #include "executor/nodeSamplescan.h"
 #include "miscadmin.h"
-#include "parser/parsetree.h"
 #include "pgstat.h"
-#include "storage/bufmgr.h"
 #include "storage/predicate.h"
 #include "utils/rel.h"
-#include "utils/syscache.h"
 #include "utils/tqual.h"
-static void InitScanRelation(SampleScanState *node, EState *estate,
-				 int eflags, TableSampleClause *tablesample);
+static void InitScanRelation(SampleScanState *node, EState *estate, int eflags);
 static TupleTableSlot *SampleNext(SampleScanState *node);
+static void tablesample_init(SampleScanState *scanstate);
+static HeapTuple tablesample_getnext(SampleScanState *scanstate);
+static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
+				   HeapScanDesc scan);
 /* ----------------------------------------------------------------
  *						Scan Support
@@ -45,23 +46,26 @@ static TupleTableSlot *SampleNext(SampleScanState *node);
 static TupleTableSlot *
 SampleNext(SampleScanState *node)
-	TupleTableSlot *slot;
-	TableSampleDesc *tsdesc;
 	HeapTuple	tuple;
+	TupleTableSlot *slot;
-	 * get information from the scan state
+	 * if this is the first call within a scan, initialize
-	slot = node->ss.ss_ScanTupleSlot;
-	tsdesc = node->tsdesc;
+	if (!node->begun)
+		tablesample_init(node);
+	/*
+	 * get the next tuple, and store it in our result slot
+	 */
+	tuple = tablesample_getnext(node);
-	tuple = tablesample_getnext(tsdesc);
+	slot = node->ss.ss_ScanTupleSlot;
 	if (tuple)
 		ExecStoreTuple(tuple,	/* tuple to store */
 					   slot,	/* slot to store in */
-					   tsdesc->heapScan->rs_cbuf,		/* buffer associated
-														 * with this tuple */
+					   node->ss.ss_currentScanDesc->rs_cbuf,	/* tuple's buffer */
 					   false);	/* don't pfree this pointer */
@@ -75,7 +79,10 @@ SampleNext(SampleScanState *node)
 static bool
 SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
-	/* No need to recheck for SampleScan */
+	/*
+	 * No need to recheck for SampleScan, since like SeqScan we don't pass any
+	 * checkable keys to heap_beginscan.
+	 */
 	return true;
@@ -103,8 +110,7 @@ ExecSampleScan(SampleScanState *node)
  * ----------------------------------------------------------------
 static void
-InitScanRelation(SampleScanState *node, EState *estate, int eflags,
-				 TableSampleClause *tablesample)
+InitScanRelation(SampleScanState *node, EState *estate, int eflags)
 	Relation	currentRelation;
@@ -113,19 +119,13 @@ InitScanRelation(SampleScanState *node, EState *estate, int eflags,
 	 * open that relation and acquire appropriate lock on it.
 	currentRelation = ExecOpenScanRelation(estate,
-								((SampleScan *) node->ss.ps.plan)->scanrelid,
+						   ((SampleScan *) node->ss.ps.plan)->scan.scanrelid,
 	node->ss.ss_currentRelation = currentRelation;
-	/*
-	 * Even though we aren't going to do a conventional seqscan, it is useful
-	 * to create a HeapScanDesc --- many of the fields in it are usable.
-	 */
-	node->ss.ss_currentScanDesc =
-		heap_beginscan_sampling(currentRelation, estate->es_snapshot, 0, NULL,
-								tablesample->tsmseqscan,
-								tablesample->tsmpagemode);
+	/* we won't set up the HeapScanDesc till later */
+	node->ss.ss_currentScanDesc = NULL;
 	/* and report the scan tuple slot's rowtype */
 	ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation));
@@ -140,12 +140,11 @@ SampleScanState *
 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
 	SampleScanState *scanstate;
-	RangeTblEntry *rte = rt_fetch(node->scanrelid,
-								  estate->es_range_table);
+	TableSampleClause *tsc = node->tablesample;
+	TsmRoutine *tsm;
 	Assert(outerPlan(node) == NULL);
 	Assert(innerPlan(node) == NULL);
-	Assert(rte->tablesample != NULL);
 	 * create state structure
@@ -165,10 +164,17 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
 	 * initialize child expressions
 	scanstate->ss.ps.targetlist = (List *)
-		ExecInitExpr((Expr *) node->plan.targetlist,
+		ExecInitExpr((Expr *) node->scan.plan.targetlist,
 					 (PlanState *) scanstate);
 	scanstate->ss.ps.qual = (List *)
-		ExecInitExpr((Expr *) node->plan.qual,
+		ExecInitExpr((Expr *) node->scan.plan.qual,
+					 (PlanState *) scanstate);
+	scanstate->args = (List *)
+		ExecInitExpr((Expr *) tsc->args,
+					 (PlanState *) scanstate);
+	scanstate->repeatable =
+		ExecInitExpr(tsc->repeatable,
 					 (PlanState *) scanstate);
@@ -180,7 +186,7 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
 	 * initialize scan relation
-	InitScanRelation(scanstate, estate, eflags, rte->tablesample);
+	InitScanRelation(scanstate, estate, eflags);
 	scanstate->ss.ps.ps_TupFromTlist = false;
@@ -190,7 +196,25 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
-	scanstate->tsdesc = tablesample_init(scanstate, rte->tablesample);
+	/*
+	 * If we don't have a REPEATABLE clause, select a random seed.  We want to
+	 * do this just once, since the seed shouldn't change over rescans.
+	 */
+	if (tsc->repeatable == NULL)
+		scanstate->seed = random();
+	/*
+	 * Finally, initialize the TABLESAMPLE method handler.
+	 */
+	tsm = GetTsmRoutine(tsc->tsmhandler);
+	scanstate->tsmroutine = tsm;
+	scanstate->tsm_state = NULL;
+	if (tsm->InitSampleScan)
+		tsm->InitSampleScan(scanstate, eflags);
+	/* We'll do BeginSampleScan later; we can't evaluate params yet */
+	scanstate->begun = false;
 	return scanstate;
@@ -207,7 +231,8 @@ ExecEndSampleScan(SampleScanState *node)
 	 * Tell sampling function that we finished the scan.
-	tablesample_end(node->tsdesc);
+	if (node->tsmroutine->EndSampleScan)
+		node->tsmroutine->EndSampleScan(node);
 	 * Free the exprcontext
@@ -223,7 +248,8 @@ ExecEndSampleScan(SampleScanState *node)
 	 * close heap scan
-	heap_endscan(node->ss.ss_currentScanDesc);
+	if (node->ss.ss_currentScanDesc)
+		heap_endscan(node->ss.ss_currentScanDesc);
 	 * close the heap relation.
@@ -231,11 +257,6 @@ ExecEndSampleScan(SampleScanState *node)
-/* ----------------------------------------------------------------
- *						Join Support
- * ----------------------------------------------------------------
- */
 /* ----------------------------------------------------------------
  *		ExecReScanSampleScan
@@ -246,12 +267,336 @@ ExecEndSampleScan(SampleScanState *node)
 ExecReScanSampleScan(SampleScanState *node)
-	heap_rescan(node->ss.ss_currentScanDesc, NULL);
+	/* Remember we need to do BeginSampleScan again (if we did it at all) */
+	node->begun = false;
+	ExecScanReScan(&node->ss);
+ * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
+ */
+static void
+tablesample_init(SampleScanState *scanstate)
+	TsmRoutine *tsm = scanstate->tsmroutine;
+	ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
+	Datum	   *params;
+	Datum		datum;
+	bool		isnull;
+	uint32		seed;
+	bool		allow_sync;
+	int			i;
+	ListCell   *arg;
+	params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
+	i = 0;
+	foreach(arg, scanstate->args)
+	{
+		ExprState  *argstate = (ExprState *) lfirst(arg);
+		params[i] = ExecEvalExprSwitchContext(argstate,
+											  econtext,
+											  &isnull,
+											  NULL);
+		if (isnull)
+			ereport(ERROR,
+					 errmsg("TABLESAMPLE parameter cannot be null")));
+		i++;
+	}
+	if (scanstate->repeatable)
+	{
+		datum = ExecEvalExprSwitchContext(scanstate->repeatable,
+										  econtext,
+										  &isnull,
+										  NULL);
+		if (isnull)
+			ereport(ERROR,
+				 errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
+		/*
+		 * The REPEATABLE parameter has been coerced to float8 by the parser.
+		 * The reason for using float8 at the SQL level is that it will
+		 * produce unsurprising results both for users used to databases that
+		 * accept only integers in the REPEATABLE clause and for those who
+		 * might expect that REPEATABLE works like setseed() (a float in the
+		 * range from -1 to 1).
+		 *
+		 * We use hashfloat8() to convert the supplied value into a suitable
+		 * seed.  For regression-testing purposes, that has the convenient
+		 * property that REPEATABLE(0) gives a machine-independent result.
+		 */
+		seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
+	}
+	else
+	{
+		/* Use the seed selected by ExecInitSampleScan */
+		seed = scanstate->seed;
+	}
+	/* Set default values for params that BeginSampleScan can adjust */
+	scanstate->use_bulkread = true;
+	scanstate->use_pagemode = true;
+	/* Let tablesample method do its thing */
+	tsm->BeginSampleScan(scanstate,
+						 params,
+						 list_length(scanstate->args),
+						 seed);
+	/* We'll use syncscan if there's no NextSampleBlock function */
+	allow_sync = (tsm->NextSampleBlock == NULL);
+	/* Now we can create or reset the HeapScanDesc */
+	if (scanstate->ss.ss_currentScanDesc == NULL)
+	{
+		scanstate->ss.ss_currentScanDesc =
+			heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
+									scanstate->ss.ps.state->es_snapshot,
+									0, NULL,
+									scanstate->use_bulkread,
+									allow_sync,
+									scanstate->use_pagemode);
+	}
+	else
+	{
+		heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
+							   scanstate->use_bulkread,
+							   allow_sync,
+							   scanstate->use_pagemode);
+	}
+	pfree(params);
+	/* And we're initialized. */
+	scanstate->begun = true;
+ * Get next tuple from TABLESAMPLE method.
+ *
+ * Note: an awful lot of this is copied-and-pasted from heapam.c.  It would
+ * perhaps be better to refactor to share more code.
+ */
+static HeapTuple
+tablesample_getnext(SampleScanState *scanstate)
+	TsmRoutine *tsm = scanstate->tsmroutine;
+	HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
+	HeapTuple	tuple = &(scan->rs_ctup);
+	Snapshot	snapshot = scan->rs_snapshot;
+	bool		pagemode = scan->rs_pageatatime;
+	BlockNumber blockno;
+	Page		page;
+	bool		all_visible;
+	OffsetNumber maxoffset;
+	if (!scan->rs_inited)
+	{
+		/*
+		 * return null immediately if relation is empty
+		 */
+		if (scan->rs_nblocks == 0)
+		{
+			Assert(!BufferIsValid(scan->rs_cbuf));
+			tuple->t_data = NULL;
+			return NULL;
+		}
+		if (tsm->NextSampleBlock)
+		{
+			blockno = tsm->NextSampleBlock(scanstate);
+			if (!BlockNumberIsValid(blockno))
+			{
+				tuple->t_data = NULL;
+				return NULL;
+			}
+		}
+		else
+			blockno = scan->rs_startblock;
+		Assert(blockno < scan->rs_nblocks);
+		heapgetpage(scan, blockno);
+		scan->rs_inited = true;
+	}
+	else
+	{
+		/* continue from previously returned page/tuple */
+		blockno = scan->rs_cblock;		/* current page */
+	}
-	 * Tell sampling function to reset its state for rescan.
+	 * When not using pagemode, we must lock the buffer during tuple
+	 * visibility checks.
-	tablesample_reset(node->tsdesc);
+	if (!pagemode)
+		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+	page = (Page) BufferGetPage(scan->rs_cbuf);
+	all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+	maxoffset = PageGetMaxOffsetNumber(page);
+	for (;;)
+	{
+		OffsetNumber tupoffset;
+		bool		finished;
+		/* Ask the tablesample method which tuples to check on this page. */
+		tupoffset = tsm->NextSampleTuple(scanstate,
+										 blockno,
+										 maxoffset);
+		if (OffsetNumberIsValid(tupoffset))
+		{
+			ItemId		itemid;
+			bool		visible;
+			/* Skip invalid tuple pointers. */
+			itemid = PageGetItemId(page, tupoffset);
+			if (!ItemIdIsNormal(itemid))
+				continue;
+			tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+			tuple->t_len = ItemIdGetLength(itemid);
+			ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+			if (all_visible)
+				visible = true;
+			else
+				visible = SampleTupleVisible(tuple, tupoffset, scan);
+			/* in pagemode, heapgetpage did this for us */
+			if (!pagemode)
+				CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
+												scan->rs_cbuf, snapshot);
+			if (visible)
+			{
+				/* Found visible tuple, return it. */
+				if (!pagemode)
+					LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+				break;
+			}
+			else
+			{
+				/* Try next tuple from same page. */
+				continue;
+			}
+		}
+		/*
+		 * if we get here, it means we've exhausted the items on this page and
+		 * it's time to move to the next.
+		 */
+		if (!pagemode)
+			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+		if (tsm->NextSampleBlock)
+		{
+			blockno = tsm->NextSampleBlock(scanstate);
+			Assert(!scan->rs_syncscan);
+			finished = !BlockNumberIsValid(blockno);
+		}
+		else
+		{
+			/* Without NextSampleBlock, just do a plain forward seqscan. */
+			blockno++;
+			if (blockno >= scan->rs_nblocks)
+				blockno = 0;
+			/*
+			 * Report our new scan position for synchronization purposes.
+			 *
+			 * Note: we do this before checking for end of scan so that the
+			 * final state of the position hint is back at the start of the
+			 * rel.  That's not strictly necessary, but otherwise when you run
+			 * the same query multiple times the starting position would shift
+			 * a little bit backwards on every invocation, which is confusing.
+			 * We don't guarantee any specific ordering in general, though.
+			 */
+			if (scan->rs_syncscan)
+				ss_report_location(scan->rs_rd, blockno);
+			finished = (blockno == scan->rs_startblock);
+		}
+		/*
+		 * Reached end of scan?
+		 */
+		if (finished)
+		{
+			if (BufferIsValid(scan->rs_cbuf))
+				ReleaseBuffer(scan->rs_cbuf);
+			scan->rs_cbuf = InvalidBuffer;
+			scan->rs_cblock = InvalidBlockNumber;
+			tuple->t_data = NULL;
+			scan->rs_inited = false;
+			return NULL;
+		}
+		Assert(blockno < scan->rs_nblocks);
+		heapgetpage(scan, blockno);
+		/* Re-establish state for new page */
+		if (!pagemode)
+			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+		page = (Page) BufferGetPage(scan->rs_cbuf);
+		all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+		maxoffset = PageGetMaxOffsetNumber(page);
+	}
+	/* Count successfully-fetched tuples as heap fetches */
+	pgstat_count_heap_getnext(scan->rs_rd);
+	return &(scan->rs_ctup);
-	ExecScanReScan(&node->ss);
+ * Check visibility of the tuple.
+ */
+static bool
+SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+	if (scan->rs_pageatatime)
+	{
+		/*
+		 * In pageatatime mode, heapgetpage() already did visibility checks,
+		 * so just look at the info it left in rs_vistuples[].
+		 *
+		 * We use a binary search over the known-sorted array.  Note: we could
+		 * save some effort if we insisted that NextSampleTuple select tuples
+		 * in increasing order, but it's not clear that there would be enough
+		 * gain to justify the restriction.
+		 */
+		int			start = 0,
+					end = scan->rs_ntuples - 1;
+		while (start <= end)
+		{
+			int			mid = (start + end) / 2;
+			OffsetNumber curoffset = scan->rs_vistuples[mid];
+			if (tupoffset == curoffset)
+				return true;
+			else if (tupoffset < curoffset)
+				end = mid - 1;
+			else
+				start = mid + 1;
+		}
+		return false;
+	}
+	else
+	{
+		/* Otherwise, we have to check the tuple individually. */
+		return HeapTupleSatisfiesVisibility(tuple,
+											scan->rs_snapshot,
+											scan->rs_cbuf);
+	}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 6a08c2db211b4e65a103b4aacf5e52c5f41b5adc..7248440ead363a0960b20a2f5b73f8662e4c85d0 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -359,6 +359,27 @@ _copySeqScan(const SeqScan *from)
 	return newnode;
+ * _copySampleScan
+ */
+static SampleScan *
+_copySampleScan(const SampleScan *from)
+	SampleScan *newnode = makeNode(SampleScan);
+	/*
+	 * copy node superclass fields
+	 */
+	CopyScanFields((const Scan *) from, (Scan *) newnode);
+	/*
+	 * copy remainder of node
+	 */
+	COPY_NODE_FIELD(tablesample);
+	return newnode;
  * _copyIndexScan
@@ -641,22 +662,6 @@ _copyCustomScan(const CustomScan *from)
 	return newnode;
- * _copySampleScan
- */
-static SampleScan *
-_copySampleScan(const SampleScan *from)
-	SampleScan *newnode = makeNode(SampleScan);
-	/*
-	 * copy node superclass fields
-	 */
-	CopyScanFields((const Scan *) from, (Scan *) newnode);
-	return newnode;
  * CopyJoinFields
@@ -2143,6 +2148,18 @@ _copyRangeTblFunction(const RangeTblFunction *from)
 	return newnode;
+static TableSampleClause *
+_copyTableSampleClause(const TableSampleClause *from)
+	TableSampleClause *newnode = makeNode(TableSampleClause);
+	COPY_SCALAR_FIELD(tsmhandler);
+	COPY_NODE_FIELD(repeatable);
+	return newnode;
 static WithCheckOption *
 _copyWithCheckOption(const WithCheckOption *from)
@@ -2271,40 +2288,6 @@ _copyCommonTableExpr(const CommonTableExpr *from)
 	return newnode;
-static RangeTableSample *
-_copyRangeTableSample(const RangeTableSample *from)
-	RangeTableSample *newnode = makeNode(RangeTableSample);
-	COPY_NODE_FIELD(relation);
-	COPY_NODE_FIELD(repeatable);
-	return newnode;
-static TableSampleClause *
-_copyTableSampleClause(const TableSampleClause *from)
-	TableSampleClause *newnode = makeNode(TableSampleClause);
-	COPY_SCALAR_FIELD(tsmseqscan);
-	COPY_SCALAR_FIELD(tsmpagemode);
-	COPY_SCALAR_FIELD(tsmnextblock);
-	COPY_SCALAR_FIELD(tsmnexttuple);
-	COPY_SCALAR_FIELD(tsmexaminetuple);
-	COPY_SCALAR_FIELD(tsmreset);
-	COPY_NODE_FIELD(repeatable);
-	return newnode;
 static A_Expr *
 _copyAExpr(const A_Expr *from)
@@ -2532,6 +2515,20 @@ _copyRangeFunction(const RangeFunction *from)
 	return newnode;
+static RangeTableSample *
+_copyRangeTableSample(const RangeTableSample *from)
+	RangeTableSample *newnode = makeNode(RangeTableSample);
+	COPY_NODE_FIELD(relation);
+	COPY_NODE_FIELD(method);
+	COPY_NODE_FIELD(repeatable);
+	return newnode;
 static TypeCast *
 _copyTypeCast(const TypeCast *from)
@@ -4237,6 +4234,9 @@ copyObject(const void *from)
 		case T_SeqScan:
 			retval = _copySeqScan(from);
+		case T_SampleScan:
+			retval = _copySampleScan(from);
+			break;
 		case T_IndexScan:
 			retval = _copyIndexScan(from);
@@ -4273,9 +4273,6 @@ copyObject(const void *from)
 		case T_CustomScan:
 			retval = _copyCustomScan(from);
-		case T_SampleScan:
-			retval = _copySampleScan(from);
-			break;
 		case T_Join:
 			retval = _copyJoin(from);
@@ -4897,6 +4894,9 @@ copyObject(const void *from)
 		case T_RangeFunction:
 			retval = _copyRangeFunction(from);
+		case T_RangeTableSample:
+			retval = _copyRangeTableSample(from);
+			break;
 		case T_TypeName:
 			retval = _copyTypeName(from);
@@ -4921,6 +4921,9 @@ copyObject(const void *from)
 		case T_RangeTblFunction:
 			retval = _copyRangeTblFunction(from);
+		case T_TableSampleClause:
+			retval = _copyTableSampleClause(from);
+			break;
 		case T_WithCheckOption:
 			retval = _copyWithCheckOption(from);
@@ -4948,12 +4951,6 @@ copyObject(const void *from)
 		case T_CommonTableExpr:
 			retval = _copyCommonTableExpr(from);
-		case T_RangeTableSample:
-			retval = _copyRangeTableSample(from);
-			break;
-		case T_TableSampleClause:
-			retval = _copyTableSampleClause(from);
-			break;
 		case T_FuncWithArgs:
 			retval = _copyFuncWithArgs(from);
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index faf5eedab4ed4b7412970b82621d6a42704d008c..6597dbc33e12f9d7e942eb7fa4ca72a566475989 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2290,6 +2290,18 @@ _equalRangeFunction(const RangeFunction *a, const RangeFunction *b)
 	return true;
+static bool
+_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b)
+	COMPARE_NODE_FIELD(repeatable);
+	return true;
 static bool
 _equalIndexElem(const IndexElem *a, const IndexElem *b)
@@ -2428,6 +2440,16 @@ _equalRangeTblFunction(const RangeTblFunction *a, const RangeTblFunction *b)
 	return true;
+static bool
+_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b)
+	COMPARE_NODE_FIELD(repeatable);
+	return true;
 static bool
 _equalWithCheckOption(const WithCheckOption *a, const WithCheckOption *b)
@@ -2538,36 +2560,6 @@ _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b)
 	return true;
-static bool
-_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b)
-	COMPARE_NODE_FIELD(repeatable);
-	return true;
-static bool
-_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b)
-	COMPARE_SCALAR_FIELD(tsmpagemode);
-	COMPARE_SCALAR_FIELD(tsmnextblock);
-	COMPARE_SCALAR_FIELD(tsmnexttuple);
-	COMPARE_SCALAR_FIELD(tsmexaminetuple);
-	COMPARE_NODE_FIELD(repeatable);
-	return true;
 static bool
 _equalXmlSerialize(const XmlSerialize *a, const XmlSerialize *b)
@@ -3260,6 +3252,9 @@ equal(const void *a, const void *b)
 		case T_RangeFunction:
 			retval = _equalRangeFunction(a, b);
+		case T_RangeTableSample:
+			retval = _equalRangeTableSample(a, b);
+			break;
 		case T_TypeName:
 			retval = _equalTypeName(a, b);
@@ -3284,6 +3279,9 @@ equal(const void *a, const void *b)
 		case T_RangeTblFunction:
 			retval = _equalRangeTblFunction(a, b);
+		case T_TableSampleClause:
+			retval = _equalTableSampleClause(a, b);
+			break;
 		case T_WithCheckOption:
 			retval = _equalWithCheckOption(a, b);
@@ -3311,12 +3309,6 @@ equal(const void *a, const void *b)
 		case T_CommonTableExpr:
 			retval = _equalCommonTableExpr(a, b);
-		case T_RangeTableSample:
-			retval = _equalRangeTableSample(a, b);
-			break;
-		case T_TableSampleClause:
-			retval = _equalTableSampleClause(a, b);
-			break;
 		case T_FuncWithArgs:
 			retval = _equalFuncWithArgs(a, b);
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index b1e3e6e489320086dce3500b1418178095e99714..c517dfd9d69c6264ecdd0c4904b8b8337ccea099 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -1486,6 +1486,9 @@ exprLocation(const Node *expr)
 		case T_WindowDef:
 			loc = ((const WindowDef *) expr)->location;
+		case T_RangeTableSample:
+			loc = ((const RangeTableSample *) expr)->location;
+			break;
 		case T_TypeName:
 			loc = ((const TypeName *) expr)->location;
@@ -1995,6 +1998,17 @@ expression_tree_walker(Node *node,
 			return walker(((PlaceHolderInfo *) node)->ph_var, context);
 		case T_RangeTblFunction:
 			return walker(((RangeTblFunction *) node)->funcexpr, context);
+		case T_TableSampleClause:
+			{
+				TableSampleClause *tsc = (TableSampleClause *) node;
+				if (expression_tree_walker((Node *) tsc->args,
+										   walker, context))
+					return true;
+				if (walker((Node *) tsc->repeatable, context))
+					return true;
+			}
+			break;
 			elog(ERROR, "unrecognized node type: %d",
 				 (int) nodeTag(node));
@@ -2082,13 +2096,8 @@ range_table_walker(List *rtable,
 		switch (rte->rtekind)
 			case RTE_RELATION:
-				if (rte->tablesample)
-				{
-					if (walker(rte->tablesample->args, context))
-						return true;
-					if (walker(rte->tablesample->repeatable, context))
-						return true;
-				}
+				if (walker(rte->tablesample, context))
+					return true;
 			case RTE_CTE:
 				/* nothing to do */
@@ -2782,6 +2791,17 @@ expression_tree_mutator(Node *node,
 				return (Node *) newnode;
+		case T_TableSampleClause:
+			{
+				TableSampleClause *tsc = (TableSampleClause *) node;
+				TableSampleClause *newnode;
+				FLATCOPY(newnode, tsc, TableSampleClause);
+				MUTATE(newnode->args, tsc->args, List *);
+				MUTATE(newnode->repeatable, tsc->repeatable, Expr *);
+				return (Node *) newnode;
+			}
+			break;
 			elog(ERROR, "unrecognized node type: %d",
 				 (int) nodeTag(node));
@@ -2868,20 +2888,12 @@ range_table_mutator(List *rtable,
 		switch (rte->rtekind)
 			case RTE_RELATION:
-				if (rte->tablesample)
-				{
-					CHECKFLATCOPY(newrte->tablesample, rte->tablesample,
-								  TableSampleClause);
-					MUTATE(newrte->tablesample->args,
-						   newrte->tablesample->args,
-						   List *);
-					MUTATE(newrte->tablesample->repeatable,
-						   newrte->tablesample->repeatable,
-						   Node *);
-				}
+				MUTATE(newrte->tablesample, rte->tablesample,
+					   TableSampleClause *);
+				/* we don't bother to copy eref, aliases, etc; OK? */
 			case RTE_CTE:
-				/* we don't bother to copy eref, aliases, etc; OK? */
+				/* nothing to do */
 			case RTE_SUBQUERY:
 				if (!(flags & QTW_IGNORE_RT_SUBQUERIES))
@@ -3316,6 +3328,19 @@ raw_expression_tree_walker(Node *node,
 					return true;
+		case T_RangeTableSample:
+			{
+				RangeTableSample *rts = (RangeTableSample *) node;
+				if (walker(rts->relation, context))
+					return true;
+				/* method name is deemed uninteresting */
+				if (walker(rts->args, context))
+					return true;
+				if (walker(rts->repeatable, context))
+					return true;
+			}
+			break;
 		case T_TypeName:
 				TypeName   *tn = (TypeName *) node;
@@ -3380,18 +3405,6 @@ raw_expression_tree_walker(Node *node,
 		case T_CommonTableExpr:
 			return walker(((CommonTableExpr *) node)->ctequery, context);
-		case T_RangeTableSample:
-			{
-				RangeTableSample *rts = (RangeTableSample *) node;
-				if (walker(rts->relation, context))
-					return true;
-				if (walker(rts->repeatable, context))
-					return true;
-				if (walker(rts->args, context))
-					return true;
-			}
-			break;
 			elog(ERROR, "unrecognized node type: %d",
 				 (int) nodeTag(node));
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 87304ba9bf65df548c5361bcf33eb7f45aaa0c83..81725d6e59a20d2e2dfc9efea995202e843afb7b 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -444,6 +444,16 @@ _outSeqScan(StringInfo str, const SeqScan *node)
 	_outScanInfo(str, (const Scan *) node);
+static void
+_outSampleScan(StringInfo str, const SampleScan *node)
+	_outScanInfo(str, (const Scan *) node);
+	WRITE_NODE_FIELD(tablesample);
 static void
 _outIndexScan(StringInfo str, const IndexScan *node)
@@ -591,14 +601,6 @@ _outCustomScan(StringInfo str, const CustomScan *node)
 		node->methods->TextOutCustomScan(str, node);
-static void
-_outSampleScan(StringInfo str, const SampleScan *node)
-	_outScanInfo(str, (const Scan *) node);
 static void
 _outJoin(StringInfo str, const Join *node)
@@ -2478,36 +2480,6 @@ _outCommonTableExpr(StringInfo str, const CommonTableExpr *node)
-static void
-_outRangeTableSample(StringInfo str, const RangeTableSample *node)
-	WRITE_NODE_FIELD(relation);
-	WRITE_NODE_FIELD(repeatable);
-static void
-_outTableSampleClause(StringInfo str, const TableSampleClause *node)
-	WRITE_BOOL_FIELD(tsmseqscan);
-	WRITE_BOOL_FIELD(tsmpagemode);
-	WRITE_OID_FIELD(tsminit);
-	WRITE_OID_FIELD(tsmnextblock);
-	WRITE_OID_FIELD(tsmnexttuple);
-	WRITE_OID_FIELD(tsmexaminetuple);
-	WRITE_OID_FIELD(tsmend);
-	WRITE_OID_FIELD(tsmreset);
-	WRITE_OID_FIELD(tsmcost);
-	WRITE_NODE_FIELD(repeatable);
 static void
 _outSetOperationStmt(StringInfo str, const SetOperationStmt *node)
@@ -2594,6 +2566,16 @@ _outRangeTblFunction(StringInfo str, const RangeTblFunction *node)
+static void
+_outTableSampleClause(StringInfo str, const TableSampleClause *node)
+	WRITE_OID_FIELD(tsmhandler);
+	WRITE_NODE_FIELD(repeatable);
 static void
 _outAExpr(StringInfo str, const A_Expr *node)
@@ -2845,6 +2827,18 @@ _outRangeFunction(StringInfo str, const RangeFunction *node)
+static void
+_outRangeTableSample(StringInfo str, const RangeTableSample *node)
+	WRITE_NODE_FIELD(relation);
+	WRITE_NODE_FIELD(repeatable);
 static void
 _outConstraint(StringInfo str, const Constraint *node)
@@ -3002,6 +2996,9 @@ _outNode(StringInfo str, const void *obj)
 			case T_SeqScan:
 				_outSeqScan(str, obj);
+			case T_SampleScan:
+				_outSampleScan(str, obj);
+				break;
 			case T_IndexScan:
 				_outIndexScan(str, obj);
@@ -3038,9 +3035,6 @@ _outNode(StringInfo str, const void *obj)
 			case T_CustomScan:
 				_outCustomScan(str, obj);
-			case T_SampleScan:
-				_outSampleScan(str, obj);
-				break;
 			case T_Join:
 				_outJoin(str, obj);
@@ -3393,12 +3387,6 @@ _outNode(StringInfo str, const void *obj)
 			case T_CommonTableExpr:
 				_outCommonTableExpr(str, obj);
-			case T_RangeTableSample:
-				_outRangeTableSample(str, obj);
-				break;
-			case T_TableSampleClause:
-				_outTableSampleClause(str, obj);
-				break;
 			case T_SetOperationStmt:
 				_outSetOperationStmt(str, obj);
@@ -3408,6 +3396,9 @@ _outNode(StringInfo str, const void *obj)
 			case T_RangeTblFunction:
 				_outRangeTblFunction(str, obj);
+			case T_TableSampleClause:
+				_outTableSampleClause(str, obj);
+				break;
 			case T_A_Expr:
 				_outAExpr(str, obj);
@@ -3450,6 +3441,9 @@ _outNode(StringInfo str, const void *obj)
 			case T_RangeFunction:
 				_outRangeFunction(str, obj);
+			case T_RangeTableSample:
+				_outRangeTableSample(str, obj);
+				break;
 			case T_Constraint:
 				_outConstraint(str, obj);
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index f5a40fbfb44b8d648a9aa32c1089055c4d3c70a6..71be840eac9f76a44dfbf258fcec629cdd2268d7 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -367,46 +367,6 @@ _readCommonTableExpr(void)
- * _readRangeTableSample
- */
-static RangeTableSample *
-	READ_LOCALS(RangeTableSample);
-	READ_NODE_FIELD(relation);
-	READ_NODE_FIELD(repeatable);
- * _readTableSampleClause
- */
-static TableSampleClause *
-	READ_LOCALS(TableSampleClause);
-	READ_OID_FIELD(tsmid);
-	READ_BOOL_FIELD(tsmseqscan);
-	READ_BOOL_FIELD(tsmpagemode);
-	READ_OID_FIELD(tsminit);
-	READ_OID_FIELD(tsmnextblock);
-	READ_OID_FIELD(tsmnexttuple);
-	READ_OID_FIELD(tsmexaminetuple);
-	READ_OID_FIELD(tsmend);
-	READ_OID_FIELD(tsmreset);
-	READ_OID_FIELD(tsmcost);
-	READ_NODE_FIELD(repeatable);
  * _readSetOperationStmt
@@ -1391,6 +1351,21 @@ _readRangeTblFunction(void)
+ * _readTableSampleClause
+ */
+static TableSampleClause *
+	READ_LOCALS(TableSampleClause);
+	READ_OID_FIELD(tsmhandler);
+	READ_NODE_FIELD(repeatable);
  * parseNodeString
@@ -1426,10 +1401,6 @@ parseNodeString(void)
 		return_value = _readRowMarkClause();
 	else if (MATCH("COMMONTABLEEXPR", 15))
 		return_value = _readCommonTableExpr();
-		return_value = _readRangeTableSample();
-		return_value = _readTableSampleClause();
 		return_value = _readSetOperationStmt();
 	else if (MATCH("ALIAS", 5))
@@ -1528,6 +1499,8 @@ parseNodeString(void)
 		return_value = _readRangeTblEntry();
 		return_value = _readRangeTblFunction();
+		return_value = _readTableSampleClause();
 	else if (MATCH("NOTIFY", 6))
 		return_value = _readNotifyStmt();
 	else if (MATCH("DECLARECURSOR", 13))
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 888eeac5151842a285fb16c94da8f338ce89567b..1590be116750846b8957fe8a9ae1ed03b89d6917 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -18,6 +18,7 @@
 #include <math.h>
 #include "access/sysattr.h"
+#include "access/tsmapi.h"
 #include "catalog/pg_class.h"
 #include "catalog/pg_operator.h"
 #include "foreign/fdwapi.h"
@@ -390,7 +391,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 				else if (rte->tablesample != NULL)
-					/* Build sample scan on relation */
+					/* Sampled relation */
 					set_tablesample_rel_pathlist(root, rel, rte);
@@ -480,11 +481,40 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  * set_tablesample_rel_size
- *	  Set size estimates for a sampled relation.
+ *	  Set size estimates for a sampled relation
 static void
 set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+	TableSampleClause *tsc = rte->tablesample;
+	TsmRoutine *tsm;
+	BlockNumber pages;
+	double		tuples;
+	/*
+	 * Test any partial indexes of rel for applicability.  We must do this
+	 * first since partial unique indexes can affect size estimates.
+	 */
+	check_partial_indexes(root, rel);
+	/*
+	 * Call the sampling method's estimation function to estimate the number
+	 * of pages it will read and the number of tuples it will return.  (Note:
+	 * we assume the function returns sane values.)
+	 */
+	tsm = GetTsmRoutine(tsc->tsmhandler);
+	tsm->SampleScanGetSampleSize(root, rel, tsc->args,
+								 &pages, &tuples);
+	/*
+	 * For the moment, because we will only consider a SampleScan path for the
+	 * rel, it's okay to just overwrite the pages and tuples estimates for the
+	 * whole relation.  If we ever consider multiple path types for sampled
+	 * rels, we'll need more complication.
+	 */
+	rel->pages = pages;
+	rel->tuples = tuples;
 	/* Mark rel with estimated output rows, width, etc */
 	set_baserel_size_estimates(root, rel);
@@ -492,8 +522,6 @@ set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  * set_tablesample_rel_pathlist
  *	  Build access paths for a sampled relation
- *
- * There is only one possible path - sampling scan
 static void
 set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
@@ -502,15 +530,41 @@ set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *
 	Path	   *path;
-	 * We don't support pushing join clauses into the quals of a seqscan, but
-	 * it could still have required parameterization due to LATERAL refs in
-	 * its tlist.
+	 * We don't support pushing join clauses into the quals of a samplescan,
+	 * but it could still have required parameterization due to LATERAL refs
+	 * in its tlist or TABLESAMPLE arguments.
 	required_outer = rel->lateral_relids;
-	/* We only do sample scan if it was requested */
+	/* Consider sampled scan */
 	path = create_samplescan_path(root, rel, required_outer);
-	rel->pathlist = list_make1(path);
+	/*
+	 * If the sampling method does not support repeatable scans, we must avoid
+	 * plans that would scan the rel multiple times.  Ideally, we'd simply
+	 * avoid putting the rel on the inside of a nestloop join; but adding such
+	 * a consideration to the planner seems like a great deal of complication
+	 * to support an uncommon usage of second-rate sampling methods.  Instead,
+	 * if there is a risk that the query might perform an unsafe join, just
+	 * wrap the SampleScan in a Materialize node.  We can check for joins by
+	 * counting the membership of all_baserels (note that this correctly
+	 * counts inheritance trees as single rels).  If we're inside a subquery,
+	 * we can't easily check whether a join might occur in the outer query, so
+	 * just assume one is possible.
+	 *
+	 * GetTsmRoutine is relatively expensive compared to the other tests here,
+	 * so check repeatable_across_scans last, even though that's a bit odd.
+	 */
+	if ((root->query_level > 1 ||
+		 bms_membership(root->all_baserels) != BMS_SINGLETON) &&
+	 !(GetTsmRoutine(rte->tablesample->tsmhandler)->repeatable_across_scans))
+	{
+		path = (Path *) create_material_path(rel, path);
+	}
+	add_path(rel, path);
+	/* For the moment, at least, there are no other paths to consider */
@@ -2450,7 +2504,33 @@ print_path(PlannerInfo *root, Path *path, int indent)
 	switch (nodeTag(path))
 		case T_Path:
-			ptype = "SeqScan";
+			switch (path->pathtype)
+			{
+				case T_SeqScan:
+					ptype = "SeqScan";
+					break;
+				case T_SampleScan:
+					ptype = "SampleScan";
+					break;
+				case T_SubqueryScan:
+					ptype = "SubqueryScan";
+					break;
+				case T_FunctionScan:
+					ptype = "FunctionScan";
+					break;
+				case T_ValuesScan:
+					ptype = "ValuesScan";
+					break;
+				case T_CteScan:
+					ptype = "CteScan";
+					break;
+				case T_WorkTableScan:
+					ptype = "WorkTableScan";
+					break;
+				default:
+					ptype = "???Path";
+					break;
+			}
 		case T_IndexPath:
 			ptype = "IdxScan";
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 0d302f66bee4c478dc4cc99729c79a75af727982..7069f6041102e6cb995316a3d951fe61adc0d367 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -74,6 +74,7 @@
 #include <math.h>
 #include "access/htup_details.h"
+#include "access/tsmapi.h"
 #include "executor/executor.h"
 #include "executor/nodeHash.h"
 #include "miscadmin.h"
@@ -223,64 +224,66 @@ cost_seqscan(Path *path, PlannerInfo *root,
  * cost_samplescan
  *	  Determines and returns the cost of scanning a relation using sampling.
- * From planner/optimizer perspective, we don't care all that much about cost
- * itself since there is always only one scan path to consider when sampling
- * scan is present, but number of rows estimation is still important.
- *
  * 'baserel' is the relation to be scanned
  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
-cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+cost_samplescan(Path *path, PlannerInfo *root,
+				RelOptInfo *baserel, ParamPathInfo *param_info)
 	Cost		startup_cost = 0;
 	Cost		run_cost = 0;
+	RangeTblEntry *rte;
+	TableSampleClause *tsc;
+	TsmRoutine *tsm;
 	double		spc_seq_page_cost,
 	QualCost	qpqual_cost;
 	Cost		cpu_per_tuple;
-	BlockNumber pages;
-	double		tuples;
-	RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
-	TableSampleClause *tablesample = rte->tablesample;
-	/* Should only be applied to base relations */
+	/* Should only be applied to base relations with tablesample clauses */
 	Assert(baserel->relid > 0);
-	Assert(baserel->rtekind == RTE_RELATION);
+	rte = planner_rt_fetch(baserel->relid, root);
+	Assert(rte->rtekind == RTE_RELATION);
+	tsc = rte->tablesample;
+	Assert(tsc != NULL);
+	tsm = GetTsmRoutine(tsc->tsmhandler);
 	/* Mark the path with the correct row estimate */
-	if (path->param_info)
-		path->rows = path->param_info->ppi_rows;
+	if (param_info)
+		path->rows = param_info->ppi_rows;
 		path->rows = baserel->rows;
-	/* Call the sampling method's costing function. */
-	OidFunctionCall6(tablesample->tsmcost, PointerGetDatum(root),
-					 PointerGetDatum(path), PointerGetDatum(baserel),
-					 PointerGetDatum(tablesample->args),
-					 PointerGetDatum(&pages), PointerGetDatum(&tuples));
 	/* fetch estimated page cost for tablespace containing table */
-	spc_page_cost = tablesample->tsmseqscan ? spc_seq_page_cost :
-		spc_random_page_cost;
+	/* if NextSampleBlock is used, assume random access, else sequential */
+	spc_page_cost = (tsm->NextSampleBlock != NULL) ?
+		spc_random_page_cost : spc_seq_page_cost;
-	 * disk costs
+	 * disk costs (recall that baserel->pages has already been set to the
+	 * number of pages the sampling method will visit)
-	run_cost += spc_page_cost * pages;
+	run_cost += spc_page_cost * baserel->pages;
-	/* CPU costs */
-	get_restriction_qual_cost(root, baserel, path->param_info, &qpqual_cost);
+	/*
+	 * CPU costs (recall that baserel->tuples has already been set to the
+	 * number of tuples the sampling method will select).  Note that we ignore
+	 * execution cost of the TABLESAMPLE parameter expressions; they will be
+	 * evaluated only once per scan, and in most usages they'll likely be
+	 * simple constants anyway.  We also don't charge anything for the
+	 * calculations the sampling method might do internally.
+	 */
+	get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
 	startup_cost += qpqual_cost.startup;
 	cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
-	run_cost += cpu_per_tuple * tuples;
+	run_cost += cpu_per_tuple * baserel->tuples;
 	path->startup_cost = startup_cost;
 	path->total_cost = startup_cost + run_cost;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 8d15c8ede90f9be93dec263ce61a0eb20dea5e54..f461586e08c5b3a2711eb55c003c26d2907388c7 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -102,7 +102,8 @@ static List *order_qual_clauses(PlannerInfo *root, List *clauses);
 static void copy_path_costsize(Plan *dest, Path *src);
 static void copy_plan_costsize(Plan *dest, Plan *src);
 static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
-static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid);
+static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid,
+				TableSampleClause *tsc);
 static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
 			   Oid indexid, List *indexqual, List *indexqualorig,
 			   List *indexorderby, List *indexorderbyorig,
@@ -1148,7 +1149,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path,
  * create_samplescan_plan
- *	 Returns a samplecan plan for the base relation scanned by 'best_path'
+ *	 Returns a samplescan plan for the base relation scanned by 'best_path'
  *	 with restriction clauses 'scan_clauses' and targetlist 'tlist'.
 static SampleScan *
@@ -1157,11 +1158,15 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path,
 	SampleScan *scan_plan;
 	Index		scan_relid = best_path->parent->relid;
+	RangeTblEntry *rte;
+	TableSampleClause *tsc;
-	/* it should be a base rel with tablesample clause... */
+	/* it should be a base rel with a tablesample clause... */
 	Assert(scan_relid > 0);
-	Assert(best_path->parent->rtekind == RTE_RELATION);
-	Assert(best_path->pathtype == T_SampleScan);
+	rte = planner_rt_fetch(scan_relid, root);
+	Assert(rte->rtekind == RTE_RELATION);
+	tsc = rte->tablesample;
+	Assert(tsc != NULL);
 	/* Sort clauses into best execution order */
 	scan_clauses = order_qual_clauses(root, scan_clauses);
@@ -1174,13 +1179,16 @@ create_samplescan_plan(PlannerInfo *root, Path *best_path,
 		scan_clauses = (List *)
 			replace_nestloop_params(root, (Node *) scan_clauses);
+		tsc = (TableSampleClause *)
+			replace_nestloop_params(root, (Node *) tsc);
 	scan_plan = make_samplescan(tlist,
-								scan_relid);
+								scan_relid,
+								tsc);
-	copy_path_costsize(&scan_plan->plan, best_path);
+	copy_path_costsize(&scan_plan->scan.plan, best_path);
 	return scan_plan;
@@ -2161,9 +2169,9 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path,
 	ListCell   *lc;
 	/* Recursively transform child paths. */
-	foreach (lc, best_path->custom_paths)
+	foreach(lc, best_path->custom_paths)
-		Plan   *plan = create_plan_recurse(root, (Path *) lfirst(lc));
+		Plan	   *plan = create_plan_recurse(root, (Path *) lfirst(lc));
 		custom_plans = lappend(custom_plans, plan);
@@ -3437,17 +3445,19 @@ make_seqscan(List *qptlist,
 static SampleScan *
 make_samplescan(List *qptlist,
 				List *qpqual,
-				Index scanrelid)
+				Index scanrelid,
+				TableSampleClause *tsc)
 	SampleScan *node = makeNode(SampleScan);
-	Plan	   *plan = &node->plan;
+	Plan	   *plan = &node->scan.plan;
 	/* cost should be inserted by caller */
 	plan->targetlist = qptlist;
 	plan->qual = qpqual;
 	plan->lefttree = NULL;
 	plan->righttree = NULL;
-	node->scanrelid = scanrelid;
+	node->scan.scanrelid = scanrelid;
+	node->tablesample = tsc;
 	return node;
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 00b2625d342ee375e884bf30945e4b29230a2118..701b99254db0d1745f3c2965bb8e445f58d4a45b 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -306,7 +306,9 @@ extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex)
 	/* Fetch the appropriate variables */
-	if (rte->rtekind == RTE_SUBQUERY)
+	if (rte->rtekind == RTE_RELATION)
+		vars = pull_vars_of_level((Node *) rte->tablesample, 0);
+	else if (rte->rtekind == RTE_SUBQUERY)
 		vars = pull_vars_of_level((Node *) rte->subquery, 1);
 	else if (rte->rtekind == RTE_FUNCTION)
 		vars = pull_vars_of_level((Node *) rte->functions, 0);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index a6ce96efc48623c187233a1b04e45a64f8eeeae0..b95cc95e5d9a201949d89d713e0cfa77be6a1a22 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -505,14 +505,10 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 		if (rte->rtekind == RTE_RELATION)
 			if (rte->tablesample)
-			{
-				rte->tablesample->args = (List *)
-					preprocess_expression(root, (Node *) rte->tablesample->args,
-				rte->tablesample->repeatable = (Node *)
-					preprocess_expression(root, rte->tablesample->repeatable,
+				rte->tablesample = (TableSampleClause *)
+					preprocess_expression(root,
+										  (Node *) rte->tablesample,
-			}
 		else if (rte->rtekind == RTE_SUBQUERY)
@@ -697,11 +693,14 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
 	 * If the query has any join RTEs, replace join alias variables with
 	 * base-relation variables.  We must do this before sublink processing,
 	 * else sublinks expanded out from join aliases would not get processed.
-	 * We can skip it in non-lateral RTE functions and VALUES lists, however,
-	 * since they can't contain any Vars of the current query level.
+	 * We can skip it in non-lateral RTE functions, VALUES lists, and
+	 * TABLESAMPLE clauses, however, since they can't contain any Vars of the
+	 * current query level.
 	if (root->hasJoinRTEs &&
-		!(kind == EXPRKIND_RTFUNC || kind == EXPRKIND_VALUES))
+		!(kind == EXPRKIND_RTFUNC ||
+		  kind == EXPRKIND_VALUES ||
 		expr = flatten_join_alias_vars(root, expr);
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 258e541754aa165612ff41eb141fc2bc1db9198b..ea185d4b4cff6b98cb1da5a709b376595eb6d652 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -372,9 +372,8 @@ flatten_rtes_walker(Node *node, PlannerGlobal *glob)
  * In the flat rangetable, we zero out substructure pointers that are not
  * needed by the executor; this reduces the storage space and copying cost
- * for cached plans.  We keep only the tablesample field (which we'd otherwise
- * have to put in the plan tree, anyway); the ctename, alias and eref Alias
- * fields, which are needed by EXPLAIN; and the selectedCols, insertedCols and
+ * for cached plans.  We keep only the ctename, alias and eref Alias fields,
+ * which are needed by EXPLAIN, and the selectedCols, insertedCols and
  * updatedCols bitmaps, which are needed for executor-startup permissions
  * checking and for trigger event checking.
@@ -388,6 +387,7 @@ add_rte_to_flat_rtable(PlannerGlobal *glob, RangeTblEntry *rte)
 	memcpy(newrte, rte, sizeof(RangeTblEntry));
 	/* zap unneeded sub-structure */
+	newrte->tablesample = NULL;
 	newrte->subquery = NULL;
 	newrte->joinaliasvars = NIL;
 	newrte->functions = NIL;
@@ -456,11 +456,13 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
 				SampleScan *splan = (SampleScan *) plan;
-				splan->scanrelid += rtoffset;
-				splan->plan.targetlist =
-					fix_scan_list(root, splan->plan.targetlist, rtoffset);
-				splan->plan.qual =
-					fix_scan_list(root, splan->plan.qual, rtoffset);
+				splan->scan.scanrelid += rtoffset;
+				splan->scan.plan.targetlist =
+					fix_scan_list(root, splan->scan.plan.targetlist, rtoffset);
+				splan->scan.plan.qual =
+					fix_scan_list(root, splan->scan.plan.qual, rtoffset);
+				splan->tablesample = (TableSampleClause *)
+					fix_scan_expr(root, (Node *) splan->tablesample, rtoffset);
 		case T_IndexScan:
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 4708b87f330b6145505afeac4a0be5eb00d441cb..f3038cdffda3ad9467935327df6c1cf7913798f1 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2216,7 +2216,12 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
 		case T_SeqScan:
+			context.paramids = bms_add_members(context.paramids, scan_params);
+			break;
 		case T_SampleScan:
+			finalize_primnode((Node *) ((SampleScan *) plan)->tablesample,
+							  &context);
 			context.paramids = bms_add_members(context.paramids, scan_params);
@@ -2384,7 +2389,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
 					bms_add_members(context.paramids, scan_params);
 				/* child nodes if any */
-				foreach (lc, cscan->custom_plans)
+				foreach(lc, cscan->custom_plans)
 					context.paramids =
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 92b0562843458b403517d2c008c9db5cd26a1f79..34144ccaf0fa69161541bf5785d9d20d9a162cda 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -1091,12 +1091,15 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 			switch (child_rte->rtekind)
+				case RTE_RELATION:
+					if (child_rte->tablesample)
+						child_rte->lateral = true;
+					break;
 				case RTE_SUBQUERY:
 				case RTE_FUNCTION:
 				case RTE_VALUES:
 					child_rte->lateral = true;
-				case RTE_RELATION:
 				case RTE_JOIN:
 				case RTE_CTE:
 					/* these can't contain any lateral references */
@@ -1909,6 +1912,13 @@ replace_vars_in_jointree(Node *jtnode,
 				switch (rte->rtekind)
+					case RTE_RELATION:
+						/* shouldn't be marked LATERAL unless tablesample */
+						Assert(rte->tablesample);
+						rte->tablesample = (TableSampleClause *)
+							pullup_replace_vars((Node *) rte->tablesample,
+												context);
+						break;
 					case RTE_SUBQUERY:
 						rte->subquery =
@@ -1924,7 +1934,6 @@ replace_vars_in_jointree(Node *jtnode,
 							pullup_replace_vars((Node *) rte->values_lists,
-					case RTE_RELATION:
 					case RTE_JOIN:
 					case RTE_CTE:
 						/* these shouldn't be marked LATERAL */
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index f7f33bbe7721b6a01865f1deaf2f25d0d6d96a96..935bc2b9667d33e7e8ddc9a7469b42ea0a2c0fdf 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -713,7 +713,7 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
  * create_samplescan_path
- *	  Like seqscan but uses sampling function while scanning.
+ *	  Creates a path node for a sampled table scan.
 Path *
 create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
@@ -726,7 +726,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer
 	pathnode->pathkeys = NIL;	/* samplescan has unordered result */
-	cost_samplescan(pathnode, root, rel);
+	cost_samplescan(pathnode, root, rel, pathnode->param_info);
 	return pathnode;
@@ -1773,6 +1773,8 @@ reparameterize_path(PlannerInfo *root, Path *path,
 		case T_SeqScan:
 			return create_seqscan_path(root, rel, required_outer);
+		case T_SampleScan:
+			return (Path *) create_samplescan_path(root, rel, required_outer);
 		case T_IndexScan:
 		case T_IndexOnlyScan:
@@ -1805,8 +1807,6 @@ reparameterize_path(PlannerInfo *root, Path *path,
 		case T_SubqueryScan:
 			return create_subqueryscan_path(root, rel, path->pathkeys,
-		case T_SampleScan:
-			return (Path *) create_samplescan_path(root, rel, required_outer);
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 2b02a2e523380cf2a12d2171c63b4ed887cb7285..8f053e47e82df8aebb228138691dc131f61805f8 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -457,8 +457,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <jexpr>	joined_table
 %type <range>	relation_expr
 %type <range>	relation_expr_opt_alias
+%type <node>	tablesample_clause opt_repeatable_clause
 %type <target>	target_el single_set_clause set_target insert_column_item
-%type <node>	relation_expr_tablesample tablesample_clause opt_repeatable_clause
 %type <str>		generic_option_name
 %type <node>	generic_option_arg
@@ -10491,9 +10491,13 @@ table_ref:	relation_expr opt_alias_clause
 					$1->alias = $2;
 					$$ = (Node *) $1;
-			| relation_expr_tablesample
+			| relation_expr opt_alias_clause tablesample_clause
-					$$ = (Node *) $1;
+					RangeTableSample *n = (RangeTableSample *) $3;
+					$1->alias = $2;
+					/* relation_expr goes inside the RangeTableSample node */
+					n->relation = (Node *) $1;
+					$$ = (Node *) n;
 			| func_table func_alias_clause
@@ -10820,23 +10824,18 @@ relation_expr_opt_alias: relation_expr					%prec UMINUS
-relation_expr_tablesample: relation_expr opt_alias_clause tablesample_clause
-				{
-					RangeTableSample *n = (RangeTableSample *) $3;
-					n->relation = $1;
-					n->relation->alias = $2;
-					$$ = (Node *) n;
-				}
-		;
+ * TABLESAMPLE decoration in a FROM item
+ */
-			TABLESAMPLE ColId '(' expr_list ')' opt_repeatable_clause
+			TABLESAMPLE func_name '(' expr_list ')' opt_repeatable_clause
 					RangeTableSample *n = makeNode(RangeTableSample);
+					/* n->relation will be filled in later */
 					n->method = $2;
 					n->args = $4;
 					n->repeatable = $6;
+					n->location = @2;
 					$$ = (Node *) n;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index e90e1d68e3a535ad9c549e5fd242e0c6d69232c4..4e490b23b4e272fadc4272fbc06c7453465f2a9d 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -18,8 +18,8 @@
 #include "miscadmin.h"
 #include "access/heapam.h"
+#include "access/tsmapi.h"
 #include "catalog/catalog.h"
-#include "access/htup_details.h"
 #include "catalog/heap.h"
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_type.h"
@@ -43,7 +43,7 @@
 #include "utils/guc.h"
 #include "utils/lsyscache.h"
 #include "utils/rel.h"
-#include "utils/syscache.h"
 /* Convenience macro for the most common makeNamespaceItem() case */
 #define makeDefaultNSItem(rte)	makeNamespaceItem(rte, true, true, false, true)
@@ -63,6 +63,8 @@ static RangeTblEntry *transformRangeSubselect(ParseState *pstate,
 						RangeSubselect *r);
 static RangeTblEntry *transformRangeFunction(ParseState *pstate,
 					   RangeFunction *r);
+static TableSampleClause *transformRangeTableSample(ParseState *pstate,
+						  RangeTableSample *rts);
 static Node *transformFromClauseItem(ParseState *pstate, Node *n,
 						RangeTblEntry **top_rte, int *top_rti,
 						List **namespace);
@@ -423,40 +425,6 @@ transformJoinOnClause(ParseState *pstate, JoinExpr *j, List *namespace)
 	return result;
-static RangeTblEntry *
-transformTableSampleEntry(ParseState *pstate, RangeTableSample *rv)
-	RangeTblEntry *rte = NULL;
-	CommonTableExpr *cte = NULL;
-	TableSampleClause *tablesample = NULL;
-	/* if relation has an unqualified name, it might be a CTE reference */
-	if (!rv->relation->schemaname)
-	{
-		Index		levelsup;
-		cte = scanNameSpaceForCTE(pstate, rv->relation->relname, &levelsup);
-	}
-	/* We first need to build a range table entry */
-	if (!cte)
-		rte = transformTableEntry(pstate, rv->relation);
-	if (!rte ||
-		(rte->relkind != RELKIND_RELATION &&
-		 rte->relkind != RELKIND_MATVIEW))
-		ereport(ERROR,
-				(errcode(ERRCODE_SYNTAX_ERROR),
-				 errmsg("TABLESAMPLE clause can only be used on tables and materialized views"),
-				 parser_errposition(pstate, rv->relation->location)));
-	tablesample = ParseTableSample(pstate, rv->method, rv->repeatable,
-								   rv->args, rv->relation->location);
-	rte->tablesample = tablesample;
-	return rte;
  * transformTableEntry --- transform a RangeVar (simple relation reference)
@@ -748,6 +716,109 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r)
 	return rte;
+ * transformRangeTableSample --- transform a TABLESAMPLE clause
+ *
+ * Caller has already transformed rts->relation; we just have to validate
+ * the remaining fields and create a TableSampleClause node.
+ */
+static TableSampleClause *
+transformRangeTableSample(ParseState *pstate, RangeTableSample *rts)
+	TableSampleClause *tablesample;
+	Oid			handlerOid;
+	Oid			funcargtypes[1];
+	TsmRoutine *tsm;
+	List	   *fargs;
+	ListCell   *larg,
+			   *ltyp;
+	/*
+	 * To validate the sample method name, look up the handler function, which
+	 * has the same name, one dummy INTERNAL argument, and a result type of
+	 * tsm_handler.  (Note: tablesample method names are not schema-qualified
+	 * in the SQL standard; but since they are just functions to us, we allow
+	 * schema qualification to resolve any potential ambiguity.)
+	 */
+	funcargtypes[0] = INTERNALOID;
+	handlerOid = LookupFuncName(rts->method, 1, funcargtypes, true);
+	/* no valid OID: complain about no-such-method, not no-such-function */
+	if (!OidIsValid(handlerOid))
+		ereport(ERROR,
+				 errmsg("tablesample method %s does not exist",
+						NameListToString(rts->method)),
+				 parser_errposition(pstate, rts->location)));
+	/* a same-named function that doesn't return tsm_handler is not a method */
+	if (get_func_rettype(handlerOid) != TSM_HANDLEROID)
+		ereport(ERROR,
+				 errmsg("function %s must return type \"tsm_handler\"",
+						NameListToString(rts->method)),
+				 parser_errposition(pstate, rts->location)));
+	/* OK, run the handler to get TsmRoutine (arg types, REPEATABLE support) */
+	tsm = GetTsmRoutine(handlerOid);
+	tablesample = makeNode(TableSampleClause);
+	tablesample->tsmhandler = handlerOid;	/* node records the handler's OID */
+	/* check user provided the expected number of arguments */
+	if (list_length(rts->args) != list_length(tsm->parameterTypes))
+		ereport(ERROR,
+		  errmsg_plural("tablesample method %s requires %d argument, not %d",
+						"tablesample method %s requires %d arguments, not %d",
+						list_length(tsm->parameterTypes),
+						NameListToString(rts->method),
+						list_length(tsm->parameterTypes),
+						list_length(rts->args)),
+				 parser_errposition(pstate, rts->location)));
+	/*
+	 * Transform the arguments, typecasting them as needed.  Note we must also
+	 * assign collations now, because assign_query_collations() doesn't
+	 * examine any substructure of RTEs.
+	 */
+	fargs = NIL;
+	forboth(larg, rts->args, ltyp, tsm->parameterTypes)	/* lengths checked above */
+	{
+		Node	   *arg = (Node *) lfirst(larg);
+		Oid			argtype = lfirst_oid(ltyp);
+		arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
+		arg = coerce_to_specific_type(pstate, arg, argtype, "TABLESAMPLE");
+		assign_expr_collations(pstate, arg);
+		fargs = lappend(fargs, arg);
+	}
+	tablesample->args = fargs;
+	/* Process REPEATABLE (seed): allowed only if the method supports it */
+	if (rts->repeatable != NULL)
+	{
+		Node	   *arg;
+		if (!tsm->repeatable_across_queries)
+			ereport(ERROR,
+				  errmsg("tablesample method %s does not support REPEATABLE",
+						 NameListToString(rts->method)),
+					 parser_errposition(pstate, rts->location)));
+		arg = transformExpr(pstate, rts->repeatable, EXPR_KIND_FROM_FUNCTION);
+		arg = coerce_to_specific_type(pstate, arg, FLOAT8OID, "REPEATABLE");	/* seed is coerced to float8 */
+		assign_expr_collations(pstate, arg);
+		tablesample->repeatable = (Expr *) arg;
+	}
+	else
+		tablesample->repeatable = NULL;	/* no REPEATABLE clause was given */
+	return tablesample;
  * transformFromClauseItem -
@@ -844,6 +915,33 @@ transformFromClauseItem(ParseState *pstate, Node *n,
 		rtr->rtindex = rtindex;
 		return (Node *) rtr;
+	else if (IsA(n, RangeTableSample))
+	{
+		/* TABLESAMPLE clause (wrapping some other valid FROM node) */
+		RangeTableSample *rts = (RangeTableSample *) n;
+		Node	   *rel;
+		RangeTblRef *rtr;
+		RangeTblEntry *rte;
+		/* Recursively transform the contained relation, passing our output args */
+		rel = transformFromClauseItem(pstate, rts->relation,
+									  top_rte, top_rti, namespace);
+		/* Grammar currently allows only a RangeVar here, so we get a RangeTblRef */
+		Assert(IsA(rel, RangeTblRef));
+		rtr = (RangeTblRef *) rel;
+		rte = rt_fetch(rtr->rtindex, pstate->p_rtable);	/* RTE the reference points to */
+		/* We only support this on plain relations and matviews */
+		if (rte->relkind != RELKIND_RELATION &&
+			rte->relkind != RELKIND_MATVIEW)
+			ereport(ERROR,
+					 errmsg("TABLESAMPLE clause can only be applied to tables and materialized views"),
+				   parser_errposition(pstate, exprLocation(rts->relation))));
+		/* Transform TABLESAMPLE details and attach to the RTE */
+		rte->tablesample = transformRangeTableSample(pstate, rts);
+		return (Node *) rtr;	/* hand back the inner item's RangeTblRef */
+	}
 	else if (IsA(n, JoinExpr))
 		/* A newfangled join expression */
@@ -1165,26 +1263,6 @@ transformFromClauseItem(ParseState *pstate, Node *n,
 		return (Node *) j;
-	else if (IsA(n, RangeTableSample))
-	{
-		/* Tablesample reference */
-		RangeTableSample *rv = (RangeTableSample *) n;
-		RangeTblRef *rtr;
-		RangeTblEntry *rte = NULL;
-		int			rtindex;
-		rte = transformTableSampleEntry(pstate, rv);
-		/* assume new rte is at end */
-		rtindex = list_length(pstate->p_rtable);
-		Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
-		*top_rte = rte;
-		*top_rti = rtindex;
-		*namespace = list_make1(makeDefaultNSItem(rte));
-		rtr = makeNode(RangeTblRef);
-		rtr->rtindex = rtindex;
-		return (Node *) rtr;
-	}
 		elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
 	return NULL;				/* can't get here, keep compiler quiet */
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index 430baff11652721778e3f37ba49b1707fea62247..554ca9d8c47e5f38eddb5579dd70160a2d5d363b 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -18,7 +18,6 @@
 #include "catalog/pg_aggregate.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_type.h"
-#include "catalog/pg_tablesample_method.h"
 #include "funcapi.h"
 #include "lib/stringinfo.h"
 #include "nodes/makefuncs.h"
@@ -27,7 +26,6 @@
 #include "parser/parse_clause.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_func.h"
-#include "parser/parse_expr.h"
 #include "parser/parse_relation.h"
 #include "parser/parse_target.h"
 #include "parser/parse_type.h"
@@ -769,148 +767,6 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
- * ParseTableSample
- *
- * Parse TABLESAMPLE clause and process the arguments
- */
-TableSampleClause *
-ParseTableSample(ParseState *pstate, char *samplemethod, Node *repeatable,
-				 List *sampleargs, int location)
-	HeapTuple	tuple;
-	Form_pg_tablesample_method tsm;
-	Form_pg_proc procform;
-	TableSampleClause *tablesample;
-	List	   *fargs;
-	ListCell   *larg;
-	int			nargs,
-				initnargs;
-	Oid			init_arg_types[FUNC_MAX_ARGS];
-	/* Load the tablesample method */
-	tuple = SearchSysCache1(TABLESAMPLEMETHODNAME, PointerGetDatum(samplemethod));
-	if (!HeapTupleIsValid(tuple))
-		ereport(ERROR,
-				 errmsg("tablesample method \"%s\" does not exist",
-						samplemethod),
-				 parser_errposition(pstate, location)));
-	tablesample = makeNode(TableSampleClause);
-	tablesample->tsmid = HeapTupleGetOid(tuple);
-	tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
-	tablesample->tsmseqscan = tsm->tsmseqscan;
-	tablesample->tsmpagemode = tsm->tsmpagemode;
-	tablesample->tsminit = tsm->tsminit;
-	tablesample->tsmnextblock = tsm->tsmnextblock;
-	tablesample->tsmnexttuple = tsm->tsmnexttuple;
-	tablesample->tsmexaminetuple = tsm->tsmexaminetuple;
-	tablesample->tsmend = tsm->tsmend;
-	tablesample->tsmreset = tsm->tsmreset;
-	tablesample->tsmcost = tsm->tsmcost;
-	ReleaseSysCache(tuple);
-	/* Validate the parameters against init function definition. */
-	tuple = SearchSysCache1(PROCOID,
-							ObjectIdGetDatum(tablesample->tsminit));
-	if (!HeapTupleIsValid(tuple))		/* should not happen */
-		elog(ERROR, "cache lookup failed for function %u",
-			 tablesample->tsminit);
-	procform = (Form_pg_proc) GETSTRUCT(tuple);
-	initnargs = procform->pronargs;
-	Assert(initnargs >= 3);
-	/*
-	 * First parameter is used to pass the SampleScanState, second is seed
-	 * (REPEATABLE), skip the processing for them here, just assert that the
-	 * types are correct.
-	 */
-	Assert(procform->proargtypes.values[0] == INTERNALOID);
-	Assert(procform->proargtypes.values[1] == INT4OID);
-	initnargs -= 2;
-	memcpy(init_arg_types, procform->proargtypes.values + 2,
-		   initnargs * sizeof(Oid));
-	/* Now we are done with the catalog */
-	ReleaseSysCache(tuple);
-	/* Process repeatable (seed) */
-	if (repeatable != NULL)
-	{
-		Node	   *arg = repeatable;
-		if (arg && IsA(arg, A_Const))
-		{
-			A_Const    *con = (A_Const *) arg;
-			if (con->val.type == T_Null)
-				ereport(ERROR,
-				  errmsg("REPEATABLE clause must be NOT NULL numeric value"),
-						 parser_errposition(pstate, con->location)));
-		}
-		arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
-		arg = coerce_to_specific_type(pstate, arg, INT4OID, "REPEATABLE");
-		tablesample->repeatable = arg;
-	}
-	else
-		tablesample->repeatable = NULL;
-	/* Check user provided expected number of arguments. */
-	if (list_length(sampleargs) != initnargs)
-		ereport(ERROR,
-		errmsg_plural("tablesample method \"%s\" expects %d argument got %d",
-					  "tablesample method \"%s\" expects %d arguments got %d",
-					  initnargs,
-					  samplemethod,
-					  initnargs, list_length(sampleargs)),
-				 parser_errposition(pstate, location)));
-	/* Transform the arguments, typecasting them as needed. */
-	fargs = NIL;
-	nargs = 0;
-	foreach(larg, sampleargs)
-	{
-		Node	   *inarg = (Node *) lfirst(larg);
-		Node	   *arg = transformExpr(pstate, inarg, EXPR_KIND_FROM_FUNCTION);
-		Oid			argtype = exprType(arg);
-		if (argtype != init_arg_types[nargs])
-		{
-			if (!can_coerce_type(1, &argtype, &init_arg_types[nargs],
-				ereport(ERROR,
-				   errmsg("wrong parameter %d for tablesample method \"%s\"",
-						  nargs + 1, samplemethod),
-						 errdetail("Expected type %s got %s.",
-								   format_type_be(init_arg_types[nargs]),
-								   format_type_be(argtype)),
-						 parser_errposition(pstate, exprLocation(inarg))));
-			arg = coerce_type(pstate, arg, argtype, init_arg_types[nargs], -1,
-		}
-		fargs = lappend(fargs, arg);
-		nargs++;
-	}
-	/* Pass the arguments down */
-	tablesample->args = fargs;
-	return tablesample;
 /* func_match_argtypes()
  * Given a list of candidate functions (having the right name and number
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index bbd6b77c5eab640ee1af8accaaaa6ae686aa1313..1734e48241ada102ac66cfd2788ffff0837dfcab 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -418,6 +418,10 @@ rewriteRuleAction(Query *parsetree,
 			switch (rte->rtekind)
+				case RTE_RELATION:	/* look for SubLinks in the TABLESAMPLE clause */
+					sub_action->hasSubLinks =
+						checkExprHasSubLink((Node *) rte->tablesample);
+					break;
 				case RTE_FUNCTION:
 					sub_action->hasSubLinks =
 						checkExprHasSubLink((Node *) rte->functions);
diff --git a/src/backend/utils/adt/pseudotypes.c b/src/backend/utils/adt/pseudotypes.c
index 9ad460abfbdbcc8f1e75613b00841b7926af5592..5b809aa7d4996d55d467aa570e8dca9650031a31 100644
--- a/src/backend/utils/adt/pseudotypes.c
+++ b/src/backend/utils/adt/pseudotypes.c
@@ -373,6 +373,33 @@ fdw_handler_out(PG_FUNCTION_ARGS)
+ * tsm_handler_in		- input routine for pseudo-type TSM_HANDLER.
+ */
+	ereport(ERROR,
+			 errmsg("cannot accept a value of type tsm_handler")));
+	PG_RETURN_VOID();			/* keep compiler quiet */
+ * tsm_handler_out		- output routine for pseudo-type TSM_HANDLER.
+ */
+	ereport(ERROR,
+			 errmsg("cannot display a value of type tsm_handler")));
+	PG_RETURN_VOID();			/* keep compiler quiet */
  * internal_in		- input routine for pseudo-type INTERNAL.
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 5112cac90173595d56c7cc14beba8ebfcc9113e7..51391f6a4e0d16e4e647f7845425e11795ca7508 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -32,7 +32,6 @@
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
-#include "catalog/pg_tablesample_method.h"
 #include "catalog/pg_trigger.h"
 #include "catalog/pg_type.h"
 #include "commands/defrem.h"
@@ -349,8 +348,6 @@ static void make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
 			 int prettyFlags);
 static void make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
 			 int prettyFlags, int wrapColumn);
-static void get_tablesample_def(TableSampleClause *tablesample,
-					deparse_context *context);
 static void get_query_def(Query *query, StringInfo buf, List *parentnamespace,
 			  TupleDesc resultDesc,
 			  int prettyFlags, int wrapColumn, int startIndent);
@@ -416,6 +413,8 @@ static void get_column_alias_list(deparse_columns *colinfo,
 static void get_from_clause_coldeflist(RangeTblFunction *rtfunc,
 						   deparse_columns *colinfo,
 						   deparse_context *context);
+static void get_tablesample_def(TableSampleClause *tablesample,
+					deparse_context *context);
 static void get_opclass_name(Oid opclass, Oid actual_datatype,
 				 StringInfo buf);
 static Node *processIndirection(Node *node, deparse_context *context,
@@ -4235,50 +4234,6 @@ make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
 	heap_close(ev_relation, AccessShareLock);
-/* ----------
- * get_tablesample_def			- Convert TableSampleClause back to SQL
- * ----------
- */
-static void
-get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
-	StringInfo	buf = context->buf;
-	HeapTuple	tuple;
-	Form_pg_tablesample_method tsm;
-	char	   *tsmname;
-	int			nargs;
-	ListCell   *l;
-	/* Load the tablesample method */
-	tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tablesample->tsmid));
-	if (!HeapTupleIsValid(tuple))
-		ereport(ERROR,
-				 errmsg("cache lookup failed for tablesample method %u",
-						tablesample->tsmid)));
-	tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
-	tsmname = NameStr(tsm->tsmname);
-	appendStringInfo(buf, " TABLESAMPLE %s (", quote_identifier(tsmname));
-	ReleaseSysCache(tuple);
-	nargs = 0;
-	foreach(l, tablesample->args)
-	{
-		if (nargs++ > 0)
-			appendStringInfoString(buf, ", ");
-		get_rule_expr((Node *) lfirst(l), context, true);
-	}
-	appendStringInfoChar(buf, ')');
-	if (tablesample->repeatable != NULL)
-	{
-		appendStringInfoString(buf, " REPEATABLE (");
-		get_rule_expr(tablesample->repeatable, context, true);
-		appendStringInfoChar(buf, ')');
-	}
 /* ----------
  * get_query_def			- Parse back one query parsetree
@@ -8781,9 +8736,6 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
-				if (rte->tablesample)
-					get_tablesample_def(rte->tablesample, context);
 			case RTE_SUBQUERY:
 				/* Subquery RTE */
@@ -8963,6 +8915,10 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
 			/* Else print column aliases as needed */
 			get_column_alias_list(colinfo, context);
+		/* Tablesample clause must go after any alias */
+		if (rte->rtekind == RTE_RELATION && rte->tablesample)
+			get_tablesample_def(rte->tablesample, context);
 	else if (IsA(jtnode, JoinExpr))
@@ -9162,6 +9118,44 @@ get_from_clause_coldeflist(RangeTblFunction *rtfunc,
 	appendStringInfoChar(buf, ')');
+ * get_tablesample_def			- print a TableSampleClause
+ */
+static void
+get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
+	StringInfo	buf = context->buf;
+	Oid			argtypes[1];
+	int			nargs;
+	ListCell   *l;
+	/*
+	 * The method is printed as its handler function's name, which should be
+	 * qualified if lookup in the current search path wouldn't resolve it.
+	 */
+	argtypes[0] = INTERNALOID;	/* the one argument type passed for the lookup */
+	appendStringInfo(buf, " TABLESAMPLE %s (",
+					 generate_function_name(tablesample->tsmhandler, 1,
+											NIL, argtypes,
+											false, NULL, EXPR_KIND_NONE));
+	nargs = 0;	/* arguments printed so far, for comma placement */
+	foreach(l, tablesample->args)
+	{
+		if (nargs++ > 0)
+			appendStringInfoString(buf, ", ");
+		get_rule_expr((Node *) lfirst(l), context, false);
+	}
+	appendStringInfoChar(buf, ')');
+	if (tablesample->repeatable != NULL)	/* REPEATABLE is printed only if present */
+	{
+		appendStringInfoString(buf, " REPEATABLE (");
+		get_rule_expr((Node *) tablesample->repeatable, context, false);
+		appendStringInfoChar(buf, ')');
+	}
  * get_opclass_name			- fetch name of an index operator class
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 7b32247d34eae9eb8dadc9514d0b7e2c88828ff0..1dc293297d93edb933b743dc302fb7bcad757290 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -32,7 +32,6 @@
 #include "catalog/pg_range.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_transform.h"
-#include "catalog/pg_tablesample_method.h"
 #include "catalog/pg_type.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -2997,29 +2996,3 @@ get_range_subtype(Oid rangeOid)
 		return InvalidOid;
-/*				---------- PG_TABLESAMPLE_METHOD CACHE ----------			 */
- * get_tablesample_method_name - given a tablesample method OID,
- * look up the name or NULL if not found
- */
-char *
-get_tablesample_method_name(Oid tsmid)
-	HeapTuple	tuple;
-	tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tsmid));
-	if (HeapTupleIsValid(tuple))
-	{
-		Form_pg_tablesample_method tup =
-		(Form_pg_tablesample_method) GETSTRUCT(tuple);
-		char	   *result;
-		result = pstrdup(NameStr(tup->tsmname));
-		ReleaseSysCache(tuple);
-		return result;
-	}
-	else
-		return NULL;
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index b6333e362f018b467aa34b065457268be6dc3bde..efce7b9a3d13b0ce73aff22292ca9f42ee31b60c 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -56,7 +56,6 @@
 #include "catalog/pg_shseclabel.h"
 #include "catalog/pg_replication_origin.h"
 #include "catalog/pg_statistic.h"
-#include "catalog/pg_tablesample_method.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_transform.h"
 #include "catalog/pg_ts_config.h"
@@ -667,28 +666,6 @@ static const struct cachedesc cacheinfo[] = {
-	{TableSampleMethodRelationId,		/* TABLESAMPLEMETHODNAME */
-		TableSampleMethodNameIndexId,
-		1,
-		{
-			Anum_pg_tablesample_method_tsmname,
-			0,
-			0,
-			0,
-		},
-		2
-	},
-	{TableSampleMethodRelationId,		/* TABLESAMPLEMETHODOID */
-		TableSampleMethodOidIndexId,
-		1,
-		{
-			ObjectIdAttributeNumber,
-			0,
-			0,
-			0,
-		},
-		2
-	},
 	{TableSpaceRelationId,		/* TABLESPACEOID */
diff --git a/src/backend/utils/errcodes.txt b/src/backend/utils/errcodes.txt
index 6cc3ed96c447bd5b7a743d015cd14ec06b9b3f0a..7b97d45a53a12ed849c13bef3630753109ba1f49 100644
--- a/src/backend/utils/errcodes.txt
+++ b/src/backend/utils/errcodes.txt
@@ -177,6 +177,8 @@ Section: Class 22 - Data Exception
 2201B    E    ERRCODE_INVALID_REGULAR_EXPRESSION                             invalid_regular_expression
 2201W    E    ERRCODE_INVALID_ROW_COUNT_IN_LIMIT_CLAUSE                      invalid_row_count_in_limit_clause
 2201X    E    ERRCODE_INVALID_ROW_COUNT_IN_RESULT_OFFSET_CLAUSE              invalid_row_count_in_result_offset_clause
+2202H    E    ERRCODE_INVALID_TABLESAMPLE_ARGUMENT                           invalid_tablesample_argument
+2202G    E    ERRCODE_INVALID_TABLESAMPLE_REPEAT                             invalid_tablesample_repeat
 22009    E    ERRCODE_INVALID_TIME_ZONE_DISPLACEMENT_VALUE                   invalid_time_zone_displacement_value
 2200C    E    ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER                        invalid_use_of_escape_character
 2200G    E    ERRCODE_MOST_SPECIFIC_TYPE_MISMATCH                            most_specific_type_mismatch
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c
index 6191f7973441b2ac7dbc473cfc2058e35d0da4d3..4142e01123f79fe880cac07889e9227d4b6678d5 100644
--- a/src/backend/utils/misc/sampling.c
+++ b/src/backend/utils/misc/sampling.c
@@ -228,7 +228,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
 sampler_random_init_state(long seed, SamplerRandomState randstate)
-	randstate[0] = RAND48_SEED_0;
+	randstate[0] = 0x330e;		/* same as pg_erand48, but could be anything */
 	randstate[1] = (unsigned short) seed;
 	randstate[2] = (unsigned short) (seed >> 16);
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 9596af6a7b35ad57e3c73e19b1841f20b8c0384c..ece05155490b8755cff88231be9a3e01b6e3b773 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -738,13 +738,15 @@ static const SchemaQuery Query_for_list_of_matviews = {
 "  WHERE substring(pg_catalog.quote_ident(evtname),1,%d)='%s'"
 #define Query_for_list_of_tablesample_methods \
-" SELECT pg_catalog.quote_ident(tsmname) "\
-"   FROM pg_catalog.pg_tablesample_method "\
-"  WHERE substring(pg_catalog.quote_ident(tsmname),1,%d)='%s'"
+" SELECT pg_catalog.quote_ident(proname) "\
+"   FROM pg_catalog.pg_proc "\
+"  WHERE prorettype = 'pg_catalog.tsm_handler'::pg_catalog.regtype AND "\
+"        proargtypes[0] = 'pg_catalog.internal'::pg_catalog.regtype AND "\
+"        substring(pg_catalog.quote_ident(proname),1,%d)='%s'"
 #define Query_for_list_of_policies \
 " SELECT pg_catalog.quote_ident(polname) "\
-"   FROM pg_catalog.pg_policy " \
+"   FROM pg_catalog.pg_policy "\
 "  WHERE substring(pg_catalog.quote_ident(polname),1,%d)='%s'"
 #define Query_for_list_of_tables_for_policy \
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 31139cbd0ccc736908afa9218dc10e9d61dedd99..75e6b72f9e0204913254548a42322a6fa7708d63 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -116,11 +116,13 @@ extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot,
 				  int nkeys, ScanKey key);
 extern HeapScanDesc heap_beginscan_sampling(Relation relation,
 						Snapshot snapshot, int nkeys, ScanKey key,
-						bool allow_strat, bool allow_pagemode);
+					 bool allow_strat, bool allow_sync, bool allow_pagemode);
 extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk,
 				   BlockNumber endBlk);
 extern void heapgetpage(HeapScanDesc scan, BlockNumber page);
 extern void heap_rescan(HeapScanDesc scan, ScanKey key);
+extern void heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
+					 bool allow_strat, bool allow_sync, bool allow_pagemode);
 extern void heap_endscan(HeapScanDesc scan);
 extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
diff --git a/src/include/access/tablesample.h b/src/include/access/tablesample.h
deleted file mode 100644
index a02e93d32223ddedb158ccc1af69498cb1add5f7..0000000000000000000000000000000000000000
--- a/src/include/access/tablesample.h
+++ /dev/null
@@ -1,61 +0,0 @@
- *
- * tablesample.h
- *		  Public header file for TABLESAMPLE clause interface
- *
- *
- * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/include/access/tablesample.h
- *
- *-------------------------------------------------------------------------
- */
-#include "access/relscan.h"
-#include "executor/executor.h"
-typedef struct TableSampleDesc
-	HeapScanDesc heapScan;
-	TupleDesc	tupDesc;		/* Mostly useful for tsmexaminetuple */
-	void	   *tsmdata;		/* private method data */
-	/* These point to he function of the TABLESAMPLE Method. */
-	FmgrInfo	tsminit;
-	FmgrInfo	tsmnextblock;
-	FmgrInfo	tsmnexttuple;
-	FmgrInfo	tsmexaminetuple;
-	FmgrInfo	tsmreset;
-	FmgrInfo	tsmend;
-} TableSampleDesc;
-extern TableSampleDesc *tablesample_init(SampleScanState *scanstate,
-				 TableSampleClause *tablesample);
-extern HeapTuple tablesample_getnext(TableSampleDesc *desc);
-extern void tablesample_reset(TableSampleDesc *desc);
-extern void tablesample_end(TableSampleDesc *desc);
-extern HeapTuple tablesample_source_getnext(TableSampleDesc *desc);
-extern HeapTuple tablesample_source_gettup(TableSampleDesc *desc, ItemPointer tid,
-						  bool *visible);
-extern Datum tsm_system_init(PG_FUNCTION_ARGS);
-extern Datum tsm_system_nextblock(PG_FUNCTION_ARGS);
-extern Datum tsm_system_nexttuple(PG_FUNCTION_ARGS);
-extern Datum tsm_system_end(PG_FUNCTION_ARGS);
-extern Datum tsm_system_reset(PG_FUNCTION_ARGS);
-extern Datum tsm_system_cost(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_init(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_nextblock(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_end(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_reset(PG_FUNCTION_ARGS);
-extern Datum tsm_bernoulli_cost(PG_FUNCTION_ARGS);
diff --git a/src/include/access/tsmapi.h b/src/include/access/tsmapi.h
new file mode 100644
index 0000000000000000000000000000000000000000..4b59ffabd6e112f674328e5a87e75b190d14a520
--- /dev/null
+++ b/src/include/access/tsmapi.h
@@ -0,0 +1,81 @@
+ *
+ * tsmapi.h
+ *	  API for tablesample methods
+ *
+ * Copyright (c) 2015, PostgreSQL Global Development Group
+ *
+ * src/include/access/tsmapi.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TSMAPI_H
+#define TSMAPI_H
+#include "nodes/execnodes.h"
+#include "nodes/relation.h"
+ * Callback function signatures --- see tablesample-method.sgml for more info.
+ */
+typedef void (*SampleScanGetSampleSize_function) (PlannerInfo *root,
+														 RelOptInfo *baserel,
+															List *paramexprs,
+														  BlockNumber *pages,
+															  double *tuples);
+typedef void (*InitSampleScan_function) (SampleScanState *node,
+													 int eflags);
+typedef void (*BeginSampleScan_function) (SampleScanState *node,
+													  Datum *params,
+													  int nparams,
+													  uint32 seed);
+typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node);
+typedef OffsetNumber (*NextSampleTuple_function) (SampleScanState *node,
+														 BlockNumber blockno,
+													 OffsetNumber maxoffset);
+typedef void (*EndSampleScan_function) (SampleScanState *node);
+ * TsmRoutine is the struct returned by a tablesample method's handler
+ * function.  It provides pointers to the callback functions needed by the
+ * planner and executor, as well as additional information about the method.
+ *
+ * More function pointers are likely to be added in the future.
+ * Therefore it's recommended that the handler initialize the struct with
+ * makeNode(TsmRoutine) so that all fields are set to NULL.  This will
+ * ensure that no fields are accidentally left undefined.
+ */
+typedef struct TsmRoutine
+	NodeTag		type;	/* node tag; makeNode(TsmRoutine) is the recommended initializer */
+	/* OID list of datatypes for the TABLESAMPLE arguments (parser coerces to these) */
+	List	   *parameterTypes;
+	/* Can method produce repeatable samples across, or even within, queries? */
+	bool		repeatable_across_queries;	/* if false, parser rejects REPEATABLE */
+	bool		repeatable_across_scans;
+	/* Functions for planning a SampleScan on a physical table */
+	SampleScanGetSampleSize_function SampleScanGetSampleSize;
+	/* Functions for executing a SampleScan on a physical table */
+	InitSampleScan_function InitSampleScan;		/* can be NULL */
+	BeginSampleScan_function BeginSampleScan;
+	NextSampleBlock_function NextSampleBlock;	/* can be NULL */
+	NextSampleTuple_function NextSampleTuple;
+	EndSampleScan_function EndSampleScan;		/* can be NULL */
+} TsmRoutine;
+/* Functions in access/tablesample/tablesample.c */
+extern TsmRoutine *GetTsmRoutine(Oid tsmhandler);
+#endif   /* TSMAPI_H */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 8f6685fd0cce89d8060dd8468b7575bd85bb412d..0e983279313cd59a8a0df57e9d17829cf5967b41 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	201507171
+#define CATALOG_VERSION_NO	201507252
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index 748aadde94598945c715a2244d8b12249792eba9..c38958d6c5e26985ceeef95ca0df4defe42bf711 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -316,11 +316,6 @@ DECLARE_UNIQUE_INDEX(pg_replication_origin_roiident_index, 6001, on pg_replicati
 DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication_origin using btree(roname text_pattern_ops));
 #define ReplicationOriginNameIndex 6002
-DECLARE_UNIQUE_INDEX(pg_tablesample_method_name_index, 3331, on pg_tablesample_method using btree(tsmname name_ops));
-#define TableSampleMethodNameIndexId  3331
-DECLARE_UNIQUE_INDEX(pg_tablesample_method_oid_index, 3332, on pg_tablesample_method using btree(oid oid_ops));
-#define TableSampleMethodOidIndexId  3332
 /* last step of initialization script: build the indexes declared above */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 1d68ad7209e1c65333491a4a646945dfc525f2ab..09bf1439c46f8f68f0f3ebbb275ebe84fc84c752 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3734,6 +3734,16 @@ DATA(insert OID = 3116 (  fdw_handler_in	PGNSP PGUID 12 1 0 0 0 f f f f f f i 1
 DATA(insert OID = 3117 (  fdw_handler_out	PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3115" _null_ _null_ _null_ _null_ _null_ fdw_handler_out _null_ _null_ _null_ ));
+DATA(insert OID = 3311 (  tsm_handler_in	PGNSP PGUID 12 1 0 0 0 f f f f f f i 1 0 3310 "2275" _null_ _null_ _null_ _null_ _null_ tsm_handler_in _null_ _null_ _null_ ));
+DATA(insert OID = 3312 (  tsm_handler_out	PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2275 "3310" _null_ _null_ _null_ _null_ _null_ tsm_handler_out _null_ _null_ _null_ ));
+/* tablesample method handlers */
+DATA(insert OID = 3313 (  bernoulli			PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_handler _null_ _null_ _null_ ));
+DESCR("BERNOULLI tablesample method handler");
+DATA(insert OID = 3314 (  system			PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 3310 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_handler _null_ _null_ _null_ ));
+DESCR("SYSTEM tablesample method handler");
 /* cryptographic */
 DATA(insert OID =  2311 (  md5	   PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ md5_text _null_ _null_ _null_ ));
@@ -5321,33 +5331,6 @@ DESCR("get an individual replication origin's replication progress");
 DATA(insert OID = 6014 ( pg_show_replication_origin_status PGNSP PGUID 12 1 100 0 0 f f f f f t v 0 0 2249 "" "{26,25,3220,3220}" "{o,o,o,o}" "{local_id, external_id, remote_lsn, local_lsn}" _null_ _null_ pg_show_replication_origin_status _null_ _null_ _null_ ));
 DESCR("get progress for all replication origins");
-/* tablesample */
-DATA(insert OID = 3335 (  tsm_system_init		PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_system_init _null_ _null_ _null_ ));
-DATA(insert OID = 3336 (  tsm_system_nextblock	PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nextblock _null_ _null_ _null_ ));
-DATA(insert OID = 3337 (  tsm_system_nexttuple	PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nexttuple _null_ _null_ _null_ ));
-DATA(insert OID = 3338 (  tsm_system_end		PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_end _null_ _null_ _null_ ));
-DATA(insert OID = 3339 (  tsm_system_reset		PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_reset _null_ _null_ _null_ ));
-DATA(insert OID = 3340 (  tsm_system_cost		PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_system_cost _null_ _null_ _null_ ));
-DATA(insert OID = 3341 (  tsm_bernoulli_init		PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_init _null_ _null_ _null_ ));
-DATA(insert OID = 3342 (  tsm_bernoulli_nextblock	PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nextblock _null_ _null_ _null_ ));
-DATA(insert OID = 3343 (  tsm_bernoulli_nexttuple	PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nexttuple _null_ _null_ _null_ ));
-DATA(insert OID = 3344 (  tsm_bernoulli_end			PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_end _null_ _null_ _null_ ));
-DATA(insert OID = 3345 (  tsm_bernoulli_reset		PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_reset _null_ _null_ _null_ ));
-DATA(insert OID = 3346 (  tsm_bernoulli_cost		PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_cost _null_ _null_ _null_ ));
  * Symbolic values for provolatile column: these indicate whether the result
  * of a function is dependent *only* on the values of its explicit arguments,
diff --git a/src/include/catalog/pg_tablesample_method.h b/src/include/catalog/pg_tablesample_method.h
deleted file mode 100644
index b422414d08016963a7f65b3b91fa90ce50567072..0000000000000000000000000000000000000000
--- a/src/include/catalog/pg_tablesample_method.h
+++ /dev/null
@@ -1,81 +0,0 @@
- *
- * pg_tablesample_method.h
- *	  definition of the table scan methods.
- *
- *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/include/catalog/pg_tablesample_method.h
- *
- *
- *-------------------------------------------------------------------------
- */
-#include "catalog/genbki.h"
-#include "catalog/objectaddress.h"
-/* ----------------
- *		pg_tablesample_method definition.  cpp turns this into
- *		typedef struct FormData_pg_tablesample_method
- * ----------------
- */
-#define TableSampleMethodRelationId 3330
-	NameData	tsmname;		/* tablesample method name */
-	bool		tsmseqscan;		/* does this method scan whole table
-								 * sequentially? */
-	bool		tsmpagemode;	/* does this method scan page at a time? */
-	regproc		tsminit;		/* init scan function */
-	regproc		tsmnextblock;	/* function returning next block to sample or
-								 * InvalidBlockOffset if finished */
-	regproc		tsmnexttuple;	/* function returning next tuple offset from
-								 * current block or InvalidOffsetNumber if end
-								 * of the block was reacher */
-	regproc		tsmexaminetuple;/* optional function which can examine tuple
-								 * contents and decide if tuple should be
-								 * returned or not */
-	regproc		tsmend;			/* end scan function */
-	regproc		tsmreset;		/* reset state - used by rescan */
-	regproc		tsmcost;		/* costing function */
-} FormData_pg_tablesample_method;
-/* ----------------
- *		Form_pg_tablesample_method corresponds to a pointer to a tuple with
- *		the format of pg_tablesample_method relation.
- * ----------------
- */
-typedef FormData_pg_tablesample_method *Form_pg_tablesample_method;
-/* ----------------
- *		compiler constants for pg_tablesample_method
- * ----------------
- */
-#define Natts_pg_tablesample_method					10
-#define Anum_pg_tablesample_method_tsmname			1
-#define Anum_pg_tablesample_method_tsmseqscan		2
-#define Anum_pg_tablesample_method_tsmpagemode		3
-#define Anum_pg_tablesample_method_tsminit			4
-#define Anum_pg_tablesample_method_tsmnextblock		5
-#define Anum_pg_tablesample_method_tsmnexttuple		6
-#define Anum_pg_tablesample_method_tsmexaminetuple	7
-#define Anum_pg_tablesample_method_tsmend			8
-#define Anum_pg_tablesample_method_tsmreset			9
-#define Anum_pg_tablesample_method_tsmcost			10
-/* ----------------
- *		initial contents of pg_tablesample_method
- * ----------------
- */
-DATA(insert OID = 3333 ( system false true tsm_system_init tsm_system_nextblock tsm_system_nexttuple - tsm_system_end tsm_system_reset tsm_system_cost ));
-DESCR("SYSTEM table sampling method");
-DATA(insert OID = 3334 ( bernoulli true false tsm_bernoulli_init tsm_bernoulli_nextblock tsm_bernoulli_nexttuple - tsm_bernoulli_end tsm_bernoulli_reset tsm_bernoulli_cost ));
-DESCR("BERNOULLI table sampling method");
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h
index da123f6c4957e70b3fecc07b948609a6bd21bea2..7dc95c8d2c651ef3fc577af3d62cefd8f4d24169 100644
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -694,6 +694,8 @@ DATA(insert OID = 3500 ( anyenum		PGNSP PGUID  4 t p P f t \054 0 0 0 anyenum_in
 #define ANYENUMOID		3500
 DATA(insert OID = 3115 ( fdw_handler	PGNSP PGUID  4 t p P f t \054 0 0 0 fdw_handler_in fdw_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ ));
 #define FDW_HANDLEROID	3115
+DATA(insert OID = 3310 ( tsm_handler	PGNSP PGUID  4 t p P f t \054 0 0 0 tsm_handler_in tsm_handler_out - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ ));
+#define TSM_HANDLEROID	3310
 DATA(insert OID = 3831 ( anyrange		PGNSP PGUID  -1 f p P f t \054 0 0 0 anyrange_in anyrange_out - - - - - d x f 0 -1 0 0 _null_ _null_ _null_ ));
 #define ANYRANGEOID		3831
diff --git a/src/include/executor/nodeSamplescan.h b/src/include/executor/nodeSamplescan.h
index 4b769daec8b917e90587597c92c61ba82ddd5bac..a0cc6ce467a9f58bf54b7eaeba9789c315a4fe55 100644
--- a/src/include/executor/nodeSamplescan.h
+++ b/src/include/executor/nodeSamplescan.h
@@ -4,7 +4,7 @@
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  * src/include/executor/nodeSamplescan.h
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 541ee187356850c69da8e26fc2615cb968f1a62d..303fc3c1c77dca2f4c2abd51440c8614b1e88f61 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1257,13 +1257,22 @@ typedef struct ScanState
 typedef ScanState SeqScanState;
- * SampleScan
+/* ----------------
+ *	 SampleScanState information
+ * ----------------
 typedef struct SampleScanState
 	ScanState	ss;
-	struct TableSampleDesc *tsdesc;
+	List	   *args;			/* expr states for TABLESAMPLE params */
+	ExprState  *repeatable;		/* expr state for REPEATABLE expr */
+	/* use struct pointer to avoid including tsmapi.h here */
+	struct TsmRoutine *tsmroutine;		/* descriptor for tablesample method */
+	void	   *tsm_state;		/* tablesample method can keep state here */
+	bool		use_bulkread;	/* use bulkread buffer access strategy? */
+	bool		use_pagemode;	/* use page-at-a-time visibility checking? */
+	bool		begun;			/* false means need to call BeginSampleScan */
+	uint32		seed;			/* random seed */
 } SampleScanState;
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index f8acda4eede0190e444d0d37e84cd7743884d4e7..748e434a27a21a47874b3ae50844ff39c8f54a24 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -51,6 +51,7 @@ typedef enum NodeTag
+	T_SampleScan,
@@ -61,7 +62,6 @@ typedef enum NodeTag
-	T_SampleScan,
@@ -400,6 +400,7 @@ typedef enum NodeTag
+	T_RangeTableSample,
@@ -407,6 +408,7 @@ typedef enum NodeTag
+	T_TableSampleClause,
@@ -425,8 +427,6 @@ typedef enum NodeTag
-	T_RangeTableSample,
-	T_TableSampleClause,
@@ -452,7 +452,8 @@ typedef enum NodeTag
 	T_WindowObjectData,			/* private in nodeWindowAgg.c */
 	T_TIDBitmap,				/* in nodes/tidbitmap.h */
 	T_InlineCodeBlock,			/* in nodes/parsenodes.h */
-	T_FdwRoutine				/* in foreign/fdwapi.h */
+	T_FdwRoutine,				/* in foreign/fdwapi.h */
+	T_TsmRoutine				/* in access/tsmapi.h */
 } NodeTag;
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index b336ff9c6abaf0bc40bc34d6b3bd527d0e2c5393..151c93a078ea009aa8bd229a64581cb6cce79fc9 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -337,26 +337,6 @@ typedef struct FuncCall
 	int			location;		/* token location, or -1 if unknown */
 } FuncCall;
- * TableSampleClause - a sampling method information
- */
-typedef struct TableSampleClause
-	NodeTag		type;
-	Oid			tsmid;
-	bool		tsmseqscan;
-	bool		tsmpagemode;
-	Oid			tsminit;
-	Oid			tsmnextblock;
-	Oid			tsmnexttuple;
-	Oid			tsmexaminetuple;
-	Oid			tsmend;
-	Oid			tsmreset;
-	Oid			tsmcost;
-	Node	   *repeatable;
-	List	   *args;
-} TableSampleClause;
  * A_Star - '*' representing all columns of a table or compound field
@@ -558,19 +538,23 @@ typedef struct RangeFunction
 } RangeFunction;
- * RangeTableSample - represents <table> TABLESAMPLE <method> (<params>) REPEATABLE (<num>)
+ * RangeTableSample - TABLESAMPLE appearing in a raw FROM clause
- * SQL Standard specifies only one parameter which is percentage. But we allow
- * custom tablesample methods which may need different input arguments so we
- * accept list of arguments.
+ * This node, appearing only in raw parse trees, represents
+ *		<relation> TABLESAMPLE <method> (<params>) REPEATABLE (<num>)
+ * Currently, the <relation> can only be a RangeVar, but we might in future
+ * allow RangeSubselect and other options.  Note that the RangeTableSample
+ * is wrapped around the node representing the <relation>, rather than being
+ * a subfield of it.
 typedef struct RangeTableSample
 	NodeTag		type;
-	RangeVar   *relation;
-	char	   *method;			/* sampling method */
-	Node	   *repeatable;
-	List	   *args;			/* arguments for sampling method */
+	Node	   *relation;		/* relation to be sampled */
+	List	   *method;			/* sampling method name (possibly qualified) */
+	List	   *args;			/* argument(s) for sampling method */
+	Node	   *repeatable;		/* REPEATABLE expression, or NULL if none */
+	int			location;		/* method name location, or -1 if unknown */
 } RangeTableSample;
@@ -810,7 +794,7 @@ typedef struct RangeTblEntry
 	Oid			relid;			/* OID of the relation */
 	char		relkind;		/* relation kind (see pg_class.relkind) */
-	TableSampleClause *tablesample;		/* sampling method and parameters */
+	struct TableSampleClause *tablesample;		/* sampling info, or NULL */
 	 * Fields valid for a subquery RTE (else NULL):
@@ -912,6 +896,19 @@ typedef struct RangeTblFunction
 	Bitmapset  *funcparams;		/* PARAM_EXEC Param IDs affecting this func */
 } RangeTblFunction;
+ * TableSampleClause - TABLESAMPLE appearing in a transformed FROM clause
+ *
+ * Unlike RangeTableSample, this is a subnode of the relevant RangeTblEntry.
+ */
+typedef struct TableSampleClause
+	NodeTag		type;
+	Oid			tsmhandler;		/* OID of the tablesample handler function */
+	List	   *args;			/* tablesample argument expression(s) */
+	Expr	   *repeatable;		/* REPEATABLE expression, or NULL if none */
+} TableSampleClause;
  * WithCheckOption -
  *		representation of WITH CHECK OPTION checks to be applied to new tuples
@@ -2520,7 +2517,7 @@ typedef struct RenameStmt
 typedef struct AlterObjectSchemaStmt
 	NodeTag		type;
-	ObjectType objectType;		/* OBJECT_TABLE, OBJECT_TYPE, etc */
+	ObjectType	objectType;		/* OBJECT_TABLE, OBJECT_TYPE, etc */
 	RangeVar   *relation;		/* in case it's a table */
 	List	   *object;			/* in case it's some other object */
 	List	   *objarg;			/* argument types, if applicable */
@@ -2535,7 +2532,7 @@ typedef struct AlterObjectSchemaStmt
 typedef struct AlterOwnerStmt
 	NodeTag		type;
-	ObjectType objectType;		/* OBJECT_TABLE, OBJECT_TYPE, etc */
+	ObjectType	objectType;		/* OBJECT_TABLE, OBJECT_TYPE, etc */
 	RangeVar   *relation;		/* in case it's a table */
 	List	   *object;			/* in case it's some other object */
 	List	   *objarg;			/* argument types, if applicable */
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 5f538f3e8ccb5a9298af8d2a836d3c08fc8d611d..0654d0266cd6e7d17a7fb75c2834b6b6e793fbf4 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -287,7 +287,12 @@ typedef Scan SeqScan;
  *		table sample scan node
  * ----------------
-typedef Scan SampleScan;
+typedef struct SampleScan
+	Scan		scan;
+	/* use struct pointer to avoid including parsenodes.h here */
+	struct TableSampleClause *tablesample;
+} SampleScan;
 /* ----------------
  *		index scan node
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 24003ae3591b9a2e1d74709a5f60485297f7b146..dd43e45d0c0a5b6c98f54c654f06b0c08ed7bff9 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -68,7 +68,8 @@ extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
 					double index_pages, PlannerInfo *root);
 extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
 			 ParamPathInfo *param_info);
-extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
+extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
+				ParamPathInfo *param_info);
 extern void cost_index(IndexPath *path, PlannerInfo *root,
 		   double loop_count);
 extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
diff --git a/src/include/parser/parse_func.h b/src/include/parser/parse_func.h
index 3194da463948a34e96478738e30368c37d1efedf..32646918e20c4b8101d0b5936144e3f8ff3d5776 100644
--- a/src/include/parser/parse_func.h
+++ b/src/include/parser/parse_func.h
@@ -33,11 +33,6 @@ typedef enum
 extern Node *ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
 				  FuncCall *fn, int location);
-extern TableSampleClause *ParseTableSample(ParseState *pstate,
-				 char *samplemethod,
-				 Node *repeatable, List *args,
-				 int location);
 extern FuncDetailCode func_get_detail(List *funcname,
 				List *fargs, List *fargnames,
 				int nargs, Oid *argtypes,
diff --git a/src/include/port.h b/src/include/port.h
index 71113c03944bd7f88991ef9953ae4ea15e86f443..3787cbfb7614cd318a3885bc59554128fb063502 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -357,10 +357,6 @@ extern off_t ftello(FILE *stream);
-#define RAND48_SEED_0	(0x330e)
-#define RAND48_SEED_1	(0xabcd)
-#define RAND48_SEED_2	(0x1234)
 extern double pg_erand48(unsigned short xseed[3]);
 extern long pg_lrand48(void);
 extern void pg_srand48(long seed);
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index fcb0bf0ce8e94c376c683dcf7a8bb18d2b83b0a7..49caa56557420ed96a8adf2ee89d8adfb6157304 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -566,6 +566,8 @@ extern Datum language_handler_in(PG_FUNCTION_ARGS);
 extern Datum language_handler_out(PG_FUNCTION_ARGS);
 extern Datum fdw_handler_in(PG_FUNCTION_ARGS);
 extern Datum fdw_handler_out(PG_FUNCTION_ARGS);
+extern Datum tsm_handler_in(PG_FUNCTION_ARGS);
+extern Datum tsm_handler_out(PG_FUNCTION_ARGS);
 extern Datum internal_in(PG_FUNCTION_ARGS);
 extern Datum internal_out(PG_FUNCTION_ARGS);
 extern Datum opaque_in(PG_FUNCTION_ARGS);
@@ -1213,6 +1215,12 @@ extern Datum ginqueryarrayextract(PG_FUNCTION_ARGS);
 extern Datum ginarrayconsistent(PG_FUNCTION_ARGS);
 extern Datum ginarraytriconsistent(PG_FUNCTION_ARGS);
+/* access/tablesample/bernoulli.c */
+extern Datum tsm_bernoulli_handler(PG_FUNCTION_ARGS);
+/* access/tablesample/system.c */
+extern Datum tsm_system_handler(PG_FUNCTION_ARGS);
 /* access/transam/twophase.c */
 extern Datum pg_prepared_xact(PG_FUNCTION_ARGS);
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index a40c9b12732da07f80c1c410a1651cd0db748188..971153843296d55612f201ace510af8ccbee8cdd 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -156,7 +156,6 @@ extern void free_attstatsslot(Oid atttype,
 extern char *get_namespace_name(Oid nspid);
 extern char *get_namespace_name_or_temp(Oid nspid);
 extern Oid	get_range_subtype(Oid rangeOid);
-extern char *get_tablesample_method_name(Oid tsmid);
 #define type_is_array(typid)  (get_element_type(typid) != InvalidOid)
 /* type_is_array_domain accepts both plain arrays and domains over arrays */
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index f06f03a996f260455d8d7c9a0cbae7c8badf4e22..18404e266eb63ec0384e7cf8c75d87a6683ef0ca 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -81,8 +81,6 @@ enum SysCacheIdentifier
diff --git a/src/port/erand48.c b/src/port/erand48.c
index 12efd8193c4ed7b424961b44ba65de750debd1d6..9d471197c354056c8903a5a9e1c6b0023419f1d1 100644
--- a/src/port/erand48.c
+++ b/src/port/erand48.c
@@ -33,6 +33,9 @@
 #include <math.h>
+#define RAND48_SEED_0	(0x330e)
+#define RAND48_SEED_1	(0xabcd)
+#define RAND48_SEED_2	(0x1234)
 #define RAND48_MULT_0	(0xe66d)
 #define RAND48_MULT_1	(0xdeec)
 #define RAND48_MULT_2	(0x0005)
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index 414299a694114112410b8b62682cafcc448af2e8..e7c242cd22d480c0b566a9a8dc617f07fb03b6f6 100644
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -101,15 +101,17 @@ NOTICE:  f_leak => great manga
   44 |   8 |      1 | rls_regress_user2 | great manga           | manga
 (4 rows)
-NOTICE:  f_leak => my first novel
+-- try a sampled version
+  WHERE f_leak(dtitle) ORDER BY did;
 NOTICE:  f_leak => my first manga
 NOTICE:  f_leak => great science fiction
+NOTICE:  f_leak => great manga
  did | cid | dlevel |      dauthor      |        dtitle         
-   1 |  11 |      1 | rls_regress_user1 | my first novel
    4 |  44 |      1 | rls_regress_user1 | my first manga
    6 |  22 |      1 | rls_regress_user2 | great science fiction
+   8 |  44 |      1 | rls_regress_user2 | great manga
 (3 rows)
 -- viewpoint from rls_regress_user2
@@ -156,20 +158,20 @@ NOTICE:  f_leak => great manga
   44 |   8 |      1 | rls_regress_user2 | great manga           | manga
 (8 rows)
-NOTICE:  f_leak => my first novel
-NOTICE:  f_leak => my second novel
+-- try a sampled version
+  WHERE f_leak(dtitle) ORDER BY did;
 NOTICE:  f_leak => my first manga
+NOTICE:  f_leak => my second manga
 NOTICE:  f_leak => great science fiction
-NOTICE:  f_leak => great technology book
+NOTICE:  f_leak => great manga
  did | cid | dlevel |      dauthor      |        dtitle         
-   1 |  11 |      1 | rls_regress_user1 | my first novel
-   2 |  11 |      2 | rls_regress_user1 | my second novel
    4 |  44 |      1 | rls_regress_user1 | my first manga
+   5 |  44 |      2 | rls_regress_user1 | my second manga
    6 |  22 |      1 | rls_regress_user2 | great science fiction
-   7 |  33 |      2 | rls_regress_user2 | great technology book
-(5 rows)
+   8 |  44 |      1 | rls_regress_user2 | great manga
+(4 rows)
 EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
                         QUERY PLAN                        
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index cd5337531d4b41e90aa469c8f33c2a9f13ca8ddd..1e5b0b9a2c43a522d088417dfa249168b3e5eeab 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2202,6 +2202,10 @@ street| SELECT r.name,
    FROM ONLY road r,
     real_city c
   WHERE (c.outline ## r.thepath);
+test_tablesample_v1| SELECT test_tablesample.id
+   FROM test_tablesample TABLESAMPLE system ((10 * 2)) REPEATABLE (2);
+test_tablesample_v2| SELECT test_tablesample.id
+   FROM test_tablesample TABLESAMPLE system (99);
 toyemp| SELECT emp.name,
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out
index 14acd16da3b3d00d2f1630ac3c7c941f0b4e986f..eb0bc88ef1fb27daee22dd4a2de684df35507417 100644
--- a/src/test/regress/expected/sanity_check.out
+++ b/src/test/regress/expected/sanity_check.out
@@ -128,7 +128,6 @@ pg_shdepend|t
diff --git a/src/test/regress/expected/tablesample.out b/src/test/regress/expected/tablesample.out
index 04e5eb8b807e2d1c95adae29a57e40742fe0ccf0..727a83543973436293d6f6371374a25b54a66078 100644
--- a/src/test/regress/expected/tablesample.out
+++ b/src/test/regress/expected/tablesample.out
@@ -1,107 +1,123 @@
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
-SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10);
+-- use fillfactor so we don't have to load too much data to get multiple pages
+INSERT INTO test_tablesample
+  SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i);
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0);
-  0
-  1
-  2
-  9
-(7 rows)
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
- id 
-(3 rows)
+(6 rows)
-SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
- count 
-    10
-(1 row)
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0);
+ id 
+(0 rows)
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
-  0
-  1
-  2
+  3
+  4
+  5
-  9
-(7 rows)
+(6 rows)
-  0
-  1
-  3
+  6
+  7
+  8
 (5 rows)
-  0
-  5
-(2 rows)
+  7
+(1 row)
-CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
-CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
-SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
-                                 pg_get_viewdef                                 
-  SELECT test_tablesample.id                                                   +
-    FROM test_tablesample TABLESAMPLE system (((10 * 2))::real) REPEATABLE (2);
+-- 100% should give repeatable count results (ie, all rows) in any case
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
+ count 
+    10
 (1 row)
-SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
-                      pg_get_viewdef                       
-  SELECT test_tablesample.id                              +
-    FROM test_tablesample TABLESAMPLE system ((99)::real);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (1+2);
+ count 
+    10
+(1 row)
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (0.4);
+ count 
+    10
 (1 row)
+CREATE VIEW test_tablesample_v1 AS
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
+CREATE VIEW test_tablesample_v2 AS
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
+\d+ test_tablesample_v1
+          View "public.test_tablesample_v1"
+ Column |  Type   | Modifiers | Storage | Description 
+ id     | integer |           | plain   | 
+View definition:
+ SELECT test_tablesample.id
+   FROM test_tablesample TABLESAMPLE system ((10 * 2)) REPEATABLE (2);
+\d+ test_tablesample_v2
+          View "public.test_tablesample_v2"
+ Column |  Type   | Modifiers | Storage | Description 
+ id     | integer |           | plain   | 
+View definition:
+ SELECT test_tablesample.id
+   FROM test_tablesample TABLESAMPLE system (99);
+-- check a sampled query doesn't affect cursor in progress
-DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+DECLARE tablesample_cur CURSOR FOR
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
 FETCH FIRST FROM tablesample_cur;
-  0
+  3
 (1 row)
 FETCH NEXT FROM tablesample_cur;
-  1
+  4
 (1 row)
 FETCH NEXT FROM tablesample_cur;
-  2
+  5
 (1 row)
-  0
-  1
-  2
-  9
-(7 rows)
+  6
+  7
+  8
+(6 rows)
 FETCH NEXT FROM tablesample_cur;
@@ -124,19 +140,19 @@ FETCH NEXT FROM tablesample_cur;
 FETCH FIRST FROM tablesample_cur;
-  0
+  3
 (1 row)
 FETCH NEXT FROM tablesample_cur;
-  1
+  4
 (1 row)
 FETCH NEXT FROM tablesample_cur;
-  2
+  5
 (1 row)
 FETCH NEXT FROM tablesample_cur;
@@ -159,41 +175,129 @@ FETCH NEXT FROM tablesample_cur;
 CLOSE tablesample_cur;
-                                  QUERY PLAN                                   
- Sample Scan (system) on test_tablesample  (cost=0.00..26.35 rows=635 width=4)
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2);
+                             QUERY PLAN                             
+ Sample Scan on test_tablesample
+   Sampling: system ('50'::real) REPEATABLE ('2'::double precision)
+(2 rows)
+  SELECT * FROM test_tablesample_v1;
+                             QUERY PLAN                             
+ Sample Scan on test_tablesample
+   Sampling: system ('20'::real) REPEATABLE ('2'::double precision)
+(2 rows)
+-- check inheritance behavior
+explain (costs off)
+  select count(*) from person tablesample bernoulli (100);
+                   QUERY PLAN                    
+ Aggregate
+   ->  Append
+         ->  Sample Scan on person
+               Sampling: bernoulli ('100'::real)
+         ->  Sample Scan on emp
+               Sampling: bernoulli ('100'::real)
+         ->  Sample Scan on student
+               Sampling: bernoulli ('100'::real)
+         ->  Sample Scan on stud_emp
+               Sampling: bernoulli ('100'::real)
+(10 rows)
+select count(*) from person tablesample bernoulli (100);
+ count 
+    58
 (1 row)
-EXPLAIN SELECT * FROM test_tablesample_v1;
-                                  QUERY PLAN                                   
- Sample Scan (system) on test_tablesample  (cost=0.00..10.54 rows=254 width=4)
+select count(*) from person;
+ count 
+    58
+(1 row)
+-- check that collations get assigned within the tablesample arguments
+SELECT count(*) FROM test_tablesample TABLESAMPLE bernoulli (('1'::text < '0'::text)::int);
+ count 
+     0
+(1 row)
+-- check behavior during rescans, as well as correct handling of min/max pct
+select * from
+  (values (0),(100)) v(pct),
+  lateral (select count(*) from tenk1 tablesample bernoulli (pct)) ss;
+ pct | count 
+   0 |     0
+ 100 | 10000
+(2 rows)
+select * from
+  (values (0),(100)) v(pct),
+  lateral (select count(*) from tenk1 tablesample system (pct)) ss;
+ pct | count 
+   0 |     0
+ 100 | 10000
+(2 rows)
+explain (costs off)
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+  group by pct;
+                       QUERY PLAN                       
+ HashAggregate
+   Group Key: "*VALUES*".column1
+   ->  Nested Loop
+         ->  Values Scan on "*VALUES*"
+         ->  Sample Scan on tenk1
+               Sampling: bernoulli ("*VALUES*".column1)
+(6 rows)
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+  group by pct;
+ pct | count 
+ 100 | 10000
+(1 row)
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample system (pct)) ss
+  group by pct;
+ pct | count 
+ 100 | 10000
 (1 row)
 -- errors
 SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
-ERROR:  tablesample method "foobar" does not exist
+ERROR:  tablesample method foobar does not exist
 LINE 1: SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
-                       ^
+                                                    ^
+ERROR:  TABLESAMPLE parameter cannot be null
-ERROR:  REPEATABLE clause must be NOT NULL numeric value
-LINE 1: ... test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (NULL);
-                                                                 ^
+ERROR:  TABLESAMPLE REPEATABLE parameter cannot be null
 SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
-ERROR:  invalid sample size
-HINT:  Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR:  sample percentage must be between 0 and 100
 SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (200);
-ERROR:  invalid sample size
-HINT:  Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR:  sample percentage must be between 0 and 100
 SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (-1);
-ERROR:  invalid sample size
-HINT:  Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR:  sample percentage must be between 0 and 100
 SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (200);
-ERROR:  invalid sample size
-HINT:  Sample size must be numeric value between 0 and 100 (inclusive).
+ERROR:  sample percentage must be between 0 and 100
 SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1);
-ERROR:  TABLESAMPLE clause can only be used on tables and materialized views
+ERROR:  TABLESAMPLE clause can only be applied to tables and materialized views
 LINE 1: SELECT id FROM test_tablesample_v1 TABLESAMPLE BERNOULLI (1)...
 INSERT INTO test_tablesample_v1 VALUES(1);
@@ -202,30 +306,10 @@ DETAIL:  Views containing TABLESAMPLE are not automatically updatable.
 HINT:  To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule.
 WITH query_select AS (SELECT * FROM test_tablesample)
-ERROR:  TABLESAMPLE clause can only be used on tables and materialized views
+ERROR:  TABLESAMPLE clause can only be applied to tables and materialized views
 SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
 ERROR:  syntax error at or near "TABLESAMPLE"
 LINE 1: ...CT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPL...
--- catalog sanity
-FROM pg_tablesample_method
-WHERE tsminit IS NULL
-   OR tsmseqscan IS NULL
-   OR tsmpagemode IS NULL
-   OR tsmnextblock IS NULL
-   OR tsmnexttuple IS NULL
-   OR tsmend IS NULL
-   OR tsmreset IS NULL
-   OR tsmcost IS NULL;
- tsmname | tsmseqscan | tsmpagemode | tsminit | tsmnextblock | tsmnexttuple | tsmexaminetuple | tsmend | tsmreset | tsmcost 
-(0 rows)
--- done
-DROP TABLE test_tablesample CASCADE;
-NOTICE:  drop cascades to 2 other objects
-DETAIL:  drop cascades to view test_tablesample_v1
-drop cascades to view test_tablesample_v2
diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source
index 70c9cc356a642075b3df47ae897b70673e13e471..9eedb363d06be9602d35d90ff68bb84d7b923c7e 100644
--- a/src/test/regress/output/misc.source
+++ b/src/test/regress/output/misc.source
@@ -686,6 +686,9 @@ SELECT user_relns() AS user_relns
+ test_tablesample
+ test_tablesample_v1
+ test_tablesample_v2
@@ -705,7 +708,7 @@ SELECT user_relns() AS user_relns
-(127 rows)
+(130 rows)
 SELECT name(equipment(hobby_construct(text 'skywalking', text 'mer')));
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 3a607cff46c235ff8b15fdbd095c2098ca3ac217..15d74d4e6eba90abc4476ae85412c0e4dc0b5081 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -110,6 +110,7 @@ test: lock
 test: replica_identity
 test: rowsecurity
 test: object_address
+test: tablesample
 test: alter_generic
 test: alter_operator
 test: misc
@@ -156,4 +157,3 @@ test: with
 test: xml
 test: event_trigger
 test: stats
-test: tablesample
diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql
index 039070b85b73370be2acdb27dfb115770a96427a..e86f8143142cbbee3f7d154874e9fd82bcff702a 100644
--- a/src/test/regress/sql/rowsecurity.sql
+++ b/src/test/regress/sql/rowsecurity.sql
@@ -94,14 +94,18 @@ SET row_security TO ON;
 SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
 SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
+-- try a sampled version
+  WHERE f_leak(dtitle) ORDER BY did;
 -- viewpoint from rls_regress_user2
 SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
 SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
+-- try a sampled version
+  WHERE f_leak(dtitle) ORDER BY did;
 EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
 EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
diff --git a/src/test/regress/sql/tablesample.sql b/src/test/regress/sql/tablesample.sql
index 7b3eb9bedf7bb3a82ccd127a65bb1382e10a415b..eec97934966966229800a47563153669b5ea353a 100644
--- a/src/test/regress/sql/tablesample.sql
+++ b/src/test/regress/sql/tablesample.sql
@@ -1,26 +1,37 @@
-CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10); -- force smaller pages so we don't have to load too much data to get multiple pages
+CREATE TABLE test_tablesample (id int, name text) WITH (fillfactor=10);
+-- use fillfactor so we don't have to load too much data to get multiple pages
-INSERT INTO test_tablesample SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i) ORDER BY i;
+INSERT INTO test_tablesample
+  SELECT i, repeat(i::text, 200) FROM generate_series(0, 9) s(i);
-SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (10);
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (9999);
+SELECT t.id FROM test_tablesample AS t TABLESAMPLE SYSTEM (50) REPEATABLE (0);
+SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (100.0/11) REPEATABLE (0);
+-- 100% should give repeatable count results (ie, all rows) in any case
 SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100);
-SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (1+2);
+SELECT count(*) FROM test_tablesample TABLESAMPLE SYSTEM (100) REPEATABLE (0.4);
-CREATE VIEW test_tablesample_v1 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
-CREATE VIEW test_tablesample_v2 AS SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
-SELECT pg_get_viewdef('test_tablesample_v1'::regclass);
-SELECT pg_get_viewdef('test_tablesample_v2'::regclass);
+CREATE VIEW test_tablesample_v1 AS
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (10*2) REPEATABLE (2);
+CREATE VIEW test_tablesample_v2 AS
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (99);
+\d+ test_tablesample_v1
+\d+ test_tablesample_v2
+-- check a sampled query doesn't affect cursor in progress
-DECLARE tablesample_cur CURSOR FOR SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (100);
+DECLARE tablesample_cur CURSOR FOR
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (0);
 FETCH FIRST FROM tablesample_cur;
 FETCH NEXT FROM tablesample_cur;
 FETCH NEXT FROM tablesample_cur;
 FETCH NEXT FROM tablesample_cur;
 FETCH NEXT FROM tablesample_cur;
@@ -36,12 +47,45 @@ FETCH NEXT FROM tablesample_cur;
 CLOSE tablesample_cur;
-EXPLAIN SELECT * FROM test_tablesample_v1;
+  SELECT id FROM test_tablesample TABLESAMPLE SYSTEM (50) REPEATABLE (2);
+  SELECT * FROM test_tablesample_v1;
+-- check inheritance behavior
+explain (costs off)
+  select count(*) from person tablesample bernoulli (100);
+select count(*) from person tablesample bernoulli (100);
+select count(*) from person;
+-- check that collations get assigned within the tablesample arguments
+SELECT count(*) FROM test_tablesample TABLESAMPLE bernoulli (('1'::text < '0'::text)::int);
+-- check behavior during rescans, as well as correct handling of min/max pct
+select * from
+  (values (0),(100)) v(pct),
+  lateral (select count(*) from tenk1 tablesample bernoulli (pct)) ss;
+select * from
+  (values (0),(100)) v(pct),
+  lateral (select count(*) from tenk1 tablesample system (pct)) ss;
+explain (costs off)
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+  group by pct;
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample bernoulli (pct)) ss
+  group by pct;
+select pct, count(unique1) from
+  (values (0),(100)) v(pct),
+  lateral (select * from tenk1 tablesample system (pct)) ss
+  group by pct;
 -- errors
 SELECT id FROM test_tablesample TABLESAMPLE FOOBAR (1);
 SELECT id FROM test_tablesample TABLESAMPLE BERNOULLI (-1);
@@ -56,19 +100,3 @@ WITH query_select AS (SELECT * FROM test_tablesample)
 SELECT q.* FROM (SELECT * FROM test_tablesample) as q TABLESAMPLE BERNOULLI (5);
--- catalog sanity
-FROM pg_tablesample_method
-WHERE tsminit IS NULL
-   OR tsmseqscan IS NULL
-   OR tsmpagemode IS NULL
-   OR tsmnextblock IS NULL
-   OR tsmnexttuple IS NULL
-   OR tsmend IS NULL
-   OR tsmreset IS NULL
-   OR tsmcost IS NULL;
--- done
-DROP TABLE test_tablesample CASCADE;