Skip to content
Snippets Groups Projects
logicalfuncs.c 10.7 KiB
Newer Older
Robert Haas's avatar
Robert Haas committed
/*-------------------------------------------------------------------------
 *
 * logicalfuncs.c
 *
 *	   Support functions for using logical decoding and management of
Robert Haas's avatar
Robert Haas committed
 *	   logical replication slots via SQL.
 *
 *
 * Copyright (c) 2012-2016, PostgreSQL Global Development Group
Robert Haas's avatar
Robert Haas committed
 *
 * IDENTIFICATION
 *	  src/backend/replication/logicalfuncs.c
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <unistd.h>

#include "fmgr.h"
#include "funcapi.h"
#include "miscadmin.h"

#include "access/xlog_internal.h"
#include "access/xlogutils.h"
#include "access/xact.h"

#include "catalog/pg_type.h"

Robert Haas's avatar
Robert Haas committed
#include "nodes/makefuncs.h"
Robert Haas's avatar
Robert Haas committed
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/pg_lsn.h"
#include "utils/resowner.h"
#include "utils/lsyscache.h"
Robert Haas's avatar
Robert Haas committed
#include "replication/decode.h"
#include "replication/logical.h"
#include "replication/logicalfuncs.h"
#include "replication/message.h"
Robert Haas's avatar
Robert Haas committed
#include "storage/fd.h"

/* private date for writing out data */
Bruce Momjian's avatar
Bruce Momjian committed
typedef struct DecodingOutputState
{
Robert Haas's avatar
Robert Haas committed
	Tuplestorestate *tupstore;
Bruce Momjian's avatar
Bruce Momjian committed
	TupleDesc	tupdesc;
	bool		binary_output;
	int64		returned_rows;
Robert Haas's avatar
Robert Haas committed
} DecodingOutputState;

/*
 * Prepare for an output plugin write.
Robert Haas's avatar
Robert Haas committed
 */
static void
LogicalOutputPrepareWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
						  bool last_write)
{
	resetStringInfo(ctx->out);
}

/*
 * Perform output plugin write into tuplestore.
 */
static void
LogicalOutputWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
				   bool last_write)
{
	Datum		values[3];
	bool		nulls[3];
	DecodingOutputState *p;

	/* SQL Datums can only be of a limited length... */
	if (ctx->out->len > MaxAllocSize - VARHDRSZ)
		elog(ERROR, "too much output for sql interface");

	p = (DecodingOutputState *) ctx->output_writer_private;

	memset(nulls, 0, sizeof(nulls));
	values[0] = LSNGetDatum(lsn);
	values[1] = TransactionIdGetDatum(xid);

	/*
	 * Assert ctx->out is in database encoding when we're writing textual
	 * output.
	 */
	if (!p->binary_output)
		Assert(pg_verify_mbstr(GetDatabaseEncoding(),
							   ctx->out->data, ctx->out->len,
							   false));

	/* ick, but cstring_to_text_with_len works for bytea perfectly fine */
	values[2] = PointerGetDatum(
Bruce Momjian's avatar
Bruce Momjian committed
					cstring_to_text_with_len(ctx->out->data, ctx->out->len));
Robert Haas's avatar
Robert Haas committed

	tuplestore_putvalues(p->tupstore, p->tupdesc, values, nulls);
	p->returned_rows++;
}

static void
check_permissions(void)
{
	if (!superuser() && !has_rolreplication(GetUserId()))
Robert Haas's avatar
Robert Haas committed
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("must be superuser or replication role to use replication slots"))));
}

int
logical_read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
	int reqLen, XLogRecPtr targetRecPtr, char *cur_page, TimeLineID *pageTLI)
{
	return read_local_xlog_page(state, targetPagePtr, reqLen,
								targetRecPtr, cur_page, pageTLI);
Robert Haas's avatar
Robert Haas committed
}

/*
 * Helper function for the various SQL callable logical decoding functions.
 */
static Datum
pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool binary)
{
Robert Haas's avatar
Robert Haas committed
	XLogRecPtr	upto_lsn;
	int32		upto_nchanges;
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	MemoryContext per_query_ctx;
	MemoryContext oldcontext;
	XLogRecPtr	end_of_wal;
	XLogRecPtr	startptr;
	LogicalDecodingContext *ctx;
	ResourceOwner old_resowner = CurrentResourceOwner;
	ArrayType  *arr;
	Size		ndim;
	List	   *options = NIL;
	DecodingOutputState *p;

	check_permissions();

	CheckLogicalDecodingRequirements();

	if (PG_ARGISNULL(0))
		ereport(ERROR,
				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
				 errmsg("slot name must not be null")));
	name = PG_GETARG_NAME(0);

Robert Haas's avatar
Robert Haas committed
	if (PG_ARGISNULL(1))
		upto_lsn = InvalidXLogRecPtr;
	else
		upto_lsn = PG_GETARG_LSN(1);

	if (PG_ARGISNULL(2))
		upto_nchanges = InvalidXLogRecPtr;
	else
		upto_nchanges = PG_GETARG_INT32(2);

	if (PG_ARGISNULL(3))
		ereport(ERROR,
				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
				 errmsg("options array must not be null")));
	arr = PG_GETARG_ARRAYTYPE_P(3);

Robert Haas's avatar
Robert Haas committed
	/* check to see if caller supports us returning a tuplestore */
	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("set-valued function called in context that cannot accept a set")));
	if (!(rsinfo->allowedModes & SFRM_Materialize))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("materialize mode required, but it is not allowed in this context")));

	/* state to write output to */
	p = palloc0(sizeof(DecodingOutputState));

	p->binary_output = binary;

	/* Build a tuple descriptor for our result type */
	if (get_call_result_type(fcinfo, NULL, &p->tupdesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
	oldcontext = MemoryContextSwitchTo(per_query_ctx);

	/* Deconstruct options array */
	ndim = ARR_NDIM(arr);
Robert Haas's avatar
Robert Haas committed
	if (ndim > 1)
	{
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("array must be one-dimensional")));
	}
	else if (array_contains_nulls(arr))
	{
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("array must not contain nulls")));
	}
	else if (ndim == 1)
	{
		int			nelems;
		Datum	   *datum_opts;
		int			i;

		Assert(ARR_ELEMTYPE(arr) == TEXTOID);

		deconstruct_array(arr, TEXTOID, -1, false, 'i',
						  &datum_opts, NULL, &nelems);

		if (nelems % 2 != 0)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("array must have even number of elements")));

		for (i = 0; i < nelems; i += 2)
		{
			char	   *name = TextDatumGetCString(datum_opts[i]);
			char	   *opt = TextDatumGetCString(datum_opts[i + 1]);

			options = lappend(options, makeDefElem(name, (Node *) makeString(opt)));
		}
	}

	p->tupstore = tuplestore_begin_heap(true, false, work_mem);
	rsinfo->returnMode = SFRM_Materialize;
	rsinfo->setResult = p->tupstore;
	rsinfo->setDesc = p->tupdesc;

	/* compute the current end-of-wal */
	if (!RecoveryInProgress())
		end_of_wal = GetFlushRecPtr();
	else
		end_of_wal = GetXLogReplayRecPtr(NULL);

Robert Haas's avatar
Robert Haas committed
	ReplicationSlotAcquire(NameStr(*name));

	PG_TRY();
	{
		/* restart at slot's confirmed_flush */
Robert Haas's avatar
Robert Haas committed
		ctx = CreateDecodingContext(InvalidXLogRecPtr,
									options,
									logical_read_local_xlog_page,
									LogicalOutputPrepareWrite,
									LogicalOutputWrite);

		MemoryContextSwitchTo(oldcontext);

		/*
		 * Check whether the output plugin writes textual output if that's
Robert Haas's avatar
Robert Haas committed
		 * what we need.
		 */
		if (!binary &&
Bruce Momjian's avatar
Bruce Momjian committed
			ctx->options.output_type !=OUTPUT_PLUGIN_TEXTUAL_OUTPUT)
Robert Haas's avatar
Robert Haas committed
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("logical decoding output plugin \"%s\" produces binary output, but function \"%s\" expects textual data",
							NameStr(MyReplicationSlot->data.plugin),
							format_procedure(fcinfo->flinfo->fn_oid))));
Robert Haas's avatar
Robert Haas committed

		ctx->output_writer_private = p;

		 * Decoding of WAL must start at restart_lsn so that the entirety of
		 * xacts that committed after the slot's confirmed_flush can be
		 * accumulated into reorder buffers.
Robert Haas's avatar
Robert Haas committed
		startptr = MyReplicationSlot->data.restart_lsn;

		CurrentResourceOwner = ResourceOwnerCreate(CurrentResourceOwner, "logical decoding");

		/* invalidate non-timetravel entries */
		InvalidateSystemCaches();

		while ((startptr != InvalidXLogRecPtr && startptr < end_of_wal) ||
			   (ctx->reader->EndRecPtr != InvalidXLogRecPtr && ctx->reader->EndRecPtr < end_of_wal))
Robert Haas's avatar
Robert Haas committed
		{
			XLogRecord *record;
			char	   *errm = NULL;

			record = XLogReadRecord(ctx->reader, startptr, &errm);
			if (errm)
				elog(ERROR, "%s", errm);

			/*
			 * Now that we've set up the xlog reader state, subsequent calls
			 * pass InvalidXLogRecPtr to say "continue from last record"
			 */
Robert Haas's avatar
Robert Haas committed
			startptr = InvalidXLogRecPtr;

			/*
			 * The {begin_txn,change,commit_txn}_wrapper callbacks above will
			 * store the description into our tuplestore.
			 */
			if (record != NULL)
				LogicalDecodingProcessRecord(ctx, ctx->reader);
Robert Haas's avatar
Robert Haas committed

			/* check limits */
			if (upto_lsn != InvalidXLogRecPtr &&
				upto_lsn <= ctx->reader->EndRecPtr)
				break;
			if (upto_nchanges != 0 &&
				upto_nchanges <= p->returned_rows)
				break;

		tuplestore_donestoring(tupstore);

		CurrentResourceOwner = old_resowner;

		/*
		 * Next time, start where we left off. (Hunting things, the family
		 * business..)
		 */
		if (ctx->reader->EndRecPtr != InvalidXLogRecPtr && confirm)
			LogicalConfirmReceivedLocation(ctx->reader->EndRecPtr);
			/*
			 * If only the confirmed_flush_lsn has changed the slot won't get
			 * marked as dirty by the above. Callers on the walsender interface
			 * are expected to keep track of their own progress and don't need
			 * it written out. But SQL-interface users cannot specify their own
			 * start positions and it's harder for them to keep track of their
			 * progress, so we should make more of an effort to save it for them.
			 *
			 * Dirty the slot so it's written out at the next checkpoint. We'll
			 * still lose its position on crash, as documented, but it's better
			 * than always losing the position even on clean restart.
			 */
			ReplicationSlotMarkDirty();
		}

		/* free context, call shutdown callback */
		FreeDecodingContext(ctx);

		ReplicationSlotRelease();
		InvalidateSystemCaches();
Robert Haas's avatar
Robert Haas committed
	}
	PG_CATCH();
	{
		/* clear all timetravel entries */
		InvalidateSystemCaches();

		PG_RE_THROW();
	}
	PG_END_TRY();

	return (Datum) 0;
}

/*
 * SQL function returning the changestream as text, consuming the data.
 */
Datum
pg_logical_slot_get_changes(PG_FUNCTION_ARGS)
{
	return pg_logical_slot_get_changes_guts(fcinfo, true, false);
Robert Haas's avatar
Robert Haas committed
}

/*
 * SQL function returning the changestream as text, only peeking ahead.
 */
Datum
pg_logical_slot_peek_changes(PG_FUNCTION_ARGS)
{
	return pg_logical_slot_get_changes_guts(fcinfo, false, false);
Robert Haas's avatar
Robert Haas committed
}

/*
 * SQL function returning the changestream in binary, consuming the data.
 */
Datum
pg_logical_slot_get_binary_changes(PG_FUNCTION_ARGS)
{
	return pg_logical_slot_get_changes_guts(fcinfo, true, true);
Robert Haas's avatar
Robert Haas committed
}

/*
 * SQL function returning the changestream in binary, only peeking ahead.
 */
Datum
pg_logical_slot_peek_binary_changes(PG_FUNCTION_ARGS)
{
	return pg_logical_slot_get_changes_guts(fcinfo, false, true);
Andres Freund's avatar
Andres Freund committed
 * SQL function for writing logical decoding message into WAL.
 */
Datum
pg_logical_emit_message_bytea(PG_FUNCTION_ARGS)
{
	bool		transactional = PG_GETARG_BOOL(0);
	char	   *prefix = text_to_cstring(PG_GETARG_TEXT_PP(1));
	bytea	   *data = PG_GETARG_BYTEA_PP(2);
	XLogRecPtr	lsn;

	lsn = LogLogicalMessage(prefix, VARDATA_ANY(data), VARSIZE_ANY_EXHDR(data),
							transactional);
	PG_RETURN_LSN(lsn);
}

Datum
pg_logical_emit_message_text(PG_FUNCTION_ARGS)
{
	/* bytea and text are compatible */
	return pg_logical_emit_message_bytea(fcinfo);
}