From 1b67fe17b89380d2ffb74e850e7b38737bde6c76 Mon Sep 17 00:00:00 2001
From: "Vadim B. Mikheev" <vadim4o@yahoo.com>
Date: Mon, 3 Jul 2000 02:54:21 +0000
Subject: [PATCH] heap' logging

---
 src/backend/access/heap/heapam.c   | 106 ++++++++++++++++++++---------
 src/backend/access/heap/hio.c      | 103 +++++++++-------------------
 src/backend/storage/page/bufpage.c |  21 ++----
 src/include/access/hio.h           |   4 +-
 src/include/access/htup.h          |  13 ++--
 src/include/access/xlog.h          |   7 ++
 src/include/storage/bufpage.h      |   6 +-
 7 files changed, 130 insertions(+), 130 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index d671036f049..9f3a7ac7140 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.74 2000/07/02 22:00:27 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.75 2000/07/03 02:54:15 vadim Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1271,10 +1271,9 @@ heap_get_latest_tid(Relation relation,
 Oid
 heap_insert(Relation relation, HeapTuple tup)
 {
-	/* ----------------
-	 *	increment access statistics
-	 * ----------------
-	 */
+	Buffer buffer;
+
+	/* increment access statistics */
 	tup->tableOid = relation->rd_id;
 	IncrHeapAccessStat(local_insert);
 	IncrHeapAccessStat(global_insert);
@@ -1300,7 +1299,11 @@ heap_insert(Relation relation, HeapTuple tup)
 	tup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
 	tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
 
-	RelationPutHeapTupleAtEnd(relation, tup);
+	/* Find buffer for this tuple */
+	buffer = RelationGetBufferForTuple(relation, tup->t_len, InvalidBuffer);
+
+	/* NO ELOG(ERROR) from here till changes are logged */
+	RelationPutHeapTuple(relation, buffer, tup);
 
 #ifdef XLOG
 	/* XLOG stuff */
@@ -1308,7 +1311,8 @@ heap_insert(Relation relation, HeapTuple tup)
 		xl_heap_insert	xlrec;
 		xlrec.itid.dbId = relation->rd_lockInfo.lockRelId.dbId;
 		xlrec.itid.relId = relation->rd_lockInfo.lockRelId.relId;
-XXX		xlrec.itid.tid = tp.t_self;
+		xlrec.itid.cid = GetCurrentCommandId();
+		xlrec.itid.tid = tup->t_self;
 		xlrec.t_natts = tup->t_data->t_natts;
 		xlrec.t_oid = tup->t_data->t_oid;
 		xlrec.t_hoff = tup->t_data->t_hoff;
@@ -1319,10 +1323,14 @@ XXX		xlrec.itid.tid = tp.t_self;
 			(char*) tup->t_data + offsetof(HeapTupleHeaderData, tbits), 
 			tup->t_len - offsetof(HeapTupleHeaderData, tbits));
 
-		dp->pd_lsn = recptr;
+		((PageHeader) BufferGetPage(buffer))->pd_lsn = recptr;
+		((PageHeader) BufferGetPage(buffer))->pd_sui = ThisStartUpID;
 	}
 #endif
 
+	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+	WriteBuffer(buffer);
+
 	if (IsSystemRelationName(RelationGetRelationName(relation)))
 		RelationMark4RollbackHeapTuple(relation, tup);
 
@@ -1417,11 +1425,13 @@ l1:
 		xl_heap_delete	xlrec;
 		xlrec.dtid.dbId = relation->rd_lockInfo.lockRelId.dbId;
 		xlrec.dtid.relId = relation->rd_lockInfo.lockRelId.relId;
+		xlrec.dtid.cid = GetCurrentCommandId();
 		xlrec.dtid.tid = tp.t_self;
 		XLogRecPtr recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE,
 			(char*) xlrec, sizeof(xlrec), NULL, 0);
 
 		dp->pd_lsn = recptr;
+		dp->pd_sui = ThisStartUpID;
 	}
 #endif
 
@@ -1451,7 +1461,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	ItemId		lp;
 	HeapTupleData oldtup;
 	PageHeader	dp;
-	Buffer		buffer;
+	Buffer		buffer, newbuf;
 	int			result;
 
 	newtup->tableOid = relation->rd_id;
@@ -1531,43 +1541,65 @@ l2:
 	newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
 	newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED);
 
-	/* logically delete old item */
+	/* Find buffer for new tuple */
+
+	if ((unsigned) MAXALIGN(newtup->t_len) <= PageGetFreeSpace((Page) dp))
+		newbuf = buffer;
+	else
+		newbuf = RelationGetBufferForTuple(relation, newtup->t_len, buffer);
+
+	/* NO ELOG(ERROR) from here till changes are logged */
+
+	/* insert new tuple */
+	RelationPutHeapTuple(relation, newbuf, newtup);
+
+	/* logically delete old tuple */
 	TransactionIdStore(GetCurrentTransactionId(), &(oldtup.t_data->t_xmax));
 	oldtup.t_data->t_cmax = GetCurrentCommandId();
 	oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
 							 HEAP_XMAX_INVALID | HEAP_MARKED_FOR_UPDATE);
 
-	/* insert new item */
-	if ((unsigned) MAXALIGN(newtup->t_len) <= PageGetFreeSpace((Page) dp))
-		RelationPutHeapTuple(relation, buffer, newtup);
-	else
+	/* record address of new tuple in t_ctid of old one */
+	oldtup.t_data->t_ctid = newtup->t_self;
+
+#ifdef XLOG
+	/* XLOG stuff */
 	{
+		xl_heap_update	xlrec;
+		xlrec.dtid.dbId = relation->rd_lockInfo.lockRelId.dbId;
+		xlrec.dtid.relId = relation->rd_lockInfo.lockRelId.relId;
+		xlrec.dtid.cid = GetCurrentCommandId();
+		xlrec.itid.tid = newtup->t_self;
+		xlrec.t_natts = newtup->t_data->t_natts;
+		xlrec.t_hoff = newtup->t_data->t_hoff;
+		xlrec.mask = newtup->t_data->t_infomask;
+		
+		XLogRecPtr recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_UPDATE,
+			(char*) xlrec, sizeof(xlrec), 
+			(char*) newtup->t_data + offsetof(HeapTupleHeaderData, tbits), 
+			newtup->t_len - offsetof(HeapTupleHeaderData, tbits));
 
-		/*
-		 * New item won't fit on same page as old item, have to look for a
-		 * new place to put it. Note that we have to unlock current buffer
-		 * context - not good but RelationPutHeapTupleAtEnd uses extend
-		 * lock.
-		 */
-		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-		RelationPutHeapTupleAtEnd(relation, newtup);
-		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+		if (newbuf != buffer)
+		{
+			((PageHeader) BufferGetPage(newbuf))->pd_lsn = recptr;
+			((PageHeader) BufferGetPage(newbuf))->pd_sui = ThisStartUpID;
+		}
+		((PageHeader) BufferGetPage(buffer))->pd_lsn = recptr;
+		((PageHeader) BufferGetPage(buffer))->pd_sui = ThisStartUpID;
 	}
-	/* mark for rollback caches */
-	RelationMark4RollbackHeapTuple(relation, newtup);
-
-	/*
-	 * New item in place, now record address of new tuple in t_ctid of old
-	 * one.
-	 */
-	oldtup.t_data->t_ctid = newtup->t_self;
+#endif
 
+	if (newbuf != buffer)
+	{
+		LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
+		WriteBuffer(newbuf);
+	}
 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+	WriteBuffer(buffer);
 
 	/* invalidate caches */
 	RelationInvalidateHeapTuple(relation, &oldtup);
-
-	WriteBuffer(buffer);
+	RelationMark4RollbackHeapTuple(relation, newtup);
 
 	return HeapTupleMayBeUpdated;
 }
@@ -1648,6 +1680,14 @@ l3:
 		return result;
 	}
 
+#ifdef XLOG
+	/*
+	 * XLOG stuff: no logging is required as long as we have no
+	 * savepoints. For savepoints private log could be used...
+	 */
+	((PageHeader) BufferGetPage(*buffer))->pd_sui = ThisStartUpID;
+#endif
+
 	/* store transaction information of xact marking the tuple */
 	TransactionIdStore(GetCurrentTransactionId(), &(tuple->t_data->t_xmax));
 	tuple->t_data->t_cmax = GetCurrentCommandId();
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index 3fc2a69df1a..9181a7984d7 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Id: hio.c,v 1.31 2000/04/12 17:14:45 momjian Exp $
+ *	  $Id: hio.c,v 1.32 2000/07/03 02:54:15 vadim Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,17 +19,11 @@
 #include "access/hio.h"
 
 /*
- * amputunique	- place tuple at tid
- *	 Currently on errors, calls elog.  Perhaps should return -1?
- *	 Possible errors include the addition of a tuple to the page
- *	 between the time the linep is chosen and the page is L_UP'd.
+ * RelationPutHeapTuple	- place tuple at specified page
  *
- *	 This should be coordinated with the B-tree code.
- *	 Probably needs to have an amdelunique to allow for
- *	 internal index records to be deleted and reordered as needed.
- *	 For the heap AM, this should never be needed.
+ * !!! ELOG(ERROR) IS DISALLOWED HERE !!!
  *
- *	 Note - we assume that caller hold BUFFER_LOCK_EXCLUSIVE on the buffer.
+ * Note - we assume that the caller holds BUFFER_LOCK_EXCLUSIVE on the buffer.
  *
  */
 void
@@ -57,62 +51,41 @@ RelationPutHeapTuple(Relation relation,
 	offnum = PageAddItem((Page) pageHeader, (Item) tuple->t_data,
 						 tuple->t_len, InvalidOffsetNumber, LP_USED);
 
+	if (offnum == InvalidOffsetNumber)
+		elog(STOP, "RelationPutHeapTuple: failed to add tuple");
+
 	itemId = PageGetItemId((Page) pageHeader, offnum);
 	item = PageGetItem((Page) pageHeader, itemId);
 
 	ItemPointerSet(&((HeapTupleHeader) item)->t_ctid,
 				   BufferGetBlockNumber(buffer), offnum);
 
-	/*
-	 * Let the caller do this!
-	 *
-	 * WriteBuffer(buffer);
-	 */
-
 	/* return an accurate tuple */
 	ItemPointerSet(&tuple->t_self, BufferGetBlockNumber(buffer), offnum);
 }
 
 /*
- * This routine is another in the series of attempts to reduce the number
- * of I/O's and system calls executed in the various benchmarks.  In
- * particular, this routine is used to append data to the end of a relation
- * file without excessive lseeks.  This code should do no more than 2 semops
- * in the ideal case.
+ * RelationGetBufferForTuple
  *
- * Eventually, we should cache the number of blocks in a relation somewhere.
- * Until that time, this code will have to do an lseek to determine the number
- * of blocks in a relation.
+ * Returns a (locked) buffer with room to add a tuple of the given len.
+ * If the chosen buffer is Ubuf, no attempt is made to lock it again -
+ * this is for heap_update, which passes the buffer it already holds.
  *
- * This code should ideally do at most 4 semops, 1 lseek, and possibly 1 write
- * to do an append; it's possible to eliminate 2 of the semops if we do direct
- * buffer stuff (!); the lseek and the write can go if we get
- * RelationGetNumberOfBlocks to be useful.
+ * ELOG(ERROR) is allowed here, so this routine *must* be called
+ * before any (unlogged) changes are made in buffer pool.
  *
- * NOTE: This code presumes that we have a write lock on the relation.
- * Not now - we use extend locking...
- *
- * Also note that this routine probably shouldn't have to exist, and does
- * screw up the call graph rather badly, but we are wasting so much time and
- * system resources being massively general that we are losing badly in our
- * performance benchmarks.
  */
-void
-RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
+Buffer
+RelationGetBufferForTuple(Relation relation, Size len, Buffer Ubuf)
 {
 	Buffer		buffer;
 	Page		pageHeader;
 	BlockNumber lastblock;
-	OffsetNumber offnum;
-	Size		len;
-	ItemId		itemId;
-	Item		item;
 
-	len = MAXALIGN(tuple->t_len);		/* be conservative */
+	len = MAXALIGN(len);		/* be conservative */
 
 	/*
-	 * If we're gonna fail for oversize tuple, do it right away... this
-	 * code should go away eventually.
+	 * If we're gonna fail for oversize tuple, do it right away
 	 */
 	if (len > MaxTupleSize)
 		elog(ERROR, "Tuple is too big: size %u, max size %ld",
@@ -152,7 +125,8 @@ RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
 	else
 		buffer = ReadBuffer(relation, lastblock - 1);
 
-	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+	if (buffer != Ubuf)
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 	pageHeader = (Page) BufferGetPage(buffer);
 
 	/*
@@ -160,7 +134,8 @@ RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
 	 */
 	if (len > PageGetFreeSpace(pageHeader))
 	{
-		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+		if (buffer != Ubuf)
+			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 		buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
 		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 		pageHeader = (Page) BufferGetPage(buffer);
@@ -168,36 +143,22 @@ RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
 
 		if (len > PageGetFreeSpace(pageHeader))
 		{
-
-			/*
-			 * BUG: by elog'ing here, we leave the new buffer locked and
-			 * not marked dirty, which may result in an invalid page
-			 * header being left on disk.  But we should not get here
-			 * given the test at the top of the routine, and the whole
-			 * deal should go away when we implement tuple splitting
-			 * anyway...
-			 */
-			elog(ERROR, "Tuple is too big: size %u", len);
+			/* We should not get here given the test at the top */
+			elog(STOP, "Tuple is too big: size %u", len);
 		}
 	}
+	/*
+	 * Caller should check space in Ubuf but...
+	 */
+	else if (buffer == Ubuf)
+	{
+		ReleaseBuffer(buffer);
+		buffer = Ubuf;
+	}
 
 	if (!relation->rd_myxactonly)
 		UnlockPage(relation, 0, ExclusiveLock);
 
-	offnum = PageAddItem((Page) pageHeader, (Item) tuple->t_data,
-						 tuple->t_len, InvalidOffsetNumber, LP_USED);
-
-	itemId = PageGetItemId((Page) pageHeader, offnum);
-	item = PageGetItem((Page) pageHeader, itemId);
-
-	lastblock = BufferGetBlockNumber(buffer);
-
-	ItemPointerSet(&((HeapTupleHeader) item)->t_ctid, lastblock, offnum);
-
-	/* return an accurate tuple self-pointer */
-	ItemPointerSet(&tuple->t_self, lastblock, offnum);
-
-	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-	WriteBuffer(buffer);
+	return(buffer);
 
 }
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index f25faf180c3..43cabceba14 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/storage/page/bufpage.c,v 1.29 2000/04/12 17:15:40 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/storage/page/bufpage.c,v 1.30 2000/07/03 02:54:16 vadim Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -50,26 +50,13 @@ PageInit(Page page, Size pageSize, Size specialSize)
 	PageSetPageSize(page, pageSize);
 }
 
-/*
- * PageAddItem
- *		Adds item to the given page.
- *
- * Note:
- *		This does not assume that the item resides on a single page.
- *		It is the responsiblity of the caller to act appropriately
- *		depending on this fact.  The "pskip" routines provide a
- *		friendlier interface, in this case.
- *
- *		This does change the status of any of the resources passed.
- *		The semantics may change in the future.
- *
- *		This routine should probably be combined with others?
- */
 /* ----------------
  *		PageAddItem
  *
  *		add an item to a page.
  *
+ *   !!! ELOG(ERROR) IS DISALLOWED HERE !!!
+ *
  *	 Notes on interface:
  *		If offsetNumber is valid, shuffle ItemId's down to make room
  *		to use it, if PageManagerShuffle is true.  If PageManagerShuffle is
@@ -126,7 +113,7 @@ PageAddItem(Page page,
 			if (((*itemId).lp_flags & LP_USED) ||
 				((*itemId).lp_len != 0))
 			{
-				elog(ERROR, "PageAddItem: tried overwrite of used ItemId");
+				elog(NOTICE, "PageAddItem: tried overwrite of used ItemId");
 				return InvalidOffsetNumber;
 			}
 		}
diff --git a/src/include/access/hio.h b/src/include/access/hio.h
index 999d2412e76..c0636a4ff34 100644
--- a/src/include/access/hio.h
+++ b/src/include/access/hio.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: hio.h,v 1.14 2000/01/26 05:57:50 momjian Exp $
+ * $Id: hio.h,v 1.15 2000/07/03 02:54:17 vadim Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -18,6 +18,6 @@
 
 extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
 					 HeapTuple tuple);
-extern void RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple);
+extern Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer Ubuf);
 
 #endif	 /* HIO_H */
diff --git a/src/include/access/htup.h b/src/include/access/htup.h
index 3370960e2fd..4665fe60bb7 100644
--- a/src/include/access/htup.h
+++ b/src/include/access/htup.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: htup.h,v 1.31 2000/07/02 22:01:00 momjian Exp $
+ * $Id: htup.h,v 1.32 2000/07/03 02:54:17 vadim Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -69,22 +69,25 @@ typedef HeapTupleHeaderData *HeapTupleHeader;
 #define	XLOG_HEAP_MOVE		0x30
 
 /*
- * All what we need to find changed tuple (14 bytes)
+ * All we need to find the changed tuple (18 bytes)
  */
 typedef struct xl_heaptid
 {
 	Oid					dbId;		/* database */
 	Oid					relId;		/* relation */
+	CommandId			cid;		/* this is for "better" tuple' */
+									/* identification - it allows to avoid */
+									/* "compensation" records for undo */
 	ItemPointerData		tid;		/* changed tuple id */
 } xl_heaptid;
 
-/* This is what we need to know about delete - ALIGN(14) = 16 bytes */
+/* This is what we need to know about delete - ALIGN(18) = 24 bytes */
 typedef struct xl_heap_delete
 {
 	xl_heaptid			dtid;		/* deleted tuple id */
 } xl_heap_delete;
 
-/* This is what we need to know about insert - 22 + data */
+/* This is what we need to know about insert - 26 + data */
 typedef struct xl_heap_insert
 {
 	xl_heaptid			itid;		/* inserted tuple id */
@@ -108,7 +111,7 @@ typedef struct xl_heap_update
 	/* NEW TUPLE DATA FOLLOWS AT END OF STRUCT */
 } xl_heap_update;
 
-/* This is what we need to know about tuple move - ALIGN(20) = 24 bytes */
+/* This is what we need to know about tuple move - 24 bytes */
 typedef struct xl_heap_move
 {
 	xl_heaptid			ftid;		/* moved from */
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index b5fda0b58ad..b86339f072f 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -68,6 +68,13 @@ typedef XLogPageHeaderData *XLogPageHeader;
 
 #define XLP_FIRST_IS_SUBRECORD	0x0001
 
+/*
+ * StartUpID (SUI) - counter of system startups.
+ * It exists to allow removing pg_log after shutdown.
+ */
+typedef	uint32		StartUpID;
+extern	StartUpID	ThisStartUpID;
+
 extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, 
 			char *hdr, uint32 hdrlen,
 			char *buf, uint32 buflen);
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 15d1106f26c..30b5a93ad64 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: bufpage.h,v 1.29 2000/06/02 10:20:27 vadim Exp $
+ * $Id: bufpage.h,v 1.30 2000/07/03 02:54:21 vadim Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -118,8 +118,10 @@ typedef OpaqueData *Opaque;
 typedef struct PageHeaderData
 {
 #ifdef XLOG
-	XLogRecPtr	pd_lsn;			/* XLOG: next byte after last byte of xlog */
+	XLogRecPtr	pd_lsn;			/* LSN: next byte after last byte of xlog */
 								/* record for last change of this page */
+	StartUpID	pd_sui;			/* SUI of last changes (currently it's */
+								/* used by heap AM only) */
 #endif
 	LocationIndex pd_lower;		/* offset to start of free space */
 	LocationIndex pd_upper;		/* offset to end of free space */
-- 
GitLab