From 1500e262b55b581f5f11ce842ef98945c35d8656 Mon Sep 17 00:00:00 2001 From: Hiroshi Inoue <inoue@tpf.co.jp> Date: Mon, 17 Jan 2000 01:15:19 +0000 Subject: [PATCH] Fix for TODO item * spinlock stuck problem when elog(FATAL) and elog(ERROR) inside bufmgr. --- src/backend/storage/buffer/bufmgr.c | 211 ++++++++++++++++++++++------ src/backend/storage/lmgr/proc.c | 5 +- src/include/storage/bufmgr.h | 3 +- 3 files changed, 171 insertions(+), 48 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 4404aa53d36..dc7749522db 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.70 2000/01/15 02:59:33 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.71 2000/01/17 01:15:17 inoue Exp $ * *------------------------------------------------------------------------- */ @@ -74,6 +74,18 @@ static int WriteMode = BUFFER_LATE_WRITE; /* Delayed write is static void WaitIO(BufferDesc *buf, SPINLOCK spinlock); +static void StartBufferIO(BufferDesc *buf, bool forInput); +static void TerminateBufferIO(BufferDesc *buf); +static void ContinueBufferIO(BufferDesc *buf, bool forInput); +extern void InitBufferIO(void); +extern void AbortBufferIO(void); + +/* + * Macro : BUFFER_IS_BROKEN + * Note that write error doesn't mean the buffer broken +*/ +#define BUFFER_IS_BROKEN(buf) ((buf->flags & BM_IO_ERROR) && !(buf->flags & BM_DIRTY)) + #ifndef HAS_TEST_AND_SET static void SignalIO(BufferDesc *buf); extern long *NWaitIOBackendP; /* defined in buf_init.c */ @@ -312,12 +324,7 @@ ReadBufferWithBufferLock(Relation reln, } /* If anyone was waiting for IO to complete, wake them up now */ -#ifdef HAS_TEST_AND_SET - S_UNLOCK(&(bufHdr->io_in_progress_lock)); -#else - if (bufHdr->refcount > 1) - SignalIO(bufHdr); -#endif + TerminateBufferIO(bufHdr); SpinRelease(BufMgrLock); @@ -375,13 +382,19 @@ BufferAlloc(Relation reln, inProgress = (buf->flags & BM_IO_IN_PROGRESS); *foundPtr = TRUE; - if (inProgress) + if (inProgress) /* confirm end of IO */ { WaitIO(buf, BufMgrLock); - if (buf->flags & BM_IO_ERROR) - { + inProgress = (buf->flags & BM_IO_IN_PROGRESS); + } + if (BUFFER_IS_BROKEN(buf)) + { + /* I couldn't understand the following old comment. + * If there's no IO for the buffer and the buffer + * is BROKEN,it should be read again. So start a + * new buffer IO here. - /* + * * wierd race condition: * * We were waiting for someone else to read the buffer. While @@ -396,13 +409,14 @@ BufferAlloc(Relation reln, * * This is never going to happen, don't worry about it. */ - *foundPtr = FALSE; - } + *foundPtr = FALSE; } #ifdef BMTRACE _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCFND); #endif /* BMTRACE */ + if (!(*foundPtr)) + StartBufferIO(buf, true); SpinRelease(BufMgrLock); return buf; @@ -454,7 +468,6 @@ BufferAlloc(Relation reln, * in WaitIO until we're done. */ inProgress = TRUE; - buf->flags |= BM_IO_IN_PROGRESS; #ifdef HAS_TEST_AND_SET /* @@ -462,9 +475,8 @@ BufferAlloc(Relation reln, * since no one had it pinned (it just came off the free * list), no one else can have this lock. */ - Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); - S_LOCK(&(buf->io_in_progress_lock)); #endif /* HAS_TEST_AND_SET */ + StartBufferIO(buf, false); /* * Write the buffer out, being careful to release BufMgrLock @@ -486,12 +498,7 @@ BufferAlloc(Relation reln, inProgress = FALSE; buf->flags |= BM_IO_ERROR; buf->flags &= ~BM_IO_IN_PROGRESS; -#ifdef HAS_TEST_AND_SET - S_UNLOCK(&(buf->io_in_progress_lock)); -#else /* !HAS_TEST_AND_SET */ - if (buf->refcount > 1) - SignalIO(buf); -#endif /* !HAS_TEST_AND_SET */ + TerminateBufferIO(buf); PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; Assert(buf->refcount > 0); buf->refcount--; @@ -533,12 +540,7 @@ BufferAlloc(Relation reln, { inProgress = FALSE; buf->flags &= ~BM_IO_IN_PROGRESS; -#ifdef HAS_TEST_AND_SET - S_UNLOCK(&(buf->io_in_progress_lock)); -#else /* !HAS_TEST_AND_SET */ - if (buf->refcount > 1) - SignalIO(buf); -#endif /* !HAS_TEST_AND_SET */ + TerminateBufferIO(buf); PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; buf->refcount--; buf = (BufferDesc *) NULL; @@ -563,12 +565,7 @@ BufferAlloc(Relation reln, */ if (buf != NULL) { -#ifdef HAS_TEST_AND_SET - S_UNLOCK(&(buf->io_in_progress_lock)); -#else /* !HAS_TEST_AND_SET */ - if (buf->refcount > 1) - SignalIO(buf); -#endif /* !HAS_TEST_AND_SET */ + TerminateBufferIO(buf); /* give up the buffer since we don't need it any more */ PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; Assert(buf->refcount > 0); @@ -588,10 +585,13 @@ BufferAlloc(Relation reln, if (inProgress) { WaitIO(buf2, BufMgrLock); - if (buf2->flags & BM_IO_ERROR) - *foundPtr = FALSE; + inProgress = (buf2->flags & BM_IO_IN_PROGRESS); } + if (BUFFER_IS_BROKEN(buf2)) + *foundPtr = FALSE; + if (!(*foundPtr)) + StartBufferIO(buf2, true); SpinRelease(BufMgrLock); return buf2; @@ -640,12 +640,10 @@ BufferAlloc(Relation reln, */ if (!inProgress) { - buf->flags |= BM_IO_IN_PROGRESS; -#ifdef HAS_TEST_AND_SET - Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); - S_LOCK(&(buf->io_in_progress_lock)); -#endif /* HAS_TEST_AND_SET */ + StartBufferIO(buf, true); } + else + ContinueBufferIO(buf, true); #ifdef BMTRACE _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND); @@ -806,7 +804,9 @@ FlushBuffer(Buffer buffer, bool release) /* To check if block content changed while flushing. - vadim 01/17/97 */ SpinAcquire(BufMgrLock); + WaitIO(bufHdr, BufMgrLock); /* confirm end of IO */ bufHdr->flags &= ~BM_JUST_DIRTIED; + StartBufferIO(bufHdr, false); /* output IO start */ SpinRelease(BufMgrLock); status = smgrflush(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum, @@ -824,6 +824,8 @@ FlushBuffer(Buffer buffer, bool release) BufferFlushCount++; SpinAcquire(BufMgrLock); + bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */ + TerminateBufferIO(bufHdr); /* output IO finished */ /* * If this buffer was marked by someone as DIRTY while we were @@ -998,7 +1000,9 @@ BufferSync() * To check if block content changed while flushing (see * below). - vadim 01/17/97 */ + WaitIO(bufHdr, BufMgrLock); /* confirm end of IO */ bufHdr->flags &= ~BM_JUST_DIRTIED; + StartBufferIO(bufHdr, false); /* output IO start */ /* * If we didn't have the reldesc in our local cache, flush @@ -1034,6 +1038,8 @@ BufferSync() elog(ERROR, "BufferSync: cannot write %u for %s", bufHdr->tag.blockNum, bufHdr->sb_relname); } + bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */ + TerminateBufferIO(bufHdr); /* Sync IO finished */ BufferFlushCount++; /* @@ -1084,10 +1090,16 @@ BufferSync() static void WaitIO(BufferDesc *buf, SPINLOCK spinlock) { - SpinRelease(spinlock); - S_LOCK(&(buf->io_in_progress_lock)); - S_UNLOCK(&(buf->io_in_progress_lock)); - SpinAcquire(spinlock); + /* + * Changed to wait until there's no IO - Inoue 01/13/2000 + */ + while ((buf->flags & BM_IO_IN_PROGRESS) != 0) + { + SpinRelease(spinlock); + S_LOCK(&(buf->io_in_progress_lock)); + S_UNLOCK(&(buf->io_in_progress_lock)); + SpinAcquire(spinlock); + } } #else /* !HAS_TEST_AND_SET */ @@ -2163,3 +2175,112 @@ LockBuffer(Buffer buffer, int mode) #endif } + +/* + * Functions for IO error handling + * + * Note : We assume that nested buffer IO never occur. + * i.e at most one io_in_progress spinlock is held + * per proc. +*/ +static BufferDesc *InProgressBuf = (BufferDesc *)NULL; +static bool IsForInput; + +/* + * Function:StartBufferIO + * (Assumptions) + * My process is executing no IO + * BufMgrLock is held + * BM_IO_IN_PROGRESS mask is not set for the buffer + * The buffer is Pinned + * +*/ +static void StartBufferIO(BufferDesc *buf, bool forInput) +{ + Assert(!InProgressBuf); + Assert(!(buf->flags & BM_IO_IN_PROGRESS)); + buf->flags |= BM_IO_IN_PROGRESS; +#ifdef HAS_TEST_AND_SET + Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))) + S_LOCK(&(buf->io_in_progress_lock)); +#endif /* HAS_TEST_AND_SET */ + InProgressBuf = buf; + IsForInput = forInput; +} + +/* + * Function:TerminateBufferIO + * (Assumptions) + * My process is executing IO for the buffer + * BufMgrLock is held + * The buffer is Pinned + * +*/ +static void TerminateBufferIO(BufferDesc *buf) +{ + Assert(buf == InProgressBuf); +#ifdef HAS_TEST_AND_SET + S_UNLOCK(&(buf->io_in_progress_lock)); +#else + if (buf->refcount > 1) + SignalIO(buf); +#endif /* HAS_TEST_AND_SET */ + InProgressBuf = (BufferDesc *)0; +} + +/* + * Function:ContinueBufferIO + * (Assumptions) + * My process is executing IO for the buffer + * BufMgrLock is held + * The buffer is Pinned + * +*/ +static void ContinueBufferIO(BufferDesc *buf, bool forInput) +{ + Assert(buf == InProgressBuf); + Assert(buf->flags & BM_IO_IN_PROGRESS); + IsForInput = forInput; +} + +extern void InitBufferIO(void) +{ + InProgressBuf = (BufferDesc *)0; +} + +/* + * This function is called from ProcReleaseSpins(). + * BufMgrLock isn't held when this function is called. + * BM_IO_ERROR is always set. If BM_IO_ERROR was already + * set in case of output,this routine would kill all + * backends and reset postmaster. + */ +extern void AbortBufferIO(void) +{ + BufferDesc *buf = InProgressBuf; + if (buf) + { + Assert(buf->flags & BM_IO_IN_PROGRESS); + SpinAcquire(BufMgrLock); + if (IsForInput) + { + Assert(!(buf->flags & BM_DIRTY)); + } + else + { + Assert(!(buf->flags & BM_DIRTY)); + /* Assert(!(buf->flags & BM_IO_ERROR)); */ + if (buf->flags & BM_IO_ERROR) + { + elog(NOTICE, "!!! write error seems permanent !!!"); + elog(NOTICE, "!!! now kill all backends and reset postmaster !!!"); + proc_exit(255); + } + buf->flags |= BM_DIRTY; + } + buf->flags |= BM_IO_ERROR; + TerminateBufferIO(buf); + buf->flags &= ~BM_IO_IN_PROGRESS; + SpinRelease(BufMgrLock); + } +} diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index c685b1ad93c..2a41bdc317a 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.65 1999/12/16 01:25:08 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.66 2000/01/17 01:15:18 inoue Exp $ * *------------------------------------------------------------------------- */ @@ -46,7 +46,7 @@ * This is so that we can support more backends. (system-wide semaphore * sets run out pretty fast.) -ay 4/95 * - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.65 1999/12/16 01:25:08 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.66 2000/01/17 01:15:18 inoue Exp $ */ #include <sys/time.h> #include <unistd.h> @@ -848,6 +848,7 @@ ProcReleaseSpins(PROC *proc) SpinRelease(i); } } + AbortBufferIO(); } /***************************************************************************** diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index fcf9e730ac7..21b43a076d0 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: bufmgr.h,v 1.32 1999/09/28 11:40:53 vadim Exp $ + * $Id: bufmgr.h,v 1.33 2000/01/17 01:15:19 inoue Exp $ * *------------------------------------------------------------------------- */ @@ -186,5 +186,6 @@ extern void SetBufferCommitInfoNeedsSave(Buffer buffer); extern void UnlockBuffers(void); extern void LockBuffer(Buffer buffer, int mode); +extern void AbortBufferIO(void); #endif -- GitLab