diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index ce52e2dd48ed8e4cbdad1fdf0acb4c8ec27f01dd..2d81383ae8ad467427c6161694517856df26d055 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2149,15 +2149,6 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
 										   &vmbuffer, NULL);
 		page = BufferGetPage(buffer);
 
-		if (PageIsAllVisible(page))
-		{
-			all_visible_cleared = true;
-			PageClearAllVisible(page);
-			visibilitymap_clear(relation,
-								BufferGetBlockNumber(buffer),
-								vmbuffer);
-		}
-
 		/* NO EREPORT(ERROR) from here till changes are logged */
 		START_CRIT_SECTION();
 
@@ -2172,6 +2163,15 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
 			RelationPutHeapTuple(relation, buffer, heaptup);
 		}
 
+		if (PageIsAllVisible(page))
+		{
+			all_visible_cleared = true;
+			PageClearAllVisible(page);
+			visibilitymap_clear(relation,
+								BufferGetBlockNumber(buffer),
+								vmbuffer);
+		}
+
 		/*
 		 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
 		 */
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 5696abe4d2a11cec7ab97e8525458dcc124dad39..9152c7d151150573fbdbd5bb8d7a8125da18adf6 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -293,6 +293,13 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr,
  * relation. On return, *buf is a valid buffer with the map page containing
  * the bit for heapBlk, or InvalidBuffer. The caller is responsible for
  * releasing *buf after it's done testing and setting bits.
+ *
+ * NOTE: This function is typically called without a lock on the heap page,
+ * so somebody else could change the bit just after we look at it.  In fact,
+ * since we don't lock the visibility map page either, it's even possible that
+ * someone else could have changed the bit just before we look at it, and yet
+ * we might see the old value.  It is the caller's responsibility to deal with
+ * all concurrency issues!
  */
 bool
 visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
@@ -327,7 +334,9 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
 	map = PageGetContents(BufferGetPage(*buf));
 
 	/*
-	 * We don't need to lock the page, as we're only looking at a single bit.
+	 * A single-bit read is atomic.  There could be memory-ordering effects
+	 * here, but for performance reasons we make it the caller's job to worry
+	 * about that.
 	 */
 	result = (map[mapByte] & (1 << mapBit)) ? true : false;
 
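The caveat added above pushes all concurrency handling onto visibilitymap_test's callers.  As a rough sketch of what that means in practice (a hypothetical helper, not part of this patch, using only the standard buffer-manager API), a caller that needs an authoritative answer treats the unlocked map read as a hint and rechecks the page-level PD_ALL_VISIBLE flag while holding the heap buffer's content lock, much as the vacuumlazy.c change further down does before emitting its warning:

#include "postgres.h"

#include "access/visibilitymap.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"

/*
 * Hypothetical illustration only: verify an all-visible hint read from the
 * visibility map by rechecking the page-level bit under the buffer lock.
 */
static bool
page_is_all_visible_checked(Relation rel, BlockNumber blkno)
{
	Buffer		vmbuffer = InvalidBuffer;
	Buffer		buf;
	bool		result;

	/* Unlocked hint read; may be stale in either direction. */
	if (!visibilitymap_test(rel, blkno, &vmbuffer))
	{
		if (BufferIsValid(vmbuffer))
			ReleaseBuffer(vmbuffer);
		return false;
	}

	/* The page-level bit is examined under the content lock, so it is current. */
	buf = ReadBuffer(rel, blkno);
	LockBuffer(buf, BUFFER_LOCK_SHARE);
	result = PageIsAllVisible(BufferGetPage(buf));
	UnlockReleaseBuffer(buf);

	if (BufferIsValid(vmbuffer))
		ReleaseBuffer(vmbuffer);
	return result;
}

Callers that can tolerate a slightly stale answer, such as the index-only scan path touched at the end of this patch, skip any recheck and instead rely on the memory-ordering argument documented in nodeIndexonlyscan.c.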
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 0e0193d40e1818f4177bcb5bd68148c2739eb1ec..3ff56a736648c5ab45533b376892b21e9ec569e3 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -419,6 +419,15 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 	 * Note: if scan_all is true, we won't actually skip any pages; but we
 	 * maintain next_not_all_visible_block anyway, so as to set up the
 	 * all_visible_according_to_vm flag correctly for each page.
+	 *
+	 * Note: The value returned by visibilitymap_test could be slightly
+	 * out-of-date, since we make this test before reading the corresponding
+	 * heap page or locking the buffer.  This is OK.  If we mistakenly think
+	 * that the page is all-visible when in fact the flag's just been cleared,
+	 * we might fail to vacuum the page.  But it's OK to skip pages when
+	 * scan_all is not set, so no great harm done; the next vacuum will find
+	 * them.  If we make the reverse mistake and vacuum a page unnecessarily,
+	 * it'll just be a no-op.
 	 */
 	for (next_not_all_visible_block = 0;
 		 next_not_all_visible_block < nblocks;
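To make the trade-off above concrete, here is the skip decision reduced to a hypothetical helper (the real loop in lazy_scan_heap batches skips and precomputes next_not_all_visible_block, and this sketch assumes vacuumlazy.c's existing includes and the visibilitymap_test signature shown earlier):

/*
 * Hypothetical helper: the per-block skip decision defended above.  A stale
 * "true" merely defers vacuuming the page until the next vacuum; a stale
 * "false" merely costs one redundant, harmless page visit.
 */
static bool
skip_this_block(Relation onerel, BlockNumber blkno, bool scan_all,
				Buffer *vmbuffer)
{
	/* When scan_all is set we never skip, regardless of what the map says. */
	if (scan_all)
		return false;

	/* Unlocked, possibly slightly stale read of the visibility map. */
	return visibilitymap_test(onerel, blkno, vmbuffer);
}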
@@ -852,22 +861,37 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		freespace = PageGetHeapFreeSpace(page);
 
 		/* mark page all-visible, if appropriate */
-		if (all_visible && !all_visible_according_to_vm)
+		if (all_visible)
 		{
 			if (!PageIsAllVisible(page))
 			{
 				PageSetAllVisible(page);
 				MarkBufferDirty(buf);
+				visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer,
+								  visibility_cutoff_xid);
+			}
+			else if (!all_visible_according_to_vm)
+			{
+				/*
+				 * It should never be the case that the visibility map bit
+				 * is set while the page-level bit is clear, but the reverse
+				 * is allowed.  Set the visibility map bit as well so that
+				 * we get back in sync.
+				 */
+				visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer,
+								  visibility_cutoff_xid);
 			}
-			visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, vmbuffer,
-							  visibility_cutoff_xid);
 		}
 
 		/*
 		 * As of PostgreSQL 9.2, the visibility map bit should never be set if
-		 * the page-level bit is clear.
+		 * the page-level bit is clear.  However, it's possible that the bit
+		 * got cleared after we checked it and before we took the buffer
+		 * content lock, so we must recheck before jumping to the conclusion
+		 * that something bad has happened.
 		 */
-		else if (all_visible_according_to_vm && !PageIsAllVisible(page))
+		else if (all_visible_according_to_vm && !PageIsAllVisible(page)
+				 && visibilitymap_test(onerel, blkno, &vmbuffer))
 		{
 			elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
 				 relname, blkno);
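Taken together, the two branches rewritten above amount to the following bookkeeping, shown here as a hypothetical consolidation for readability (the real code operates directly on lazy_scan_heap's local variables, assumes its existing includes, and continues past the lines quoted in this hunk):

/*
 * Hypothetical consolidation of the logic above; not the patch's code.  The
 * page-level bit is set before the map bit, a page-level bit without a map
 * bit is quietly repaired, and the "map bit set but page-level bit clear"
 * warning fires only if the map bit is still set when rechecked under the
 * buffer content lock.
 */
static void
sync_all_visible_state(Relation onerel, BlockNumber blkno, Buffer buf,
					   Page page, bool all_visible,
					   bool all_visible_according_to_vm, Buffer *vmbuffer,
					   TransactionId visibility_cutoff_xid, char *relname)
{
	if (all_visible && !PageIsAllVisible(page))
	{
		PageSetAllVisible(page);
		MarkBufferDirty(buf);
		visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, *vmbuffer,
						  visibility_cutoff_xid);
	}
	else if (all_visible && !all_visible_according_to_vm)
	{
		/* Page-level bit already set; bring the map back in sync. */
		visibilitymap_set(onerel, blkno, InvalidXLogRecPtr, *vmbuffer,
						  visibility_cutoff_xid);
	}
	else if (!all_visible && all_visible_according_to_vm &&
			 !PageIsAllVisible(page) &&
			 visibilitymap_test(onerel, blkno, vmbuffer))
	{
		elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
			 relname, blkno);
	}
}

Setting the page-level bit before the map bit preserves the invariant the warning checks for: the map bit may lag the page-level bit, but it should never lead it.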
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 4abd805aa31733fafc0b18e80a65c4f91695fae5..af31671b3eb60979fdfbcacd4c362cecc231c32c 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -82,6 +82,19 @@ IndexOnlyNext(IndexOnlyScanState *node)
 		 * We can skip the heap fetch if the TID references a heap page on
 		 * which all tuples are known visible to everybody.  In any case,
 		 * we'll use the index tuple not the heap tuple as the data source.
+		 *
+		 * Note on Memory Ordering Effects: visibilitymap_test does not lock
+		 * the visibility map buffer, and therefore the result we read here
+		 * could be slightly stale.  However, it can't be stale enough to
+		 * matter.  It suffices to show that (1) there is a read barrier
+		 * between the time we read the index TID and the time we test the
+		 * visibility map; and (2) there is a write barrier between the time
+		 * some other concurrent process clears the visibility map bit and the
+		 * time it inserts the index TID.  Since acquiring or releasing an
+		 * LWLock interposes a full barrier, this is easy to show: (1) is
+		 * satisfied by the release of the index buffer content lock after
+		 * reading the TID; and (2) is satisfied by the acquisition of the
+		 * buffer content lock in order to insert the TID.
 		 */
 		if (!visibilitymap_test(scandesc->heapRelation,
 								ItemPointerGetBlockNumber(tid),
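The barrier argument above is easier to internalize with a concrete interleaving in hand.  Below is a standalone analogy in plain C with pthreads (not PostgreSQL code): the "inserter" clears the hint bit before taking the lock under which it publishes the TID, and a "scanner" that reads the TID under that lock and only then performs an unlocked read of the hint can never observe the TID together with a stale set bit.  A pthread mutex provides release/acquire ordering rather than the full barriers the comment attributes to LWLocks, but that weaker guarantee is already enough for this pattern.

/* Build with: cc -pthread vm_ordering_demo.c */
#include <pthread.h>
#include <stdio.h>

static int	vm_bit = 1;			/* stands in for the visibility map bit */
static long published_tid = 0;	/* stands in for the index TID; 0 = absent */
static pthread_mutex_t content_lock = PTHREAD_MUTEX_INITIALIZER;

static void *
inserter(void *arg)
{
	(void) arg;

	/* Clear the hint first (done under the heap buffer lock in PostgreSQL). */
	vm_bit = 0;

	/* Acquire the "index page" lock and publish the TID. */
	pthread_mutex_lock(&content_lock);
	published_tid = 42;
	pthread_mutex_unlock(&content_lock);
	return NULL;
}

static void *
scanner(void *arg)
{
	long		tid;

	(void) arg;

	/* Read the TID under the "index page" lock, then release it. */
	pthread_mutex_lock(&content_lock);
	tid = published_tid;
	pthread_mutex_unlock(&content_lock);

	/*
	 * Unlocked read of the hint, as visibilitymap_test does.  If we saw the
	 * published TID, the ordering imposed by the mutex guarantees we also
	 * see the cleared hint, so this message can never be printed.
	 */
	if (tid != 0 && vm_bit != 0)
		printf("stale hint observed for TID %ld (should never happen)\n", tid);
	return NULL;
}

int
main(void)
{
	pthread_t	t1, t2;

	pthread_create(&t1, NULL, inserter, NULL);
	pthread_create(&t2, NULL, scanner, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}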