diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index ea023253189b15878da77d91808b4faa5486893d..b9d42bad6d2aba1b97a0e9ae901277d4089568bc 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.84 2005/05/07 21:32:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.85 2005/06/02 05:55:28 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -113,6 +113,13 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level) metaopaque = (BTPageOpaque) PageGetSpecialPointer(page); metaopaque->btpo_flags = BTP_META; + + /* + * Set pd_lower just past the end of the metadata. This is not + * essential but it makes the page look compressible to xlog.c. + */ + ((PageHeader) page)->pd_lower = + ((char *) metad + sizeof(BTMetaPageData)) - (char *) page; } /* diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index ade60619a3d0de564bfbf56900f6b2838db4f85b..536bc17718039860b30e048fe4a9726cd7882e06 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.20 2005/03/22 06:17:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.21 2005/06/02 05:55:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -135,6 +135,13 @@ _bt_restore_meta(Relation reln, XLogRecPtr lsn, pageop = (BTPageOpaque) PageGetSpecialPointer(metapg); pageop->btpo_flags = BTP_META; + /* + * Set pd_lower just past the end of the metadata. This is not + * essential but it makes the page look compressible to xlog.c. 
+ */ + ((PageHeader) metapg)->pd_lower = + ((char *) md + sizeof(BTMetaPageData)) - (char *) metapg; + PageSetLSN(metapg, lsn); PageSetTLI(metapg, ThisTimeLineID); LockBuffer(metabuf, BUFFER_LOCK_UNLOCK); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 2352313b051dac931163d9652677984382dbe969..27f6354987d71b1ac6bab8a957dd9d2262a1bbfa 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.194 2005/05/31 19:10:28 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.195 2005/06/02 05:55:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -434,6 +434,7 @@ static void exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg); static bool recoveryStopsHere(XLogRecord *record, bool *includeThis); +static void SetBkpBlock(BkpBlock *bkpb, Buffer buffer); static bool AdvanceXLInsertBuffer(void); static void XLogWrite(XLogwrtRqst WriteRqst); static int XLogFileInit(uint32 log, uint32 seg, @@ -499,8 +500,10 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) bool dtbuf_bkp[XLR_MAX_BKP_BLOCKS]; BkpBlock dtbuf_xlg[XLR_MAX_BKP_BLOCKS]; XLogRecPtr dtbuf_lsn[XLR_MAX_BKP_BLOCKS]; - XLogRecData dtbuf_rdt[2 * XLR_MAX_BKP_BLOCKS]; - crc64 rdata_crc; + XLogRecData dtbuf_rdt1[XLR_MAX_BKP_BLOCKS]; + XLogRecData dtbuf_rdt2[XLR_MAX_BKP_BLOCKS]; + XLogRecData dtbuf_rdt3[XLR_MAX_BKP_BLOCKS]; + pg_crc32 rdata_crc; uint32 len, write_len; unsigned i; @@ -531,8 +534,10 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) /* * Here we scan the rdata list, determine which buffers must be backed * up, and compute the CRC values for the data. 
Note that the record - * header isn't added into the CRC yet since we don't know the final - * length or info bits quite yet. + * header isn't added into the CRC initially since we don't know the + * final length or info bits quite yet. Thus, the CRC will represent + * the CRC of the whole record in the order "rdata, then backup blocks, + * then record header". * * We may have to loop back to here if a race condition is detected * below. We could prevent the race by doing all this work while @@ -553,7 +558,7 @@ begin:; dtbuf_bkp[i] = false; } - INIT_CRC64(rdata_crc); + INIT_CRC32(rdata_crc); len = 0; for (rdt = rdata;;) { @@ -561,7 +566,7 @@ begin:; { /* Simple data, just include it */ len += rdt->len; - COMP_CRC64(rdata_crc, rdt->data, rdt->len); + COMP_CRC32(rdata_crc, rdt->data, rdt->len); } else { @@ -576,7 +581,7 @@ begin:; else if (rdt->data) { len += rdt->len; - COMP_CRC64(rdata_crc, rdt->data, rdt->len); + COMP_CRC32(rdata_crc, rdt->data, rdt->len); } break; } @@ -591,26 +596,14 @@ begin:; dtbuf_lsn[i] = *((XLogRecPtr *) BufferGetBlock(rdt->buffer)); if (XLByteLE(dtbuf_lsn[i], RedoRecPtr)) { - crc64 dtcrc; - dtbuf_bkp[i] = true; + SetBkpBlock(&(dtbuf_xlg[i]), rdt->buffer); rdt->data = NULL; - INIT_CRC64(dtcrc); - COMP_CRC64(dtcrc, - BufferGetBlock(dtbuf[i]), - BLCKSZ); - dtbuf_xlg[i].node = BufferGetFileNode(dtbuf[i]); - dtbuf_xlg[i].block = BufferGetBlockNumber(dtbuf[i]); - COMP_CRC64(dtcrc, - (char *) &(dtbuf_xlg[i]) + sizeof(crc64), - sizeof(BkpBlock) - sizeof(crc64)); - FIN_CRC64(dtcrc); - dtbuf_xlg[i].crc = dtcrc; } else if (rdt->data) { len += rdt->len; - COMP_CRC64(rdata_crc, rdt->data, rdt->len); + COMP_CRC32(rdata_crc, rdt->data, rdt->len); } break; } @@ -625,6 +618,39 @@ begin:; rdt = rdt->next; } + /* + * Now add the backup block headers and data into the CRC + */ + for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) + { + if (dtbuf_bkp[i]) + { + BkpBlock *bkpb = &(dtbuf_xlg[i]); + char *page; + + COMP_CRC32(rdata_crc, + (char *) bkpb, + sizeof(BkpBlock)); 
+ page = (char *) BufferGetBlock(dtbuf[i]); + if (bkpb->hole_length == 0) + { + COMP_CRC32(rdata_crc, + page, + BLCKSZ); + } + else + { + /* must skip the hole */ + COMP_CRC32(rdata_crc, + page, + bkpb->hole_offset); + COMP_CRC32(rdata_crc, + page + (bkpb->hole_offset + bkpb->hole_length), + BLCKSZ - (bkpb->hole_offset + bkpb->hole_length)); + } + } + } + /* * NOTE: the test for len == 0 here is somewhat fishy, since in theory * all of the rmgr data might have been suppressed in favor of backup @@ -713,23 +739,49 @@ begin:; write_len = len; for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) { + BkpBlock *bkpb; + char *page; + if (dtbuf[i] == InvalidBuffer || !(dtbuf_bkp[i])) continue; info |= XLR_SET_BKP_BLOCK(i); - rdt->next = &(dtbuf_rdt[2 * i]); + bkpb = &(dtbuf_xlg[i]); + page = (char *) BufferGetBlock(dtbuf[i]); + + rdt->next = &(dtbuf_rdt1[i]); + rdt = rdt->next; - dtbuf_rdt[2 * i].data = (char *) &(dtbuf_xlg[i]); - dtbuf_rdt[2 * i].len = sizeof(BkpBlock); + rdt->data = (char *) bkpb; + rdt->len = sizeof(BkpBlock); write_len += sizeof(BkpBlock); - rdt = dtbuf_rdt[2 * i].next = &(dtbuf_rdt[2 * i + 1]); + rdt->next = &(dtbuf_rdt2[i]); + rdt = rdt->next; - dtbuf_rdt[2 * i + 1].data = (char *) BufferGetBlock(dtbuf[i]); - dtbuf_rdt[2 * i + 1].len = BLCKSZ; - write_len += BLCKSZ; - dtbuf_rdt[2 * i + 1].next = NULL; + if (bkpb->hole_length == 0) + { + rdt->data = page; + rdt->len = BLCKSZ; + write_len += BLCKSZ; + rdt->next = NULL; + } + else + { + /* must skip the hole */ + rdt->data = page; + rdt->len = bkpb->hole_offset; + write_len += bkpb->hole_offset; + + rdt->next = &(dtbuf_rdt3[i]); + rdt = rdt->next; + + rdt->data = page + (bkpb->hole_offset + bkpb->hole_length); + rdt->len = BLCKSZ - (bkpb->hole_offset + bkpb->hole_length); + write_len += rdt->len; + rdt->next = NULL; + } } /* @@ -752,14 +804,15 @@ begin:; record->xl_prev = Insert->PrevRecord; record->xl_xid = GetCurrentTransactionIdIfAny(); + record->xl_tot_len = SizeOfXLogRecord + write_len; record->xl_len = len; 
/* doesn't include backup blocks */ record->xl_info = info; record->xl_rmid = rmid; - /* Now we can finish computing the main CRC */ - COMP_CRC64(rdata_crc, (char *) record + sizeof(crc64), - SizeOfXLogRecord - sizeof(crc64)); - FIN_CRC64(rdata_crc); + /* Now we can finish computing the record's CRC */ + COMP_CRC32(rdata_crc, (char *) record + sizeof(pg_crc32), + SizeOfXLogRecord - sizeof(pg_crc32)); + FIN_CRC32(rdata_crc); record->xl_crc = rdata_crc; /* Compute record's XLOG location */ @@ -884,6 +937,46 @@ begin:; return (RecPtr); } +/* + * Fill a BkpBlock struct given a buffer containing the page to be saved + * + * This is nontrivial only because it has to decide whether to apply "hole + * compression". + * + * Stores the file node and block number of the buffer in *bkpb, then sets + * hole_offset/hole_length to describe the run of zero bytes that begins at + * pd_lower of the page. Both fields end up 0 when pd_lower is out of range + * or the byte at pd_lower is nonzero. + */ +static void +SetBkpBlock(BkpBlock *bkpb, Buffer buffer) +{ + PageHeader page; + uint16 offset; + uint16 length; + + /* Save page identity info */ + bkpb->node = BufferGetFileNode(buffer); + bkpb->block = BufferGetBlockNumber(buffer); + + /* Test whether there is a "hole" containing zeroes in the page: scan from pd_lower to the first nonzero byte or the end of the block (pd_upper is not consulted) */ + page = (PageHeader) BufferGetBlock(buffer); + offset = page->pd_lower; + /* Check if pd_lower appears sane at all */ + if (offset >= SizeOfPageHeaderData && offset < BLCKSZ) + { + char *spd = (char *) page + offset; + char *epd = (char *) page + BLCKSZ; + char *pd = spd; + + while (pd < epd && *pd == '\0') + pd++; + + length = pd - spd; /* NOTE(review): narrowed to uint16; assumes BLCKSZ fits in 16 bits - confirm */ + if (length == 0) + offset = 0; + } + else + offset = length = 0; + bkpb->hole_offset = offset; + bkpb->hole_length = length; +} + /* * XLogArchiveNotify * @@ -2276,7 +2369,7 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn) if (!(record->xl_info & XLR_SET_BKP_BLOCK(i))) continue; - memcpy((char *) &bkpb, blk, sizeof(BkpBlock)); + memcpy(&bkpb, blk, sizeof(BkpBlock)); blk += sizeof(BkpBlock); reln = XLogOpenRelation(true, record->xl_rmid, bkpb.node); @@ -2287,7 +2380,21 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn) if (BufferIsValid(buffer)) { page = (Page) BufferGetPage(buffer); - memcpy((char *) page, 
blk, BLCKSZ); + + if (bkpb.hole_length == 0) + { + memcpy((char *) page, blk, BLCKSZ); + } + else + { + /* must zero-fill the hole */ + MemSet((char *) page, 0, BLCKSZ); + memcpy((char *) page, blk, bkpb.hole_offset); + memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length), + blk + bkpb.hole_offset, + BLCKSZ - (bkpb.hole_offset + bkpb.hole_length)); + } + PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); @@ -2295,7 +2402,7 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn) } } - blk += BLCKSZ; + blk += BLCKSZ - bkpb.hole_length; } } @@ -2309,53 +2416,61 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn) static bool RecordIsValid(XLogRecord *record, XLogRecPtr recptr, int emode) { - crc64 crc; - crc64 cbuf; + pg_crc32 crc; int i; uint32 len = record->xl_len; + BkpBlock bkpb; char *blk; - /* Check CRC of rmgr data and record header */ - INIT_CRC64(crc); - COMP_CRC64(crc, XLogRecGetData(record), len); - COMP_CRC64(crc, (char *) record + sizeof(crc64), - SizeOfXLogRecord - sizeof(crc64)); - FIN_CRC64(crc); + /* First the rmgr data: xl_len bytes at XLogRecGetData(record) */ + INIT_CRC32(crc); + COMP_CRC32(crc, XLogRecGetData(record), len); - if (!EQ_CRC64(record->xl_crc, crc)) - { - ereport(emode, - (errmsg("incorrect resource manager data checksum in record at %X/%X", - recptr.xlogid, recptr.xrecoff))); - return (false); - } - - /* Check CRCs of backup blocks, if any */ + /* Add in the backup blocks, if any. Each block flagged in xl_info contributes its BkpBlock header plus BLCKSZ - hole_length data bytes; the header is copied into bkpb first because blk is not assumed to be aligned. */ blk = (char *) XLogRecGetData(record) + len; for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) { + uint32 blen; + if (!(record->xl_info & XLR_SET_BKP_BLOCK(i))) continue; - INIT_CRC64(crc); - COMP_CRC64(crc, blk + sizeof(BkpBlock), BLCKSZ); - COMP_CRC64(crc, blk + sizeof(crc64), - sizeof(BkpBlock) - sizeof(crc64)); - FIN_CRC64(crc); - memcpy((char *) &cbuf, blk, sizeof(crc64)); /* don't assume - * alignment */ - - if (!EQ_CRC64(cbuf, crc)) + memcpy(&bkpb, blk, sizeof(BkpBlock)); + if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ) { ereport(emode, - 
(errmsg("incorrect checksum of backup block %d in record at %X/%X", - i + 1, recptr.xlogid, recptr.xrecoff))); - return (false); + (errmsg("incorrect hole size in record at %X/%X", + recptr.xlogid, recptr.xrecoff))); + return false; } - blk += sizeof(BkpBlock) + BLCKSZ; + blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length; + COMP_CRC32(crc, blk, blen); + blk += blen; + } + + /* Check that xl_tot_len agrees with our calculation. NOTE(review): this cross-check runs only after the loop above has already read blen bytes for every flagged block, so xl_info bits that overstate the record contents cause reads past xl_tot_len before the record is rejected - confirm the buffer handed in by the caller always covers that. */ + if (blk != (char *) record + record->xl_tot_len) + { + ereport(emode, + (errmsg("incorrect total length in record at %X/%X", + recptr.xlogid, recptr.xrecoff))); + return false; } - return (true); + /* Finally include the record header, skipping its leading sizeof(pg_crc32) bytes */ + COMP_CRC32(crc, (char *) record + sizeof(pg_crc32), + SizeOfXLogRecord - sizeof(pg_crc32)); + FIN_CRC32(crc); + + if (!EQ_CRC32(record->xl_crc, crc)) + { + ereport(emode, + (errmsg("incorrect resource manager data checksum in record at %X/%X", + recptr.xlogid, recptr.xrecoff))); + return false; + } + + return true; } /* @@ -2382,7 +2497,6 @@ ReadRecord(XLogRecPtr *RecPtr, int emode) uint32 targetPageOff; uint32 targetRecOff; uint32 pageHeaderSize; - unsigned i; if (readBuf == NULL) { @@ -2518,6 +2632,15 @@ got_record:; RecPtr->xlogid, RecPtr->xrecoff))); goto next_record_is_invalid; } + if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len || + record->xl_tot_len > SizeOfXLogRecord + record->xl_len + + XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ)) + { + ereport(emode, + (errmsg("invalid record length at %X/%X", + RecPtr->xlogid, RecPtr->xrecoff))); + goto next_record_is_invalid; + } if (record->xl_rmid > RM_MAX_ID) { ereport(emode, @@ -2557,18 +2680,6 @@ got_record:; } } - /* - * Compute total length of record including any appended backup - * blocks. 
- */ - total_len = SizeOfXLogRecord + record->xl_len; - for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) - { - if (!(record->xl_info & XLR_SET_BKP_BLOCK(i))) - continue; - total_len += sizeof(BkpBlock) + BLCKSZ; - } - /* * Allocate or enlarge readRecordBuf as needed. To avoid useless * small increases, round its size to a multiple of BLCKSZ, and make @@ -2576,6 +2687,7 @@ got_record:; * "normal" records, but very large commit or abort records might need * more space.) */ + total_len = record->xl_tot_len; if (total_len > readRecordBufSize) { uint32 newSize = total_len; @@ -2666,15 +2778,15 @@ got_record:; goto next_record_is_invalid; pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf); if (BLCKSZ - SizeOfXLogRecord >= pageHeaderSize + - SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len)) + MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len)) { nextRecord = (XLogRecord *) ((char *) contrecord + - SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len)); + MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len)); } EndRecPtr.xlogid = readId; EndRecPtr.xrecoff = readSeg * XLogSegSize + readOff + - pageHeaderSize + SizeOfXLogContRecord + - MAXALIGN(contrecord->xl_rem_len); + pageHeaderSize + + MAXALIGN(SizeOfXLogContRecord + contrecord->xl_rem_len); ReadRecPtr = *RecPtr; return record; } @@ -3194,11 +3306,11 @@ WriteControlFile(void) StrNCpy(ControlFile->lc_ctype, localeptr, LOCALE_NAME_BUFLEN); /* Contents are protected with a CRC */ - INIT_CRC64(ControlFile->crc); - COMP_CRC64(ControlFile->crc, - (char *) ControlFile + sizeof(crc64), - sizeof(ControlFileData) - sizeof(crc64)); - FIN_CRC64(ControlFile->crc); + INIT_CRC32(ControlFile->crc); + COMP_CRC32(ControlFile->crc, + (char *) ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32(ControlFile->crc); /* * We write out BLCKSZ bytes into pg_control, zero-padding the excess @@ -3247,7 +3359,7 @@ WriteControlFile(void) static void ReadControlFile(void) { - crc64 crc; + pg_crc32 crc; int fd; /* @@ 
-3281,13 +3393,13 @@ ReadControlFile(void) ControlFile->pg_control_version, PG_CONTROL_VERSION), errhint("It looks like you need to initdb."))); /* Now check the CRC. */ - INIT_CRC64(crc); - COMP_CRC64(crc, - (char *) ControlFile + sizeof(crc64), - sizeof(ControlFileData) - sizeof(crc64)); - FIN_CRC64(crc); + INIT_CRC32(crc); + COMP_CRC32(crc, + (char *) ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32(crc); - if (!EQ_CRC64(crc, ControlFile->crc)) + if (!EQ_CRC32(crc, ControlFile->crc)) ereport(FATAL, (errmsg("incorrect checksum in control file"))); @@ -3396,11 +3508,11 @@ UpdateControlFile(void) { int fd; - INIT_CRC64(ControlFile->crc); - COMP_CRC64(ControlFile->crc, - (char *) ControlFile + sizeof(crc64), - sizeof(ControlFileData) - sizeof(crc64)); - FIN_CRC64(ControlFile->crc); + INIT_CRC32(ControlFile->crc); + COMP_CRC32(ControlFile->crc, + (char *) ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32(ControlFile->crc); fd = BasicOpenFile(ControlFilePath, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) @@ -3525,7 +3637,7 @@ BootStrapXLOG(void) bool use_existent; uint64 sysidentifier; struct timeval tv; - crc64 crc; + pg_crc32 crc; /* * Select a hopefully-unique system identifier code for this @@ -3582,16 +3694,17 @@ BootStrapXLOG(void) record->xl_prev.xlogid = 0; record->xl_prev.xrecoff = 0; record->xl_xid = InvalidTransactionId; + record->xl_tot_len = SizeOfXLogRecord + sizeof(checkPoint); record->xl_len = sizeof(checkPoint); record->xl_info = XLOG_CHECKPOINT_SHUTDOWN; record->xl_rmid = RM_XLOG_ID; memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint)); - INIT_CRC64(crc); - COMP_CRC64(crc, &checkPoint, sizeof(checkPoint)); - COMP_CRC64(crc, (char *) record + sizeof(crc64), - SizeOfXLogRecord - sizeof(crc64)); - FIN_CRC64(crc); + INIT_CRC32(crc); + COMP_CRC32(crc, &checkPoint, sizeof(checkPoint)); + COMP_CRC32(crc, (char *) record + sizeof(pg_crc32), + SizeOfXLogRecord - sizeof(pg_crc32)); + FIN_CRC32(crc); record->xl_crc = crc; 
/* Create first XLOG segment file */ @@ -4694,7 +4807,8 @@ ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt) } return NULL; } - if (record->xl_len != sizeof(CheckPoint)) + if (record->xl_len != sizeof(CheckPoint) || + record->xl_tot_len != SizeOfXLogRecord + sizeof(CheckPoint)) { switch (whichChkpt) { diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index c33a0011e600aca23b9a0d6d997707aea2205772..8f8ba9e0d2b364f3059fc2f7cc21070fdebdfa53 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.63 2005/03/22 06:17:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/page/bufpage.c,v 1.64 2005/06/02 05:55:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -357,7 +357,7 @@ PageRepairFragmentation(Page page, OffsetNumber *unused) lp = PageGetItemId(page, i + 1); lp->lp_len = 0; /* indicate unused & deallocated */ } - ((PageHeader) page)->pd_upper = pd_special; + ((PageHeader) page)->pd_upper = pd_upper = pd_special; } else { /* nused != 0 */ @@ -411,11 +411,17 @@ PageRepairFragmentation(Page page, OffsetNumber *unused) lp->lp_off = upper; } - ((PageHeader) page)->pd_upper = upper; + ((PageHeader) page)->pd_upper = pd_upper = upper; pfree(itemidbase); } + /* + * Zero out the now-free space. This is not essential, but it allows + * xlog.c to compress WAL data better. + */ + MemSet((char *) page + pd_lower, 0, pd_upper - pd_lower); + return (nline - nused); } @@ -525,6 +531,13 @@ PageIndexTupleDelete(Page page, OffsetNumber offnum) phdr->pd_upper += size; phdr->pd_lower -= sizeof(ItemIdData); + /* + * Zero out the just-freed space. This is not essential, but it allows + * xlog.c to compress WAL data better. 
+ */ + MemSet((char *) page + phdr->pd_lower, 0, sizeof(ItemIdData)); + MemSet(addr, 0, size); + /* * Finally, we need to adjust the linp entries that remain. * @@ -672,8 +685,14 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems) lp->lp_off = upper; } - phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData); - phdr->pd_upper = upper; + phdr->pd_lower = pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData); + phdr->pd_upper = pd_upper = upper; + + /* + * Zero out the now-free space. This is not essential, but it allows + * xlog.c to compress WAL data better. + */ + MemSet((char *) page + pd_lower, 0, pd_upper - pd_lower); pfree(itemidbase); } diff --git a/src/backend/utils/hash/pg_crc.c b/src/backend/utils/hash/pg_crc.c index bf23242a5ad74d5b239fc866f33234c5dd7890cb..211da1aa729a51a28d88c089addb82ba288712ea 100644 --- a/src/backend/utils/hash/pg_crc.c +++ b/src/backend/utils/hash/pg_crc.c @@ -1,14 +1,25 @@ /*------------------------------------------------------------------------- * * pg_crc.c - * PostgreSQL 64-bit CRC support + * PostgreSQL CRC support + * + * See Ross Williams' excellent introduction + * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from + * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites. + * + * We use a normal (not "reflected", in Williams' terms) CRC, using initial + * all-ones register contents and a final bit inversion. + * + * The 64-bit variant is not used as of PostgreSQL 8.1, but we retain the + * code for possible future use. 
+ * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/hash/pg_crc.c,v 1.12 2004/12/31 22:01:37 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/utils/hash/pg_crc.c,v 1.13 2005/06/02 05:55:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,9 +28,96 @@ #include "utils/pg_crc.h" +/* + * This table is based on the polynomial + * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. + * (This is the same polynomial used in Ethernet checksums, for instance.) + * + * NOTE(review): the entries below (0x77073096, 0xEE0E612C, ...) coincide + * with the commonly published table for the bit-reflected form of this + * polynomial (0xEDB88320), while the file header describes a "normal" (not + * reflected) CRC. The macro that indexes this table is not visible here - + * confirm which variant is actually computed. + */ +const uint32 pg_crc32_table[256] = { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, + 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, + 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, + 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, + 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, + 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, + 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, + 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, + 0x4ADFA541, 0x3DD895D7, 
0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, + 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, + 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, + 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, + 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, + 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, + 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, + 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, + 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, + 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, + 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, + 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D +}; + + +#ifdef PROVIDE_64BIT_CRC + +/* + * This table is based on the 
polynomial + * + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + + * x^7 + x^4 + x + 1 + * + * which is borrowed from the DLT1 spec + * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM) + */ + #ifdef INT64_IS_BUSTED -const uint32 crc_table0[256] = { +const uint32 pg_crc64_table0[256] = { 0x00000000, 0xA9EA3693, 0x53D46D26, 0xFA3E5BB5, 0x0E42ECDF, 0xA7A8DA4C, @@ -150,7 +248,7 @@ const uint32 crc_table0[256] = { 0x676F8394, 0xCE85B507 }; -const uint32 crc_table1[256] = { +const uint32 pg_crc64_table1[256] = { 0x00000000, 0x42F0E1EB, 0x85E1C3D7, 0xC711223C, 0x49336645, 0x0BC387AE, @@ -283,7 +381,7 @@ const uint32 crc_table1[256] = { #else /* int64 works */ -const uint64 crc_table[256] = { +const uint64 pg_crc64_table[256] = { UINT64CONST(0x0000000000000000), UINT64CONST(0x42F0E1EBA9EA3693), UINT64CONST(0x85E1C3D753D46D26), UINT64CONST(0xC711223CFA3E5BB5), UINT64CONST(0x493366450E42ECDF), UINT64CONST(0x0BC387AEA7A8DA4C), @@ -415,3 +513,5 @@ const uint64 crc_table[256] = { }; #endif /* INT64_IS_BUSTED */ + +#endif /* PROVIDE_64BIT_CRC */ diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index d89a934dfc2abf2798a82d6eb5d3d2ec5d745e92..77f61af06f467851216584e8fcae2eb40c13f8b2 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -6,7 +6,7 @@ * copyright (c) Oliver Elphick <olly@lfix.co.uk>, 2001; * licence: BSD * - * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.23 2005/04/28 21:47:16 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.24 2005/06/02 05:55:29 tgl Exp $ */ #include "postgres.h" @@ -66,7 +66,7 @@ main(int argc, char *argv[]) int fd; char ControlFilePath[MAXPGPATH]; char *DataDir; - crc64 crc; + pg_crc32 crc; char pgctime_str[128]; char 
ckpttime_str[128]; char sysident_str[32]; @@ -120,13 +120,13 @@ main(int argc, char *argv[]) close(fd); /* Check the CRC. */ - INIT_CRC64(crc); - COMP_CRC64(crc, - (char *) &ControlFile + sizeof(crc64), - sizeof(ControlFileData) - sizeof(crc64)); - FIN_CRC64(crc); + INIT_CRC32(crc); + COMP_CRC32(crc, + (char *) &ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32(crc); - if (!EQ_CRC64(crc, ControlFile.crc)) + if (!EQ_CRC32(crc, ControlFile.crc)) printf(_("WARNING: Calculated CRC checksum does not match value stored in file.\n" "Either the file is corrupt, or it has a different layout than this program\n" "is expecting. The results below are untrustworthy.\n\n")); diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c index cabc5c00124cf4e9cf070e3171285f9507cf2427..6eceb0a3543943893ce9179736d1949df5485e77 100644 --- a/src/bin/pg_resetxlog/pg_resetxlog.c +++ b/src/bin/pg_resetxlog/pg_resetxlog.c @@ -23,7 +23,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.32 2005/04/28 21:47:16 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.33 2005/06/02 05:55:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -327,7 +327,7 @@ ReadControlFile(void) int fd; int len; char *buffer; - crc64 crc; + pg_crc32 crc; if ((fd = open(ControlFilePath, O_RDONLY)) < 0) { @@ -362,13 +362,13 @@ ReadControlFile(void) ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION) { /* Check the CRC. 
*/ - INIT_CRC64(crc); - COMP_CRC64(crc, - buffer + sizeof(crc64), - sizeof(ControlFileData) - sizeof(crc64)); - FIN_CRC64(crc); + INIT_CRC32(crc); + COMP_CRC32(crc, + buffer, + offsetof(ControlFileData, crc)); + FIN_CRC32(crc); - if (EQ_CRC64(crc, ((ControlFileData *) buffer)->crc)) + if (EQ_CRC32(crc, ((ControlFileData *) buffer)->crc)) { /* Valid data... */ memcpy(&ControlFile, buffer, sizeof(ControlFile)); @@ -553,11 +553,11 @@ RewriteControlFile(void) ControlFile.prevCheckPoint.xrecoff = 0; /* Contents are protected with a CRC */ - INIT_CRC64(ControlFile.crc); - COMP_CRC64(ControlFile.crc, - (char *) &ControlFile + sizeof(crc64), - sizeof(ControlFileData) - sizeof(crc64)); - FIN_CRC64(ControlFile.crc); + INIT_CRC32(ControlFile.crc); + COMP_CRC32(ControlFile.crc, + (char *) &ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32(ControlFile.crc); /* * We write out BLCKSZ bytes into pg_control, zero-padding the excess @@ -673,7 +673,7 @@ WriteEmptyXLOG(void) XLogPageHeader page; XLogLongPageHeader longpage; XLogRecord *record; - crc64 crc; + pg_crc32 crc; char path[MAXPGPATH]; int fd; int nbytes; @@ -700,17 +700,18 @@ WriteEmptyXLOG(void) record->xl_prev.xlogid = 0; record->xl_prev.xrecoff = 0; record->xl_xid = InvalidTransactionId; + record->xl_tot_len = SizeOfXLogRecord + sizeof(CheckPoint); record->xl_len = sizeof(CheckPoint); record->xl_info = XLOG_CHECKPOINT_SHUTDOWN; record->xl_rmid = RM_XLOG_ID; memcpy(XLogRecGetData(record), &ControlFile.checkPointCopy, sizeof(CheckPoint)); - INIT_CRC64(crc); - COMP_CRC64(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint)); - COMP_CRC64(crc, (char *) record + sizeof(crc64), - SizeOfXLogRecord - sizeof(crc64)); - FIN_CRC64(crc); + INIT_CRC32(crc); + COMP_CRC32(crc, &ControlFile.checkPointCopy, sizeof(CheckPoint)); + COMP_CRC32(crc, (char *) record + sizeof(pg_crc32), + SizeOfXLogRecord - sizeof(pg_crc32)); + FIN_CRC32(crc); record->xl_crc = crc; /* Write the first page */ diff --git a/src/include/access/xlog.h 
b/src/include/access/xlog.h index ab471738970a2a529d4f702a2fb00284cf42092a..1d1aa9c15268abd1dd0470922989515769ced4f3 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.61 2005/05/20 14:53:26 momjian Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.62 2005/06/02 05:55:29 tgl Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -19,23 +19,31 @@ /* - * Header for each record in XLOG + * The overall layout of an XLOG record is: + * Fixed-size header (XLogRecord struct) + * rmgr-specific data + * BkpBlock + * backup block data + * BkpBlock + * backup block data + * ... * - * NOTE: xl_len counts only the rmgr data, not the XLogRecord header, - * and also not any backup blocks appended to the record (which are signaled - * by xl_info flag bits). The total space needed for an XLOG record is - * really: - * - * SizeOfXLogRecord + xl_len + n_backup_blocks * (sizeof(BkpBlock) + BLCKSZ) + * where there can be zero to three backup blocks (as signaled by xl_info flag + * bits). XLogRecord structs always start on MAXALIGN boundaries in the WAL + * files, and we round up SizeOfXLogRecord so that the rmgr data is also + * guaranteed to begin on a MAXALIGN boundary. However, no padding is added + * to align BkpBlock structs or backup block data. * - * rounded up to a MAXALIGN boundary (so that all xlog records start on - * MAXALIGN boundaries). + * NOTE: xl_len counts only the rmgr data, not the XLogRecord header, + * and also not any backup blocks. xl_tot_len counts everything. Neither + * length field is rounded up to an alignment boundary. 
*/ typedef struct XLogRecord { - crc64 xl_crc; /* CRC for this record */ + pg_crc32 xl_crc; /* CRC for this record */ XLogRecPtr xl_prev; /* ptr to previous record in log */ TransactionId xl_xid; /* xact id */ + uint32 xl_tot_len; /* total len of entire record */ uint32 xl_len; /* total len of rmgr data */ uint8 xl_info; /* flag bits, see below */ RmgrId xl_rmid; /* resource manager for this record */ diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 75842328db4b47bc517e608f18c7f16a7143e50e..a0b0b761ccb2f2162138c45628c1f144b6c4dfd0 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.6 2004/12/31 22:03:21 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.7 2005/06/02 05:55:29 tgl Exp $ */ #ifndef XLOG_INTERNAL_H #define XLOG_INTERNAL_H @@ -25,15 +25,25 @@ /* * Header info for a backup block appended to an XLOG record. * - * Note that the backup block has its own CRC, and is not covered by - * the CRC of the XLOG record proper. Also note that we don't attempt - * to align either the BkpBlock struct or the block's data. + * As a trivial form of data compression, the XLOG code is aware that + * PG data pages usually contain an unused "hole" in the middle, which + * contains only zero bytes. If hole_length > 0 then we have removed + * such a "hole" from the stored data (and it's not counted in the + * XLOG record's CRC, either). Hence, the amount of block data actually + * present following the BkpBlock struct is BLCKSZ - hole_length bytes. + * + * Note that we don't attempt to align either the BkpBlock struct or the + * block's data. So, the struct must be copied to aligned local storage + * before use. 
*/ typedef struct BkpBlock { - crc64 crc; - RelFileNode node; - BlockNumber block; + RelFileNode node; /* relation containing block */ + BlockNumber block; /* block number */ + uint16 hole_offset; /* number of bytes before "hole" */ + uint16 hole_length; /* number of bytes in "hole" */ + + /* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */ } BkpBlock; /* @@ -42,8 +52,9 @@ typedef struct BkpBlock * XLogRecord header will never be split across pages; if there's less than * SizeOfXLogRecord space left at the end of a page, we just waste it.) * - * Note that xl_rem_len includes backup-block data, unlike xl_len in the - * initial header. + * Note that xl_rem_len includes backup-block data; that is, it tracks + * xl_tot_len not xl_len in the initial header. Also note that the + * continuation data isn't necessarily aligned. */ typedef struct XLogContRecord { @@ -53,12 +64,12 @@ typedef struct XLogContRecord } XLogContRecord; -#define SizeOfXLogContRecord MAXALIGN(sizeof(XLogContRecord)) +#define SizeOfXLogContRecord sizeof(XLogContRecord) /* * Each page of XLOG file has a header like this: */ -#define XLOG_PAGE_MAGIC 0xD05C /* can be used as WAL version indicator */ +#define XLOG_PAGE_MAGIC 0xD05D /* can be used as WAL version indicator */ typedef struct XLogPageHeaderData { diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index e60a879424a3462ba0873c9ef8115958d6fadd61..3f96b6bf261734a9cda9b1a9267f16f30a2e5d6d 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.21 2005/04/28 21:47:17 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.22 2005/06/02 05:55:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,7 +22,7 @@ /* Version 
identifier for this pg_control format */ -#define PG_CONTROL_VERSION 81 +#define PG_CONTROL_VERSION 810 /* * Body of CheckPoint XLOG records. This is declared here because we keep @@ -73,12 +73,17 @@ typedef enum DBState typedef struct ControlFileData { - crc64 crc; /* CRC for remainder of struct */ + /* + * Unique system identifier --- to ensure we match up xlog files with + * the installation that produced them. + */ + uint64 system_identifier; /* - * Version identifier information. Keep these fields at the front, + * Version identifier information. Keep these fields at the same offset, * especially pg_control_version; they won't be real useful if they - * move around. + * move around. (For historical reasons they must be 8 bytes into + * the file rather than immediately at the front.) * * pg_control_version identifies the format of pg_control itself. * catalog_version_no identifies the format of the system catalogs. @@ -90,12 +95,6 @@ typedef struct ControlFileData uint32 pg_control_version; /* PG_CONTROL_VERSION */ uint32 catalog_version_no; /* see catversion.h */ - /* - * Unique system identifier --- to ensure we match up xlog files with - * the installation that produced them. - */ - uint64 system_identifier; - /* * System status data */ @@ -127,6 +126,9 @@ typedef struct ControlFileData uint32 localeBuflen; char lc_collate[LOCALE_NAME_BUFLEN]; char lc_ctype[LOCALE_NAME_BUFLEN]; + + /* CRC of all above ... MUST BE LAST! 
*/ + pg_crc32 crc; } ControlFileData; #endif /* PG_CONTROL_H */ diff --git a/src/include/utils/pg_crc.h b/src/include/utils/pg_crc.h index 6638f75d74e4f7794b667d5c19f0d93e321ce246..5bf9ed76335e8b07d5d02030c49c56d123d3f9d5 100644 --- a/src/include/utils/pg_crc.h +++ b/src/include/utils/pg_crc.h @@ -1,32 +1,65 @@ /* * pg_crc.h * - * PostgreSQL 64-bit CRC support + * PostgreSQL CRC support + * + * See Ross Williams' excellent introduction + * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from + * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites. + * + * We use a normal (not "reflected", in Williams' terms) CRC, using initial + * all-ones register contents and a final bit inversion. + * + * The 64-bit variant is not used as of PostgreSQL 8.1, but we retain the + * code for possible future use. + * * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/pg_crc.h,v 1.12 2004/12/31 22:03:46 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/utils/pg_crc.h,v 1.13 2005/06/02 05:55:29 tgl Exp $ */ #ifndef PG_CRC_H #define PG_CRC_H + +typedef uint32 pg_crc32; + +/* Initialize a CRC accumulator */ +#define INIT_CRC32(crc) ((crc) = 0xFFFFFFFF) + +/* Finish a CRC calculation */ +#define FIN_CRC32(crc) ((crc) ^= 0xFFFFFFFF) + +/* Accumulate some (more) bytes into a CRC */ +#define COMP_CRC32(crc, data, len) \ +do { \ + unsigned char *__data = (unsigned char *) (data); \ + uint32 __len = (len); \ +\ + while (__len-- > 0) \ + { \ + int __tab_index = ((int) ((crc) >> 24) ^ *__data++) & 0xFF; \ + (crc) = pg_crc32_table[__tab_index] ^ ((crc) << 8); \ + } \ +} while (0) + +/* Check for equality of two CRCs */ +#define EQ_CRC32(c1,c2) ((c1) == (c2)) + +/* Constant table for CRC calculation */ +extern const uint32 pg_crc32_table[]; + + +#ifdef PROVIDE_64BIT_CRC + /* * If we have a 64-bit integer type, then a 64-bit CRC looks 
just like the - * usual sort of implementation. (See Ross Williams' excellent introduction - * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from - * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.) - * If we have no working 64-bit type, then fake it with two 32-bit registers. - * - * The present implementation is a normal (not "reflected", in Williams' - * terms) 64-bit CRC, using initial all-ones register contents and a final - * bit inversion. The chosen polynomial is borrowed from the DLT1 spec - * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM): - * - * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + - * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + - * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + - * x^7 + x^4 + x + 1 + * usual sort of implementation. If we have no working 64-bit type, then + * fake it with two 32-bit registers. (Note: experience has shown that the + * two-32-bit-registers code is as fast as, or even much faster than, the + * 64-bit code on all but true 64-bit machines. INT64_IS_BUSTED is therefore + * probably the wrong control symbol to use to select the implementation.) */ #ifdef INT64_IS_BUSTED @@ -39,11 +72,11 @@ * all machines, we could do a configure test to decide how to order the * two fields, but it seems not worth the trouble. 
*/ -typedef struct crc64 +typedef struct pg_crc64 { uint32 crc0; uint32 crc1; -} crc64; +} pg_crc64; /* Initialize a CRC accumulator */ #define INIT_CRC64(crc) ((crc).crc0 = 0xffffffff, (crc).crc1 = 0xffffffff) @@ -62,8 +95,8 @@ do { \ while (__len-- > 0) \ { \ int __tab_index = ((int) (__crc1 >> 24) ^ *__data++) & 0xFF; \ - __crc1 = crc_table1[__tab_index] ^ ((__crc1 << 8) | (__crc0 >> 24)); \ - __crc0 = crc_table0[__tab_index] ^ (__crc0 << 8); \ + __crc1 = pg_crc64_table1[__tab_index] ^ ((__crc1 << 8) | (__crc0 >> 24)); \ + __crc0 = pg_crc64_table0[__tab_index] ^ (__crc0 << 8); \ } \ (crc).crc0 = __crc0; \ (crc).crc1 = __crc1; \ @@ -73,15 +106,15 @@ do { \ #define EQ_CRC64(c1,c2) ((c1).crc0 == (c2).crc0 && (c1).crc1 == (c2).crc1) /* Constant table for CRC calculation */ -extern const uint32 crc_table0[]; -extern const uint32 crc_table1[]; +extern const uint32 pg_crc64_table0[]; +extern const uint32 pg_crc64_table1[]; #else /* int64 works */ -typedef struct crc64 +typedef struct pg_crc64 { uint64 crc0; -} crc64; +} pg_crc64; /* Initialize a CRC accumulator */ #define INIT_CRC64(crc) ((crc).crc0 = UINT64CONST(0xffffffffffffffff)) @@ -99,7 +132,7 @@ do { \ while (__len-- > 0) \ { \ int __tab_index = ((int) (__crc0 >> 56) ^ *__data++) & 0xFF; \ - __crc0 = crc_table[__tab_index] ^ (__crc0 << 8); \ + __crc0 = pg_crc64_table[__tab_index] ^ (__crc0 << 8); \ } \ (crc).crc0 = __crc0; \ } while (0) @@ -108,7 +141,9 @@ do { \ #define EQ_CRC64(c1,c2) ((c1).crc0 == (c2).crc0) /* Constant table for CRC calculation */ -extern const uint64 crc_table[]; +extern const uint64 pg_crc64_table[]; #endif /* INT64_IS_BUSTED */ +#endif /* PROVIDE_64BIT_CRC */ + #endif /* PG_CRC_H */