diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 2d53c8e5ff7ab22459c19f10a082eb131e01a93f..d19e7220c548c061ad9289ab2dd244462a73bffb 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -35,9 +35,9 @@ #include "access/tuptoaster.h" #include "access/xact.h" #include "catalog/catalog.h" +#include "common/pg_lzcompress.h" #include "miscadmin.h" #include "utils/fmgroids.h" -#include "utils/pg_lzcompress.h" #include "utils/rel.h" #include "utils/typcache.h" #include "utils/tqual.h" @@ -45,6 +45,26 @@ #undef TOAST_DEBUG +/* + * The information at the start of the compressed toast data. + */ +typedef struct toast_compress_header +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + int32 rawsize; +} toast_compress_header; + +/* + * Utilities for manipulation of header information for compressed + * toast entries. + */ +#define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header)) +#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) ptr)->rawsize) +#define TOAST_COMPRESS_RAWDATA(ptr) \ + (((char *) ptr) + TOAST_COMPRESS_HDRSZ) +#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \ + (((toast_compress_header *) ptr)->rawsize = len) + static void toast_delete_datum(Relation rel, Datum value); static Datum toast_save_datum(Relation rel, Datum value, struct varlena * oldexternal, int options); @@ -53,6 +73,7 @@ static bool toastid_valueid_exists(Oid toastrelid, Oid valueid); static struct varlena *toast_fetch_datum(struct varlena * attr); static struct varlena *toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length); +static struct varlena *toast_decompress_datum(struct varlena * attr); static int toast_open_indexes(Relation toastrel, LOCKMODE lock, Relation **toastidxs, @@ -138,11 +159,8 @@ heap_tuple_untoast_attr(struct varlena * attr) /* If it's compressed, decompress it */ if (VARATT_IS_COMPRESSED(attr)) { - PGLZ_Header *tmp = 
(PGLZ_Header *) attr; - - attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - pglz_decompress(tmp, VARDATA(attr)); + struct varlena *tmp = attr; + attr = toast_decompress_datum(tmp); pfree(tmp); } } @@ -163,11 +181,7 @@ heap_tuple_untoast_attr(struct varlena * attr) /* * This is a compressed value inside of the main tuple */ - PGLZ_Header *tmp = (PGLZ_Header *) attr; - - attr = (struct varlena *) palloc(PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - SET_VARSIZE(attr, PGLZ_RAW_SIZE(tmp) + VARHDRSZ); - pglz_decompress(tmp, VARDATA(attr)); + attr = toast_decompress_datum(attr); } else if (VARATT_IS_SHORT(attr)) { @@ -234,14 +248,10 @@ heap_tuple_untoast_attr_slice(struct varlena * attr, if (VARATT_IS_COMPRESSED(preslice)) { - PGLZ_Header *tmp = (PGLZ_Header *) preslice; - Size size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ; - - preslice = (struct varlena *) palloc(size); - SET_VARSIZE(preslice, size); - pglz_decompress(tmp, VARDATA(preslice)); + struct varlena *tmp = preslice; + preslice = toast_decompress_datum(tmp); - if (tmp != (PGLZ_Header *) attr) + if (tmp != attr) pfree(tmp); } @@ -1228,6 +1238,7 @@ toast_compress_datum(Datum value) { struct varlena *tmp; int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value)); + int32 len; Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value))); Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); @@ -1240,7 +1251,8 @@ toast_compress_datum(Datum value) valsize > PGLZ_strategy_default->max_input_size) return PointerGetDatum(NULL); - tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize)); + tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) + + TOAST_COMPRESS_HDRSZ); /* * We recheck the actual size even if pglz_compress() reports success, @@ -1252,10 +1264,15 @@ toast_compress_datum(Datum value) * only one header byte and no padding if the value is short enough. So * we insist on a savings of more than 2 bytes to ensure we have a gain. 
*/ - if (pglz_compress(VARDATA_ANY(DatumGetPointer(value)), valsize, - (PGLZ_Header *) tmp, PGLZ_strategy_default) && - VARSIZE(tmp) < valsize - 2) + len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)), + valsize, + TOAST_COMPRESS_RAWDATA(tmp), + PGLZ_strategy_default); + if (len >= 0 && + len + TOAST_COMPRESS_HDRSZ < valsize - 2) { + TOAST_COMPRESS_SET_RAWSIZE(tmp, valsize); + SET_VARSIZE_COMPRESSED(tmp, len + TOAST_COMPRESS_HDRSZ); /* successful compression */ return PointerGetDatum(tmp); } @@ -2100,6 +2117,32 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length) return result; } +/* ---------- + * toast_decompress_datum - + * + * Decompress a compressed version of a varlena datum + */ +static struct varlena * +toast_decompress_datum(struct varlena * attr) +{ + struct varlena *result; + + Assert(VARATT_IS_COMPRESSED(attr)); + + result = (struct varlena *) + palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ); + SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ); + + if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr), + VARSIZE(attr) - TOAST_COMPRESS_HDRSZ, + VARDATA(result), + TOAST_COMPRESS_RAWSIZE(attr)) < 0) + elog(ERROR, "compressed data is corrupted"); + + return result; +} + + /* ---------- * toast_open_indexes * diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 3ea9bf435a31c7d68ec6f142044fd36540866854..20e5ff10c7f18c13ecf9fa9d68fa02f94d55cf53 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -25,8 +25,8 @@ OBJS = acl.o arrayfuncs.o array_selfuncs.o array_typanalyze.o \ jsonfuncs.o like.o lockfuncs.o mac.o misc.o nabstime.o name.o \ network.o network_gist.o network_selfuncs.o \ numeric.o numutils.o oid.o oracle_compat.o \ - orderedsetaggs.o pg_lzcompress.o pg_locale.o pg_lsn.o \ - pgstatfuncs.o pseudotypes.o quote.o rangetypes.o rangetypes_gist.o \ + orderedsetaggs.o pg_locale.o pg_lsn.o pgstatfuncs.o \ + pseudotypes.o quote.o rangetypes.o 
rangetypes_gist.o \ rangetypes_selfuncs.o rangetypes_spgist.o rangetypes_typanalyze.o \ regexp.o regproc.o ri_triggers.o rowtypes.o ruleutils.o \ selfuncs.o tid.o timestamp.o trigfuncs.o \ diff --git a/src/common/Makefile b/src/common/Makefile index e5c345d7def312fbb99a3115785a62ebe112803b..5f24eb38de9bfd4faf43504493cf94227b1c3ab8 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -23,7 +23,8 @@ include $(top_builddir)/src/Makefile.global override CPPFLAGS := -DFRONTEND $(CPPFLAGS) LIBS += $(PTHREAD_LIBS) -OBJS_COMMON = exec.o pgfnames.o psprintf.o relpath.o rmtree.o string.o username.o wait_error.o +OBJS_COMMON = exec.o pg_lzcompress.o pgfnames.o psprintf.o relpath.o \ + rmtree.o string.o username.o wait_error.o OBJS_FRONTEND = $(OBJS_COMMON) fe_memutils.o diff --git a/src/backend/utils/adt/pg_lzcompress.c b/src/common/pg_lzcompress.c similarity index 92% rename from src/backend/utils/adt/pg_lzcompress.c rename to src/common/pg_lzcompress.c index 65cb6a95c289c1bf23bbe0c25fb910c564662d4f..447a043ad719674279056fbc34f5e62d4ab3d4ae 100644 --- a/src/backend/utils/adt/pg_lzcompress.c +++ b/src/common/pg_lzcompress.c @@ -8,8 +8,8 @@ * * Entry routines: * - * bool - * pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, + * int32 + * pglz_compress(const char *source, int32 slen, char *dest, * const PGLZ_Strategy *strategy); * * source is the input data to be compressed. @@ -23,44 +23,43 @@ * the compression algorithm. If NULL, the compiled * in default strategy is used. * - * The return value is TRUE if compression succeeded, - * FALSE if not; in the latter case the contents of dest - * are undefined. + * The return value is the number of bytes written in the + * buffer dest, or -1 if compression fails; in the latter + * case the contents of dest are undefined. 
* - * void - * pglz_decompress(const PGLZ_Header *source, char *dest) + * int32 + * pglz_decompress(const char *source, int32 slen, char *dest, + * int32 rawsize) * * source is the compressed input. * + * slen is the length of the compressed input. + * * dest is the area where the uncompressed data will be * written to. It is the callers responsibility to - * provide enough space. The required amount can be - * obtained with the macro PGLZ_RAW_SIZE(source). + * provide enough space. * * The data is written to buff exactly as it was handed * to pglz_compress(). No terminating zero byte is added. * - * The decompression algorithm and internal data format: + * rawsize is the length of the uncompressed data. * - * PGLZ_Header is defined as + * The return value is the number of bytes written in the + * buffer dest, or -1 if decompression fails. * - * typedef struct PGLZ_Header { - * int32 vl_len_; - * int32 rawsize; - * } + * The decompression algorithm and internal data format: * - * The header is followed by the compressed data itself. + * The stored format is just the compressed data, with no header. * * The data representation is easiest explained by describing * the process of decompression. * - * If VARSIZE(x) == rawsize + sizeof(PGLZ_Header), then the data + * If compressed_size == rawsize, then the data * is stored uncompressed as plain bytes. Thus, the decompressor - * simply copies rawsize bytes from the location after the - * header to the destination. + * simply copies rawsize bytes to the destination. * - * Otherwise the first byte after the header tells what to do - * the next 8 times. We call this the control byte. + * Otherwise the first byte tells what to do the next 8 times. + * We call this the control byte. * * An unset bit in the control byte means, that one uncompressed * byte follows, which is copied from input to output. 
@@ -169,14 +168,18 @@ * * Copyright (c) 1999-2015, PostgreSQL Global Development Group * - * src/backend/utils/adt/pg_lzcompress.c + * src/common/pg_lzcompress.c * ---------- */ +#ifndef FRONTEND #include "postgres.h" +#else +#include "postgres_fe.h" +#endif #include <limits.h> -#include "utils/pg_lzcompress.h" +#include "common/pg_lzcompress.h" /* ---------- @@ -492,14 +495,15 @@ pglz_find_match(int16 *hstart, const char *input, const char *end, /* ---------- * pglz_compress - * - * Compresses source into dest using strategy. + * Compresses source into dest using strategy. Returns the number of + * bytes written in buffer dest, or -1 if compression fails. * ---------- */ -bool -pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, +int32 +pglz_compress(const char *source, int32 slen, char *dest, const PGLZ_Strategy *strategy) { - unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header); + unsigned char *bp = (unsigned char *) dest; unsigned char *bstart = bp; int hist_next = 1; bool hist_recycle = false; @@ -533,12 +537,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, if (strategy->match_size_good <= 0 || slen < strategy->min_input_size || slen > strategy->max_input_size) - return false; - - /* - * Save the original source size in the header. - */ - dest->rawsize = slen; + return -1; /* * Limit the match parameters to the supported range. @@ -611,7 +610,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, * allow 4 slop bytes. */ if (bp - bstart >= result_max) - return false; + return -1; /* * If we've emitted more than first_success_by bytes without finding @@ -620,7 +619,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, * pre-compressed data). 
*/ if (!found_match && bp - bstart >= strategy->first_success_by) - return false; + return -1; /* * Try to find a match in the history @@ -664,35 +663,34 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, *ctrlp = ctrlb; result_size = bp - bstart; if (result_size >= result_max) - return false; - - /* - * Success - need only fill in the actual length of the compressed datum. - */ - SET_VARSIZE_COMPRESSED(dest, result_size + sizeof(PGLZ_Header)); + return -1; - return true; + /* success */ + return result_size; } /* ---------- * pglz_decompress - * - * Decompresses source into dest. + * Decompresses source into dest. Returns the number of bytes + * decompressed in the destination buffer, or -1 if decompression + * fails. * ---------- */ -void -pglz_decompress(const PGLZ_Header *source, char *dest) +int32 +pglz_decompress(const char *source, int32 slen, char *dest, + int32 rawsize) { const unsigned char *sp; const unsigned char *srcend; unsigned char *dp; unsigned char *destend; - sp = ((const unsigned char *) source) + sizeof(PGLZ_Header); - srcend = ((const unsigned char *) source) + VARSIZE(source); + sp = (const unsigned char *) source; + srcend = ((const unsigned char *) source) + slen; dp = (unsigned char *) dest; - destend = dp + source->rawsize; + destend = dp + rawsize; while (sp < srcend && dp < destend) { @@ -771,9 +769,10 @@ pglz_decompress(const PGLZ_Header *source, char *dest) * Check we decompressed the right amount. */ if (dp != destend || sp != srcend) - elog(ERROR, "compressed data is corrupt"); + return -1; /* * That's it. 
*/ + return rawsize; } diff --git a/src/include/utils/pg_lzcompress.h b/src/include/common/pg_lzcompress.h similarity index 77% rename from src/include/utils/pg_lzcompress.h rename to src/include/common/pg_lzcompress.h index 4af24a32a4935e8ee6da21936580545dd33a1292..52bcaf14b11ea56c8aa25203661923f57430c35a 100644 --- a/src/include/utils/pg_lzcompress.h +++ b/src/include/common/pg_lzcompress.h @@ -3,7 +3,7 @@ * * Definitions for the builtin LZ compressor * - * src/include/utils/pg_lzcompress.h + * src/include/common/pg_lzcompress.h * ---------- */ @@ -11,19 +11,6 @@ #define _PG_LZCOMPRESS_H_ -/* ---------- - * PGLZ_Header - - * - * The information at the start of the compressed data. - * ---------- - */ -typedef struct PGLZ_Header -{ - int32 vl_len_; /* varlena header (do not touch directly!) */ - int32 rawsize; -} PGLZ_Header; - - /* ---------- * PGLZ_MAX_OUTPUT - * @@ -31,16 +18,7 @@ typedef struct PGLZ_Header * We allow 4 bytes for overrun before detecting compression failure. * ---------- */ -#define PGLZ_MAX_OUTPUT(_dlen) ((_dlen) + 4 + sizeof(PGLZ_Header)) - -/* ---------- - * PGLZ_RAW_SIZE - - * - * Macro to determine the uncompressed data size contained - * in the entry. 
- * ---------- - */ -#define PGLZ_RAW_SIZE(_lzdata) ((_lzdata)->rawsize) +#define PGLZ_MAX_OUTPUT(_dlen) ((_dlen) + 4) /* ---------- @@ -105,8 +83,9 @@ extern const PGLZ_Strategy *const PGLZ_strategy_always; * Global function declarations * ---------- */ -extern bool pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, +extern int32 pglz_compress(const char *source, int32 slen, char *dest, const PGLZ_Strategy *strategy); -extern void pglz_decompress(const PGLZ_Header *source, char *dest); +extern int32 pglz_decompress(const char *source, int32 slen, char *dest, + int32 rawsize); #endif /* _PG_LZCOMPRESS_H_ */ diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index 4336f2eabb888b5c4898f4985b1f4cf7c211db73..830f56f0fa17c16a7274805456119cca7f62dfb7 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -76,7 +76,8 @@ sub mkvcbuild push(@pgportfiles, 'rint.c') if ($vsVersion < '12.00'); our @pgcommonallfiles = qw( - exec.c pgfnames.c psprintf.c relpath.c rmtree.c string.c username.c wait_error.c); + exec.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c + string.c username.c wait_error.c); our @pgcommonfrontendfiles = (@pgcommonallfiles, qw(fe_memutils.c)); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index ab36aa3acb64014286a01208465beae5a6f41432..cfd580c44588fa2bab643342e98a23bf06d62a2b 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1125,7 +1125,6 @@ PGEventResultCreate PGEventResultDestroy PGFInfoFunction PGFunction -PGLZ_Header PGLZ_HistEntry PGLZ_Strategy PGMessageField