From 7f508f1c6b515df66d27f860b2faa7b5761fa55d Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas <heikki.linnakangas@iki.fi> Date: Sun, 23 Jan 2011 23:10:15 +0200 Subject: [PATCH] Add 'directory' format to pg_dump. The new directory format is compatible with the 'tar' format, in that untarring a tar format archive produces a valid directory format archive. Joachim Wieland and Heikki Linnakangas --- doc/src/sgml/ref/pg_dump.sgml | 60 ++- doc/src/sgml/ref/pg_restore.sgml | 13 +- src/bin/pg_dump/Makefile | 2 +- src/bin/pg_dump/compress_io.c | 257 ++++++++++ src/bin/pg_dump/compress_io.h | 13 + src/bin/pg_dump/pg_backup.h | 3 +- src/bin/pg_dump/pg_backup_archiver.c | 48 +- src/bin/pg_dump/pg_backup_archiver.h | 1 + src/bin/pg_dump/pg_backup_directory.c | 678 ++++++++++++++++++++++++++ src/bin/pg_dump/pg_backup_tar.c | 6 +- src/bin/pg_dump/pg_dump.c | 97 ++-- src/bin/pg_dump/pg_restore.c | 9 +- 12 files changed, 1132 insertions(+), 55 deletions(-) create mode 100644 src/bin/pg_dump/pg_backup_directory.c diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index de4968c5a03..fdc4b928d54 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -76,11 +76,7 @@ PostgreSQL documentation database are to be restored. The most flexible output file format is the <quote>custom</quote> format (<option>-Fc</option>). It allows for selection and reordering of all archived items, and is compressed - by default. The <application>tar</application> format - (<option>-Ft</option>) is not compressed and has restrictions on - reordering data when loading, but it is otherwise quite flexible; - moreover, it can be manipulated with standard Unix tools such as - <command>tar</command>. + by default. </para> <para> @@ -194,8 +190,12 @@ PostgreSQL documentation <term><option>--file=<replaceable class="parameter">file</replaceable></option></term> <listitem> <para> - Send output to the specified file. If this is omitted, the - standard output is used. 
+ Send output to the specified file. This parameter can be omitted for + file based output formats, in which case the standard output is used. + It must be given for the directory output format however, where it + specifies the target directory instead of a file. In this case the + directory is created by <command>pg_dump</command> and must not exist + before. </para> </listitem> </varlistentry> @@ -226,9 +226,28 @@ PostgreSQL documentation <para> Output a custom-format archive suitable for input into <application>pg_restore</application>. - This is the most flexible output format in that it allows manual - selection and reordering of archived items during restore. - This format is also compressed by default. + Together with the directory output format, this is the most flexible + output format in that it allows manual selection and reordering of + archived items during restore. This format is also compressed by + default. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>d</></term> + <term><literal>directory</></term> + <listitem> + <para> + Output a directory-format archive suitable for input into + <application>pg_restore</application>. This will create a directory + with one file for each table and blob being dumped, plus a + so-called Table of Contents file describing the dumped objects in a + machine-readable format that <application>pg_restore</application> + can read. A directory format archive can be manipulated with + standard Unix tools; for example, files in an uncompressed archive + can be compressed with the <application>gzip</application> tool. + This format is compressed by default. </para> </listitem> </varlistentry> @@ -239,13 +258,12 @@ PostgreSQL documentation <listitem> <para> Output a <command>tar</command>-format archive suitable for input - into <application>pg_restore</application>. 
- This output format allows manual selection and reordering of - archived items during restore, but there is a restriction: the - relative order of table data items cannot be changed during - restore. Also, <command>tar</command> format does not support - compression and has a limit of 8 GB on the size of individual - tables. + into <application>pg_restore</application>. The tar-format is + compatible with the directory-format; extracting a tar-format + archive produces a valid directory-format archive. + However, the tar-format does not support compression and has a + limit of 8 GB on the size of individual tables. Also, the relative + order of table data items cannot be changed during restore. </para> </listitem> </varlistentry> @@ -946,6 +964,14 @@ CREATE DATABASE foo WITH TEMPLATE template0; </screen> </para> + <para> + To dump a database into a directory-format archive: + +<screen> +<prompt>$</prompt> <userinput>pg_dump -Fd mydb -f dumpdir</userinput> +</screen> + </para> + <para> To reload an archive file into a (freshly created) database named <literal>newdb</>: diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml index 78606969a98..bf261af9d6d 100644 --- a/doc/src/sgml/ref/pg_restore.sgml +++ b/doc/src/sgml/ref/pg_restore.sgml @@ -79,7 +79,8 @@ <term><replaceable class="parameter">filename</replaceable></term> <listitem> <para> - Specifies the location of the archive file to be restored. + Specifies the location of the archive file (or directory, for a + directory-format archive) to be restored. If not specified, the standard input is used. </para> </listitem> @@ -166,6 +167,16 @@ one of the following: <variablelist> + <varlistentry> + <term><literal>d</></term> + <term><literal>directory</></term> + <listitem> + <para> + The archive is a <command>directory</command> archive. 
+ </para> + </listitem> + </varlistentry> + <varlistentry> <term><literal>t</></term> <term><literal>tar</></term> diff --git a/src/bin/pg_dump/Makefile b/src/bin/pg_dump/Makefile index db607b46f56..8410af13783 100644 --- a/src/bin/pg_dump/Makefile +++ b/src/bin/pg_dump/Makefile @@ -20,7 +20,7 @@ override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) OBJS= pg_backup_archiver.o pg_backup_db.o pg_backup_custom.o \ pg_backup_files.o pg_backup_null.o pg_backup_tar.o \ - dumputils.o compress_io.o $(WIN32RES) + pg_backup_directory.o dumputils.o compress_io.o $(WIN32RES) KEYWRDOBJS = keywords.o kwlookup.o diff --git a/src/bin/pg_dump/compress_io.c b/src/bin/pg_dump/compress_io.c index 8c41a690438..fb280ab672f 100644 --- a/src/bin/pg_dump/compress_io.c +++ b/src/bin/pg_dump/compress_io.c @@ -7,6 +7,17 @@ * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * + * This file includes two APIs for dealing with compressed data. The first + * provides more flexibility, using callbacks to read/write data from the + * underlying stream. The second API is a wrapper around fopen/gzopen and + * friends, providing an interface similar to those, but abstracts away + * the possible compression. Both APIs use libz for the compression, but + * the second API uses gzip headers, so the resulting files can be easily + * manipulated with the gzip utility. + * + * Compressor API + * -------------- + * * The interface for writing to an archive consists of three functions: * AllocateCompressor, WriteDataToArchive and EndCompressor. First you call * AllocateCompressor, then write all the data by calling WriteDataToArchive @@ -23,6 +34,17 @@ * * The interface is the same for compressed and uncompressed streams. * + * Compressed stream API + * ---------------------- + * + * The compressed stream API is a wrapper around the C standard fopen() and + * libz's gzopen() APIs. 
It allows you to use the same functions for + * compressed and uncompressed streams. cfopen_read() first tries to open + * the file with given name, and if it fails, it tries to open the same + * file with the .gz suffix. cfopen_write() opens a file for writing, an + * extra argument specifies if the file should be compressed, and adds the + * .gz suffix to the filename if so. This allows you to easily handle both + * compressed and uncompressed files. * * IDENTIFICATION * src/bin/pg_dump/compress_io.c @@ -32,6 +54,10 @@ #include "compress_io.h" +/*---------------------- + * Compressor API + *---------------------- + */ /* typedef appears in compress_io.h */ struct CompressorState @@ -418,3 +444,234 @@ WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs, } +/*---------------------- + * Compressed stream API + *---------------------- + */ + +/* + * cfp represents an open stream, wrapping the underlying FILE or gzFile + * pointer. This is opaque to the callers. + */ +struct cfp +{ + FILE *uncompressedfp; +#ifdef HAVE_LIBZ + gzFile compressedfp; +#endif +}; + +#ifdef HAVE_LIBZ +static int hasSuffix(const char *filename, const char *suffix); +#endif + +/* + * Open a file for reading. 'path' is the file to open, and 'mode' should + * be either "r" or "rb". + * + * If the file at 'path' does not exist, we append the ".gz" suffix (if 'path' + * doesn't already have it) and try again. So if you pass "foo" as 'path', + * this will open either "foo" or "foo.gz". 
+ */ +cfp * +cfopen_read(const char *path, const char *mode) +{ + cfp *fp; + +#ifdef HAVE_LIBZ + if (hasSuffix(path, ".gz")) + fp = cfopen(path, mode, 1); + else +#endif + { + fp = cfopen(path, mode, 0); +#ifdef HAVE_LIBZ + if (fp == NULL) + { + int fnamelen = strlen(path) + 4; + char *fname = malloc(fnamelen); + if (fname == NULL) + die_horribly(NULL, modulename, "Out of memory\n"); + + snprintf(fname, fnamelen, "%s%s", path, ".gz"); + fp = cfopen(fname, mode, 1); + free(fname); + } +#endif + } + return fp; +} + +/* + * Open a file for writing. 'path' indicates the path name, and 'mode' must + * be a filemode as accepted by fopen() and gzopen() that indicates writing + * ("w", "wb", "a", or "ab"). + * + * If 'compression' is non-zero, a gzip compressed stream is opened, and + * 'compression' indicates the compression level used. The ".gz" suffix + * is automatically added to 'path' in that case. + */ +cfp * +cfopen_write(const char *path, const char *mode, int compression) +{ + cfp *fp; + + if (compression == 0) + fp = cfopen(path, mode, 0); + else + { +#ifdef HAVE_LIBZ + int fnamelen = strlen(path) + 4; + char *fname = malloc(fnamelen); + if (fname == NULL) + die_horribly(NULL, modulename, "Out of memory\n"); + + snprintf(fname, fnamelen, "%s%s", path, ".gz"); + fp = cfopen(fname, mode, 1); + free(fname); +#else + die_horribly(NULL, modulename, "not built with zlib support\n"); +#endif + } + return fp; +} + +/* + * Opens file 'path' in 'mode'. 
If 'compression' is non-zero, the file + * is opened with libz gzopen(), otherwise with plain fopen() + */ +cfp * +cfopen(const char *path, const char *mode, int compression) +{ + cfp *fp = malloc(sizeof(cfp)); + if (fp == NULL) + die_horribly(NULL, modulename, "Out of memory\n"); + + if (compression != 0) + { +#ifdef HAVE_LIBZ + fp->compressedfp = gzopen(path, mode); + fp->uncompressedfp = NULL; + if (fp->compressedfp == NULL) + { + free(fp); + fp = NULL; + } +#else + die_horribly(NULL, modulename, "not built with zlib support\n"); +#endif + } + else + { +#ifdef HAVE_LIBZ + fp->compressedfp = NULL; +#endif + fp->uncompressedfp = fopen(path, mode); + if (fp->uncompressedfp == NULL) + { + free(fp); + fp = NULL; + } + } + + return fp; +} + + +int +cfread(void *ptr, int size, cfp *fp) +{ +#ifdef HAVE_LIBZ + if (fp->compressedfp) + return gzread(fp->compressedfp, ptr, size); + else +#endif + return fread(ptr, 1, size, fp->uncompressedfp); +} + +int +cfwrite(const void *ptr, int size, cfp *fp) +{ +#ifdef HAVE_LIBZ + if (fp->compressedfp) + return gzwrite(fp->compressedfp, ptr, size); + else +#endif + return fwrite(ptr, 1, size, fp->uncompressedfp); +} + +int +cfgetc(cfp *fp) +{ +#ifdef HAVE_LIBZ + if (fp->compressedfp) + return gzgetc(fp->compressedfp); + else +#endif + return fgetc(fp->uncompressedfp); +} + +char * +cfgets(cfp *fp, char *buf, int len) +{ +#ifdef HAVE_LIBZ + if (fp->compressedfp) + return gzgets(fp->compressedfp, buf, len); + else +#endif + return fgets(buf, len, fp->uncompressedfp); +} + +int +cfclose(cfp *fp) +{ + int result; + + if (fp == NULL) + { + errno = EBADF; + return EOF; + } +#ifdef HAVE_LIBZ + if (fp->compressedfp) + { + result = gzclose(fp->compressedfp); + fp->compressedfp = NULL; + } + else +#endif + { + result = fclose(fp->uncompressedfp); + fp->uncompressedfp = NULL; + } + free(fp); + + return result; +} + +int +cfeof(cfp *fp) +{ +#ifdef HAVE_LIBZ + if (fp->compressedfp) + return gzeof(fp->compressedfp); + else +#endif + return 
feof(fp->uncompressedfp); +} + +#ifdef HAVE_LIBZ +static int +hasSuffix(const char *filename, const char *suffix) +{ + int filenamelen = strlen(filename); + int suffixlen = strlen(suffix); + + if (filenamelen < suffixlen) + return 0; + + return memcmp(&filename[filenamelen - suffixlen], + suffix, + suffixlen) == 0; +} +#endif diff --git a/src/bin/pg_dump/compress_io.h b/src/bin/pg_dump/compress_io.h index 13e536f36d1..9963cef9fc1 100644 --- a/src/bin/pg_dump/compress_io.h +++ b/src/bin/pg_dump/compress_io.h @@ -54,4 +54,17 @@ extern size_t WriteDataToArchive(ArchiveHandle *AH, CompressorState *cs, const void *data, size_t dLen); extern void EndCompressor(ArchiveHandle *AH, CompressorState *cs); + +typedef struct cfp cfp; + +extern cfp *cfopen(const char *path, const char *mode, int compression); +extern cfp *cfopen_read(const char *path, const char *mode); +extern cfp *cfopen_write(const char *path, const char *mode, int compression); +extern int cfread(void *ptr, int size, cfp *fp); +extern int cfwrite(const void *ptr, int size, cfp *fp); +extern int cfgetc(cfp *fp); +extern char *cfgets(cfp *fp, char *buf, int len); +extern int cfclose(cfp *fp); +extern int cfeof(cfp *fp); + #endif diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index 8fa9a57bacb..f51c95828ae 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -50,7 +50,8 @@ typedef enum _archiveFormat archCustom = 1, archFiles = 2, archTar = 3, - archNull = 4 + archNull = 4, + archDirectory = 5 } ArchiveFormat; typedef enum _archiveMode diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index e230c4e1b00..d001ff49d25 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -25,6 +25,7 @@ #include <ctype.h> #include <unistd.h> +#include <sys/stat.h> #include <sys/types.h> #include <sys/wait.h> @@ -1751,11 +1752,46 @@ _discoverArchiveFormat(ArchiveHandle *AH) if (AH->fSpec) { + struct stat 
st; + wantClose = 1; - fh = fopen(AH->fSpec, PG_BINARY_R); - if (!fh) - die_horribly(AH, modulename, "could not open input file \"%s\": %s\n", - AH->fSpec, strerror(errno)); + + /* + * Check if the specified archive is a directory. If so, check if + * there's a "toc.dat" (or "toc.dat.gz") file in it. + */ + if (stat(AH->fSpec, &st) == 0 && S_ISDIR(st.st_mode)) + { + char buf[MAXPGPATH]; + if (snprintf(buf, MAXPGPATH, "%s/toc.dat", AH->fSpec) >= MAXPGPATH) + die_horribly(AH, modulename, "directory name too long: \"%s\"\n", + AH->fSpec); + if (stat(buf, &st) == 0 && S_ISREG(st.st_mode)) + { + AH->format = archDirectory; + return AH->format; + } + +#ifdef HAVE_LIBZ + if (snprintf(buf, MAXPGPATH, "%s/toc.dat.gz", AH->fSpec) >= MAXPGPATH) + die_horribly(AH, modulename, "directory name too long: \"%s\"\n", + AH->fSpec); + if (stat(buf, &st) == 0 && S_ISREG(st.st_mode)) + { + AH->format = archDirectory; + return AH->format; + } +#endif + die_horribly(AH, modulename, "directory \"%s\" does not appear to be a valid archive (\"toc.dat\" does not exist)\n", + AH->fSpec); + } + else + { + fh = fopen(AH->fSpec, PG_BINARY_R); + if (!fh) + die_horribly(AH, modulename, "could not open input file \"%s\": %s\n", + AH->fSpec, strerror(errno)); + } } else { @@ -1973,6 +2009,10 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt, InitArchiveFmt_Null(AH); break; + case archDirectory: + InitArchiveFmt_Directory(AH); + break; + case archTar: InitArchiveFmt_Tar(AH); break; diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index 502e7410a1b..8d3bbe012cc 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -370,6 +370,7 @@ extern void EndRestoreBlobs(ArchiveHandle *AH); extern void InitArchiveFmt_Custom(ArchiveHandle *AH); extern void InitArchiveFmt_Files(ArchiveHandle *AH); extern void InitArchiveFmt_Null(ArchiveHandle *AH); +extern void InitArchiveFmt_Directory(ArchiveHandle *AH); extern void 
InitArchiveFmt_Tar(ArchiveHandle *AH); extern bool isValidTarHeader(char *header); diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c new file mode 100644 index 00000000000..ff16c1a74a9 --- /dev/null +++ b/src/bin/pg_dump/pg_backup_directory.c @@ -0,0 +1,678 @@ +/*------------------------------------------------------------------------- + * + * pg_backup_directory.c + * + * A directory format dump is a directory, which contains a "toc.dat" file + * for the TOC, and a separate file for each data entry, named "<oid>.dat". + * Large objects (BLOBs) are stored in separate files named "blob_<uid>.dat", + * and there's a plain-text TOC file for them called "blobs.toc". If + * compression is used, each data file is individually compressed and the + * ".gz" suffix is added to the filenames. The TOC files are never + * compressed by pg_dump, however they are accepted with the .gz suffix too, + * in case the user has manually compressed them with 'gzip'. + * + * NOTE: This format is identical to the files written in the tar file in + * the 'tar' format, except that we don't write the restore.sql file (TODO), + * and the tar format doesn't support compression. Please keep the formats in + * sync. + * + * + * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * Portions Copyright (c) 2000, Philip Warner + * + * Rights are granted to use this software in any way so long + * as this notice is not removed. + * + * The author is not responsible for loss or damages that may + * result from it's use. + * + * IDENTIFICATION + * src/bin/pg_dump/pg_backup_directory.c + * + *------------------------------------------------------------------------- + */ + +#include <dirent.h> +#include <sys/stat.h> + +#include "pg_backup_archiver.h" +#include "compress_io.h" + +typedef struct +{ + /* + * Our archive location. 
This is basically what the user specified as his + * backup file but of course here it is a directory. + */ + char *directory; + + cfp *dataFH; /* currently open data file */ + + cfp *blobsTocFH; /* file handle for blobs.toc */ +} lclContext; + +typedef struct +{ + char *filename; /* filename excluding the directory (basename) */ +} lclTocEntry; + +static const char *modulename = gettext_noop("directory archiver"); + +/* prototypes for private functions */ +static void _ArchiveEntry(ArchiveHandle *AH, TocEntry *te); +static void _StartData(ArchiveHandle *AH, TocEntry *te); +static void _EndData(ArchiveHandle *AH, TocEntry *te); +static size_t _WriteData(ArchiveHandle *AH, const void *data, size_t dLen); +static int _WriteByte(ArchiveHandle *AH, const int i); +static int _ReadByte(ArchiveHandle *); +static size_t _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len); +static size_t _ReadBuf(ArchiveHandle *AH, void *buf, size_t len); +static void _CloseArchive(ArchiveHandle *AH); +static void _PrintTocData(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt); + +static void _WriteExtraToc(ArchiveHandle *AH, TocEntry *te); +static void _ReadExtraToc(ArchiveHandle *AH, TocEntry *te); +static void _PrintExtraToc(ArchiveHandle *AH, TocEntry *te); + +static void _StartBlobs(ArchiveHandle *AH, TocEntry *te); +static void _StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid); +static void _EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid); +static void _EndBlobs(ArchiveHandle *AH, TocEntry *te); +static void _LoadBlobs(ArchiveHandle *AH, RestoreOptions *ropt); + +static char *prependDirectory(ArchiveHandle *AH, const char *relativeFilename); + +static void createDirectory(const char *dir); + + +/* + * Init routine required by ALL formats. This is a global routine + * and should be declared in pg_backup_archiver.h + * + * Its task is to create any extra archive context (using AH->formatData), + * and to initialize the supported function pointers. 
+ * + * It should also prepare whatever its input source is for reading/writing, + * and in the case of a read mode connection, it should load the Header & TOC. + */ +void +InitArchiveFmt_Directory(ArchiveHandle *AH) +{ + lclContext *ctx; + + /* Assuming static functions, this can be copied for each format. */ + AH->ArchiveEntryPtr = _ArchiveEntry; + AH->StartDataPtr = _StartData; + AH->WriteDataPtr = _WriteData; + AH->EndDataPtr = _EndData; + AH->WriteBytePtr = _WriteByte; + AH->ReadBytePtr = _ReadByte; + AH->WriteBufPtr = _WriteBuf; + AH->ReadBufPtr = _ReadBuf; + AH->ClosePtr = _CloseArchive; + AH->ReopenPtr = NULL; + AH->PrintTocDataPtr = _PrintTocData; + AH->ReadExtraTocPtr = _ReadExtraToc; + AH->WriteExtraTocPtr = _WriteExtraToc; + AH->PrintExtraTocPtr = _PrintExtraToc; + + AH->StartBlobsPtr = _StartBlobs; + AH->StartBlobPtr = _StartBlob; + AH->EndBlobPtr = _EndBlob; + AH->EndBlobsPtr = _EndBlobs; + + AH->ClonePtr = NULL; + AH->DeClonePtr = NULL; + + /* Set up our private context */ + ctx = (lclContext *) calloc(1, sizeof(lclContext)); + if (ctx == NULL) + die_horribly(AH, modulename, "out of memory\n"); + AH->formatData = (void *) ctx; + + ctx->dataFH = NULL; + ctx->blobsTocFH = NULL; + + /* Initialize LO buffering */ + AH->lo_buf_size = LOBBUFSIZE; + AH->lo_buf = (void *) malloc(LOBBUFSIZE); + if (AH->lo_buf == NULL) + die_horribly(AH, modulename, "out of memory\n"); + + /* + * Now open the TOC file + */ + + if (!AH->fSpec || strcmp(AH->fSpec, "") == 0) + die_horribly(AH, modulename, "no output directory specified\n"); + + ctx->directory = AH->fSpec; + + if (AH->mode == archModeWrite) + { + /* Create the directory, errors are caught there */ + createDirectory(ctx->directory); + } + else + { /* Read Mode */ + char *fname; + cfp *tocFH; + + fname = prependDirectory(AH, "toc.dat"); + + tocFH = cfopen_read(fname, PG_BINARY_R); + if (tocFH == NULL) + die_horribly(AH, modulename, + "could not open input file \"%s\": %s\n", + fname, strerror(errno)); + + 
ctx->dataFH = tocFH; + /* + * The TOC of a directory format dump shares the format code of + * the tar format. + */ + AH->format = archTar; + ReadHead(AH); + AH->format = archDirectory; + ReadToc(AH); + + /* Nothing else in the file, so close it again... */ + if (cfclose(tocFH) != 0) + die_horribly(AH, modulename, "could not close TOC file: %s\n", + strerror(errno)); + ctx->dataFH = NULL; + } +} + +/* + * Called by the Archiver when the dumper creates a new TOC entry. + * + * We determine the filename for this entry. +*/ +static void +_ArchiveEntry(ArchiveHandle *AH, TocEntry *te) +{ + lclTocEntry *tctx; + char fn[MAXPGPATH]; + + tctx = (lclTocEntry *) calloc(1, sizeof(lclTocEntry)); + if (!tctx) + die_horribly(AH, modulename, "out of memory\n"); + if (te->dataDumper) + { + snprintf(fn, MAXPGPATH, "%d.dat", te->dumpId); + tctx->filename = strdup(fn); + } + else if (strcmp(te->desc, "BLOBS") == 0) + tctx->filename = strdup("blobs.toc"); + else + tctx->filename = NULL; + + te->formatData = (void *) tctx; +} + +/* + * Called by the Archiver to save any extra format-related TOC entry + * data. + * + * Use the Archiver routines to write data - they are non-endian, and + * maintain other important file information. + */ +static void +_WriteExtraToc(ArchiveHandle *AH, TocEntry *te) +{ + lclTocEntry *tctx = (lclTocEntry *) te->formatData; + + /* + * A dumpable object has set tctx->filename, any other object has not. + * (see _ArchiveEntry). + */ + if (tctx->filename) + WriteStr(AH, tctx->filename); + else + WriteStr(AH, ""); +} + +/* + * Called by the Archiver to read any extra format-related TOC data. + * + * Needs to match the order defined in _WriteExtraToc, and should also + * use the Archiver input routines. 
+ */ +static void +_ReadExtraToc(ArchiveHandle *AH, TocEntry *te) +{ + lclTocEntry *tctx = (lclTocEntry *) te->formatData; + + if (tctx == NULL) + { + tctx = (lclTocEntry *) calloc(1, sizeof(lclTocEntry)); + if (!tctx) + die_horribly(AH, modulename, "out of memory\n"); + te->formatData = (void *) tctx; + } + + tctx->filename = ReadStr(AH); + if (strlen(tctx->filename) == 0) + { + free(tctx->filename); + tctx->filename = NULL; + } +} + +/* + * Called by the Archiver when restoring an archive to output a comment + * that includes useful information about the TOC entry. + */ +static void +_PrintExtraToc(ArchiveHandle *AH, TocEntry *te) +{ + lclTocEntry *tctx = (lclTocEntry *) te->formatData; + + if (AH->public.verbose && tctx->filename) + ahprintf(AH, "-- File: %s\n", tctx->filename); +} + +/* + * Called by the archiver when saving TABLE DATA (not schema). This routine + * should save whatever format-specific information is needed to read + * the archive back. + * + * It is called just prior to the dumper's 'DataDumper' routine being called. + * + * We create the data file for writing. + */ +static void +_StartData(ArchiveHandle *AH, TocEntry *te) +{ + lclTocEntry *tctx = (lclTocEntry *) te->formatData; + lclContext *ctx = (lclContext *) AH->formatData; + char *fname; + + fname = prependDirectory(AH, tctx->filename); + + ctx->dataFH = cfopen_write(fname, PG_BINARY_W, AH->compression); + if (ctx->dataFH == NULL) + die_horribly(AH, modulename, "could not open output file \"%s\": %s\n", + fname, strerror(errno)); +} + +/* + * Called by archiver when dumper calls WriteData. This routine is + * called for both BLOB and TABLE data; it is the responsibility of + * the format to manage each kind of data using StartBlob/StartData. + * + * It should only be called from within a DataDumper routine. + * + * We write the data to the open data file. 
+ */ +static size_t +_WriteData(ArchiveHandle *AH, const void *data, size_t dLen) +{ + lclContext *ctx = (lclContext *) AH->formatData; + + if (dLen == 0) + return 0; + + return cfwrite(data, dLen, ctx->dataFH); +} + +/* + * Called by the archiver when a dumper's 'DataDumper' routine has + * finished. + * + * We close the data file. + */ +static void +_EndData(ArchiveHandle *AH, TocEntry *te) +{ + lclContext *ctx = (lclContext *) AH->formatData; + + /* Close the file */ + cfclose(ctx->dataFH); + + ctx->dataFH = NULL; +} + +/* + * Print data for a given file (can be a BLOB as well) + */ +static void +_PrintFileData(ArchiveHandle *AH, char *filename, RestoreOptions *ropt) +{ + size_t cnt; + char *buf; + size_t buflen; + cfp *cfp; + + if (!filename) + return; + + cfp = cfopen_read(filename, PG_BINARY_R); + if (!cfp) + die_horribly(AH, modulename, "could not open input file \"%s\": %s\n", + filename, strerror(errno)); + + buf = malloc(ZLIB_OUT_SIZE); + if (buf == NULL) + die_horribly(NULL, modulename, "out of memory\n"); + buflen = ZLIB_OUT_SIZE; + + while ((cnt = cfread(buf, buflen, cfp))) + ahwrite(buf, 1, cnt, AH); + + free(buf); +} + +/* + * Print data for a given TOC entry +*/ +static void +_PrintTocData(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt) +{ + lclTocEntry *tctx = (lclTocEntry *) te->formatData; + + if (!tctx->filename) + return; + + if (strcmp(te->desc, "BLOBS") == 0) + _LoadBlobs(AH, ropt); + else + { + char *fname = prependDirectory(AH, tctx->filename); + _PrintFileData(AH, fname, ropt); + } +} + +static void +_LoadBlobs(ArchiveHandle *AH, RestoreOptions *ropt) +{ + Oid oid; + lclContext *ctx = (lclContext *) AH->formatData; + char *fname; + char line[MAXPGPATH]; + + StartRestoreBlobs(AH); + + fname = prependDirectory(AH, "blobs.toc"); + + ctx->blobsTocFH = cfopen_read(fname, PG_BINARY_R); + + if (ctx->blobsTocFH == NULL) + die_horribly(AH, modulename, "could not open large object TOC file \"%s\" for input: %s\n", + fname, strerror(errno)); + 
+ /* Read the blobs TOC file line-by-line, and process each blob */ + while ((cfgets(ctx->blobsTocFH, line, MAXPGPATH)) != NULL) + { + char fname[MAXPGPATH]; + char path[MAXPGPATH]; + + if (sscanf(line, "%u %s\n", &oid, fname) != 2) + die_horribly(AH, modulename, "invalid line in large object TOC file: %s\n", + line); + + StartRestoreBlob(AH, oid, ropt->dropSchema); + snprintf(path, MAXPGPATH, "%s/%s", ctx->directory, fname); + _PrintFileData(AH, path, ropt); + EndRestoreBlob(AH, oid); + } + if (!cfeof(ctx->blobsTocFH)) + die_horribly(AH, modulename, "error reading large object TOC file \"%s\"\n", + fname); + + if (cfclose(ctx->blobsTocFH) != 0) + die_horribly(AH, modulename, "could not close large object TOC file \"%s\": %s\n", + fname, strerror(errno)); + + ctx->blobsTocFH = NULL; + + EndRestoreBlobs(AH); +} + + +/* + * Write a byte of data to the archive. + * Called by the archiver to do integer & byte output to the archive. + * These routines are only used to read & write the headers & TOC. + */ +static int +_WriteByte(ArchiveHandle *AH, const int i) +{ + unsigned char c = (unsigned char) i; + lclContext *ctx = (lclContext *) AH->formatData; + + if (cfwrite(&c, 1, ctx->dataFH) != 1) + die_horribly(AH, modulename, "could not write byte\n"); + + return 1; +} + +/* + * Read a byte of data from the archive. + * Called by the archiver to read bytes & integers from the archive. + * These routines are only used to read & write headers & TOC. + * EOF should be treated as a fatal error. + */ +static int +_ReadByte(ArchiveHandle *AH) +{ + lclContext *ctx = (lclContext *) AH->formatData; + int res; + + res = cfgetc(ctx->dataFH); + if (res == EOF) + die_horribly(AH, modulename, "unexpected end of file\n"); + + return res; +} + +/* + * Write a buffer of data to the archive. + * Called by the archiver to write a block of bytes to the TOC or a data file. 
+ */ +static size_t +_WriteBuf(ArchiveHandle *AH, const void *buf, size_t len) +{ + lclContext *ctx = (lclContext *) AH->formatData; + size_t res; + + res = cfwrite(buf, len, ctx->dataFH); + if (res != len) + die_horribly(AH, modulename, "could not write to output file: %s\n", + strerror(errno)); + + return res; +} + +/* + * Read a block of bytes from the archive. + * + * Called by the archiver to read a block of bytes from the archive + */ +static size_t +_ReadBuf(ArchiveHandle *AH, void *buf, size_t len) +{ + lclContext *ctx = (lclContext *) AH->formatData; + size_t res; + + res = cfread(buf, len, ctx->dataFH); + + return res; +} + +/* + * Close the archive. + * + * When writing the archive, this is the routine that actually starts + * the process of saving it to files. No data should be written prior + * to this point, since the user could sort the TOC after creating it. + * + * If an archive is to be written, this routine must call: + * WriteHead to save the archive header + * WriteToc to save the TOC entries + * WriteDataChunks to save all DATA & BLOBs. + */ +static void +_CloseArchive(ArchiveHandle *AH) +{ + lclContext *ctx = (lclContext *) AH->formatData; + if (AH->mode == archModeWrite) + { + cfp *tocFH; + char *fname = prependDirectory(AH, "toc.dat"); + + /* The TOC is always created uncompressed */ + tocFH = cfopen_write(fname, PG_BINARY_W, 0); + if (tocFH == NULL) + die_horribly(AH, modulename, "could not open output file \"%s\": %s\n", + fname, strerror(errno)); + ctx->dataFH = tocFH; + /* + * Write 'tar' in the format field of the toc.dat file. The directory + * is compatible with 'tar', so there's no point having a different + * format code for it. 
+ */ + AH->format = archTar; + WriteHead(AH); + AH->format = archDirectory; + WriteToc(AH); + if (cfclose(tocFH) != 0) + die_horribly(AH, modulename, "could not close TOC file: %s\n", + strerror(errno)); + WriteDataChunks(AH); + } + AH->FH = NULL; +} + + +/* + * BLOB support + */ + +/* + * Called by the archiver when starting to save all BLOB DATA (not schema). + * It is called just prior to the dumper's DataDumper routine. + * + * We open the large object TOC file here, so that we can append a line to + * it for each blob. + */ +static void +_StartBlobs(ArchiveHandle *AH, TocEntry *te) +{ + lclContext *ctx = (lclContext *) AH->formatData; + char *fname; + + fname = prependDirectory(AH, "blobs.toc"); + + /* The blob TOC file is never compressed */ + ctx->blobsTocFH = cfopen_write(fname, "ab", 0); + if (ctx->blobsTocFH == NULL) + die_horribly(AH, modulename, "could not open output file \"%s\": %s\n", + fname, strerror(errno)); +} + +/* + * Called by the archiver when we're about to start dumping a blob. + * + * We create a file to write the blob to. + */ +static void +_StartBlob(ArchiveHandle *AH, TocEntry *te, Oid oid) +{ + lclContext *ctx = (lclContext *) AH->formatData; + char fname[MAXPGPATH]; + + snprintf(fname, MAXPGPATH, "%s/blob_%u.dat", ctx->directory, oid); + + ctx->dataFH = cfopen_write(fname, PG_BINARY_W, AH->compression); + + if (ctx->dataFH == NULL) + die_horribly(AH, modulename, "could not open output file \"%s\": %s\n", + fname, strerror(errno)); +} + +/* + * Called by the archiver when the dumper is finished writing a blob. + * + * We close the blob file and write an entry to the blob TOC file for it. 
+ */
+static void
+_EndBlob(ArchiveHandle *AH, TocEntry *te, Oid oid)
+{
+	lclContext *ctx = (lclContext *) AH->formatData;
+	char		buf[50];
+	int			len;
+
+	/* Close the BLOB data file itself */
+	cfclose(ctx->dataFH);
+	ctx->dataFH = NULL;
+
+	/* register the blob in blobs.toc */
+	len = snprintf(buf, sizeof(buf), "%u blob_%u.dat\n", oid, oid);
+	if (cfwrite(buf, len, ctx->blobsTocFH) != len)
+		die_horribly(AH, modulename, "could not write to blobs TOC file\n");
+}
+
+/*
+ * Called by the archiver when finishing saving all BLOB DATA.
+ *
+ * We close the blobs TOC file.
+ */
+static void
+_EndBlobs(ArchiveHandle *AH, TocEntry *te)
+{
+	lclContext *ctx = (lclContext *) AH->formatData;
+
+	cfclose(ctx->blobsTocFH);
+	ctx->blobsTocFH = NULL;
+}
+
+static void
+createDirectory(const char *dir)
+{
+	struct stat st;
+
+	/* the directory must not exist yet. */
+	if (stat(dir, &st) == 0)
+	{
+		if (S_ISDIR(st.st_mode))
+			die_horribly(NULL, modulename,
+						 "cannot create directory %s, it exists already\n",
+						 dir);
+		else
+			die_horribly(NULL, modulename,
+						 "cannot create directory %s, a file with this name "
+						 "exists already\n", dir);
+	}
+
+	/*
+	 * Now actually create the directory.  Note that there is a race
+	 * condition here: someone else could create the same directory
+	 * between our stat() check above and the mkdir() call below.
+	 */
+	if (mkdir(dir, 0700) < 0)
+		die_horribly(NULL, modulename, "could not create directory %s: %s\n",
+					 dir, strerror(errno));
+}
+
+
+static char *
+prependDirectory(ArchiveHandle *AH, const char *relativeFilename)
+{
+	lclContext *ctx = (lclContext *) AH->formatData;
+	static char buf[MAXPGPATH];
+	char	   *dname;
+
+	dname = ctx->directory;
+
+	if (strlen(dname) + 1 + strlen(relativeFilename) + 1 > MAXPGPATH)
+		die_horribly(AH, modulename, "path name too long: %s\n", dname);
+
+	strcpy(buf, dname);
+	strcat(buf, "/");
+	strcat(buf, relativeFilename);
+
+	return buf;
+}
diff --git a/src/bin/pg_dump/pg_backup_tar.c b/src/bin/pg_dump/pg_backup_tar.c
index 006f7dab72e..d0f6a589b32 100644
--- a/src/bin/pg_dump/pg_backup_tar.c
+++ b/src/bin/pg_dump/pg_backup_tar.c
@@ -4,6 +4,10 @@
  *
  *	This file is copied from the 'files' format file, but dumps data into
  *	one temp file then sends it to the output TAR archive.
+ *
+ * NOTE: If you untar the created 'tar' file, the resulting files are
+ * compatible with the 'directory' format. Please keep the two formats in
+ * sync.
  *
  *	See the headers to pg_backup_files & pg_restore for more details.
  *
@@ -167,7 +171,7 @@ InitArchiveFmt_Tar(ArchiveHandle *AH)
 		die_horribly(AH, modulename, "out of memory\n");
 
 	/*
-	 * Now open the TOC file
+	 * Now open the tar file, and load the TOC if we're in read mode.
*/ if (AH->mode == archModeWrite) { diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 40b414b3ecd..e844b5b0624 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -138,6 +138,7 @@ static int no_unlogged_table_data = 0; static void help(const char *progname); +static ArchiveFormat parseArchiveFormat(const char *format, ArchiveMode *mode); static void expand_schema_name_patterns(SimpleStringList *patterns, SimpleOidList *oids); static void expand_table_name_patterns(SimpleStringList *patterns, @@ -267,6 +268,8 @@ main(int argc, char **argv) int my_version; int optindex; RestoreOptions *ropt; + ArchiveFormat archiveFormat = archUnknown; + ArchiveMode archiveMode; static int disable_triggers = 0; static int outputNoTablespaces = 0; @@ -539,36 +542,31 @@ main(int argc, char **argv) exit(1); } - /* open the output file */ - if (pg_strcasecmp(format, "a") == 0 || pg_strcasecmp(format, "append") == 0) - { - /* This is used by pg_dumpall, and is not documented */ - plainText = 1; - g_fout = CreateArchive(filename, archNull, 0, archModeAppend); - } - else if (pg_strcasecmp(format, "c") == 0 || pg_strcasecmp(format, "custom") == 0) - g_fout = CreateArchive(filename, archCustom, compressLevel, archModeWrite); - else if (pg_strcasecmp(format, "f") == 0 || pg_strcasecmp(format, "file") == 0) - { - /* - * Dump files into the current directory; for demonstration only, not - * documented. 
- */ - g_fout = CreateArchive(filename, archFiles, compressLevel, archModeWrite); - } - else if (pg_strcasecmp(format, "p") == 0 || pg_strcasecmp(format, "plain") == 0) - { + archiveFormat = parseArchiveFormat(format, &archiveMode); + + /* archiveFormat specific setup */ + if (archiveFormat == archNull) plainText = 1; - g_fout = CreateArchive(filename, archNull, 0, archModeWrite); - } - else if (pg_strcasecmp(format, "t") == 0 || pg_strcasecmp(format, "tar") == 0) - g_fout = CreateArchive(filename, archTar, compressLevel, archModeWrite); - else + + /* + * Ignore compression level for plain format. XXX: This is a bit + * inconsistent, tar-format throws an error instead. + */ + if (archiveFormat == archNull) + compressLevel = 0; + + /* Custom and directory formats are compressed by default */ + if (compressLevel == -1) { - write_msg(NULL, "invalid output format \"%s\" specified\n", format); - exit(1); + if (archiveFormat == archCustom || archiveFormat == archDirectory) + compressLevel = Z_DEFAULT_COMPRESSION; + else + compressLevel = 0; } + /* open the output file */ + g_fout = CreateArchive(filename, archiveFormat, compressLevel, archiveMode); + if (g_fout == NULL) { write_msg(NULL, "could not open output file \"%s\" for writing\n", filename); @@ -835,8 +833,8 @@ help(const char *progname) printf(_(" %s [OPTION]... 
[DBNAME]\n"), progname); printf(_("\nGeneral options:\n")); - printf(_(" -f, --file=FILENAME output file name\n")); - printf(_(" -F, --format=c|t|p output file format (custom, tar, plain text)\n")); + printf(_(" -f, --file=OUTPUT output file or directory name\n")); + printf(_(" -F, --format=c|d|t|p output file format (custom, directory, tar, plain text)\n")); printf(_(" -v, --verbose verbose mode\n")); printf(_(" -Z, --compress=0-9 compression level for compressed formats\n")); printf(_(" --lock-wait-timeout=TIMEOUT fail after waiting TIMEOUT for a table lock\n")); @@ -894,6 +892,49 @@ exit_nicely(void) exit(1); } +static ArchiveFormat +parseArchiveFormat(const char *format, ArchiveMode *mode) +{ + ArchiveFormat archiveFormat; + + *mode = archModeWrite; + + if (pg_strcasecmp(format, "a") == 0 || pg_strcasecmp(format, "append") == 0) + { + /* This is used by pg_dumpall, and is not documented */ + archiveFormat = archNull; + *mode = archModeAppend; + } + else if (pg_strcasecmp(format, "c") == 0) + archiveFormat = archCustom; + else if (pg_strcasecmp(format, "custom") == 0) + archiveFormat = archCustom; + else if (pg_strcasecmp(format, "d") == 0) + archiveFormat = archDirectory; + else if (pg_strcasecmp(format, "directory") == 0) + archiveFormat = archDirectory; + else if (pg_strcasecmp(format, "f") == 0 || pg_strcasecmp(format, "file") == 0) + /* + * Dump files into the current directory; for demonstration only, not + * documented. 
+ */ + archiveFormat = archFiles; + else if (pg_strcasecmp(format, "p") == 0) + archiveFormat = archNull; + else if (pg_strcasecmp(format, "plain") == 0) + archiveFormat = archNull; + else if (pg_strcasecmp(format, "t") == 0) + archiveFormat = archTar; + else if (pg_strcasecmp(format, "tar") == 0) + archiveFormat = archTar; + else + { + write_msg(NULL, "invalid output format \"%s\" specified\n", format); + exit(1); + } + return archiveFormat; +} + /* * Find the OIDs of all schemas matching the given list of patterns, * and append them to the given OID list. diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index 1ddba727515..37793ad18fc 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -352,6 +352,11 @@ main(int argc, char **argv) opts->format = archCustom; break; + case 'd': + case 'D': + opts->format = archDirectory; + break; + case 'f': case 'F': opts->format = archFiles; @@ -363,7 +368,7 @@ main(int argc, char **argv) break; default: - write_msg(NULL, "unrecognized archive format \"%s\"; please specify \"c\" or \"t\"\n", + write_msg(NULL, "unrecognized archive format \"%s\"; please specify \"c\", \"d\" or \"t\"\n", opts->formatName); exit(1); } @@ -418,7 +423,7 @@ usage(const char *progname) printf(_("\nGeneral options:\n")); printf(_(" -d, --dbname=NAME connect to database name\n")); printf(_(" -f, --file=FILENAME output file name\n")); - printf(_(" -F, --format=c|t backup file format (should be automatic)\n")); + printf(_(" -F, --format=c|d|t backup file format (should be automatic)\n")); printf(_(" -l, --list print summarized TOC of the archive\n")); printf(_(" -v, --verbose verbose mode\n")); printf(_(" --help show this help, then exit\n")); -- GitLab