diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 214cc7f06964dbf8c3af866a5b27949bdb37663e..c6295acf7237a23947202302a1298ce135065f2c 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -6,7 +6,7 @@ * Copyright (c) 1994, Regents of the University of California * * - * $Id: nodeHash.c,v 1.38 1999/07/17 20:16:58 momjian Exp $ + * $Id: nodeHash.c,v 1.39 1999/10/13 15:02:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -75,12 +75,7 @@ ExecHash(Hash *node) * ---------------- */ for (i = 0; i < nbatch; i++) - { - File tfile = OpenTemporaryFile(); - - Assert(tfile >= 0); - hashtable->innerBatchFile[i] = BufFileCreate(tfile); - } + hashtable->innerBatchFile[i] = BufFileCreateTemp(); } /* ---------------- diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 439b8634cda1eb81a772129394634903f84cd729..ffda9723182a7b04594b223f642cd2c497f3911f 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.26 1999/07/17 20:16:58 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.27 1999/10/13 15:02:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -129,12 +129,7 @@ ExecHashJoin(HashJoin *node) * ---------------- */ for (i = 0; i < hashtable->nbatch; i++) - { - File tfile = OpenTemporaryFile(); - - Assert(tfile >= 0); - hashtable->outerBatchFile[i] = BufFileCreate(tfile); - } + hashtable->outerBatchFile[i] = BufFileCreateTemp(); } else if (hashtable == NULL) return NULL; @@ -551,13 +546,12 @@ ExecHashJoinNewBatch(HashJoinState *hjstate) * Rewind inner and outer batch files for this batch, so that we can * start reading them. 
*/ - if (BufFileSeek(hashtable->outerBatchFile[newbatch - 1], 0L, - SEEK_SET) != 0L) + if (BufFileSeek(hashtable->outerBatchFile[newbatch - 1], 0, 0L, SEEK_SET)) elog(ERROR, "Failed to rewind hash temp file"); innerFile = hashtable->innerBatchFile[newbatch - 1]; - if (BufFileSeek(innerFile, 0L, SEEK_SET) != 0L) + if (BufFileSeek(innerFile, 0, 0L, SEEK_SET)) elog(ERROR, "Failed to rewind hash temp file"); /* diff --git a/src/backend/storage/file/Makefile b/src/backend/storage/file/Makefile index 9f321ff57fb6f8d0d14372fba92af17532436a5b..766a0c1d1c39763c2e15379a8779605fd88b654b 100644 --- a/src/backend/storage/file/Makefile +++ b/src/backend/storage/file/Makefile @@ -4,7 +4,7 @@ # Makefile for storage/file # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/storage/file/Makefile,v 1.5 1998/04/06 00:25:05 momjian Exp $ +# $Header: /cvsroot/pgsql/src/backend/storage/file/Makefile,v 1.6 1999/10/13 15:02:29 tgl Exp $ # #------------------------------------------------------------------------- @@ -13,7 +13,7 @@ include ../../../Makefile.global CFLAGS += -I../.. -OBJS = fd.o +OBJS = fd.o buffile.o all: SUBSYS.o diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c new file mode 100644 index 0000000000000000000000000000000000000000..cd7da900b4f08afcdcf6b71c08325dceaad6a203 --- /dev/null +++ b/src/backend/storage/file/buffile.c @@ -0,0 +1,556 @@ +/*------------------------------------------------------------------------- + * + * buffile.c + * Management of large buffered files, primarily temporary files. + * + * Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/storage/file/buffile.c,v 1.1 1999/10/13 15:02:29 tgl Exp $ + * + * NOTES: + * + * BufFiles provide a very incomplete emulation of stdio atop virtual Files + * (as managed by fd.c). 
Currently, we only support the buffered-I/O + * aspect of stdio: a read or write of the low-level File occurs only + * when the buffer is filled or emptied. This is an even bigger win + * for virtual Files than for ordinary kernel files, since reducing the + * frequency with which a virtual File is touched reduces "thrashing" + * of opening/closing file descriptors. + * + * Note that BufFile structs are allocated with palloc(), and therefore + * will go away automatically at transaction end. If the underlying + * virtual File is made with OpenTemporaryFile, then all resources for + * the file are certain to be cleaned up even if processing is aborted + * by elog(ERROR). To avoid confusion, the caller should take care that + * all calls for a single BufFile are made in the same palloc context. + * + * BufFile also supports temporary files that exceed the OS file size limit + * (by opening multiple fd.c temporary files). This is an essential feature + * for sorts and hashjoins on large amounts of data. It is possible to have + * more than one BufFile reading/writing the same temp file, although the + * caller is responsible for avoiding ill effects from buffer overlap when + * this is done. + *------------------------------------------------------------------------- + */ + +#include <errno.h> + +#include "postgres.h" + +#include "storage/buffile.h" + +/* + * The maximum safe file size is presumed to be RELSEG_SIZE * BLCKSZ. + * Note we adhere to this limit whether or not LET_OS_MANAGE_FILESIZE + * is defined, although md.c ignores it when that symbol is defined. + */ +#define MAX_PHYSICAL_FILESIZE (RELSEG_SIZE * BLCKSZ) + +/* + * To handle multiple BufFiles on a single logical temp file, we use this + * data structure representing a logical file (which can be made up of + * multiple physical files to get around the OS file size limit). 
+ */ +typedef struct LogicalFile +{ + int refCount; /* number of BufFiles using me */ + bool isTemp; /* can only add files if this is TRUE */ + int numFiles; /* number of physical files in set */ + /* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */ + + File *files; /* palloc'd array with numFiles entries */ + long *offsets; /* palloc'd array with numFiles entries */ + /* offsets[i] is the current seek position of files[i]. We use this + * to avoid making redundant FileSeek calls. + */ +} LogicalFile; + +/* + * A single file buffer looks like this. + */ +struct BufFile +{ + LogicalFile *logFile; /* the underlying LogicalFile */ + bool dirty; /* does buffer need to be written? */ + /* + * "current pos" is position of start of buffer within LogicalFile. + * Position as seen by user of BufFile is (curFile, curOffset + pos). + */ + int curFile; /* file index (0..n) part of current pos */ + int curOffset; /* offset part of current pos */ + int pos; /* next read/write position in buffer */ + int nbytes; /* total # of valid bytes in buffer */ + char buffer[BLCKSZ]; +}; + +static LogicalFile *makeLogicalFile(File firstfile); +static void extendLogicalFile(LogicalFile *file); +static void deleteLogicalFile(LogicalFile *file); +static void BufFileLoadBuffer(BufFile *file); +static void BufFileDumpBuffer(BufFile *file); +static int BufFileFlush(BufFile *file); + + +/* + * Create a LogicalFile with one component file and refcount 1. + * NOTE: caller must set isTemp true if appropriate. + */ +static LogicalFile * +makeLogicalFile(File firstfile) +{ + LogicalFile *file = (LogicalFile *) palloc(sizeof(LogicalFile)); + + file->refCount = 1; + file->isTemp = false; + file->numFiles = 1; + file->files = (File *) palloc(sizeof(File)); + file->files[0] = firstfile; + file->offsets = (long *) palloc(sizeof(long)); + file->offsets[0] = 0L; + + return file; +} + +/* + * Add another component temp file. 
+ */ +static void +extendLogicalFile(LogicalFile *file) +{ + File pfile; + + Assert(file->isTemp); + pfile = OpenTemporaryFile(); + Assert(pfile >= 0); + + file->files = (File *) repalloc(file->files, + (file->numFiles+1) * sizeof(File)); + file->offsets = (long *) repalloc(file->offsets, + (file->numFiles+1) * sizeof(long)); + file->files[file->numFiles] = pfile; + file->offsets[file->numFiles] = 0L; + file->numFiles++; +} + +/* + * Close and delete a LogicalFile when its refCount has gone to zero. + */ +static void +deleteLogicalFile(LogicalFile *file) +{ + int i; + + for (i = 0; i < file->numFiles; i++) + FileClose(file->files[i]); + pfree(file->files); + pfree(file->offsets); + pfree(file); +} + +/* + * Create a BufFile for a new temporary file (which will expand to become + * multiple temporary files if more than MAX_PHYSICAL_FILESIZE bytes are + * written to it). + */ +BufFile * +BufFileCreateTemp(void) +{ + BufFile *bfile = (BufFile *) palloc(sizeof(BufFile)); + File pfile; + LogicalFile *lfile; + + pfile = OpenTemporaryFile(); + Assert(pfile >= 0); + + lfile = makeLogicalFile(pfile); + lfile->isTemp = true; + + bfile->logFile = lfile; + bfile->dirty = false; + bfile->curFile = 0; + bfile->curOffset = 0L; + bfile->pos = 0; + bfile->nbytes = 0; + + return bfile; +} + +/* + * Create a BufFile and attach it to an already-opened virtual File. + * + * This is comparable to fdopen() in stdio. This is the only way at present + * to attach a BufFile to a non-temporary file. Note that BufFiles created + * in this way CANNOT be expanded into multiple files. + */ +BufFile * +BufFileCreate(File file) +{ + BufFile *bfile = (BufFile *) palloc(sizeof(BufFile)); + LogicalFile *lfile; + + lfile = makeLogicalFile(file); + + bfile->logFile = lfile; + bfile->dirty = false; + bfile->curFile = 0; + bfile->curOffset = 0L; + bfile->pos = 0; + bfile->nbytes = 0; + + return bfile; +} + +/* + * Create an additional BufFile accessing the same underlying file as an + * existing BufFile. 
This is useful for having multiple read/write access + * positions in a single temporary file. Note the caller is responsible + * for avoiding trouble due to overlapping buffer positions! (Caller may + * assume that buffer size is BLCKSZ...) + */ +BufFile * +BufFileReaccess(BufFile *file) +{ + BufFile *bfile = (BufFile *) palloc(sizeof(BufFile)); + + bfile->logFile = file->logFile; + bfile->logFile->refCount++; + bfile->dirty = false; + bfile->curFile = 0; + bfile->curOffset = 0L; + bfile->pos = 0; + bfile->nbytes = 0; + + return bfile; +} + +/* + * Close a BufFile + * + * Like fclose(), this also implicitly FileCloses the underlying File. + */ +void +BufFileClose(BufFile *file) +{ + /* flush any unwritten data */ + BufFileFlush(file); + /* close the underlying (with delete if it's a temp file) */ + if (--(file->logFile->refCount) <= 0) + deleteLogicalFile(file->logFile); + /* release the buffer space */ + pfree(file); +} + +/* BufFileLoadBuffer + * + * Load some data into buffer, if possible, starting from curOffset. + * At call, must have dirty = false, pos and nbytes = 0. + * On exit, nbytes is number of bytes loaded. + */ +static void +BufFileLoadBuffer(BufFile *file) +{ + LogicalFile *lfile = file->logFile; + File thisfile; + + /* + * Advance to next component file if necessary and possible. + * + * This path can only be taken if there is more than one component, + * so it won't interfere with reading a non-temp file that is over + * MAX_PHYSICAL_FILESIZE. + */ + if (file->curOffset >= MAX_PHYSICAL_FILESIZE && + file->curFile+1 < lfile->numFiles) + { + file->curFile++; + file->curOffset = 0L; + } + thisfile = lfile->files[file->curFile]; + /* + * May need to reposition physical file, if more than one BufFile + * is using it. 
+ */ + if (file->curOffset != lfile->offsets[file->curFile]) + { + if (FileSeek(thisfile, file->curOffset, SEEK_SET) != file->curOffset) + return; /* seek failed, read nothing */ + lfile->offsets[file->curFile] = file->curOffset; + } + file->nbytes = FileRead(thisfile, file->buffer, sizeof(file->buffer)); + if (file->nbytes < 0) + file->nbytes = 0; + lfile->offsets[file->curFile] += file->nbytes; + /* we choose not to advance curOffset here */ +} + +/* BufFileDumpBuffer + * + * Dump buffer contents starting at curOffset. + * At call, should have dirty = true, nbytes > 0. + * On exit, dirty is cleared if successful write, and curOffset is advanced. + */ +static void +BufFileDumpBuffer(BufFile *file) +{ + LogicalFile *lfile = file->logFile; + int wpos = 0; + int bytestowrite; + File thisfile; + + /* + * Unlike BufFileLoadBuffer, we must dump the whole buffer even if + * it crosses a component-file boundary; so we need a loop. + */ + while (wpos < file->nbytes) + { + /* + * Advance to next component file if necessary and possible. + */ + if (file->curOffset >= MAX_PHYSICAL_FILESIZE && lfile->isTemp) + { + while (file->curFile+1 >= lfile->numFiles) + extendLogicalFile(lfile); + file->curFile++; + file->curOffset = 0L; + } + /* + * Enforce per-file size limit only for temp files, else just try + * to write as much as asked... + */ + bytestowrite = file->nbytes - wpos; + if (lfile->isTemp) + { + long availbytes = MAX_PHYSICAL_FILESIZE - file->curOffset; + + if ((long) bytestowrite > availbytes) + bytestowrite = (int) availbytes; + } + thisfile = lfile->files[file->curFile]; + /* + * May need to reposition physical file, if more than one BufFile + * is using it. 
+ */ + if (file->curOffset != lfile->offsets[file->curFile]) + { + if (FileSeek(thisfile, file->curOffset, SEEK_SET) != file->curOffset) + return; /* seek failed, give up */ + lfile->offsets[file->curFile] = file->curOffset; + } + bytestowrite = FileWrite(thisfile, file->buffer + wpos, bytestowrite); /* resume after the wpos bytes already written by earlier iterations */ + if (bytestowrite <= 0) + return; /* failed to write */ + lfile->offsets[file->curFile] += bytestowrite; + file->curOffset += bytestowrite; + wpos += bytestowrite; + } + file->dirty = false; + /* + * At this point, curOffset has been advanced to the end of the buffer, + * ie, its original value + nbytes. We need to make it point to the + * logical file position, ie, original value + pos, in case that is less + * (as could happen due to a small backwards seek in a dirty buffer!) + */ + file->curOffset -= (file->nbytes - file->pos); + if (file->curOffset < 0) /* handle possible segment crossing */ + { + file->curFile--; + Assert(file->curFile >= 0); + file->curOffset += MAX_PHYSICAL_FILESIZE; + } + /* Now we can set the buffer empty without changing the logical position */ + file->pos = 0; + file->nbytes = 0; +} + +/* BufFileRead + * + * Like fread() except we assume 1-byte element size. + */ +size_t +BufFileRead(BufFile *file, void *ptr, size_t size) +{ + size_t nread = 0; + size_t nthistime; + + if (file->dirty) + { + if (BufFileFlush(file) != 0) + return 0; /* could not flush... */ + Assert(! file->dirty); + } + + while (size > 0) + { + if (file->pos >= file->nbytes) + { + /* Try to load more data into buffer. 
*/ + file->curOffset += file->pos; + file->pos = 0; + file->nbytes = 0; + BufFileLoadBuffer(file); + if (file->nbytes <= 0) + break; /* no more data available */ + } + + nthistime = file->nbytes - file->pos; + if (nthistime > size) + nthistime = size; + Assert(nthistime > 0); + + memcpy(ptr, file->buffer + file->pos, nthistime); + + file->pos += nthistime; + ptr = (void *) ((char *) ptr + nthistime); + size -= nthistime; + nread += nthistime; + } + + return nread; +} + +/* BufFileWrite + * + * Like fwrite() except we assume 1-byte element size. + */ +size_t +BufFileWrite(BufFile *file, void *ptr, size_t size) +{ + size_t nwritten = 0; + size_t nthistime; + + while (size > 0) + { + if (file->pos >= BLCKSZ) + { + /* Buffer full, dump it out */ + if (file->dirty) + { + BufFileDumpBuffer(file); + if (file->dirty) + break; /* I/O error */ + } + else + { + /* Hmm, went directly from reading to writing? */ + file->curOffset += file->pos; + file->pos = 0; + file->nbytes = 0; + } + } + + nthistime = BLCKSZ - file->pos; + if (nthistime > size) + nthistime = size; + Assert(nthistime > 0); + + memcpy(file->buffer + file->pos, ptr, nthistime); + + file->dirty = true; + file->pos += nthistime; + if (file->nbytes < file->pos) + file->nbytes = file->pos; + ptr = (void *) ((char *) ptr + nthistime); + size -= nthistime; + nwritten += nthistime; + } + + return nwritten; +} + +/* BufFileFlush + * + * Like fflush() + */ +static int +BufFileFlush(BufFile *file) +{ + if (file->dirty) + { + BufFileDumpBuffer(file); + if (file->dirty) + return EOF; + } + + return 0; +} + +/* BufFileSeek + * + * Like fseek(). Result is 0 if OK, EOF if not. 
+ */ +int +BufFileSeek(BufFile *file, int fileno, long offset, int whence) +{ + int newFile; + long newOffset; + switch (whence) + { + case SEEK_SET: + if (fileno < 0 || fileno >= file->logFile->numFiles || + offset < 0) + return EOF; + newFile = fileno; + newOffset = offset; + break; + case SEEK_CUR: + /* + * Relative seek considers only the signed offset, ignoring fileno. + * Note that large offsets (> 1 gig) risk overflow. + */ + newFile = file->curFile; + newOffset = (file->curOffset + file->pos) + offset; + break; +#ifdef NOT_USED + case SEEK_END: + /* could be implemented, not needed currently */ + break; +#endif + default: + elog(ERROR, "BufFileSeek: invalid whence: %d", whence); + return EOF; + } + while (newOffset < 0) + { + if (--newFile < 0) + return EOF; + newOffset += MAX_PHYSICAL_FILESIZE; + } + if (file->logFile->isTemp) + { + while (newOffset > MAX_PHYSICAL_FILESIZE) + { + if (++newFile >= file->logFile->numFiles) + return EOF; + newOffset -= MAX_PHYSICAL_FILESIZE; + } + } + if (newFile == file->curFile && + newOffset >= file->curOffset && + newOffset <= file->curOffset + file->nbytes) + { + /* + * Seek is to a point within existing buffer; we can just adjust + * pos-within-buffer, without flushing buffer. Note this is OK + * whether reading or writing, but buffer remains dirty if we + * were writing. 
+ */ + file->pos = (int) (newOffset - file->curOffset); + return 0; + } + /* Otherwise, must reposition buffer, so flush any dirty data */ + if (BufFileFlush(file) != 0) + return EOF; + file->curFile = newFile; + file->curOffset = newOffset; + file->pos = 0; + file->nbytes = 0; + return 0; +} + +extern void +BufFileTell(BufFile *file, int *fileno, long *offset) +{ + *fileno = file->curFile; + *offset = file->curOffset + file->pos; +} diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 4cdb638819e81b2be731b4d61b1da56b0dc241d8..2fce82ecfd6aaaff658ffd89392fdcaaa9af3234 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -6,7 +6,7 @@ * Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Id: fd.c,v 1.48 1999/09/27 15:47:49 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.49 1999/10/13 15:02:29 tgl Exp $ * * NOTES: * @@ -49,7 +49,6 @@ #include "miscadmin.h" #include "storage/fd.h" -bool ReleaseDataFile(void); /* * Problem: Postgres does a system(ld...) to do dynamic loading. * This will open several extra files in addition to those used by @@ -188,7 +187,6 @@ static int FileAccess(File file); static File fileNameOpenFile(FileName fileName, int fileFlags, int fileMode); static char *filepath(char *filename); static long pg_nofile(void); -static int BufFileFlush(BufFile *file); /* * pg_fsync --- same as fsync except does nothing if -F switch was given @@ -411,6 +409,9 @@ ReleaseLruFile() LruDelete(VfdCache[0].lruMoreRecently); } +/* + * Force one kernel file descriptor to be released (temporarily). + */ bool ReleaseDataFile() { @@ -506,8 +507,11 @@ FreeVfd(File file) /* filepath() * Convert given pathname to absolute. - * (Is this actually necessary, considering that we should be cd'd - * into the database directory??) + * + * (Generally, this isn't actually necessary, considering that we + * should be cd'd into the database directory. 
Presently it is only + * necessary to do it in "bootstrap" mode. Maybe we should change + * bootstrap mode to do the cd, and save a few cycles/bytes here.) */ static char * filepath(char *filename) @@ -851,7 +855,7 @@ FileTell(File file) #endif int -FileTruncate(File file, int offset) +FileTruncate(File file, long offset) { int returnCode; @@ -862,7 +866,7 @@ FileTruncate(File file, int offset) FileSync(file); FileAccess(file); - returnCode = ftruncate(VfdCache[file].fd, offset); + returnCode = ftruncate(VfdCache[file].fd, (size_t) offset); return returnCode; } @@ -890,18 +894,6 @@ FileSync(File file) return returnCode; } -int -FileNameUnlink(char *filename) -{ - int retval; - char *fname; - - fname = filepath(filename); - retval = unlink(fname); - pfree(fname); - return retval; -} - /* * Routines that want to use stdio (ie, FILE*) should use AllocateFile * rather than plain fopen(). This lets fd.c deal with freeing FDs if @@ -1023,186 +1015,3 @@ AtEOXact_Files(void) */ tempFileCounter = 0; } - - -/* - * Operations on BufFiles --- a very incomplete emulation of stdio - * atop virtual Files. Currently, we only support the buffered-I/O - * aspect of stdio: a read or write of the low-level File occurs only - * when the buffer is filled or emptied. This is an even bigger win - * for virtual Files than ordinary kernel files, since reducing the - * frequency with which a virtual File is touched reduces "thrashing" - * of opening/closing file descriptors. - * - * Note that BufFile structs are allocated with palloc(), and therefore - * will go away automatically at transaction end. If the underlying - * virtual File is made with OpenTemporaryFile, then all resources for - * the file are certain to be cleaned up even if processing is aborted - * by elog(ERROR). - */ - -struct BufFile -{ - File file; /* the underlying virtual File */ - bool dirty; /* does buffer need to be written? 
*/ - int pos; /* next read/write position in buffer */ - int nbytes; /* total # of valid bytes in buffer */ - char buffer[BLCKSZ]; -}; - - -/* - * Create a BufFile and attach it to an (already opened) virtual File. - * - * This is comparable to fdopen() in stdio. - */ -BufFile * -BufFileCreate(File file) -{ - BufFile *bfile = (BufFile *) palloc(sizeof(BufFile)); - - bfile->file = file; - bfile->dirty = false; - bfile->pos = 0; - bfile->nbytes = 0; - - return bfile; -} - -/* - * Close a BufFile - * - * Like fclose(), this also implicitly FileCloses the underlying File. - */ -void -BufFileClose(BufFile *file) -{ - /* flush any unwritten data */ - BufFileFlush(file); - /* close the underlying (with delete if it's a temp file) */ - FileClose(file->file); - /* release the buffer space */ - pfree(file); -} - -/* BufFileRead - * - * Like fread() except we assume 1-byte element size. - */ -size_t -BufFileRead(BufFile *file, void *ptr, size_t size) -{ - size_t nread = 0; - size_t nthistime; - - if (file->dirty) - { - elog(NOTICE, "BufFileRead: should have flushed after writing"); - BufFileFlush(file); - } - - while (size > 0) - { - if (file->pos >= file->nbytes) - { - /* Try to load more data into buffer */ - file->pos = 0; - file->nbytes = FileRead(file->file, file->buffer, - sizeof(file->buffer)); - if (file->nbytes < 0) - file->nbytes = 0; - if (file->nbytes <= 0) - break; /* no more data available */ - } - - nthistime = file->nbytes - file->pos; - if (nthistime > size) - nthistime = size; - Assert(nthistime > 0); - - memcpy(ptr, file->buffer + file->pos, nthistime); - - file->pos += nthistime; - ptr = (void *) ((char *) ptr + nthistime); - size -= nthistime; - nread += nthistime; - } - - return nread; -} - -/* BufFileWrite - * - * Like fwrite() except we assume 1-byte element size. 
- */ -size_t -BufFileWrite(BufFile *file, void *ptr, size_t size) -{ - size_t nwritten = 0; - size_t nthistime; - - while (size > 0) - { - if (file->pos >= BLCKSZ) - { - /* Buffer full, dump it out */ - if (file->dirty) - { - if (FileWrite(file->file, file->buffer, file->nbytes) < 0) - break; /* I/O error */ - file->dirty = false; - } - file->pos = 0; - file->nbytes = 0; - } - - nthistime = BLCKSZ - file->pos; - if (nthistime > size) - nthistime = size; - Assert(nthistime > 0); - - memcpy(file->buffer + file->pos, ptr, nthistime); - - file->dirty = true; - file->pos += nthistime; - if (file->nbytes < file->pos) - file->nbytes = file->pos; - ptr = (void *) ((char *) ptr + nthistime); - size -= nthistime; - nwritten += nthistime; - } - - return nwritten; -} - -/* BufFileFlush - * - * Like fflush() - */ -static int -BufFileFlush(BufFile *file) -{ - if (file->dirty) - { - if (FileWrite(file->file, file->buffer, file->nbytes) < 0) - return EOF; - file->dirty = false; - } - - return 0; -} - -/* BufFileSeek - * - * Like fseek(), or really more like lseek() since the return value is - * the new file offset (or -1 in case of error). 
- */ -long -BufFileSeek(BufFile *file, long offset, int whence) -{ - if (BufFileFlush(file) < 0) - return -1L; - file->pos = 0; - file->nbytes = 0; - return FileSeek(file->file, offset, whence); -} diff --git a/src/backend/storage/large_object/inv_api.c b/src/backend/storage/large_object/inv_api.c index f59e99d7fbe90fd5b0156749b01ef84c7c83a053..555768a0f3e51439276b38b17d0bc5d62fb48343 100644 --- a/src/backend/storage/large_object/inv_api.c +++ b/src/backend/storage/large_object/inv_api.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/large_object/inv_api.c,v 1.59 1999/09/18 19:07:32 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/large_object/inv_api.c,v 1.60 1999/10/13 15:02:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -300,6 +300,21 @@ inv_destroy(Oid lobjId) * end of relations. Once clustering works, we should fix this. */ #ifdef NOT_USED + +struct pgstat +{ /* just the fields we need from stat + * structure */ + int st_ino; + int st_mode; + unsigned int st_size; + unsigned int st_sizehigh; /* high order bits */ +/* 2^64 == 1.8 x 10^20 bytes */ + int st_uid; + int st_atime_s; /* just the seconds */ + int st_mtime_s; /* since SysV and the new BSD both have */ + int st_ctime_s; /* usec fields.. */ +}; + int inv_stat(LargeObjectDesc *obj_desc, struct pgstat * stbuf) { diff --git a/src/backend/utils/sort/psort.c b/src/backend/utils/sort/psort.c index b1ac25280756552f089488c2d5778cc475bb45ff..14db10c119837607cdce1bfa043faf793697bbb9 100644 --- a/src/backend/utils/sort/psort.c +++ b/src/backend/utils/sort/psort.c @@ -4,7 +4,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: psort.c,v 1.56 1999/07/17 20:18:16 momjian Exp $ + * $Id: psort.c,v 1.57 1999/10/13 15:02:31 tgl Exp $ * * NOTES * Sorts the first relation into the second relation. 
@@ -142,7 +142,8 @@ psort_begin(Sort *node, int nkeys, ScanKey key) PS(node)->psort_grab_file = mergeruns(node); PS(node)->psort_current = 0; - PS(node)->psort_saved = 0; + PS(node)->psort_saved_fileno = 0; + PS(node)->psort_saved = 0L; return true; } @@ -227,7 +228,7 @@ inittapes(Sort *node) #define SETTUPLEN(TUP, LEN) ((TUP)->t_len = (LEN) - HEAPTUPLESIZE) -#define rewind(FP) BufFileSeek(FP, 0L, SEEK_SET) +#define rewind(FP) BufFileSeek(FP, 0, 0L, SEEK_SET) /* * USEMEM - record use of memory FREEMEM - record @@ -764,9 +765,6 @@ psort_grabtuple(Sort *node, bool *should_free) tup = ALLOCTUP(tuplen); SETTUPLEN(tup, tuplen); GETTUP(node, tup, tuplen, PS(node)->psort_grab_file); - - /* Update current merged sort file position */ - PS(node)->psort_current += tuplen + sizeof(tlendummy); return tup; } else @@ -775,70 +773,67 @@ psort_grabtuple(Sort *node, bool *should_free) return NULL; } } - /* Backward */ - if (PS(node)->psort_current <= sizeof(tlendummy)) - return NULL; - - /* + /* Backward. + * * if all tuples are fetched already then we return last tuple, * else - tuple before last returned. */ if (PS(node)->all_fetched) { - /* - * psort_current is pointing to the zero tuplen at the end of - * file + * Assume seek position is pointing just past the zero tuplen + * at the end of file; back up and fetch last tuple's ending + * length word. If seek fails we must have a completely empty + * file. 
*/ - BufFileSeek(PS(node)->psort_grab_file, - PS(node)->psort_current - sizeof(tlendummy), SEEK_SET); + if (BufFileSeek(PS(node)->psort_grab_file, 0, + - (long) (2 * sizeof(tlendummy)), SEEK_CUR)) + return NULL; GETLEN(tuplen, PS(node)->psort_grab_file); - if (PS(node)->psort_current < tuplen) - elog(ERROR, "psort_grabtuple: too big last tuple len in backward scan"); PS(node)->all_fetched = false; } else { - /* move to position of end tlen of prev tuple */ - PS(node)->psort_current -= sizeof(tlendummy); - BufFileSeek(PS(node)->psort_grab_file, - PS(node)->psort_current, SEEK_SET); - GETLEN(tuplen, PS(node)->psort_grab_file); /* get tlen of prev - * tuple */ + /* + * Back up and fetch prev tuple's ending length word. + * If seek fails, assume we are at start of file. + */ + if (BufFileSeek(PS(node)->psort_grab_file, 0, + - (long) sizeof(tlendummy), SEEK_CUR)) + return NULL; + GETLEN(tuplen, PS(node)->psort_grab_file); if (tuplen == 0) elog(ERROR, "psort_grabtuple: tuplen is 0 in backward scan"); - if (PS(node)->psort_current <= tuplen + sizeof(tlendummy)) - { /* prev tuple should be first one */ - if (PS(node)->psort_current != tuplen) - elog(ERROR, "psort_grabtuple: first tuple expected in backward scan"); - PS(node)->psort_current = 0; - BufFileSeek(PS(node)->psort_grab_file, - PS(node)->psort_current, SEEK_SET); - return NULL; - } - /* - * Get position of prev tuple. This tuple becomes current - * tuple now and we have to return previous one. + * Back up to get ending length word of tuple before it. */ - PS(node)->psort_current -= tuplen; - /* move to position of end tlen of prev tuple */ - BufFileSeek(PS(node)->psort_grab_file, - PS(node)->psort_current - sizeof(tlendummy), SEEK_SET); + if (BufFileSeek(PS(node)->psort_grab_file, 0, + - (long) (tuplen + 2*sizeof(tlendummy)), SEEK_CUR)) + { + /* If fail, presumably the prev tuple is the first in the file. 
+ * Back up so that it becomes next to read in forward direction + * (not obviously right, but that is what in-memory case does) + */ + if (BufFileSeek(PS(node)->psort_grab_file, 0, + - (long) (tuplen + sizeof(tlendummy)), SEEK_CUR)) + elog(ERROR, "psort_grabtuple: too big last tuple len in backward scan"); + return NULL; + } GETLEN(tuplen, PS(node)->psort_grab_file); - if (PS(node)->psort_current < tuplen + sizeof(tlendummy)) - elog(ERROR, "psort_grabtuple: too big tuple len in backward scan"); } /* - * move to prev (or last) tuple start position + sizeof(t_len) + * Now we have the length of the prior tuple, back up and read it. + * Note: GETTUP expects we are positioned after the initial length + * word of the tuple, so back up to that point. */ - BufFileSeek(PS(node)->psort_grab_file, - PS(node)->psort_current - tuplen, SEEK_SET); + if (BufFileSeek(PS(node)->psort_grab_file, 0, + - (long) tuplen, SEEK_CUR)) + elog(ERROR, "psort_grabtuple: too big tuple len in backward scan"); tup = ALLOCTUP(tuplen); SETTUPLEN(tup, tuplen); GETTUP(node, tup, tuplen, PS(node)->psort_grab_file); - return tup; /* file position is equal to psort_current */ + return tup; } else { @@ -875,6 +870,8 @@ psort_grabtuple(Sort *node, bool *should_free) /* * psort_markpos - saves current position in the merged sort file + * + * XXX I suspect these need to save & restore the all_fetched flag as well! 
*/ void psort_markpos(Sort *node) @@ -882,7 +879,12 @@ psort_markpos(Sort *node) Assert(node != (Sort *) NULL); Assert(PS(node) != (Psortstate *) NULL); - PS(node)->psort_saved = PS(node)->psort_current; + if (PS(node)->using_tape_files == true) + BufFileTell(PS(node)->psort_grab_file, + & PS(node)->psort_saved_fileno, + & PS(node)->psort_saved); + else + PS(node)->psort_saved = PS(node)->psort_current; } /* @@ -897,8 +899,11 @@ psort_restorepos(Sort *node) if (PS(node)->using_tape_files == true) BufFileSeek(PS(node)->psort_grab_file, - PS(node)->psort_saved, SEEK_SET); - PS(node)->psort_current = PS(node)->psort_saved; + PS(node)->psort_saved_fileno, + PS(node)->psort_saved, + SEEK_SET); + else + PS(node)->psort_current = PS(node)->psort_saved; } /* @@ -952,7 +957,8 @@ psort_rescan(Sort *node) { PS(node)->all_fetched = false; PS(node)->psort_current = 0; - PS(node)->psort_saved = 0; + PS(node)->psort_saved_fileno = 0; + PS(node)->psort_saved = 0L; if (PS(node)->using_tape_files == true) rewind(PS(node)->psort_grab_file); } @@ -973,11 +979,7 @@ psort_rescan(Sort *node) static BufFile * gettape() { - File tfile; - - tfile = OpenTemporaryFile(); - Assert(tfile >= 0); - return BufFileCreate(tfile); + return BufFileCreateTemp(); } /* diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index 724a7ddd313f3854a4e7ecc1440415b6a2efc2a3..f501cd0365417d9a5f2752d8fd862952df5954d4 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: hashjoin.h,v 1.14 1999/07/15 15:21:08 momjian Exp $ + * $Id: hashjoin.h,v 1.15 1999/10/13 15:02:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -14,6 +14,7 @@ #define HASHJOIN_H #include "access/htup.h" +#include "storage/buffile.h" /* ---------------------------------------------------------------- * hash-join hash table structures diff --git 
a/src/include/executor/nodeHashjoin.h b/src/include/executor/nodeHashjoin.h index 2061ac7bdcaf78310627ade614dc4b614b8100db..9d5390f79c690713d2f60569fb2416c2815545d4 100644 --- a/src/include/executor/nodeHashjoin.h +++ b/src/include/executor/nodeHashjoin.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: nodeHashjoin.h,v 1.15 1999/07/15 15:21:12 momjian Exp $ + * $Id: nodeHashjoin.h,v 1.16 1999/10/13 15:02:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -14,6 +14,7 @@ #define NODEHASHJOIN_H #include "nodes/plannodes.h" +#include "storage/buffile.h" extern TupleTableSlot *ExecHashJoin(HashJoin *node); extern bool ExecInitHashJoin(HashJoin *node, EState *estate, Plan *parent); diff --git a/src/include/storage/buffile.h b/src/include/storage/buffile.h new file mode 100644 index 0000000000000000000000000000000000000000..2416d645cfe56e8b7f8eb040a4e49d0395b4cdcc --- /dev/null +++ b/src/include/storage/buffile.h @@ -0,0 +1,47 @@ +/*------------------------------------------------------------------------- + * + * buffile.h + * Management of large buffered files, primarily temporary files. + * + * The BufFile routines provide a partial replacement for stdio atop + * virtual file descriptors managed by fd.c. Currently they only support + * buffered access to a virtual file, without any of stdio's formatting + * features. That's enough for immediate needs, but the set of facilities + * could be expanded if necessary. + * + * BufFile also supports working with temporary files that exceed the OS + * file size limit and/or the largest offset representable in an int. + * It might be better to split that out as a separately accessible module, + * but currently we have no need for oversize temp files without buffered + * access. 
+ * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: buffile.h,v 1.1 1999/10/13 15:02:32 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#ifndef BUFFILE_H +#define BUFFILE_H + +#include "storage/fd.h" + +/* BufFile is an opaque type whose details are not known outside buffile.c. */ + +typedef struct BufFile BufFile; + +/* + * prototypes for functions in buffile.c + */ + +extern BufFile *BufFileCreateTemp(void); +extern BufFile *BufFileCreate(File file); +extern BufFile *BufFileReaccess(BufFile *file); +extern void BufFileClose(BufFile *file); +extern size_t BufFileRead(BufFile *file, void *ptr, size_t size); +extern size_t BufFileWrite(BufFile *file, void *ptr, size_t size); +extern int BufFileSeek(BufFile *file, int fileno, long offset, int whence); +extern void BufFileTell(BufFile *file, int *fileno, long *offset); + +#endif /* BUFFILE_H */ diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index beb93bf699c69d9f2817592cf1a1e6803cdb2770..42d1f46579759937fc665a11e119e7224bf3924a 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -6,10 +6,11 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: fd.h,v 1.17 1999/07/17 20:18:34 momjian Exp $ + * $Id: fd.h,v 1.18 1999/10/13 15:02:32 tgl Exp $ * *------------------------------------------------------------------------- */ + /* * calls: * @@ -29,11 +30,6 @@ * use FreeFile, not fclose, to close it. AVOID using stdio for files * that you intend to hold open for any length of time, since there is * no way for them to share kernel file descriptors with other files. - * - * The BufFile routines provide a partial replacement for stdio. Currently - * they only support buffered access to a virtual file, without any of - * stdio's formatting features. That's enough for immediate needs, but - * the set of facilities could be expanded if necessary. 
*/ #ifndef FD_H #define FD_H @@ -46,25 +42,6 @@ typedef char *FileName; typedef int File; -/* BufFile is an opaque type whose details are not known outside fd.c. */ - -typedef struct BufFile BufFile; - -/* why is this here? fd.c doesn't want it ... */ -struct pgstat -{ /* just the fields we need from stat - * structure */ - int st_ino; - int st_mode; - unsigned int st_size; - unsigned int st_sizehigh; /* high order bits */ -/* 2^64 == 1.8 x 10^20 bytes */ - int st_uid; - int st_atime_s; /* just the seconds */ - int st_mtime_s; /* since SysV and the new BSD both have */ - int st_ctime_s; /* usec fields.. */ -}; - /* * prototypes for functions in fd.c */ @@ -78,24 +55,15 @@ extern void FileUnlink(File file); extern int FileRead(File file, char *buffer, int amount); extern int FileWrite(File file, char *buffer, int amount); extern long FileSeek(File file, long offset, int whence); -extern int FileTruncate(File file, int offset); +extern int FileTruncate(File file, long offset); extern int FileSync(File file); /* Operations that allow use of regular stdio --- USE WITH CAUTION */ extern FILE *AllocateFile(char *name, char *mode); extern void FreeFile(FILE *); -/* Operations on BufFiles --- a very incomplete emulation of stdio - * atop virtual Files... 
- */ -extern BufFile *BufFileCreate(File file); -extern void BufFileClose(BufFile *file); -extern size_t BufFileRead(BufFile *file, void *ptr, size_t size); -extern size_t BufFileWrite(BufFile *file, void *ptr, size_t size); -extern long BufFileSeek(BufFile *file, long offset, int whence); - /* Miscellaneous support routines */ -extern int FileNameUnlink(char *filename); +extern bool ReleaseDataFile(void); extern void closeAllVfds(void); extern void AtEOXact_Files(void); extern int pg_fsync(int fd); diff --git a/src/include/utils/psort.h b/src/include/utils/psort.h index 0deac024154413aa0b1799295da2e18adecf76a2..9a100bad0d8b6ae5a5c38620ebe60bb0fe39e8cb 100644 --- a/src/include/utils/psort.h +++ b/src/include/utils/psort.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: psort.h,v 1.21 1999/07/16 17:07:39 momjian Exp $ + * $Id: psort.h,v 1.22 1999/10/13 15:02:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -15,7 +15,7 @@ #include "access/relscan.h" #include "nodes/plannodes.h" -#include "storage/fd.h" +#include "storage/buffile.h" #include "utils/lselect.h" #define MAXTAPES 7 /* See Knuth Fig. 70, p273 */ @@ -57,7 +57,8 @@ typedef struct Psortstate struct leftist *Tuples; BufFile *psort_grab_file; - long psort_current; /* could be file offset, or array index */ + long psort_current; /* array index (only used if not tape) */ + int psort_saved_fileno; /* upper bits of psort_saved, if tape */ long psort_saved; /* could be file offset, or array index */ bool using_tape_files; bool all_fetched; /* this is for cursors */