Skip to content
Snippets Groups Projects
Commit 69d0a15e authored by Tom Lane's avatar Tom Lane
Browse files

Convert hash join code to use MinimalTuple format in tuple hash table

and batch files.  Should reduce memory and I/O demands for such joins.
parent 665c5e86
No related branches found
No related tags found
No related merge requests found
......@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.95 2006/06/27 02:51:39 tgl Exp $
* $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.96 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -718,6 +718,55 @@ ExecFetchSlotTuple(TupleTableSlot *slot)
return ExecMaterializeSlot(slot);
}
/* --------------------------------
* ExecFetchSlotMinimalTuple
* Fetch the slot's minimal physical tuple.
*
* If the slot contains a virtual tuple, we convert it to minimal
* physical form. The slot retains ownership of the physical tuple.
* Likewise, if it contains a regular tuple we convert to minimal form.
*
* As above, the result must be treated as read-only.
* --------------------------------
*/
MinimalTuple
ExecFetchSlotMinimalTuple(TupleTableSlot *slot)
{
MinimalTuple newTuple;
MemoryContext oldContext;
/*
* sanity checks
*/
Assert(slot != NULL);
Assert(!slot->tts_isempty);
/*
* If we have a minimal physical tuple then just return it.
*/
if (slot->tts_mintuple)
return slot->tts_mintuple;
/*
* Otherwise, build a minimal tuple, and then store it as the new slot
* value. (Note: tts_nvalid will be reset to zero here. There are cases
* in which this could be optimized but it's probably not worth worrying
* about.)
*
* We may be called in a context that is shorter-lived than the tuple
* slot, but we have to ensure that the materialized tuple will survive
* anyway.
*/
oldContext = MemoryContextSwitchTo(slot->tts_mcxt);
newTuple = ExecCopySlotMinimalTuple(slot);
MemoryContextSwitchTo(oldContext);
ExecStoreMinimalTuple(newTuple, slot, true);
Assert(slot->tts_mintuple);
return slot->tts_mintuple;
}
/* --------------------------------
* ExecMaterializeSlot
* Force a slot into the "materialized" state.
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.103 2006/05/30 14:01:58 momjian Exp $
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.104 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -92,7 +92,7 @@ MultiExecHash(HashState *node)
/* We have to compute the hash value */
econtext->ecxt_innertuple = slot;
hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys);
ExecHashTableInsert(hashtable, ExecFetchSlotTuple(slot), hashvalue);
ExecHashTableInsert(hashtable, slot, hashvalue);
}
/* must provide our own instrumentation support */
......@@ -358,8 +358,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
* does not allow for any palloc overhead. The manipulations of spaceUsed
* don't count palloc overhead either.
*/
tupsize = MAXALIGN(sizeof(HashJoinTupleData)) +
MAXALIGN(sizeof(HeapTupleHeaderData)) +
tupsize = HJTUPLE_OVERHEAD +
MAXALIGN(sizeof(MinimalTupleData)) +
MAXALIGN(tupwidth);
inner_rel_bytes = ntuples * tupsize;
......@@ -548,7 +548,8 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
{
/* dump it out */
Assert(batchno > curbatch);
ExecHashJoinSaveTuple(&tuple->htup, tuple->hashvalue,
ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(tuple),
tuple->hashvalue,
&hashtable->innerBatchFile[batchno]);
/* and remove from hash table */
if (prevtuple)
......@@ -557,7 +558,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
hashtable->buckets[i] = nexttuple;
/* prevtuple doesn't change */
hashtable->spaceUsed -=
MAXALIGN(sizeof(HashJoinTupleData)) + tuple->htup.t_len;
HJTUPLE_OVERHEAD + HJTUPLE_MINTUPLE(tuple)->t_len;
pfree(tuple);
nfreed++;
}
......@@ -592,12 +593,19 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
* ExecHashTableInsert
* insert a tuple into the hash table depending on the hash value
* it may just go to a temp file for later batches
*
* Note: the passed TupleTableSlot may contain a regular, minimal, or virtual
* tuple; the minimal case in particular is certain to happen while reloading
* tuples from batch files. We could save some cycles in the regular-tuple
* case by not forcing the slot contents into minimal form; not clear if it's
* worth the messiness required.
*/
void
ExecHashTableInsert(HashJoinTable hashtable,
HeapTuple tuple,
TupleTableSlot *slot,
uint32 hashvalue)
{
MinimalTuple tuple = ExecFetchSlotMinimalTuple(slot);
int bucketno;
int batchno;
......@@ -615,18 +623,11 @@ ExecHashTableInsert(HashJoinTable hashtable,
HashJoinTuple hashTuple;
int hashTupleSize;
hashTupleSize = MAXALIGN(sizeof(HashJoinTupleData)) + tuple->t_len;
hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
hashTupleSize);
hashTuple->hashvalue = hashvalue;
memcpy((char *) &hashTuple->htup,
(char *) tuple,
sizeof(hashTuple->htup));
hashTuple->htup.t_data = (HeapTupleHeader)
(((char *) hashTuple) + MAXALIGN(sizeof(HashJoinTupleData)));
memcpy((char *) hashTuple->htup.t_data,
(char *) tuple->t_data,
tuple->t_len);
memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
hashTuple->next = hashtable->buckets[bucketno];
hashtable->buckets[bucketno] = hashTuple;
hashtable->spaceUsed += hashTupleSize;
......@@ -639,7 +640,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
* put the tuple into a temp file for later batches
*/
Assert(batchno > hashtable->curbatch);
ExecHashJoinSaveTuple(tuple, hashvalue,
ExecHashJoinSaveTuple(tuple,
hashvalue,
&hashtable->innerBatchFile[batchno]);
}
}
......@@ -749,7 +751,7 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
*
* The current outer tuple must be stored in econtext->ecxt_outertuple.
*/
HeapTuple
HashJoinTuple
ExecScanHashBucket(HashJoinState *hjstate,
ExprContext *econtext)
{
......@@ -771,13 +773,11 @@ ExecScanHashBucket(HashJoinState *hjstate,
{
if (hashTuple->hashvalue == hashvalue)
{
HeapTuple heapTuple = &hashTuple->htup;
TupleTableSlot *inntuple;
/* insert hashtable's tuple into exec slot so ExecQual sees it */
inntuple = ExecStoreTuple(heapTuple,
inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
hjstate->hj_HashTupleSlot,
InvalidBuffer,
false); /* do not pfree */
econtext->ecxt_innertuple = inntuple;
......@@ -787,7 +787,7 @@ ExecScanHashBucket(HashJoinState *hjstate,
if (ExecQual(hjclauses, econtext, false))
{
hjstate->hj_CurTuple = hashTuple;
return heapTuple;
return hashTuple;
}
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.82 2006/06/16 18:42:22 tgl Exp $
* $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.83 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -54,7 +54,7 @@ ExecHashJoin(HashJoinState *node)
ExprContext *econtext;
ExprDoneCond isDone;
HashJoinTable hashtable;
HeapTuple curtuple;
HashJoinTuple curtuple;
TupleTableSlot *outerTupleSlot;
uint32 hashvalue;
int batchno;
......@@ -224,7 +224,7 @@ ExecHashJoin(HashJoinState *node)
* in the corresponding outer-batch file.
*/
Assert(batchno > hashtable->curbatch);
ExecHashJoinSaveTuple(ExecFetchSlotTuple(outerTupleSlot),
ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
hashvalue,
&hashtable->outerBatchFile[batchno]);
node->hj_NeedNewOuter = true;
......@@ -244,10 +244,9 @@ ExecHashJoin(HashJoinState *node)
/*
* we've got a match, but still need to test non-hashed quals
*/
inntuple = ExecStoreTuple(curtuple,
inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(curtuple),
node->hj_HashTupleSlot,
InvalidBuffer,
false); /* don't pfree this tuple */
false); /* don't pfree */
econtext->ecxt_innertuple = inntuple;
/* reset temp memory each time to avoid leaks from qual expr */
......@@ -706,9 +705,7 @@ start_over:
* NOTE: some tuples may be sent to future batches. Also, it is
* possible for hashtable->nbatch to be increased here!
*/
ExecHashTableInsert(hashtable,
ExecFetchSlotTuple(slot),
hashvalue);
ExecHashTableInsert(hashtable, slot, hashvalue);
}
/*
......@@ -741,15 +738,14 @@ start_over:
* save a tuple to a batch file.
*
* The data recorded in the file for each tuple is its hash value,
* then an image of its HeapTupleData (with meaningless t_data pointer)
* followed by the HeapTupleHeader and tuple data.
* then the tuple in MinimalTuple format.
*
* Note: it is important always to call this in the regular executor
* context, not in a shorter-lived context; else the temp file buffers
* will get messed up.
*/
void
ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue,
ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
BufFile **fileptr)
{
BufFile *file = *fileptr;
......@@ -768,14 +764,8 @@ ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue,
(errcode_for_file_access(),
errmsg("could not write to hash-join temporary file: %m")));
written = BufFileWrite(file, (void *) heapTuple, sizeof(HeapTupleData));
if (written != sizeof(HeapTupleData))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to hash-join temporary file: %m")));
written = BufFileWrite(file, (void *) heapTuple->t_data, heapTuple->t_len);
if (written != (size_t) heapTuple->t_len)
written = BufFileWrite(file, (void *) tuple, tuple->t_len);
if (written != tuple->t_len)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to hash-join temporary file: %m")));
......@@ -794,32 +784,36 @@ ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
uint32 *hashvalue,
TupleTableSlot *tupleSlot)
{
HeapTupleData htup;
uint32 header[2];
size_t nread;
HeapTuple heapTuple;
MinimalTuple tuple;
nread = BufFileRead(file, (void *) hashvalue, sizeof(uint32));
if (nread == 0)
return NULL; /* end of file */
if (nread != sizeof(uint32))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read from hash-join temporary file: %m")));
nread = BufFileRead(file, (void *) &htup, sizeof(HeapTupleData));
if (nread != sizeof(HeapTupleData))
/*
* Since both the hash value and the MinimalTuple length word are
* uint32, we can read them both in one BufFileRead() call without
* any type cheating.
*/
nread = BufFileRead(file, (void *) header, sizeof(header));
if (nread == 0) /* end of file */
{
ExecClearTuple(tupleSlot);
return NULL;
}
if (nread != sizeof(header))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read from hash-join temporary file: %m")));
heapTuple = palloc(HEAPTUPLESIZE + htup.t_len);
memcpy((char *) heapTuple, (char *) &htup, sizeof(HeapTupleData));
heapTuple->t_data = (HeapTupleHeader)
((char *) heapTuple + HEAPTUPLESIZE);
nread = BufFileRead(file, (void *) heapTuple->t_data, htup.t_len);
if (nread != (size_t) htup.t_len)
*hashvalue = header[0];
tuple = (MinimalTuple) palloc(header[1]);
tuple->t_len = header[1];
nread = BufFileRead(file,
(void *) ((char *) tuple + sizeof(uint32)),
header[1] - sizeof(uint32));
if (nread != header[1] - sizeof(uint32))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read from hash-join temporary file: %m")));
return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);
return ExecStoreMinimalTuple(tuple, tupleSlot, true);
}
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.38 2006/03/05 15:58:56 momjian Exp $
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.39 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -65,9 +65,14 @@ typedef struct HashJoinTupleData
{
struct HashJoinTupleData *next; /* link to next tuple in same bucket */
uint32 hashvalue; /* tuple's hash code */
HeapTupleData htup; /* tuple header */
/* Tuple data, in MinimalTuple format, follows on a MAXALIGN boundary */
} HashJoinTupleData;
#define HJTUPLE_OVERHEAD MAXALIGN(sizeof(HashJoinTupleData))
#define HJTUPLE_MINTUPLE(hjtup) \
((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD))
typedef struct HashJoinTableData
{
int nbuckets; /* # buckets in the in-memory hash table */
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.40 2006/03/05 15:58:56 momjian Exp $
* $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.41 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -26,7 +26,7 @@ extern void ExecReScanHash(HashState *node, ExprContext *exprCtxt);
extern HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators);
extern void ExecHashTableDestroy(HashJoinTable hashtable);
extern void ExecHashTableInsert(HashJoinTable hashtable,
HeapTuple tuple,
TupleTableSlot *slot,
uint32 hashvalue);
extern uint32 ExecHashGetHashValue(HashJoinTable hashtable,
ExprContext *econtext,
......@@ -35,7 +35,7 @@ extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
uint32 hashvalue,
int *bucketno,
int *batchno);
extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate,
extern HashJoinTuple ExecScanHashBucket(HashJoinState *hjstate,
ExprContext *econtext);
extern void ExecHashTableReset(HashJoinTable hashtable);
extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/executor/nodeHashjoin.h,v 1.32 2006/03/05 15:58:56 momjian Exp $
* $PostgreSQL: pgsql/src/include/executor/nodeHashjoin.h,v 1.33 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -23,7 +23,7 @@ extern TupleTableSlot *ExecHashJoin(HashJoinState *node);
extern void ExecEndHashJoin(HashJoinState *node);
extern void ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt);
extern void ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue,
extern void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
BufFile **fileptr);
#endif /* NODEHASHJOIN_H */
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/executor/tuptable.h,v 1.32 2006/06/27 02:51:40 tgl Exp $
* $PostgreSQL: pgsql/src/include/executor/tuptable.h,v 1.33 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -159,6 +159,7 @@ extern TupleTableSlot *ExecStoreAllNullTuple(TupleTableSlot *slot);
extern HeapTuple ExecCopySlotTuple(TupleTableSlot *slot);
extern MinimalTuple ExecCopySlotMinimalTuple(TupleTableSlot *slot);
extern HeapTuple ExecFetchSlotTuple(TupleTableSlot *slot);
extern MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot);
extern HeapTuple ExecMaterializeSlot(TupleTableSlot *slot);
extern TupleTableSlot *ExecCopySlot(TupleTableSlot *dstslot,
TupleTableSlot *srcslot);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment