From d8b1bf47918aafdc515729624ad1ec2db4b91d14 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Sat, 16 Apr 2005 20:07:35 +0000 Subject: [PATCH] Create a new 'MultiExecProcNode' call API for plan nodes that don't return just a single tuple at a time. Currently the only such node type is Hash, but I expect we will soon have indexscans that can return tuple bitmaps. A side benefit is that EXPLAIN ANALYZE now shows the correct tuple count for a Hash node. --- src/backend/commands/explain.c | 11 +++---- src/backend/executor/execProcnode.c | 46 ++++++++++++++++++++++++++++- src/backend/executor/instrument.c | 38 ++++++++++++++---------- src/backend/executor/nodeHash.c | 42 +++++++++++++++++++++----- src/backend/executor/nodeHashjoin.c | 6 ++-- src/include/executor/executor.h | 3 +- src/include/executor/hashjoin.h | 4 +-- src/include/executor/instrument.h | 7 +++-- src/include/executor/nodeHash.h | 3 +- 9 files changed, 121 insertions(+), 39 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 02ceedd3959..4b2abb9e7b4 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994-5, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.131 2005/03/25 21:57:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.132 2005/04/16 20:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -47,7 +47,7 @@ typedef struct ExplainState static void ExplainOneQuery(Query *query, ExplainStmt *stmt, TupOutputState *tstate); -static double elapsed_time(instr_time * starttime); +static double elapsed_time(instr_time *starttime); static void explain_outNode(StringInfo str, Plan *plan, PlanState *planstate, Plan *outer_plan, @@ -296,7 +296,7 @@ ExplainOnePlan(QueryDesc *queryDesc, ExplainStmt *stmt, { int nt; - if (!rInfo->ri_TrigDesc) + if (!rInfo->ri_TrigDesc || !rInfo->ri_TrigInstrument) continue; for (nt = 0; nt < rInfo->ri_TrigDesc->numtriggers; nt++) { @@ -366,7 +366,7 @@ ExplainOnePlan(QueryDesc *queryDesc, ExplainStmt *stmt, /* Compute elapsed time in seconds since given timestamp */ static double -elapsed_time(instr_time * starttime) +elapsed_time(instr_time *starttime) { instr_time endtime; @@ -663,7 +663,8 @@ explain_outNode(StringInfo str, * We have to forcibly clean up the instrumentation state because * we haven't done ExecutorEnd yet. This is pretty grotty ... */ - InstrEndLoop(planstate->instrument); + if (planstate->instrument) + InstrEndLoop(planstate->instrument); if (planstate->instrument && planstate->instrument->nloops > 0) { diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index ff8caf16f01..555668e7799 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -12,7 +12,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.48 2005/04/06 20:13:49 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execProcnode.c,v 1.49 2005/04/16 20:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -375,6 +375,50 @@ ExecProcNode(PlanState *node) return result; } + +/* ---------------------------------------------------------------- + * MultiExecProcNode + * + * Execute a node that doesn't return individual tuples + * (it might return a hashtable, bitmap, etc). Caller should + * check it got back the expected kind of Node. + * + * This has essentially the same responsibilities as ExecProcNode, + * but it does not do InstrStartNode/InstrStopNode (mainly because + * it can't tell how many returned tuples to count). Each per-node + * function must provide its own instrumentation support. + * ---------------------------------------------------------------- + */ +Node * +MultiExecProcNode(PlanState *node) +{ + Node *result; + + CHECK_FOR_INTERRUPTS(); + + if (node->chgParam != NULL) /* something changed */ + ExecReScan(node, NULL); /* let ReScan handle this */ + + switch (nodeTag(node)) + { + /* + * Only node types that actually support multiexec will be listed + */ + + case T_HashState: + result = MultiExecHash((HashState *) node); + break; + + default: + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); + result = NULL; + break; + } + + return result; +} + + /* * ExecCountSlotsNode - count up the number of tuple table slots needed * diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index 52d9de4f4ac..c5b4a252d61 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -7,7 +7,7 @@ * Copyright (c) 2001-2005, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.11 2005/03/25 21:57:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.12 2005/04/16 20:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,13 +33,10 @@ InstrAlloc(int n) void InstrStartNode(Instrumentation *instr) { - if (!instr) - return; - - if (!INSTR_TIME_IS_ZERO(instr->starttime)) - elog(DEBUG2, "InstrStartNode called twice in a row"); - else + if (INSTR_TIME_IS_ZERO(instr->starttime)) INSTR_TIME_SET_CURRENT(instr->starttime); + else + elog(DEBUG2, "InstrStartNode called twice in a row"); } /* Exit from a plan node */ @@ -48,12 +45,13 @@ InstrStopNode(Instrumentation *instr, bool returnedTuple) { instr_time endtime; - if (!instr) - return; + /* count the returned tuples */ + if (returnedTuple) + instr->tuplecount += 1; if (INSTR_TIME_IS_ZERO(instr->starttime)) { - elog(DEBUG2, "InstrStopNode without start"); + elog(DEBUG2, "InstrStopNode called without start"); return; } @@ -86,9 +84,17 @@ InstrStopNode(Instrumentation *instr, bool returnedTuple) instr->running = true; instr->firsttuple = INSTR_TIME_GET_DOUBLE(instr->counter); } +} - if (returnedTuple) - instr->tuplecount += 1; +/* As above, but count multiple tuples returned at once */ +void +InstrStopNodeMulti(Instrumentation *instr, double nTuples) +{ + /* count the returned tuples */ + instr->tuplecount += nTuples; + + /* delegate the rest */ + InstrStopNode(instr, false); } /* Finish a run cycle for a plan node */ @@ -97,14 +103,14 @@ InstrEndLoop(Instrumentation *instr) { double totaltime; - if (!instr) - return; - /* Skip if nothing has happened, or already shut down */ if (!instr->running) return; - /* Accumulate statistics */ + if (!INSTR_TIME_IS_ZERO(instr->starttime)) + elog(DEBUG2, "InstrEndLoop called on running node"); + + /* Accumulate per-cycle statistics into totals */ totaltime = INSTR_TIME_GET_DOUBLE(instr->counter); instr->startup += instr->firsttuple; diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 97e6738bd32..c304d930a28 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -8,13 +8,13 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.92 2005/03/31 02:02:52 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.93 2005/04/16 20:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ /* * INTERFACE ROUTINES - * ExecHash - generate an in-memory hash table of the relation + * MultiExecHash - generate an in-memory hash table of the relation * ExecInitHash - initialize node and subnodes * ExecEndHash - shutdown node and subnodes */ @@ -22,6 +22,7 @@ #include "executor/execdebug.h" #include "executor/hashjoin.h" +#include "executor/instrument.h" #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" #include "miscadmin.h" @@ -36,12 +37,25 @@ static void ExecHashIncreaseNumBatches(HashJoinTable hashtable); /* ---------------------------------------------------------------- * ExecHash * - * build hash table for hashjoin, doing partitioning if more - * than one batch is required. + * stub for pro forma compliance * ---------------------------------------------------------------- */ TupleTableSlot * ExecHash(HashState *node) +{ + elog(ERROR, "Hash node does not support ExecProcNode call convention"); + return NULL; +} + +/* ---------------------------------------------------------------- + * MultiExecHash + * + * build hash table for hashjoin, doing partitioning if more + * than one batch is required. + * ---------------------------------------------------------------- + */ +Node * +MultiExecHash(HashState *node) { PlanState *outerNode; List *hashkeys; @@ -50,6 +64,10 @@ ExecHash(HashState *node) ExprContext *econtext; uint32 hashvalue; + /* must provide our own instrumentation support */ + if (node->ps.instrument) + InstrStartNode(node->ps.instrument); + /* * get state info from node */ @@ -70,14 +88,24 @@ ExecHash(HashState *node) slot = ExecProcNode(outerNode); if (TupIsNull(slot)) break; - hashtable->hashNonEmpty = true; + hashtable->totalTuples += 1; /* We have to compute the hash value */ econtext->ecxt_innertuple = slot; hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys); ExecHashTableInsert(hashtable, ExecFetchSlotTuple(slot), hashvalue); } - /* We needn't return a tuple slot or anything else */ + /* must provide our own instrumentation support */ + if (node->ps.instrument) + InstrStopNodeMulti(node->ps.instrument, hashtable->totalTuples); + + /* + * We do not return the hash table directly because it's not a subtype + * of Node, and so would violate the MultiExecProcNode API. Instead, + * our parent Hashjoin node is expected to know how to fish it out + * of our node state. Ugly but not really worth cleaning up, since + * Hashjoin knows quite a bit more about Hash besides that. + */ return NULL; } @@ -220,7 +248,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators) hashtable->nbatch_original = nbatch; hashtable->nbatch_outstart = nbatch; hashtable->growEnabled = true; - hashtable->hashNonEmpty = false; + hashtable->totalTuples = 0; hashtable->innerBatchFile = NULL; hashtable->outerBatchFile = NULL; hashtable->spaceUsed = 0; diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 4811b7068eb..38e48cd6dce 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.70 2005/03/31 02:02:52 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.71 2005/04/16 20:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -123,13 +123,13 @@ ExecHashJoin(HashJoinState *node) * execute the Hash node, to build the hash table */ hashNode->hashtable = hashtable; - (void) ExecProcNode((PlanState *) hashNode); + (void) MultiExecProcNode((PlanState *) hashNode); /* * If the inner relation is completely empty, and we're not doing * an outer join, we can quit without scanning the outer relation. */ - if (!hashtable->hashNonEmpty && node->js.jointype != JOIN_LEFT) + if (hashtable->totalTuples == 0 && node->js.jointype != JOIN_LEFT) { ExecHashTableDestroy(hashtable); node->hj_HashTable = NULL; diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 0d3e18ce0ac..2e42894788e 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.117 2005/03/16 21:38:09 tgl Exp $ + * $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.118 2005/04/16 20:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -105,6 +105,7 @@ extern TupleTableSlot *EvalPlanQual(EState *estate, Index rti, */ extern PlanState *ExecInitNode(Plan *node, EState *estate); extern TupleTableSlot *ExecProcNode(PlanState *node); +extern Node *MultiExecProcNode(PlanState *node); extern int ExecCountSlotsNode(Plan *node); extern void ExecEndNode(PlanState *node); diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index c0f75922e12..f5200831d7e 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.35 2005/03/06 22:15:05 tgl Exp $ + * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.36 2005/04/16 20:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -83,7 +83,7 @@ typedef struct HashJoinTableData bool growEnabled; /* flag to shut off nbatch increases */ - bool hashNonEmpty; /* did inner plan produce any rows? */ + double totalTuples; /* # tuples obtained from inner plan */ /* * These arrays are allocated for the life of the hash join, but diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index 0540fd0da71..47899fbcc26 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -6,7 +6,7 @@ * * Copyright (c) 2001-2005, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.10 2005/03/25 21:57:59 tgl Exp $ + * $PostgreSQL: pgsql/src/include/executor/instrument.h,v 1.11 2005/04/16 20:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -60,9 +60,9 @@ typedef struct Instrumentation /* Info about current plan cycle: */ bool running; /* TRUE if we've completed first tuple */ instr_time starttime; /* Start time of current iteration of node */ - instr_time counter; /* Accumulates runtime for this node */ + instr_time counter; /* Accumulated runtime for this node */ double firsttuple; /* Time for first tuple of this cycle */ - double tuplecount; /* Tuples so far this cycle */ + double tuplecount; /* Tuples emitted so far this cycle */ /* Accumulated statistics across all completed cycles: */ double startup; /* Total startup time (in seconds) */ double total; /* Total total time (in seconds) */ @@ -73,6 +73,7 @@ typedef struct Instrumentation extern Instrumentation *InstrAlloc(int n); extern void InstrStartNode(Instrumentation *instr); extern void InstrStopNode(Instrumentation *instr, bool returnedTuple); +extern void InstrStopNodeMulti(Instrumentation *instr, double nTuples); extern void InstrEndLoop(Instrumentation *instr); #endif /* INSTRUMENT_H */ diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 06d73c060ec..678b2bd7622 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.36 2005/03/06 22:15:05 tgl Exp $ + * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.37 2005/04/16 20:07:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,7 @@ extern int ExecCountSlotsHash(Hash *node); extern HashState *ExecInitHash(Hash *node, EState *estate); extern TupleTableSlot *ExecHash(HashState *node); +extern Node *MultiExecHash(HashState *node); extern void ExecEndHash(HashState *node); extern void ExecReScanHash(HashState *node, ExprContext *exprCtxt); -- GitLab