Skip to content
Snippets Groups Projects
Commit a36088bc authored by Tom Lane
Browse files

Skip text->binary conversion of unnecessary columns in contrib/file_fdw.

When reading from a text- or CSV-format file in file_fdw, the datatype
input routines can consume a significant fraction of the runtime.
Often, the query does not need all the columns, so we can get a useful
speed boost by skipping I/O conversion for unnecessary columns.

To support this, add a "convert_selectively" option to the core COPY code.
This is undocumented and not accessible from SQL (for now, anyway).

Etsuro Fujita, reviewed by KaiGai Kohei
parent 76720bdf
Branches
Tags
No related merge requests found
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <unistd.h> #include <unistd.h>
#include "access/reloptions.h" #include "access/reloptions.h"
#include "access/sysattr.h"
#include "catalog/pg_foreign_table.h" #include "catalog/pg_foreign_table.h"
#include "commands/copy.h" #include "commands/copy.h"
#include "commands/defrem.h" #include "commands/defrem.h"
...@@ -29,6 +30,7 @@ ...@@ -29,6 +30,7 @@
#include "optimizer/pathnode.h" #include "optimizer/pathnode.h"
#include "optimizer/planmain.h" #include "optimizer/planmain.h"
#include "optimizer/restrictinfo.h" #include "optimizer/restrictinfo.h"
#include "optimizer/var.h"
#include "utils/memutils.h" #include "utils/memutils.h"
#include "utils/rel.h" #include "utils/rel.h"
...@@ -136,6 +138,9 @@ static bool is_valid_option(const char *option, Oid context); ...@@ -136,6 +138,9 @@ static bool is_valid_option(const char *option, Oid context);
static void fileGetOptions(Oid foreigntableid, static void fileGetOptions(Oid foreigntableid,
char **filename, List **other_options); char **filename, List **other_options);
static List *get_file_fdw_attribute_options(Oid relid); static List *get_file_fdw_attribute_options(Oid relid);
static bool check_selective_binary_conversion(RelOptInfo *baserel,
Oid foreigntableid,
List **columns);
static void estimate_size(PlannerInfo *root, RelOptInfo *baserel, static void estimate_size(PlannerInfo *root, RelOptInfo *baserel,
FileFdwPlanState *fdw_private); FileFdwPlanState *fdw_private);
static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel, static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
...@@ -457,12 +462,25 @@ fileGetForeignPaths(PlannerInfo *root, ...@@ -457,12 +462,25 @@ fileGetForeignPaths(PlannerInfo *root,
FileFdwPlanState *fdw_private = (FileFdwPlanState *) baserel->fdw_private; FileFdwPlanState *fdw_private = (FileFdwPlanState *) baserel->fdw_private;
Cost startup_cost; Cost startup_cost;
Cost total_cost; Cost total_cost;
List *columns;
List *coptions = NIL;
/* Decide whether to selectively perform binary conversion */
if (check_selective_binary_conversion(baserel,
foreigntableid,
&columns))
coptions = list_make1(makeDefElem("convert_selectively",
(Node *) columns));
/* Estimate costs */ /* Estimate costs */
estimate_costs(root, baserel, fdw_private, estimate_costs(root, baserel, fdw_private,
&startup_cost, &total_cost); &startup_cost, &total_cost);
/* Create a ForeignPath node and add it as only possible path */ /*
* Create a ForeignPath node and add it as only possible path. We use the
* fdw_private list of the path to carry the convert_selectively option;
* it will be propagated into the fdw_private list of the Plan node.
*/
add_path(baserel, (Path *) add_path(baserel, (Path *)
create_foreignscan_path(root, baserel, create_foreignscan_path(root, baserel,
baserel->rows, baserel->rows,
...@@ -470,7 +488,7 @@ fileGetForeignPaths(PlannerInfo *root, ...@@ -470,7 +488,7 @@ fileGetForeignPaths(PlannerInfo *root,
total_cost, total_cost,
NIL, /* no pathkeys */ NIL, /* no pathkeys */
NULL, /* no outer rel either */ NULL, /* no outer rel either */
NIL)); /* no fdw_private data */ coptions));
/* /*
* If data file was sorted, and we knew it somehow, we could insert * If data file was sorted, and we knew it somehow, we could insert
...@@ -507,7 +525,7 @@ fileGetForeignPlan(PlannerInfo *root, ...@@ -507,7 +525,7 @@ fileGetForeignPlan(PlannerInfo *root,
scan_clauses, scan_clauses,
scan_relid, scan_relid,
NIL, /* no expressions to evaluate */ NIL, /* no expressions to evaluate */
NIL); /* no private state either */ best_path->fdw_private);
} }
/* /*
...@@ -544,6 +562,7 @@ fileExplainForeignScan(ForeignScanState *node, ExplainState *es) ...@@ -544,6 +562,7 @@ fileExplainForeignScan(ForeignScanState *node, ExplainState *es)
static void static void
fileBeginForeignScan(ForeignScanState *node, int eflags) fileBeginForeignScan(ForeignScanState *node, int eflags)
{ {
ForeignScan *plan = (ForeignScan *) node->ss.ps.plan;
char *filename; char *filename;
List *options; List *options;
CopyState cstate; CopyState cstate;
...@@ -559,6 +578,9 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) ...@@ -559,6 +578,9 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
&filename, &options); &filename, &options);
/* Add any options from the plan (currently only convert_selectively) */
options = list_concat(options, plan->fdw_private);
/* /*
* Create CopyState from FDW options. We always acquire all columns, so * Create CopyState from FDW options. We always acquire all columns, so
* as to match the expected ScanTupleSlot signature. * as to match the expected ScanTupleSlot signature.
...@@ -694,6 +716,125 @@ fileAnalyzeForeignTable(Relation relation, ...@@ -694,6 +716,125 @@ fileAnalyzeForeignTable(Relation relation,
return true; return true;
} }
/*
 * check_selective_binary_conversion
 *
 * Decide whether text-to-binary conversion can be restricted to a subset
 * of the foreign table's columns.
 *
 * On success, returns true and stores at *columns a list of the names
 * (built with makeString) of the user columns referenced by the rel's
 * target list or by its restriction clauses.  That list may be NIL, e.g.
 * for a COUNT(*) query that needs no column values at all, which is why
 * success is reported through the boolean result rather than the list.
 *
 * Returns false, with *columns set to NIL, when:
 *	- the table's "format" option is "binary";
 *	- a whole-row reference to the rel is found; or
 *	- every non-dropped user column is referenced anyway.
 */
static bool
check_selective_binary_conversion(RelOptInfo *baserel,
								  Oid foreigntableid,
								  List **columns)
{
	ForeignTable *ftable;
	Relation	relation;
	TupleDesc	desc;
	Bitmapset  *needed = NULL;
	ListCell   *cell;
	AttrNumber	attno;
	bool		wholerow_seen = false;
	int			live_attrs = 0;
	int			idx;

	/* Start from the "nothing selected" result. */
	*columns = NIL;

	/*
	 * Only the first "format" option is examined.  Selective conversion is
	 * irrelevant for binary-format files.
	 */
	ftable = GetForeignTable(foreigntableid);
	foreach(cell, ftable->options)
	{
		DefElem    *opt = (DefElem *) lfirst(cell);

		if (strcmp(opt->defname, "format") != 0)
			continue;
		if (strcmp(defGetString(opt), "binary") == 0)
			return false;
		break;
	}

	/* Gather attributes referenced by the target list (joins, output) ... */
	pull_varattnos((Node *) baserel->reltargetlist, baserel->relid,
				   &needed);

	/* ... and those referenced by each restriction clause. */
	foreach(cell, baserel->baserestrictinfo)
		pull_varattnos((Node *) ((RestrictInfo *) lfirst(cell))->clause,
					   baserel->relid, &needed);

	relation = heap_open(foreigntableid, AccessShareLock);
	desc = RelationGetDescr(relation);

	/* Tally the non-dropped user columns for the "all needed" test below. */
	for (idx = 0; idx < desc->natts; idx++)
	{
		if (!desc->attrs[idx]->attisdropped)
			live_attrs++;
	}

	/* Translate the collected attribute numbers into column names. */
	for (;;)
	{
		Form_pg_attribute att;

		attno = bms_first_member(needed);
		if (attno < 0)
			break;				/* set exhausted */

		/* Bitmapset members are offset so system attributes fit; undo that. */
		attno += FirstLowInvalidHeapAttributeNumber;

		/* Attribute zero denotes a whole-row reference: give up. */
		if (attno == 0)
		{
			wholerow_seen = true;
			break;
		}

		/* System attributes are ignored. */
		if (attno < 0)
			continue;

		/* Dropped columns are skipped (probably never seen here). */
		att = desc->attrs[attno - 1];
		if (att->attisdropped)
			continue;

		*columns = lappend(*columns,
						   makeString(pstrdup(NameStr(att->attname))));
	}

	heap_close(relation, AccessShareLock);

	/*
	 * A whole-row reference, or a need for every user column, means there is
	 * nothing to gain; report failure with an empty list.
	 */
	if (wholerow_seen || live_attrs == list_length(*columns))
	{
		*columns = NIL;
		return false;
	}

	return true;
}
/* /*
* Estimate size of a foreign table. * Estimate size of a foreign table.
* *
......
...@@ -121,6 +121,9 @@ typedef struct CopyStateData ...@@ -121,6 +121,9 @@ typedef struct CopyStateData
bool *force_quote_flags; /* per-column CSV FQ flags */ bool *force_quote_flags; /* per-column CSV FQ flags */
List *force_notnull; /* list of column names */ List *force_notnull; /* list of column names */
bool *force_notnull_flags; /* per-column CSV FNN flags */ bool *force_notnull_flags; /* per-column CSV FNN flags */
bool convert_selectively; /* do selective binary conversion? */
List *convert_select; /* list of column names (can be NIL) */
bool *convert_select_flags; /* per-column CSV/TEXT CS flags */
/* these are just for error messages, see CopyFromErrorCallback */ /* these are just for error messages, see CopyFromErrorCallback */
const char *cur_relname; /* table name for error messages */ const char *cur_relname; /* table name for error messages */
...@@ -961,6 +964,26 @@ ProcessCopyOptions(CopyState cstate, ...@@ -961,6 +964,26 @@ ProcessCopyOptions(CopyState cstate,
errmsg("argument to option \"%s\" must be a list of column names", errmsg("argument to option \"%s\" must be a list of column names",
defel->defname))); defel->defname)));
} }
else if (strcmp(defel->defname, "convert_selectively") == 0)
{
/*
* Undocumented, not-accessible-from-SQL option: convert only
* the named columns to binary form, storing the rest as NULLs.
* It's allowed for the column list to be NIL.
*/
if (cstate->convert_selectively)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
cstate->convert_selectively = true;
if (defel->arg == NULL || IsA(defel->arg, List))
cstate->convert_select = (List *) defel->arg;
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("argument to option \"%s\" must be a list of column names",
defel->defname)));
}
else if (strcmp(defel->defname, "encoding") == 0) else if (strcmp(defel->defname, "encoding") == 0)
{ {
if (cstate->file_encoding >= 0) if (cstate->file_encoding >= 0)
...@@ -1307,6 +1330,29 @@ BeginCopy(bool is_from, ...@@ -1307,6 +1330,29 @@ BeginCopy(bool is_from,
} }
} }
/* Convert convert_selectively name list to per-column flags */
if (cstate->convert_selectively)
{
List *attnums;
ListCell *cur;
cstate->convert_select_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->convert_select);
foreach(cur, attnums)
{
int attnum = lfirst_int(cur);
if (!list_member_int(cstate->attnumlist, attnum))
ereport(ERROR,
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
errmsg_internal("selected column \"%s\" not referenced by COPY",
NameStr(tupDesc->attrs[attnum - 1]->attname))));
cstate->convert_select_flags[attnum - 1] = true;
}
}
/* Use client encoding when ENCODING option is not specified. */ /* Use client encoding when ENCODING option is not specified. */
if (cstate->file_encoding < 0) if (cstate->file_encoding < 0)
cstate->file_encoding = pg_get_client_encoding(); cstate->file_encoding = pg_get_client_encoding();
...@@ -2565,6 +2611,13 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext, ...@@ -2565,6 +2611,13 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
NameStr(attr[m]->attname)))); NameStr(attr[m]->attname))));
string = field_strings[fieldno++]; string = field_strings[fieldno++];
if (cstate->convert_select_flags &&
!cstate->convert_select_flags[m])
{
/* ignore input field, leaving column as NULL */
continue;
}
if (cstate->csv_mode && string == NULL && if (cstate->csv_mode && string == NULL &&
cstate->force_notnull_flags[m]) cstate->force_notnull_flags[m])
{ {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment