Skip to content
Snippets Groups Projects
Commit c8ba697a authored by Robert Haas's avatar Robert Haas
Browse files

Fix logic bug in gistchoose and gistRelocateBuildBuffersOnSplit.

Every time the best-tuple-found-so-far changes, we need to reset all
the penalty values in which_grow[] to the penalties for the new best
tuple.  The old code failed to do this, resulting in inferior index
quality.

The original patch from Alexander Korotkov was just two lines; I took
the liberty of fleshing that out by adding a bunch of comments that I
hope will make this logic easier for others to understand than it was
for me.
parent d1a4db8d
No related branches found
No related tags found
No related merge requests found
...@@ -625,8 +625,13 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate, ...@@ -625,8 +625,13 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
} }
/* /*
* Loop through all index tuples on the buffer on the splitted page, * Loop through all index tuples on the buffer on the page being split,
* moving them to buffers on the new pages. * moving them to buffers on the new pages. We try to move each tuple
* the page that will result in the lowest penalty for the leading column
* or, in the case of a tie, the lowest penalty for the earliest column
* that is not tied.
*
* The guts of this loop are very similar to gistchoose().
*/ */
while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup)) while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
{ {
...@@ -637,14 +642,18 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate, ...@@ -637,14 +642,18 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
IndexTuple newtup; IndexTuple newtup;
RelocationBufferInfo *targetBufferInfo; RelocationBufferInfo *targetBufferInfo;
/*
* Choose which page this tuple should go to.
*/
gistDeCompressAtt(giststate, r, gistDeCompressAtt(giststate, r,
itup, NULL, (OffsetNumber) 0, entry, isnull); itup, NULL, (OffsetNumber) 0, entry, isnull);
which = -1; which = -1;
*which_grow = -1.0f; *which_grow = -1.0f;
/*
* Loop over possible target pages. We'll exit early if we find an index key that
* can accommodate the new key with no penalty on any column. sum_grow is used to
* track this condition. It doesn't need to be exactly accurate, just >0 whenever
* we want the loop to continue and equal to 0 when we want it to terminate.
*/
sum_grow = 1.0f; sum_grow = 1.0f;
for (i = 0; i < splitPagesCount && sum_grow; i++) for (i = 0; i < splitPagesCount && sum_grow; i++)
...@@ -653,6 +662,8 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate, ...@@ -653,6 +662,8 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i]; RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
sum_grow = 0.0f; sum_grow = 0.0f;
/* Loop over index attributes. */
for (j = 0; j < r->rd_att->natts; j++) for (j = 0; j < r->rd_att->natts; j++)
{ {
float usize; float usize;
...@@ -664,16 +675,36 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate, ...@@ -664,16 +675,36 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
if (which_grow[j] < 0 || usize < which_grow[j]) if (which_grow[j] < 0 || usize < which_grow[j])
{ {
/*
* We get here in two cases. First, we may have just discovered that the
* current tuple is the best one we've seen so far; that is, for the first
* column for which the penalty is not equal to the best tuple seen so far,
* this one has a lower penalty than the previously-seen one. But, when
* a new best tuple is found, we must record the best penalty value for
* all the remaining columns. We'll end up here for each remaining index
* column in that case, too.
*/
which = i; which = i;
which_grow[j] = usize; which_grow[j] = usize;
if (j < r->rd_att->natts - 1 && i == 0) if (j < r->rd_att->natts - 1)
which_grow[j + 1] = -1; which_grow[j + 1] = -1;
sum_grow += which_grow[j]; sum_grow += which_grow[j];
} }
else if (which_grow[j] == usize) else if (which_grow[j] == usize)
{
/*
* The current tuple is exactly as good for this column as the best tuple
* seen so far. The next iteration of this loop will compare the next
* column.
*/
sum_grow += usize; sum_grow += usize;
}
else else
{ {
/*
* The current tuple is worse for this column than the best tuple seen so
* far. Skip the remaining columns and move on to the next tuple, if any.
*/
sum_grow = 1; sum_grow = 1;
break; break;
} }
......
...@@ -363,7 +363,12 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis ...@@ -363,7 +363,12 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
} }
/* /*
* find entry with lowest penalty * Search a page for the entry with lowest penalty.
*
* The index may have multiple columns, and there's a penalty value for each column.
* The penalty associated with a column which appears earlier in the index definition is
* strictly more important than the penalty of column which appears later in the index
* definition.
*/ */
OffsetNumber OffsetNumber
gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
...@@ -389,12 +394,28 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ ...@@ -389,12 +394,28 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
Assert(maxoff >= FirstOffsetNumber); Assert(maxoff >= FirstOffsetNumber);
Assert(!GistPageIsLeaf(p)); Assert(!GistPageIsLeaf(p));
/*
* Loop over tuples on page.
*
* We'll exit early if we find an index key that can accommodate the new key with no
* penalty on any column. sum_grow is used to track this condition. Normally, it is the
* sum of the penalties we've seen for this column so far, which is not a very useful
* quantity in general because the penalties for each column are only considered
* independently, but all we really care about is whether or not it's greater than zero.
* Since penalties can't be negative, the sum of the penalties will be greater than
* zero if and only if at least one penalty was greater than zero. To make things just
* a bit more complicated, we arbitrarily set sum_grow to 1.0 whenever we want to force
* the at least one more iteration of this outer loop. Any non-zero value would serve
* just as well.
*/
for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i)) for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
{ {
int j; int j;
IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i)); IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
sum_grow = 0; sum_grow = 0;
/* Loop over indexed attribtues. */
for (j = 0; j < r->rd_att->natts; j++) for (j = 0; j < r->rd_att->natts; j++)
{ {
Datum datum; Datum datum;
...@@ -409,16 +430,36 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ ...@@ -409,16 +430,36 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
if (which_grow[j] < 0 || usize < which_grow[j]) if (which_grow[j] < 0 || usize < which_grow[j])
{ {
/*
* We get here in two cases. First, we may have just discovered that the
* current tuple is the best one we've seen so far; that is, for the first
* column for which the penalty is not equal to the best tuple seen so far,
* this one has a lower penalty than the previously-seen one. But, when
* a new best tuple is found, we must record the best penalty value for
* all the remaining columns. We'll end up here for each remaining index
* column in that case, too.
*/
which = i; which = i;
which_grow[j] = usize; which_grow[j] = usize;
if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber) if (j < r->rd_att->natts - 1)
which_grow[j + 1] = -1; which_grow[j + 1] = -1;
sum_grow += which_grow[j]; sum_grow += which_grow[j];
} }
else if (which_grow[j] == usize) else if (which_grow[j] == usize)
{
/*
* The current tuple is exactly as good for this column as the best tuple
* seen so far. The next iteration of this loop will compare the next
* column.
*/
sum_grow += usize; sum_grow += usize;
}
else else
{ {
/*
* The current tuple is worse for this column than the best tuple seen so
* far. Skip the remaining columns and move on to the next tuple, if any.
*/
sum_grow = 1; sum_grow = 1;
break; break;
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment