diff --git a/src/backend/utils/adt/array_selfuncs.c b/src/backend/utils/adt/array_selfuncs.c index 3916de4bfb61df8309102a862a9d91e0302bac06..bc4ebd20749997ff3441b7824118dbde320b5524 100644 --- a/src/backend/utils/adt/array_selfuncs.c +++ b/src/backend/utils/adt/array_selfuncs.c @@ -242,8 +242,7 @@ scalararraysel_containment(PlannerInfo *root, } /* - * arraycontsel -- restriction selectivity for "arraycolumn @> const", - * "arraycolumn && const" or "arraycolumn <@ const" + * arraycontsel -- restriction selectivity for array @>, &&, <@ operators */ Datum arraycontsel(PG_FUNCTION_ARGS) @@ -323,8 +322,7 @@ arraycontsel(PG_FUNCTION_ARGS) } /* - * arraycontjoinsel -- join selectivity for "arraycolumn @> const", - * "arraycolumn && const" or "arraycolumn <@ const" + * arraycontjoinsel -- join selectivity for array @>, &&, <@ operators */ Datum arraycontjoinsel(PG_FUNCTION_ARGS) @@ -744,6 +742,10 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem, if (numbers == NULL || nnumbers != nmcelem + 3) return DEFAULT_CONTAIN_SEL; + /* Can't do much without a count histogram, either */ + if (hist == NULL || nhist < 3) + return DEFAULT_CONTAIN_SEL; + /* * Grab some of the summary statistics that compute_array_stats() stores: * lowest frequency, frequency of null elements, and average distinct @@ -751,11 +753,7 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem, */ minfreq = numbers[nmcelem]; nullelem_freq = numbers[nmcelem + 2]; - - if (hist && nhist > 0) - avg_count = hist[nhist - 1]; - else - avg_count = 10.0f; /* default assumption */ + avg_count = hist[nhist - 1]; /* * "rest" will be the sum of the frequencies of all elements not @@ -853,83 +851,71 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem, */ mult *= exp(-rest); - /* Check we have nonempty distinct element count histogram */ - if (hist && nhist >= 3) - { - /*---------- - * Using the distinct element count histogram requires - * O(unique_nitems * (nmcelem + unique_nitems)) - * operations. Beyond a certain computational cost threshold, it's - * reasonable to sacrifice accuracy for decreased planning time. - * We limit the number of operations to EFFORT * nmcelem; since - * nmcelem is limited by the column's statistics target, the work - * done is user-controllable. - * - * If the number of operations would be too large, we can reduce it - * without losing all accuracy by reducing unique_nitems and - * considering only the most-common elements of the constant array. - * To make the results exactly match what we would have gotten with - * only those elements to start with, we'd have to remove any - * discarded elements' frequencies from "mult", but since this is only - * an approximation anyway, we don't bother with that. Therefore it's - * sufficient to qsort elem_selec[] and take the largest elements. - * (They will no longer match up with the elements of array_data[], - * but we don't care.) - *---------- - */ + /*---------- + * Using the distinct element count histogram requires + * O(unique_nitems * (nmcelem + unique_nitems)) + * operations. Beyond a certain computational cost threshold, it's + * reasonable to sacrifice accuracy for decreased planning time. We limit + * the number of operations to EFFORT * nmcelem; since nmcelem is limited + * by the column's statistics target, the work done is user-controllable. + * + * If the number of operations would be too large, we can reduce it + * without losing all accuracy by reducing unique_nitems and considering + * only the most-common elements of the constant array. To make the + * results exactly match what we would have gotten with only those + * elements to start with, we'd have to remove any discarded elements' + * frequencies from "mult", but since this is only an approximation + * anyway, we don't bother with that. Therefore it's sufficient to qsort + * elem_selec[] and take the largest elements. (They will no longer match + * up with the elements of array_data[], but we don't care.) + *---------- + */ #define EFFORT 100 - if ((nmcelem + unique_nitems) > 0 && - unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems)) - { - /* - * Use the quadratic formula to solve for largest allowable N; - * we have A = 1, B = nmcelem, C = - EFFORT * nmcelem. - */ - double b = (double) nmcelem; - int n; - - n = (int) ((sqrt(b * b + 4 * EFFORT * b) - b) / 2); - - /* Sort, then take just the first n elements */ - qsort(elem_selec, unique_nitems, sizeof(float), - float_compare_desc); - unique_nitems = n; - } - + if ((nmcelem + unique_nitems) > 0 && + unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems)) + { /* - * Calculate probabilities of each distinct element count for both - * mcelems and constant elements. At this point, assume independent - * element occurrence. + * Use the quadratic formula to solve for largest allowable N. We + * have A = 1, B = nmcelem, C = - EFFORT * nmcelem. */ - dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f); - mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest); + double b = (double) nmcelem; + int n; - /* ignore hist[nhist-1], which is the avg not a histogram member */ - hist_part = calc_hist(hist, nhist - 1, unique_nitems); + n = (int) ((sqrt(b * b + 4 * EFFORT * b) - b) / 2); - selec = 0.0f; - for (i = 0; i <= unique_nitems; i++) - { - /* - * mult * dist[i] / mcelem_dist[i] gives us probability of qual - * matching from assumption of independent element occurrence with - * the condition that distinct element count = i. - */ - if (mcelem_dist[i] > 0) - selec += hist_part[i] * mult * dist[i] / mcelem_dist[i]; - } - - pfree(dist); - pfree(mcelem_dist); - pfree(hist_part); + /* Sort, then take just the first n elements */ + qsort(elem_selec, unique_nitems, sizeof(float), + float_compare_desc); + unique_nitems = n; } - else + + /* + * Calculate probabilities of each distinct element count for both + * mcelems and constant elements. At this point, assume independent + * element occurrence. + */ + dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f); + mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest); + + /* ignore hist[nhist-1], which is the average not a histogram member */ + hist_part = calc_hist(hist, nhist - 1, unique_nitems); + + selec = 0.0f; + for (i = 0; i <= unique_nitems; i++) { - /* We don't have histogram. Use a rough estimate. */ - selec = mult; + /* + * mult * dist[i] / mcelem_dist[i] gives us probability of qual + * matching from assumption of independent element occurrence with + * the condition that distinct element count = i. + */ + if (mcelem_dist[i] > 0) + selec += hist_part[i] * mult * dist[i] / mcelem_dist[i]; } + pfree(dist); + pfree(mcelem_dist); + pfree(hist_part); pfree(elem_selec); /* Take into account occurrence of NULL element. */