From 337b6f5ecf05b21b5e997986884d097d60e4e3d0 Mon Sep 17 00:00:00 2001 From: Robert Haas <rhaas@postgresql.org> Date: Wed, 15 Feb 2012 12:13:32 -0500 Subject: [PATCH] Speed up in-memory tuplesorting. Per recent work by Peter Geoghegan, it's significantly faster to tuplesort on a single sortkey if ApplySortComparator is inlined into quicksort rather than reached via a function pointer. It's also faster in general to have a version of quicksort which is specialized for sorting SortTuple objects rather than objects of arbitrary size and type. This requires a couple of additional copies of the quicksort logic, which in this patch are generated using a Perl script. There might be some benefit in adding further specializations here too, but thus far it's not clear that those gains are worth their weight in code footprint. --- src/backend/Makefile | 4 +- src/backend/utils/sort/.gitignore | 1 + src/backend/utils/sort/Makefile | 8 + src/backend/utils/sort/gen_qsort_tuple.pl | 232 ++++++++++++++++++++++ src/backend/utils/sort/tuplesort.c | 68 ++++--- src/port/qsort.c | 2 +- src/port/qsort_arg.c | 2 +- src/tools/msvc/Solution.pm | 8 + 8 files changed, 289 insertions(+), 36 deletions(-) create mode 100644 src/backend/utils/sort/.gitignore create mode 100644 src/backend/utils/sort/gen_qsort_tuple.pl diff --git a/src/backend/Makefile b/src/backend/Makefile index 0c763dd3758..01bb6e1171d 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -202,6 +202,7 @@ distprep: $(MAKE) -C replication repl_gram.c repl_scanner.c $(MAKE) -C utils fmgrtab.c fmgroids.h errcodes.h $(MAKE) -C utils/misc guc-file.c + $(MAKE) -C utils/sort qsort_tuple.c ########################################################################## @@ -315,7 +316,8 @@ maintainer-clean: distclean utils/fmgroids.h \ utils/fmgrtab.c \ utils/errcodes.h \ - utils/misc/guc-file.c + utils/misc/guc-file.c \ + utils/sort/qsort_tuple.c ########################################################################## diff --git
a/src/backend/utils/sort/.gitignore b/src/backend/utils/sort/.gitignore new file mode 100644 index 00000000000..f2958633e61 --- /dev/null +++ b/src/backend/utils/sort/.gitignore @@ -0,0 +1 @@ +/qsort_tuple.c diff --git a/src/backend/utils/sort/Makefile b/src/backend/utils/sort/Makefile index 2ef4965ee6d..f46ce416105 100644 --- a/src/backend/utils/sort/Makefile +++ b/src/backend/utils/sort/Makefile @@ -14,4 +14,12 @@ include $(top_builddir)/src/Makefile.global OBJS = logtape.o sortsupport.o tuplesort.o tuplestore.o +tuplesort.o: qsort_tuple.c + +qsort_tuple.c: gen_qsort_tuple.pl + $(PERL) $(srcdir)/gen_qsort_tuple.pl $< > $@ + include $(top_srcdir)/src/backend/common.mk + +maintainer-clean: + rm -f qsort_tuple.c diff --git a/src/backend/utils/sort/gen_qsort_tuple.pl b/src/backend/utils/sort/gen_qsort_tuple.pl new file mode 100644 index 00000000000..40d55488f1a --- /dev/null +++ b/src/backend/utils/sort/gen_qsort_tuple.pl @@ -0,0 +1,232 @@ +#!/usr/bin/perl -w + +# +# gen_qsort_tuple.pl +# +# This script generates specialized versions of the quicksort algorithm for +# tuple sorting. The quicksort code is derived from the NetBSD code. The +# code generated by this script runs significantly faster than vanilla qsort +# when used to sort tuples. This speedup comes from a number of places. +# The major effects are (1) inlining simple tuple comparators is much faster +# than jumping through a function pointer and (2) swap and vecswap operations +# specialized to the particular data type of interest (in this case, SortTuple) +# are faster than the generic routines. +# +# Modifications from vanilla NetBSD source: +# Add do ... while() macro fix +# Remove __inline, _DIAGASSERTs, __P +# Remove ill-considered "swap_cnt" switch to insertion sort, +# in favor of a simple check for presorted input. 
+# Instead of sorting arbitrary objects, we're always sorting SortTuples +# Add CHECK_FOR_INTERRUPTS() +# +# CAUTION: if you change this file, see also qsort.c and qsort_arg.c +# + +use strict; + +my $SUFFIX; +my $EXTRAARGS; +my $EXTRAPARAMS; +my $CMPPARAMS; + +emit_qsort_boilerplate(); + +$SUFFIX = 'tuple'; +$EXTRAARGS = ', SortTupleComparator cmp_tuple, Tuplesortstate *state'; +$EXTRAPARAMS = ', cmp_tuple, state'; +$CMPPARAMS = ', state'; +emit_qsort_implementation(); + +$SUFFIX = 'ssup'; +$EXTRAARGS = ', SortSupport ssup'; +$EXTRAPARAMS = ', ssup'; +$CMPPARAMS = ', ssup'; +print <<'EOM'; +#define cmp_ssup(a, b, ssup) \ + ApplySortComparator((a)->datum1, (a)->isnull1, \ + (b)->datum1, (b)->isnull1, ssup) +EOM +emit_qsort_implementation(); + +sub emit_qsort_boilerplate +{ + print <<'EOM'; +/* + * autogenerated by src/backend/utils/sort/gen_qsort_tuple.pl, do not edit + * This file is included by tuplesort.c, rather than compiled separately. + */ + +/* $NetBSD: qsort.c,v 1.13 2003/08/07 16:43:42 agc Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Qsort routine based on J. L. Bentley and M. D. McIlroy, + * "Engineering a sort function", + * Software--Practice and Experience 23 (1993) 1249-1265. + * We have modified their original by adding a check for already-sorted input, + * which seems to be a win per discussions on pgsql-hackers around 2006-03-21. + */ + +static void +swapfunc(SortTuple *a, SortTuple *b, size_t n) +{ + do + { + SortTuple t = *a; + *a++ = *b; + *b++ = t; + } while (--n > 0); +} + +#define swap(a, b) \ + do { \ + SortTuple t = *(a); \ + *(a) = *(b); \ + *(b) = t; \ + } while (0) + +#define vecswap(a, b, n) if ((n) > 0) swapfunc((a), (b), (size_t)(n)) +EOM +} + +sub emit_qsort_implementation +{ + print <<EOM; +static SortTuple * +med3_$SUFFIX(SortTuple *a, SortTuple *b, SortTuple *c$EXTRAARGS) +{ + return cmp_$SUFFIX(a, b$CMPPARAMS) < 0 ? + (cmp_$SUFFIX(b, c$CMPPARAMS) < 0 ? b : + (cmp_$SUFFIX(a, c$CMPPARAMS) < 0 ? c : a)) + : (cmp_$SUFFIX(b, c$CMPPARAMS) > 0 ? b : + (cmp_$SUFFIX(a, c$CMPPARAMS) < 0 ? 
a : c)); +} + +static void +qsort_$SUFFIX(SortTuple *a, size_t n$EXTRAARGS) +{ + SortTuple *pa, + *pb, + *pc, + *pd, + *pl, + *pm, + *pn; + int d, + r, + presorted; + +loop: + CHECK_FOR_INTERRUPTS(); + if (n < 7) + { + for (pm = a + 1; pm < a + n; pm++) + for (pl = pm; pl > a && cmp_$SUFFIX(pl - 1, pl$CMPPARAMS) > 0; pl--) + swap(pl, pl - 1); + return; + } + presorted = 1; + for (pm = a + 1; pm < a + n; pm++) + { + CHECK_FOR_INTERRUPTS(); + if (cmp_$SUFFIX(pm - 1, pm$CMPPARAMS) > 0) + { + presorted = 0; + break; + } + } + if (presorted) + return; + pm = a + (n / 2); + if (n > 7) + { + pl = a; + pn = a + (n - 1); + if (n > 40) + { + d = (n / 8); + pl = med3_$SUFFIX(pl, pl + d, pl + 2 * d$EXTRAPARAMS); + pm = med3_$SUFFIX(pm - d, pm, pm + d$EXTRAPARAMS); + pn = med3_$SUFFIX(pn - 2 * d, pn - d, pn$EXTRAPARAMS); + } + pm = med3_$SUFFIX(pl, pm, pn$EXTRAPARAMS); + } + swap(a, pm); + pa = pb = a + 1; + pc = pd = a + (n - 1); + for (;;) + { + while (pb <= pc && (r = cmp_$SUFFIX(pb, a$CMPPARAMS)) <= 0) + { + CHECK_FOR_INTERRUPTS(); + if (r == 0) + { + swap(pa, pb); + pa++; + } + pb++; + } + while (pb <= pc && (r = cmp_$SUFFIX(pc, a$CMPPARAMS)) >= 0) + { + CHECK_FOR_INTERRUPTS(); + if (r == 0) + { + swap(pc, pd); + pd--; + } + pc--; + } + if (pb > pc) + break; + swap(pb, pc); + pb++; + pc--; + } + pn = a + n; + r = Min(pa - a, pb - pa); + vecswap(a, pb - r, r); + r = Min(pd - pc, pn - pd - 1); + vecswap(pb, pn - r, r); + if ((r = pb - pa) > 1) + qsort_$SUFFIX(a, r$EXTRAPARAMS); + if ((r = pd - pc) > 1) + { + /* Iterate rather than recurse to save stack space */ + a = pn - r; + n = r; + goto loop; + } +/* qsort_$SUFFIX(pn - r, r$EXTRAPARAMS);*/ +} + +EOM +} diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 1452e8c7cfc..10b19c0b21f 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -195,6 +195,9 @@ typedef enum #define TAPE_BUFFER_OVERHEAD (BLCKSZ * 3) #define MERGE_BUFFER_SIZE (BLCKSZ * 32) 
+typedef int (*SortTupleComparator) (const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); + /* * Private state of a Tuplesort operation. */ @@ -223,8 +226,7 @@ struct Tuplesortstate * <0, 0, >0 according as a<b, a=b, a>b. The API must match * qsort_arg_comparator. */ - int (*comparetup) (const SortTuple *a, const SortTuple *b, - Tuplesortstate *state); + SortTupleComparator comparetup; /* * Function to copy a supplied input tuple into palloc'd space and set up @@ -363,12 +365,14 @@ struct Tuplesortstate /* These are specific to the index_hash subcase: */ uint32 hash_mask; /* mask for sortable part of hash code */ + /* This is initialized when, and only when, there's just one key. */ + SortSupport onlyKey; + /* * These variables are specific to the Datum case; they are set by * tuplesort_begin_datum and used only by the DatumTuple routines. */ Oid datumType; - SortSupport datumKey; /* we need typelen and byval in order to know how to copy the Datums. */ int datumTypeLen; bool datumTypeByVal; @@ -492,6 +496,11 @@ static void readtup_datum(Tuplesortstate *state, SortTuple *stup, static void reversedirection_datum(Tuplesortstate *state); static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup); +/* + * Special version of qsort, just for SortTuple objects. 
+ */ +#include "qsort_tuple.c" + /* * tuplesort_begin_xxx @@ -631,6 +640,9 @@ tuplesort_begin_heap(TupleDesc tupDesc, PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); } + if (nkeys == 1) + state->onlyKey = state->sortKeys; + MemoryContextSwitchTo(oldcontext); return state; @@ -809,13 +821,13 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, state->datumType = datumType; /* Prepare SortSupport data */ - state->datumKey = (SortSupport) palloc0(sizeof(SortSupportData)); + state->onlyKey = (SortSupport) palloc0(sizeof(SortSupportData)); - state->datumKey->ssup_cxt = CurrentMemoryContext; - state->datumKey->ssup_collation = sortCollation; - state->datumKey->ssup_nulls_first = nullsFirstFlag; + state->onlyKey->ssup_cxt = CurrentMemoryContext; + state->onlyKey->ssup_collation = sortCollation; + state->onlyKey->ssup_nulls_first = nullsFirstFlag; - PrepareSortSupportFromOrderingOp(sortOperator, state->datumKey); + PrepareSortSupportFromOrderingOp(sortOperator, state->onlyKey); /* lookup necessary attributes of the datum type */ get_typlenbyval(datumType, &typlen, &typbyval); @@ -1222,11 +1234,16 @@ tuplesort_performsort(Tuplesortstate *state) * amount of memory. Just qsort 'em and we're done. 
*/ if (state->memtupcount > 1) - qsort_arg((void *) state->memtuples, - state->memtupcount, - sizeof(SortTuple), - (qsort_arg_comparator) state->comparetup, - (void *) state); + { + if (state->onlyKey != NULL) + qsort_ssup(state->memtuples, state->memtupcount, + state->onlyKey); + else + qsort_tuple(state->memtuples, + state->memtupcount, + state->comparetup, + state); + } state->current = 0; state->eof_reached = false; state->markpos_offset = 0; @@ -2660,9 +2677,6 @@ comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) int nkey; int32 compare; - /* Allow interrupting long sorts */ - CHECK_FOR_INTERRUPTS(); - /* Compare the leading sort key */ compare = ApplySortComparator(a->datum1, a->isnull1, b->datum1, b->isnull1, @@ -2804,9 +2818,6 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b, int nkey; int32 compare; - /* Allow interrupting long sorts */ - CHECK_FOR_INTERRUPTS(); - /* Compare the leading sort key, if it's simple */ if (state->indexInfo->ii_KeyAttrNumbers[0] != 0) { @@ -2995,9 +3006,6 @@ comparetup_index_btree(const SortTuple *a, const SortTuple *b, int nkey; int32 compare; - /* Allow interrupting long sorts */ - CHECK_FOR_INTERRUPTS(); - /* Compare the leading sort key */ compare = inlineApplySortFunction(&scanKey->sk_func, scanKey->sk_flags, scanKey->sk_collation, @@ -3102,9 +3110,6 @@ comparetup_index_hash(const SortTuple *a, const SortTuple *b, IndexTuple tuple1; IndexTuple tuple2; - /* Allow interrupting long sorts */ - CHECK_FOR_INTERRUPTS(); - /* * Fetch hash keys and mask off bits we don't want to sort by. We know * that the first column of the index tuple is the hash key. 
@@ -3231,12 +3236,9 @@ reversedirection_index_hash(Tuplesortstate *state) static int comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) { - /* Allow interrupting long sorts */ - CHECK_FOR_INTERRUPTS(); - - return ApplySortComparator(a->datum1, a->isnull1, - b->datum1, b->isnull1, - state->datumKey); + return ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + state->onlyKey); } static void @@ -3328,8 +3330,8 @@ readtup_datum(Tuplesortstate *state, SortTuple *stup, static void reversedirection_datum(Tuplesortstate *state) { - state->datumKey->ssup_reverse = !state->datumKey->ssup_reverse; - state->datumKey->ssup_nulls_first = !state->datumKey->ssup_nulls_first; + state->onlyKey->ssup_reverse = !state->onlyKey->ssup_reverse; + state->onlyKey->ssup_nulls_first = !state->onlyKey->ssup_nulls_first; } /* diff --git a/src/port/qsort.c b/src/port/qsort.c index 8e2c6d92c2d..49d8fa7ab6c 100644 --- a/src/port/qsort.c +++ b/src/port/qsort.c @@ -7,7 +7,7 @@ * Remove ill-considered "swap_cnt" switch to insertion sort, * in favor of a simple check for presorted input. * - * CAUTION: if you change this file, see also qsort_arg.c + * CAUTION: if you change this file, see also qsort_arg.c, gen_qsort_tuple.pl * * src/port/qsort.c */ diff --git a/src/port/qsort_arg.c b/src/port/qsort_arg.c index 28d1894992b..3091eb09ead 100644 --- a/src/port/qsort_arg.c +++ b/src/port/qsort_arg.c @@ -7,7 +7,7 @@ * Remove ill-considered "swap_cnt" switch to insertion sort, * in favor of a simple check for presorted input. 
* - * CAUTION: if you change this file, see also qsort.c + * CAUTION: if you change this file, see also qsort.c, gen_qsort_tuple.pl * * src/port/qsort_arg.c */ diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm index 1725fbb5330..e1d85c85ad0 100644 --- a/src/tools/msvc/Solution.pm +++ b/src/tools/msvc/Solution.pm @@ -287,6 +287,14 @@ s{PG_VERSION_STR "[^"]+"}{__STRINGIFY(x) #x\n#define __STRINGIFY2(z) __STRINGIFY ); } + if (IsNewer('src\backend\utils\sort\qsort_tuple.c','src\backend\utils\sort\gen_qsort_tuple.pl')) + { + print "Generating qsort_tuple.c...\n"; + system( +'perl src\backend\utils\sort\gen_qsort_tuple.pl > src\backend\utils\sort\qsort_tuple.c' + ); + } + if (IsNewer('src\interfaces\libpq\libpq.rc','src\interfaces\libpq\libpq.rc.in')) { print "Generating libpq.rc...\n"; -- GitLab