diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index 97af54644565c2b091fe8cf4f4fa35fb1bb69e1c..93f3411a2d0c21ffcd8ebf0e6275147487693311 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.32 2010/01/01 21:53:49 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.33 2010/02/08 04:33:51 tgl Exp $ --> <chapter id="indexam"> <title>Index Access Method Interface Definition</title> @@ -577,11 +577,10 @@ amrestrpos (IndexScanDesc scan); The core <productname>PostgreSQL</productname> system obtains <literal>AccessShareLock</> on the index during an index scan, and <literal>RowExclusiveLock</> when updating the index (including plain - <command>VACUUM</>). Since these lock - types do not conflict, the access method is responsible for handling any - fine-grained locking it might need. An exclusive lock on the index as a whole - will be taken only during index creation, destruction, - <command>REINDEX</>, or <command>VACUUM FULL</>. + <command>VACUUM</>). Since these lock types do not conflict, the access + method is responsible for handling any fine-grained locking it might need. + An exclusive lock on the index as a whole will be taken only during index + creation, destruction, or <command>REINDEX</>. </para> <para> diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index e6c9d9cc4a972c78ba0cf42c66ba1d263a1960a0..ecec84cc2d927bb83d5ad50e6937f0170d07c30c 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/maintenance.sgml,v 1.98 2010/02/03 17:25:05 momjian Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/maintenance.sgml,v 1.99 2010/02/08 04:33:51 tgl Exp $ --> <chapter id="maintenance"> <title>Routine Database Maintenance Tasks</title> @@ -123,9 +123,7 @@ <command>ALTER TABLE</command> while it is being vacuumed.) <command>VACUUM FULL</> requires exclusive lock on the table it is working on, and therefore cannot be done in parallel with other use - of the table. Another disadvantage of <command>VACUUM FULL</> is that - while it reduces table size, it does not reduce index size proportionally; - in fact it can make indexes <emphasis>larger</>. Generally, therefore, + of the table. Generally, therefore, administrators should strive to use standard <command>VACUUM</> and avoid <command>VACUUM FULL</>. </para> @@ -166,13 +164,10 @@ system, except in the special case where one or more pages at the end of a table become entirely free and an exclusive table lock can be easily obtained. In contrast, <command>VACUUM FULL</> actively compacts - tables by moving row versions to earlier pages. It is thus able to - force pages at the end of the table to become entirely free, whereupon - it will return them to the operating system. However, if many rows - must be moved, this can take a long time. Also, moving a row requires - transiently making duplicate index entries for it (the entry pointing - to its new location must be made before the old entry can be removed); - so moving a lot of rows this way causes severe index bloat. + tables by writing a complete new version of the table file with no dead + space. This minimizes the size of the table, but can take a long time. + It also requires extra disk space for the new copy of the table, until + the operation completes. 
</para> <para> @@ -220,20 +215,19 @@ <tip> <para> - Neither form of <command>VACUUM</> is entirely satisfactory when + Plain <command>VACUUM</> may not be satisfactory when a table contains large numbers of dead row versions as a result of massive update or delete activity. If you have such a table and - you need to reclaim the excess disk space it occupies, the best - way is to use <xref linkend="sql-cluster" endterm="sql-cluster-title"> + you need to reclaim the excess disk space it occupies, you will need + to use <command>VACUUM FULL</>, or alternatively + <xref linkend="sql-cluster" endterm="sql-cluster-title"> or one of the table-rewriting variants of <xref linkend="sql-altertable" endterm="sql-altertable-title">. These commands rewrite an entire new copy of the table and build - new indexes for it. Like <command>VACUUM FULL</>, they require - exclusive lock. Note that they also temporarily use extra disk - space, since the old copies of the table and indexes can't be - released until the new ones are complete. In the worst case where - your disk is nearly full, <command>VACUUM FULL</> may be the only - workable alternative. + new indexes for it. All these options require exclusive lock. Note that + they also temporarily use extra disk space approximately equal to the size + of the table, since the old copies of the table and indexes can't be + released until the new ones are complete. </para> </tip> @@ -579,22 +573,22 @@ HINT: Stop the postmaster and use a standalone backend to VACUUM in "mydb". <firstterm>autovacuum launcher</firstterm>, which is in charge of starting <firstterm>autovacuum worker</firstterm> processes for all databases. The launcher will distribute the work across time, attempting to start one - worker on each database every <xref linkend="guc-autovacuum-naptime"> - seconds. One worker will be launched for each database, with a maximum - of <xref linkend="guc-autovacuum-max-workers"> processes running at the - same time. If there are more than - <xref linkend="guc-autovacuum-max-workers"> databases to be processed, + worker within each database every <xref linkend="guc-autovacuum-naptime"> + seconds. (Therefore, if the installation has <replaceable>N</> databases, + a new worker will be launched every + <varname>autovacuum_naptime</>/<replaceable>N</> seconds.) + A maximum of <xref linkend="guc-autovacuum-max-workers"> worker processes + are allowed to run at the same time. If there are more than + <varname>autovacuum_max_workers</> databases to be processed, the next database will be processed as soon as the first worker finishes. Each worker process will check each table within its database and execute <command>VACUUM</> and/or <command>ANALYZE</> as needed. </para> <para> - The <xref linkend="guc-autovacuum-max-workers"> setting limits how many - workers may be running at any time. If several large tables all become - eligible for vacuuming in a short amount of time, all autovacuum workers - might become occupied with vacuuming those tables for a long period. - This would result + If several large tables all become eligible for vacuuming in a short + amount of time, all autovacuum workers might become occupied with + vacuuming those tables for a long period. This would result in other tables and databases not being vacuumed until a worker became available. 
There is no limit on how many workers might be in a single database, but workers do try to avoid repeating work that has @@ -700,8 +694,8 @@ analyze threshold = analyze base threshold + analyze scale factor * number of tu </para> <para> - Index pages that have become - completely empty are reclaimed for re-use. However, here is still the possibility + B-tree index pages that have become completely empty are reclaimed for + re-use. However, there is still a possibility of inefficient use of space: if all but a few index keys on a page have been deleted, the page remains allocated. Therefore, a usage pattern in which most, but not all, keys in each range are eventually diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml index 07559e38c42eefe787ca04211f5d150c630fcf5e..64b6f5e23e614c70d5caabc6886329d12941009f 100644 --- a/doc/src/sgml/ref/vacuum.sgml +++ b/doc/src/sgml/ref/vacuum.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/ref/vacuum.sgml,v 1.57 2010/01/06 05:31:13 itagaki Exp $ +$PostgreSQL: pgsql/doc/src/sgml/ref/vacuum.sgml,v 1.58 2010/02/08 04:33:51 tgl Exp $ PostgreSQL documentation --> @@ -21,7 +21,7 @@ PostgreSQL documentation <refsynopsisdiv> <synopsis> -VACUUM [ ( { FULL [ INPLACE ] | FREEZE | VERBOSE | ANALYZE } [, ...] ) ] [ <replaceable class="PARAMETER">table</replaceable> [ (<replaceable class="PARAMETER">column</replaceable> [, ...] ) ] ] +VACUUM [ ( { FULL | FREEZE | VERBOSE | ANALYZE } [, ...] ) ] [ <replaceable class="PARAMETER">table</replaceable> [ (<replaceable class="PARAMETER">column</replaceable> [, ...] ) ] ] VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> ] VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] ANALYZE [ <replaceable class="PARAMETER">table</replaceable> [ (<replaceable class="PARAMETER">column</replaceable> [, ...] ) ] ] </synopsis> @@ -58,11 +58,12 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] ANALYZE [ <replaceable class="PARAMETER"> space and makes it available for re-use. This form of the command can operate in parallel with normal reading and writing of the table, as an exclusive lock - is not obtained. <command>VACUUM - FULL</command> does more extensive processing, including moving of tuples - across blocks to try to compact the table to the minimum number of disk - blocks. This form is much slower and requires an exclusive lock on each - table while it is being processed. + is not obtained. However, extra space is not returned to the operating + system (in most cases); it's just kept available for re-use within the + same table. <command>VACUUM FULL</command> rewrites the entire contents + of the table into a new disk file with no extra space, allowing unused + space to be returned to the operating system. This form is much slower and + requires an exclusive lock on each table while it is being processed. </para> <para> @@ -85,27 +86,10 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] ANALYZE [ <replaceable class="PARAMETER"> <para> Selects <quote>full</quote> vacuum, which can reclaim more space, but takes much longer and exclusively locks the table. - </para> - <para> - For user tables, all table data and indexes are rewritten. This - method requires extra disk space in which to write the new data, - and is generally useful when a significant amount of space needs - to be reclaimed from within the table. - </para> - <para> - For system tables, all table data and indexes are modified in - place to reclaim space. 
This method may require less disk space - for the table data than <command>VACUUM FULL</command> on a - comparable user table, but the indexes will grow which may - counteract that benefit. Additionally, the operation is often - slower than <command>VACUUM FULL</command> on a comparable user - table. - </para> - <para> - If <literal>FULL INPLACE</literal> is specified, the space is - reclaimed in the same manner as a system table, even if it is a - user table. Specifying <literal>INPLACE</literal> explicitly is - rarely useful. + This method also requires extra disk space, since it writes a + new copy of the table and doesn't release the old copy until + the operation is complete. Usually this should only be used when a + significant amount of space needs to be reclaimed from within the table. </para> </listitem> </varlistentry> @@ -217,10 +201,7 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] ANALYZE [ <replaceable class="PARAMETER"> or updated most of the rows in a table and would like the table to physically shrink to occupy less disk space and allow faster table scans. <command>VACUUM FULL</command> will usually shrink the table - more than a plain <command>VACUUM</command> would. The - <option>FULL</option> option does not shrink indexes; a periodic - <command>REINDEX</> is still recommended. In fact, it is often faster - to drop all indexes, <command>VACUUM FULL</>, and recreate the indexes. + more than a plain <command>VACUUM</command> would. </para> <para> diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml index b276c5df006aedd8ec46abee4f257554a0d8d6f8..20fb233a4c657ad236cbe9a897682a6f8972216a 100644 --- a/doc/src/sgml/ref/vacuumdb.sgml +++ b/doc/src/sgml/ref/vacuumdb.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/ref/vacuumdb.sgml,v 1.49 2010/01/07 14:35:44 momjian Exp $ +$PostgreSQL: pgsql/doc/src/sgml/ref/vacuumdb.sgml,v 1.50 2010/02/08 04:33:51 tgl Exp $ PostgreSQL documentation --> @@ -24,7 +24,6 @@ PostgreSQL documentation <command>vacuumdb</command> <arg rep="repeat"><replaceable>connection-option</replaceable></arg> <group><arg>--full</arg><arg>-f</arg></group> - <group><arg>--inplace</arg><arg>-i</arg></group> <group><arg>--freeze</arg><arg>-F</arg></group> <group><arg>--verbose</arg><arg>-v</arg></group> <group><arg>--analyze</arg><arg>-z</arg></group> @@ -38,14 +37,12 @@ PostgreSQL documentation <arg rep="repeat"><replaceable>connection-options</replaceable></arg> <group><arg>--all</arg><arg>-a</arg></group> <group><arg>--full</arg><arg>-f</arg></group> - <group><arg>--inplace</arg><arg>-i</arg></group> <group><arg>--freeze</arg><arg>-F</arg></group> <group><arg>--verbose</arg><arg>-v</arg></group> <group><arg>--analyze</arg><arg>-z</arg></group> <group><arg>--analyze-only</arg><arg>-Z</arg></group> </cmdsynopsis> </refsynopsisdiv> - <refsect1> <title>Description</title> @@ -60,8 +57,8 @@ PostgreSQL documentation <para> <application>vacuumdb</application> is a wrapper around the SQL command <xref linkend="SQL-VACUUM" endterm="SQL-VACUUM-title">. - There is no effective difference between vacuuming and analyzing - databases via this utility and via other methods for accessing the + There is no effective difference between vacuuming and analyzing + databases via this utility and via other methods for accessing the server. 
</para> @@ -73,7 +70,6 @@ PostgreSQL documentation <para> <application>vacuumdb</application> accepts the following command-line arguments: - <variablelist> <varlistentry> <term><option>-a</option></term> @@ -131,16 +127,6 @@ PostgreSQL documentation </listitem> </varlistentry> - <varlistentry> - <term><option>-i</option></term> - <term><option>--inplace</option></term> - <listitem> - <para> - Perform <quote>full inplace</quote> vacuuming. - </para> - </listitem> - </varlistentry> - <varlistentry> <term><option>-Z</option></term> <term><option>--analyze-only</option></term> @@ -203,18 +189,16 @@ PostgreSQL documentation </para> <para> - <application>vacuumdb</application> also accepts + <application>vacuumdb</application> also accepts the following command-line arguments for connection parameters: - <variablelist> <varlistentry> <term><option>-h <replaceable class="parameter">host</replaceable></></term> <term><option>--host <replaceable class="parameter">host</replaceable></></term> <listitem> <para> - Specifies the host name of the machine on which the - server - is running. If the value begins with a slash, it is used + Specifies the host name of the machine on which the server + is running. If the value begins with a slash, it is used as the directory for the Unix domain socket. </para> </listitem> @@ -225,7 +209,7 @@ PostgreSQL documentation <term><option>--port <replaceable class="parameter">port</replaceable></></term> <listitem> <para> - Specifies the TCP port or local Unix domain socket file + Specifies the TCP port or local Unix domain socket file extension on which the server is listening for connections. </para> @@ -263,7 +247,7 @@ PostgreSQL documentation <listitem> <para> Force <application>vacuumdb</application> to prompt for a - password before connecting to a database. + password before connecting to a database. </para> <para> diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README index af65efcb542e560acff0a750da2d694328826f37..cd406935e05505499a3eeaa50a1cfe0b5e32b826 100644 --- a/src/backend/access/gin/README +++ b/src/backend/access/gin/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/access/gin/README,v 1.6 2008/07/08 03:25:42 neilc Exp $ +$PostgreSQL: pgsql/src/backend/access/gin/README,v 1.7 2010/02/08 04:33:52 tgl Exp $ Gin for PostgreSQL ================== @@ -98,13 +98,6 @@ We appreciate any comments, help and suggestions. * Teach optimizer/executor that GIN is intrinsically clustered. i.e., it always returns ItemPointer in ascending order. * Tweak gincostestimate. - * GIN stores several ItemPointer to heap tuple, so VACUUM FULL produces - this warning message: - - WARNING: index "idx" contains 88395 row versions, but table contains - 51812 row versions - HINT: Rebuild the index with REINDEX. 
- **** Workaround added TODO ---- diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index 014db7fb739f10d12b9acfd399164d1de1c0bffd..a13b99cdfaccd9eb2183ee9debc0103ac25aa910 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.32 2010/01/02 16:57:33 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.33 2010/02/08 04:33:52 tgl Exp $ *------------------------------------------------------------------------- */ @@ -745,13 +745,9 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) stats->estimated_count = info->estimated_count; /* - * If vacuum full, we already have exclusive lock on the index. Otherwise, - * need lock unless it's local to this backend. + * Need lock unless it's local to this backend. */ - if (info->vacuum_full) - needLock = false; - else - needLock = !RELATION_IS_LOCAL(index); + needLock = !RELATION_IS_LOCAL(index); if (needLock) LockRelationForExtension(index, ExclusiveLock); @@ -785,15 +781,6 @@ ginvacuumcleanup(PG_FUNCTION_ARGS) } lastBlock = npages - 1; - if (info->vacuum_full && lastBlock > lastFilledBlock) - { - /* try to truncate index */ - RelationTruncate(index, lastFilledBlock + 1); - - stats->pages_removed = lastBlock - lastFilledBlock; - totFreePages = totFreePages - stats->pages_removed; - } - /* Finally, vacuum the FSM */ IndexFreeSpaceMapVacuum(info->index); diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index af26fb0311f94e80d4a231a66150c176b601f6be..178ef40a35d32eda4ad83654c56ec2ec58e361f1 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.46 2010/01/02 16:57:34 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.47 2010/02/08 04:33:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,7 +29,7 @@ typedef struct GistBulkDeleteResult { IndexBulkDeleteResult std; /* common state */ - bool needFullVacuum; + bool needReindex; } GistBulkDeleteResult; typedef struct @@ -496,12 +496,8 @@ gistVacuumUpdate(GistVacuum *gv, BlockNumber blkno, bool needunion) } /* - * For usual vacuum just update FSM, for full vacuum - * reforms parent tuples if some of childs was deleted or changed, - * update invalid tuples (they can exist from last crash recovery only), - * tries to get smaller index + * VACUUM cleanup: update FSM */ - Datum gistvacuumcleanup(PG_FUNCTION_ARGS) { @@ -533,47 +529,15 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) */ } - /* gistVacuumUpdate may cause hard work */ - if (info->vacuum_full) - { - GistVacuum gv; - ArrayTuple res; - - /* note: vacuum.c already acquired AccessExclusiveLock on index */ - - gv.index = rel; - initGISTstate(&(gv.giststate), rel); - gv.opCtx = createTempGistContext(); - gv.result = stats; - gv.strategy = info->strategy; - - /* walk through the entire index for update tuples */ - res = gistVacuumUpdate(&gv, GIST_ROOT_BLKNO, false); - /* cleanup */ - if (res.itup) - { - int i; - - for (i = 0; i < res.ituplen; i++) - pfree(res.itup[i]); - pfree(res.itup); - } - freeGISTstate(&(gv.giststate)); - MemoryContextDelete(gv.opCtx); - } - else if (stats->needFullVacuum) + if 
(stats->needReindex) ereport(NOTICE, (errmsg("index \"%s\" needs VACUUM FULL or REINDEX to finish crash recovery", RelationGetRelationName(rel)))); /* - * If vacuum full, we already have exclusive lock on the index. Otherwise, - * need lock unless it's local to this backend. + * Need lock unless it's local to this backend. */ - if (info->vacuum_full) - needLock = false; - else - needLock = !RELATION_IS_LOCAL(rel); + needLock = !RELATION_IS_LOCAL(rel); /* try to find deleted pages */ if (needLock) @@ -606,14 +570,6 @@ gistvacuumcleanup(PG_FUNCTION_ARGS) } lastBlock = npages - 1; - if (info->vacuum_full && lastFilledBlock < lastBlock) - { /* try to truncate index */ - RelationTruncate(rel, lastFilledBlock + 1); - - stats->std.pages_removed = lastBlock - lastFilledBlock; - totFreePages = totFreePages - stats->std.pages_removed; - } - /* Finally, vacuum the FSM */ IndexFreeSpaceMapVacuum(info->index); @@ -799,7 +755,7 @@ gistbulkdelete(PG_FUNCTION_ARGS) stack->next = ptr; if (GistTupleIsInvalid(idxtuple)) - stats->needFullVacuum = true; + stats->needReindex = true; } } diff --git a/src/backend/access/heap/README.HOT b/src/backend/access/heap/README.HOT index 76ac83722f5f37d7b49e7192f15a679bd1198488..fb3e9d31ad29748690bb2116be1f57bad798ab39 100644 --- a/src/backend/access/heap/README.HOT +++ b/src/backend/access/heap/README.HOT @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/access/heap/README.HOT,v 1.4 2008/10/02 20:59:31 momjian Exp $ +$PostgreSQL: pgsql/src/backend/access/heap/README.HOT,v 1.5 2010/02/08 04:33:52 tgl Exp $ Heap Only Tuples (HOT) ====================== @@ -255,27 +255,6 @@ dead heap-only tuples, and cleans up any dead line pointers as if they were regular dead tuples. -VACUUM FULL ------------ - -VACUUM FULL performs an extra operation of collapsing out redirecting line -pointers, by moving the first non-DEAD tuple of each HOT chain to the root -position and clearing its heap-only-tuple flag. This effectively changes -the user-visible CTID of that tuple. This would be completely unsafe -during normal concurrent operation, but since VACUUM FULL takes full -exclusive lock on the table, it should be OK. (Note that VACUUM FULL has -always felt free to change tuples' CTIDs by moving them across pages.) -Eliminating redirection links means that the main body of VACUUM FULL -doesn't have to deal with them, which seems a good thing since VACUUM FULL -is horrendously complex already. - -When VACUUM FULL tries to move tuple chains, it does not distinguish regular -and heap-only tuples, but just moves both types the same. This is OK because -it will move the entire non-DEAD tail of an update chain and remove index -entries for each item moved. At worst, we'll uselessly search for index -entries matching the heap-only tuples included in the move. 
- - Statistics ---------- diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 0d9ad2af0450fc4cf9794450c9d262b881a8cb83..9983ff65c8a52c74a3e9f95dfd45a13163619d75 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.285 2010/02/03 10:01:29 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.286 2010/02/08 04:33:52 tgl Exp $ * * * INTERFACE ROUTINES @@ -79,7 +79,7 @@ static HeapScanDesc heap_beginscan_internal(Relation relation, bool allow_strat, bool allow_sync, bool is_bitmapscan); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, - ItemPointerData from, Buffer newbuf, HeapTuple newtup, bool move, + ItemPointerData from, Buffer newbuf, HeapTuple newtup, bool all_visible_cleared, bool new_all_visible_cleared); static bool HeapSatisfiesHOTUpdate(Relation relation, Bitmapset *hot_attrs, HeapTuple oldtup, HeapTuple newtup); @@ -2785,7 +2785,7 @@ l2: if (!relation->rd_istemp) { XLogRecPtr recptr = log_heap_update(relation, buffer, oldtup.t_self, - newbuf, heaptup, false, + newbuf, heaptup, all_visible_cleared, all_visible_cleared_new); @@ -3664,9 +3664,13 @@ recheck_xmax: } /* - * Although xvac per se could only be set by VACUUM, it shares physical - * storage space with cmax, and so could be wiped out by someone setting - * xmax. Hence recheck after changing lock, same as for xmax itself. + * Although xvac per se could only be set by old-style VACUUM FULL, it + * shares physical storage space with cmax, and so could be wiped out by + * someone setting xmax. Hence recheck after changing lock, same as for + * xmax itself. + * + * Old-style VACUUM FULL is gone, but we have to keep this code as long + * as we support having MOVED_OFF/MOVED_IN tuples in the database. */ recheck_xvac: if (tuple->t_infomask & HEAP_MOVED) @@ -3785,8 +3789,7 @@ HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, TransactionId xmax = HeapTupleHeaderGetXmax(tuple); TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - if (tuple->t_infomask & HEAP_MOVED_OFF || - tuple->t_infomask & HEAP_MOVED_IN) + if (tuple->t_infomask & HEAP_MOVED) { if (TransactionIdPrecedes(*latestRemovedXid, xvac)) *latestRemovedXid = xvac; @@ -3844,7 +3847,7 @@ log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused, - TransactionId latestRemovedXid, bool redirect_move) + TransactionId latestRemovedXid) { xl_heap_clean xlrec; uint8 info; @@ -3915,7 +3918,7 @@ log_heap_clean(Relation reln, Buffer buffer, rdata[3].buffer_std = true; rdata[3].next = NULL; - info = redirect_move ? XLOG_HEAP2_CLEAN_MOVE : XLOG_HEAP2_CLEAN; + info = XLOG_HEAP2_CLEAN; recptr = XLogInsert(RM_HEAP2_ID, info, rdata); return recptr; @@ -3970,23 +3973,11 @@ log_heap_freeze(Relation reln, Buffer buffer, */ static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, - Buffer newbuf, HeapTuple newtup, bool move, + Buffer newbuf, HeapTuple newtup, bool all_visible_cleared, bool new_all_visible_cleared) { - /* - * Note: xlhdr is declared to have adequate size and correct alignment for - * an xl_heap_header. However the two tids, if present at all, will be - * packed in with no wasted space after the xl_heap_header; they aren't - * necessarily aligned as implied by this struct declaration. 
- */ - struct - { - xl_heap_header hdr; - TransactionId tid1; - TransactionId tid2; - } xlhdr; - int hsize = SizeOfHeapHeader; xl_heap_update xlrec; + xl_heap_header xlhdr; uint8 info; XLogRecPtr recptr; XLogRecData rdata[4]; @@ -3995,12 +3986,7 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, /* Caller should not call me on a temp relation */ Assert(!reln->rd_istemp); - if (move) - { - Assert(!HeapTupleIsHeapOnly(newtup)); - info = XLOG_HEAP_MOVE; - } - else if (HeapTupleIsHeapOnly(newtup)) + if (HeapTupleIsHeapOnly(newtup)) info = XLOG_HEAP_HOT_UPDATE; else info = XLOG_HEAP_UPDATE; @@ -4022,30 +4008,16 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, rdata[1].buffer_std = true; rdata[1].next = &(rdata[2]); - xlhdr.hdr.t_infomask2 = newtup->t_data->t_infomask2; - xlhdr.hdr.t_infomask = newtup->t_data->t_infomask; - xlhdr.hdr.t_hoff = newtup->t_data->t_hoff; - if (move) /* remember xmax & xmin */ - { - TransactionId xid[2]; /* xmax, xmin */ - - if (newtup->t_data->t_infomask & (HEAP_XMAX_INVALID | HEAP_IS_LOCKED)) - xid[0] = InvalidTransactionId; - else - xid[0] = HeapTupleHeaderGetXmax(newtup->t_data); - xid[1] = HeapTupleHeaderGetXmin(newtup->t_data); - memcpy((char *) &xlhdr + hsize, - (char *) xid, - 2 * sizeof(TransactionId)); - hsize += 2 * sizeof(TransactionId); - } + xlhdr.t_infomask2 = newtup->t_data->t_infomask2; + xlhdr.t_infomask = newtup->t_data->t_infomask; + xlhdr.t_hoff = newtup->t_data->t_hoff; /* * As with insert records, we need not store the rdata[2] segment if we * decide to store the whole buffer instead. */ rdata[2].data = (char *) &xlhdr; - rdata[2].len = hsize; + rdata[2].len = SizeOfHeapHeader; rdata[2].buffer = newbuf; rdata[2].buffer_std = true; rdata[2].next = &(rdata[3]); @@ -4070,19 +4042,6 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from, return recptr; } -/* - * Perform XLogInsert for a heap-move operation. Caller must already - * have modified the buffers and marked them dirty. - */ -XLogRecPtr -log_heap_move(Relation reln, Buffer oldbuf, ItemPointerData from, - Buffer newbuf, HeapTuple newtup, - bool all_visible_cleared, bool new_all_visible_cleared) -{ - return log_heap_update(reln, oldbuf, from, newbuf, newtup, true, - all_visible_cleared, new_all_visible_cleared); -} - /* * Perform XLogInsert of a HEAP_NEWPAGE record to WAL. Caller is responsible * for writing the page to disk after calling this routine. @@ -4149,10 +4108,10 @@ heap_xlog_cleanup_info(XLogRecPtr lsn, XLogRecord *record) } /* - * Handles CLEAN and CLEAN_MOVE record types + * Handles HEAP_CLEAN record type */ static void -heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move) +heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record) { xl_heap_clean *xlrec = (xl_heap_clean *) XLogRecGetData(record); Buffer buffer; @@ -4171,7 +4130,8 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move) * no queries running for which the removed tuples are still visible. 
*/ if (InHotStandby) - ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node); + ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, + xlrec->node); RestoreBkpBlocks(lsn, record, true); @@ -4203,8 +4163,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move) heap_page_prune_execute(buffer, redirected, nredirected, nowdead, ndead, - nowunused, nunused, - clean_move); + nowunused, nunused); freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ @@ -4489,10 +4448,10 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record) } /* - * Handles UPDATE, HOT_UPDATE & MOVE + * Handles UPDATE and HOT_UPDATE */ static void -heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move, bool hot_update) +heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update) { xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record); Buffer buffer; @@ -4558,33 +4517,19 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move, bool hot_update) htup = (HeapTupleHeader) PageGetItem(page, lp); - if (move) - { - htup->t_infomask &= ~(HEAP_XMIN_COMMITTED | - HEAP_XMIN_INVALID | - HEAP_MOVED_IN); - htup->t_infomask |= HEAP_MOVED_OFF; - HeapTupleHeaderClearHotUpdated(htup); - HeapTupleHeaderSetXvac(htup, record->xl_xid); - /* Make sure there is no forward chain link in t_ctid */ - htup->t_ctid = xlrec->target.tid; - } + htup->t_infomask &= ~(HEAP_XMAX_COMMITTED | + HEAP_XMAX_INVALID | + HEAP_XMAX_IS_MULTI | + HEAP_IS_LOCKED | + HEAP_MOVED); + if (hot_update) + HeapTupleHeaderSetHotUpdated(htup); else - { - htup->t_infomask &= ~(HEAP_XMAX_COMMITTED | - HEAP_XMAX_INVALID | - HEAP_XMAX_IS_MULTI | - HEAP_IS_LOCKED | - HEAP_MOVED); - if (hot_update) - HeapTupleHeaderSetHotUpdated(htup); - else - HeapTupleHeaderClearHotUpdated(htup); - HeapTupleHeaderSetXmax(htup, record->xl_xid); - HeapTupleHeaderSetCmax(htup, FirstCommandId, false); - /* Set forward chain link in t_ctid */ - htup->t_ctid = xlrec->newtid; - } + HeapTupleHeaderClearHotUpdated(htup); + HeapTupleHeaderSetXmax(htup, record->xl_xid); + HeapTupleHeaderSetCmax(htup, FirstCommandId, false); + /* Set forward chain link in t_ctid */ + htup->t_ctid = xlrec->newtid; /* Mark the page as a candidate for pruning */ PageSetPrunable(page, record->xl_xid); @@ -4655,8 +4600,6 @@ newsame:; elog(PANIC, "heap_update_redo: invalid max offset number"); hsize = SizeOfHeapUpdate + SizeOfHeapHeader; - if (move) - hsize += (2 * sizeof(TransactionId)); newlen = record->xl_len - hsize; Assert(newlen <= MaxHeapTupleSize); @@ -4674,22 +4617,8 @@ newsame:; htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; - if (move) - { - TransactionId xid[2]; /* xmax, xmin */ - - memcpy((char *) xid, - (char *) xlrec + SizeOfHeapUpdate + SizeOfHeapHeader, - 2 * sizeof(TransactionId)); - HeapTupleHeaderSetXmin(htup, xid[1]); - HeapTupleHeaderSetXmax(htup, xid[0]); - HeapTupleHeaderSetXvac(htup, record->xl_xid); - } - else - { - HeapTupleHeaderSetXmin(htup, record->xl_xid); - HeapTupleHeaderSetCmin(htup, FirstCommandId); - } + HeapTupleHeaderSetXmin(htup, record->xl_xid); + HeapTupleHeaderSetCmin(htup, FirstCommandId); /* Make sure there is no forward chain link in t_ctid */ htup->t_ctid = xlrec->newtid; @@ -4857,13 +4786,10 @@ heap_redo(XLogRecPtr lsn, XLogRecord *record) heap_xlog_delete(lsn, record); break; case XLOG_HEAP_UPDATE: - heap_xlog_update(lsn, record, false, false); - break; - case XLOG_HEAP_MOVE: - heap_xlog_update(lsn, record, true, false); + heap_xlog_update(lsn, record, false); break; 
case XLOG_HEAP_HOT_UPDATE: - heap_xlog_update(lsn, record, false, true); + heap_xlog_update(lsn, record, true); break; case XLOG_HEAP_NEWPAGE: heap_xlog_newpage(lsn, record); @@ -4895,10 +4821,7 @@ heap2_redo(XLogRecPtr lsn, XLogRecord *record) heap_xlog_freeze(lsn, record); break; case XLOG_HEAP2_CLEAN: - heap_xlog_clean(lsn, record, false); - break; - case XLOG_HEAP2_CLEAN_MOVE: - heap_xlog_clean(lsn, record, true); + heap_xlog_clean(lsn, record); break; case XLOG_HEAP2_CLEANUP_INFO: heap_xlog_cleanup_info(lsn, record); @@ -4953,19 +4876,6 @@ heap_desc(StringInfo buf, uint8 xl_info, char *rec) ItemPointerGetBlockNumber(&(xlrec->newtid)), ItemPointerGetOffsetNumber(&(xlrec->newtid))); } - else if (info == XLOG_HEAP_MOVE) - { - xl_heap_update *xlrec = (xl_heap_update *) rec; - - if (xl_info & XLOG_HEAP_INIT_PAGE) - appendStringInfo(buf, "move(init): "); - else - appendStringInfo(buf, "move: "); - out_target(buf, &(xlrec->target)); - appendStringInfo(buf, "; new %u/%u", - ItemPointerGetBlockNumber(&(xlrec->newtid)), - ItemPointerGetOffsetNumber(&(xlrec->newtid))); - } else if (info == XLOG_HEAP_HOT_UPDATE) { xl_heap_update *xlrec = (xl_heap_update *) rec; @@ -5037,15 +4947,6 @@ heap2_desc(StringInfo buf, uint8 xl_info, char *rec) xlrec->node.relNode, xlrec->block, xlrec->latestRemovedXid); } - else if (info == XLOG_HEAP2_CLEAN_MOVE) - { - xl_heap_clean *xlrec = (xl_heap_clean *) rec; - - appendStringInfo(buf, "clean_move: rel %u/%u/%u; blk %u remxid %u", - xlrec->node.spcNode, xlrec->node.dbNode, - xlrec->node.relNode, xlrec->block, - xlrec->latestRemovedXid); - } else if (info == XLOG_HEAP2_CLEANUP_INFO) { xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) rec; diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 3d7c5c7c0be3b20247e6a9670f9b56bf966e52db..9d6a737277bd589cf7a00d96da979a18c92f471c 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/pruneheap.c,v 1.20 2010/01/02 16:57:34 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/pruneheap.c,v 1.21 2010/02/08 04:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,7 +21,6 @@ #include "pgstat.h" #include "storage/bufmgr.h" #include "storage/off.h" -#include "utils/inval.h" #include "utils/rel.h" #include "utils/tqual.h" @@ -46,8 +45,7 @@ typedef struct static int heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, TransactionId OldestXmin, - PruneState *prstate, - bool redirect_move); + PruneState *prstate); static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid); static void heap_prune_record_redirect(PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum); @@ -123,8 +121,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin) */ if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree) { - /* OK to prune (though not to remove redirects) */ - (void) heap_page_prune(relation, buffer, OldestXmin, false, true); + /* OK to prune */ + (void) heap_page_prune(relation, buffer, OldestXmin, true); } /* And release buffer lock */ @@ -141,14 +139,6 @@ heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin) * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum). 
* - * If redirect_move is set, we remove redirecting line pointers by - * updating the root line pointer to point directly to the first non-dead - * tuple in the chain. NOTE: eliminating the redirect changes the first - * tuple's effective CTID, and is therefore unsafe except within VACUUM FULL. - * The only reason we support this capability at all is that by using it, - * VACUUM FULL need not cope with LP_REDIRECT items at all; which seems a - * good thing since VACUUM FULL is overly complicated already. - * * If report_stats is true then we send the number of reclaimed heap-only * tuples to pgstats. (This must be FALSE during vacuum, since vacuum will * send its own new total to pgstats, and we don't want this delta applied @@ -158,7 +148,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin) */ int heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, - bool redirect_move, bool report_stats) + bool report_stats) { int ndeleted = 0; Page page = BufferGetPage(buffer); @@ -172,17 +162,10 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, * logic as possible out of the critical section, and also ensures that * WAL replay will work the same as the normal case. * - * First, inform inval.c that upcoming CacheInvalidateHeapTuple calls are - * nontransactional. - */ - if (redirect_move) - BeginNonTransactionalInvalidation(); - - /* - * Initialize the new pd_prune_xid value to zero (indicating no prunable - * tuples). If we find any tuples which may soon become prunable, we will - * save the lowest relevant XID in new_prune_xid. Also initialize the rest - * of our working state. + * First, initialize the new pd_prune_xid value to zero (indicating no + * prunable tuples). If we find any tuples which may soon become + * prunable, we will save the lowest relevant XID in new_prune_xid. + * Also initialize the rest of our working state. */ prstate.new_prune_xid = InvalidTransactionId; prstate.latestRemovedXid = InvalidTransactionId; @@ -209,22 +192,9 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, /* Process this item or chain of items */ ndeleted += heap_prune_chain(relation, buffer, offnum, OldestXmin, - &prstate, - redirect_move); + &prstate); } - /* - * Send invalidation messages for any tuples we are about to move. It is - * safe to do this now, even though we could theoretically still fail - * before making the actual page update, because a useless cache - * invalidation doesn't hurt anything. Also, no one else can reload the - * tuples while we have exclusive buffer lock, so it's not too early to - * send the invals. This avoids sending the invals while inside the - * critical section, which is a good thing for robustness. 
- */ - if (redirect_move) - EndNonTransactionalInvalidation(); - /* Any error while applying the changes is critical */ START_CRIT_SECTION(); @@ -238,8 +208,7 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, heap_page_prune_execute(buffer, prstate.redirected, prstate.nredirected, prstate.nowdead, prstate.ndead, - prstate.nowunused, prstate.nunused, - redirect_move); + prstate.nowunused, prstate.nunused); /* * Update the page's pd_prune_xid field to either zero, or the lowest @@ -257,7 +226,7 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, MarkBufferDirty(buffer); /* - * Emit a WAL HEAP_CLEAN or HEAP_CLEAN_MOVE record showing what we did + * Emit a WAL HEAP_CLEAN record showing what we did */ if (!relation->rd_istemp) { @@ -267,7 +236,7 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, prstate.redirected, prstate.nredirected, prstate.nowdead, prstate.ndead, prstate.nowunused, prstate.nunused, - prstate.latestRemovedXid, redirect_move); + prstate.latestRemovedXid); PageSetLSN(BufferGetPage(buffer), recptr); PageSetTLI(BufferGetPage(buffer), ThisTimeLineID); @@ -349,16 +318,12 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED * state are added to nowunused[]. * - * If redirect_move is true, we intend to get rid of redirecting line pointers, - * not just make redirection entries. - * * Returns the number of tuples (to be) deleted from the page. */ static int heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, TransactionId OldestXmin, - PruneState *prstate, - bool redirect_move) + PruneState *prstate) { int ndeleted = 0; Page dp = (Page) BufferGetPage(buffer); @@ -366,7 +331,6 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, ItemId rootlp; HeapTupleHeader htup; OffsetNumber latestdead = InvalidOffsetNumber, - redirect_target = InvalidOffsetNumber, maxoff = PageGetMaxOffsetNumber(dp), offnum; OffsetNumber chainitems[MaxHeapTuplesPerPage]; @@ -592,12 +556,7 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, if (i >= nchain) heap_prune_record_dead(prstate, rootoffnum); else - { heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]); - /* If the redirection will be a move, need more processing */ - if (redirect_move) - redirect_target = chainitems[i]; - } } else if (nchain < 2 && ItemIdIsRedirected(rootlp)) { @@ -610,42 +569,6 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, */ heap_prune_record_dead(prstate, rootoffnum); } - else if (redirect_move && ItemIdIsRedirected(rootlp)) - { - /* - * If we desire to eliminate LP_REDIRECT items by moving tuples, make - * a redirection entry for each redirected root item; this will cause - * heap_page_prune_execute to actually do the move. (We get here only - * when there are no DEAD tuples in the chain; otherwise the - * redirection entry was made above.) - */ - heap_prune_record_redirect(prstate, rootoffnum, chainitems[1]); - redirect_target = chainitems[1]; - } - - /* - * If we are going to implement a redirect by moving tuples, we have to - * issue a cache invalidation against the redirection target tuple, - * because its CTID will be effectively changed by the move. 
Note that - * CacheInvalidateHeapTuple only queues the request, it doesn't send it; - * if we fail before reaching EndNonTransactionalInvalidation, nothing - * happens and no harm is done. - */ - if (OffsetNumberIsValid(redirect_target)) - { - ItemId firstlp = PageGetItemId(dp, redirect_target); - HeapTupleData firsttup; - - Assert(ItemIdIsNormal(firstlp)); - /* Set up firsttup to reference the tuple at its existing CTID */ - firsttup.t_data = (HeapTupleHeader) PageGetItem(dp, firstlp); - firsttup.t_len = ItemIdGetLength(firstlp); - ItemPointerSet(&firsttup.t_self, - BufferGetBlockNumber(buffer), - redirect_target); - firsttup.t_tableOid = RelationGetRelid(relation); - CacheInvalidateHeapTuple(relation, &firsttup); - } return ndeleted; } @@ -715,14 +638,13 @@ void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, - OffsetNumber *nowunused, int nunused, - bool redirect_move) + OffsetNumber *nowunused, int nunused) { Page page = (Page) BufferGetPage(buffer); OffsetNumber *offnum; int i; - /* Update all redirected or moved line pointers */ + /* Update all redirected line pointers */ offnum = redirected; for (i = 0; i < nredirected; i++) { @@ -730,30 +652,7 @@ heap_page_prune_execute(Buffer buffer, OffsetNumber tooff = *offnum++; ItemId fromlp = PageGetItemId(page, fromoff); - if (redirect_move) - { - /* Physically move the "to" item to the "from" slot */ - ItemId tolp = PageGetItemId(page, tooff); - HeapTupleHeader htup; - - *fromlp = *tolp; - ItemIdSetUnused(tolp); - - /* - * Change heap-only status of the tuple because after the line - * pointer manipulation, it's no longer a heap-only tuple, but is - * directly pointed to by index entries. - */ - Assert(ItemIdIsNormal(fromlp)); - htup = (HeapTupleHeader) PageGetItem(page, fromlp); - Assert(HeapTupleHeaderIsHeapOnly(htup)); - HeapTupleHeaderClearHeapOnly(htup); - } - else - { - /* Just insert a REDIRECT link at fromoff */ - ItemIdSetRedirect(fromlp, tooff); - } + ItemIdSetRedirect(fromlp, tooff); } /* Update all now-dead line pointers */ diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index e53315a83fb53d2d7388e37ae2eeea09d37f78de..57d6308adac3030c4afd2876b6b2ba685aac05b0 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/access/nbtree/README,v 1.21 2009/12/19 01:32:32 sriggs Exp $ +$PostgreSQL: pgsql/src/backend/access/nbtree/README,v 1.22 2010/02/08 04:33:53 tgl Exp $ Btree Indexing ============== @@ -171,9 +171,9 @@ We consider deleting an entire page from the btree only when it's become completely empty of items. (Merging partly-full pages would allow better space reuse, but it seems impractical to move existing data items left or right to make this happen --- a scan moving in the opposite direction -might miss the items if so. We could do it during VACUUM FULL, though.) -Also, we *never* delete the rightmost page on a tree level (this -restriction simplifies the traversal algorithms, as explained below). +might miss the items if so.) Also, we *never* delete the rightmost page +on a tree level (this restriction simplifies the traversal algorithms, as +explained below). 
To delete an empty page, we acquire write lock on its left sibling (if any), the target page itself, the right sibling (there must be one), and @@ -266,8 +266,7 @@ transactions that were running at the time of deletion are dead; which is overly strong, but is simple to implement within Postgres. When marked dead, a deleted page is labeled with the next-transaction counter value. VACUUM can reclaim the page for re-use when this transaction number is -older than the oldest open transaction. (NOTE: VACUUM FULL can reclaim -such pages immediately.) +older than the oldest open transaction. Reclaiming a page doesn't actually change its state on disk --- we simply record it in the shared-memory free space map, from which it will be diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 71b23644e2b06787d8384a234219fb940fce126c..b0eff770d0bf0f38cb5ae0559fc2b7ad95518439 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.117 2010/02/01 13:40:28 sriggs Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.118 2010/02/08 04:33:53 tgl Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -877,7 +877,7 @@ _bt_parent_deletion_safe(Relation rel, BlockNumber target, BTStack stack) * frequently. */ int -_bt_pagedel(Relation rel, Buffer buf, BTStack stack, bool vacuum_full) +_bt_pagedel(Relation rel, Buffer buf, BTStack stack) { int result; BlockNumber target, @@ -1207,14 +1207,13 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack, bool vacuum_full) /* * Mark the page itself deleted. It can be recycled when all current - * transactions are gone; or immediately if we're doing VACUUM FULL. + * transactions are gone. */ page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); opaque->btpo_flags &= ~BTP_HALF_DEAD; opaque->btpo_flags |= BTP_DELETED; - opaque->btpo.xact = - vacuum_full ? 
FrozenTransactionId : ReadNewTransactionId(); + opaque->btpo.xact = ReadNewTransactionId(); /* And update the metapage, if needed */ if (BufferIsValid(metabuf)) @@ -1350,7 +1349,7 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack, bool vacuum_full) { /* recursive call will release pbuf */ _bt_relbuf(rel, rbuf); - result = _bt_pagedel(rel, pbuf, stack->bts_parent, vacuum_full) + 1; + result = _bt_pagedel(rel, pbuf, stack->bts_parent) + 1; _bt_relbuf(rel, buf); } else if (parent_one_child && rightsib_empty) @@ -1358,7 +1357,7 @@ _bt_pagedel(Relation rel, Buffer buf, BTStack stack, bool vacuum_full) _bt_relbuf(rel, pbuf); _bt_relbuf(rel, buf); /* recursive call will release rbuf */ - result = _bt_pagedel(rel, rbuf, stack, vacuum_full) + 1; + result = _bt_pagedel(rel, rbuf, stack) + 1; } else { diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index f37cc8e26bce56c8a5767268c289fe0c935c0754..b0acaf257f27856f87b13057c7b46ddaaa5ce1c0 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.174 2010/01/02 16:57:35 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.175 2010/02/08 04:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -579,12 +579,12 @@ btvacuumcleanup(PG_FUNCTION_ARGS) IndexFreeSpaceMapVacuum(info->index); /* - * During a non-FULL vacuum it's quite possible for us to be fooled by - * concurrent page splits into double-counting some index tuples, so - * disbelieve any total that exceeds the underlying heap's count ... if we - * know that accurately. Otherwise this might just make matters worse. + * It's quite possible for us to be fooled by concurrent page splits into + * double-counting some index tuples, so disbelieve any total that exceeds + * the underlying heap's count ... if we know that accurately. Otherwise + * this might just make matters worse. */ - if (!info->vacuum_full && !info->estimated_count) + if (!info->estimated_count) { if (stats->num_index_tuples > info->num_heap_tuples) stats->num_index_tuples = info->num_heap_tuples; @@ -686,27 +686,6 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, } } - /* - * During VACUUM FULL, we truncate off any recyclable pages at the end of - * the index. In a normal vacuum it'd be unsafe to do this except by - * acquiring exclusive lock on the index and then rechecking all the - * pages; doesn't seem worth it. - */ - if (info->vacuum_full && vstate.lastUsedPage < num_pages - 1) - { - BlockNumber new_pages = vstate.lastUsedPage + 1; - - /* - * Okay to truncate. 
- */ - RelationTruncate(rel, new_pages); - - /* update statistics */ - stats->pages_removed += num_pages - new_pages; - vstate.totFreePages -= (num_pages - new_pages); - num_pages = new_pages; - } - /* * InHotStandby we need to scan right up to the end of the index for * correct locking, so we may need to write a WAL record for the final @@ -963,26 +942,12 @@ restart: MemoryContextReset(vstate->pagedelcontext); oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext); - ndel = _bt_pagedel(rel, buf, NULL, info->vacuum_full); + ndel = _bt_pagedel(rel, buf, NULL); /* count only this page, else may double-count parent */ if (ndel) stats->pages_deleted++; - /* - * During VACUUM FULL it's okay to recycle deleted pages immediately, - * since there can be no other transactions scanning the index. Note - * that we will only recycle the current page and not any parent pages - * that _bt_pagedel might have recursed to; this seems reasonable in - * the name of simplicity. (Trying to do otherwise would mean we'd - * have to sort the list of recyclable pages we're building.) - */ - if (ndel && info->vacuum_full) - { - RecordFreeIndexPage(rel, blkno); - vstate->totFreePages++; - } - MemoryContextSwitchTo(oldcontext); /* pagedel released buffer, so we shouldn't */ } diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index f83b3188125ec5b47320b385c5c6bcb0a9b122fe..83a7c98c14e50c02bc3d90e2ebca6b44532fee3a 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.59 2010/01/29 17:10:05 sriggs Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.60 2010/02/08 04:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1079,8 +1079,8 @@ btree_xlog_cleanup(void) Relation reln; reln = CreateFakeRelcacheEntry(action->node); - if (_bt_pagedel(reln, buf, NULL, true) == 0) - elog(PANIC, "btree_xlog_cleanup: _bt_pagdel failed"); + if (_bt_pagedel(reln, buf, NULL) == 0) + elog(PANIC, "btree_xlog_cleanup: _bt_pagedel failed"); FreeFakeRelcacheEntry(reln); } } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 156ed5c47bea49490fc4d2a7d4a2ad24e6773bc9..27ce9ac4c308820bf38bc2e68282bff56bb591f7 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.283 2010/02/07 20:48:09 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.284 2010/02/08 04:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -881,11 +881,9 @@ AtSubStart_ResourceOwner(void) * * Returns latest XID among xact and its children, or InvalidTransactionId * if the xact has no XID. (We compute that here just because it's easier.) - * - * This is exported only to support an ugly hack in VACUUM FULL. 
*/ -TransactionId -RecordTransactionCommit(bool isVacuumFull) +static TransactionId +RecordTransactionCommit(void) { TransactionId xid = GetTopTransactionIdIfAny(); bool markXidCommitted = TransactionIdIsValid(xid); @@ -950,8 +948,6 @@ RecordTransactionCommit(bool isVacuumFull) xlrec.xinfo = 0; if (RelcacheInitFileInval) xlrec.xinfo |= XACT_COMPLETION_UPDATE_RELCACHE_FILE; - if (isVacuumFull) - xlrec.xinfo |= XACT_COMPLETION_VACUUM_FULL; if (forceSyncCommit) xlrec.xinfo |= XACT_COMPLETION_FORCE_SYNC_COMMIT; @@ -1755,7 +1751,7 @@ CommitTransaction(void) /* * Here is where we really truly commit. */ - latestXid = RecordTransactionCommit(false); + latestXid = RecordTransactionCommit(); TRACE_POSTGRESQL_TRANSACTION_COMMIT(MyProc->lxid); @@ -4374,28 +4370,23 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid, XLogRecPtr lsn) LWLockRelease(XidGenLock); } - if (!InHotStandby || XactCompletionVacuumFull(xlrec)) + if (!InHotStandby) { /* * Mark the transaction committed in pg_clog. - * - * If InHotStandby and this is the first commit of a VACUUM FULL INPLACE - * we perform only the actual commit to clog. Strangely, there are two - * commits that share the same xid for every VFI, so we need to skip - * some steps for the first commit. It's OK to repeat the clog update - * when we see the second commit on a VFI. */ TransactionIdCommitTree(xid, xlrec->nsubxacts, sub_xids); } else { /* - * If a transaction completion record arrives that has as-yet unobserved - * subtransactions then this will not have been fully handled by the call - * to RecordKnownAssignedTransactionIds() in the main recovery loop in - * xlog.c. So we need to do bookkeeping again to cover that case. This is - * confusing and it is easy to think this call is irrelevant, which has - * happened three times in development already. Leave it in. + * If a transaction completion record arrives that has as-yet + * unobserved subtransactions then this will not have been fully + * handled by the call to RecordKnownAssignedTransactionIds() in the + * main recovery loop in xlog.c. So we need to do bookkeeping again to + * cover that case. This is confusing and it is easy to think this + * call is irrelevant, which has happened three times in development + * already. Leave it in. */ RecordKnownAssignedTransactionIds(max_xid); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f4b03f4c1beb8a8d94cca10a4e1a989308e6a79b..067827abeb636813654160c4f68b1a17988ad3b4 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.367 2010/02/07 20:48:09 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.368 2010/02/08 04:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2106,32 +2106,6 @@ XLogBackgroundFlush(void) END_CRIT_SECTION(); } -/* - * Flush any previous asynchronously-committed transactions' commit records. - * - * NOTE: it is unwise to assume that this provides any strong guarantees. - * In particular, because of the inexact LSN bookkeeping used by clog.c, - * we cannot assume that hint bits will be settable for these transactions. 
- */ -void -XLogAsyncCommitFlush(void) -{ - XLogRecPtr WriteRqstPtr; - - /* use volatile pointer to prevent code rearrangement */ - volatile XLogCtlData *xlogctl = XLogCtl; - - /* There's no asynchronously committed transactions during recovery */ - if (RecoveryInProgress()) - return; - - SpinLockAcquire(&xlogctl->info_lck); - WriteRqstPtr = xlogctl->asyncCommitLSN; - SpinLockRelease(&xlogctl->info_lck); - - XLogFlush(WriteRqstPtr); -} - /* * Test whether XLOG data has been flushed up to (at least) the given position. * diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index d8a3d47a0a718efe319449be28573d6ad891fc44..e1785c887c4f1bf4597cbaecdfac0bf70235dfde 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.334 2010/02/07 22:40:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.335 2010/02/08 04:33:53 tgl Exp $ * * * INTERFACE ROUTINES @@ -2107,7 +2107,6 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot) * Scan the index and gather up all the TIDs into a tuplesort object. */ ivinfo.index = indexRelation; - ivinfo.vacuum_full = false; ivinfo.analyze_only = false; ivinfo.estimated_count = true; ivinfo.message_level = DEBUG2; diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 067d375a84962fe1ebb8506f40354819812668d3..47179ec602fd84b130bc716529dc96b2086ba24c 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.149 2010/02/01 19:28:56 rhaas Exp $ + * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.150 2010/02/08 04:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -595,7 +595,6 @@ cleanup: IndexVacuumInfo ivinfo; ivinfo.index = Irel[ind]; - ivinfo.vacuum_full = false; ivinfo.analyze_only = true; ivinfo.estimated_count = true; ivinfo.message_level = elevel; diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index f10ae31ff183eccbf752d9c09485d69e44fc3100..1ed287306a928514c5791406b5ecc52b80d59438 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.323 2010/02/03 10:01:29 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.324 2010/02/08 04:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2166,7 +2166,7 @@ CopyFrom(CopyState cstate) if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, false); + estate); /* AFTER ROW INSERT Triggers */ ExecARInsertTriggers(estate, resultRelInfo, tuple, diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index e18ed084b410aa09b0464417068c46b441587b43..c96c8400cc614f6f6bf2423c5fc3302b922adc4f 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -3,9 +3,10 @@ * vacuum.c * The postgres vacuum cleaner. * - * This file includes the "full" version of VACUUM, as well as control code - * used by all three of full VACUUM, lazy VACUUM, and ANALYZE. See - * vacuumlazy.c and analyze.c for the rest of the code for the latter two. + * This file now includes only control and dispatch code for VACUUM and + * ANALYZE commands. 
Regular VACUUM is implemented in vacuumlazy.c, + * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled + * in cluster.c. * * * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group @@ -13,48 +14,33 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.404 2010/02/07 20:48:10 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.405 2010/02/08 04:33:53 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#include <sys/time.h> -#include <unistd.h> - #include "access/clog.h" #include "access/genam.h" #include "access/heapam.h" #include "access/transam.h" -#include "access/visibilitymap.h" #include "access/xact.h" -#include "access/xlog.h" -#include "catalog/catalog.h" #include "catalog/namespace.h" #include "catalog/pg_database.h" #include "catalog/pg_namespace.h" -#include "catalog/storage.h" #include "commands/cluster.h" -#include "commands/dbcommands.h" #include "commands/vacuum.h" -#include "executor/executor.h" #include "miscadmin.h" #include "pgstat.h" #include "postmaster/autovacuum.h" #include "storage/bufmgr.h" -#include "storage/freespace.h" #include "storage/lmgr.h" #include "storage/proc.h" #include "storage/procarray.h" #include "utils/acl.h" -#include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/guc.h" -#include "utils/inval.h" -#include "utils/lsyscache.h" #include "utils/memutils.h" -#include "utils/pg_rusage.h" -#include "utils/relcache.h" #include "utils/snapmgr.h" #include "utils/syscache.h" #include "utils/tqual.h" @@ -66,152 +52,9 @@ int vacuum_freeze_min_age; int vacuum_freeze_table_age; -/* - * VacPage structures keep track of each page on which we find useful - * amounts of free space. - */ -typedef struct VacPageData -{ - BlockNumber blkno; /* BlockNumber of this Page */ - Size free; /* FreeSpace on this Page */ - uint16 offsets_used; /* Number of OffNums used by vacuum */ - uint16 offsets_free; /* Number of OffNums free or to be free */ - OffsetNumber offsets[1]; /* Array of free OffNums */ -} VacPageData; - -typedef VacPageData *VacPage; - -typedef struct VacPageListData -{ - BlockNumber empty_end_pages; /* Number of "empty" end-pages */ - int num_pages; /* Number of pages in pagedesc */ - int num_allocated_pages; /* Number of allocated pages in - * pagedesc */ - VacPage *pagedesc; /* Descriptions of pages */ -} VacPageListData; - -typedef VacPageListData *VacPageList; - -/* - * The "vtlinks" array keeps information about each recently-updated tuple - * ("recent" meaning its XMAX is too new to let us recycle the tuple). - * We store the tuple's own TID as well as its t_ctid (its link to the next - * newer tuple version). Searching in this array allows us to follow update - * chains backwards from newer to older tuples. When we move a member of an - * update chain, we must move *all* the live members of the chain, so that we - * can maintain their t_ctid link relationships (we must not just overwrite - * t_ctid in an existing tuple). - * - * Note: because t_ctid links can be stale (this would only occur if a prior - * VACUUM crashed partway through), it is possible that new_tid points to an - * empty slot or unrelated tuple. We have to check the linkage as we follow - * it, just as is done in EvalPlanQualFetch. 
- */ -typedef struct VTupleLinkData -{ - ItemPointerData new_tid; /* t_ctid of an updated tuple */ - ItemPointerData this_tid; /* t_self of the tuple */ -} VTupleLinkData; - -typedef VTupleLinkData *VTupleLink; - -/* - * We use an array of VTupleMoveData to plan a chain tuple move fully - * before we do it. - */ -typedef struct VTupleMoveData -{ - ItemPointerData tid; /* tuple ID */ - VacPage vacpage; /* where to move it to */ - bool cleanVpd; /* clean vacpage before using? */ -} VTupleMoveData; - -typedef VTupleMoveData *VTupleMove; - -/* - * VRelStats contains the data acquired by scan_heap for use later - */ -typedef struct VRelStats -{ - /* miscellaneous statistics */ - BlockNumber rel_pages; /* pages in relation */ - double rel_tuples; /* tuples that remain after vacuuming */ - double rel_indexed_tuples; /* indexed tuples that remain */ - Size min_tlen; /* min surviving tuple size */ - Size max_tlen; /* max surviving tuple size */ - bool hasindex; - /* vtlinks array for tuple chain following - sorted by new_tid */ - int num_vtlinks; - VTupleLink vtlinks; - TransactionId latestRemovedXid; -} VRelStats; - -/*---------------------------------------------------------------------- - * ExecContext: - * - * As these variables always appear together, we put them into one struct - * and pull initialization and cleanup into separate routines. - * ExecContext is used by repair_frag() and move_xxx_tuple(). More - * accurately: It is *used* only in move_xxx_tuple(), but because this - * routine is called many times, we initialize the struct just once in - * repair_frag() and pass it on to move_xxx_tuple(). - */ -typedef struct ExecContextData -{ - ResultRelInfo *resultRelInfo; - EState *estate; - TupleTableSlot *slot; -} ExecContextData; - -typedef ExecContextData *ExecContext; - -static void -ExecContext_Init(ExecContext ec, Relation rel) -{ - TupleDesc tupdesc = RelationGetDescr(rel); - - /* - * We need a ResultRelInfo and an EState so we can use the regular - * executor's index-entry-making machinery. 
- */ - ec->estate = CreateExecutorState(); - - ec->resultRelInfo = makeNode(ResultRelInfo); - ec->resultRelInfo->ri_RangeTableIndex = 1; /* dummy */ - ec->resultRelInfo->ri_RelationDesc = rel; - ec->resultRelInfo->ri_TrigDesc = NULL; /* we don't fire triggers */ - - ExecOpenIndices(ec->resultRelInfo); - - ec->estate->es_result_relations = ec->resultRelInfo; - ec->estate->es_num_result_relations = 1; - ec->estate->es_result_relation_info = ec->resultRelInfo; - - /* Set up a tuple slot too */ - ec->slot = MakeSingleTupleTableSlot(tupdesc); -} - -static void -ExecContext_Finish(ExecContext ec) -{ - ExecDropSingleTupleTableSlot(ec->slot); - ExecCloseIndices(ec->resultRelInfo); - FreeExecutorState(ec->estate); -} - -/* - * End of ExecContext Implementation - *---------------------------------------------------------------------- - */ /* A few variables that don't seem worth passing around as parameters */ static MemoryContext vac_context = NULL; - -static int elevel = -1; - -static TransactionId OldestXmin; -static TransactionId FreezeLimit; - static BufferAccessStrategy vac_strategy; @@ -221,50 +64,6 @@ static List *get_rel_oids(Oid relid, const RangeVar *vacrel, static void vac_truncate_clog(TransactionId frozenXID); static void vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound, bool *scanned_all); -static bool full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt); -static void scan_heap(VRelStats *vacrelstats, Relation onerel, - VacPageList vacuum_pages, VacPageList fraged_pages); -static bool repair_frag(VRelStats *vacrelstats, Relation onerel, - VacPageList vacuum_pages, VacPageList fraged_pages, - int nindexes, Relation *Irel); -static void move_chain_tuple(VRelStats *vacrelstats, Relation rel, - Buffer old_buf, Page old_page, HeapTuple old_tup, - Buffer dst_buf, Page dst_page, VacPage dst_vacpage, - ExecContext ec, ItemPointer ctid, bool cleanVpd); -static void move_plain_tuple(Relation rel, - Buffer old_buf, Page old_page, HeapTuple old_tup, - Buffer dst_buf, Page dst_page, VacPage dst_vacpage, - ExecContext ec); -static void update_hint_bits(Relation rel, VacPageList fraged_pages, - int num_fraged_pages, BlockNumber last_move_dest_block, - int num_moved); -static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, - VacPageList vacpagelist); -static void vacuum_page(VRelStats *vacrelstats, Relation onerel, Buffer buffer, VacPage vacpage); -static void vacuum_index(VacPageList vacpagelist, Relation indrel, - double num_tuples, int keep_tuples); -static void scan_index(Relation indrel, double num_tuples); -static bool tid_reaped(ItemPointer itemptr, void *state); -static void vac_update_fsm(Relation onerel, VacPageList fraged_pages, - BlockNumber rel_pages); -static VacPage copy_vac_page(VacPage vacpage); -static void vpage_insert(VacPageList vacpagelist, VacPage vpnew); -static void *vac_bsearch(const void *key, const void *base, - size_t nelem, size_t size, - int (*compar) (const void *, const void *)); -static int vac_cmp_blk(const void *left, const void *right); -static int vac_cmp_offno(const void *left, const void *right); -static int vac_cmp_vtlinks(const void *left, const void *right); -static bool enough_space(VacPage vacpage, Size len); -static Size PageGetFreeSpaceWithFillFactor(Relation relation, Page page); - - -/**************************************************************************** - * * - * Code common to all flavors of VACUUM and ANALYZE * - * * - **************************************************************************** - */ /* 
@@ -304,27 +103,14 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast, Assert((vacstmt->options & VACOPT_VACUUM) || !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE))); Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL); - Assert((vacstmt->options & VACOPT_FULL) || - !(vacstmt->options & VACOPT_INPLACE)); stmttype = (vacstmt->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE"; - if (vacstmt->options & VACOPT_VERBOSE) - elevel = INFO; - else - elevel = DEBUG2; - /* * We cannot run VACUUM inside a user transaction block; if we were inside * a transaction, then our commit- and start-transaction-command calls - * would not have the intended effect! Furthermore, the forced commit that - * occurs before truncating the relation's file would have the effect of - * committing the rest of the user's transaction too, which would - * certainly not be the desired behavior. (This only applies to VACUUM - * FULL, though. We could in theory run lazy VACUUM inside a transaction - * block, but we choose to disallow that case because we'd rather commit - * as soon as possible after finishing the vacuum. This is mainly so that - * we can let go the AccessExclusiveLock that we may be holding.) + * would not have the intended effect! There are numerous other subtle + * dependencies on this, too. * * ANALYZE (without VACUUM) can run either way. */ @@ -592,7 +378,7 @@ vacuum_set_xid_limits(int freeze_min_age, * We can always ignore processes running lazy vacuum. This is because we * use these values only for deciding which tuples we must keep in the * tables. Since lazy vacuum doesn't write its XID anywhere, it's safe to - * ignore it. In theory it could be problematic to ignore lazy vacuums on + * ignore it. In theory it could be problematic to ignore lazy vacuums in * a full vacuum, but keep in mind that only one vacuum process can be * working on a particular table at any time, and that each vacuum is * always an independent transaction. @@ -696,8 +482,7 @@ vacuum_set_xid_limits(int freeze_min_age, * somebody vacuuming pg_class might think they could delete a tuple * marked with xmin = our xid. * - * This routine is shared by full VACUUM, lazy VACUUM, and stand-alone - * ANALYZE. + * This routine is shared by VACUUM and stand-alone ANALYZE. */ void vac_update_relstats(Relation relation, @@ -802,8 +587,6 @@ vac_update_relstats(Relation relation, * safe since the new value is correct whether or not this transaction * commits. As with vac_update_relstats, this avoids leaving dead tuples * behind after a VACUUM. - * - * This routine is shared by full and lazy VACUUM. */ void vac_update_datfrozenxid(void) @@ -904,9 +687,9 @@ vac_update_datfrozenxid(void) * The passed XID is simply the one I just wrote into my pg_database * entry. It's used to initialize the "min" calculation. * - * This routine is shared by full and lazy VACUUM. Note that it's - * only invoked when we've managed to change our DB's datfrozenxid - * entry, or we found that the shared XID-wrap-limit info is stale. + * This routine is only invoked when we've managed to change our + * DB's datfrozenxid entry, or we found that the shared XID-wrap-limit + * info is stale.
*/ static void vac_truncate_clog(TransactionId frozenXID) @@ -981,14 +764,6 @@ vac_truncate_clog(TransactionId frozenXID) } -/**************************************************************************** - * * - * Code common to both flavors of VACUUM * - * * - **************************************************************************** - */ - - /* * vacuum_rel() -- vacuum one heap relation * @@ -1034,7 +809,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound, * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets * other concurrent VACUUMs know that they can ignore this one while * determining their OldestXmin. (The reason we don't set it during a - * full VACUUM is exactly that we may have to run user- defined + * full VACUUM is exactly that we may have to run user-defined * functions for functional indexes, and we want to make sure that if * they use the snapshot set above, any tuples it requires can't get * removed from other tables. An index function that depends on the @@ -1182,23 +957,22 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound, save_nestlevel = NewGUCNestLevel(); /* - * Do the actual work --- either FULL, FULL INPLACE, or "lazy" vacuum. + * Do the actual work --- either FULL or "lazy" vacuum */ - if (!(vacstmt->options & VACOPT_FULL)) - heldoff = lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all); - else if (vacstmt->options & VACOPT_INPLACE) - heldoff = full_vacuum_rel(onerel, vacstmt); - else + if (vacstmt->options & VACOPT_FULL) { - /* close relation before clustering, but hold lock until commit */ + /* close relation before vacuuming, but hold lock until commit */ relation_close(onerel, NoLock); onerel = NULL; + /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */ cluster_rel(relid, InvalidOid, false, (vacstmt->options & VACOPT_VERBOSE) != 0, vacstmt->freeze_min_age, vacstmt->freeze_table_age); heldoff = false; } + else + heldoff = lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all); /* Roll back any GUC changes executed by index functions */ AtEOXact_GUC(false, save_nestlevel); @@ -1237,2680 +1011,60 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound, } -/**************************************************************************** - * * - * Code for VACUUM FULL (only) * - * * - **************************************************************************** - */ - - -/* - * full_vacuum_rel() -- perform FULL VACUUM for one heap relation - * - * This routine vacuums a single heap, cleans out its indexes, and - * updates its num_pages and num_tuples statistics. - * - * At entry, we have already established a transaction and opened - * and locked the relation. - * - * The return value indicates whether this function has held off - * interrupts -- caller must RESUME_INTERRUPTS() after commit if true. - */ -static bool -full_vacuum_rel(Relation onerel, VacuumStmt *vacstmt) -{ - VacPageListData vacuum_pages; /* List of pages to vacuum and/or - * clean indexes */ - VacPageListData fraged_pages; /* List of pages with space enough for - * re-using */ - Relation *Irel; - int nindexes, - i; - VRelStats *vacrelstats; - bool heldoff = false; - - vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age, - onerel->rd_rel->relisshared, - &OldestXmin, &FreezeLimit, NULL); - - /* - * Flush any previous async-commit transactions. 
This does not guarantee - * that we will be able to set hint bits for tuples they inserted, but it - * improves the probability, especially in simple sequential-commands - * cases. See scan_heap() and repair_frag() for more about this. - */ - XLogAsyncCommitFlush(); - - /* - * Set up statistics-gathering machinery. - */ - vacrelstats = (VRelStats *) palloc(sizeof(VRelStats)); - vacrelstats->rel_pages = 0; - vacrelstats->rel_tuples = 0; - vacrelstats->rel_indexed_tuples = 0; - vacrelstats->hasindex = false; - vacrelstats->latestRemovedXid = InvalidTransactionId; - - /* scan the heap */ - vacuum_pages.num_pages = fraged_pages.num_pages = 0; - scan_heap(vacrelstats, onerel, &vacuum_pages, &fraged_pages); - - /* Now open all indexes of the relation */ - vac_open_indexes(onerel, AccessExclusiveLock, &nindexes, &Irel); - if (nindexes > 0) - vacrelstats->hasindex = true; - - /* Clean/scan index relation(s) */ - if (Irel != NULL) - { - if (vacuum_pages.num_pages > 0) - { - for (i = 0; i < nindexes; i++) - vacuum_index(&vacuum_pages, Irel[i], - vacrelstats->rel_indexed_tuples, 0); - } - else - { - /* just scan indexes to update statistic */ - for (i = 0; i < nindexes; i++) - scan_index(Irel[i], vacrelstats->rel_indexed_tuples); - } - } - - if (fraged_pages.num_pages > 0) - { - /* Try to shrink heap */ - heldoff = repair_frag(vacrelstats, onerel, &vacuum_pages, &fraged_pages, - nindexes, Irel); - vac_close_indexes(nindexes, Irel, NoLock); - } - else - { - vac_close_indexes(nindexes, Irel, NoLock); - if (vacuum_pages.num_pages > 0) - { - /* Clean pages from vacuum_pages list */ - vacuum_heap(vacrelstats, onerel, &vacuum_pages); - } - } - - /* update thefree space map with final free space info, and vacuum it */ - vac_update_fsm(onerel, &fraged_pages, vacrelstats->rel_pages); - FreeSpaceMapVacuum(onerel); - - /* update statistics in pg_class */ - vac_update_relstats(onerel, - vacrelstats->rel_pages, vacrelstats->rel_tuples, - vacrelstats->hasindex, FreezeLimit); - - /* report results to the stats collector, too */ - pgstat_report_vacuum(RelationGetRelid(onerel), - onerel->rd_rel->relisshared, - true, - vacrelstats->rel_tuples); - - return heldoff; -} - - /* - * scan_heap() -- scan an open heap relation - * - * This routine sets commit status bits, constructs vacuum_pages (list - * of pages we need to compact free space on and/or clean indexes of - * deleted tuples), constructs fraged_pages (list of pages with free - * space that tuples could be moved into), and calculates statistics - * on the number of live tuples in the heap. + * Open all the indexes of the given relation, obtaining the specified kind + * of lock on each. Return an array of Relation pointers for the indexes + * into *Irel, and the number of indexes into *nindexes. 
*/ -static void -scan_heap(VRelStats *vacrelstats, Relation onerel, - VacPageList vacuum_pages, VacPageList fraged_pages) +void +vac_open_indexes(Relation relation, LOCKMODE lockmode, + int *nindexes, Relation **Irel) { - BlockNumber nblocks, - blkno; - char *relname; - VacPage vacpage; - BlockNumber empty_pages, - empty_end_pages; - double num_tuples, - num_indexed_tuples, - tups_vacuumed, - nkeep, - nunused; - double free_space, - usable_free_space; - Size min_tlen = MaxHeapTupleSize; - Size max_tlen = 0; - bool do_shrinking = true; - VTupleLink vtlinks = (VTupleLink) palloc(100 * sizeof(VTupleLinkData)); - int num_vtlinks = 0; - int free_vtlinks = 100; - PGRUsage ru0; - - pg_rusage_init(&ru0); - - relname = RelationGetRelationName(onerel); - ereport(elevel, - (errmsg("vacuuming \"%s.%s\"", - get_namespace_name(RelationGetNamespace(onerel)), - relname))); - - empty_pages = empty_end_pages = 0; - num_tuples = num_indexed_tuples = tups_vacuumed = nkeep = nunused = 0; - free_space = 0; - - nblocks = RelationGetNumberOfBlocks(onerel); - - /* - * We initially create each VacPage item in a maximal-sized workspace, - * then copy the workspace into a just-large-enough copy. - */ - vacpage = (VacPage) palloc(sizeof(VacPageData) + MaxOffsetNumber * sizeof(OffsetNumber)); - - for (blkno = 0; blkno < nblocks; blkno++) - { - Page page, - tempPage = NULL; - bool do_reap, - do_frag; - Buffer buf; - OffsetNumber offnum, - maxoff; - bool notup; - OffsetNumber frozen[MaxOffsetNumber]; - int nfrozen; - - vacuum_delay_point(); - - buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno, RBM_NORMAL, - vac_strategy); - page = BufferGetPage(buf); - - /* - * Since we are holding exclusive lock on the relation, no other - * backend can be accessing the page; however it is possible that the - * background writer will try to write the page if it's already marked - * dirty. To ensure that invalid data doesn't get written to disk, we - * must take exclusive buffer lock wherever we potentially modify - * pages. In fact, we insist on cleanup lock so that we can safely - * call heap_page_prune(). (This might be overkill, since the - * bgwriter pays no attention to individual tuples, but on the other - * hand it's unlikely that the bgwriter has this particular page - * pinned at this instant. So violating the coding rule would buy us - * little anyway.) - */ - LockBufferForCleanup(buf); - - vacpage->blkno = blkno; - vacpage->offsets_used = 0; - vacpage->offsets_free = 0; - - if (PageIsNew(page)) - { - VacPage vacpagecopy; - - ereport(WARNING, - (errmsg("relation \"%s\" page %u is uninitialized --- fixing", - relname, blkno))); - PageInit(page, BufferGetPageSize(buf), 0); - MarkBufferDirty(buf); - vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, page); - free_space += vacpage->free; - empty_pages++; - empty_end_pages++; - vacpagecopy = copy_vac_page(vacpage); - vpage_insert(vacuum_pages, vacpagecopy); - vpage_insert(fraged_pages, vacpagecopy); - UnlockReleaseBuffer(buf); - continue; - } - - if (PageIsEmpty(page)) - { - VacPage vacpagecopy; - - vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, page); - free_space += vacpage->free; - empty_pages++; - empty_end_pages++; - vacpagecopy = copy_vac_page(vacpage); - vpage_insert(vacuum_pages, vacpagecopy); - vpage_insert(fraged_pages, vacpagecopy); - UnlockReleaseBuffer(buf); - continue; - } - - /* - * Prune all HOT-update chains in this page. 
- * - * We use the redirect_move option so that redirecting line pointers - * get collapsed out; this allows us to not worry about them below. - * - * We count tuples removed by the pruning step as removed by VACUUM. - */ - tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, - true, false); - - /* - * Now scan the page to collect vacuumable items and check for tuples - * requiring freezing. - */ - nfrozen = 0; - notup = true; - maxoff = PageGetMaxOffsetNumber(page); - for (offnum = FirstOffsetNumber; - offnum <= maxoff; - offnum = OffsetNumberNext(offnum)) - { - ItemId itemid = PageGetItemId(page, offnum); - bool tupgone = false; - HeapTupleData tuple; - - /* - * Collect un-used items too - it's possible to have indexes - * pointing here after crash. (That's an ancient comment and is - * likely obsolete with WAL, but we might as well continue to - * check for such problems.) - */ - if (!ItemIdIsUsed(itemid)) - { - vacpage->offsets[vacpage->offsets_free++] = offnum; - nunused += 1; - continue; - } - - /* - * DEAD item pointers are to be vacuumed normally; but we don't - * count them in tups_vacuumed, else we'd be double-counting (at - * least in the common case where heap_page_prune() just freed up - * a non-HOT tuple). - */ - if (ItemIdIsDead(itemid)) - { - vacpage->offsets[vacpage->offsets_free++] = offnum; - continue; - } - - /* Shouldn't have any redirected items anymore */ - if (!ItemIdIsNormal(itemid)) - elog(ERROR, "relation \"%s\" TID %u/%u: unexpected redirect item", - relname, blkno, offnum); - - tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); - tuple.t_len = ItemIdGetLength(itemid); - ItemPointerSet(&(tuple.t_self), blkno, offnum); - - switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf)) - { - case HEAPTUPLE_LIVE: - /* Tuple is good --- but let's do some validity checks */ - if (onerel->rd_rel->relhasoids && - !OidIsValid(HeapTupleGetOid(&tuple))) - elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid", - relname, blkno, offnum); - - /* - * The shrinkage phase of VACUUM FULL requires that all - * live tuples have XMIN_COMMITTED set --- see comments in - * repair_frag()'s walk-along-page loop. Use of async - * commit may prevent HeapTupleSatisfiesVacuum from - * setting the bit for a recently committed tuple. Rather - * than trying to handle this corner case, we just give up - * and don't shrink. - */ - if (do_shrinking && - !(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED)) - { - ereport(LOG, - (errmsg("relation \"%s\" TID %u/%u: XMIN_COMMITTED not set for transaction %u --- cannot shrink relation", - relname, blkno, offnum, - HeapTupleHeaderGetXmin(tuple.t_data)))); - do_shrinking = false; - } - break; - case HEAPTUPLE_DEAD: - - /* - * Ordinarily, DEAD tuples would have been removed by - * heap_page_prune(), but it's possible that the tuple - * state changed since heap_page_prune() looked. In - * particular an INSERT_IN_PROGRESS tuple could have - * changed to DEAD if the inserter aborted. So this - * cannot be considered an error condition, though it does - * suggest that someone released a lock early. - * - * If the tuple is HOT-updated then it must only be - * removed by a prune operation; so we keep it as if it - * were RECENTLY_DEAD, and abandon shrinking. (XXX is it - * worth trying to make the shrinking code smart enough to - * handle this? It's an unusual corner case.) 
- * - * DEAD heap-only tuples can safely be removed if they - * aren't themselves HOT-updated, although this is a bit - * inefficient since we'll uselessly try to remove index - * entries for them. - */ - if (HeapTupleIsHotUpdated(&tuple)) - { - nkeep += 1; - if (do_shrinking) - ereport(LOG, - (errmsg("relation \"%s\" TID %u/%u: dead HOT-updated tuple --- cannot shrink relation", - relname, blkno, offnum))); - do_shrinking = false; - } - else - { - tupgone = true; /* we can delete the tuple */ - - /* - * We need not require XMIN_COMMITTED or - * XMAX_COMMITTED to be set, since we will remove the - * tuple without any further examination of its hint - * bits. - */ - } - break; - case HEAPTUPLE_RECENTLY_DEAD: - - /* - * If tuple is recently deleted then we must not remove it - * from relation. - */ - nkeep += 1; - - /* - * As with the LIVE case, shrinkage requires - * XMIN_COMMITTED to be set. - */ - if (do_shrinking && - !(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED)) - { - ereport(LOG, - (errmsg("relation \"%s\" TID %u/%u: XMIN_COMMITTED not set for transaction %u --- cannot shrink relation", - relname, blkno, offnum, - HeapTupleHeaderGetXmin(tuple.t_data)))); - do_shrinking = false; - } - - /* - * If we do shrinking and this tuple is updated one then - * remember it to construct updated tuple dependencies. - */ - if (do_shrinking && - !(ItemPointerEquals(&(tuple.t_self), - &(tuple.t_data->t_ctid)))) - { - if (free_vtlinks == 0) - { - free_vtlinks = 1000; - vtlinks = (VTupleLink) repalloc(vtlinks, - (free_vtlinks + num_vtlinks) * - sizeof(VTupleLinkData)); - } - vtlinks[num_vtlinks].new_tid = tuple.t_data->t_ctid; - vtlinks[num_vtlinks].this_tid = tuple.t_self; - free_vtlinks--; - num_vtlinks++; - } - break; - case HEAPTUPLE_INSERT_IN_PROGRESS: - - /* - * This should not happen, since we hold exclusive lock on - * the relation; shouldn't we raise an error? (Actually, - * it can happen in system catalogs, since we tend to - * release write lock before commit there.) As above, we - * can't apply repair_frag() if the tuple state is - * uncertain. - */ - if (do_shrinking) - ereport(LOG, - (errmsg("relation \"%s\" TID %u/%u: InsertTransactionInProgress %u --- cannot shrink relation", - relname, blkno, offnum, - HeapTupleHeaderGetXmin(tuple.t_data)))); - do_shrinking = false; - break; - case HEAPTUPLE_DELETE_IN_PROGRESS: - - /* - * This should not happen, since we hold exclusive lock on - * the relation; shouldn't we raise an error? (Actually, - * it can happen in system catalogs, since we tend to - * release write lock before commit there.) As above, we - * can't apply repair_frag() if the tuple state is - * uncertain. - */ - if (do_shrinking) - ereport(LOG, - (errmsg("relation \"%s\" TID %u/%u: DeleteTransactionInProgress %u --- cannot shrink relation", - relname, blkno, offnum, - HeapTupleHeaderGetXmax(tuple.t_data)))); - do_shrinking = false; - break; - default: - elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); - break; - } - - if (tupgone) - { - ItemId lpp; - - HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data, - &vacrelstats->latestRemovedXid); - - /* - * Here we are building a temporary copy of the page with dead - * tuples removed. Below we will apply - * PageRepairFragmentation to the copy, so that we can - * determine how much space will be available after removal of - * dead tuples. But note we are NOT changing the real page - * yet... 
- */ - if (tempPage == NULL) - { - Size pageSize; - - pageSize = PageGetPageSize(page); - tempPage = (Page) palloc(pageSize); - memcpy(tempPage, page, pageSize); - } - - /* mark it unused on the temp page */ - lpp = PageGetItemId(tempPage, offnum); - ItemIdSetUnused(lpp); - - vacpage->offsets[vacpage->offsets_free++] = offnum; - tups_vacuumed += 1; - } - else - { - num_tuples += 1; - if (!HeapTupleIsHeapOnly(&tuple)) - num_indexed_tuples += 1; - notup = false; - if (tuple.t_len < min_tlen) - min_tlen = tuple.t_len; - if (tuple.t_len > max_tlen) - max_tlen = tuple.t_len; - - /* - * Each non-removable tuple must be checked to see if it needs - * freezing. - */ - if (heap_freeze_tuple(tuple.t_data, FreezeLimit, - InvalidBuffer)) - frozen[nfrozen++] = offnum; - } - } /* scan along page */ - - if (tempPage != NULL) - { - /* Some tuples are removable; figure free space after removal */ - PageRepairFragmentation(tempPage); - vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, tempPage); - pfree(tempPage); - do_reap = true; - } - else - { - /* Just use current available space */ - vacpage->free = PageGetFreeSpaceWithFillFactor(onerel, page); - /* Need to reap the page if it has UNUSED or DEAD line pointers */ - do_reap = (vacpage->offsets_free > 0); - } - - free_space += vacpage->free; - - /* - * Add the page to vacuum_pages if it requires reaping, and add it to - * fraged_pages if it has a useful amount of free space. "Useful" - * means enough for a minimal-sized tuple. But we don't know that - * accurately near the start of the relation, so add pages - * unconditionally if they have >= BLCKSZ/10 free space. Also - * forcibly add pages with no live tuples, to avoid confusing the - * empty_end_pages logic. (In the presence of unreasonably small - * fillfactor, it seems possible that such pages might not pass the - * free-space test, but they had better be in the list anyway.) - */ - do_frag = (vacpage->free >= min_tlen || vacpage->free >= BLCKSZ / 10 || - notup); - - if (do_reap || do_frag) - { - VacPage vacpagecopy = copy_vac_page(vacpage); - - if (do_reap) - vpage_insert(vacuum_pages, vacpagecopy); - if (do_frag) - vpage_insert(fraged_pages, vacpagecopy); - } - - /* - * Include the page in empty_end_pages if it will be empty after - * vacuuming; this is to keep us from using it as a move destination. - * Note that such pages are guaranteed to be in fraged_pages. - */ - if (notup) - { - empty_pages++; - empty_end_pages++; - } - else - empty_end_pages = 0; - - /* - * If we froze any tuples, mark the buffer dirty, and write a WAL - * record recording the changes. We must log the changes to be - * crash-safe against future truncation of CLOG. 
- */ - if (nfrozen > 0) - { - MarkBufferDirty(buf); - /* no XLOG for temp tables, though */ - if (!onerel->rd_istemp) - { - XLogRecPtr recptr; - - recptr = log_heap_freeze(onerel, buf, FreezeLimit, - frozen, nfrozen); - PageSetLSN(page, recptr); - PageSetTLI(page, ThisTimeLineID); - } - } - - UnlockReleaseBuffer(buf); - } - - pfree(vacpage); + List *indexoidlist; + ListCell *indexoidscan; + int i; - /* save stats in the rel list for use later */ - vacrelstats->rel_tuples = num_tuples; - vacrelstats->rel_indexed_tuples = num_indexed_tuples; - vacrelstats->rel_pages = nblocks; - if (num_tuples == 0) - min_tlen = max_tlen = 0; - vacrelstats->min_tlen = min_tlen; - vacrelstats->max_tlen = max_tlen; + Assert(lockmode != NoLock); - vacuum_pages->empty_end_pages = empty_end_pages; - fraged_pages->empty_end_pages = empty_end_pages; + indexoidlist = RelationGetIndexList(relation); - /* - * Clear the fraged_pages list if we found we couldn't shrink. Else, - * remove any "empty" end-pages from the list, and compute usable free - * space = free space in remaining pages. - */ - if (do_shrinking) - { - int i; + *nindexes = list_length(indexoidlist); - Assert((BlockNumber) fraged_pages->num_pages >= empty_end_pages); - fraged_pages->num_pages -= empty_end_pages; - usable_free_space = 0; - for (i = 0; i < fraged_pages->num_pages; i++) - usable_free_space += fraged_pages->pagedesc[i]->free; - } + if (*nindexes > 0) + *Irel = (Relation *) palloc(*nindexes * sizeof(Relation)); else - { - fraged_pages->num_pages = 0; - usable_free_space = 0; - } + *Irel = NULL; - /* don't bother to save vtlinks if we will not call repair_frag */ - if (fraged_pages->num_pages > 0 && num_vtlinks > 0) - { - qsort((char *) vtlinks, num_vtlinks, sizeof(VTupleLinkData), - vac_cmp_vtlinks); - vacrelstats->vtlinks = vtlinks; - vacrelstats->num_vtlinks = num_vtlinks; - } - else + i = 0; + foreach(indexoidscan, indexoidlist) { - vacrelstats->vtlinks = NULL; - vacrelstats->num_vtlinks = 0; - pfree(vtlinks); + Oid indexoid = lfirst_oid(indexoidscan); + + (*Irel)[i++] = index_open(indexoid, lockmode); } - ereport(elevel, - (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages", - RelationGetRelationName(onerel), - tups_vacuumed, num_tuples, nblocks), - errdetail("%.0f dead row versions cannot be removed yet.\n" - "Nonremovable row versions range from %lu to %lu bytes long.\n" - "There were %.0f unused item pointers.\n" - "Total free space (including removable row versions) is %.0f bytes.\n" - "%u pages are or will become empty, including %u at the end of the table.\n" - "%u pages containing %.0f free bytes are potential move destinations.\n" - "%s.", - nkeep, - (unsigned long) min_tlen, (unsigned long) max_tlen, - nunused, - free_space, - empty_pages, empty_end_pages, - fraged_pages->num_pages, usable_free_space, - pg_rusage_show(&ru0)))); + list_free(indexoidlist); } - /* - * repair_frag() -- try to repair relation's fragmentation - * - * This routine marks dead tuples as unused and tries re-use dead space - * by moving tuples (and inserting indexes if needed). It constructs - * Nvacpagelist list of free-ed pages (moved tuples) and clean indexes - * for them after committing (in hack-manner - without losing locks - * and freeing memory!) current transaction. It truncates relation - * if some end-blocks are gone away. - * - * The return value indicates whether this function has held off - * interrupts -- caller must RESUME_INTERRUPTS() after commit if true. 
+ * Release the resources acquired by vac_open_indexes. Optionally release + * the locks (say NoLock to keep 'em). */ -static bool -repair_frag(VRelStats *vacrelstats, Relation onerel, - VacPageList vacuum_pages, VacPageList fraged_pages, - int nindexes, Relation *Irel) +void +vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode) { - TransactionId myXID = GetCurrentTransactionId(); - Buffer dst_buffer = InvalidBuffer; - BlockNumber nblocks, - blkno; - BlockNumber last_move_dest_block = 0, - last_vacuum_block; - Page dst_page = NULL; - ExecContextData ec; - VacPageListData Nvacpagelist; - VacPage dst_vacpage = NULL, - last_vacuum_page, - vacpage, - *curpage; - int i; - int num_moved = 0, - num_fraged_pages, - vacuumed_pages; - int keep_tuples = 0; - int keep_indexed_tuples = 0; - PGRUsage ru0; - bool heldoff = false; - - pg_rusage_init(&ru0); - - ExecContext_Init(&ec, onerel); - - Nvacpagelist.num_pages = 0; - num_fraged_pages = fraged_pages->num_pages; - Assert((BlockNumber) vacuum_pages->num_pages >= vacuum_pages->empty_end_pages); - vacuumed_pages = vacuum_pages->num_pages - vacuum_pages->empty_end_pages; - if (vacuumed_pages > 0) - { - /* get last reaped page from vacuum_pages */ - last_vacuum_page = vacuum_pages->pagedesc[vacuumed_pages - 1]; - last_vacuum_block = last_vacuum_page->blkno; - } - else - { - last_vacuum_page = NULL; - last_vacuum_block = InvalidBlockNumber; - } - - vacpage = (VacPage) palloc(sizeof(VacPageData) + MaxOffsetNumber * sizeof(OffsetNumber)); - vacpage->offsets_used = vacpage->offsets_free = 0; + if (Irel == NULL) + return; - /* - * Scan pages backwards from the last nonempty page, trying to move tuples - * down to lower pages. Quit when we reach a page that we have moved any - * tuples onto, or the first page if we haven't moved anything, or when we - * find a page we cannot completely empty (this last condition is handled - * by "break" statements within the loop). - * - * NB: this code depends on the vacuum_pages and fraged_pages lists being - * in order by blkno. - */ - nblocks = vacrelstats->rel_pages; - for (blkno = nblocks - vacuum_pages->empty_end_pages - 1; - blkno > last_move_dest_block; - blkno--) + while (nindexes--) { - Buffer buf; - Page page; - OffsetNumber offnum, - maxoff; - bool isempty, - chain_tuple_moved; - - vacuum_delay_point(); - - /* - * Forget fraged_pages pages at or after this one; they're no longer - * useful as move targets, since we only want to move down. Note that - * since we stop the outer loop at last_move_dest_block, pages removed - * here cannot have had anything moved onto them already. - * - * Also note that we don't change the stored fraged_pages list, only - * our local variable num_fraged_pages; so the forgotten pages are - * still available to be loaded into the free space map later. - */ - while (num_fraged_pages > 0 && - fraged_pages->pagedesc[num_fraged_pages - 1]->blkno >= blkno) - { - Assert(fraged_pages->pagedesc[num_fraged_pages - 1]->offsets_used == 0); - --num_fraged_pages; - } - - /* - * Process this page of relation. - */ - buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno, RBM_NORMAL, - vac_strategy); - page = BufferGetPage(buf); - - vacpage->offsets_free = 0; - - isempty = PageIsEmpty(page); - - /* Is the page in the vacuum_pages list? 
*/ - if (blkno == last_vacuum_block) - { - if (last_vacuum_page->offsets_free > 0) - { - /* there are dead tuples on this page - clean them */ - Assert(!isempty); - LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); - vacuum_page(vacrelstats, onerel, buf, last_vacuum_page); - LockBuffer(buf, BUFFER_LOCK_UNLOCK); - } - else - Assert(isempty); - --vacuumed_pages; - if (vacuumed_pages > 0) - { - /* get prev reaped page from vacuum_pages */ - last_vacuum_page = vacuum_pages->pagedesc[vacuumed_pages - 1]; - last_vacuum_block = last_vacuum_page->blkno; - } - else - { - last_vacuum_page = NULL; - last_vacuum_block = InvalidBlockNumber; - } - if (isempty) - { - ReleaseBuffer(buf); - continue; - } - } - else - Assert(!isempty); - - chain_tuple_moved = false; /* no one chain-tuple was moved off - * this page, yet */ - vacpage->blkno = blkno; - maxoff = PageGetMaxOffsetNumber(page); - for (offnum = FirstOffsetNumber; - offnum <= maxoff; - offnum = OffsetNumberNext(offnum)) - { - Size tuple_len; - HeapTupleData tuple; - ItemId itemid = PageGetItemId(page, offnum); - - if (!ItemIdIsUsed(itemid)) - continue; - - if (ItemIdIsDead(itemid)) - { - /* just remember it for vacuum_page() */ - vacpage->offsets[vacpage->offsets_free++] = offnum; - continue; - } - - /* Shouldn't have any redirected items now */ - Assert(ItemIdIsNormal(itemid)); - - tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); - tuple_len = tuple.t_len = ItemIdGetLength(itemid); - ItemPointerSet(&(tuple.t_self), blkno, offnum); - - /* --- - * VACUUM FULL has an exclusive lock on the relation. So - * normally no other transaction can have pending INSERTs or - * DELETEs in this relation. A tuple is either: - * (a) live (XMIN_COMMITTED) - * (b) known dead (XMIN_INVALID, or XMAX_COMMITTED and xmax - * is visible to all active transactions) - * (c) inserted and deleted (XMIN_COMMITTED+XMAX_COMMITTED) - * but at least one active transaction does not see the - * deleting transaction (ie, it's RECENTLY_DEAD) - * (d) moved by the currently running VACUUM - * (e) inserted or deleted by a not yet committed transaction, - * or by a transaction we couldn't set XMIN_COMMITTED for. - * In case (e) we wouldn't be in repair_frag() at all, because - * scan_heap() detects those cases and shuts off shrinking. - * We can't see case (b) here either, because such tuples were - * already removed by vacuum_page(). Cases (a) and (c) are - * normal and will have XMIN_COMMITTED set. Case (d) is only - * possible if a whole tuple chain has been moved while - * processing this or a higher numbered block. - * --- - */ - if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED)) - { - if (tuple.t_data->t_infomask & HEAP_MOVED_IN) - elog(ERROR, "HEAP_MOVED_IN was not expected"); - if (!(tuple.t_data->t_infomask & HEAP_MOVED_OFF)) - elog(ERROR, "HEAP_MOVED_OFF was expected"); - - /* - * MOVED_OFF by another VACUUM would have caused the - * visibility check to set XMIN_COMMITTED or XMIN_INVALID. - */ - if (HeapTupleHeaderGetXvac(tuple.t_data) != myXID) - elog(ERROR, "invalid XVAC in tuple header"); - - /* - * If this (chain) tuple is moved by me already then I have to - * check is it in vacpage or not - i.e. is it moved while - * cleaning this page or some previous one. - */ - - /* Can't we Assert(keep_tuples > 0) here? 
*/ - if (keep_tuples == 0) - continue; - if (chain_tuple_moved) - { - /* some chains were moved while cleaning this page */ - Assert(vacpage->offsets_free > 0); - for (i = 0; i < vacpage->offsets_free; i++) - { - if (vacpage->offsets[i] == offnum) - break; - } - if (i >= vacpage->offsets_free) /* not found */ - { - vacpage->offsets[vacpage->offsets_free++] = offnum; - - /* - * If this is not a heap-only tuple, there must be an - * index entry for this item which will be removed in - * the index cleanup. Decrement the - * keep_indexed_tuples count to remember this. - */ - if (!HeapTupleHeaderIsHeapOnly(tuple.t_data)) - keep_indexed_tuples--; - keep_tuples--; - } - } - else - { - vacpage->offsets[vacpage->offsets_free++] = offnum; - - /* - * If this is not a heap-only tuple, there must be an - * index entry for this item which will be removed in the - * index cleanup. Decrement the keep_indexed_tuples count - * to remember this. - */ - if (!HeapTupleHeaderIsHeapOnly(tuple.t_data)) - keep_indexed_tuples--; - keep_tuples--; - } - continue; - } - - /* - * If this tuple is in a chain of tuples created in updates by - * "recent" transactions then we have to move the whole chain of - * tuples to other places, so that we can write new t_ctid links - * that preserve the chain relationship. - * - * This test is complicated. Read it as "if tuple is a recently - * created updated version, OR if it is an obsoleted version". (In - * the second half of the test, we needn't make any check on XMAX - * --- it must be recently obsoleted, else scan_heap would have - * deemed it removable.) - * - * NOTE: this test is not 100% accurate: it is possible for a - * tuple to be an updated one with recent xmin, and yet not match - * any new_tid entry in the vtlinks list. Presumably there was - * once a parent tuple with xmax matching the xmin, but it's - * possible that that tuple has been removed --- for example, if - * it had xmin = xmax and wasn't itself an updated version, then - * HeapTupleSatisfiesVacuum would deem it removable as soon as the - * xmin xact completes. - * - * To be on the safe side, we abandon the repair_frag process if - * we cannot find the parent tuple in vtlinks. This may be overly - * conservative; AFAICS it would be safe to move the chain. - * - * Also, because we distinguish DEAD and RECENTLY_DEAD tuples - * using OldestXmin, which is a rather coarse test, it is quite - * possible to have an update chain in which a tuple we think is - * RECENTLY_DEAD links forward to one that is definitely DEAD. In - * such a case the RECENTLY_DEAD tuple must actually be dead, but - * it seems too complicated to try to make VACUUM remove it. We - * treat each contiguous set of RECENTLY_DEAD tuples as a - * separately movable chain, ignoring any intervening DEAD ones. 
- */ - if (((tuple.t_data->t_infomask & HEAP_UPDATED) && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(tuple.t_data), - OldestXmin)) || - (!(tuple.t_data->t_infomask & (HEAP_XMAX_INVALID | - HEAP_IS_LOCKED)) && - !(ItemPointerEquals(&(tuple.t_self), - &(tuple.t_data->t_ctid))))) - { - Buffer Cbuf = buf; - bool freeCbuf = false; - bool chain_move_failed = false; - bool moved_target = false; - ItemPointerData Ctid; - HeapTupleData tp = tuple; - Size tlen = tuple_len; - VTupleMove vtmove; - int num_vtmove; - int free_vtmove; - VacPage to_vacpage = NULL; - int to_item = 0; - int ti; - - if (dst_buffer != InvalidBuffer) - { - ReleaseBuffer(dst_buffer); - dst_buffer = InvalidBuffer; - } - - /* Quick exit if we have no vtlinks to search in */ - if (vacrelstats->vtlinks == NULL) - { - elog(DEBUG2, "parent item in update-chain not found --- cannot continue repair_frag"); - break; /* out of walk-along-page loop */ - } - - /* - * If this tuple is in the begin/middle of the chain then we - * have to move to the end of chain. As with any t_ctid - * chase, we have to verify that each new tuple is really the - * descendant of the tuple we came from; however, here we need - * even more than the normal amount of paranoia. If t_ctid - * links forward to a tuple determined to be DEAD, then - * depending on where that tuple is, it might already have - * been removed, and perhaps even replaced by a MOVED_IN - * tuple. We don't want to include any DEAD tuples in the - * chain, so we have to recheck HeapTupleSatisfiesVacuum. - */ - while (!(tp.t_data->t_infomask & (HEAP_XMAX_INVALID | - HEAP_IS_LOCKED)) && - !(ItemPointerEquals(&(tp.t_self), - &(tp.t_data->t_ctid)))) - { - ItemPointerData nextTid; - TransactionId priorXmax; - Buffer nextBuf; - Page nextPage; - OffsetNumber nextOffnum; - ItemId nextItemid; - HeapTupleHeader nextTdata; - HTSV_Result nextTstatus; - - nextTid = tp.t_data->t_ctid; - priorXmax = HeapTupleHeaderGetXmax(tp.t_data); - /* assume block# is OK (see heap_fetch comments) */ - nextBuf = ReadBufferExtended(onerel, MAIN_FORKNUM, - ItemPointerGetBlockNumber(&nextTid), - RBM_NORMAL, vac_strategy); - nextPage = BufferGetPage(nextBuf); - /* If bogus or unused slot, assume tp is end of chain */ - nextOffnum = ItemPointerGetOffsetNumber(&nextTid); - if (nextOffnum < FirstOffsetNumber || - nextOffnum > PageGetMaxOffsetNumber(nextPage)) - { - ReleaseBuffer(nextBuf); - break; - } - nextItemid = PageGetItemId(nextPage, nextOffnum); - if (!ItemIdIsNormal(nextItemid)) - { - ReleaseBuffer(nextBuf); - break; - } - /* if not matching XMIN, assume tp is end of chain */ - nextTdata = (HeapTupleHeader) PageGetItem(nextPage, - nextItemid); - if (!TransactionIdEquals(HeapTupleHeaderGetXmin(nextTdata), - priorXmax)) - { - ReleaseBuffer(nextBuf); - break; - } - - /* - * Must check for DEAD or MOVED_IN tuple, too. This could - * potentially update hint bits, so we'd better hold the - * buffer content lock. 
- */ - LockBuffer(nextBuf, BUFFER_LOCK_SHARE); - nextTstatus = HeapTupleSatisfiesVacuum(nextTdata, - OldestXmin, - nextBuf); - if (nextTstatus == HEAPTUPLE_DEAD || - nextTstatus == HEAPTUPLE_INSERT_IN_PROGRESS) - { - UnlockReleaseBuffer(nextBuf); - break; - } - LockBuffer(nextBuf, BUFFER_LOCK_UNLOCK); - /* if it's MOVED_OFF we shoulda moved this one with it */ - if (nextTstatus == HEAPTUPLE_DELETE_IN_PROGRESS) - elog(ERROR, "updated tuple is already HEAP_MOVED_OFF"); - /* OK, switch our attention to the next tuple in chain */ - tp.t_data = nextTdata; - tp.t_self = nextTid; - tlen = tp.t_len = ItemIdGetLength(nextItemid); - if (freeCbuf) - ReleaseBuffer(Cbuf); - Cbuf = nextBuf; - freeCbuf = true; - } - - /* Set up workspace for planning the chain move */ - vtmove = (VTupleMove) palloc(100 * sizeof(VTupleMoveData)); - num_vtmove = 0; - free_vtmove = 100; - - /* - * Now, walk backwards up the chain (towards older tuples) and - * check if all items in chain can be moved. We record all - * the moves that need to be made in the vtmove array. - */ - for (;;) - { - Buffer Pbuf; - Page Ppage; - ItemId Pitemid; - HeapTupleHeader PTdata; - VTupleLinkData vtld, - *vtlp; - - /* Identify a target page to move this tuple to */ - if (to_vacpage == NULL || - !enough_space(to_vacpage, tlen)) - { - for (i = 0; i < num_fraged_pages; i++) - { - if (enough_space(fraged_pages->pagedesc[i], tlen)) - break; - } - - if (i == num_fraged_pages) - { - /* can't move item anywhere */ - chain_move_failed = true; - break; /* out of check-all-items loop */ - } - to_item = i; - to_vacpage = fraged_pages->pagedesc[to_item]; - } - to_vacpage->free -= MAXALIGN(tlen); - if (to_vacpage->offsets_used >= to_vacpage->offsets_free) - to_vacpage->free -= sizeof(ItemIdData); - (to_vacpage->offsets_used)++; - - /* Add an entry to vtmove list */ - if (free_vtmove == 0) - { - free_vtmove = 1000; - vtmove = (VTupleMove) - repalloc(vtmove, - (free_vtmove + num_vtmove) * - sizeof(VTupleMoveData)); - } - vtmove[num_vtmove].tid = tp.t_self; - vtmove[num_vtmove].vacpage = to_vacpage; - if (to_vacpage->offsets_used == 1) - vtmove[num_vtmove].cleanVpd = true; - else - vtmove[num_vtmove].cleanVpd = false; - free_vtmove--; - num_vtmove++; - - /* Remember if we reached the original target tuple */ - if (ItemPointerGetBlockNumber(&tp.t_self) == blkno && - ItemPointerGetOffsetNumber(&tp.t_self) == offnum) - moved_target = true; - - /* Done if at beginning of chain */ - if (!(tp.t_data->t_infomask & HEAP_UPDATED) || - TransactionIdPrecedes(HeapTupleHeaderGetXmin(tp.t_data), - OldestXmin)) - break; /* out of check-all-items loop */ - - /* Move to tuple with prior row version */ - vtld.new_tid = tp.t_self; - vtlp = (VTupleLink) - vac_bsearch((void *) &vtld, - (void *) (vacrelstats->vtlinks), - vacrelstats->num_vtlinks, - sizeof(VTupleLinkData), - vac_cmp_vtlinks); - if (vtlp == NULL) - { - /* see discussion above */ - elog(DEBUG2, "parent item in update-chain not found --- cannot continue repair_frag"); - chain_move_failed = true; - break; /* out of check-all-items loop */ - } - tp.t_self = vtlp->this_tid; - Pbuf = ReadBufferExtended(onerel, MAIN_FORKNUM, - ItemPointerGetBlockNumber(&(tp.t_self)), - RBM_NORMAL, vac_strategy); - Ppage = BufferGetPage(Pbuf); - Pitemid = PageGetItemId(Ppage, - ItemPointerGetOffsetNumber(&(tp.t_self))); - /* this can't happen since we saw tuple earlier: */ - if (!ItemIdIsNormal(Pitemid)) - elog(ERROR, "parent itemid marked as unused"); - PTdata = (HeapTupleHeader) PageGetItem(Ppage, Pitemid); - - /* ctid should not have 
changed since we saved it */ - Assert(ItemPointerEquals(&(vtld.new_tid), - &(PTdata->t_ctid))); - - /* - * Read above about cases when !ItemIdIsUsed(nextItemid) - * (child item is removed)... Due to the fact that at the - * moment we don't remove unuseful part of update-chain, - * it's possible to get non-matching parent row here. Like - * as in the case which caused this problem, we stop - * shrinking here. I could try to find real parent row but - * want not to do it because of real solution will be - * implemented anyway, later, and we are too close to 6.5 - * release. - vadim 06/11/99 - */ - if ((PTdata->t_infomask & HEAP_XMAX_IS_MULTI) || - !(TransactionIdEquals(HeapTupleHeaderGetXmax(PTdata), - HeapTupleHeaderGetXmin(tp.t_data)))) - { - ReleaseBuffer(Pbuf); - elog(DEBUG2, "too old parent tuple found --- cannot continue repair_frag"); - chain_move_failed = true; - break; /* out of check-all-items loop */ - } - tp.t_data = PTdata; - tlen = tp.t_len = ItemIdGetLength(Pitemid); - if (freeCbuf) - ReleaseBuffer(Cbuf); - Cbuf = Pbuf; - freeCbuf = true; - } /* end of check-all-items loop */ - - if (freeCbuf) - ReleaseBuffer(Cbuf); - freeCbuf = false; - - /* Double-check that we will move the current target tuple */ - if (!moved_target && !chain_move_failed) - { - elog(DEBUG2, "failed to chain back to target --- cannot continue repair_frag"); - chain_move_failed = true; - } - - if (chain_move_failed) - { - /* - * Undo changes to offsets_used state. We don't bother - * cleaning up the amount-free state, since we're not - * going to do any further tuple motion. - */ - for (i = 0; i < num_vtmove; i++) - { - Assert(vtmove[i].vacpage->offsets_used > 0); - (vtmove[i].vacpage->offsets_used)--; - } - pfree(vtmove); - break; /* out of walk-along-page loop */ - } - - /* - * Okay, move the whole tuple chain in reverse order. - * - * Ctid tracks the new location of the previously-moved tuple. - */ - ItemPointerSetInvalid(&Ctid); - for (ti = 0; ti < num_vtmove; ti++) - { - VacPage destvacpage = vtmove[ti].vacpage; - Page Cpage; - ItemId Citemid; - - /* Get page to move from */ - tuple.t_self = vtmove[ti].tid; - Cbuf = ReadBufferExtended(onerel, MAIN_FORKNUM, - ItemPointerGetBlockNumber(&(tuple.t_self)), - RBM_NORMAL, vac_strategy); - - /* Get page to move to */ - dst_buffer = ReadBufferExtended(onerel, MAIN_FORKNUM, - destvacpage->blkno, - RBM_NORMAL, vac_strategy); - - LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE); - if (dst_buffer != Cbuf) - LockBuffer(Cbuf, BUFFER_LOCK_EXCLUSIVE); - - dst_page = BufferGetPage(dst_buffer); - Cpage = BufferGetPage(Cbuf); - - Citemid = PageGetItemId(Cpage, - ItemPointerGetOffsetNumber(&(tuple.t_self))); - tuple.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid); - tuple_len = tuple.t_len = ItemIdGetLength(Citemid); - - move_chain_tuple(vacrelstats, onerel, Cbuf, Cpage, &tuple, - dst_buffer, dst_page, destvacpage, - &ec, &Ctid, vtmove[ti].cleanVpd); - - /* - * If the tuple we are moving is a heap-only tuple, this - * move will generate an additional index entry, so - * increment the rel_indexed_tuples count. - */ - if (HeapTupleHeaderIsHeapOnly(tuple.t_data)) - vacrelstats->rel_indexed_tuples++; - - num_moved++; - if (destvacpage->blkno > last_move_dest_block) - last_move_dest_block = destvacpage->blkno; - - /* - * Remember that we moved tuple from the current page - * (corresponding index tuple will be cleaned). 
- */ - if (Cbuf == buf) - vacpage->offsets[vacpage->offsets_free++] = - ItemPointerGetOffsetNumber(&(tuple.t_self)); - else - { - /* - * When we move tuple chains, we may need to move - * tuples from a block that we haven't yet scanned in - * the outer walk-along-the-relation loop. Note that - * we can't be moving a tuple from a block that we - * have already scanned because if such a tuple - * exists, then we must have moved the chain along - * with that tuple when we scanned that block. IOW the - * test of (Cbuf != buf) guarantees that the tuple we - * are looking at right now is in a block which is yet - * to be scanned. - * - * We maintain two counters to correctly count the - * moved-off tuples from blocks that are not yet - * scanned (keep_tuples) and how many of them have - * index pointers (keep_indexed_tuples). The main - * reason to track the latter is to help verify that - * indexes have the expected number of entries when - * all the dust settles. - */ - if (!HeapTupleHeaderIsHeapOnly(tuple.t_data)) - keep_indexed_tuples++; - keep_tuples++; - } - - ReleaseBuffer(dst_buffer); - ReleaseBuffer(Cbuf); - } /* end of move-the-tuple-chain loop */ - - dst_buffer = InvalidBuffer; - pfree(vtmove); - chain_tuple_moved = true; - - /* advance to next tuple in walk-along-page loop */ - continue; - } /* end of is-tuple-in-chain test */ - - /* try to find new page for this tuple */ - if (dst_buffer == InvalidBuffer || - !enough_space(dst_vacpage, tuple_len)) - { - if (dst_buffer != InvalidBuffer) - { - ReleaseBuffer(dst_buffer); - dst_buffer = InvalidBuffer; - } - for (i = 0; i < num_fraged_pages; i++) - { - if (enough_space(fraged_pages->pagedesc[i], tuple_len)) - break; - } - if (i == num_fraged_pages) - break; /* can't move item anywhere */ - dst_vacpage = fraged_pages->pagedesc[i]; - dst_buffer = ReadBufferExtended(onerel, MAIN_FORKNUM, - dst_vacpage->blkno, - RBM_NORMAL, vac_strategy); - LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE); - dst_page = BufferGetPage(dst_buffer); - /* if this page was not used before - clean it */ - if (!PageIsEmpty(dst_page) && dst_vacpage->offsets_used == 0) - vacuum_page(vacrelstats, onerel, dst_buffer, dst_vacpage); - } - else - LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE); - - LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); - - move_plain_tuple(onerel, buf, page, &tuple, - dst_buffer, dst_page, dst_vacpage, &ec); - - /* - * If the tuple we are moving is a heap-only tuple, this move will - * generate an additional index entry, so increment the - * rel_indexed_tuples count. - */ - if (HeapTupleHeaderIsHeapOnly(tuple.t_data)) - vacrelstats->rel_indexed_tuples++; - - num_moved++; - if (dst_vacpage->blkno > last_move_dest_block) - last_move_dest_block = dst_vacpage->blkno; - - /* - * Remember that we moved tuple from the current page - * (corresponding index tuple will be cleaned). - */ - vacpage->offsets[vacpage->offsets_free++] = offnum; - } /* walk along page */ - - /* - * If we broke out of the walk-along-page loop early (ie, still have - * offnum <= maxoff), then we failed to move some tuple off this page. - * No point in shrinking any more, so clean up and exit the per-page - * loop. 
- */ - if (offnum < maxoff && keep_tuples > 0) - { - OffsetNumber off; - - /* - * Fix vacpage state for any unvisited tuples remaining on page - */ - for (off = OffsetNumberNext(offnum); - off <= maxoff; - off = OffsetNumberNext(off)) - { - ItemId itemid = PageGetItemId(page, off); - HeapTupleHeader htup; - - if (!ItemIdIsUsed(itemid)) - continue; - /* Shouldn't be any DEAD or REDIRECT items anymore */ - Assert(ItemIdIsNormal(itemid)); - - htup = (HeapTupleHeader) PageGetItem(page, itemid); - if (htup->t_infomask & HEAP_XMIN_COMMITTED) - continue; - - /* - * See comments in the walk-along-page loop above about why - * only MOVED_OFF tuples should be found here. - */ - if (htup->t_infomask & HEAP_MOVED_IN) - elog(ERROR, "HEAP_MOVED_IN was not expected"); - if (!(htup->t_infomask & HEAP_MOVED_OFF)) - elog(ERROR, "HEAP_MOVED_OFF was expected"); - if (HeapTupleHeaderGetXvac(htup) != myXID) - elog(ERROR, "invalid XVAC in tuple header"); - - if (chain_tuple_moved) - { - /* some chains were moved while cleaning this page */ - Assert(vacpage->offsets_free > 0); - for (i = 0; i < vacpage->offsets_free; i++) - { - if (vacpage->offsets[i] == off) - break; - } - if (i >= vacpage->offsets_free) /* not found */ - { - vacpage->offsets[vacpage->offsets_free++] = off; - Assert(keep_tuples > 0); - - /* - * If this is not a heap-only tuple, there must be an - * index entry for this item which will be removed in - * the index cleanup. Decrement the - * keep_indexed_tuples count to remember this. - */ - if (!HeapTupleHeaderIsHeapOnly(htup)) - keep_indexed_tuples--; - keep_tuples--; - } - } - else - { - vacpage->offsets[vacpage->offsets_free++] = off; - Assert(keep_tuples > 0); - if (!HeapTupleHeaderIsHeapOnly(htup)) - keep_indexed_tuples--; - keep_tuples--; - } - } - } - - if (vacpage->offsets_free > 0) /* some tuples were moved */ - { - if (chain_tuple_moved) /* else - they are ordered */ - { - qsort((char *) (vacpage->offsets), vacpage->offsets_free, - sizeof(OffsetNumber), vac_cmp_offno); - } - vpage_insert(&Nvacpagelist, copy_vac_page(vacpage)); - } - - ReleaseBuffer(buf); - - if (offnum <= maxoff) - break; /* had to quit early, see above note */ - - } /* walk along relation */ - - blkno++; /* new number of blocks */ - - if (dst_buffer != InvalidBuffer) - { - Assert(num_moved > 0); - ReleaseBuffer(dst_buffer); - } - - if (num_moved > 0) - { - /* - * We have to commit our tuple movings before we truncate the - * relation. Ideally we should do Commit/StartTransactionCommand - * here, relying on the session-level table lock to protect our - * exclusive access to the relation. However, that would require a - * lot of extra code to close and re-open the relation, indexes, etc. - * For now, a quick hack: record status of current transaction as - * committed, and continue. We force the commit to be synchronous so - * that it's down to disk before we truncate. (Note: tqual.c knows - * that VACUUM FULL always uses sync commit, too.) The transaction - * continues to be shown as running in the ProcArray. - * - * XXX This desperately needs to be revisited. Any failure after this - * point will result in a PANIC "cannot abort transaction nnn, it was - * already committed"! As a precaution, we prevent cancel interrupts - * after this point to mitigate this problem; caller is responsible for - * re-enabling them after committing the transaction. 
- */ - HOLD_INTERRUPTS(); - heldoff = true; - ForceSyncCommit(); - (void) RecordTransactionCommit(true); - } - - /* - * We are not going to move any more tuples across pages, but we still - * need to apply vacuum_page to compact free space in the remaining pages - * in vacuum_pages list. Note that some of these pages may also be in the - * fraged_pages list, and may have had tuples moved onto them; if so, we - * already did vacuum_page and needn't do it again. - */ - for (i = 0, curpage = vacuum_pages->pagedesc; - i < vacuumed_pages; - i++, curpage++) - { - vacuum_delay_point(); - - Assert((*curpage)->blkno < blkno); - if ((*curpage)->offsets_used == 0) - { - Buffer buf; - Page page; - - /* this page was not used as a move target, so must clean it */ - buf = ReadBufferExtended(onerel, MAIN_FORKNUM, (*curpage)->blkno, - RBM_NORMAL, vac_strategy); - LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); - page = BufferGetPage(buf); - if (!PageIsEmpty(page)) - vacuum_page(vacrelstats, onerel, buf, *curpage); - UnlockReleaseBuffer(buf); - } - } - - /* - * Now scan all the pages that we moved tuples onto and update tuple - * status bits. This is not really necessary, but will save time for - * future transactions examining these tuples. - */ - update_hint_bits(onerel, fraged_pages, num_fraged_pages, - last_move_dest_block, num_moved); - - /* - * It'd be cleaner to make this report at the bottom of this routine, but - * then the rusage would double-count the second pass of index vacuuming. - * So do it here and ignore the relatively small amount of processing that - * occurs below. - */ - ereport(elevel, - (errmsg("\"%s\": moved %u row versions, truncated %u to %u pages", - RelationGetRelationName(onerel), - num_moved, nblocks, blkno), - errdetail("%s.", - pg_rusage_show(&ru0)))); - - /* - * Reflect the motion of system tuples to catalog cache here. - */ - CommandCounterIncrement(); - - if (Nvacpagelist.num_pages > 0) - { - /* vacuum indexes again if needed */ - if (Irel != NULL) - { - VacPage *vpleft, - *vpright, - vpsave; - - /* re-sort Nvacpagelist.pagedesc */ - for (vpleft = Nvacpagelist.pagedesc, - vpright = Nvacpagelist.pagedesc + Nvacpagelist.num_pages - 1; - vpleft < vpright; vpleft++, vpright--) - { - vpsave = *vpleft; - *vpleft = *vpright; - *vpright = vpsave; - } - - /* - * keep_tuples is the number of tuples that have been moved off a - * page during chain moves but not been scanned over subsequently. - * The tuple ids of these tuples are not recorded as free offsets - * for any VacPage, so they will not be cleared from the indexes. - * keep_indexed_tuples is the portion of these that are expected - * to have index entries. - */ - Assert(keep_tuples >= 0); - for (i = 0; i < nindexes; i++) - vacuum_index(&Nvacpagelist, Irel[i], - vacrelstats->rel_indexed_tuples, - keep_indexed_tuples); - } - - /* - * Clean moved-off tuples from last page in Nvacpagelist list. - * - * We need only do this in this one page, because higher-numbered - * pages are going to be truncated from the relation entirely. But see - * comments for update_hint_bits(). 
- */ - if (vacpage->blkno == (blkno - 1) && - vacpage->offsets_free > 0) - { - Buffer buf; - Page page; - OffsetNumber unused[MaxOffsetNumber]; - OffsetNumber offnum, - maxoff; - int uncnt = 0; - int num_tuples = 0; - - buf = ReadBufferExtended(onerel, MAIN_FORKNUM, vacpage->blkno, - RBM_NORMAL, vac_strategy); - LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); - page = BufferGetPage(buf); - maxoff = PageGetMaxOffsetNumber(page); - for (offnum = FirstOffsetNumber; - offnum <= maxoff; - offnum = OffsetNumberNext(offnum)) - { - ItemId itemid = PageGetItemId(page, offnum); - HeapTupleHeader htup; - - if (!ItemIdIsUsed(itemid)) - continue; - /* Shouldn't be any DEAD or REDIRECT items anymore */ - Assert(ItemIdIsNormal(itemid)); - - htup = (HeapTupleHeader) PageGetItem(page, itemid); - if (htup->t_infomask & HEAP_XMIN_COMMITTED) - continue; - - /* - * See comments in the walk-along-page loop above about why - * only MOVED_OFF tuples should be found here. - */ - if (htup->t_infomask & HEAP_MOVED_IN) - elog(ERROR, "HEAP_MOVED_IN was not expected"); - if (!(htup->t_infomask & HEAP_MOVED_OFF)) - elog(ERROR, "HEAP_MOVED_OFF was expected"); - if (HeapTupleHeaderGetXvac(htup) != myXID) - elog(ERROR, "invalid XVAC in tuple header"); - - ItemIdSetUnused(itemid); - num_tuples++; - - unused[uncnt++] = offnum; - } - Assert(vacpage->offsets_free == num_tuples); - - START_CRIT_SECTION(); - - PageRepairFragmentation(page); - - MarkBufferDirty(buf); - - /* XLOG stuff */ - if (!onerel->rd_istemp) - { - XLogRecPtr recptr; - - recptr = log_heap_clean(onerel, buf, - NULL, 0, NULL, 0, - unused, uncnt, - vacrelstats->latestRemovedXid, false); - PageSetLSN(page, recptr); - PageSetTLI(page, ThisTimeLineID); - } - - END_CRIT_SECTION(); - - UnlockReleaseBuffer(buf); - } - - /* now - free new list of reaped pages */ - curpage = Nvacpagelist.pagedesc; - for (i = 0; i < Nvacpagelist.num_pages; i++, curpage++) - pfree(*curpage); - pfree(Nvacpagelist.pagedesc); - } - - /* Truncate relation, if needed */ - if (blkno < nblocks) - { - RelationTruncate(onerel, blkno); - - /* force relcache inval so all backends reset their rd_targblock */ - CacheInvalidateRelcache(onerel); - - vacrelstats->rel_pages = blkno; /* set new number of blocks */ - } - - /* clean up */ - pfree(vacpage); - if (vacrelstats->vtlinks != NULL) - pfree(vacrelstats->vtlinks); - - ExecContext_Finish(&ec); - - return heldoff; -} - -/* - * move_chain_tuple() -- move one tuple that is part of a tuple chain - * - * This routine moves old_tup from old_page to dst_page. - * old_page and dst_page might be the same page. - * On entry old_buf and dst_buf are locked exclusively, both locks (or - * the single lock, if this is a intra-page-move) are released before - * exit. - * - * Yes, a routine with ten parameters is ugly, but it's still better - * than having these 120 lines of code in repair_frag() which is - * already too long and almost unreadable. - */ -static void -move_chain_tuple(VRelStats *vacrelstats, Relation rel, - Buffer old_buf, Page old_page, HeapTuple old_tup, - Buffer dst_buf, Page dst_page, VacPage dst_vacpage, - ExecContext ec, ItemPointer ctid, bool cleanVpd) -{ - TransactionId myXID = GetCurrentTransactionId(); - HeapTupleData newtup; - OffsetNumber newoff; - ItemId newitemid; - Size tuple_len = old_tup->t_len; - bool all_visible_cleared = false; - bool all_visible_cleared_new = false; - - /* - * make a modifiable copy of the source tuple. - */ - heap_copytuple_with_tuple(old_tup, &newtup); - - /* - * register invalidation of source tuple in catcaches. 
- */ - CacheInvalidateHeapTuple(rel, old_tup); - - /* NO EREPORT(ERROR) TILL CHANGES ARE LOGGED */ - START_CRIT_SECTION(); - - /* - * mark the source tuple MOVED_OFF. - */ - old_tup->t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED | - HEAP_XMIN_INVALID | - HEAP_MOVED_IN); - old_tup->t_data->t_infomask |= HEAP_MOVED_OFF; - HeapTupleHeaderSetXvac(old_tup->t_data, myXID); - - /* - * If this page was not used before - clean it. - * - * NOTE: a nasty bug used to lurk here. It is possible for the source and - * destination pages to be the same (since this tuple-chain member can be - * on a page lower than the one we're currently processing in the outer - * loop). If that's true, then after vacuum_page() the source tuple will - * have been moved, and tuple.t_data will be pointing at garbage. - * Therefore we must do everything that uses old_tup->t_data BEFORE this - * step!! - * - * This path is different from the other callers of vacuum_page, because - * we have already incremented the vacpage's offsets_used field to account - * for the tuple(s) we expect to move onto the page. Therefore - * vacuum_page's check for offsets_used == 0 is wrong. But since that's a - * good debugging check for all other callers, we work around it here - * rather than remove it. - */ - if (!PageIsEmpty(dst_page) && cleanVpd) - { - int sv_offsets_used = dst_vacpage->offsets_used; - - dst_vacpage->offsets_used = 0; - vacuum_page(vacrelstats, rel, dst_buf, dst_vacpage); - dst_vacpage->offsets_used = sv_offsets_used; - } - - /* - * Update the state of the copied tuple, and store it on the destination - * page. The copied tuple is never part of a HOT chain. - */ - newtup.t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED | - HEAP_XMIN_INVALID | - HEAP_MOVED_OFF); - newtup.t_data->t_infomask |= HEAP_MOVED_IN; - HeapTupleHeaderClearHotUpdated(newtup.t_data); - HeapTupleHeaderClearHeapOnly(newtup.t_data); - HeapTupleHeaderSetXvac(newtup.t_data, myXID); - newoff = PageAddItem(dst_page, (Item) newtup.t_data, tuple_len, - InvalidOffsetNumber, false, true); - if (newoff == InvalidOffsetNumber) - elog(PANIC, "failed to add item with len = %lu to page %u while moving tuple chain", - (unsigned long) tuple_len, dst_vacpage->blkno); - newitemid = PageGetItemId(dst_page, newoff); - /* drop temporary copy, and point to the version on the dest page */ - pfree(newtup.t_data); - newtup.t_data = (HeapTupleHeader) PageGetItem(dst_page, newitemid); - - ItemPointerSet(&(newtup.t_self), dst_vacpage->blkno, newoff); - - /* - * Set new tuple's t_ctid pointing to itself if last tuple in chain, and - * to next tuple in chain otherwise. (Since we move the chain in reverse - * order, this is actually the previously processed tuple.) 
- */ - if (!ItemPointerIsValid(ctid)) - newtup.t_data->t_ctid = newtup.t_self; - else - newtup.t_data->t_ctid = *ctid; - *ctid = newtup.t_self; - - /* clear PD_ALL_VISIBLE flags */ - if (PageIsAllVisible(old_page)) - { - all_visible_cleared = true; - PageClearAllVisible(old_page); - } - if (dst_buf != old_buf && PageIsAllVisible(dst_page)) - { - all_visible_cleared_new = true; - PageClearAllVisible(dst_page); - } - - MarkBufferDirty(dst_buf); - if (dst_buf != old_buf) - MarkBufferDirty(old_buf); - - /* XLOG stuff */ - if (!rel->rd_istemp) - { - XLogRecPtr recptr = log_heap_move(rel, old_buf, old_tup->t_self, - dst_buf, &newtup, - all_visible_cleared, - all_visible_cleared_new); - - if (old_buf != dst_buf) - { - PageSetLSN(old_page, recptr); - PageSetTLI(old_page, ThisTimeLineID); - } - PageSetLSN(dst_page, recptr); - PageSetTLI(dst_page, ThisTimeLineID); - } - - END_CRIT_SECTION(); - - LockBuffer(dst_buf, BUFFER_LOCK_UNLOCK); - if (dst_buf != old_buf) - LockBuffer(old_buf, BUFFER_LOCK_UNLOCK); - - /* Clear bits in visibility map */ - if (all_visible_cleared) - visibilitymap_clear(rel, BufferGetBlockNumber(old_buf)); - if (all_visible_cleared_new) - visibilitymap_clear(rel, BufferGetBlockNumber(dst_buf)); - - /* Create index entries for the moved tuple */ - if (ec->resultRelInfo->ri_NumIndices > 0) - { - ExecStoreTuple(&newtup, ec->slot, InvalidBuffer, false); - ExecInsertIndexTuples(ec->slot, &(newtup.t_self), ec->estate, true); - ResetPerTupleExprContext(ec->estate); - } -} - -/* - * move_plain_tuple() -- move one tuple that is not part of a chain - * - * This routine moves old_tup from old_page to dst_page. - * On entry old_buf and dst_buf are locked exclusively, both locks are - * released before exit. - * - * Yes, a routine with eight parameters is ugly, but it's still better - * than having these 90 lines of code in repair_frag() which is already - * too long and almost unreadable. - */ -static void -move_plain_tuple(Relation rel, - Buffer old_buf, Page old_page, HeapTuple old_tup, - Buffer dst_buf, Page dst_page, VacPage dst_vacpage, - ExecContext ec) -{ - TransactionId myXID = GetCurrentTransactionId(); - HeapTupleData newtup; - OffsetNumber newoff; - ItemId newitemid; - Size tuple_len = old_tup->t_len; - bool all_visible_cleared = false; - bool all_visible_cleared_new = false; - - /* copy tuple */ - heap_copytuple_with_tuple(old_tup, &newtup); - - /* - * register invalidation of source tuple in catcaches. - * - * (Note: we do not need to register the copied tuple, because we are not - * changing the tuple contents and so there cannot be any need to flush - * negative catcache entries.) - */ - CacheInvalidateHeapTuple(rel, old_tup); - - /* NO EREPORT(ERROR) TILL CHANGES ARE LOGGED */ - START_CRIT_SECTION(); - - /* - * Mark new tuple as MOVED_IN by me; also mark it not HOT. 
- */ - newtup.t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED | - HEAP_XMIN_INVALID | - HEAP_MOVED_OFF); - newtup.t_data->t_infomask |= HEAP_MOVED_IN; - HeapTupleHeaderClearHotUpdated(newtup.t_data); - HeapTupleHeaderClearHeapOnly(newtup.t_data); - HeapTupleHeaderSetXvac(newtup.t_data, myXID); - - /* add tuple to the page */ - newoff = PageAddItem(dst_page, (Item) newtup.t_data, tuple_len, - InvalidOffsetNumber, false, true); - if (newoff == InvalidOffsetNumber) - elog(PANIC, "failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)", - (unsigned long) tuple_len, - dst_vacpage->blkno, (unsigned long) dst_vacpage->free, - dst_vacpage->offsets_used, dst_vacpage->offsets_free); - newitemid = PageGetItemId(dst_page, newoff); - pfree(newtup.t_data); - newtup.t_data = (HeapTupleHeader) PageGetItem(dst_page, newitemid); - ItemPointerSet(&(newtup.t_data->t_ctid), dst_vacpage->blkno, newoff); - newtup.t_self = newtup.t_data->t_ctid; - - /* - * Mark old tuple as MOVED_OFF by me. - */ - old_tup->t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED | - HEAP_XMIN_INVALID | - HEAP_MOVED_IN); - old_tup->t_data->t_infomask |= HEAP_MOVED_OFF; - HeapTupleHeaderSetXvac(old_tup->t_data, myXID); - - /* clear PD_ALL_VISIBLE flags */ - if (PageIsAllVisible(old_page)) - { - all_visible_cleared = true; - PageClearAllVisible(old_page); - } - if (PageIsAllVisible(dst_page)) - { - all_visible_cleared_new = true; - PageClearAllVisible(dst_page); - } - - MarkBufferDirty(dst_buf); - MarkBufferDirty(old_buf); - - /* XLOG stuff */ - if (!rel->rd_istemp) - { - XLogRecPtr recptr = log_heap_move(rel, old_buf, old_tup->t_self, - dst_buf, &newtup, - all_visible_cleared, - all_visible_cleared_new); - - PageSetLSN(old_page, recptr); - PageSetTLI(old_page, ThisTimeLineID); - PageSetLSN(dst_page, recptr); - PageSetTLI(dst_page, ThisTimeLineID); - } - - END_CRIT_SECTION(); - - dst_vacpage->free = PageGetFreeSpaceWithFillFactor(rel, dst_page); - LockBuffer(dst_buf, BUFFER_LOCK_UNLOCK); - LockBuffer(old_buf, BUFFER_LOCK_UNLOCK); - - dst_vacpage->offsets_used++; - - /* Clear bits in visibility map */ - if (all_visible_cleared) - visibilitymap_clear(rel, BufferGetBlockNumber(old_buf)); - if (all_visible_cleared_new) - visibilitymap_clear(rel, BufferGetBlockNumber(dst_buf)); - - /* insert index' tuples if needed */ - if (ec->resultRelInfo->ri_NumIndices > 0) - { - ExecStoreTuple(&newtup, ec->slot, InvalidBuffer, false); - ExecInsertIndexTuples(ec->slot, &(newtup.t_self), ec->estate, true); - ResetPerTupleExprContext(ec->estate); - } -} - -/* - * update_hint_bits() -- update hint bits in destination pages - * - * Scan all the pages that we moved tuples onto and update tuple status bits. - * This is not really necessary, but it will save time for future transactions - * examining these tuples. - * - * This pass guarantees that all HEAP_MOVED_IN tuples are marked as - * XMIN_COMMITTED, so that future tqual tests won't need to check their XVAC. - * - * BUT NOTICE that this code fails to clear HEAP_MOVED_OFF tuples from - * pages that were move source pages but not move dest pages. The bulk - * of the move source pages will be physically truncated from the relation, - * and the last page remaining in the rel will be fixed separately in - * repair_frag(), so the only cases where a MOVED_OFF tuple won't get its - * hint bits updated are tuples that are moved as part of a chain and were - * on pages that were not either move destinations nor at the end of the rel. 
- * To completely ensure that no MOVED_OFF tuples remain unmarked, we'd have - * to remember and revisit those pages too. - * - * One wonders whether it wouldn't be better to skip this work entirely, - * and let the tuple status updates happen someplace that's not holding an - * exclusive lock on the relation. - */ -static void -update_hint_bits(Relation rel, VacPageList fraged_pages, int num_fraged_pages, - BlockNumber last_move_dest_block, int num_moved) -{ - TransactionId myXID = GetCurrentTransactionId(); - int checked_moved = 0; - int i; - VacPage *curpage; - - for (i = 0, curpage = fraged_pages->pagedesc; - i < num_fraged_pages; - i++, curpage++) - { - Buffer buf; - Page page; - OffsetNumber max_offset; - OffsetNumber off; - int num_tuples = 0; - - vacuum_delay_point(); - - if ((*curpage)->blkno > last_move_dest_block) - break; /* no need to scan any further */ - if ((*curpage)->offsets_used == 0) - continue; /* this page was never used as a move dest */ - buf = ReadBufferExtended(rel, MAIN_FORKNUM, (*curpage)->blkno, - RBM_NORMAL, vac_strategy); - LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); - page = BufferGetPage(buf); - max_offset = PageGetMaxOffsetNumber(page); - for (off = FirstOffsetNumber; - off <= max_offset; - off = OffsetNumberNext(off)) - { - ItemId itemid = PageGetItemId(page, off); - HeapTupleHeader htup; - - if (!ItemIdIsUsed(itemid)) - continue; - /* Shouldn't be any DEAD or REDIRECT items anymore */ - Assert(ItemIdIsNormal(itemid)); - - htup = (HeapTupleHeader) PageGetItem(page, itemid); - if (htup->t_infomask & HEAP_XMIN_COMMITTED) - continue; - - /* - * Here we may see either MOVED_OFF or MOVED_IN tuples. - */ - if (!(htup->t_infomask & HEAP_MOVED)) - elog(ERROR, "HEAP_MOVED_OFF/HEAP_MOVED_IN was expected"); - if (HeapTupleHeaderGetXvac(htup) != myXID) - elog(ERROR, "invalid XVAC in tuple header"); - - if (htup->t_infomask & HEAP_MOVED_IN) - { - htup->t_infomask |= HEAP_XMIN_COMMITTED; - htup->t_infomask &= ~HEAP_MOVED; - num_tuples++; - } - else - htup->t_infomask |= HEAP_XMIN_INVALID; - } - MarkBufferDirty(buf); - UnlockReleaseBuffer(buf); - Assert((*curpage)->offsets_used == num_tuples); - checked_moved += num_tuples; - } - Assert(num_moved == checked_moved); -} - -/* - * vacuum_heap() -- free dead tuples - * - * This routine marks dead tuples as unused and truncates relation - * if there are "empty" end-blocks. 
- */ -static void -vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages) -{ - Buffer buf; - VacPage *vacpage; - BlockNumber relblocks; - int nblocks; - int i; - - nblocks = vacuum_pages->num_pages; - nblocks -= vacuum_pages->empty_end_pages; /* nothing to do with them */ - - for (i = 0, vacpage = vacuum_pages->pagedesc; i < nblocks; i++, vacpage++) - { - vacuum_delay_point(); - - if ((*vacpage)->offsets_free > 0) - { - buf = ReadBufferExtended(onerel, MAIN_FORKNUM, (*vacpage)->blkno, - RBM_NORMAL, vac_strategy); - LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); - vacuum_page(vacrelstats, onerel, buf, *vacpage); - UnlockReleaseBuffer(buf); - } - } - - /* Truncate relation if there are some empty end-pages */ - Assert(vacrelstats->rel_pages >= vacuum_pages->empty_end_pages); - if (vacuum_pages->empty_end_pages > 0) - { - relblocks = vacrelstats->rel_pages - vacuum_pages->empty_end_pages; - ereport(elevel, - (errmsg("\"%s\": truncated %u to %u pages", - RelationGetRelationName(onerel), - vacrelstats->rel_pages, relblocks))); - RelationTruncate(onerel, relblocks); - - /* force relcache inval so all backends reset their rd_targblock */ - CacheInvalidateRelcache(onerel); - - vacrelstats->rel_pages = relblocks; /* set new number of blocks */ - } -} - -/* - * vacuum_page() -- free dead tuples on a page - * and repair its fragmentation. - * - * Caller must hold pin and lock on buffer. - */ -static void -vacuum_page(VRelStats *vacrelstats, Relation onerel, Buffer buffer, VacPage vacpage) -{ - Page page = BufferGetPage(buffer); - int i; - - /* There shouldn't be any tuples moved onto the page yet! */ - Assert(vacpage->offsets_used == 0); - - START_CRIT_SECTION(); - - for (i = 0; i < vacpage->offsets_free; i++) - { - ItemId itemid = PageGetItemId(page, vacpage->offsets[i]); - - ItemIdSetUnused(itemid); - } - - PageRepairFragmentation(page); - - MarkBufferDirty(buffer); - - /* XLOG stuff */ - if (!onerel->rd_istemp) - { - XLogRecPtr recptr; - - recptr = log_heap_clean(onerel, buffer, - NULL, 0, NULL, 0, - vacpage->offsets, vacpage->offsets_free, - vacrelstats->latestRemovedXid, false); - PageSetLSN(page, recptr); - PageSetTLI(page, ThisTimeLineID); - } - - END_CRIT_SECTION(); -} - -/* - * scan_index() -- scan one index relation to update pg_class statistics. - * - * We use this when we have no deletions to do. - */ -static void -scan_index(Relation indrel, double num_tuples) -{ - IndexBulkDeleteResult *stats; - IndexVacuumInfo ivinfo; - PGRUsage ru0; - - pg_rusage_init(&ru0); - - ivinfo.index = indrel; - ivinfo.vacuum_full = true; - ivinfo.analyze_only = false; - ivinfo.estimated_count = false; - ivinfo.message_level = elevel; - ivinfo.num_heap_tuples = num_tuples; - ivinfo.strategy = vac_strategy; - - stats = index_vacuum_cleanup(&ivinfo, NULL); - - if (!stats) - return; - - /* - * Now update statistics in pg_class, but only if the index says the count - * is accurate. - */ - if (!stats->estimated_count) - vac_update_relstats(indrel, - stats->num_pages, stats->num_index_tuples, - false, InvalidTransactionId); - - ereport(elevel, - (errmsg("index \"%s\" now contains %.0f row versions in %u pages", - RelationGetRelationName(indrel), - stats->num_index_tuples, - stats->num_pages), - errdetail("%u index pages have been deleted, %u are currently reusable.\n" - "%s.", - stats->pages_deleted, stats->pages_free, - pg_rusage_show(&ru0)))); - - /* - * Check for tuple count mismatch. If the index is partial, then it's OK - * for it to have fewer tuples than the heap; else we got trouble. 
- */ - if (!stats->estimated_count && - stats->num_index_tuples != num_tuples) - { - if (stats->num_index_tuples > num_tuples || - !vac_is_partial_index(indrel)) - ereport(WARNING, - (errmsg("index \"%s\" contains %.0f row versions, but table contains %.0f row versions", - RelationGetRelationName(indrel), - stats->num_index_tuples, num_tuples), - errhint("Rebuild the index with REINDEX."))); - } - - pfree(stats); -} - -/* - * vacuum_index() -- vacuum one index relation. - * - * Vpl is the VacPageList of the heap we're currently vacuuming. - * It's locked. Indrel is an index relation on the vacuumed heap. - * - * We don't bother to set locks on the index relation here, since - * the parent table is exclusive-locked already. - * - * Finally, we arrange to update the index relation's statistics in - * pg_class. - */ -static void -vacuum_index(VacPageList vacpagelist, Relation indrel, - double num_tuples, int keep_tuples) -{ - IndexBulkDeleteResult *stats; - IndexVacuumInfo ivinfo; - PGRUsage ru0; - - pg_rusage_init(&ru0); - - ivinfo.index = indrel; - ivinfo.vacuum_full = true; - ivinfo.analyze_only = false; - ivinfo.estimated_count = false; - ivinfo.message_level = elevel; - ivinfo.num_heap_tuples = num_tuples + keep_tuples; - ivinfo.strategy = vac_strategy; - - /* Do bulk deletion */ - stats = index_bulk_delete(&ivinfo, NULL, tid_reaped, (void *) vacpagelist); - - /* Do post-VACUUM cleanup */ - stats = index_vacuum_cleanup(&ivinfo, stats); - - if (!stats) - return; - - /* - * Now update statistics in pg_class, but only if the index says the count - * is accurate. - */ - if (!stats->estimated_count) - vac_update_relstats(indrel, - stats->num_pages, stats->num_index_tuples, - false, InvalidTransactionId); - - ereport(elevel, - (errmsg("index \"%s\" now contains %.0f row versions in %u pages", - RelationGetRelationName(indrel), - stats->num_index_tuples, - stats->num_pages), - errdetail("%.0f index row versions were removed.\n" - "%u index pages have been deleted, %u are currently reusable.\n" - "%s.", - stats->tuples_removed, - stats->pages_deleted, stats->pages_free, - pg_rusage_show(&ru0)))); - - /* - * Check for tuple count mismatch. If the index is partial, then it's OK - * for it to have fewer tuples than the heap; else we got trouble. - */ - if (!stats->estimated_count && - stats->num_index_tuples != num_tuples + keep_tuples) - { - if (stats->num_index_tuples > num_tuples + keep_tuples || - !vac_is_partial_index(indrel)) - ereport(WARNING, - (errmsg("index \"%s\" contains %.0f row versions, but table contains %.0f row versions", - RelationGetRelationName(indrel), - stats->num_index_tuples, num_tuples + keep_tuples), - errhint("Rebuild the index with REINDEX."))); - } - - pfree(stats); -} - -/* - * tid_reaped() -- is a particular tid reaped? - * - * This has the right signature to be an IndexBulkDeleteCallback. - * - * vacpagelist->VacPage_array is sorted in right order. 
- */ -static bool -tid_reaped(ItemPointer itemptr, void *state) -{ - VacPageList vacpagelist = (VacPageList) state; - OffsetNumber ioffno; - OffsetNumber *voff; - VacPage vp, - *vpp; - VacPageData vacpage; - - vacpage.blkno = ItemPointerGetBlockNumber(itemptr); - ioffno = ItemPointerGetOffsetNumber(itemptr); - - vp = &vacpage; - vpp = (VacPage *) vac_bsearch((void *) &vp, - (void *) (vacpagelist->pagedesc), - vacpagelist->num_pages, - sizeof(VacPage), - vac_cmp_blk); - - if (vpp == NULL) - return false; - - /* ok - we are on a partially or fully reaped page */ - vp = *vpp; - - if (vp->offsets_free == 0) - { - /* this is EmptyPage, so claim all tuples on it are reaped!!! */ - return true; - } - - voff = (OffsetNumber *) vac_bsearch((void *) &ioffno, - (void *) (vp->offsets), - vp->offsets_free, - sizeof(OffsetNumber), - vac_cmp_offno); - - if (voff == NULL) - return false; - - /* tid is reaped */ - return true; -} - -/* - * Update the Free Space Map with the info we now have about free space in - * the relation. - */ -static void -vac_update_fsm(Relation onerel, VacPageList fraged_pages, - BlockNumber rel_pages) -{ - int nPages = fraged_pages->num_pages; - VacPage *pagedesc = fraged_pages->pagedesc; - int i; - - for (i = 0; i < nPages; i++) - { - /* - * fraged_pages may contain entries for pages that we later decided to - * truncate from the relation; don't enter them into the free space - * map! - */ - if (pagedesc[i]->blkno >= rel_pages) - break; - - RecordPageWithFreeSpace(onerel, pagedesc[i]->blkno, pagedesc[i]->free); - } - -} - -/* Copy a VacPage structure */ -static VacPage -copy_vac_page(VacPage vacpage) -{ - VacPage newvacpage; - - /* allocate a VacPageData entry */ - newvacpage = (VacPage) palloc(sizeof(VacPageData) + - vacpage->offsets_free * sizeof(OffsetNumber)); - - /* fill it in */ - if (vacpage->offsets_free > 0) - memcpy(newvacpage->offsets, vacpage->offsets, - vacpage->offsets_free * sizeof(OffsetNumber)); - newvacpage->blkno = vacpage->blkno; - newvacpage->free = vacpage->free; - newvacpage->offsets_used = vacpage->offsets_used; - newvacpage->offsets_free = vacpage->offsets_free; - - return newvacpage; -} - -/* - * Add a VacPage pointer to a VacPageList. - * - * As a side effect of the way that scan_heap works, - * higher pages come after lower pages in the array - * (and highest tid on a page is last). - */ -static void -vpage_insert(VacPageList vacpagelist, VacPage vpnew) -{ -#define PG_NPAGEDESC 1024 - - /* allocate a VacPage entry if needed */ - if (vacpagelist->num_pages == 0) - { - vacpagelist->pagedesc = (VacPage *) palloc(PG_NPAGEDESC * sizeof(VacPage)); - vacpagelist->num_allocated_pages = PG_NPAGEDESC; - } - else if (vacpagelist->num_pages >= vacpagelist->num_allocated_pages) - { - vacpagelist->num_allocated_pages *= 2; - vacpagelist->pagedesc = (VacPage *) repalloc(vacpagelist->pagedesc, vacpagelist->num_allocated_pages * sizeof(VacPage)); - } - vacpagelist->pagedesc[vacpagelist->num_pages] = vpnew; - (vacpagelist->num_pages)++; -} - -/* - * vac_bsearch: just like standard C library routine bsearch(), - * except that we first test to see whether the target key is outside - * the range of the table entries. This case is handled relatively slowly - * by the normal binary search algorithm (ie, no faster than any other key) - * but it occurs often enough in VACUUM to be worth optimizing. 
- */ -static void * -vac_bsearch(const void *key, const void *base, - size_t nelem, size_t size, - int (*compar) (const void *, const void *)) -{ - int res; - const void *last; - - if (nelem == 0) - return NULL; - res = compar(key, base); - if (res < 0) - return NULL; - if (res == 0) - return (void *) base; - if (nelem > 1) - { - last = (const void *) ((const char *) base + (nelem - 1) * size); - res = compar(key, last); - if (res > 0) - return NULL; - if (res == 0) - return (void *) last; - } - if (nelem <= 2) - return NULL; /* already checked 'em all */ - return bsearch(key, base, nelem, size, compar); -} - -/* - * Comparator routines for use with qsort() and bsearch(). - */ -static int -vac_cmp_blk(const void *left, const void *right) -{ - BlockNumber lblk, - rblk; - - lblk = (*((VacPage *) left))->blkno; - rblk = (*((VacPage *) right))->blkno; - - if (lblk < rblk) - return -1; - if (lblk == rblk) - return 0; - return 1; -} - -static int -vac_cmp_offno(const void *left, const void *right) -{ - if (*(OffsetNumber *) left < *(OffsetNumber *) right) - return -1; - if (*(OffsetNumber *) left == *(OffsetNumber *) right) - return 0; - return 1; -} - -static int -vac_cmp_vtlinks(const void *left, const void *right) -{ - if (((VTupleLink) left)->new_tid.ip_blkid.bi_hi < - ((VTupleLink) right)->new_tid.ip_blkid.bi_hi) - return -1; - if (((VTupleLink) left)->new_tid.ip_blkid.bi_hi > - ((VTupleLink) right)->new_tid.ip_blkid.bi_hi) - return 1; - /* bi_hi-es are equal */ - if (((VTupleLink) left)->new_tid.ip_blkid.bi_lo < - ((VTupleLink) right)->new_tid.ip_blkid.bi_lo) - return -1; - if (((VTupleLink) left)->new_tid.ip_blkid.bi_lo > - ((VTupleLink) right)->new_tid.ip_blkid.bi_lo) - return 1; - /* bi_lo-es are equal */ - if (((VTupleLink) left)->new_tid.ip_posid < - ((VTupleLink) right)->new_tid.ip_posid) - return -1; - if (((VTupleLink) left)->new_tid.ip_posid > - ((VTupleLink) right)->new_tid.ip_posid) - return 1; - return 0; -} - - -/* - * Open all the indexes of the given relation, obtaining the specified kind - * of lock on each. Return an array of Relation pointers for the indexes - * into *Irel, and the number of indexes into *nindexes. - */ -void -vac_open_indexes(Relation relation, LOCKMODE lockmode, - int *nindexes, Relation **Irel) -{ - List *indexoidlist; - ListCell *indexoidscan; - int i; - - Assert(lockmode != NoLock); - - indexoidlist = RelationGetIndexList(relation); - - *nindexes = list_length(indexoidlist); - - if (*nindexes > 0) - *Irel = (Relation *) palloc(*nindexes * sizeof(Relation)); - else - *Irel = NULL; - - i = 0; - foreach(indexoidscan, indexoidlist) - { - Oid indexoid = lfirst_oid(indexoidscan); - - (*Irel)[i++] = index_open(indexoid, lockmode); - } - - list_free(indexoidlist); -} - -/* - * Release the resources acquired by vac_open_indexes. Optionally release - * the locks (say NoLock to keep 'em). - */ -void -vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode) -{ - if (Irel == NULL) - return; - - while (nindexes--) - { - Relation ind = Irel[nindexes]; + Relation ind = Irel[nindexes]; index_close(ind, lockmode); } pfree(Irel); } - -/* - * Is an index partial (ie, could it contain fewer tuples than the heap?) 
- */ -bool -vac_is_partial_index(Relation indrel) -{ - /* - * If the index's AM doesn't support nulls, it's partial for our purposes - */ - if (!indrel->rd_am->amindexnulls) - return true; - - /* Otherwise, look to see if there's a partial-index predicate */ - if (!heap_attisnull(indrel->rd_indextuple, Anum_pg_index_indpred)) - return true; - - return false; -} - - -static bool -enough_space(VacPage vacpage, Size len) -{ - len = MAXALIGN(len); - - if (len > vacpage->free) - return false; - - /* if there are free itemid(s) and len <= free_space... */ - if (vacpage->offsets_used < vacpage->offsets_free) - return true; - - /* noff_used >= noff_free and so we'll have to allocate new itemid */ - if (len + sizeof(ItemIdData) <= vacpage->free) - return true; - - return false; -} - -static Size -PageGetFreeSpaceWithFillFactor(Relation relation, Page page) -{ - /* - * It is correct to use PageGetExactFreeSpace() here, *not* - * PageGetHeapFreeSpace(). This is because (a) we do our own, exact - * accounting for whether line pointers must be added, and (b) we will - * recycle any LP_DEAD line pointers before starting to add rows to a - * page, but that may not have happened yet at the time this function is - * applied to a page, which means PageGetHeapFreeSpace()'s protection - * against too many line pointers on a page could fire incorrectly. We do - * not need that protection here: since VACUUM FULL always recycles all - * dead line pointers first, it'd be physically impossible to insert more - * than MaxHeapTuplesPerPage tuples anyway. - */ - Size freespace = PageGetExactFreeSpace(page); - Size targetfree; - - targetfree = RelationGetTargetPageFreeSpace(relation, - HEAP_DEFAULT_FILLFACTOR); - if (freespace > targetfree) - return freespace - targetfree; - else - return 0; -} - /* * vacuum_delay_point --- check for interrupts and cost-based delay. * diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 3c7dc2a3d438921597e21f789384358dfbf0cacf..ec0fa18b8384921b39851bae5935355002398935 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -29,7 +29,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.128 2010/01/28 07:31:42 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.129 2010/02/08 04:33:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -501,8 +501,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, * * We count tuples removed by the pruning step as removed by VACUUM. 
*/ - tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, - false, false); + tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false); /* * Now scan the page to collect vacuumable items and check for tuples @@ -907,7 +906,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, recptr = log_heap_clean(onerel, buffer, NULL, 0, NULL, 0, unused, uncnt, - vacrelstats->latestRemovedXid, false); + vacrelstats->latestRemovedXid); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } @@ -934,7 +933,6 @@ lazy_vacuum_index(Relation indrel, pg_rusage_init(&ru0); ivinfo.index = indrel; - ivinfo.vacuum_full = false; ivinfo.analyze_only = false; ivinfo.estimated_count = true; ivinfo.message_level = elevel; @@ -966,7 +964,6 @@ lazy_cleanup_index(Relation indrel, pg_rusage_init(&ru0); ivinfo.index = indrel; - ivinfo.vacuum_full = false; ivinfo.analyze_only = false; ivinfo.estimated_count = !vacrelstats->scanned_all; ivinfo.message_level = elevel; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 5d6473dfc69a765274b4996308153458bb9204b9..151e50b63f9e12963438e95612173344718bcf53 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execUtils.c,v 1.169 2010/01/02 17:53:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execUtils.c,v 1.170 2010/02/08 04:33:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -977,8 +977,7 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) List * ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid, - EState *estate, - bool is_vacuum_full) + EState *estate) { List *result = NIL; ResultRelInfo *resultRelInfo; @@ -1070,12 +1069,8 @@ ExecInsertIndexTuples(TupleTableSlot *slot, * For a deferrable unique index, we tell the index AM to just detect * possible non-uniqueness, and we add the index OID to the result * list if further checking is needed. - * - * Special hack: we suppress unique-index checks if we are being - * called from VACUUM FULL, since VACUUM FULL may need to move dead - * tuples that have the same keys as live ones. 
*/ - if (is_vacuum_full || !indexRelation->rd_index->indisunique) + if (!indexRelation->rd_index->indisunique) checkUnique = UNIQUE_CHECK_NO; else if (indexRelation->rd_index->indimmediate) checkUnique = UNIQUE_CHECK_YES; diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index cad08fa8f72debbf0e82ed95de83cd778a79637a..eca1d6de3f4a1dea781ad320fc22e84e24c398b3 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeModifyTable.c,v 1.5 2010/01/31 18:15:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeModifyTable.c,v 1.6 2010/02/08 04:33:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -249,7 +249,7 @@ ExecInsert(TupleTableSlot *slot, */ if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, false); + estate); /* AFTER ROW INSERT Triggers */ ExecARInsertTriggers(estate, resultRelInfo, tuple, recheckIndexes); @@ -566,7 +566,7 @@ lreplace:; */ if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple)) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, false); + estate); /* AFTER ROW UPDATE Triggers */ ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple, diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index d7bbbbd1bef4b8eba2b2c798ad4f3f359d253a0b..2541d0212877c476c160ac9715dd1573147ce5a5 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.706 2010/01/28 23:21:12 petere Exp $ + * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.707 2010/02/08 04:33:54 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -491,7 +491,7 @@ static TypeName *TableFuncTypeName(List *columns); IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IN_P INCLUDING INCREMENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P - INNER_P INOUT INPLACE INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER + INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION JOIN @@ -6830,7 +6830,6 @@ vacuum_option_elem: | VERBOSE { $$ = VACOPT_VERBOSE; } | FREEZE { $$ = VACOPT_FREEZE; } | FULL { $$ = VACOPT_FULL; } - | FULL INPLACE { $$ = VACOPT_FULL | VACOPT_INPLACE; } ; AnalyzeStmt: @@ -10822,7 +10821,6 @@ unreserved_keyword: | INHERIT | INHERITS | INLINE_P - | INPLACE | INPUT_P | INSENSITIVE | INSERT diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index da64e1953a3259ffdb2769b9282938b4ec422db5..a96a558da32b2f64ce8900892db64bb9fbcda70e 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.214 2010/01/23 16:37:12 sriggs Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.215 2010/02/08 04:33:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -617,8 +617,7 @@ LockWaitCancel(void) * at main transaction commit or abort * * At main transaction commit, we release all locks except session locks. - * At main transaction abort, we release all locks including session locks; - * this lets us clean up after a VACUUM FULL failure. + * At main transaction abort, we release all locks including session locks. 
* * At subtransaction commit, we don't release any locks (so this func is not * needed at all); we will defer the releasing to the parent transaction. diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 96439fda18a161a0662ebf362d47bb7f84ba9048..86b88ba0571498992ecd994c6ddae60e4b541c49 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -80,7 +80,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.94 2010/02/07 20:48:10 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.95 2010/02/08 04:33:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1112,103 +1112,6 @@ CommandEndInvalidationMessages(void) } -/* - * BeginNonTransactionalInvalidation - * Prepare for invalidation messages for nontransactional updates. - * - * A nontransactional invalidation is one that must be sent whether or not - * the current transaction eventually commits. We arrange for all invals - * queued between this call and EndNonTransactionalInvalidation() to be sent - * immediately when the latter is called. - * - * Currently, this is only used by heap_page_prune(), and only when it is - * invoked during VACUUM FULL's first pass over a table. We expect therefore - * that we are not inside a subtransaction and there are no already-pending - * invalidations. This could be relaxed by setting up a new nesting level of - * invalidation data, but for now there's no need. Note that heap_page_prune - * knows that this function does not change any state, and therefore there's - * no need to worry about cleaning up if there's an elog(ERROR) before - * reaching EndNonTransactionalInvalidation (the invals will just be thrown - * away if that happens). - * - * Note that these are not replayed in standby mode. - */ -void -BeginNonTransactionalInvalidation(void) -{ - /* Must be at top of stack */ - Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL); - - /* Must not have any previously-queued activity */ - Assert(transInvalInfo->PriorCmdInvalidMsgs.cclist == NULL); - Assert(transInvalInfo->PriorCmdInvalidMsgs.rclist == NULL); - Assert(transInvalInfo->CurrentCmdInvalidMsgs.cclist == NULL); - Assert(transInvalInfo->CurrentCmdInvalidMsgs.rclist == NULL); - Assert(transInvalInfo->RelcacheInitFileInval == false); - - SharedInvalidMessagesArray = NULL; - numSharedInvalidMessagesArray = 0; -} - -/* - * EndNonTransactionalInvalidation - * Process queued-up invalidation messages for nontransactional updates. - * - * We expect to find messages in CurrentCmdInvalidMsgs only (else there - * was a CommandCounterIncrement within the "nontransactional" update). - * We must process them locally and send them out to the shared invalidation - * message queue. - * - * We must also reset the lists to empty and explicitly free memory (we can't - * rely on end-of-transaction cleanup for that). 
- */ -void -EndNonTransactionalInvalidation(void) -{ - InvalidationChunk *chunk; - InvalidationChunk *next; - - /* Must be at top of stack */ - Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL); - - /* Must not have any prior-command messages */ - Assert(transInvalInfo->PriorCmdInvalidMsgs.cclist == NULL); - Assert(transInvalInfo->PriorCmdInvalidMsgs.rclist == NULL); - - /* - * At present, this function is only used for CTID-changing updates; since - * the relcache init file doesn't store any tuple CTIDs, we don't have to - * invalidate it. That might not be true forever though, in which case - * we'd need code similar to AtEOXact_Inval. - */ - - /* Send out the invals */ - ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs, - LocalExecuteInvalidationMessage); - ProcessInvalidationMessagesMulti(&transInvalInfo->CurrentCmdInvalidMsgs, - SendSharedInvalidMessages); - - /* Clean up and release memory */ - for (chunk = transInvalInfo->CurrentCmdInvalidMsgs.cclist; - chunk != NULL; - chunk = next) - { - next = chunk->next; - pfree(chunk); - } - for (chunk = transInvalInfo->CurrentCmdInvalidMsgs.rclist; - chunk != NULL; - chunk = next) - { - next = chunk->next; - pfree(chunk); - } - transInvalInfo->CurrentCmdInvalidMsgs.cclist = NULL; - transInvalInfo->CurrentCmdInvalidMsgs.rclist = NULL; - transInvalInfo->RelcacheInitFileInval = false; -} - - /* * CacheInvalidateHeapTuple * Register the given tuple for invalidation at end of command diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c index 202f2c3007aa090994aace331e8ad37430df1a83..292caabb1fb270f4f34331838d2b2f7def7748d4 100644 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/utils/time/tqual.c @@ -50,7 +50,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/time/tqual.c,v 1.115 2010/01/02 16:57:58 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/time/tqual.c,v 1.116 2010/02/08 04:33:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -91,9 +91,12 @@ static bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot); * code in heapam.c relies on that!) * * Also, if we are cleaning up HEAP_MOVED_IN or HEAP_MOVED_OFF entries, then - * we can always set the hint bits, since VACUUM FULL always uses synchronous - * commits and doesn't move tuples that weren't previously hinted. (This is - * not known by this subroutine, but is applied by its callers.) + * we can always set the hint bits, since old-style VACUUM FULL always used + * synchronous commits and didn't move tuples that weren't previously + * hinted. (This is not known by this subroutine, but is applied by its + * callers.) Note: old-style VACUUM FULL is gone, but we have to keep this + * module's support for MOVED_OFF/MOVED_IN flag bits for as long as we + * support in-place update from pre-9.0 databases. * * Normal commits may be asynchronous, so for those we need to get the LSN * of the transaction and then check whether this is flushed. 
diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c
index a70d26429bdece661c7ecf86278c0d0c2f82fcd4..242ea9ebae11a81db0727d16110b9b052fd28fc2 100644
--- a/src/bin/scripts/vacuumdb.c
+++ b/src/bin/scripts/vacuumdb.c
@@ -5,7 +5,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/bin/scripts/vacuumdb.c,v 1.33 2010/01/07 14:35:44 momjian Exp $
+ * $PostgreSQL: pgsql/src/bin/scripts/vacuumdb.c,v 1.34 2010/02/08 04:33:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -14,12 +14,12 @@
 
 #include "common.h"
 
-static void vacuum_one_database(const char *dbname, bool full, bool inplace, bool verbose,
+static void vacuum_one_database(const char *dbname, bool full, bool verbose,
 				bool and_analyze, bool analyze_only, bool freeze,
 				const char *table, const char *host, const char *port,
 				const char *username, enum trivalue prompt_password,
 				const char *progname, bool echo);
-static void vacuum_all_databases(bool full, bool inplace, bool verbose, bool and_analyze,
+static void vacuum_all_databases(bool full, bool verbose, bool and_analyze,
 				 bool analyze_only, bool freeze,
 				 const char *host, const char *port,
 				 const char *username, enum trivalue prompt_password,
@@ -47,7 +47,6 @@ main(int argc, char *argv[])
 		{"table", required_argument, NULL, 't'},
 		{"full", no_argument, NULL, 'f'},
 		{"verbose", no_argument, NULL, 'v'},
-		{"inplace", no_argument, NULL, 'i'},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -69,14 +68,13 @@ main(int argc, char *argv[])
 	char	   *table = NULL;
 	bool		full = false;
 	bool		verbose = false;
-	bool		inplace = false;
 
 	progname = get_progname(argv[0]);
 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pgscripts"));
 
 	handle_help_version_opts(argc, argv, "vacuumdb", help);
 
-	while ((c = getopt_long(argc, argv, "h:p:U:wWeqd:zaFt:fiv", long_options, &optindex)) != -1)
+	while ((c = getopt_long(argc, argv, "h:p:U:wWeqd:zaFt:fv", long_options, &optindex)) != -1)
 	{
 		switch (c)
 		{
@@ -122,9 +120,6 @@ main(int argc, char *argv[])
 			case 'f':
 				full = true;
 				break;
-			case 'i':
-				inplace = true;
-				break;
 			case 'v':
 				verbose = true;
 				break;
@@ -148,13 +143,6 @@ main(int argc, char *argv[])
 		exit(1);
 	}
 
-	if (inplace && !full)
-	{
-		fprintf(stderr, _("%s: cannot use the \"inplace\" option when performing full vacuum\n"),
-				progname);
-		exit(1);
-	}
-
 	if (analyze_only)
 	{
 		if (full)
@@ -189,7 +177,7 @@ main(int argc, char *argv[])
 			exit(1);
 		}
 
-		vacuum_all_databases(full, inplace, verbose, and_analyze, analyze_only, freeze,
+		vacuum_all_databases(full, verbose, and_analyze, analyze_only, freeze,
 							 host, port, username, prompt_password,
 							 progname, echo, quiet);
 	}
@@ -205,7 +193,7 @@ main(int argc, char *argv[])
 				dbname = get_user_name(progname);
 		}
 
-		vacuum_one_database(dbname, full, inplace, verbose, and_analyze, analyze_only,
+		vacuum_one_database(dbname, full, verbose, and_analyze, analyze_only,
 							freeze, table,
 							host, port, username, prompt_password,
 							progname, echo);
@@ -216,7 +204,7 @@
 
 
 static void
-vacuum_one_database(const char *dbname, bool full, bool inplace, bool verbose, bool and_analyze,
+vacuum_one_database(const char *dbname, bool full, bool verbose, bool and_analyze,
 					bool analyze_only, bool freeze, const char *table,
 					const char *host, const char *port,
 					const char *username, enum trivalue prompt_password,
@@ -247,8 +235,7 @@ vacuum_one_database(const char *dbname, bool full, bool inplace, bool verbose, b
 
 		if (full)
 		{
-			appendPQExpBuffer(&sql, "%sFULL%s", sep,
-							  inplace ? " INPLACE" : "");
+			appendPQExpBuffer(&sql, "%sFULL", sep);
 			sep = comma;
 		}
 		if (freeze)
@@ -271,10 +258,6 @@ vacuum_one_database(const char *dbname, bool full, bool inplace, bool verbose, b
 	}
 	else
 	{
-		/*
-		 * On older servers, VACUUM FULL is equivalent to VACUUM (FULL
-		 * INPLACE) on newer servers, so we can ignore 'inplace'.
-		 */
 		if (full)
 			appendPQExpBuffer(&sql, " FULL");
 		if (freeze)
@@ -306,7 +289,7 @@ vacuum_one_database(const char *dbname, bool full, bool inplace, bool verbose, b
 
 
 static void
-vacuum_all_databases(bool full, bool inplace, bool verbose, bool and_analyze, bool analyze_only,
+vacuum_all_databases(bool full, bool verbose, bool and_analyze, bool analyze_only,
 					 bool freeze, const char *host, const char *port,
 					 const char *username, enum trivalue prompt_password,
 					 const char *progname, bool echo, bool quiet)
@@ -329,7 +312,7 @@ vacuum_all_databases(bool full, bool inplace, bool verbose, bool and_analyze, bo
 			fflush(stdout);
 		}
 
-		vacuum_one_database(dbname, full, inplace, verbose, and_analyze, analyze_only,
+		vacuum_one_database(dbname, full, verbose, and_analyze, analyze_only,
 							freeze, NULL, host, port, username, prompt_password,
 							progname, echo);
 	}
@@ -350,7 +333,6 @@ help(const char *progname)
 	printf(_("  -e, --echo                      show the commands being sent to the server\n"));
 	printf(_("  -f, --full                      do full vacuuming\n"));
 	printf(_("  -F, --freeze                    freeze row transaction information\n"));
-	printf(_("  -i, --inplace                   do full inplace vacuuming\n"));
 	printf(_("  -q, --quiet                     don't write any messages\n"));
 	printf(_("  -t, --table='TABLE[(COLUMNS)]'  vacuum specific table only\n"));
 	printf(_("  -v, --verbose                   write a lot of output\n"));
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index a03bcc542f6305ac191a445c4d992fefe6f8b51f..f355c23149f0ac29c3364923273b1a96b5e6a81a 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.82 2010/01/02 16:58:00 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.83 2010/02/08 04:33:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -42,7 +42,6 @@ typedef struct IndexBuildResult
 typedef struct IndexVacuumInfo
 {
 	Relation	index;			/* the index being vacuumed */
-	bool		vacuum_full;	/* VACUUM FULL (we have exclusive lock) */
 	bool		analyze_only;	/* ANALYZE (without any actual vacuum) */
 	bool		estimated_count;	/* num_heap_tuples is an estimate */
 	int			message_level;	/* ereport level for progress messages */
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 057034cf48539f9c871cb9ee9cab2b1c859c4d39..64eac4cd8963ca9e7e23dabb1273d72f99d98bd8 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.146 2010/01/02 16:58:00 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.147 2010/02/08 04:33:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -126,17 +126,13 @@ extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec);
 extern void heap2_redo(XLogRecPtr lsn, XLogRecord *rptr);
 extern void heap2_desc(StringInfo buf, uint8 xl_info,
 		   char *rec);
-extern XLogRecPtr log_heap_move(Relation reln, Buffer oldbuf,
-			  ItemPointerData from,
-			  Buffer newbuf, HeapTuple newtup,
-			  bool all_visible_cleared, bool new_all_visible_cleared);
 extern XLogRecPtr log_heap_cleanup_info(RelFileNode rnode,
 					   TransactionId latestRemovedXid);
 extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer,
 			   OffsetNumber *redirected, int nredirected,
 			   OffsetNumber *nowdead, int ndead,
 			   OffsetNumber *nowunused, int nunused,
-			   TransactionId latestRemovedXid, bool redirect_move);
+			   TransactionId latestRemovedXid);
 extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
 					TransactionId cutoff_xid,
 					OffsetNumber *offsets, int offcnt);
@@ -148,12 +144,11 @@ extern void heap_page_prune_opt(Relation relation, Buffer buffer,
 						TransactionId OldestXmin);
 extern int	heap_page_prune(Relation relation, Buffer buffer,
 				TransactionId OldestXmin,
-				bool redirect_move, bool report_stats);
+				bool report_stats);
 extern void heap_page_prune_execute(Buffer buffer,
 						OffsetNumber *redirected, int nredirected,
 						OffsetNumber *nowdead, int ndead,
-						OffsetNumber *nowunused, int nunused,
-						bool redirect_move);
+						OffsetNumber *nowunused, int nunused);
 extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
 
 /* in heap/syncscan.c */
diff --git a/src/include/access/htup.h b/src/include/access/htup.h
index e5478a21b680aa4e2bc606068a1d5e0195d33ab7..8ed6e44f646da2d372cadb6e167a3b3d8a7da003 100644
--- a/src/include/access/htup.h
+++ b/src/include/access/htup.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.110 2010/01/10 04:26:36 rhaas Exp $
+ * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.111 2010/02/08 04:33:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -74,11 +74,11 @@
  * transaction respectively.  If a tuple is inserted and deleted in the same
  * transaction, we store a "combo" command id that can be mapped to the real
  * cmin and cmax, but only by use of local state within the originating
- * backend.  See combocid.c for more details.  Meanwhile, Xvac is only set
- * by VACUUM FULL, which does not have any command sub-structure and so does
- * not need either Cmin or Cmax.  (This requires that VACUUM FULL never try
- * to move a tuple whose Cmin or Cmax is still interesting, ie, an insert-
- * in-progress or delete-in-progress tuple.)
+ * backend.  See combocid.c for more details.  Meanwhile, Xvac is only set by
+ * old-style VACUUM FULL, which does not have any command sub-structure and so
+ * does not need either Cmin or Cmax.  (This requires that old-style VACUUM
+ * FULL never try to move a tuple whose Cmin or Cmax is still interesting,
+ * ie, an insert-in-progress or delete-in-progress tuple.)
  *
  * A word about t_ctid: whenever a new tuple is stored on disk, its t_ctid
  * is initialized with its own TID (location).  If the tuple is ever updated,
@@ -111,7 +111,7 @@ typedef struct HeapTupleFields
 	union
 	{
 		CommandId	t_cid;		/* inserting or deleting command ID, or both */
-		TransactionId t_xvac;	/* VACUUM FULL xact ID */
+		TransactionId t_xvac;	/* old-style VACUUM FULL xact ID */
 	} t_field3;
 } HeapTupleFields;
 
@@ -175,10 +175,10 @@ typedef HeapTupleHeaderData *HeapTupleHeader;
 #define HEAP_XMAX_INVALID		0x0800	/* t_xmax invalid/aborted */
 #define HEAP_XMAX_IS_MULTI		0x1000	/* t_xmax is a MultiXactId */
 #define HEAP_UPDATED			0x2000	/* this is UPDATEd version of row */
-#define HEAP_MOVED_OFF			0x4000	/* moved to another place by VACUUM
-										 * FULL */
-#define HEAP_MOVED_IN			0x8000	/* moved from another place by VACUUM
-										 * FULL */
+#define HEAP_MOVED_OFF			0x4000	/* moved to another place by
+										 * old-style VACUUM FULL */
+#define HEAP_MOVED_IN			0x8000	/* moved from another place by
+										 * old-style VACUUM FULL */
 #define HEAP_MOVED (HEAP_MOVED_OFF | HEAP_MOVED_IN)
 
 #define HEAP_XACT_MASK			0xFFE0	/* visibility-related bits */
@@ -559,7 +559,7 @@ typedef HeapTupleData *HeapTuple;
 #define XLOG_HEAP_INSERT		0x00
 #define XLOG_HEAP_DELETE		0x10
 #define XLOG_HEAP_UPDATE		0x20
-#define XLOG_HEAP_MOVE			0x30
+/* 0x030 is free, was XLOG_HEAP_MOVE */
 #define XLOG_HEAP_HOT_UPDATE	0x40
 #define XLOG_HEAP_NEWPAGE		0x50
 #define XLOG_HEAP_LOCK			0x60
@@ -579,7 +579,7 @@ typedef HeapTupleData *HeapTuple;
  */
 #define XLOG_HEAP2_FREEZE		0x00
 #define XLOG_HEAP2_CLEAN		0x10
-#define XLOG_HEAP2_CLEAN_MOVE	0x20
+/* 0x20 is free, was XLOG_HEAP2_CLEAN_MOVE */
 #define XLOG_HEAP2_CLEANUP_INFO 0x30
 
 /*
@@ -634,15 +634,14 @@ typedef struct xl_heap_insert
 
 #define SizeOfHeapInsert	(offsetof(xl_heap_insert, all_visible_cleared) + sizeof(bool))
 
-/* This is what we need to know about update|move|hot_update */
+/* This is what we need to know about update|hot_update */
 typedef struct xl_heap_update
 {
 	xl_heaptid	target;			/* deleted tuple id */
 	ItemPointerData newtid;		/* new inserted tuple id */
 	bool		all_visible_cleared;	/* PD_ALL_VISIBLE was cleared */
-	bool		new_all_visible_cleared;		/* same for the page of newtid */
-	/* NEW TUPLE xl_heap_header (PLUS xmax & xmin IF MOVE OP) */
-	/* and TUPLE DATA FOLLOWS AT END OF STRUCT */
+	bool		new_all_visible_cleared;	/* same for the page of newtid */
+	/* NEW TUPLE xl_heap_header AND TUPLE DATA FOLLOWS AT END OF STRUCT */
 } xl_heap_update;
 
 #define SizeOfHeapUpdate	(offsetof(xl_heap_update, new_all_visible_cleared) + sizeof(bool))
@@ -657,13 +656,6 @@ typedef struct xl_heap_update
  * The total number of OffsetNumbers is therefore 2*nredirected+ndead+nunused.
  * Note that nunused is not explicitly stored, but may be found by reference
  * to the total record length.
- *
- * If the opcode is CLEAN_MOVE instead of CLEAN, then each redirection pair
- * should be interpreted as physically moving the "to" item pointer to the
- * "from" slot, rather than placing a redirection item in the "from" slot.
- * The moved pointers should be replaced by LP_UNUSED items (there will not
- * be explicit entries in the "now-unused" list for this).  Also, the
- * HEAP_ONLY bit in the moved tuples must be turned off.
  */
 typedef struct xl_heap_clean
 {
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 48b3dc0b34ebf5b1757f74f0011cbdd7fadcfbc9..acbb0cbc7d7234a9022fa37eb547f6100a4cac9d 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.127 2010/01/02 16:58:00 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.128 2010/02/08 04:33:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -577,8 +577,7 @@ extern bool _bt_page_recyclable(Page page);
 extern void _bt_delitems(Relation rel, Buffer buf,
 			 OffsetNumber *itemnos, int nitems, bool isVacuum,
 			 BlockNumber lastBlockVacuumed);
-extern int	_bt_pagedel(Relation rel, Buffer buf,
-			BTStack stack, bool vacuum_full);
+extern int	_bt_pagedel(Relation rel, Buffer buf, BTStack stack);
 
 /*
  * prototypes for functions in nbtsearch.c
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index b11029d20cb0649f09d9d4183c02778a1164dc31..27913aa8df22c195c92fcee40e843cddcc6d7638 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.100 2010/01/02 16:58:00 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.101 2010/02/08 04:33:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -119,12 +119,10 @@ typedef struct xl_xact_commit
  * transaction completion.
  */
 #define XACT_COMPLETION_UPDATE_RELCACHE_FILE	0x01
-#define XACT_COMPLETION_VACUUM_FULL				0x02
 #define XACT_COMPLETION_FORCE_SYNC_COMMIT		0x04
 
 /* Access macros for above flags */
 #define XactCompletionRelcacheInitFileInval(xlrec)	((xlrec)->xinfo & XACT_COMPLETION_UPDATE_RELCACHE_FILE)
-#define XactCompletionVacuumFull(xlrec)		((xlrec)->xinfo & XACT_COMPLETION_VACUUM_FULL)
 #define XactCompletionForceSyncCommit(xlrec) ((xlrec)->xinfo & XACT_COMPLETION_FORCE_SYNC_COMMIT)
 
 typedef struct xl_xact_abort
@@ -212,8 +210,6 @@ extern void UnregisterXactCallback(XactCallback callback, void *arg);
 extern void RegisterSubXactCallback(SubXactCallback callback, void *arg);
 extern void UnregisterSubXactCallback(SubXactCallback callback, void *arg);
 
-extern TransactionId RecordTransactionCommit(bool isVacuumFull);
-
 extern int	xactGetCommittedChildren(TransactionId **ptr);
 
 extern void xact_redo(XLogRecPtr lsn, XLogRecord *record);
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index daae5077f7268aceb77ec63fb5d5762acecd0b17..60b98dd66466545cc57936e42ee109d133d3f39c 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.101 2010/02/01 13:40:28 sriggs Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.102 2010/02/08 04:33:54 tgl Exp $
 */
 #ifndef XLOG_H
 #define XLOG_H
@@ -249,7 +249,6 @@ extern char *TriggerFile;
 extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
 extern void XLogFlush(XLogRecPtr RecPtr);
 extern void XLogBackgroundFlush(void);
-extern void XLogAsyncCommitFlush(void);
 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
 extern int	XLogFileInit(uint32 log, uint32 seg,
 			 bool *use_existent, bool use_lock);
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 5f209a68054a5096090e102442778ccaafcde9e6..6527aa72342ef07eaaf02b0a4137d71a9288cf11 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.87 2010/01/02 16:58:03 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.88 2010/02/08 04:33:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -142,7 +142,6 @@ extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age,
 					  TransactionId *freezeLimit,
 					  TransactionId *freezeTableLimit);
 extern void vac_update_datfrozenxid(void);
-extern bool vac_is_partial_index(Relation indrel);
 extern void vacuum_delay_point(void);
 
 /* in commands/vacuumlazy.c */
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index c5b1c81407073b2b3cca98a88b2c6d24eb91c73b..e73d113c183c74aa3092b7a655776d10d4c1548a 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.166 2010/01/02 16:58:03 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.167 2010/02/08 04:33:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -320,7 +320,7 @@ extern void ExecCloseScanRelation(Relation scanrel);
 extern void ExecOpenIndices(ResultRelInfo *resultRelInfo);
 extern void ExecCloseIndices(ResultRelInfo *resultRelInfo);
 extern List *ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid,
-					  EState *estate, bool is_vacuum_full);
+					  EState *estate);
 extern bool check_exclusion_constraint(Relation heap, Relation index,
 						   IndexInfo *indexInfo,
 						   ItemPointer tupleid,
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index e0ae804c4168856069321d4a136a342da766255f..ffa6055a57aa70cfdeed4f386a36806f3df1d526 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -13,7 +13,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.427 2010/01/28 23:21:13 petere Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.428 2010/02/08 04:33:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2246,8 +2246,7 @@ typedef enum VacuumOption
 	VACOPT_ANALYZE = 1 << 1,	/* do ANALYZE */
 	VACOPT_VERBOSE = 1 << 2,	/* print progress info */
 	VACOPT_FREEZE = 1 << 3,		/* FREEZE option */
-	VACOPT_FULL = 1 << 4,		/* FULL (non-concurrent) vacuum */
-	VACOPT_INPLACE = 1 << 5		/* traditional FULL INPLACE vacuum */
+	VACOPT_FULL = 1 << 4		/* FULL (non-concurrent) vacuum */
 } VacuumOption;
 
 typedef struct VacuumStmt
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index b7d647ab279e54ce65356265e86e33a2939ba433..e9c25e8d833579b8e23ae15384c416b2be98d3b5 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/include/parser/kwlist.h,v 1.10 2010/01/06 05:31:14 itagaki Exp $
+ *	  $PostgreSQL: pgsql/src/include/parser/kwlist.h,v 1.11 2010/02/08 04:33:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -193,7 +193,6 @@ PG_KEYWORD("initially", INITIALLY, RESERVED_KEYWORD)
 PG_KEYWORD("inline", INLINE_P, UNRESERVED_KEYWORD)
 PG_KEYWORD("inner", INNER_P, TYPE_FUNC_NAME_KEYWORD)
 PG_KEYWORD("inout", INOUT, COL_NAME_KEYWORD)
-PG_KEYWORD("inplace", INPLACE, UNRESERVED_KEYWORD)
 PG_KEYWORD("input", INPUT_P, UNRESERVED_KEYWORD)
 PG_KEYWORD("insensitive", INSENSITIVE, UNRESERVED_KEYWORD)
 PG_KEYWORD("insert", INSERT, UNRESERVED_KEYWORD)
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index 1a9bbe5b38a70b6b86b80f40c3d158e2c5b0c433..8fe710d7183856956085bd57e5801eade84e6f74 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.48 2010/02/07 20:48:13 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.49 2010/02/08 04:33:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -39,10 +39,6 @@ extern void PostPrepare_Inval(void);
 extern void CommandEndInvalidationMessages(void);
 
-extern void BeginNonTransactionalInvalidation(void);
-
-extern void EndNonTransactionalInvalidation(void);
-
 extern void CacheInvalidateHeapTuple(Relation relation,
 						 HeapTuple tuple);
 
 extern void CacheInvalidateCatalog(Oid catalogId);
diff --git a/src/test/regress/expected/vacuum.out b/src/test/regress/expected/vacuum.out
index 40db5df47ecb397aa24893e479306c4223db7482..f1b8dd0cacf12530f82f38d6f8514b4c963c1b2e 100644
--- a/src/test/regress/expected/vacuum.out
+++ b/src/test/regress/expected/vacuum.out
@@ -57,65 +57,16 @@ SELECT * FROM vactst;
 (0 rows)
 
 VACUUM (FULL, FREEZE) vactst;
-VACUUM (ANALYZE, FULL INPLACE) vactst;
+VACUUM (ANALYZE, FULL) vactst;
 CREATE TABLE vaccluster (i INT PRIMARY KEY);
 NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "vaccluster_pkey" for table "vaccluster"
 ALTER TABLE vaccluster CLUSTER ON vaccluster_pkey;
 INSERT INTO vaccluster SELECT * FROM vactst;
-CREATE TEMP TABLE vacid (
-  relid regclass,
-  filenode_0 oid,
-  filenode_1 oid,
-  filenode_2 oid,
-  filenode_3 oid
-);
-INSERT INTO vacid (relid, filenode_0)
-SELECT oid, relfilenode FROM pg_class WHERE oid::regclass IN (
-  'pg_am',          -- normal catalog
-  'pg_class',       -- fundamental catalog
-  'pg_database',    -- shared catalog
-  'vaccluster' ,    -- clustered table
-  'vacid',          -- temp table
-  'vactst'          -- normal table
-);
--- only clusterd table should be changed
 CLUSTER vaccluster;
-UPDATE vacid SET filenode_1 = relfilenode
-  FROM pg_class WHERE oid = relid;
--- all tables should not be changed
-VACUUM (FULL INPLACE) pg_am;
-VACUUM (FULL INPLACE) pg_class;
-VACUUM (FULL INPLACE) pg_database;
-VACUUM (FULL INPLACE) vaccluster;
-VACUUM (FULL INPLACE) vacid;
-VACUUM (FULL INPLACE) vactst;
-UPDATE vacid SET filenode_2 = relfilenode
-  FROM pg_class WHERE oid = relid;
--- only non-system tables should be changed
 VACUUM FULL pg_am;
 VACUUM FULL pg_class;
 VACUUM FULL pg_database;
 VACUUM FULL vaccluster;
-VACUUM FULL vacid;
 VACUUM FULL vactst;
-UPDATE vacid SET filenode_3 = relfilenode
-  FROM pg_class WHERE oid = relid;
-SELECT relid,
-       filenode_0 = filenode_1 AS cluster,
-       filenode_1 = filenode_2 AS full_inplace,
-       filenode_2 = filenode_3 AS full
-  FROM vacid
-  ORDER BY relid::text;
-    relid    | cluster | full_inplace | full 
--------------+---------+--------------+------
- pg_am       | t       | t            | f
- pg_class    | t       | t            | t
- pg_database | t       | t            | t
- vaccluster  | f       | t            | f
- vacid       | t       | t            | f
- vactst      | t       | t            | f
-(6 rows)
-
 DROP TABLE vaccluster;
-DROP TABLE vacid;
 DROP TABLE vactst;
diff --git a/src/test/regress/sql/vacuum.sql b/src/test/regress/sql/vacuum.sql
index 87772ad5f6d6c3c7d414e6116ea8465d04edd502..30551ad1f276bc139de3405dbf2a3f94725bd1ca 100644
--- a/src/test/regress/sql/vacuum.sql
+++ b/src/test/regress/sql/vacuum.sql
@@ -40,62 +40,18 @@ DELETE FROM vactst;
 SELECT * FROM vactst;
 
 VACUUM (FULL, FREEZE) vactst;
-VACUUM (ANALYZE, FULL INPLACE) vactst;
+VACUUM (ANALYZE, FULL) vactst;
 
 CREATE TABLE vaccluster (i INT PRIMARY KEY);
 ALTER TABLE vaccluster CLUSTER ON vaccluster_pkey;
 INSERT INTO vaccluster SELECT * FROM vactst;
-
-CREATE TEMP TABLE vacid (
-  relid regclass,
-  filenode_0 oid,
-  filenode_1 oid,
-  filenode_2 oid,
-  filenode_3 oid
-);
-
-INSERT INTO vacid (relid, filenode_0)
-SELECT oid, relfilenode FROM pg_class WHERE oid::regclass IN (
-  'pg_am',          -- normal catalog
-  'pg_class',       -- fundamental catalog
-  'pg_database',    -- shared catalog
-  'vaccluster' ,    -- clustered table
-  'vacid',          -- temp table
-  'vactst'          -- normal table
-);
-
--- only clusterd table should be changed
 CLUSTER vaccluster;
 
-UPDATE vacid SET filenode_1 = relfilenode
-  FROM pg_class WHERE oid = relid;
--- all tables should not be changed
-VACUUM (FULL INPLACE) pg_am;
-VACUUM (FULL INPLACE) pg_class;
-VACUUM (FULL INPLACE) pg_database;
-VACUUM (FULL INPLACE) vaccluster;
-VACUUM (FULL INPLACE) vacid;
-VACUUM (FULL INPLACE) vactst;
-UPDATE vacid SET filenode_2 = relfilenode
-  FROM pg_class WHERE oid = relid;
-
--- only non-system tables should be changed
 VACUUM FULL pg_am;
 VACUUM FULL pg_class;
 VACUUM FULL pg_database;
 VACUUM FULL vaccluster;
-VACUUM FULL vacid;
 VACUUM FULL vactst;
 
-UPDATE vacid SET filenode_3 = relfilenode
-  FROM pg_class WHERE oid = relid;
-
-SELECT relid,
-       filenode_0 = filenode_1 AS cluster,
-       filenode_1 = filenode_2 AS full_inplace,
-       filenode_2 = filenode_3 AS full
-  FROM vacid
-  ORDER BY relid::text;
 DROP TABLE vaccluster;
-DROP TABLE vacid;
 DROP TABLE vactst;