From a1a233af66ed14d225ac2d5e7948a5cc8ed2cde6 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Fri, 18 Nov 2011 18:23:55 -0500 Subject: [PATCH] Further review of range-types patch. Lots of documentation cleanup today, and still more type_sanity tests. --- doc/src/sgml/catalogs.sgml | 33 +- doc/src/sgml/extend.sgml | 23 +- doc/src/sgml/func.sgml | 18 +- doc/src/sgml/rangetypes.sgml | 417 +++++++++++++--------- doc/src/sgml/ref/create_type.sgml | 165 ++++++--- doc/src/sgml/rowtypes.sgml | 2 +- src/bin/pg_dump/pg_dump.c | 10 +- src/bin/pg_dump/pg_dump_sort.c | 7 +- src/include/catalog/pg_range.h | 2 +- src/include/catalog/pg_type.h | 5 +- src/test/regress/expected/type_sanity.out | 79 +++- src/test/regress/sql/type_sanity.sql | 47 ++- 12 files changed, 557 insertions(+), 251 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 2063812942f..f1dcf8ab1a6 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -4607,7 +4607,9 @@ </indexterm> <para> - The catalog <structname>pg_range</structname> stores information about range types. + The catalog <structname>pg_range</structname> stores information about + range types. This is in addition to the types' entries in + <link linkend="catalog-pg-type"><structname>pg_type</structname></link>. </para> <table> @@ -4628,47 +4630,57 @@ <entry><structfield>rngtypid</structfield></entry> <entry><type>oid</type></entry> <entry><literal><link linkend="catalog-pg-type"><structname>pg_type</structname></link>.oid</literal></entry> - <entry>The type that is a range type</entry> + <entry>OID of the range type</entry> </row> <row> <entry><structfield>rngsubtype</structfield></entry> <entry><type>oid</type></entry> <entry><literal><link linkend="catalog-pg-type"><structname>pg_type</structname></link>.oid</literal></entry> - <entry>Subtype of this range type, e.g. <type>integer</type> is the subtype of <type>int4range</type></entry> + <entry>OID of the element type (subtype) of this range type</entry> </row> <row> <entry><structfield>rngcollation</structfield></entry> <entry><type>oid</type></entry> <entry><literal><link linkend="catalog-pg-collation"><structname>pg_collation</structname></link>.oid</literal></entry> - <entry>The collation used when comparing range boundaries</entry> + <entry>OID of the collation used for range comparisons, or 0 if none</entry> </row> <row> <entry><structfield>rngsubopc</structfield></entry> <entry><type>oid</type></entry> <entry><literal><link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link>.oid</literal></entry> - <entry>The operator class used when comparing range boundaries</entry> + <entry>OID of the subtype's operator class used for range comparisons</entry> </row> <row> <entry><structfield>rngcanonical</structfield></entry> <entry><type>regproc</type></entry> <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry> - <entry>A function to convert a range into its canonical form</entry> + <entry>OID of the function to convert a range value into canonical form, + or 0 if none</entry> </row> <row> <entry><structfield>rngsubdiff</structfield></entry> <entry><type>regproc</type></entry> <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry> - <entry>A function to return the distance between two lower and upper bound, as a <type>double precision</type>. Used for GiST support</entry> + <entry>OID of the function to return the difference between two element + values as <type>double precision</type>, or 0 if none</entry> </row> </tbody> </tgroup> </table> + <para> + <structfield>rngsubopc</> (plus <structfield>rngcollation</>, if the + element type is collatable) determines the sort ordering used by the range + type. <structfield>rngcanonical</> is used when the element type is + discrete. <structfield>rngsubdiff</> is optional but should be supplied to + improve performance of GiST indexes on the range type. + </para> + </sect1> <sect1 id="catalog-pg-rewrite"> @@ -6059,7 +6071,8 @@ <literal>c</literal> for a composite type (e.g., a table's row type), <literal>d</literal> for a domain, <literal>e</literal> for an enum type, - or <literal>p</literal> for a pseudo-type. + <literal>p</literal> for a pseudo-type, or + <literal>r</literal> for a range type. See also <structfield>typrelid</structfield> and <structfield>typbasetype</structfield>. </entry> @@ -6429,6 +6442,10 @@ <entry><literal>P</literal></entry> <entry>Pseudo-types</entry> </row> + <row> + <entry><literal>R</literal></entry> + <entry>Range types</entry> + </row> <row> <entry><literal>S</literal></entry> <entry>String types</entry> diff --git a/doc/src/sgml/extend.sgml b/doc/src/sgml/extend.sgml index f3850b391e0..8d5b9d0c836 100644 --- a/doc/src/sgml/extend.sgml +++ b/doc/src/sgml/extend.sgml @@ -200,13 +200,13 @@ <para> Five pseudo-types of special interest are <type>anyelement</>, <type>anyarray</>, <type>anynonarray</>, <type>anyenum</>, - and <type>anyrange</>, which are collectively - called <firstterm>polymorphic types</>. Any function declared - using these types is said to be a <firstterm>polymorphic - function</>. A polymorphic function can operate on many - different data types, with the specific data type(s) being - determined by the data types actually passed to it in a - particular call. + and <type>anyrange</>, + which are collectively called <firstterm>polymorphic types</>. + Any function declared using these types is said to be + a <firstterm>polymorphic function</>. A polymorphic function can + operate on many different data types, with the specific data type(s) + being determined by the data types actually passed to it in a particular + call. </para> <para> @@ -217,15 +217,16 @@ data type, but in any given call they must all be the <emphasis>same</emphasis> actual type. Each position declared as <type>anyarray</type> can have any array data type, - but similarly they must all be the same type. If there are + but similarly they must all be the same type. And similarly, + positions declared as <type>anyrange</type> must all be the same range + type. Furthermore, if there are positions declared <type>anyarray</type> and others declared <type>anyelement</type>, the actual array type in the <type>anyarray</type> positions must be an array whose elements are the same type appearing in the <type>anyelement</type> positions. Similarly, if there are positions declared <type>anyrange</type> - and others declared - <type>anyelement</type>, the actual range type in the - <type>anyrange</type> positions must be a range whose subtype is + and others declared <type>anyelement</type>, the actual range type in + the <type>anyrange</type> positions must be a range whose subtype is the same type appearing in the <type>anyelement</type> positions. <type>anynonarray</> is treated exactly the same as <type>anyelement</>, but adds the additional constraint that the actual type must not be diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index a7dc05bf382..be92e6acb32 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -10525,18 +10525,32 @@ SELECT NULLIF(value, '(none)') ... <row> <entry> <literal>@></literal> </entry> - <entry>contains</entry> + <entry>contains range</entry> + <entry><literal>int4range(2,4) @> int4range(2,3)</literal></entry> + <entry><literal>t</literal></entry> + </row> + + <row> + <entry> <literal>@></literal> </entry> + <entry>contains element</entry> <entry><literal>'[2011-01-01,2011-03-01)'::tsrange @> '2011-01-10'::timestamp</literal></entry> <entry><literal>t</literal></entry> </row> <row> <entry> <literal><@</literal> </entry> - <entry>is contained by</entry> + <entry>range is contained by</entry> <entry><literal>int4range(2,4) <@ int4range(1,7)</literal></entry> <entry><literal>t</literal></entry> </row> + <row> + <entry> <literal><@</literal> </entry> + <entry>element is contained by</entry> + <entry><literal>42 <@ int4range(1,7)</literal></entry> + <entry><literal>f</literal></entry> + </row> + <row> <entry> <literal>&&</literal> </entry> <entry>overlap (have points in common)</entry> diff --git a/doc/src/sgml/rangetypes.sgml b/doc/src/sgml/rangetypes.sgml index cf16f25de91..4a888a021dd 100644 --- a/doc/src/sgml/rangetypes.sgml +++ b/doc/src/sgml/rangetypes.sgml @@ -8,137 +8,166 @@ </indexterm> <para> - Range types are data types representing a range of values over some - sub-type with a total order. For instance, ranges + Range types are data types representing a range of values of some + element type (called the range's <firstterm>subtype</>). + For instance, ranges of <type>timestamp</type> might be used to represent the ranges of time that a meeting room is reserved. In this case the data type - is <type>tsrange</type> (short for "timestamp range"), - and <type>timestamp</type> is the sub-type with a total order. + is <type>tsrange</type> (short for <quote>timestamp range</quote>), + and <type>timestamp</type> is the subtype. The subtype must have + a total order so that it is well-defined whether element values are + within, before, or after a range of values. </para> <para> - Range types are useful because they represent many points in a - single value. The use of time and date ranges for scheduling + Range types are useful because they represent many element values in a + single range value, and because concepts such as overlapping ranges can + be expressed clearly. The use of time and date ranges for scheduling purposes is the clearest example; but price ranges, measurement - ranges from an instrument, etc., are also useful. + ranges from an instrument, and so forth can also be useful. </para> <sect2 id="rangetypes-builtin"> <title>Built-in Range Types</title> + <para> PostgreSQL comes with the following built-in range types: <itemizedlist> <listitem> <para> - <type>INT4RANGE</type> -- Range of <type>INTEGER</type>. This is a discrete range type, see <xref linkend="rangetypes-discrete">. + <type>INT4RANGE</type> — Range of <type>INTEGER</type> </para> </listitem> <listitem> <para> - <type>INT8RANGE</type> -- Range of <type>BIGINT</type>. This is a discrete range type, see <xref linkend="rangetypes-discrete">. + <type>INT8RANGE</type> — Range of <type>BIGINT</type> </para> </listitem> <listitem> <para> - <type>NUMRANGE</type> -- Range of <type>NUMERIC</type>. + <type>NUMRANGE</type> — Range of <type>NUMERIC</type> </para> </listitem> <listitem> <para> - <type>TSRANGE</type> -- Range of <type>TIMESTAMP WITHOUT TIME ZONE</type>. + <type>TSRANGE</type> — Range of <type>TIMESTAMP WITHOUT TIME ZONE</type> </para> </listitem> <listitem> <para> - <type>TSTZRANGE</type> -- Range of <type>TIMESTAMP WITH TIME ZONE</type>. + <type>TSTZRANGE</type> — Range of <type>TIMESTAMP WITH TIME ZONE</type> </para> </listitem> <listitem> <para> - <type>DATERANGE</type> -- Range of <type>DATE</type>. This is a discrete range type, see <xref linkend="rangetypes-discrete">. + <type>DATERANGE</type> — Range of <type>DATE</type> </para> </listitem> </itemizedlist> - In addition, you can define your own; see <xref linkend="SQL-CREATETYPE"> for more information. + In addition, you can define your own range types; + see <xref linkend="SQL-CREATETYPE"> for more information. </para> </sect2> <sect2 id="rangetypes-examples"> <title>Examples</title> + <para> <programlisting> -CREATE TABLE reservation ( during TSRANGE ); -INSERT INTO reservation VALUES - ( '[2010-01-01 14:30, 2010-01-01 15:30)' ); +CREATE TABLE reservation ( room int, during TSRANGE ); +INSERT INTO reservation VALUES + ( 1108, '[2010-01-01 14:30, 2010-01-01 15:30)' ); -- Containment SELECT int4range(10, 20) @> 3; -- Overlaps -SELECT numrange(11.1, 22.2) && numrange(20.0, 30.0); +SELECT numrange(11.1, 22.2) && numrange(20.0, 30.0); --- Find the upper bound: +-- Extract the upper bound SELECT upper(int8range(15, 25)); --- Compute the intersection: +-- Compute the intersection SELECT int4range(10, 20) * int4range(15, 25); -- Is the range non-empty? SELECT isempty(numrange(1, 5)); - </programlisting> - See <xref linkend="range-functions-table"> - and <xref linkend="range-operators-table"> for complete lists of - functions and operators on range types. + See <xref linkend="range-functions-table"> + and <xref linkend="range-operators-table"> for complete lists of + functions and operators on range types. </para> </sect2> <sect2 id="rangetypes-inclusivity"> <title>Inclusive and Exclusive Bounds</title> + <para> - Every range has two bounds, the lower bound and the upper bound. All - points in between those values are included in the range. An - inclusive bound means that the boundary point itself is included in - the range as well, while an exclusive bound means that the boundary - point is not included in the range. + Every non-empty range has two bounds, the lower bound and the upper + bound. All points between these values are included in the range. An + inclusive bound means that the boundary point itself is included in + the range as well, while an exclusive bound means that the boundary + point is not included in the range. </para> + <para> - An inclusive lower bound is represented by <literal>[</literal> - while an exclusive lower bound is represented - by <literal>(</literal> (see <xref linkend="rangetypes-construct"> - and <xref linkend="rangetypes-io"> below). Likewise, an inclusive - upper bound is represented by <literal>]</literal>, while an - exclusive upper bound is represented by <literal>)</literal>. + In the text form of a range, an inclusive lower bound is represented by + <quote><literal>[</literal></quote> while an exclusive lower bound is + represented by <quote><literal>(</literal></quote>. Likewise, an inclusive upper bound is represented by + <quote><literal>]</literal></quote>, while an exclusive upper bound is + represented by <quote><literal>)</literal></quote>. + (See <xref linkend="rangetypes-io"> for more details.) </para> + <para> - Functions <literal>lower_inc</literal> - and <literal>upper_inc</literal> test the inclusivity of the lower - and upper bounds of a range, respectively. + The functions <literal>lower_inc</literal> + and <literal>upper_inc</literal> test the inclusivity of the lower + and upper bounds of a range value, respectively. </para> </sect2> <sect2 id="rangetypes-infinite"> - <title>Infinite (unbounded) Ranges</title> + <title>Infinite (Unbounded) Ranges</title> + <para> - The lower bound of a range can be omitted, meaning that all points - less (or equal to, if inclusive) than the upper bound are included - in the range. Likewise, if the upper bound of the range is omitted, - then all points greater than (or equal to, if omitted) the lower - bound are included in the range. If both lower and upper bounds are - omitted, all points are considered to be in the range. + The lower bound of a range can be omitted, meaning that all points less + than the upper bound are included in the range. Likewise, if the upper + bound of the range is omitted, then all points greater than the lower bound + are included in the range. If both lower and upper bounds are omitted, all + values of the element type are considered to be in the range. </para> + + <para> + This is equivalent to considering that the lower bound is <quote>minus + infinity</quote>, or the upper bound is <quote>plus infinity</quote>, + respectively. But note that these infinite values are never values of + the range's element type, and can never be part of the range. (So there + is no such thing as an inclusive infinite bound — if you try to + write one, it will automatically be converted to an exclusive bound.) + </para> + + <para> + Also, some element types have a notion of <quote>infinity</>, but that + is just another value so far as the range type mechanisms are concerned. + For example, in timestamp ranges, <literal>[today,]</> means the same + thing as <literal>[today,)</>. But <literal>[today,infinity]</> means + something different from <literal>[today,infinity)</> — the latter + excludes the special <type>timestamp</> value <literal>infinity</>. + </para> + <para> - Functions <literal>lower_inf</literal> - and <literal>upper_inf</literal> test the range for infinite lower - and upper bounds of a range, respectively. + The functions <literal>lower_inf</literal> + and <literal>upper_inf</literal> test for infinite lower + and upper bounds of a range, respectively. </para> </sect2> <sect2 id="rangetypes-io"> - <title>Input/Output</title> + <title>Range Input/Output</title> + <para> - The input follows one of the following patterns: + The input for a range value must follow one of the following patterns: <synopsis> (<replaceable>lower-bound</replaceable>,<replaceable>upper-bound</replaceable>) (<replaceable>lower-bound</replaceable>,<replaceable>upper-bound</replaceable>] @@ -146,127 +175,173 @@ SELECT isempty(numrange(1, 5)); [<replaceable>lower-bound</replaceable>,<replaceable>upper-bound</replaceable>] empty </synopsis> - Notice that the final pattern is <literal>empty</literal>, which - represents an empty range (a range that contains no points). + The parentheses or brackets indicate whether the lower and upper bounds + are exclusive or inclusive, as described previously. + Notice that the final pattern is <literal>empty</literal>, which + represents an empty range (a range that contains no points). </para> + <para> - The <replaceable>lower-bound</replaceable> may be either a string - that is valid input for the sub-type, or omitted (to indicate no - lower bound); and <replaceable>upper-bound</replaceable> may be - either a string that is valid input for the sub-type, or omitted (to - indicate no upper bound). + The <replaceable>lower-bound</replaceable> may be either a string + that is valid input for the subtype, or empty to indicate no + lower bound. Likewise, <replaceable>upper-bound</replaceable> may be + either a string that is valid input for the subtype, or empty to + indicate no upper bound. </para> + <para> - Either the <replaceable>lower-bound</replaceable> or - the <replaceable>upper-bound</replaceable> may be quoted - using <literal>""</literal> (double quotation marks), which will allow - special characters such as "<literal>,</literal>". Within quotation - marks, "<literal>\</literal>" (backslash) serves as an escape - character. + Each bound value can be quoted using <literal>"</literal> (double quote) + characters. This is necessary if the bound value contains parentheses, + brackets, commas, double quotes, or backslashes, since these characters + would otherwise be taken as part of the range syntax. To put a double + quote or backslash in a quoted bound value, precede it with a + backslash. (Also, a pair of double quotes within a double-quoted bound + value is taken to represent a double quote character, analogously to the + rules for single quotes in SQL literal strings.) Alternatively, you can + avoid quoting and use backslash-escaping to protect all data characters + that would otherwise be taken as range syntax. Also, to write a bound + value that is an empty string, write <literal>""</literal>, since writing + nothing means an infinite bound. </para> + <para> - The choice between the other input formats affects the inclusivity - of the bounds. See <xref linkend="rangetypes-inclusivity">. + Whitespace is allowed before and after the range value, but any whitespace + between the parentheses or brackets is taken as part of the lower or upper + bound value. (Depending on the element type, it might or might not be + significant.) </para> + + <note> + <para> + These rules are very similar to those for writing field values in + composite-type literals. See <xref linkend="rowtypes-io-syntax"> for + additional commentary. + </para> + </note> + <para> Examples: <programlisting> --- includes point 3, does not include point 7, and does include all points in between -select '[3,7)' +-- includes 3, does not include 7, and does include all points in between +select '[3,7)'::int4range; -- does not include either 3 or 7, but includes all points in between -select '(3,7)' +select '(3,7)'::int4range; -- includes only the single point 4 -select '[4,4]' +select '[4,4]'::int4range; </programlisting> </para> </sect2> <sect2 id="rangetypes-construct"> <title>Constructing Ranges</title> + <para> - Each range type has a constructor by the same name. The constructor + Each range type has a constructor function with the same name as the range + type. Using the constructor function is frequently more convenient than + writing a range literal constant, since it avoids the need for extra + quoting of the bound values. The constructor function accepts from zero to three arguments. The zero-argument form constructs an empty range; the one-argument form constructs a - singleton range; the two-argument form constructs a range - in <literal>[ )</literal> form; and the three-argument form - constructs a range in a form specified by the third argument. For - example: + singleton range; the two-argument form constructs a range in + standard form (lower bound inclusive, upper bound exclusive); + and the three-argument form constructs a range in a form specified by the + third argument. The third argument must be one of the strings + <quote><literal>()</literal></quote>, + <quote><literal>(]</literal></quote>, + <quote><literal>[)</literal></quote>, or + <quote><literal>[]</literal></quote>. + For example: + <programlisting> -- Three-argument form: lower bound, upper bound, and third argument indicating --- inclusivity/exclusivity of bounds (if omitted, defaults to <literal>'[)'</literal>). +-- inclusivity/exclusivity of bounds. SELECT numrange(1.0, 14.0, '(]'); --- The int4range input will exclude the lower bound and include the upper bound; but the --- resulting output will appear in the canonical form; see <xref linkend="rangetypes-discrete">. +-- If the third argument is omitted, '[)' is assumed. +SELECT numrange(1.0, 14.0); + +-- Although '(]' is specified here, on display the value will be converted to +-- canonical form, since int8range is a discrete range type (see below). SELECT int8range(1, 14, '(]'); --- Single argument form constructs a singleton range; that is a range consisting of just --- one point. +-- Using NULL for either bound causes the range to be unbounded on that side. +SELECT numrange(NULL, 2.2); + +-- Single argument constructs a singleton range; that is a range consisting of +-- just one point. SELECT numrange(11.1); --- Zero-argument form constructs and empty range. +-- Zero-argument form constructs an empty range. SELECT numrange(); - --- Using NULL for a bound causes the range to be unbounded on that side; that is, negative --- infinity for the lower bound or positive infinity for the upper bound. -SELECT numrange(NULL,2.2); </programlisting> </para> </sect2> <sect2 id="rangetypes-discrete"> <title>Discrete Range Types</title> + <para> - Discrete ranges are those that have a - defined <literal>canonical</literal> function. Loosely speaking, a - discrete range has a sub-type with a well-defined "step"; - e.g. <type>INTEGER</type> or <type>DATE</type>. + A discrete range is one whose element type has a well-defined + <quote>step</quote>, such as <type>INTEGER</type> or <type>DATE</type>. + In these types two elements can be said to be adjacent, since there are + no valid values between them. This contrasts with continuous ranges, + where it's always (or almost always) possible to identify other element + values between two given values. For example, a range over the + <type>NUMERIC</> type is continuous, as is a range over <type>TIMESTAMP</>. + (Even though <type>TIMESTAMP</> has limited precision, and so could + theoretically be treated as discrete, it's better to consider it continuous + since the step size is normally not of interest.) </para> + <para> - The <literal>canonical</literal> function should take an input range - value, and return an equal range value that may have a different - formatting. For instance, the integer range <literal>[1, - 7]</literal> could be represented by the equal integer - range <literal>[1, 8)</literal>. The two values are equal because - there are no points within the integer domain - between <literal>7</literal> and <literal>8</literal>, so not - including the end point <literal>8</literal> is the same as - including the end point <literal>7</literal>. The canonical output - for two values that are equal, like <literal>[1, 7]</literal> - and <literal>[1, 8)</literal>, must be equal. It doesn't matter - which representation you choose to be the canonical one, as long as - two equal values with different formattings are always mapped to the - same value with the same formatting. If the canonical function is - not specified, then ranges with different formatting - (e.g. <literal>[1, 7]</literal> and <literal>[1, 8)</literal>) will - always be treated as unequal. + Another way to think about a discrete range type is that there is a clear + idea of a <quote>next</> or <quote>previous</> value for each element value. + Knowing that, it is possible to convert between inclusive and exclusive + representations of a range's bounds, by choosing the next or previous + element value instead of the one originally given. + For example, in an integer range type <literal>[4,8]</> and + <literal>(3,9)</> denote the same set of values; but this would not be so + for a range over numeric. </para> + <para> - For types such as <type>NUMRANGE</type>, this is not possible, - because there are always points in between two - distinct <type>NUMERIC</type> values. + A discrete range type should have a <firstterm>canonicalization</> + function that is aware of the desired step size for the element type. + The canonicalization function is charged with converting values of the + range type to have consistently inclusive or exclusive bounds. + The canonicalization function takes an input range value, and + must return an equivalent range value that may have a different + formatting. The canonical output for two values that are equivalent, like + <literal>[1, 7]</literal> and <literal>[1, 8)</literal>, must be identical. + It doesn't matter which representation you choose to be the canonical one, + so long as two equivalent values with different formattings are always + mapped to the same value with the same formatting. If a canonicalization + function is not specified, then ranges with different formatting + will always be treated as unequal, even though they might represent the + same set of values. </para> + <para> - The built-in range - types <type>INT4RANGE</type>, <type>INT8RANGE</type>, - and <type>DATERNAGE</type> all use a canonical form that includes - the lower bound and excludes the upper bound; that is, <literal>[ - )</literal>. User-defined ranges can use other conventions, however. + The built-in range types <type>INT4RANGE</type>, <type>INT8RANGE</type>, + and <type>DATERANGE</type> all use a canonical form that includes + the lower bound and excludes the upper bound; that is, + <literal>[)</literal>. User-defined range types can use other conventions, + however. </para> </sect2> <sect2 id="rangetypes-defining"> <title>Defining New Range Types</title> + <para> - Users can define their own range types. The most common reason to do - this is to use ranges where the subtype is not among the built-in - range types, e.g. a range of type <type>FLOAT</type> (or, if the - subtype itself is a user-defined type). - </para> - <para> - For example: to define a new range type of sub-type <type>DOUBLE PRECISION</type>: + Users can define their own range types. The most common reason to do + this is to use ranges over subtypes not provided among the built-in + range types. + For example, to define a new range type of subtype <type>DOUBLE + PRECISION</type>: + <programlisting> CREATE TYPE FLOATRANGE AS RANGE ( SUBTYPE = DOUBLE PRECISION @@ -274,99 +349,113 @@ CREATE TYPE FLOATRANGE AS RANGE ( SELECT '[1.234, 5.678]'::floatrange; </programlisting> - Because <type>DOUBLE PRECISION</type> has no meaningful "step", we - do not define a <literal>canonical</literal> - function. See <xref linkend="SQL-CREATETYPE"> for more - information. + + Because <type>DOUBLE PRECISION</type> has no meaningful + <quote>step</quote>, we do not define a canonicalization + function. + </para> + + <para> + Defining your own range type also allows you to specify a different + operator class or collation to use, so as to change the sort ordering + that determines which values fall into a given range. You might also + choose to use a different canonicalization function, either to change + the displayed format or to modify the effective <quote>step size</>. </para> + <para> - Defining your own range type also allows you to specify a different - operator class or collation to use (which affects the points that - fall between the range boundaries), or a different canonicalization - function. + See <xref linkend="SQL-CREATETYPE"> for more information about creating + range types. </para> </sect2> <sect2 id="rangetypes-gist"> + <title>Indexing</title> + <indexterm> <primary>range type</primary> - <secondary>gist</secondary> + <secondary>GiST index</secondary> </indexterm> - <title>Indexing</title> + <para> - GiST indexes can be applied to a table containing a range type. For instance: + GiST indexes can be applied to columns of range types. For instance: <programlisting> CREATE INDEX reservation_idx ON reservation USING gist (during); </programlisting> - This index may speed up queries - involving <literal>&&</literal> - (overlaps), <literal>@></literal> (contains), and all the boolean - operators found in this - table: <xref linkend="range-operators-table">. + This index may speed up queries + involving <literal>&&</literal> + (overlaps), <literal>@></literal> (contains), and other boolean + operators listed in <xref linkend="range-operators-table">. </para> </sect2> <sect2 id="rangetypes-constraint"> + <title>Constraints on Ranges</title> + <indexterm> <primary>range type</primary> <secondary>exclude</secondary> </indexterm> - <title>Constraints on Ranges</title> + <para> While <literal>UNIQUE</literal> is a natural constraint for scalar values, it is usually unsuitable for range types. Instead, an exclusion constraint is often more appropriate (see <link linkend="SQL-CREATETABLE-EXCLUDE">CREATE TABLE ... CONSTRAINT ... EXCLUDE</link>). Exclusion constraints allow the - specification of constraints such as "non-overlapping" on a range - type. For example: + specification of constraints such as <quote>non-overlapping</quote> on a + range type. For example: + <programlisting> ALTER TABLE reservation - ADD EXCLUDE USING gist (during WITH &&); + ADD EXCLUDE USING gist (during WITH &&); </programlisting> + That constraint will prevent any overlapping values from existing in the table at the same time: + <programlisting> -INSERT INTO reservation VALUES - ( '[2010-01-01 11:30, 2010-01-01 13:00)' ); --- Result: INSERT 0 1 -INSERT INTO reservation VALUES - ( '[2010-01-01 14:45, 2010-01-01 15:45)' ); --- Result: --- ERROR: conflicting key value violates exclusion constraint "reservation_during_excl" --- DETAIL: Key (during)=([ 2010-01-01 14:45:00, 2010-01-01 15:45:00 )) conflicts with --- existing key (during)=([ 2010-01-01 14:30:00, 2010-01-01 15:30:00 )). +INSERT INTO reservation VALUES + ( 1108, '[2010-01-01 11:30, 2010-01-01 13:00)' ); +INSERT 0 1 + +INSERT INTO reservation VALUES + ( 1108, '[2010-01-01 14:45, 2010-01-01 15:45)' ); +ERROR: conflicting key value violates exclusion constraint "reservation_during_excl" +DETAIL: Key (during)=([ 2010-01-01 14:45:00, 2010-01-01 15:45:00 )) conflicts +with existing key (during)=([ 2010-01-01 14:30:00, 2010-01-01 15:30:00 )). </programlisting> </para> + <para> - Combine range types and exclusion constraints - with <link linkend="btree-gist">btree_gist</link> for maximum - flexibility defining - constraints. After <literal>btree_gist</literal> is installed, the - following constraint will prevent overlapping ranges only if the - meeting room numbers are equal: -<programlisting> + You can use the <link linkend="btree-gist"><literal>btree_gist</></link> + extension to define exclusion constraints on plain scalar datatypes, which + can then be combined with range exclusions for maximum flexibility. For + example, after <literal>btree_gist</literal> is installed, the following + constraint will reject overlapping ranges only if the meeting room numbers + are equal: +<programlisting> CREATE TABLE room_reservation ( room TEXT, during TSRANGE, - EXCLUDE USING gist (room WITH =, during WITH &&) + EXCLUDE USING gist (room WITH =, during WITH &&) ); INSERT INTO room_reservation VALUES ( '123A', '[2010-01-01 14:00, 2010-01-01 15:00)' ); --- Result: INSERT 0 1 +INSERT 0 1 + INSERT INTO room_reservation VALUES ( '123A', '[2010-01-01 14:30, 2010-01-01 15:30)' ); --- Result: --- ERROR: conflicting key value violates exclusion constraint "room_reservation_room_during_excl" --- DETAIL: Key (room, during)=(123A, [ 2010-01-01 14:30:00, 2010-01-01 15:30:00 )) conflicts with --- existing key (room, during)=(123A, [ 2010-01-01 14:00:00, 2010-01-01 15:00:00 )). +ERROR: conflicting key value violates exclusion constraint "room_reservation_room_during_excl" +DETAIL: Key (room, during)=(123A, [ 2010-01-01 14:30:00, 2010-01-01 15:30:00 )) conflicts with +existing key (room, during)=(123A, [ 2010-01-01 14:00:00, 2010-01-01 15:00:00 )). + INSERT INTO room_reservation VALUES ( '123B', '[2010-01-01 14:30, 2010-01-01 15:30)' ); --- Result: INSERT 0 1 - +INSERT 0 1 </programlisting> </para> </sect2> diff --git a/doc/src/sgml/ref/create_type.sgml b/doc/src/sgml/ref/create_type.sgml index ebcd461bd91..808990feb77 100644 --- a/doc/src/sgml/ref/create_type.sgml +++ b/doc/src/sgml/ref/create_type.sgml @@ -28,12 +28,12 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> AS ENUM ( [ '<replaceable class="parameter">label</replaceable>' [, ... ] ] ) CREATE TYPE <replaceable class="parameter">name</replaceable> AS RANGE ( - SUBTYPE = <replaceable class="parameter">subtype</replaceable>, + SUBTYPE = <replaceable class="parameter">subtype</replaceable> [ , SUBTYPE_OPCLASS = <replaceable class="parameter">subtype_operator_class</replaceable> ] - [ , SUBTYPE_DIFF = <replaceable class="parameter">subtype_diff_function</replaceable> ] + [ , COLLATION = <replaceable class="parameter">collation</replaceable> ] [ , CANONICAL = <replaceable class="parameter">canonical_function</replaceable> ] + [ , SUBTYPE_DIFF = <replaceable class="parameter">subtype_diff_function</replaceable> ] [ , ANALYZE = <replaceable class="parameter">analyze_function</replaceable> ] - [ , COLLATION = <replaceable class="parameter">collation</replaceable> ] ) CREATE TYPE <replaceable class="parameter">name</replaceable> ( @@ -79,6 +79,18 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> table in the same schema.) </para> + <para> + There are five forms of <command>CREATE TYPE</command>, as shown in the + syntax synopsis above. They respectively create a <firstterm>composite + type</>, an <firstterm>enum type</>, a <firstterm>range type</>, a + <firstterm>base type</>, or a <firstterm>shell type</>. The first four + of these are discussed in turn below. A shell type is simply a placeholder + for a type to be defined later; it is created by issuing <command>CREATE + TYPE</command> with no parameters except for the type name. Shell types + are needed as forward references when creating range types and base types, + as discussed in those sections. + </para> + <refsect2> <title>Composite Types</title> @@ -102,59 +114,65 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> The second form of <command>CREATE TYPE</command> creates an enumerated (enum) type, as described in <xref linkend="datatype-enum">. Enum types take a list of one or more quoted labels, each of which - must be less than <symbol>NAMEDATALEN</symbol> bytes long (64 in a standard - <productname>PostgreSQL</productname> build). + must be less than <symbol>NAMEDATALEN</symbol> bytes long (64 bytes in a + standard <productname>PostgreSQL</productname> build). </para> </refsect2> <refsect2 id="SQL-CREATETYPE-RANGE"> <title>Range Types</title> - <para> + <para> The third form of <command>CREATE TYPE</command> creates a new range type, as described in <xref linkend="rangetypes">. - </para> - - <para> - The <replaceable class="parameter">subtype</replaceable> parameter - can be any type with an associated btree opclass (uses the type's - default btree operator class unless specified with - <replaceable class="parameter">subtype_operator_class</replaceable>). - </para> + </para> - <para> - The <replaceable class="parameter">subtype_diff</replaceable> - function takes two values of type - <replaceable class="parameter">subtype</replaceable> as argument, and - returns the distance between the two values as - <type>double precision</type>. This function is used for GiST indexing - (see <xref linkend="gist"> for more information), and should be provided - for efficiency. - </para> + <para> + The range type's <replaceable class="parameter">subtype</replaceable> can + be any type with an associated btree operator class (to determine the + ordering of values for the range type). Normally the subtype's default + btree operator class is used to determine ordering; to use a non-default + opclass, specify its name with <replaceable + class="parameter">subtype_opclass</replaceable>. If the subtype is + collatable, and you want to use a non-default collation in the range's + ordering, specify the desired collation with the <replaceable + class="parameter">collation</replaceable> option. + </para> - <para> - The <replaceable class="parameter">canonical</replaceable> - function takes an argument and returns a value, both of the same - type being defined. This is used to convert the range value to a - canonical form, when applicable. See <xref linkend="rangetypes"> + <para> + The optional <replaceable class="parameter">canonical</replaceable> + function must take one argument of the range type being defined, and + return a value of the same type. This is used to convert the range value + to a canonical form, when applicable. See <xref linkend="rangetypes"> for more information. To define - a <replaceable class="parameter">canonical</replaceable> function, - you must first create a <firstterm>shell type</>, which is a + the <replaceable class="parameter">canonical</replaceable> function, + you must first create a shell type, which is a placeholder type that has no properties except a name and an owner. This is done by issuing the command <literal>CREATE TYPE - <replaceable>name</></literal>, with no additional parameters. - </para> + <replaceable>name</></literal>, with no additional parameters. Then + the function can be declared, and finally the range type can be declared, + replacing the shell type entry with a valid range type. + </para> - <para> - The <replaceable class="parameter">analyze</replaceable> - function is the same as for creating a base type. - </para> + <para> + The optional <replaceable class="parameter">subtype_diff</replaceable> + function must take two values of the + <replaceable class="parameter">subtype</replaceable> type as argument, + and return a <type>double precision</type> value representing the + difference between the two given values. While this is optional, + providing it allows much greater efficiency of GiST indexes on columns of + the range type. Note that the <replaceable + class="parameter">subtype_diff</replaceable> function should agree with + the sort ordering implied by the selected operator class and collation; + that is, its result should be positive whenever its first argument is + greater than its second according to the sort ordering. + </para> - <para> - The <replaceable class="parameter">collation</replaceable> option - specifies the collation used when determining the total order for - the range. - </para> + <para> + The optional <replaceable class="parameter">analyze</replaceable> + function performs type-specific statistics collection for columns of the + range type. This is defined the same as for base types; see below. + </para> </refsect2> <refsect2> @@ -431,7 +449,7 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> <para> Whenever a user-defined type is created, <productname>PostgreSQL</productname> automatically creates an - associated array type, whose name consists of the base type's + associated array type, whose name consists of the element type's name prepended with an underscore, and truncated if necessary to keep it less than <symbol>NAMEDATALEN</symbol> bytes long. (If the name so generated collides with an existing type name, the process is @@ -496,6 +514,16 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> </listitem> </varlistentry> + <varlistentry> + <term><replaceable class="parameter">collation</replaceable></term> + <listitem> + <para> + The name of an existing collation to be associated with a column of + a composite type, or with a range type. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><replaceable class="parameter">label</replaceable></term> <listitem> @@ -506,6 +534,43 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> </listitem> </varlistentry> + <varlistentry> + <term><replaceable class="parameter">subtype</replaceable></term> + <listitem> + <para> + The name of the element type that the range type will represent ranges + of. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">subtype_operator_class</replaceable></term> + <listitem> + <para> + The name of a btree operator class for the subtype. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">canonical_function</replaceable></term> + <listitem> + <para> + The name of the canonicalization function for the range type. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><replaceable class="parameter">subtype_diff_function</replaceable></term> + <listitem> + <para> + The name of a difference function for the subtype. + </para> + </listitem> + </varlistentry> + <varlistentry> <term><replaceable class="parameter">input_function</replaceable></term> <listitem> @@ -699,8 +764,8 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> <para> Because there are no restrictions on use of a data type once it's been - created, creating a base type is tantamount to granting public execute - permission on the functions mentioned in the type definition. + created, creating a base type or range type is tantamount to granting + public execute permission on the functions mentioned in the type definition. This is usually not an issue for the sorts of functions that are useful in a type definition. But you might want to think twice before designing a type @@ -730,7 +795,8 @@ CREATE TYPE <replaceable class="parameter">name</replaceable> </para> <para> - Before <productname>PostgreSQL</productname> version 8.2, the syntax + Before <productname>PostgreSQL</productname> version 8.2, the shell-type + creation syntax <literal>CREATE TYPE <replaceable>name</></literal> did not exist. The way to create a new base type was to create its input function first. In this approach, <productname>PostgreSQL</productname> will first see @@ -787,6 +853,13 @@ CREATE TABLE bug ( </programlisting> </para> + <para> + This example creates a range type: +<programlisting> +CREATE TYPE float8_range AS RANGE (subtype = float8, subtype_diff = float8mi); +</programlisting> + </para> + <para> This example creates the base data type <type>box</type> and then uses the type in a table definition: @@ -860,7 +933,7 @@ CREATE TABLE big_objs ( <para> The ability to create a composite type with zero attributes is a <productname>PostgreSQL</productname>-specific deviation from the - standard (analogous to <command>CREATE TABLE</command>). + standard (analogous to the same case in <command>CREATE TABLE</command>). </para> </refsect1> diff --git a/doc/src/sgml/rowtypes.sgml b/doc/src/sgml/rowtypes.sgml index 4f8e559f6cb..605dc71dabc 100644 --- a/doc/src/sgml/rowtypes.sgml +++ b/doc/src/sgml/rowtypes.sgml @@ -243,7 +243,7 @@ INSERT INTO mytab (complex_col.r, complex_col.i) VALUES(1.1, 2.2); </para> </sect2> - <sect2> + <sect2 id="rowtypes-io-syntax"> <title>Composite Type Input and Output Syntax</title> <para> diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 973f0b335d7..d2005c1e809 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -2986,7 +2986,8 @@ getTypes(int *numTypes) /* * If it's a base type, make a DumpableObject representing a shell * definition of the type. We will need to dump that ahead of the I/O - * functions for the type. + * functions for the type. Similarly, range types need a shell + * definition in case they have a canonicalize function. * * Note: the shell type doesn't have a catId. You might think it * should copy the base type's catId, but then it might capture the @@ -3006,8 +3007,8 @@ getTypes(int *numTypes) /* * Initially mark the shell type as not to be dumped. We'll only - * dump it if the I/O functions need to be dumped; this is taken - * care of while sorting dependencies. + * dump it if the I/O or canonicalize functions need to be dumped; + * this is taken care of while sorting dependencies. */ stinfo->dobj.dump = false; @@ -7340,6 +7341,9 @@ dumpType(Archive *fout, TypeInfo *tyinfo) dumpEnumType(fout, tyinfo); else if (tyinfo->typtype == TYPTYPE_RANGE) dumpRangeType(fout, tyinfo); + else + write_msg(NULL, "WARNING: typtype of data type \"%s\" appears to be invalid\n", + tyinfo->dobj.name); } /* diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index efde0d0026b..6b6c073eb6a 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -636,7 +636,8 @@ findLoop(DumpableObject *obj, /* * A user-defined datatype will have a dependency loop with each of its * I/O functions (since those have the datatype as input or output). - * Break the loop and make the I/O function depend on the associated + * Similarly, a range type will have a loop with its canonicalize function, + * if any. Break the loop by making the function depend on the associated * shell type, instead. */ static void @@ -651,7 +652,7 @@ repairTypeFuncLoop(DumpableObject *typeobj, DumpableObject *funcobj) if (typeInfo->shellType) { addObjectDependency(funcobj, typeInfo->shellType->dobj.dumpId); - /* Mark shell type as to be dumped if any I/O function is */ + /* Mark shell type as to be dumped if any such function is */ if (funcobj->dump) typeInfo->shellType->dobj.dump = true; } @@ -789,7 +790,7 @@ repairDependencyLoop(DumpableObject **loop, int i, j; - /* Datatype and one of its I/O functions */ + /* Datatype and one of its I/O or canonicalize functions */ if (nLoop == 2 && loop[0]->objType == DO_TYPE && loop[1]->objType == DO_FUNC) diff --git a/src/include/catalog/pg_range.h b/src/include/catalog/pg_range.h index cc9ba293c14..4fbde166b39 100644 --- a/src/include/catalog/pg_range.h +++ b/src/include/catalog/pg_range.h @@ -34,7 +34,7 @@ CATALOG(pg_range,3541) BKI_WITHOUT_OIDS { Oid rngtypid; /* OID of owning range type */ - Oid rngsubtype; /* OID of range's subtype */ + Oid rngsubtype; /* OID of range's element type (subtype) */ Oid rngcollation; /* collation for this range type, or 0 */ Oid rngsubopc; /* subtype's btree opclass */ regproc rngcanonical; /* canonicalize range, or 0 */ diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index 1806985b69a..f2a32507908 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -61,8 +61,9 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO /* * typtype is 'b' for a base type, 'c' for a composite type (e.g., a - * table's rowtype), 'd' for a domain type, 'e' for an enum type, or 'p' - * for a pseudo-type. (Use the TYPTYPE macros below.) + * table's rowtype), 'd' for a domain, 'e' for an enum type, + * 'p' for a pseudo-type, or 'r' for a range type. + * (Use the TYPTYPE macros below.) * * If typtype is 'c', typrelid is the OID of the class' entry in pg_class. */ diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index 19d437ade50..0e1dfd84861 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -56,11 +56,14 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR -----+--------- (0 rows) --- Look for basic or enum types that don't have an array type. +-- Look for types that should have an array type according to their typtype, +-- but don't. We exclude composites here because we have not bothered to +-- make array types corresponding to the system catalogs' rowtypes. -- NOTE: as of 9.1, this check finds pg_node_tree, smgr, and unknown. SELECT p1.oid, p1.typname FROM pg_type as p1 -WHERE p1.typtype in ('b','e') AND p1.typname NOT LIKE E'\\_%' AND NOT EXISTS +WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%' + AND NOT EXISTS (SELECT 1 FROM pg_type as p2 WHERE p2.typname = ('_' || p1.typname)::name AND p2.typelem = p1.oid and p1.typarray = p2.oid); @@ -150,6 +153,19 @@ ORDER BY 1; 30 | oidvector | 54 | oidvectorin (2 rows) +-- Composites, domains, enums, ranges should all use the same input routines +SELECT DISTINCT typtype, typinput +FROM pg_type AS p1 +WHERE p1.typtype not in ('b', 'p') +ORDER BY 1; + typtype | typinput +---------+----------- + c | record_in + d | domain_in + e | enum_in + r | range_in +(4 rows) + -- Check for bogus typoutput routines -- As of 8.0, this check finds refcursor, which is borrowing -- other types' I/O routines @@ -174,6 +190,26 @@ WHERE p1.typoutput = p2.oid AND p1.typtype in ('b', 'p') AND NOT -----+---------+-----+--------- (0 rows) +-- Composites, enums, ranges should all use the same output routines +SELECT DISTINCT typtype, typoutput +FROM pg_type AS p1 +WHERE p1.typtype not in ('b', 'd', 'p') +ORDER BY 1; + typtype | typoutput +---------+------------ + c | record_out + e | enum_out + r | range_out +(3 rows) + +-- Domains should have same typoutput as their base types +SELECT p1.oid, p1.typname, p2.oid, p2.typname +FROM pg_type AS p1 LEFT JOIN pg_type AS p2 ON p1.typbasetype = p2.oid +WHERE p1.typtype = 'd' AND p1.typoutput IS DISTINCT FROM p2.typoutput; + oid | typname | oid | typname +-----+---------+-----+--------- +(0 rows) + -- Check for bogus typreceive routines SELECT p1.oid, p1.typname, p2.oid, p2.proname FROM pg_type AS p1, pg_proc AS p2 @@ -222,6 +258,19 @@ WHERE p1.typinput = p2.oid AND p1.typreceive = p3.oid AND -----+---------+-----+---------+-----+--------- (0 rows) +-- Composites, domains, enums, ranges should all use the same receive routines +SELECT DISTINCT typtype, typreceive +FROM pg_type AS p1 +WHERE p1.typtype not in ('b', 'p') +ORDER BY 1; + typtype | typreceive +---------+------------- + c | record_recv + d | domain_recv + e | enum_recv + r | range_recv +(4 rows) + -- Check for bogus typsend routines -- As of 7.4, this check finds refcursor, which is borrowing -- other types' I/O routines @@ -246,10 +295,30 @@ WHERE p1.typsend = p2.oid AND p1.typtype in ('b', 'p') AND NOT -----+---------+-----+--------- (0 rows) +-- Composites, enums, ranges should all use the same send routines +SELECT DISTINCT typtype, typsend +FROM pg_type AS p1 +WHERE p1.typtype not in ('b', 'd', 'p') +ORDER BY 1; + typtype | typsend +---------+------------- + c | record_send + e | enum_send + r | range_send +(3 rows) + +-- Domains should have same typsend as their base types +SELECT p1.oid, p1.typname, p2.oid, p2.typname +FROM pg_type AS p1 LEFT JOIN pg_type AS p2 ON p1.typbasetype = p2.oid +WHERE p1.typtype = 'd' AND p1.typsend IS DISTINCT FROM p2.typsend; + oid | typname | oid | typname +-----+---------+-----+--------- +(0 rows) + -- Check for bogus typmodin routines SELECT p1.oid, p1.typname, p2.oid, p2.proname FROM pg_type AS p1, pg_proc AS p2 -WHERE p1.typmodin = p2.oid AND p1.typtype in ('b', 'p') AND NOT +WHERE p1.typmodin = p2.oid AND NOT (p2.pronargs = 1 AND p2.proargtypes[0] = 'cstring[]'::regtype AND p2.prorettype = 'int4'::regtype AND NOT p2.proretset); @@ -260,7 +329,7 @@ WHERE p1.typmodin = p2.oid AND p1.typtype in ('b', 'p') AND NOT -- Check for bogus typmodout routines SELECT p1.oid, p1.typname, p2.oid, p2.proname FROM pg_type AS p1, pg_proc AS p2 -WHERE p1.typmodout = p2.oid AND p1.typtype in ('b', 'p') AND NOT +WHERE p1.typmodout = p2.oid AND NOT (p2.pronargs = 1 AND p2.proargtypes[0] = 'int4'::regtype AND p2.prorettype = 'cstring'::regtype AND NOT p2.proretset); @@ -298,7 +367,7 @@ WHERE p1.typarray = p2.oid AND -- Check for bogus typanalyze routines SELECT p1.oid, p1.typname, p2.oid, p2.proname FROM pg_type AS p1, pg_proc AS p2 -WHERE p1.typanalyze = p2.oid AND p1.typtype in ('b', 'p') AND NOT +WHERE p1.typanalyze = p2.oid AND NOT (p2.pronargs = 1 AND p2.proargtypes[0] = 'internal'::regtype AND p2.prorettype = 'bool'::regtype AND NOT p2.proretset); diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index d7d9cea5dc5..c6a70ad14c5 100644 --- a/src/test/regress/sql/type_sanity.sql +++ b/src/test/regress/sql/type_sanity.sql @@ -50,12 +50,15 @@ FROM pg_type as p1 WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR (p1.typtype != 'c' AND p1.typrelid != 0); --- Look for basic or enum types that don't have an array type. +-- Look for types that should have an array type according to their typtype, +-- but don't. We exclude composites here because we have not bothered to +-- make array types corresponding to the system catalogs' rowtypes. -- NOTE: as of 9.1, this check finds pg_node_tree, smgr, and unknown. SELECT p1.oid, p1.typname FROM pg_type as p1 -WHERE p1.typtype in ('b','e') AND p1.typname NOT LIKE E'\\_%' AND NOT EXISTS +WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%' + AND NOT EXISTS (SELECT 1 FROM pg_type as p2 WHERE p2.typname = ('_' || p1.typname)::name AND p2.typelem = p1.oid and p1.typarray = p2.oid); @@ -117,6 +120,12 @@ WHERE p1.typinput = p2.oid AND p1.typtype in ('b', 'p') AND (p2.oid = 'array_in'::regproc) ORDER BY 1; +-- Composites, domains, enums, ranges should all use the same input routines +SELECT DISTINCT typtype, typinput +FROM pg_type AS p1 +WHERE p1.typtype not in ('b', 'p') +ORDER BY 1; + -- Check for bogus typoutput routines -- As of 8.0, this check finds refcursor, which is borrowing @@ -135,6 +144,17 @@ FROM pg_type AS p1, pg_proc AS p2 WHERE p1.typoutput = p2.oid AND p1.typtype in ('b', 'p') AND NOT (p2.prorettype = 'cstring'::regtype AND NOT p2.proretset); +-- Composites, enums, ranges should all use the same output routines +SELECT DISTINCT typtype, typoutput +FROM pg_type AS p1 +WHERE p1.typtype not in ('b', 'd', 'p') +ORDER BY 1; + +-- Domains should have same typoutput as their base types +SELECT p1.oid, p1.typname, p2.oid, p2.typname +FROM pg_type AS p1 LEFT JOIN pg_type AS p2 ON p1.typbasetype = p2.oid +WHERE p1.typtype = 'd' AND p1.typoutput IS DISTINCT FROM p2.typoutput; + -- Check for bogus typreceive routines SELECT p1.oid, p1.typname, p2.oid, p2.proname @@ -169,6 +189,12 @@ FROM pg_type AS p1, pg_proc AS p2, pg_proc AS p3 WHERE p1.typinput = p2.oid AND p1.typreceive = p3.oid AND p2.pronargs != p3.pronargs; +-- Composites, domains, enums, ranges should all use the same receive routines +SELECT DISTINCT typtype, typreceive +FROM pg_type AS p1 +WHERE p1.typtype not in ('b', 'p') +ORDER BY 1; + -- Check for bogus typsend routines -- As of 7.4, this check finds refcursor, which is borrowing @@ -187,11 +213,22 @@ FROM pg_type AS p1, pg_proc AS p2 WHERE p1.typsend = p2.oid AND p1.typtype in ('b', 'p') AND NOT (p2.prorettype = 'bytea'::regtype AND NOT p2.proretset); +-- Composites, enums, ranges should all use the same send routines +SELECT DISTINCT typtype, typsend +FROM pg_type AS p1 +WHERE p1.typtype not in ('b', 'd', 'p') +ORDER BY 1; + +-- Domains should have same typsend as their base types +SELECT p1.oid, p1.typname, p2.oid, p2.typname +FROM pg_type AS p1 LEFT JOIN pg_type AS p2 ON p1.typbasetype = p2.oid +WHERE p1.typtype = 'd' AND p1.typsend IS DISTINCT FROM p2.typsend; + -- Check for bogus typmodin routines SELECT p1.oid, p1.typname, p2.oid, p2.proname FROM pg_type AS p1, pg_proc AS p2 -WHERE p1.typmodin = p2.oid AND p1.typtype in ('b', 'p') AND NOT +WHERE p1.typmodin = p2.oid AND NOT (p2.pronargs = 1 AND p2.proargtypes[0] = 'cstring[]'::regtype AND p2.prorettype = 'int4'::regtype AND NOT p2.proretset); @@ -200,7 +237,7 @@ WHERE p1.typmodin = p2.oid AND p1.typtype in ('b', 'p') AND NOT SELECT p1.oid, p1.typname, p2.oid, p2.proname FROM pg_type AS p1, pg_proc AS p2 -WHERE p1.typmodout = p2.oid AND p1.typtype in ('b', 'p') AND NOT +WHERE p1.typmodout = p2.oid AND NOT (p2.pronargs = 1 AND p2.proargtypes[0] = 'int4'::regtype AND p2.prorettype = 'cstring'::regtype AND NOT p2.proretset); @@ -230,7 +267,7 @@ WHERE p1.typarray = p2.oid AND SELECT p1.oid, p1.typname, p2.oid, p2.proname FROM pg_type AS p1, pg_proc AS p2 -WHERE p1.typanalyze = p2.oid AND p1.typtype in ('b', 'p') AND NOT +WHERE p1.typanalyze = p2.oid AND NOT (p2.pronargs = 1 AND p2.proargtypes[0] = 'internal'::regtype AND p2.prorettype = 'bool'::regtype AND NOT p2.proretset); -- GitLab