diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 07d7cec29b5791f51016d209a1ad695cbdb12069..2b03cdeffc5a1dc17d6142189d609d58b2646d4d 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/datatype.sgml,v 1.32 2000/07/29 18:45:51 tgl Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/datatype.sgml,v 1.33 2000/08/23 05:59:01 thomas Exp $ --> <chapter id="datatype"> @@ -594,7 +594,7 @@ CREATE TABLE <replaceable class="parameter">tablename</replaceable> (<replaceabl <entry>12 bytes</entry> <entry>-178000000 years</entry> <entry>178000000 years</entry> - <entry>1 mircosecond</entry> + <entry>1 microsecond</entry> </row> <row> <entry><type>date</type></entry> diff --git a/doc/src/sgml/datetime.sgml b/doc/src/sgml/datetime.sgml index 0ebdd14d91e5af123cfab0abb50bfc15582415bf..97952aa839ba3f95c8b7ca27bf40c65e1b8c2947 100644 --- a/doc/src/sgml/datetime.sgml +++ b/doc/src/sgml/datetime.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/datetime.sgml,v 2.10 2000/05/02 20:36:21 thomas Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/datetime.sgml,v 2.11 2000/08/23 05:59:01 thomas Exp $ Date/time details --> @@ -629,7 +629,7 @@ Date/time details <note> <para> Contributed by - <ulink url="jose@sferacarta.com">José Soares</ulink>. + <ulink url="mailto:jose@sferacarta.com">José Soares</ulink>. 
</para> </note> diff --git a/doc/src/sgml/ecpg.sgml b/doc/src/sgml/ecpg.sgml index 917493f2a8f217b8b51d953c00bf96478aab0fcf..fabf9bee39764973393b8d64db26a391aaa74910 100644 --- a/doc/src/sgml/ecpg.sgml +++ b/doc/src/sgml/ecpg.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/ecpg.sgml,v 1.14 2000/05/02 20:01:51 thomas Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/ecpg.sgml,v 1.15 2000/08/23 05:59:01 thomas Exp $ --> <chapter> @@ -32,8 +32,8 @@ $Header: /cvsroot/pgsql/doc/src/sgml/ecpg.sgml,v 1.14 2000/05/02 20:01:51 thomas This describes an embedded <acronym>SQL</acronym> in <acronym>C</acronym> package for <productname>Postgres</productname>. - It is written by <ulink url="linus@epact.se">Linus Tolke</ulink> - and <ulink url="meskes@debian.org">Michael Meskes</ulink>. + It is written by <ulink url="mailto:linus@epact.se">Linus Tolke</ulink> + and <ulink url="mailto:meskes@debian.org">Michael Meskes</ulink>. <note> <para> diff --git a/doc/src/sgml/geqo.sgml b/doc/src/sgml/geqo.sgml index 4f2f80e97a2a1fdd678fe60a05b092a97de0b72e..04b8def4ed1634a12cd751f27d6d6316e643628f 100644 --- a/doc/src/sgml/geqo.sgml +++ b/doc/src/sgml/geqo.sgml @@ -1,119 +1,125 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/geqo.sgml,v 1.10 2000/06/28 03:30:53 tgl Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/geqo.sgml,v 1.11 2000/08/23 05:59:02 thomas Exp $ Genetic Optimizer --> -<Chapter Id="geqo"> -<DocInfo> -<Author> -<FirstName>Martin</FirstName> -<SurName>Utesch</SurName> -<Affiliation> -<Orgname> -University of Mining and Technology -</Orgname> -<Orgdiv> -Institute of Automatic Control -</Orgdiv> -<Address> -<City> -Freiberg -</City> -<Country> -Germany -</Country> -</Address> -</Affiliation> -</Author> -<Date>1997-10-02</Date> -</DocInfo> - -<Title>Genetic Query Optimization in Database Systems</Title> - -<Para> -<Note> -<Title>Author</Title> -<Para> -Written by <ULink url="utesch@aut.tu-freiberg.de">Martin Utesch</ULink> -for the Institute of Automatic Control at the 
University of Mining and Technology in Freiberg, Germany. -</Para> -</Note> -</para> - -<Sect1> -<Title>Query Handling as a Complex Optimization Problem</Title> - -<Para> - Among all relational operators the most difficult one to process and -optimize is the <FirstTerm>join</FirstTerm>. The number of alternative plans to answer a query -grows exponentially with the number of <Command>join</Command>s included in it. Further -optimization effort is caused by the support of a variety of <FirstTerm>join methods</FirstTerm> - (e.g., nested loop, index scan, merge join in <ProductName>Postgres</ProductName>) to -process individual <Command>join</Command>s and a diversity of <FirstTerm>indices</FirstTerm> (e.g., r-tree, -b-tree, hash in <ProductName>Postgres</ProductName>) as access paths for relations. -</para> - -<Para> - The current <ProductName>Postgres</ProductName> optimizer implementation performs a <FirstTerm>near- -exhaustive search</FirstTerm> over the space of alternative strategies. This query -optimization technique is inadequate to support database application -domains that involve the need for extensive queries, such as artificial -intelligence. -</para> - -<Para> - The Institute of Automatic Control at the University of Mining and -Technology, in Freiberg, Germany, encountered the described problems as its -folks wanted to take the <ProductName>Postgres</ProductName> DBMS as the backend for a decision -support knowledge based system for the maintenance of an electrical -power grid. The DBMS needed to handle large <Command>join</Command> queries for the -inference machine of the knowledge based system. -</para> - -<Para> - Performance difficulties within exploring the space of possible query -plans arose the demand for a new optimization technique being developed. -</para> - -<Para> - In the following we propose the implementation of a <FirstTerm>Genetic Algorithm</FirstTerm> - as an option for the database query optimization problem. 
-</para> -</sect1> - -<Sect1> -<Title>Genetic Algorithms (<Acronym>GA</Acronym>)</Title> - -<Para> - The <Acronym>GA</Acronym> is a heuristic optimization method which operates through -determined, randomized search. The set of possible solutions for the -optimization problem is considered as a <FirstTerm>population</FirstTerm> of <FirstTerm>individuals</FirstTerm>. -The degree of adaption of an individual to its environment is specified -by its <FirstTerm>fitness</FirstTerm>. -</para> - -<Para> - The coordinates of an individual in the search space are represented -by <FirstTerm>chromosomes</FirstTerm>, in essence a set of character strings. A <FirstTerm>gene</FirstTerm> is a -subsection of a chromosome which encodes the value of a single parameter -being optimized. Typical encodings for a gene could be <FirstTerm>binary</FirstTerm> or -<FirstTerm>integer</FirstTerm>. -</para> - -<Para> - Through simulation of the evolutionary operations <FirstTerm>recombination</FirstTerm>, -<FirstTerm>mutation</FirstTerm>, and <FirstTerm>selection</FirstTerm> new generations of search points are found -that show a higher average fitness than their ancestors. -</para> - -<Para> - According to the "comp.ai.genetic" <Acronym>FAQ</Acronym> it cannot be stressed too -strongly that a <Acronym>GA</Acronym> is not a pure random search for a solution to a -problem. A <Acronym>GA</Acronym> uses stochastic processes, but the result is distinctly -non-random (better than random). 
- -<ProgramListing> -Structured Diagram of a <Acronym>GA</Acronym>: + <chapter id="geqo"> + <docinfo> + <author> + <firstname>Martin</firstname> + <surname>Utesch</surname> + <affiliation> + <orgname> + University of Mining and Technology + </orgname> + <orgdiv> + Institute of Automatic Control + </orgdiv> + <address> + <city> + Freiberg + </city> + <country> + Germany + </country> + </address> + </affiliation> + </author> + <date>1997-10-02</date> + </docinfo> + + <title>Genetic Query Optimization in Database Systems</title> + + <para> + <note> + <title>Author</title> + <para> + Written by <ulink url="mailto:utesch@aut.tu-freiberg.de">Martin Utesch</ulink> + for the Institute of Automatic Control at the University of Mining and Technology in Freiberg, Germany. + </para> + </note> + </para> + + <sect1> + <title>Query Handling as a Complex Optimization Problem</title> + + <para> + Among all relational operators the most difficult one to process and + optimize is the <firstterm>join</firstterm>. The number of alternative plans to answer a query + grows exponentially with the number of <command>join</command>s included in it. Further + optimization effort is caused by the support of a variety of + <firstterm>join methods</firstterm> + (e.g., nested loop, index scan, merge join in <productname>Postgres</productname>) to + process individual <command>join</command>s and a diversity of + <firstterm>indices</firstterm> (e.g., r-tree, + b-tree, hash in <productname>Postgres</productname>) as access paths for relations. + </para> + + <para> + The current <productname>Postgres</productname> optimizer + implementation performs a <firstterm>near-exhaustive + search</firstterm> over the space of alternative strategies. This query + optimization technique is inadequate to support database application + domains that involve the need for extensive queries, such as artificial + intelligence. 
+ </para> + + <para> + The Institute of Automatic Control at the University of Mining and + Technology, in Freiberg, Germany, encountered the described problems as its + folks wanted to take the <productname>Postgres</productname> DBMS as the backend for a decision + support knowledge based system for the maintenance of an electrical + power grid. The DBMS needed to handle large <command>join</command> queries for the + inference machine of the knowledge based system. + </para> + + <para> + Performance difficulties within exploring the space of possible query + plans arose the demand for a new optimization technique being developed. + </para> + + <para> + In the following we propose the implementation of a <firstterm>Genetic Algorithm</firstterm> + as an option for the database query optimization problem. + </para> + </sect1> + + <sect1> + <title>Genetic Algorithms (<acronym>GA</acronym>)</title> + + <para> + The <acronym>GA</acronym> is a heuristic optimization method which operates through + determined, randomized search. The set of possible solutions for the + optimization problem is considered as a + <firstterm>population</firstterm> of <firstterm>individuals</firstterm>. + The degree of adaption of an individual to its environment is specified + by its <firstterm>fitness</firstterm>. + </para> + + <para> + The coordinates of an individual in the search space are represented + by <firstterm>chromosomes</firstterm>, in essence a set of character + strings. A <firstterm>gene</firstterm> is a + subsection of a chromosome which encodes the value of a single parameter + being optimized. Typical encodings for a gene could be <firstterm>binary</firstterm> or + <firstterm>integer</firstterm>. 
+ </para> + + <para> + Through simulation of the evolutionary operations <firstterm>recombination</firstterm>, + <firstterm>mutation</firstterm>, and + <firstterm>selection</firstterm> new generations of search points are found + that show a higher average fitness than their ancestors. + </para> + + <para> + According to the "comp.ai.genetic" <acronym>FAQ</acronym> it cannot be stressed too + strongly that a <acronym>GA</acronym> is not a pure random search for a solution to a + problem. A <acronym>GA</acronym> uses stochastic processes, but the result is distinctly + non-random (better than random). + + <programlisting> +Structured Diagram of a <acronym>GA</acronym>: --------------------------- P(t) generation of ancestors at a time t @@ -140,229 +146,235 @@ P''(t) generation of descendants at a time t | +-------------------------------------+ | | t := t + 1 | +===+=====================================+ -</ProgramListing> -</para> -</sect1> - -<Sect1> -<Title>Genetic Query Optimization (<Acronym>GEQO</Acronym>) in Postgres</Title> - -<Para> - The <Acronym>GEQO</Acronym> module is intended for the solution of the query -optimization problem similar to a traveling salesman problem (<Acronym>TSP</Acronym>). -Possible query plans are encoded as integer strings. Each string -represents the <Command>join</Command> order from one relation of the query to the next. -E. g., the query tree -<ProgramListing> - /\ - /\ 2 - /\ 3 - 4 1 -</ProgramListing> -is encoded by the integer string '4-1-3-2', -which means, first join relation '4' and '1', then '3', and -then '2', where 1, 2, 3, 4 are relids in <ProductName>Postgres</ProductName>. -</para> - -<Para> - Parts of the <Acronym>GEQO</Acronym> module are adapted from D. Whitley's Genitor -algorithm. 
-</para> - -<Para> - Specific characteristics of the <Acronym>GEQO</Acronym> implementation in <ProductName>Postgres</ProductName> -are: - -<ItemizedList Mark="bullet" Spacing="compact"> -<ListItem> -<Para> -Usage of a <FirstTerm>steady state</FirstTerm> <Acronym>GA</Acronym> (replacement of the least fit - individuals in a population, not whole-generational replacement) - allows fast convergence towards improved query plans. This is - essential for query handling with reasonable time; -</Para> -</ListItem> - -<ListItem> -<Para> -Usage of <FirstTerm>edge recombination crossover</FirstTerm> which is especially suited - to keep edge losses low for the solution of the <Acronym>TSP</Acronym> by means of a <Acronym>GA</Acronym>; -</Para> -</ListItem> - -<ListItem> -<Para> -Mutation as genetic operator is deprecated so that no repair - mechanisms are needed to generate legal <Acronym>TSP</Acronym> tours. -</Para> -</ListItem> -</ItemizedList> -</para> - -<Para> - The <Acronym>GEQO</Acronym> module gives the following benefits to the <ProductName>Postgres</ProductName> DBMS -compared to the <ProductName>Postgres</ProductName> query optimizer implementation: - -<ItemizedList Mark="bullet" Spacing="compact"> -<ListItem> -<Para> -Handling of large <Command>join</Command> queries through non-exhaustive search; -</Para> -</ListItem> - -<ListItem> -<Para> -Improved cost size approximation of query plans since no longer - plan merging is needed (the <Acronym>GEQO</Acronym> module evaluates the cost for a - query plan as an individual). 
-</Para> -</ListItem> -</ItemizedList> -</para> - -</Sect1> - -<Sect1> -<Title>Future Implementation Tasks for <ProductName>Postgres</ProductName> <Acronym>GEQO</Acronym></Title> - -<Sect2> -<Title>Basic Improvements</Title> - -<Sect3> -<Title>Improve genetic algorithm parameter settings</Title> - -<Para> -In file <FileName>backend/optimizer/geqo/geqo_params.c</FileName>, routines -<Function>gimme_pool_size</Function> and <Function>gimme_number_generations</Function>, -we have to find a compromise for the parameter settings -to satisfy two competing demands: -<ItemizedList Spacing="compact"> -<ListItem> -<Para> -Optimality of the query plan -</Para> -</ListItem> -<ListItem> -<Para> -Computing time -</Para> -</ListItem> -</ItemizedList> -</para> -</sect3> - -<Sect3> -<Title>Find better solution for integer overflow</Title> - -<Para> -In file <FileName>backend/optimizer/geqo/geqo_eval.c</FileName>, routine -<Function>geqo_joinrel_size</Function>, -the present hack for MAXINT overflow is to set the <ProductName>Postgres</ProductName> integer -value of <StructField>rel->size</StructField> to its logarithm. -Modifications of <StructName>Rel</StructName> in <FileName>backend/nodes/relation.h</FileName> will -surely have severe impacts on the whole <ProductName>Postgres</ProductName> implementation. -</para> -</sect3> - -<Sect3> -<Title>Find solution for exhausted memory</Title> - -<Para> -Memory exhaustion may occur with more than 10 relations involved in a query. -In file <FileName>backend/optimizer/geqo/geqo_eval.c</FileName>, routine -<Function>gimme_tree</Function> is recursively called. -Maybe I forgot something to be freed correctly, but I dunno what. -Of course the <StructName>rel</StructName> data structure of the <Command>join</Command> keeps growing and -growing the more relations are packed into it. 
-Suggestions are welcome :-( -</para> -</sect3> -</sect2> - - -<BIBLIOGRAPHY Id="geqo-biblio"> -<TITLE> -References -</TITLE> -<PARA>Reference information for <Acronym>GEQ</Acronym> algorithms. -</PARA> -<BIBLIOENTRY> - -<BOOKBIBLIO> -<TITLE> -The Hitch-Hiker's Guide to Evolutionary Computation -</TITLE> -<AUTHORGROUP> -<AUTHOR> -<FIRSTNAME>Jörg</FIRSTNAME> -<SURNAME>Heitkötter</SURNAME> -</AUTHOR> -<AUTHOR> -<FIRSTNAME>David</FIRSTNAME> -<SURNAME>Beasley</SURNAME> -</AUTHOR> -</AUTHORGROUP> -<PUBLISHER> -<PUBLISHERNAME> -InterNet resource -</PUBLISHERNAME> -</PUBLISHER> -<ABSTRACT> -<Para> -FAQ in <ULink url="news://comp.ai.genetic">comp.ai.genetic</ULink> -is available at <ULink url="ftp://ftp.Germany.EU.net/pub/research/softcomp/EC/Welcome.html">Encore</ULink>. -</Para> -</ABSTRACT> -</BOOKBIBLIO> - -<BOOKBIBLIO> -<TITLE> -The Design and Implementation of the Postgres Query Optimizer -</TITLE> -<AUTHORGROUP> -<AUTHOR> -<FIRSTNAME>Z.</FIRSTNAME> -<SURNAME>Fong</SURNAME> -</AUTHOR> -</AUTHORGROUP> -<PUBLISHER> -<PUBLISHERNAME> -University of California, Berkeley Computer Science Department -</PUBLISHERNAME> -</PUBLISHER> -<ABSTRACT> -<Para> -File <FileName>planner/Report.ps</FileName> in the 'postgres-papers' distribution. -</Para> -</ABSTRACT> -</BOOKBIBLIO> - -<BOOKBIBLIO> -<TITLE> -Fundamentals of Database Systems -</TITLE> -<AUTHORGROUP> -<AUTHOR> -<FIRSTNAME>R.</FIRSTNAME> -<SURNAME>Elmasri</SURNAME> -</AUTHOR> -<AUTHOR> -<FIRSTNAME>S.</FIRSTNAME> -<SURNAME>Navathe</SURNAME> -</AUTHOR> -</AUTHORGROUP> -<PUBLISHER> -<PUBLISHERNAME> -The Benjamin/Cummings Pub., Inc. 
-</PUBLISHERNAME> -</PUBLISHER> -</BOOKBIBLIO> - -</BIBLIOENTRY> -</BIBLIOGRAPHY> - -</sect1> -</Chapter> + </programlisting> + </para> + </sect1> + + <sect1> + <title>Genetic Query Optimization (<acronym>GEQO</acronym>) in Postgres</title> + + <para> + The <acronym>GEQO</acronym> module is intended for the solution of the query + optimization problem similar to a traveling salesman problem (<acronym>TSP</acronym>). + Possible query plans are encoded as integer strings. Each string + represents the <command>join</command> order from one relation of the query to the next. + E. g., the query tree + <programlisting> + /\ + /\ 2 + /\ 3 +4 1 + </programlisting> + is encoded by the integer string '4-1-3-2', + which means, first join relation '4' and '1', then '3', and + then '2', where 1, 2, 3, 4 are relids in <productname>Postgres</productname>. + </para> + + <para> + Parts of the <acronym>GEQO</acronym> module are adapted from D. Whitley's Genitor + algorithm. + </para> + + <para> + Specific characteristics of the <acronym>GEQO</acronym> + implementation in <productname>Postgres</productname> + are: + + <itemizedlist spacing="compact" mark="bullet"> + <listitem> + <para> + Usage of a <firstterm>steady state</firstterm> <acronym>GA</acronym> (replacement of the least fit + individuals in a population, not whole-generational replacement) + allows fast convergence towards improved query plans. This is + essential for query handling with reasonable time; + </para> + </listitem> + + <listitem> + <para> + Usage of <firstterm>edge recombination crossover</firstterm> which is especially suited + to keep edge losses low for the solution of the + <acronym>TSP</acronym> by means of a <acronym>GA</acronym>; + </para> + </listitem> + + <listitem> + <para> + Mutation as genetic operator is deprecated so that no repair + mechanisms are needed to generate legal <acronym>TSP</acronym> tours. 
+ </para> + </listitem> + </itemizedlist> + </para> + + <para> + The <acronym>GEQO</acronym> module gives the following benefits to + the <productname>Postgres</productname> DBMS + compared to the <productname>Postgres</productname> query optimizer implementation: + + <itemizedlist spacing="compact" mark="bullet"> + <listitem> + <para> + Handling of large <command>join</command> queries through non-exhaustive search; + </para> + </listitem> + + <listitem> + <para> + Improved cost size approximation of query plans since no longer + plan merging is needed (the <acronym>GEQO</acronym> module evaluates the cost for a + query plan as an individual). + </para> + </listitem> + </itemizedlist> + </para> + + </sect1> + + <sect1> + <title>Future Implementation Tasks for + <productname>Postgres</productname> <acronym>GEQO</acronym></title> + + <sect2> + <title>Basic Improvements</title> + + <sect3> + <title>Improve genetic algorithm parameter settings</title> + + <para> + In file <filename>backend/optimizer/geqo/geqo_params.c</filename>, routines + <function>gimme_pool_size</function> and <function>gimme_number_generations</function>, + we have to find a compromise for the parameter settings + to satisfy two competing demands: + <itemizedlist spacing="compact"> + <listitem> + <para> + Optimality of the query plan + </para> + </listitem> + <listitem> + <para> + Computing time + </para> + </listitem> + </itemizedlist> + </para> + </sect3> + + <sect3> + <title>Find better solution for integer overflow</title> + + <para> + In file <filename>backend/optimizer/geqo/geqo_eval.c</filename>, routine + <function>geqo_joinrel_size</function>, + the present hack for MAXINT overflow is to set the <productname>Postgres</productname> integer + value of <structfield>rel->size</structfield> to its logarithm. 
+ Modifications of <structname>Rel</structname> in <filename>backend/nodes/relation.h</filename> will + surely have severe impacts on the whole <productname>Postgres</productname> implementation. + </para> + </sect3> + + <sect3> + <title>Find solution for exhausted memory</title> + + <para> + Memory exhaustion may occur with more than 10 relations involved in a query. + In file <filename>backend/optimizer/geqo/geqo_eval.c</filename>, routine + <function>gimme_tree</function> is recursively called. + Maybe I forgot something to be freed correctly, but I dunno what. + Of course the <structname>rel</structname> data structure of the + <command>join</command> keeps growing and + growing the more relations are packed into it. + Suggestions are welcome :-( + </para> + </sect3> + </sect2> + + + <bibliography id="geqo-biblio"> + <title> + References + </title> + <para>Reference information for <acronym>GEQ</acronym> algorithms. + </para> + <biblioentry> + + <bookbiblio> + <title> + The Hitch-Hiker's Guide to Evolutionary Computation + </title> + <authorgroup> + <author> + <firstname>Jörg</firstname> + <surname>Heitkötter</surname> + </author> + <author> + <firstname>David</firstname> + <surname>Beasley</surname> + </author> + </authorgroup> + <publisher> + <publishername> + InterNet resource + </publishername> + </publisher> + <abstract> + <para> + FAQ in <ulink url="news://comp.ai.genetic">comp.ai.genetic</ulink> + is available at <ulink + url="ftp://ftp.Germany.EU.net/pub/research/softcomp/EC/Welcome.html">Encore</ulink>. 
+ </para> + </abstract> + </bookbiblio> + + <bookbiblio> + <title> + The Design and Implementation of the Postgres Query Optimizer + </title> + <authorgroup> + <author> + <firstname>Z.</firstname> + <surname>Fong</surname> + </author> + </authorgroup> + <publisher> + <publishername> + University of California, Berkeley Computer Science Department + </publishername> + </publisher> + <abstract> + <para> + File <filename>planner/Report.ps</filename> in the 'postgres-papers' distribution. + </para> + </abstract> + </bookbiblio> + + <bookbiblio> + <title> + Fundamentals of Database Systems + </title> + <authorgroup> + <author> + <firstname>R.</firstname> + <surname>Elmasri</surname> + </author> + <author> + <firstname>S.</firstname> + <surname>Navathe</surname> + </author> + </authorgroup> + <publisher> + <publishername> + The Benjamin/Cummings Pub., Inc. + </publishername> + </publisher> + </bookbiblio> + + </biblioentry> + </bibliography> + + </sect1> + </chapter> <!-- Keep this comment at the end of the file Local variables: diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index ab882c0a1b3c1c044738d741f78d056d1cf8f95d..66a8a415f5c2efbf3f937c2913c5ada572066814 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -163,7 +163,7 @@ SELECT am.amname AS acc_name, <title>Author</title> <para> Written by - <ulink url="herouth@oumail.openu.ac.il">Herouth Maoz</ulink> + <ulink url="mailto:herouth@oumail.openu.ac.il">Herouth Maoz</ulink> This originally appeared on the User's Mailing List on 1998-03-02 in response to the question: "What is the difference between PRIMARY KEY and UNIQUE constraints?". @@ -328,7 +328,7 @@ CREATE MEMSTORE ON <table> COLUMNS <cols> <title>Author</title> <para> This is from a reply to a question on the e-mail list - by <ulink url="aoki@CS.Berkeley.EDU">Paul M. Aoki</ulink> + by <ulink url="mailto:aoki@CS.Berkeley.EDU">Paul M. Aoki</ulink> on 1998-08-11. <!-- Paul M. 
Aoki | University of California at Berkeley diff --git a/doc/src/sgml/jdbc.sgml b/doc/src/sgml/jdbc.sgml index 2db6ab0f9d400586df0e21e4c45c0669957d0602..ddb32b8a500b774f8fad85b4ae6ecc82b3965908 100644 --- a/doc/src/sgml/jdbc.sgml +++ b/doc/src/sgml/jdbc.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/Attic/jdbc.sgml,v 1.10 2000/03/31 03:27:40 thomas Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/Attic/jdbc.sgml,v 1.11 2000/08/23 05:59:02 thomas Exp $ --> <chapter id="jdbc"> @@ -9,7 +9,7 @@ $Header: /cvsroot/pgsql/doc/src/sgml/Attic/jdbc.sgml,v 1.10 2000/03/31 03:27:40 <note> <title>Author</title> <para> - Written by <ulink url="peter@retep.org.uk">Peter T. Mount</ulink>, the + Written by <ulink url="mailto:peter@retep.org.uk">Peter T. Mount</ulink>, the author of the <acronym>JDBC</acronym> driver. </para> </note> diff --git a/doc/src/sgml/keys.sgml b/doc/src/sgml/keys.sgml index 11e421dddbacac97871ec4d0c3d68065a003b0f5..29a62489655cb3669cc3f937241c8fb770df7ac4 100644 --- a/doc/src/sgml/keys.sgml +++ b/doc/src/sgml/keys.sgml @@ -1,8 +1,14 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/Attic/keys.sgml,v 1.3 1998/12/29 02:24:16 thomas Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/Attic/keys.sgml,v 1.4 2000/08/23 05:59:02 thomas Exp $ Indices and Keys $Log: keys.sgml,v $ +Revision 1.4 2000/08/23 05:59:02 thomas +Fix several <ulink> tags which refer to e-mail addresses + but were missing the "mailto:" prefix. +Fix typo. +Thanks to Neil Conway <nconway@klamath.dyndns.org> for the heads-up. + Revision 1.3 1998/12/29 02:24:16 thomas Clean up to ensure tag completion as required by the newest versions of Norm's Modular Style Sheets and jade/docbook. @@ -18,37 +24,37 @@ Will go into the User's Guide. 
--> -<chapter id="keys"> -<docinfo> -<authorgroup> -<author> -<firstname>Herouth</firstname> -<surname>Maoz</surname> -</author> -</authorgroup> -<date>1998-03-02</date> -</docinfo> - -<Title>Indices and Keys</Title> - -<Note> -<Title>Author</Title> -<Para> -Written by -<ULink url="herouth@oumail.openu.ac.il">Herouth Maoz</ULink> -</Para> -</Note> - -<Note> -<Title>Editor's Note</Title> -<Para> -This originally appeared on the mailing list - in response to the question: - "What is the difference between PRIMARY KEY and UNIQUE constraints?". -</Para> -</Note> - -<ProgramListing> + <chapter> + <docinfo> + <authorgroup> + <author> + <firstname>Herouth</firstname> + <surname>Maoz</surname> + </author> + </authorgroup> + <date>1998-03-02</date> + </docinfo> + + <title>Indices and Keys</title> + + <note> + <title>Author</title> + <para> + Written by + <ulink url="mailto:herouth@oumail.openu.ac.il">Herouth Maoz</ulink> + </para> + </note> + + <note> + <title>Editor's Note</title> + <para> + This originally appeared on the mailing list + in response to the question: + "What is the difference between PRIMARY KEY and UNIQUE constraints?". + </para> + </note> + + <programlisting> Subject: Re: [QUESTIONS] PRIMARY KEY | UNIQUE What's the difference between: @@ -59,125 +65,143 @@ Subject: Re: [QUESTIONS] PRIMARY KEY | UNIQUE - Is this an alias? - If PRIMARY KEY is already unique, then why is there another kind of key named UNIQUE? -</ProgramListing> - -<Para> -A primary key is the field(s) used to identify a specific row. For example, -Social Security numbers identifying a person. -</Para> -<Para> -A simply UNIQUE combination of fields has nothing to do with identifying -the row. It's simply an integrity constraint. For example, I have -collections of links. Each collection is identified by a unique number, -which is the primary key. This key is used in relations. -</Para> -<Para> -However, my application requires that each collection will also have a -unique name. Why? 
So that a human being who wants to modify a collection -will be able to identify it. It's much harder to know, if you have two -collections named "Life Science", the the one tagged 24433 is the one you -need, and the one tagged 29882 is not. -</Para> -<Para> -So, the user selects the collection by its name. We therefore make sure, -withing the database, that names are unique. However, no other table in the -database relates to the collections table by the collection Name. That -would be very inefficient. -</Para> -<Para> -Moreover, despite being unique, the collection name does not actually -define the collection! For example, if somebody decided to change the name -of the collection from "Life Science" to "Biology", it will still be the -same collection, only with a different name. As long as the name is unique, -that's OK. -</Para> -<Para> -So: - -<itemizedlist> -<ListItem> -<Para> -Primary key: -<itemizedList Mark="bullet" Spacing="compact"> -<ListItem> -<Para> -Is used for identifying the row and relating to it. -</Para> -</ListItem> -<ListItem> -<Para> -Is impossible (or hard) to update. -</Para> -</ListItem> -<ListItem> -<Para> -Should not allow NULLs. -</Para> -</ListItem> -</itemizedlist> -</para> -</listitem> - -<ListItem> -<Para> -Unique field(s): -<itemizedlist Mark="bullet" Spacing="compact"> -<ListItem> -<Para> -Are used as an alternative access to the row. -</Para> -</ListItem> -<ListItem> -<Para> -Are updateable, so long as they are kept unique. -</Para> -</ListItem> -<ListItem> -<Para> -NULLs are acceptable. -</Para> -</ListItem> -</itemizedlist> -</para> -</listitem> -</itemizedlist> -</para> - -<Para> -As for why no non-unique keys are defined explicitly in standard <acronym>SQL</acronym> syntax? -Well, you -must understand that indices are implementation-dependent. <acronym>SQL</acronym> does not -define the implementation, merely the relations between data in the -database. 
<productname>Postgres</productname> does allow non-unique indices, but indices -used to enforce <acronym>SQL</acronym> keys are always unique. -</Para> -<Para> -Thus, you may query a table by any combination of its columns, despite the -fact that you don't have an index on these columns. The indexes are merely -an implementational aid which each <acronym>RDBMS</acronym> offers you, in order to cause -commonly used queries to be done more efficiently. Some <acronym>RDBMS</acronym> may give you -additional measures, such as keeping a key stored in main memory. They will -have a special command, for example -<programlisting> -CREATE MEMSTORE ON <table> COLUMNS <cols> -</programlisting> -(this is not an existing command, just an example). -</Para> -<Para> -In fact, when you create a primary key or a unique combination of fields, -nowhere in the <acronym>SQL</acronym> specification does it say that an index is created, nor that -the retrieval of data by the key is going to be more efficient than a -sequential scan! -</Para> -<Para> -So, if you want to use a combination of fields which is not unique as a -secondary key, you really don't have to specify anything - just start -retrieving by that combination! However, if you want to make the retrieval -efficient, you'll have to resort to the means your <acronym>RDBMS</acronym> provider gives you -- be it an index, my imaginary MEMSTORE command, or an intelligent <acronym>RDBMS</acronym> -which creates indices without your knowledge based on the fact that you have -sent it many queries based on a specific combination of keys... (It learns -from experience). -</Para> -</chapter> - + </programlisting> + + <para> + A primary key is the field(s) used to identify a specific row. For example, + Social Security numbers identifying a person. + </para> + <para> + A simply UNIQUE combination of fields has nothing to do with identifying + the row. It's simply an integrity constraint. For example, I have + collections of links. 
Each collection is identified by a unique number, + which is the primary key. This key is used in relations. + </para> + <para> + However, my application requires that each collection will also have a + unique name. Why? So that a human being who wants to modify a collection + will be able to identify it. It's much harder to know, if you have two + collections named "Life Science", that the one tagged 24433 is the one you + need, and the one tagged 29882 is not. + </para> + <para> + So, the user selects the collection by its name. We therefore make sure, + within the database, that names are unique. However, no other table in the + database relates to the collections table by the collection Name. That + would be very inefficient. + </para> + <para> + Moreover, despite being unique, the collection name does not actually + define the collection! For example, if somebody decided to change the name + of the collection from "Life Science" to "Biology", it will still be the + same collection, only with a different name. As long as the name is unique, + that's OK. + </para> + <para> + So: + + <itemizedlist> + <listitem> + <para> + Primary key: + <itemizedlist> + <listitem> + <para> + Is used for identifying the row and relating to it. + </para> + </listitem> + <listitem> + <para> + Is impossible (or hard) to update. + </para> + </listitem> + <listitem> + <para> + Should not allow NULLs. + </para> + </listitem> + </itemizedlist> + </para> + </listitem> + + <listitem> + <para> + Unique field(s): + <itemizedlist> + <listitem> + <para> + Are used as an alternative access to the row. + </para> + </listitem> + <listitem> + <para> + Are updateable, so long as they are kept unique. + </para> + </listitem> + <listitem> + <para> + NULLs are acceptable. + </para> + </listitem> + </itemizedlist> + </para> + </listitem> + </itemizedlist> + </para> + + <para> + As for why no non-unique keys are defined explicitly in standard + <acronym>SQL</acronym> syntax?
+ Well, you + must understand that indices are implementation-dependent. <acronym>SQL</acronym> does not + define the implementation, merely the relations between data in the + database. <productname>Postgres</productname> does allow non-unique indices, but indices + used to enforce <acronym>SQL</acronym> keys are always unique. + </para> + <para> + Thus, you may query a table by any combination of its columns, despite the + fact that you don't have an index on these columns. The indexes are merely + an implementational aid which each <acronym>RDBMS</acronym> offers you, in order to cause + commonly used queries to be done more efficiently. Some <acronym>RDBMS</acronym> may give you + additional measures, such as keeping a key stored in main memory. They will + have a special command, for example + <programlisting> + CREATE MEMSTORE ON &lt;table&gt; COLUMNS &lt;cols&gt; + </programlisting> + (this is not an existing command, just an example). + </para> + <para> + In fact, when you create a primary key or a unique combination of fields, + nowhere in the <acronym>SQL</acronym> specification does it say that an index is created, nor that + the retrieval of data by the key is going to be more efficient than a + sequential scan! + </para> + <para> + So, if you want to use a combination of fields which is not unique as a + secondary key, you really don't have to specify anything - just start + retrieving by that combination! However, if you want to make the retrieval + efficient, you'll have to resort to the means your <acronym>RDBMS</acronym> provider gives you + - be it an index, my imaginary MEMSTORE command, or an intelligent + <acronym>RDBMS</acronym> + which creates indices without your knowledge based on the fact that you have + sent it many queries based on a specific combination of keys... (It learns + from experience).
+ </para> + </chapter> + +<!-- Keep this comment at the end of the file +Local variables: +mode:sgml +sgml-omittag:nil +sgml-shorttag:t +sgml-minimize-attributes:nil +sgml-always-quote-attributes:t +sgml-indent-step:1 +sgml-indent-data:t +sgml-parent-document:nil +sgml-default-dtd-file:"./reference.ced" +sgml-exposed-tags:nil +sgml-local-catalogs:("/usr/lib/sgml/catalog") +sgml-local-ecat-files:nil +End: +--></book> diff --git a/doc/src/sgml/ref/ecpg-ref.sgml b/doc/src/sgml/ref/ecpg-ref.sgml index 8bb1dd27f13d708d158c8422d6f1c77ac61d967a..818c7d59ae48ffc40a1d2b2941b7e9fd616e08f7 100644 --- a/doc/src/sgml/ref/ecpg-ref.sgml +++ b/doc/src/sgml/ref/ecpg-ref.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/ref/ecpg-ref.sgml,v 1.2 2000/03/31 06:17:52 thomas Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/ref/ecpg-ref.sgml,v 1.3 2000/08/23 05:59:11 thomas Exp $ Postgres documentation --> @@ -135,11 +135,11 @@ ecpg [ -v ] [ -t ] [ -I include-path ] [ -o outfile ] file1 [ file2 ] [ ... ] </para> <para> - <ulink url="linus@epact.se">Linus Tolke</ulink> was the + <ulink url="mailto:linus@epact.se">Linus Tolke</ulink> was the original author of <application>ecpg</application> (up to version 0.2). - <ulink url="meskes@debian.org">Michael Meskes</ulink> + <ulink url="mailto:meskes@debian.org">Michael Meskes</ulink> is the current author and maintainer of <application>ecpg</application>. - <ulink url="tomg@q8.nrnet.org">Thomas Good</ulink> + <ulink url="mailto:tomg@q8.nrnet.org">Thomas Good</ulink> is the author of the last revision of the ecpg man page, on which this document is based. 
</para> diff --git a/doc/src/sgml/ref/insert.sgml b/doc/src/sgml/ref/insert.sgml index 27dedecd60deac5e4c432c4b2ed7b714394a29fe..84abd7c629d823e9171efa28c779c04f231d376c 100644 --- a/doc/src/sgml/ref/insert.sgml +++ b/doc/src/sgml/ref/insert.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/ref/insert.sgml,v 1.7 2000/03/27 17:14:43 thomas Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/ref/insert.sgml,v 1.8 2000/08/23 05:59:11 thomas Exp $ Postgres documentation --> @@ -20,20 +20,20 @@ Postgres documentation </refnamediv> <refsynopsisdiv> <refsynopsisdivinfo> - <date>1999-07-20</date> + <date>2000-08-08</date> </refsynopsisdivinfo> <synopsis> INSERT INTO <replaceable class="PARAMETER">table</replaceable> [ ( <replaceable class="PARAMETER">column</replaceable> [, ...] ) ] - { VALUES ( <replaceable class="PARAMETER">expression</replaceable> [, ...] ) | SELECT <replaceable class="PARAMETER">query</replaceable> } + { DEFAULT VALUES | VALUES ( <replaceable class="PARAMETER">expression</replaceable> [, ...] ) | SELECT <replaceable class="PARAMETER">query</replaceable> } </synopsis> <refsect2 id="R2-SQL-INSERT-1"> <refsect2info> - <date>1998-09-23</date> </refsect2info> <title> Inputs </title> + <para> <variablelist> @@ -45,6 +45,7 @@ INSERT INTO <replaceable class="PARAMETER">table</replaceable> [ ( <replaceable </para> </listitem> </varlistentry> + <varlistentry> <term><replaceable class="PARAMETER">column</replaceable></term> <listitem> @@ -54,6 +55,16 @@ INSERT INTO <replaceable class="PARAMETER">table</replaceable> [ ( <replaceable </listitem> </varlistentry> + <varlistentry> + <term>DEFAULT VALUES</term> + <listitem> + <para> + All columns will be filled by NULLs or by values specified + when the table was created using DEFAULT clauses. 
+ </para> + </listitem> + </varlistentry> + <varlistentry> <term><replaceable class="PARAMETER">expression</replaceable></term> <listitem> @@ -79,7 +90,6 @@ INSERT INTO <replaceable class="PARAMETER">table</replaceable> [ ( <replaceable <refsect2 id="R2-SQL-INSERT-2"> <refsect2info> - <date>1998-09-23</date> </refsect2info> <title> Outputs @@ -118,7 +128,6 @@ INSERT 0 <replaceable>#</replaceable> <refsect1 id="R1-SQL-INSERT-1"> <refsect1info> - <date>1998-09-02</date> </refsect1info> <title> Description @@ -217,7 +226,6 @@ INSERT INTO tictactoe (game, board) <refsect2 id="R2-SQL-INSERT-4"> <refsect2info> - <date>1998-09-23</date> </refsect2info> <title> SQL92