From 60ce9e9279376f6f99b10096370dec1e1e9e7596 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut <peter_e@gmx.net> Date: Sun, 2 Sep 2001 23:27:50 +0000 Subject: [PATCH] New blood and fresh air for tutorial --- doc/src/sgml/advanced.sgml | 473 +++++++++--------- doc/src/sgml/arch.sgml | 88 ---- doc/src/sgml/filelist.sgml | 9 +- doc/src/sgml/info.sgml | 4 +- doc/src/sgml/query.sgml | 899 +++++++++++++++++++++++------------ doc/src/sgml/start.sgml | 691 +++++++++++++-------------- doc/src/sgml/tutorial.sgml | 36 +- src/tutorial/advanced.source | 75 +-- src/tutorial/basics.source | 180 +++---- 9 files changed, 1333 insertions(+), 1122 deletions(-) delete mode 100644 doc/src/sgml/arch.sgml diff --git a/doc/src/sgml/advanced.sgml b/doc/src/sgml/advanced.sgml index dc3731b989b..de0bf86e8bb 100644 --- a/doc/src/sgml/advanced.sgml +++ b/doc/src/sgml/advanced.sgml @@ -1,32 +1,203 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/advanced.sgml,v 1.21 2001/01/13 23:58:55 petere Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/advanced.sgml,v 1.22 2001/09/02 23:27:49 petere Exp $ --> - <chapter id="advanced"> - <title>Advanced <productname>Postgres</productname> <acronym>SQL</acronym> Features</title> - - <para> - Having covered the basics of using - <productname>Postgres</productname> <acronym>SQL</acronym> to - access your data, we will now discuss those features of - <productname>Postgres</productname> that distinguish it from conventional data - managers. These features include inheritance, time - travel and non-atomic data values (array- and - set-valued attributes). - Examples in this section can also be found in - <filename>advance.sql</filename> in the tutorial directory. - (Refer to <xref linkend="QUERY"> for how to use it.) - </para> - - <sect1 id="inheritance"> + <chapter id="tutorial-advanced"> + <title>Advanced Features</title> + + <sect1 id="tutorial-advanced-intro"> + <title>Introduction</title> + + <para> + In the previous chapter we have covered the basics of using + <acronym>SQL</acronym> to store and access your data in a + <productname>PostgreSQL</productname>. We will now discuss some + more advanced features of <acronym>SQL</acronym> that simplify the + management and prevent loss or corruption of your data. Finally, + we will look at some <productname>PostgreSQL</productname> + extensions. + </para> + + <para> + This chapter will on occasion refer to examples found in <xref + linkend="tutorial-sql"> to change or improve them, so it will be + of advantage if you have read that chapter. Some examples from + this chapter can also be found in + <filename>advanced.sql</filename> in the tutorial directory. This + file also contains some example data to load, which is not + repeated here. (Refer to <xref linkend="tutorial-sql-intro"> for + how to use the file.) + </para> + </sect1> + + + <sect1 id="tutorial-views"> + <title>Views</title> + + <indexterm zone="tutorial-views"> + <primary>view</primary> + </indexterm> + + <para> + Refer back to the queries in <xref linkend="tutorial-join">. + Suppose the combined listing of weather records and city location + is of particular interest to your application, but you don't want + to type the query each time you need it. You can create a + <firstterm>view</firstterm> over the query, which gives a name to + the query that you can refer to like an ordinary table. + +<programlisting> +CREATE VIEW myview AS + SELECT city, temp_lo, temp_hi, prcp, date, location + FROM weather, cities + WHERE city = name; + +SELECT * FROM myview; +</programlisting> + </para> + + <para> + Making liberal use of views is a key aspect of good SQL database + design. Views allow you to encapsulate the details of the + structure of your tables, which may change as your application + evolves, behind consistent interfaces. + </para> + + <para> + Views can be used in almost any place a real table can be used. + Building views upon other views is not uncommon. + </para> + </sect1> + + + <sect1 id="tutorial-fk"> + <title>Foreign Keys</title> + + <indexterm zone="tutorial-fk"> + <primary>foreign key</primary> + </indexterm> + + <indexterm zone="tutorial-fk"> + <primary>referential integrity</primary> + </indexterm> + + <para> + Recall the <classname>weather</classname> and the + <classname>cities</classname> tables from <xref + linkend="tutorial-sql">. Consider the following problem: You + want to make sure that no one can insert rows in the + <classname>weather</classname> table that do not have a matching + entry in the <classname>cities</classname> table. This is called + maintaining the <firstterm>referential integrity</firstterm> of + your data. In simplistic database systems this would be + implemented (if at all) by first looking at the + <classname>cities</classname> table to check if a matching record + exists, and then inserting or rejecting the new + <classname>weather</classname> records. This approach has a + number of problems and is very inconvenient, so + <productname>PostgreSQL</productname> can do this for you. + </para> + + <para> + The new declaration of the tables would look like this: + +<programlisting> +CREATE TABLE cities ( + name varchar(80) primary key, + location point +); + +CREATE TABLE weather ( + city varchar(80) references weather, + temp_lo int, + temp_hi int, + prcp real, + date date +); + +</programlisting> + + Now try inserting an invalid record: + +<programlisting> +INSERT INTO weather VALUES ('Berkeley', 45, 53, 0.0, '1994-11-28'); +</programlisting> + +<screen> +ERROR: <unnamed> referential integrity violation - key referenced from weather not found in cities +</screen> + + </para> + + <para> + The behavior of foreign keys can be finely tuned to your + application. We will not go beyond this simple example in this + tutorial and refer you to the <citetitle>Reference + Manual</citetitle> for more information. Making correct use of + foreign keys will definitely improve the quality of your database + applications, so you are strongly encouraged to learn about them. + </para> + </sect1> + + + <sect1 id="tutorial-transactions"> + <title>Transactions</title> + + <comment>This section needs to be written.</comment> + + <para> + + </para> + </sect1> + + + <sect1 id="tutorial-inheritance"> <title>Inheritance</title> + <indexterm zone="tutorial-inheritance"> + <primary>inheritance</primary> + </indexterm> + + <para> + Inheritance is a concept from object-oriented databases. It opens + up interesting new possibilities of database design. + </para> + + <para> + Let's create two tables: A table <classname>cities</classname> + and a table <classname>capitals</classname>. Naturally, capitals + are also cities, so you want some way to show the capitals + implicitly when you list all cities. If you're really clever you + might invent some scheme like this: + +<programlisting> +CREATE TABLE capitals ( + name text, + population real, + altitude int, -- (in ft) + state char(2) +); + +CREATE TABLE non_capitals ( + name text, + population real, + altitude int -- (in ft) +); + +CREATE VIEW cities AS + SELECT name, population, altitude FROM capitals + UNION + SELECT name, population, altitude FROM non_capitals; +</programlisting> + + This works OK as far as querying goes, but it gets ugly when you + need to update several rows, to name one thing. + </para> + <para> - Let's create two tables. The capitals table contains - state capitals that are also cities. Naturally, the - capitals table should inherit from cities. + A better solution is this: - <programlisting> +<programlisting> CREATE TABLE cities ( name text, population real, @@ -36,245 +207,93 @@ CREATE TABLE cities ( CREATE TABLE capitals ( state char(2) ) INHERITS (cities); - </programlisting> - - In this case, a row of capitals <firstterm>inherits</firstterm> all - columns (name, population, and altitude) from its - parent, cities. The type of the column name is - <type>text</type>, a native <productname>Postgres</productname> - type for variable length - ASCII strings. The type of the column population is - <type>real</type>, a type for single precision - floating point numbers. State capitals have an extra - column, state, that shows their state. - In <productname>Postgres</productname>, - a table can inherit from zero or more other tables, - and a query can reference either all rows of a - table or all rows of a tables plus all of its - descendants. - - <note> - <para> - The inheritance hierarchy is a directed acyclic graph. - </para> - </note> +</programlisting> + + In this case, a row of <classname>capitals</classname> + <firstterm>inherits</firstterm> all columns (<structfield>name</>, + <structfield>population</>, and <structfield>altitude</>) from its + <firstterm>parent</firstterm>, <classname>cities</classname>. The + type of the column <structfield>name</structfield> is + <type>text</type>, a native <productname>Postgres</productname> + type for variable length character strings. State capitals have + an extra column, state, that shows their state. In + <productname>PostgreSQL</productname>, a table can inherit from + zero or more other tables. </para> <para> For example, the following query finds the names of all cities, including state capitals, that are located at an altitude - over 500ft: + over 500 ft.: - <programlisting> +<programlisting> SELECT name, altitude FROM cities WHERE altitude > 500; - </programlisting> +</programlisting> which returns: - <programlisting> -+----------+----------+ -|name | altitude | -+----------+----------+ -|Las Vegas | 2174 | -+----------+----------+ -|Mariposa | 1953 | -+----------+----------+ -|Madison | 845 | -+----------+----------+ - </programlisting> +<screen> + name | altitude +-----------+---------- + Las Vegas | 2174 + Mariposa | 1953 + Madison | 845 +(3 rows) +</screen> </para> <para> On the other hand, the following query finds all the cities that are not state capitals and - are situated at an altitude of 500ft or higher: + are situated at an altitude of 500 ft. or higher: - <programlisting> +<programlisting> SELECT name, altitude FROM ONLY cities WHERE altitude > 500; +</programlisting> -+----------+----------+ -|name | altitude | -+----------+----------+ -|Las Vegas | 2174 | -+----------+----------+ -|Mariposa | 1953 | -+----------+----------+ - </programlisting> +<screen> + name | altitude +-----------+---------- + Las Vegas | 2174 + Mariposa | 1953 +(2 rows) +</screen> </para> <para> - Here the <quote>ONLY</quote> before cities indicates that the query should - be run over only the cities table, and not tables below cities in the - inheritance hierarchy. Many of the commands that we - have already discussed -- <command>SELECT</command>, - <command>UPDATE</command> and <command>DELETE</command> -- - support this <quote>ONLY</quote> notation. + Here the <literal>ONLY</literal> before <literal>cities</literal> + indicates that the query should be run over only the + <classname>cities</classname> table, and not tables below + <classname>cities</classname> in the inheritance hierarchy. Many + of the commands that we have already discussed -- + <command>SELECT</command>, <command>UPDATE</command> and + <command>DELETE</command> -- support this <literal>ONLY</literal> + notation. </para> - - <note> - <title>Deprecated</title> - <para> - In previous versions of <productname>Postgres</productname>, the - default was not to get access to child tables. This was found to - be error prone and is also in violation of SQL99. Under the old - syntax, to get the sub-tables you append "*" to the table name. - For example -<programlisting> -SELECT * from cities*; -</programlisting> - You can still explicitly specify scanning child tables by appending - "*", as well as explicitly specify not scanning child tables by - writing <quote>ONLY</quote>. But beginning in version 7.1, the default - behavior for an undecorated table name is to scan its child tables - too, whereas before the default was not to do so. To get the old - default behavior, set the configuration option - <literal>SQL_Inheritance</literal> to off, e.g., -<programlisting> -SET SQL_Inheritance TO OFF; -</programlisting> - or add a line in your <filename>postgresql.conf</filename> file. - </para> - </note> </sect1> - <sect1 id="non-atomic-values"> - <title>Non-Atomic Values</title> + <sect1 id="tutorial-conclusion"> + <title>Conclusion</title> + <para> - One of the tenets of the relational model is that the - columns of a table are atomic. - <productname>Postgres</productname> does not - have this restriction; columns can themselves contain - sub-values that can be accessed from the query - language. For example, you can create columns that - are arrays of base types. + <productname>PostgreSQL</productname> has many features not + touched upon in this tutorial introduction, which has been + oriented toward newer users of <acronym>SQL</acronym>. These + features are discussed in more detail in both the + <citetitle>User's Guide</citetitle> and the + <citetitle>Programmer's Guide</citetitle>. </para> - <sect2> - <title>Arrays</title> - - <para> - <productname>Postgres</productname> allows columns of a - row to be defined - as fixed-length or variable-length multi-dimensional - arrays. Arrays of any base type or user-defined type - can be created. To illustrate their use, we first create a - table with arrays of base types. - - <programlisting> -CREATE TABLE SAL_EMP ( - name text, - pay_by_quarter integer[], - schedule text[][] -); - </programlisting> - </para> - - <para> - The above query will create a table named SAL_EMP with - a <firstterm>text</firstterm> string (name), a one-dimensional - array of <firstterm>integer</firstterm> - (pay_by_quarter), which represents the employee's - salary by quarter and a two-dimensional array of - <firstterm>text</firstterm> - (schedule), which represents the employee's weekly - schedule. Now we do some <firstterm>INSERT</firstterm>s; - note that when - appending to an array, we enclose the values within - braces and separate them by commas. If you know - <firstterm>C</firstterm>, - this is not unlike the syntax for initializing structures. - - <programlisting> -INSERT INTO SAL_EMP - VALUES ('Bill', - '{10000, 10000, 10000, 10000}', - '{{"meeting", "lunch"}, {}}'); - -INSERT INTO SAL_EMP - VALUES ('Carol', - '{20000, 25000, 25000, 25000}', - '{{"talk", "consult"}, {"meeting"}}'); - </programlisting> - - By default, <productname>Postgres</productname> uses the - "one-based" numbering - convention for arrays -- that is, an array of n elements - starts with array[1] and ends with array[n]. - Now, we can run some queries on SAL_EMP. First, we - show how to access a single element of an array at a - time. This query retrieves the names of the employees - whose pay changed in the second quarter: - - <programlisting> -SELECT name - FROM SAL_EMP - WHERE SAL_EMP.pay_by_quarter[1] <> - SAL_EMP.pay_by_quarter[2]; - -+------+ -|name | -+------+ -|Carol | -+------+ - </programlisting> - </para> - - <para> - This query retrieves the third quarter pay of all - employees: - - <programlisting> -SELECT SAL_EMP.pay_by_quarter[3] FROM SAL_EMP; - - -+---------------+ -|pay_by_quarter | -+---------------+ -|10000 | -+---------------+ -|25000 | -+---------------+ - </programlisting> - </para> - - <para> - We can also access arbitrary slices of an array (subarrays) - by specifying both lower and upper bounds for - each subscript. This query retrieves the first item on - Bill's schedule for the first two days of the week. - - <programlisting> -SELECT SAL_EMP.schedule[1:2][1:1] - FROM SAL_EMP - WHERE SAL_EMP.name = 'Bill'; - -+-------------------+ -|schedule | -+-------------------+ -|{{"meeting"},{""}} | -+-------------------+ - </programlisting> - </para> - </sect2> - </sect1> - - <sect1 id="more-advanced"> - <title>More Advanced Features</title> - <para> - <productname>Postgres</productname> has many features not touched - upon in this - tutorial introduction, which has been oriented toward newer users of - <acronym>SQL</acronym>. - These are discussed in more detail in both the User's and - Programmer's Guides. + If you feel you need more introductory material, please visit the + <ulink url="http://www.postgresql.org">PostgreSQL web + site</ulink> for links to more resources. </para> - </sect1> </chapter> diff --git a/doc/src/sgml/arch.sgml b/doc/src/sgml/arch.sgml deleted file mode 100644 index d420fab6dd9..00000000000 --- a/doc/src/sgml/arch.sgml +++ /dev/null @@ -1,88 +0,0 @@ -<Chapter Id="arch"> - <TITLE>Architecture</TITLE> - -<Sect1 id="arch-concepts"> -<Title><ProductName>Postgres</ProductName> Architectural Concepts</Title> - -<Para> - Before we begin, you should understand the basic - <ProductName>Postgres</ProductName> system architecture. Understanding how the - parts of <ProductName>Postgres</ProductName> interact will make the next chapter - somewhat clearer. - In database jargon, <ProductName>Postgres</ProductName> uses a simple "process - per-user" client/server model. A <ProductName>Postgres</ProductName> session - consists of the following cooperating Unix processes (programs): -</Para> - -<ItemizedList> -<ListItem> -<Para> - A supervisory daemon process (<Application>postmaster</Application>), -</Para> -</ListItem> -<ListItem> -<Para> - the user's frontend application (e.g., the <Application>psql</Application> program), and -</Para> -</ListItem> -<ListItem> -<Para> - the one or more backend database servers (the <Application>postgres</Application> process itself). -</Para> -</ListItem> -</ItemizedList> - -<Para> - A single <Application>postmaster</Application> manages a given collection of - databases on a single host. Such a collection of - databases is called a cluster (of databases). Frontend - applications that wish to access a given database - within a cluster make calls to the library. - The library sends user requests over the network to the - <Application>postmaster</Application> (<XRef LinkEnd="ARCH-CLIENTSERVER">), - which in turn starts a new backend server process - -<Figure Id="ARCH-CLIENTSERVER"> -<Title>How a connection is established</Title> -<Graphic Align="center" FileRef="clientserver.gif" Format="GIF"></Graphic> -</Figure> - - and connects the - frontend process to the new server. From - that point on, the frontend process and the backend - server communicate without intervention by the - <Application>postmaster</Application>. Hence, the <Application>postmaster</Application> is always running, waiting - for requests, whereas frontend and backend processes - come and go. -</Para> - -<Para> - The <FileName>libpq</FileName> library allows a single - frontend to make multiple connections to backend processes. - However, the frontend application is still a - single-threaded process. Multithreaded frontend/backend - connections are not currently supported in <FileName>libpq</FileName>. - One implication of this architecture is that the - <Application>postmaster</Application> and the backend always run on the same - machine (the database server), while the frontend - application may run anywhere. You should keep this - in mind, - because the files that can be accessed on a client - machine may not be accessible (or may only be accessed - using a different filename) on the database server - machine. -</Para> - -<Para> - You should also be aware that the <Application>postmaster</Application> and - postgres servers run with the user-id of the <ProductName>Postgres</ProductName> - "superuser." Note that the <ProductName>Postgres</ProductName> superuser does not - have to be a special user (e.g., a user named - "postgres"). Furthermore, the <ProductName>Postgres</ProductName> superuser - should - definitely not be the Unix superuser ("root")! In any - case, all files relating to a database should belong to - this <ProductName>Postgres</ProductName> superuser. -</Para> -</sect1> -</Chapter> diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 4b613db8e5b..7f81f6cdca6 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -1,7 +1,8 @@ -<!-- $Header: /cvsroot/pgsql/doc/src/sgml/filelist.sgml,v 1.14 2001/08/26 21:17:12 tgl Exp $ --> +<!-- $Header: /cvsroot/pgsql/doc/src/sgml/filelist.sgml,v 1.15 2001/09/02 23:27:49 petere Exp $ --> <!entity history SYSTEM "history.sgml"> <!entity info SYSTEM "info.sgml"> +<!entity intro SYSTEM "intro.sgml"> <!entity legal SYSTEM "legal.sgml"> <!entity notation SYSTEM "notation.sgml"> <!entity problems SYSTEM "problems.sgml"> @@ -12,12 +13,12 @@ <!-- tutorial --> <!entity advanced SYSTEM "advanced.sgml"> -<!entity arch SYSTEM "arch.sgml"> -<!entity intro SYSTEM "intro.sgml"> <!entity query SYSTEM "query.sgml"> -<!entity sql SYSTEM "sql.sgml"> <!entity start SYSTEM "start.sgml"> +<!-- currently unused, but contains some interesting information --> +<!entity sql SYSTEM "sql.sgml"> + <!-- user's guide --> <!entity array SYSTEM "array.sgml"> <!entity datatype SYSTEM "datatype.sgml"> diff --git a/doc/src/sgml/info.sgml b/doc/src/sgml/info.sgml index 195e56e7f36..608611777ad 100644 --- a/doc/src/sgml/info.sgml +++ b/doc/src/sgml/info.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/info.sgml,v 1.11 2001/02/03 19:03:26 petere Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/info.sgml,v 1.12 2001/09/02 23:27:49 petere Exp $ --> <sect1 id="resources"> @@ -13,7 +13,7 @@ $Header: /cvsroot/pgsql/doc/src/sgml/info.sgml,v 1.11 2001/02/03 19:03:26 petere <term>Tutorial</term> <listitem> <para> - An introduction for new users. Does not cover advanced features. + An informal introduction for new users </para> </listitem> </varlistentry> diff --git a/doc/src/sgml/query.sgml b/doc/src/sgml/query.sgml index 82c4ffe697f..04fcce1985d 100644 --- a/doc/src/sgml/query.sgml +++ b/doc/src/sgml/query.sgml @@ -1,102 +1,106 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/query.sgml,v 1.17 2001/01/13 23:58:55 petere Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/query.sgml,v 1.18 2001/09/02 23:27:49 petere Exp $ --> - <chapter id="query"> - <title>The Query Language</title> - - <para> - The <productname>Postgres</productname> query language is a variant of - the <acronym>SQL</acronym> standard. It - has many extensions to <acronym>SQL</acronym> such as an - extensible type system, - inheritance, functions and production rules. These are - features carried over from the original - <productname>Postgres</productname> query - language, <productname>PostQuel</productname>. - This section provides an overview - of how to use <productname>Postgres</productname> - <acronym>SQL</acronym> to perform simple operations. - This manual is only intended to give you an idea of our - flavor of <acronym>SQL</acronym> and is in no way a complete tutorial on - <acronym>SQL</acronym>. Numerous books have been written on - <acronym>SQL92</acronym>, including - <xref linkend="MELT93" endterm="MELT93"> and - <xref linkend="DATE97" endterm="DATE97">. - You should be aware that some language features - are extensions to the standard. - </para> - - <sect1 id="query-psql"> - <title>Interactive Monitor</title> - - <para> - In the examples that follow, we assume that you have - created the mydb database as described in the previous - subsection and have started <application>psql</application>. - Examples in this manual can also be found in source distribution - in the directory <filename>src/tutorial/</filename>. Refer to the - <filename>README</filename> file in that directory for how to use them. To - start the tutorial, do the following: + <chapter id="tutorial-sql"> + <title>The <acronym>SQL</acronym> Language</title> + + <sect1 id="tutorial-sql-intro"> + <title>Introduction</title> + + <para> + This chapter provides an overview of how to use + <acronym>SQL</acronym> to perform simple operations. This + tutorial is only intended to give you an introduction and is in no + way a complete tutorial on <acronym>SQL</acronym>. Numerous books + have been written on <acronym>SQL92</acronym>, including <xref + linkend="MELT93" endterm="MELT93"> and <xref linkend="DATE97" + endterm="DATE97">. You should be aware that some language + features are extensions to the standard. + </para> + + <para> + In the examples that follow, we assume that you have created a + database named <quote>mydb</quote>, as described in the previous + chapter, and have started <application>psql</application>. + </para> + + <para> + Examples in this manual can also be found in source distribution + in the directory <filename>src/tutorial/</filename>. Refer to the + <filename>README</filename> file in that directory for how to use + them. To start the tutorial, do the following: <screen> -<prompt>$</prompt> <userinput>cd <replaceable>...</replaceable>/src/tutorial</userinput> +<prompt>$</prompt> <userinput>cd <replaceable>....</replaceable>/src/tutorial</userinput> <prompt>$</prompt> <userinput>psql -s mydb</userinput> <computeroutput> -Welcome to the POSTGRESQL interactive sql monitor: - Please read the file COPYRIGHT for copyright terms of POSTGRESQL - - type \? for help on slash commands - type \q to quit - type \g or terminate with semicolon to execute query - You are currently connected to the database: postgres +... </computeroutput> <prompt>mydb=></prompt> <userinput>\i basics.sql</userinput> </screen> + + The <literal>\i</literal> command reads in commands from the + specified files. The <literal>-s</literal> option puts you in + single step mode which pauses before sending a query to the + server. The commands used in this section are in the file + <filename>basics.sql</filename>. </para> + </sect1> + + + <sect1 id="tutorial-concepts"> + <title>Concepts</title> <para> - The <literal>\i</literal> command read in queries from the specified - files. The <literal>-s</literal> option puts you in single step mode which - pauses before sending a query to the backend. Queries - in this section are in the file <filename>basics.sql</filename>. + <indexterm><primary>relational database</primary></indexterm> + <indexterm><primary>hierarchical database</primary></indexterm> + <indexterm><primary>object-oriented database</primary></indexterm> + <indexterm><primary>relation</primary></indexterm> + <indexterm><primary>table</primary></indexterm> + + <productname>PostgreSQL</productname> is a <firstterm>relational + database management system</firstterm> (<acronym>RDBMS</acronym>). + That means it is a system for managing data stored in + <firstterm>relations</firstterm>. Relation is essentially a + mathematical term for <firstterm>table</firstterm>. The notion of + storing data in tables is so commonplace today that it might + seem inherently obvious, but there are a number of other ways of + organizing databases. Files and directories on Unix-like + operating systems form an example of a hierarchical database. A + more modern development is the object-oriented database. </para> <para> - <application>psql</application> - has a variety of <literal>\d</literal> commands for showing system information. - Consult these commands for more details; - for a listing, type <literal>\?</literal> at the <application>psql</application> prompt. + <indexterm><primary>row</primary></indexterm> + <indexterm><primary>column</primary></indexterm> + + Each table is a named collection of <firstterm>rows</firstterm>. + Each row has the same set of named <firstterm>columns</firstterm>, + and each column is of a specific data type. Whereas columns have + a fixed order in each row, it is important to remember that SQL + does not guarantee the order of the rows within the table in any + way (unless they are explicitly sorted). </para> - </sect1> - - <sect1 id="query-concepts"> - <title>Concepts</title> <para> - The fundamental notion in <productname>Postgres</productname> is - that of a <firstterm>table</firstterm>, which is a named - collection of <firstterm>rows</firstterm>. Each row has the same - set of named <firstterm>columns</firstterm>, and each column is of - a specific type. Furthermore, each row has a permanent - <firstterm>object identifier</firstterm> (<acronym>OID</acronym>) - that is unique throughout the database cluster. Historially, - tables have been called classes in - <productname>Postgres</productname>, rows are object instances, - and columns are attributes. This makes sense if you consider the - object-relational aspects of the database system, but in this - manual we will use the customary <acronym>SQL</acronym> - terminology. As previously discussed, - tables are grouped into databases, and a collection of databases - managed by a single <application>postmaster</application> process - constitutes a database cluster. + <indexterm><primary>cluster</primary></indexterm> + + Tables are grouped into databases, and a collection of databases + managed by a single <productname>PostgreSQL</productname> server + instance constitutes a database <firstterm>cluster</firstterm>. </para> </sect1> - <sect1 id="query-table"> + + <sect1 id="tutorial-table"> <title>Creating a New Table</title> + <indexterm zone="tutorial-table"> + <primary>CREATE TABLE</primary> + </indexterm> + <para> You can create a new table by specifying the table name, along with all column names and their types: @@ -110,39 +114,82 @@ CREATE TABLE weather ( date date ); </programlisting> + + You can enter this into <command>psql</command> with the line + breaks. <command>psql</command> will recognize that the command + is not terminated until the semicolon. + </para> + + <para> + White space (i.e., spaces, tabs, and newlines) may be used freely + in SQL commands. That means you can type the command aligned + differently than above, or even all on one line. Two dashes + (<quote><literal>--</literal></quote>) introduce comments. + Whatever follows them is ignored up to the end of the line. SQL + is also case insensitive about key words and identifiers, except + when identifiers are double-quoted to preserve the case (not done + above). + </para> + + <para> + <type>varchar(80)</type> specifies a data type that can store + arbitrary character strings up to 80 characters in length. + <type>int</type> is the normal integer type. <type>real</type> is + a type for storing single precision floating point numbers. + <type>date</type> should be self-explanatory. (Yes, the column of + type <type>date</type> is also named <literal>date</literal>. + This may be convenient or confusing -- you choose.) </para> <para> - Note that both keywords and identifiers are case-insensitive; - identifiers can preserve case by surrounding them with - double-quotes as allowed - by <acronym>SQL92</acronym>. - <productname>Postgres</productname> <acronym>SQL</acronym> - supports the usual + <productname>PostgreSQL</productname> supports the usual <acronym>SQL</acronym> types <type>int</type>, - <type>float</type>, <type>real</type>, <type>smallint</type>, -<type>char(N)</type>, - <type>varchar(N)</type>, <type>date</type>, <type>time</type>, - and <type>timestamp</type>, as well as other types of general utility and - a rich set of geometric types. As we will - see later, <productname>Postgres</productname> can be customized - with an - arbitrary number of - user-defined data types. Consequently, type names are - not syntactical keywords, except where required to support special - cases in the <acronym>SQL92</acronym> standard. - So far, the <productname>Postgres</productname> - <command>CREATE</command> command - looks exactly like - the command used to create a table in a traditional - relational system. However, we will presently see that - tables have properties that are extensions of the - relational model. + <type>smallint</type>, <type>real</type>, <type>double + precision</type>, <type>char(<replaceable>N</>)</type>, + <type>varchar(<replaceable>N</>)</type>, <type>date</type>, + <type>time</type>, <type>timestamp</type>, and + <type>interval</type> as well as other types of general utility + and a rich set of geometric types. + <productname>PostgreSQL</productname> can be customized with an + arbitrary number of user-defined data types. Consequently, type + names are not syntactical keywords, except where required to + support special cases in the <acronym>SQL</acronym> standard. + </para> + + <para> + The second example will store cities and their associated + geographical location: +<programlisting> +CREATE TABLE cities ( + name varchar(80), + location point +); +</programlisting> + The <type>point</type> type is such a + <productname>PostgreSQL</productname>-specific data type. + </para> + + <para> + <indexterm> + <primary>DROP TABLE</primary> + </indexterm> + + Finally, it should be mentioned that if you don't need a table any + longer or want to recreate it differently you can remove it using + the following command: +<synopsis> +DROP TABLE <replaceable>tablename</replaceable>; +</synopsis> </para> </sect1> - <sect1 id="query-populate"> - <title>Populating a Table with Rows</title> + + <sect1 id="tutorial-populate"> + <title>Populating a Table With Rows</title> + + <indexterm zone="tutorial-populate"> + <primary>INSERT</primary> + </indexterm> <para> The <command>INSERT</command> statement is used to populate a table with @@ -151,129 +198,184 @@ CREATE TABLE weather ( <programlisting> INSERT INTO weather VALUES ('San Francisco', 46, 50, 0.25, '1994-11-27'); </programlisting> + + Note that all data types use rather obvious input formats. The + <type>date</type> column is actually quite flexible in what it + accepts, but for this tutorial we will stick to the unambiguous + format shown here. </para> <para> - You can also use <command>COPY</command> to load large - amounts of data from flat (<acronym>ASCII</acronym>) files. - This is usually faster because the data is read (or written) as a - single atomic - transaction directly to or from the target table. An example would be: + The <type>point</type> type requires a coordinate pair as input, + as shown here: +<programlisting> +INSERT INTO cities VALUES ('San Francisco', '(-194.0, 53.0)'); +</programlisting> + </para> + <para> + The syntax used so far requires you to remember the order of the + columns. An alternative syntax allows you to list the columns + explicitly: <programlisting> -COPY weather FROM '/home/user/weather.txt' USING DELIMITERS '|'; +INSERT INTO weather (city, temp_lo, temp_hi, prcp, date) + VALUES ('San Francisco', 43, 57, 0.0, '1994-11-29'); +</programlisting> + You can also list the columns in a different order if you wish or + even omit some columns, e.g., unknown precipitation: +<programlisting> +INSERT INTO weather (date, city, temp_hi, temp_lo) + VALUES ('1994-11-29', 'Hayward', 54, 37); +</programlisting> + Many developers consider explicitly listing the columns better + style than relying on the order implicitly. + </para> + + <para> + Please enter all the commands shown above so you have some data to + work with in the following sections. + </para> + + <para> + <indexterm> + <primary>COPY</primary> + </indexterm> + + You could also have used <command>COPY</command> to load large + amounts of data from flat text files. This is usually faster + because the <command>COPY</command> is optimized for this + application while allowing less flexibility than + <command>INSERT</command>. An example would be: + +<programlisting> +COPY weather FROM '/home/user/weather.txt'; </programlisting> where the path name for the source file must be available to the - backend server - machine, not the client, since the backend server reads the file directly. + backend server machine, not the client, since the backend server + reads the file directly. You can read more about the + <command>COPY</command> command in the <citetitle>Reference + Manual</citetitle>. </para> </sect1> - <sect1 id="query-query"> + + <sect1 id="tutorial-select"> <title>Querying a Table</title> <para> - The <classname>weather</classname> table can be queried with normal relational - selection and projection queries. A <acronym>SQL</acronym> - <command>SELECT</command> - statement is used to do this. The statement is divided into - a target list (the part that lists the columns to be - returned) and a qualification (the part that specifies - any restrictions). For example, to retrieve all the - rows of weather, type: + <indexterm><primary>query</primary></indexterm> + <indexterm><primary>SELECT</primary></indexterm> + + To retrieve data from a table it is + <firstterm>queried</firstterm>. An <acronym>SQL</acronym> + <command>SELECT</command> statement is used to do this. The + statement is divided into a select list (the part that lists the + columns to be returned), a table list (the part that lists the + tables from which to retrieve the data), and an optional + qualification (the part that specifies any restrictions). For + example, to retrieve all the rows of + <classname>weather</classname>, type: <programlisting> SELECT * FROM weather; </programlisting> + (where <literal>*</literal> means <quote>all columns</quote>) and + the output should be: +<screen> + city | temp_lo | temp_hi | prcp | date +---------------+---------+---------+------+------------ + San Francisco | 46 | 50 | 0.25 | 1994-11-27 + San Francisco | 43 | 57 | 0 | 1994-11-29 + Hayward | 37 | 54 | | 1994-11-29 +(3 rows) +</screen> + </para> - and the output should be: -<programlisting> -+--------------+---------+---------+------+------------+ -|city | temp_lo | temp_hi | prcp | date | -+--------------+---------+---------+------+------------+ -|San Francisco | 46 | 50 | 0.25 | 1994-11-27 | -+--------------+---------+---------+------+------------+ -|San Francisco | 43 | 57 | 0 | 1994-11-29 | -+--------------+---------+---------+------+------------+ -|Hayward | 37 | 54 | | 1994-11-29 | -+--------------+---------+---------+------+------------+ -</programlisting> - You may specify any arbitrary expressions in the target list. For + <para> + You may specify any arbitrary expressions in the target list. For example, you can do: <programlisting> SELECT city, (temp_hi+temp_lo)/2 AS temp_avg, date FROM weather; </programlisting> + This should give: +<screen> + city | temp_avg | date +---------------+----------+------------ + San Francisco | 48 | 1994-11-27 + San Francisco | 50 | 1994-11-29 + Hayward | 45 | 1994-11-29 +(3 rows) +</screen> + Notice how the <literal>AS</literal> clause is used to relabel the + output column. (It is optional.) </para> <para> - Arbitrary Boolean operators - (<command>AND</command>, <command>OR</command> and - <command>NOT</command>) are - allowed in the qualification of any query. For example, + Arbitrary Boolean operators (<literal>AND</literal>, + <literal>OR</literal>, and <literal>NOT</literal>) are allowed in + the qualification of a query. For example, the following + retrieves the weather of San Francisco on rainy days: <programlisting> SELECT * FROM weather WHERE city = 'San Francisco' AND prcp > 0.0; </programlisting> -results in: -<programlisting> -+--------------+---------+---------+------+------------+ -|city | temp_lo | temp_hi | prcp | date | -+--------------+---------+---------+------+------------+ -|San Francisco | 46 | 50 | 0.25 | 1994-11-27 | -+--------------+---------+---------+------+------------+ -</programlisting> + Result: +<screen> + city | temp_lo | temp_hi | prcp | date +---------------+---------+---------+------+------------ + San Francisco | 46 | 50 | 0.25 | 1994-11-27 +(1 row) +</screen> </para> <para> - As a final note, you can specify that the results of a - select can be returned in a <firstterm>sorted order</firstterm> - or with duplicate rows removed. + <indexterm><primary>ORDER BY</primary></indexterm> + <indexterm><primary>DISTINCT</primary></indexterm> + <indexterm><primary>duplicate</primary></indexterm> + + As a final note, you can request that the results of a select can + be returned in sorted order or with duplicate rows removed. (Just + to make sure the following won't confuse you, + <literal>DISTINCT</literal> and <literal>ORDER BY</literal> can be + used separately.) <programlisting> SELECT DISTINCT city FROM weather ORDER BY city; </programlisting> - </para> - </sect1> - - <sect1 id="query-selectinto"> - <title>Redirecting SELECT Queries</title> - - <para> - Any <command>SELECT</command> query can be redirected to a new table -<programlisting> -SELECT * INTO TABLE temp FROM weather; -</programlisting> - </para> - <para> - This forms an implicit <command>CREATE</command> command, creating a new - table temp with the column names and types specified - in the target list of the <command>SELECT INTO</command> command. We can - then, of course, perform any operations on the resulting - table that we can perform on other tables. +<screen> + city +--------------- + Hayward + San Francisco +(2 rows) +</screen> </para> </sect1> - <sect1 id="query-join"> + + <sect1 id="tutorial-join"> <title>Joins Between Tables</title> + <indexterm zone="tutorial-join"> + <primary>join</primary> + </indexterm> + <para> - Thus far, our queries have only accessed one table at a - time. Queries can access multiple tables at once, or - access the same table in such a way that multiple - rows of the table are being processed at the same - time. A query that accesses multiple rows of the - same or different tables at one time is called a join - query. - As an example, say we wish to find all the records that - are in the temperature range of other records. In - effect, we need to compare the temp_lo and temp_hi - columns of each WEATHER row to the temp_lo and - temp_hi columns of all other WEATHER columns. + Thus far, our queries have only accessed one table at a time. + Queries can access multiple tables at once, or access the same + table in such a way that multiple rows of the table are being + processed at the same time. A query that accesses multiple rows + of the same or different tables at one time is called a + <firstterm>join</firstterm> query. As an example, say you wish to + list all the weather records together with the location of the + associated city. In effect, we need to compare the city column of + each row of the weather table with the name column of all rows in + the cities table. <note> <para> This is only a conceptual model. The actual join may @@ -281,102 +383,189 @@ SELECT * INTO TABLE temp FROM weather; to the user. </para> </note> - - We can do this with the following query: + This would be accomplished by the following query: <programlisting> -SELECT W1.city, W1.temp_lo AS low, W1.temp_hi AS high, - W2.city, W2.temp_lo AS low, W2.temp_hi AS high - FROM weather W1, weather W2 - WHERE W1.temp_lo < W2.temp_lo - AND W1.temp_hi > W2.temp_hi; +SELECT * + FROM weather, cities + WHERE city = name; +</programlisting> -+--------------+-----+------+---------------+-----+------+ -|city | low | high | city | low | high | -+--------------+-----+------+---------------+-----+------+ -|San Francisco | 43 | 57 | San Francisco | 46 | 50 | -+--------------+-----+------+---------------+-----+------+ -|San Francisco | 37 | 54 | San Francisco | 46 | 50 | -+--------------+-----+------+---------------+-----+------+ -</programlisting> +<screen> + city | temp_lo | temp_hi | prcp | date | name | location +---------------+---------+---------+------+------------+---------------+----------- + San Francisco | 46 | 50 | 0.25 | 1994-11-27 | San Francisco | (-194,53) + San Francisco | 43 | 57 | 0 | 1994-11-29 | San Francisco | (-194,53) +(2 rows) +</screen> - <note> - <para> - The semantics of such a join are - that the qualification - is a truth expression defined for the Cartesian product of - the tables indicated in the query. For those rows in - the Cartesian product for which the qualification is true, - <productname>Postgres</productname> computes and returns the - values specified in the target list. - <productname>Postgres</productname> <acronym>SQL</acronym> - does not assign any meaning to - duplicate values in such expressions. - This means that <productname>Postgres</productname> - sometimes recomputes the same target list several times; - this frequently happens when Boolean expressions are connected - with an "or". To remove such duplicates, you must use - the <command>SELECT DISTINCT</command> statement. - </para> - </note> </para> <para> - In this case, both <literal>W1</literal> and - <literal>W2</literal> are surrogates for a - row of the table weather, and both range over all - rows of the table. (In the terminology of most - database systems, <literal>W1</literal> and <literal>W2</literal> - are known as <firstterm>range variables</firstterm>.) - A query can contain an arbitrary number of - table names and surrogates. + Observe two things about the result set: + <itemizedlist> + <listitem> + <para> + There is no result row for the city of Hayward. This is + because there is no matching entry in the + <classname>cities</classname> table for Hayward, so the join + cannot process the rows in the weather table. We will see + shortly how this can be fixed. + </para> + </listitem> + + <listitem> + <para> + There are two columns containing the city name. This is + correct because the lists of columns of the + <classname>weather</classname> and the + <classname>cities</classname> tables are concatenated. In + practice this is undesirable, though, so you will probably want + to list the output columns explicitly rather than using + <literal>*</literal>: +<programlisting> +SELECT city, temp_lo, temp_hi, prcp, date, location + FROM weather, cities + WHERE city = name; +</programlisting> + </para> + </listitem> + </itemizedlist> </para> - </sect1> - <sect1 id="query-update"> - <title>Updates</title> + <formalpara> + <title>Exercise:</title> + + <para> + Attempt to find out the semantics of this query when the + <literal>WHERE</literal> clause is omitted. + </para> + </formalpara> <para> - You can update existing rows using the - <command>UPDATE</command> command. - Suppose you discover the temperature readings are - all off by 2 degrees as of Nov 28, you may update the - data as follow: + Since the columns all had different names, the parser + automatically found out which table they belong to, but it is good + style to fully qualify column names in join queries: <programlisting> -UPDATE weather - SET temp_hi = temp_hi - 2, temp_lo = temp_lo - 2 - WHERE date > '1994-11-28'; +SELECT weather.city, weather.temp_lo, weather.temp_hi, weather.prcp, weather.date, cities.location + FROM weather, cities + WHERE cities.name = weather.city; </programlisting> </para> - </sect1> - - <sect1 id="query-delete"> - <title>Deletions</title> <para> - Deletions are performed using the <command>DELETE</command> command: + Join queries of the kind seen thus far can also be written in this + alternative form: + <programlisting> -DELETE FROM weather WHERE city = 'Hayward'; +SELECT * + FROM weather INNER JOIN cities ON (weather.city = cities.name); </programlisting> - All weather recording belonging to Hayward are removed. - One should be wary of queries of the form + This syntax is not as commonly used as the one above, but we show + it here to help you understand the following topics. + </para> + + <para> + <indexterm><primary>join</primary><secondary>outer</secondary></indexterm> + + Now we will figure out how we can get the Hayward records back in. + What we want the query to do is to scan the + <classname>weather</classname> table and for each row to find the + matching <classname>cities</classname> row. If no matching row is + found we want some <quote>empty values</quote> to be substituted + for the <classname>cities</classname> table's columns. This kind + of query is called an <firstterm>outer join</firstterm>. (The + joins we have seen to far are inner joins.) The command looks + like this: + <programlisting> -DELETE FROM <replaceable>tablename</replaceable>; +SELECT * + FROM weather LEFT OUTER JOIN cities ON (weather.city = cities.name); + + city | temp_lo | temp_hi | prcp | date | name | location +---------------+---------+---------+------+------------+---------------+----------- + Hayward | 37 | 54 | | 1994-11-29 | | + San Francisco | 46 | 50 | 0.25 | 1994-11-27 | San Francisco | (-194,53) + San Francisco | 43 | 57 | 0 | 1994-11-29 | San Francisco | (-194,53) +(3 rows) </programlisting> - Without a qualification, <command>DELETE</command> will simply - remove all rows from the given table, leaving it - empty. The system will not request confirmation before - doing this. + In particular, this query is a <firstterm>left outer + join</firstterm> because the table mentioned on the left of the + join operator will have each of its rows in the output at least + once, whereas the table on the right will only have those rows + output that match some row of the left table, and will have empty + values substituted appropriately. + </para> + + <formalpara> + <title>Exercise:</title> + + <para> + There are also right outer joins and full outer joins. Try to + find out what those do. + </para> + </formalpara> + + <para> + <indexterm><primary>join</primary><secondary>self</secondary></indexterm> + <indexterm><primary>alias</primary><secondary>for table name in query</secondary></indexterm> + + We can also join a table against itself. This is called a + <firstterm>self join</firstterm>. As an example, suppose we wish + to find all the weather records that are in the temperature range + of other weather records. So we need to compare the + <structfield>temp_lo</> and <structfield>temp_hi</> columns of + each <classname>weather</classname> row to the + <structfield>temp_lo</structfield> and + <structfield>temp_hi</structfield> columns of all other + <classname>weather</classname> rows. We can do this with the + following query: + +<programlisting> +SELECT W1.city, W1.temp_lo AS low, W1.temp_hi AS high, + W2.city, W2.temp_lo AS low, W2.temp_hi AS high + FROM weather W1, weather W2 + WHERE W1.temp_lo < W2.temp_lo + AND W1.temp_hi > W2.temp_hi; + + city | low | high | city | low | high +---------------+-----+------+---------------+-----+------ + San Francisco | 43 | 57 | San Francisco | 46 | 50 + Hayward | 37 | 54 | San Francisco | 46 | 50 +(2 rows) +</programlisting> + + Here we have relabeled the weather table as <literal>W1</> and + <literal>W2</> to be able to distinguish the left and right side + of the join. You can also use these kinds of aliases in other + queries to save some typing, e.g.: +<programlisting> +SELECT * + FROM weather w, cities c + WHERE w.city = c.name; +</programlisting> + You will encounter this style of abbreviating quite frequently. </para> </sect1> - <sect1 id="query-agg"> - <title>Using Aggregate Functions</title> + + <sect1 id="tutorial-agg"> + <title>Aggregate Functions</title> + + <indexterm zone="tutorial-agg"> + <primary>aggregate</primary> + </indexterm> <para> + <indexterm><primary>average</primary></indexterm> + <indexterm><primary>count</primary></indexterm> + <indexterm><primary>max</primary></indexterm> + <indexterm><primary>min</primary></indexterm> + <indexterm><primary>sum</primary></indexterm> + Like most other relational database products, <productname>PostgreSQL</productname> supports aggregate functions. @@ -387,95 +576,215 @@ DELETE FROM <replaceable>tablename</replaceable>; <function>min</function> (minimum) over a set of rows. </para> - <para> - It is important to understand the interaction between aggregates and - SQL's <command>WHERE</command> and <command>HAVING</command> clauses. - The fundamental difference between <command>WHERE</command> and - <command>HAVING</command> is this: <command>WHERE</command> selects - input rows before groups and aggregates are computed (thus, it controls - which rows go into the aggregate computation), whereas - <command>HAVING</command> selects group rows after groups and - aggregates are computed. Thus, the - <command>WHERE</command> clause may not contain aggregate functions; - it makes no sense to try to use an aggregate to determine which rows - will be inputs to the aggregates. On the other hand, - <command>HAVING</command> clauses always contain aggregate functions. - (Strictly speaking, you are allowed to write a <command>HAVING</command> - clause that doesn't use aggregates, but it's wasteful; the same condition - could be used more efficiently at the <command>WHERE</command> stage.) - </para> - <para> As an example, we can find the highest low-temperature reading anywhere with - <programlisting> +<programlisting> SELECT max(temp_lo) FROM weather; - </programlisting> +</programlisting> + +<screen> + max +----- + 46 +(1 row) +</screen> + </para> + + <para> + <indexterm><primary>subquery</primary></indexterm> If we want to know what city (or cities) that reading occurred in, we might try - <programlisting> -SELECT city FROM weather WHERE temp_lo = max(temp_lo); - </programlisting> +<programlisting> +SELECT city FROM weather WHERE temp_lo = max(temp_lo); <lineannotation>WRONG</lineannotation> +</programlisting> but this will not work since the aggregate - <function>max</function> can't be used in - <command>WHERE</command>. However, as is often the case the query can be - restated to accomplish the intended result; here by using a - <firstterm>subselect</firstterm>: + <function>max</function> cannot be used in the + <literal>WHERE</literal> clause. However, as is often the case + the query can be restated to accomplish the intended result; here + by using a <firstterm>subquery</firstterm>: - <programlisting> +<programlisting> SELECT city FROM weather WHERE temp_lo = (SELECT max(temp_lo) FROM weather); - </programlisting> +</programlisting> + +<screen> + city +--------------- + San Francisco +(1 row) +</screen> - This is OK because the sub-select is an independent computation that - computes its own aggregate separately from what's happening in the outer - select. + This is OK because the sub-select is an independent computation + that computes its own aggregate separately from what is happening + in the outer select. </para> <para> - Aggregates are also very useful in combination with - <command>GROUP BY</command> clauses. For example, we can get the - maximum low temperature observed in each city with + <indexterm><primary>GROUP BY</primary></indexterm> + <indexterm><primary>HAVING</primary></indexterm> + + Aggregates are also very useful in combination with <literal>GROUP + BY</literal> clauses. For example, we can get the maximum low + temperature observed in each city with - <programlisting> +<programlisting> SELECT city, max(temp_lo) FROM weather GROUP BY city; - </programlisting> +</programlisting> + +<screen> + city | max +---------------+----- + Hayward | 37 + San Francisco | 46 +(2 rows) +</screen> which gives us one output row per city. We can filter these grouped - rows using <command>HAVING</command>: + rows using <literal>HAVING</literal>: - <programlisting> +<programlisting> SELECT city, max(temp_lo) FROM weather GROUP BY city - HAVING min(temp_lo) < 0; - </programlisting> + HAVING max(temp_lo) < 40; +</programlisting> + +<screen> + city | max +---------+----- + Hayward | 37 +(1 row) +</screen> which gives us the same results for only the cities that have some - below-zero readings. Finally, if we only care about cities whose - names begin with "<literal>P</literal>", we might do + below-forty readings. Finally, if we only care about cities whose + names begin with <quote><literal>S</literal></quote>, we might do - <programlisting> +<programlisting> SELECT city, max(temp_lo) FROM weather - WHERE city like 'P%' + WHERE city LIKE 'S%' GROUP BY city - HAVING min(temp_lo) < 0; - </programlisting> + HAVING max(temp_lo) < 40; +</programlisting> + </para> - Note that we can apply the city-name restriction in - <command>WHERE</command>, since it needs no aggregate. This is - more efficient than adding the restriction to <command>HAVING</command>, + <para> + It is important to understand the interaction between aggregates and + SQL's <literal>WHERE</literal> and <literal>HAVING</literal> clauses. + The fundamental difference between <literal>WHERE</literal> and + <literal>HAVING</literal> is this: <literal>WHERE</literal> selects + input rows before groups and aggregates are computed (thus, it controls + which rows go into the aggregate computation), whereas + <literal>HAVING</literal> selects group rows after groups and + aggregates are computed. Thus, the + <literal>WHERE</literal> clause must not contain aggregate functions; + it makes no sense to try to use an aggregate to determine which rows + will be inputs to the aggregates. On the other hand, + <literal>HAVING</literal> clauses always contain aggregate functions. + (Strictly speaking, you are allowed to write a <literal>HAVING</literal> + clause that doesn't use aggregates, but it's wasteful; the same condition + could be used more efficiently at the <literal>WHERE</literal> stage.) + </para> + + <para> + Note that we can apply the city name restriction in + <literal>WHERE</literal>, since it needs no aggregate. This is + more efficient than adding the restriction to <literal>HAVING</literal>, because we avoid doing the grouping and aggregate calculations - for all rows that fail the <command>WHERE</command> check. + for all rows that fail the <literal>WHERE</literal> check. + </para> + </sect1> + + + <sect1 id="tutorial-update"> + <title>Updates</title> + + <indexterm zone="tutorial-update"> + <primary>UPDATE</primary> + </indexterm> + + <para> + You can update existing rows using the + <command>UPDATE</command> command. + Suppose you discover the temperature readings are + all off by 2 degrees as of November 28, you may update the + data as follow: + +<programlisting> +UPDATE weather + SET temp_hi = temp_hi - 2, temp_lo = temp_lo - 2 + WHERE date > '1994-11-28'; +</programlisting> + </para> + + <para> + Look at the new state of the data: +<programlisting> +SELECT * FROM weather; + + city | temp_lo | temp_hi | prcp | date +---------------+---------+---------+------+------------ + San Francisco | 46 | 50 | 0.25 | 1994-11-27 + San Francisco | 41 | 55 | 0 | 1994-11-29 + Hayward | 35 | 52 | | 1994-11-29 +(3 rows) +</programlisting> </para> </sect1> + + <sect1 id="tutorial-delete"> + <title>Deletions</title> + + <indexterm zone="tutorial-delete"> + <primary>DELETE</primary> + </indexterm> + + <para> + Suppose you are no longer interested in the weather of Hayward, + then you can do the following to delete those rows from the table. + Deletions are performed using the <command>DELETE</command> + command: +<programlisting> +DELETE FROM weather WHERE city = 'Hayward'; +</programlisting> + + All weather recording belonging to Hayward are removed. + +<programlisting> +SELECT * FROM weather; +</programlisting> + +<screen> + city | temp_lo | temp_hi | prcp | date +---------------+---------+---------+------+------------ + San Francisco | 46 | 50 | 0.25 | 1994-11-27 + San Francisco | 41 | 55 | 0 | 1994-11-29 +(2 rows) +</screen> + </para> + + <para> + One should be wary of queries of the form +<synopsis> +DELETE FROM <replaceable>tablename</replaceable>; +</synopsis> + + Without a qualification, <command>DELETE</command> will simply + remove all rows from the given table, leaving it + empty. The system will not request confirmation before + doing this. + </para> + </sect1> + </chapter> <!-- Keep this comment at the end of the file diff --git a/doc/src/sgml/start.sgml b/doc/src/sgml/start.sgml index ebbccfd042e..61ca3a69b38 100644 --- a/doc/src/sgml/start.sgml +++ b/doc/src/sgml/start.sgml @@ -1,414 +1,405 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/start.sgml,v 1.16 2001/07/15 13:45:03 petere Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/start.sgml,v 1.17 2001/09/02 23:27:49 petere Exp $ --> - <chapter id="start"> + <chapter id="tutorial-start"> <title>Getting Started</title> - <abstract> + <sect1 id="tutorial-install"> + <title>Installation</title> + + <para> + Before you can use <productname>PostgreSQL</productname> you need + to install it, of course. It is possible that + <productname>PostgreSQL</productname> is already installed at your + site, either because it was included in your operating system + distribution or because the system administrator already installed + it. If that is the case, you should obtain information from the + operating system documentation or your system administrator about + how to access <productname>PostgreSQL</productname>. + </para> + <para> - How to begin work with <productname>Postgres</productname> for a new user. + If you are not sure whether <productname>PostgreSQL</productname> + is already available or whether you can use it for your + experimentation then you can install it yourself. Doing so is not + hard and it can be a good exercise. + <productname>PostgreSQL</productname> can be installed by any + unprivileged user, no superuser (<systemitem>root</systemitem>) + access is required. </para> - </abstract> - - <para> - Some of the steps required to use <productname>Postgres</productname> - can be performed by any Postgres user, and some must be done by - the site database administrator. This site administrator - is the person who installed the software, created - the database directories and started the - <application>postmaster</application> - process. This person does not have to be the Unix - superuser ("root") - or the computer system administrator; a person can install and use - <productname>Postgres</productname> without any special accounts or - privileges. - </para> - - <para> - If you are installing <productname>Postgres</productname> yourself, then - refer to the Administrator's Guide for instructions on - installation, and return - to this guide when the installation is complete. - </para> - - <para> - Throughout this manual, any examples that begin with - the character "<literal>%</literal>" are commands that should be typed - at the Unix shell prompt. Examples that begin with the - character "<literal>*</literal>" are commands in the Postgres query - language, Postgres <acronym>SQL</acronym>. - </para> - - <sect1 id="start-env"> - <title>Setting Up Your Environment</title> <para> - This section discusses how to set up - your own environment so that you can use frontend - applications. We assume <productname>Postgres</productname> has - already been - successfully installed and started; refer to the Administrator's Guide - and the installation notes - for how to install Postgres. + If you are installing <productname>PostgreSQL</productname> + yourself, then refer to the <citetitle>Administrator's + Guide</citetitle> for instructions on installation, and return to + this guide when the installation is complete. Be sure to follow + closely the section about setting up the appropriate environment + variables. </para> <para> - <productname>Postgres</productname> is a client/server - application. As a user, - you only need access to the client portions of the installation - (an example - of a client application is the interactive monitor - <application>psql</application>). - For simplicity, - we will assume that <productname>Postgres</productname> has been - installed in the - directory <filename>/usr/local/pgsql</filename>. Therefore, wherever - you see the directory <filename>/usr/local/pgsql</filename> you should - substitute the name of the directory where - <productname>Postgres</productname> is - actually installed. - All <productname>Postgres</productname> commands are installed in - the directory - <filename>/usr/local/pgsql/bin</filename>. Therefore, you should add - this directory to your shell command path. If you use - a variant of the Berkeley C shell, such as csh or tcsh, - you would add - - <programlisting> -% set path = ( /usr/local/pgsql/bin path ) - </programlisting> - - in the <filename>.login</filename> file in your home directory. - If you use - a variant of the Bourne shell, such as sh, ksh, or - bash, then you would add - - <programlisting> -% PATH=/usr/local/pgsql/bin:$PATH -% export PATH - </programlisting> - - to the .profile file in your home directory. - From now on, we will assume that you have added the - <productname>Postgres</productname> bin directory to your path. - In addition, we - will make frequent reference to <quote>setting a shell - variable</quote> or <quote>setting an environment - variable</quote> throughout - this document. If you did not fully understand the - last paragraph on modifying your search path, you - should consult the Unix manual pages that describe your - shell before going any further. + If your site administrator has not set things up in the default + way, you may have some more work to do. For example, if the + database server machine is a remote machine, you will need to set + the <envar>PGHOST</envar> environment variable to the name of the + database server machine. The environment variable + <envar>PGPORT</envar> may also have to be set. The bottom line is + this: if you try to start an application program and it complains + that it cannot connect to the database, you should consult your + site administrator or, if that is you, the documentation to make + sure that your environment is properly set up. If you did not + understand the preceding paragraph then read the next section. </para> + </sect1> + + + <sect1 id="tutorial-arch"> + <title>Architectural Fundamentals</title> <para> - If your site administrator has not set things up in the - default way, you may have some more work to do. For example, if - the database - server machine is a remote machine, you - will need to set the <acronym>PGHOST</acronym> environment - variable to the name - of the database server machine. The environment variable - <acronym>PGPORT</acronym> may also have to be set. - The bottom line is this: if - you try to start an application program and it complains - that it cannot connect to the <application>postmaster</application>, - you should immediately consult your site administrator to make - sure that your - environment is properly set up. + Before we proceed, you should understand the basic + <productname>PostgreSQL</productname> system architecture. + Understanding how the parts of + <productname>PostgreSQL</productname> interact will make the next + chapter somewhat clearer. </para> + <para> + In database jargon, <productname>PostgreSQL</productname> uses a + client/server model. A <productname>PostgreSQL</productname> + session consists of the following cooperating processes + (programs): + + <itemizedlist> + <listitem> + <para> + A server process, which manages the database files, accepts + connections to the database from client applications, and + performs actions on the database on behalf of the clients. The + database server program is called + <filename>postmaster</filename>. + <indexterm><primary>postmaster</primary></indexterm> + </para> + </listitem> + + <listitem> + <para> + The user's client (frontend) application that wants to perform + database operations. Client applications can be very diverse + in nature: They could be a text-oriented tool, a graphical + application, a web server that accesses the database to + display web pages, or a specialized database maintenance tool. + Some client applications are supplied with the + <productname>PostgreSQL</productname> distribution, most are + developed by users. + </para> + </listitem> + + </itemizedlist> + </para> + + <para> + As is typical of client/server applications, the client and the + server can be on different hosts. In that case they communicate + over a TCP/IP network connection. You should keep this in mind, + because the files that can be accessed on a client machine might + not be accessible (or might only be accessed using a different + file name) on the database server machine. + </para> + + <para> + The <productname>PostgreSQL</productname> server can handle + multiple concurrent connections from clients. For that purpose it + starts (<quote>forks</quote>) a new process for each connection. + From that point on, the client and the new server process + communicate without intervention by the original + <filename>postmaster</filename> process. Thus, the + <filename>postmaster</filename> is always running, waiting for + client connections, whereas client and associated server processes + come and go. (All of this is of course invisible to the user. We + only mention it here for completeness.) + </para> </sect1> - <sect1 id="start-psql"> - <title>Starting the Interactive Monitor (psql)</title> + + <sect1 id="tutorial-createdb"> + <title>Creating a Database</title> + + <indexterm zone="tutorial-createdb"> + <primary>database</primary> + <secondary>creating</secondary> + </indexterm> + + <indexterm zone="tutorial-createdb"> + <primary>createdb</primary> + </indexterm> <para> - Assuming that your site administrator has properly - started the <application>postmaster</application> process and - authorized you to - use the database, you (as a user) may begin to start up - applications. As previously mentioned, you should add - <filename>/usr/local/pgsql/bin</filename> to your shell search path. - In most cases, this is all you should have to do in - terms of preparation. + The first test to see whether you can access the database server + is to try to create a database. A running + <productname>PostgreSQL</productname> server can manage many + databases. Typically, a separate database is used for each + project or for each user. </para> <para> - Two different styles of connections - are supported. The site administrator will have chosen to allow - TCP/IP network connections - or will have restricted database access to local (same-machine) - socket connections only. - These choices become significant if you encounter problems in - connecting to a database, since you will want to confirm that you - are choosing an allowed connection option. + Possibly, your site administrator has already created a database + for your use. He should have told you what the name of your + database is. In this case you can omit this step and skip ahead + to the next section. </para> <para> - If you get the following error message from a - <productname>Postgres</productname> - command (such as <application>psql</application> or - <application>createdb</application>): + To create a new database, in this example named + <quote>mydb</quote>, you use the following command: +<screen> +<prompt>$</prompt> <userinput>createdb mydb</userinput> +</screen> + This should produce as response: +<screen> +CREATE DATABASE +</screen> + Is so, this step was successful and you can skip over the + remainder of this section. + </para> - <programlisting> -% psql template1 + <para> + If you see a message similar to +<screen> +createdb: command not found +</screen> + then PostgreSQL was not installed properly. Either it was not + installed at all or the search path was not set correctly. Try + calling the command with an absolute path instead: +<screen> +<prompt>$</prompt> <userinput>/usr/local/pgsql/bin/createdb</userinput> +</screen> + The path at your site might be different. Contact your site + administrator or check back in the installation instructions to + correct the situation. + </para> + + <para> + Another response could be this: +<screen> psql: could not connect to server: Connection refused Is the server running locally and accepting connections on Unix domain socket "/tmp/.s.PGSQL.5432"? - </programlisting> +createdb: database creation failed +</screen> + This means that the server was not started, or it was not started + where <command>createdb</command> expected it. Again, check the + installation instructions or consult the administrator. + </para> - or + <para> + If you do not have the privileges required to create a database, + you will see the following: +<screen> +ERROR: CREATE DATABASE: permission denied +createdb: database creation failed +</screen> + Not every user has authorization to create new databases. If + <productname>PostgreSQL</productname> refuses to create databases + for you then the site administrator needs to grant you permission + to create databases. Consult your site administrator if this + occurs. If you installed <productname>PostgreSQL</productname> + yourself then you should log in for the purposes of this tutorial + under the user account that you started the server as. + + <footnote> + <para> + As an explanation for why this works: + <productname>PostgreSQL</productname> user names are separate + from operating system user accounts. If you connect to a + database, you can choose what + <productname>PostgreSQL</productname> user name to connect as; + if you don't, it will default to the same name as your current + operating system account. As it happens, there will always be a + <productname>PostgreSQL</productname> user account that has the + same name as the operating system user that started the server, + and it also happens that that user always has permission to + create databases. Instead of logging in as that user you can + also specify the <option>-U</option> option everywhere to select + a <productname>PostgreSQL</productname> user name to connect as. + </para> + </footnote> + </para> + + <para> + You can also create databases with other names. + <productname>PostgreSQL</productname> allows you to create any + number of databases at a given site. Database names must have an + alphabetic first character and are limited to 32 characters in + length. A convenient choice is to create a database with the same + name as your current user name. Many tools assume that database + name as the default, so it can save you some typing. To create + that database, simply type +<screen> +<prompt>$</prompt> <userinput>createdb</userinput> +</screen> + </para> + + <para> + If you don't want to use your database anymore you can remove it. + For example, if you are the owner (creator) of the database + <quote>mydb</quote>, you can destroy it using the following + command: +<screen> +<prompt>$</prompt> <userinput>dropdb mydb</userinput> +</screen> + (In this case, the database name does not default to the user + account name. You always need to specify it.) This action + physically removes all files associated with the database and + cannot be undone, so this should only be done with a great deal of + forethought. + </para> + </sect1> - <programlisting> -% psql -h localhost template1 -psql: could not connect to server: Connection refused - Is the server running on host localhost and accepting - TCP/IP connections on port 5432? - </programlisting> - it is usually because + <sect1 id="tutorial-accessdb"> + <title>Accessing a Database</title> + + <indexterm zone="tutorial-accessdb"> + <primary>psql</primary> + </indexterm> + + <para> + Once you have created a database, you can access it by: + + <itemizedlist spacing="compact" mark="bullet"> + <listitem> + <para> + Running the <productname>PostgreSQL</productname> interactive + terminal program, called <quote>psql</quote>, which allows you + to interactively enter, edit, and execute + <acronym>SQL</acronym> commands. + </para> + </listitem> - <itemizedlist mark="bullet" spacing="compact"> <listitem> <para> - the <application>postmaster</application> is not running, - or + Using an existing graphical frontend tool like + <application>PgAccess</application> or + <application>ApplixWare</application> (via + <acronym>ODBC</acronym>) to create and manipulate a database. + These possibilities are not covered in this tutorial. </para> </listitem> <listitem> <para> - you are attempting to connect to the wrong server host. + Writing a custom application, using one of the several + available language bindings. These possibilities are discussed + further in <citetitle>The PostgreSQL Programmer's + Guide</citetitle>. </para> </listitem> </itemizedlist> + + You probably want to start up <command>psql</command>, to try out + the examples in this tutorial. It can be activated for the + <quote>mydb</quote> database by typing the command: +<screen> +<prompt>$</prompt> <userinput>psql mydb</userinput> +</screen> + If you leave off the database name then it will default to your + user account name. You already discovered this scheme in the + previous section. </para> <para> - If you get the following error message: - - <programlisting> -FATAL 1:Feb 17 23:19:55:process userid (2360) != database owner (268) - </programlisting> - - it means that the site administrator started the - <application>postmaster</application> - as the wrong user. Tell him to restart it as - the <productname>Postgres</productname> superuser. + In <command>psql</command>, you will be greeted with the following + message: +<screen> +Welcome to psql, the PostgreSQL interactive terminal. + +Type: \copyright for distribution terms + \h for help with SQL commands + \? for help on internal slash commands + \g or terminate with semicolon to execute query + \q to quit + +mydb=> +</screen> + <indexterm><primary>superuser</primary></indexterm> + The last line could also be +<screen> +mydb=# +</screen> + That would mean you are a database superuser, which is most likely + the case if you installed <productname>PostgreSQL</productname> + yourself. Being a superuser means that you are not subject to + access controls. For the purpose of this tutorial this is not of + importance. </para> - </sect1> - <sect1 id="start-manage-db"> - <title>Managing a Database</title> + <para> + If you have encountered problems starting <command>psql</command> + then go back to the previous section. The diagnostics of + <command>psql</command> and <command>createdb</command> are + similar, and if the latter worked the former should work as well. + </para> <para> - Now that <productname>Postgres</productname> is up and running we - can create some - databases to experiment with. Here, we describe the - basic commands for managing a database. + The last line printed out by <command>psql</command> is the + prompt, and it indicates that <command>psql</command> is listening + to you and that you can type <acronym>SQL</acronym> queries into a + workspace maintained by <command>psql</command>. Try out these + commands: + <indexterm><primary>version</primary></indexterm> +<screen> +<prompt>mydb=></prompt> <userinput>SELECT version();</userinput> + version +---------------------------------------------------------------- + PostgreSQL 7.2devel on i586-pc-linux-gnu, compiled by GCC 2.96 +(1 row) + +<prompt>mydb=></prompt> <userinput>SELECT current_date;</userinput> + date +------------ + 2001-08-31 +(1 row) + +<prompt>mydb=></prompt> <userinput>SELECT 2 + 2;</userinput> + ?column? +---------- + 4 +(1 row) +</screen> </para> <para> - Most <productname>Postgres</productname> - applications assume that the database name, if not specified, is - the same as the name on your computer - account. + The <command>psql</command> program has a number of internal + commands that are not SQL commands. They begin the backslash + character, <quote><literal>\</literal></quote>. Some of these + commands were already listed in the welcome message. For example, + you can get help on the syntax of various + <productname>PostgreSQL</productname> <acronym>SQL</acronym> + commands by typing: +<screen> +<prompt>mydb=></prompt> <userinput>\h</userinput> +</screen> </para> <para> - If your database administrator has set up your account without - database creation privileges, - then she should have told you what the name of your database is. If - this is the case, then you - can skip the sections on creating and destroying databases. + To get out of <command>psql</command>, type +<screen> +<prompt>mydb=></prompt> <userinput>\q</userinput> +</screen> + and <command>psql</command> will quit and return you to your + command shell. (For more internal commands, type + <literal>\?</literal> at the <command>psql</command> prompt.) The + full capabilities of <command>psql</command> are documented in the + <citetitle>Reference Manual</citetitle>. If PostgreSQL is + installed correctly you can also type <literal>man psql</literal> + at the operating system shell prompt to see the documentation. In + this tutorial we will not use these features explicitly, but you + can use them yourself when you see it fit. </para> - <sect2> - <title>Creating a Database</title> - - <para> - Let's say you want to create a database named - <database>mydb</database>. - You can do this with the following command: - <programlisting> -% createdb mydb - </programlisting> - </para> - - <para> - If you do not have the privileges required to create a database, - you will see - the following: - <programlisting> -% createdb mydb -NOTICE:user "your username" is not allowed to create/destroy databases -createdb: database creation failed on mydb. - </programlisting> - </para> - - <para> - <productname>Postgres</productname> allows you to create any - number of databases - at a given site and you automatically become the - database administrator of the database you just created. - Database names must have an alphabetic first - character and are limited to 32 characters in length. - Not every user has authorization to become a database - administrator. If <productname>Postgres</productname> refuses to - create databases - for you, then the site administrator needs to grant you - permission to create databases. Consult your site - administrator if this occurs. - </para> - </sect2> - - <sect2> - <title>Accessing a Database</title> - - <para> - Once you have constructed a database, you can access it - by: - - <itemizedlist spacing="compact" mark="bullet"> - <listitem> - <para> - Running the <productname>Postgres</productname> terminal - monitor programs - (e.g. <application>psql</application>) which allows you to - interactively - enter, edit, and execute <acronym>SQL</acronym> commands. - </para> - </listitem> - - <listitem> - <para> - Using an existing native frontend tool like - <application>pgaccess</application> or - <application>ApplixWare</application> (via - <acronym>ODBC</acronym>) to create and manipulate a - database. - </para> - </listitem> - - <listitem> - <para> - Using a language like perl or tcl which has a supported - interface for <productname>Postgres</productname>. Some of - these languages also have convenient and powerful GUI toolkits - which can help you construct custom - applications. <application>pgaccess</application>, mentioned - above, is one such application written in tk/tcl and can be - used as an example. - </para> - </listitem> - - <listitem> - <para> - Writing a <acronym>C</acronym> program using - the LIBPQ subroutine - library. This allows you to submit - <acronym>SQL</acronym> commands - from <acronym>C</acronym> and get answers and - status messages back to - your program. This interface is discussed further - in <citetitle>The PostgreSQL Programmer's Guide</citetitle>. - </para> - </listitem> - </itemizedlist> - - You might want to start up <application>psql</application>, - to try out the examples in this manual. - It can be activated for the <database>mydb</database> - database by typing the command: - <programlisting> -% psql mydb - </programlisting> - - You will be greeted with the following message: - <programlisting> -Welcome to the POSTGRESQL interactive sql monitor: - Please read the file COPYRIGHT for copyright terms of POSTGRESQL - - type \? for help on slash commands - type \q to quit - type \g or terminate with semicolon to execute query - You are currently connected to the database: template1 - -mydb=> - </programlisting> - </para> - - <para> - This prompt indicates that the terminal monitor is listening - to you and that you can type <acronym>SQL</acronym> queries into a - workspace maintained by the terminal monitor. - The <application>psql</application> program responds to escape - codes that begin - with the backslash character, "<literal>\</literal>" For example, you - can get help on the syntax of various - <productname>Postgres</productname> <acronym>SQL</acronym> - commands by typing: - <programlisting> -mydb=> \h - </programlisting> - </para> - - <para> - Once you have finished entering your queries into the - workspace, you can pass the contents of the workspace - to the <productname>Postgres</productname> server by typing: - <programlisting> -mydb=> \g - </programlisting> - - This tells the server to process the query. If you - terminate your query with a semicolon, the "<literal>\g</literal>" is not - necessary. - <application>psql</application> will automatically process - semicolon terminated queries. - To read queries from a file, say myFile, instead of - entering them interactively, type: - <programlisting> -mydb=> \i fileName - </programlisting> - - To get out of <application>psql</application> and return to Unix, type - <programlisting> -mydb=> \q - </programlisting> - - and <application>psql</application> will quit and return - you to your command - shell. (For more escape codes, type <command>\h</command> at the - monitor prompt.) - White space (i.e., spaces, tabs and newlines) may be - used freely in <acronym>SQL</acronym> queries. Single-line - comments are denoted by - "<literal>--</literal>". Everything after the dashes up to the end of the - line is ignored. Multiple-line comments, and comments within a line, - are denoted by "<literal>/* ... */</literal>". - </para> - </sect2> - - <sect2> - <title>Destroying a Database</title> - - <para> - If you are the database administrator for the database - <database>mydb</database>, you can destroy it using the - following Unix command: - <programlisting> -% dropdb mydb - </programlisting> - This action physically removes all of the Unix files - associated with the database and cannot be undone, so - this should only be done with a great deal of forethought. - </para> - </sect2> </sect1> - </chapter> <!-- Keep this comment at the end of the file diff --git a/doc/src/sgml/tutorial.sgml b/doc/src/sgml/tutorial.sgml index 07e4ca67d5b..812fb9d3904 100644 --- a/doc/src/sgml/tutorial.sgml +++ b/doc/src/sgml/tutorial.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/Attic/tutorial.sgml,v 1.14 2001/02/03 19:03:27 petere Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/Attic/tutorial.sgml,v 1.15 2001/09/02 23:27:49 petere Exp $ --> <book id="tutorial"> @@ -10,16 +10,46 @@ $Header: /cvsroot/pgsql/doc/src/sgml/Attic/tutorial.sgml,v 1.14 2001/02/03 19:03 &legal; </bookinfo> + <preface id="tutorial-welcome"> + <title>Welcome</title> + + <para> + Welcome to <productname>PostgreSQL</productname> and the + <citetitle>PostgreSQL Tutorial</citetitle>. The following few + chapters are intended to give a simple introduction to + <productname>PostgreSQL</productname>, relational database + concepts, and the SQL language to those who are new to any one of + these aspects. We only assume some general knowledge about how to + use computers. No particular Unix or programming experience is + required. + </para> + + <para> + After you have worked through this tutorial you might want to move on + to reading the <![%single-book;[<citetitle>User's + Guide</citetitle>]]><![%set-of-books;[<xref linkend="user">]]> to + gain a more formal knowledge of the SQL language, or the + <![%single-book;[<citetitle>Programmer's + Guide</citetitle>]]><![%set-of-books;[<xref linkend="programmer">]]> + for information about developing applications for + <productname>PostgreSQL</productname>. + </para> + + <para> + We hope you have a pleasant experience with + <productname>PostgreSQL</productname>. + </para> + </preface> + <![%single-book;[ &intro; ]]> - &sql; - &arch; &start; &query; &advanced; <![%single-book;[ &biblio; + &bookindex; ]]> </book> diff --git a/src/tutorial/advanced.source b/src/tutorial/advanced.source index f8a819a2f3a..0c04a983254 100644 --- a/src/tutorial/advanced.source +++ b/src/tutorial/advanced.source @@ -1,19 +1,18 @@ --------------------------------------------------------------------------- -- -- advanced.sql- --- more POSTGRES SQL features. (These are not part of the SQL-92 --- standard.) +-- Tutorial on advanced more PostgreSQL features -- -- -- Copyright (c) 1994, Regents of the University of California -- --- $Id: advanced.source,v 1.3 1999/07/08 15:28:51 momjian Exp $ +-- $Id: advanced.source,v 1.4 2001/09/02 23:27:50 petere Exp $ -- --------------------------------------------------------------------------- ----------------------------- -- Inheritance: --- a table can inherit from zero or more tables. A query can reference +-- S table can inherit from zero or more tables. A query can reference -- either all rows of a table or all rows of a table plus all of its -- descendants. ----------------------------- @@ -31,7 +30,7 @@ CREATE TABLE capitals ( state char(2) ) INHERITS (cities); --- now, let's populate the tables +-- Now, let's populate the tables. INSERT INTO cities VALUES ('San Francisco', 7.24E+5, 63); INSERT INTO cities VALUES ('Las Vegas', 2.583E+5, 2174); INSERT INTO cities VALUES ('Mariposa', 1200, 1953); @@ -56,72 +55,6 @@ FROM cities* c WHERE c.altitude > 500; ------------------------------ --- Time Travel: --- this feature allows you to run historical queries. --- removed for v6.3, but possible using triggers. --- see contrib/spi/README for more information. ------------------------------ - --- first, let's make some changes to the cities table (suppose Mariposa's --- population grows 10% this year) - --- UPDATE cities --- SET population = population * 1.1 --- WHERE name = 'Mariposa'; - --- the default time is the current time ('now'): - --- SELECT * FROM cities WHERE name = 'Mariposa'; - --- we can also retrieve the population of Mariposa ever has. ('epoch' is the --- earliest time representable by the system) - --- SELECT name, population --- FROM cities['epoch', 'now'] -- can be abbreviated to cities[,] --- WHERE name = 'Mariposa'; - - ----------------------- --- Arrays: --- attributes can be arrays of base types or user-defined types ----------------------- - -CREATE TABLE sal_emp ( - name text, - pay_by_quarter int4[], - schedule text[][] -); - --- insert instances with array attributes. Note the use of braces - -INSERT INTO sal_emp VALUES ( - 'Bill', - '{10000,10000,10000,10000}', - '{{"meeting", "lunch"}, {}}'); - -INSERT INTO sal_emp VALUES ( - 'Carol', - '{20000,25000,25000,25000}', - '{{"talk", "consult"}, {"meeting"}}'); - ----------------------- --- queries on array attributes ----------------------- -SELECT name FROM sal_emp WHERE - sal_emp.pay_by_quarter[1] <> sal_emp.pay_by_quarter[2]; - --- retrieve third quarter pay of all employees - -SELECT sal_emp.pay_by_quarter[3] FROM sal_emp; - --- select subarrays - -SELECT sal_emp.schedule[1:2][1:1] FROM sal_emp WHERE - sal_emp.name = 'Bill'; - - -- clean up (you must remove the children first) -DROP TABLE sal_emp; DROP TABLE capitals; DROP TABLE cities; diff --git a/src/tutorial/basics.source b/src/tutorial/basics.source index e68bf57b4ad..748323eb8e7 100644 --- a/src/tutorial/basics.source +++ b/src/tutorial/basics.source @@ -6,22 +6,22 @@ -- -- Copyright (c) 1994, Andrew Yu, University of California -- --- $Id: basics.source,v 1.3 1999/07/08 15:27:01 momjian Exp $ +-- $Id: basics.source,v 1.4 2001/09/02 23:27:50 petere Exp $ -- --------------------------------------------------------------------------- ----------------------------- --- Creating a table: --- a CREATE TABLE is used to create base tables. POSTGRES SQL has --- its own set of built-in types. (Note that keywords are case- --- insensitive but identifiers are case-sensitive.) +-- Creating a New Table: +-- A CREATE TABLE is used to create base tables. PostgreSQL has +-- its own set of built-in types. (Note that SQL is case- +-- insensitive.) ----------------------------- CREATE TABLE weather ( city varchar(80), temp_lo int, -- low temperature temp_hi int, -- high temperature - prcp float8, -- precipitation + prcp real, -- precipitation date date ); @@ -30,98 +30,105 @@ CREATE TABLE cities ( location point ); + ----------------------------- --- Inserting data: --- an INSERT statement is used to insert a new row into a table. There +-- Populating a Table With Rows: +-- An INSERT statement is used to insert a new row into a table. There -- are several ways you can specify what columns the data should go to. ----------------------------- --- 1. the simplest case is when the list of value correspond to the order of +-- 1. The simplest case is when the list of value correspond to the order of -- the columns specified in CREATE TABLE. INSERT INTO weather - VALUES ('San Francisco', 46, 50, 0.25, '11/27/1994'); + VALUES ('San Francisco', 46, 50, 0.25, '1994-11-27'); INSERT INTO cities - VALUES ('San Francisco', '(-194.0, 53.0)'); + VALUES ('San Francisco', '(-194.0, 53.0)'); --- 2. you can also specify what column the values correspond to. (The columns --- can be specified in any order. You may also omit any number of columns. --- eg. unknown precipitation below) +-- 2. You can also specify what column the values correspond to. (The columns +-- can be specified in any order. You may also omit any number of columns, +-- e.g., unknown precipitation below. INSERT INTO weather (city, temp_lo, temp_hi, prcp, date) - VALUES ('San Francisco', 43, 57, 0.0, '11/29/1994'); + VALUES ('San Francisco', 43, 57, 0.0, '1994-11-29'); INSERT INTO weather (date, city, temp_hi, temp_lo) - VALUES ('11/29/1994', 'Hayward', 54, 37); + VALUES ('1994-11-29', 'Hayward', 54, 37); ----------------------------- --- Retrieving data: --- a SELECT statement is used for retrieving data. The basic syntax is --- SELECT columns FROM tables WHERE predicates +-- Querying a Table: +-- A SELECT statement is used for retrieving data. The basic syntax is +-- SELECT columns FROM tables WHERE predicates. ----------------------------- --- a simple one would be +-- A simple one would be: SELECT * FROM weather; --- you may also specify expressions in the target list (the 'AS column' --- specifies the column name of the result. It is optional.) +-- You may also specify expressions in the target list. (The 'AS column' +-- specifies the column name of the result. It is optional.) SELECT city, (temp_hi+temp_lo)/2 AS temp_avg, date FROM weather; --- if you want to retrieve rows that satisfy certain condition (ie. a --- restriction), specify the condition in WHERE. The following retrieves +-- If you want to retrieve rows that satisfy certain condition (i.e., a +-- restriction), specify the condition in WHERE. The following retrieves -- the weather of San Francisco on rainy days. SELECT * -FROM weather -WHERE city = 'San Francisco' - and prcp > 0.0; + FROM weather + WHERE city = 'San Francisco' + AND prcp > 0.0; --- here is a more complicated one. Duplicates are removed when DISTINCT is --- specified. ORDER BY specifies the column to sort on. (Just to make sure the +-- Here is a more complicated one. Duplicates are removed when DISTINCT is +-- specified. ORDER BY specifies the column to sort on. (Just to make sure the -- following won't confuse you, DISTINCT and ORDER BY can be used separately.) SELECT DISTINCT city -FROM weather -ORDER BY city; + FROM weather + ORDER BY city; + ----------------------------- --- Retrieving data into other classes: --- a SELECT ... INTO statement can be used to retrieve data into --- another class. +-- Joins Between Tables: +-- queries can access multiple tables at once or access the same table +-- in such a way that multiple instances of the table are being processed +-- at the same time. ----------------------------- -SELECT * INTO TABLE mytemp -FROM weather -WHERE city = 'San Francisco' - and prcp > 0.0; +-- The following joins the weather table and the cities table. -SELECT * from mytemp; +SELECT * + FROM weather, cities + WHERE city = name; ------------------------------ --- Aggregates ------------------------------ +-- This prevents a duplicate city name column: -SELECT max(temp_lo) -FROM weather; +SELECT city, temp_lo, temp_hi, prcp, date, location + FROM weather, cities + WHERE city = name; --- Aggregate with GROUP BY -SELECT city, max(temp_lo) -FROM weather -GROUP BY city; +-- since the column names are all different, we don't have to specify the +-- table name. If you want to be clear, you can do the following. They give +-- identical results, of course. ------------------------------ --- Joining tables: --- queries can access multiple tables at once or access the same table --- in such a way that multiple instances of the table are being processed --- at the same time. ------------------------------ +SELECT weather.city, weather.temp_lo, weather.temp_hi, weather.prcp, weather.date, cities.location + FROM weather, cities + WHERE cities.name = weather.city; --- suppose we want to find all the records that are in the temperature range --- of other records. W1 and W2 are aliases for weather. +-- JOIN syntax + +SELECT * + FROM weather JOIN cities ON (weather.city = cities.name); + +-- Outer join + +SELECT * + FROM weather LEFT OUTER JOIN cities ON (weather.city = cities.name); + +-- Suppose we want to find all the records that are in the temperature range +-- of other records. W1 and W2 are aliases for weather. SELECT W1.city, W1.temp_lo, W1.temp_hi, W2.city, W2.temp_lo, W2.temp_hi @@ -129,60 +136,69 @@ FROM weather W1, weather W2 WHERE W1.temp_lo < W2.temp_lo and W1.temp_hi > W2.temp_hi; --- let's join two tables. The following joins the weather table --- and the cities table. -SELECT city, location, prcp, date -FROM weather, cities -WHERE name = city; +----------------------------- +-- Aggregate Functions +----------------------------- --- since the column names are all different, we don't have to specify the --- table name. If you want to be clear, you can do the following. They give --- identical results, of course. +SELECT max(temp_lo) + FROM weather; + +SELECT city FROM weather + WHERE temp_lo = (SELECT max(temp_lo) FROM weather); + +-- Aggregate with GROUP BY +SELECT city, max(temp_lo) + FROM weather + GROUP BY city; + +-- ... and HAVING +SELECT city, max(temp_lo) + FROM weather + GROUP BY city + HAVING max(temp_lo) < 40; -SELECT w.city, c.location, w.prcp, w.date -FROM weather w, cities c -WHERE c.name = w.city; ----------------------------- --- Updating data: --- an UPDATE statement is used for updating data. +-- Updates: +-- An UPDATE statement is used for updating data. ----------------------------- --- suppose you discover the temperature readings are all off by 2 degrees as +-- Suppose you discover the temperature readings are all off by 2 degrees as -- of Nov 28, you may update the data as follow: UPDATE weather - SET temp_hi = temp_hi - 2, temp_lo = temp_lo - 2 - WHERE date > '11/28/1994'; + SET temp_hi = temp_hi - 2, temp_lo = temp_lo - 2 + WHERE date > '1994-11-28'; -SELECT * from weather; +SELECT * FROM weather; ----------------------------- --- Deleting data: --- a DELETE statement is used for deleting rows from a table. +-- Deletions: +-- A DELETE statement is used for deleting rows from a table. ----------------------------- --- suppose you are no longer interested in the weather of Hayward, you can --- do the following to delete those rows from the table +-- Suppose you are no longer interested in the weather of Hayward, then you can +-- do the following to delete those rows from the table. DELETE FROM weather WHERE city = 'Hayward'; -SELECT * from weather; +SELECT * FROM weather; --- you can also delete all the rows in a table by doing the following. (This +-- You can also delete all the rows in a table by doing the following. (This -- is different from DROP TABLE which removes the table in addition to the -- removing the rows.) DELETE FROM weather; -SELECT * from weather; +SELECT * FROM weather; + ----------------------------- -- Removing the tables: --- DROP TABLE is used to remove tables. After you have done this, you +-- DROP TABLE is used to remove tables. After you have done this, you -- can no longer use those tables. ----------------------------- -DROP TABLE weather, cities, mytemp; +DROP TABLE weather, cities; -- GitLab