diff --git a/doc/src/sgml/ref/create_operator.sgml b/doc/src/sgml/ref/create_operator.sgml index 7f06c4cece36ca76b59fddd64b875db29ee36638..36d791d2a7987bd6bb73dbe5bf1b1bfb08f2c47f 100644 --- a/doc/src/sgml/ref/create_operator.sgml +++ b/doc/src/sgml/ref/create_operator.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/ref/create_operator.sgml,v 1.11 1999/07/22 15:09:08 thomas Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/ref/create_operator.sgml,v 1.12 2000/03/18 18:03:12 tgl Exp $ Postgres documentation --> @@ -60,8 +60,8 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class <term><replaceable class="parameter">type1</replaceable></term> <listitem> <para> - The type for the left-hand side of the operator, if any. This option would be - omitted for a right-unary operator. + The type of the left-hand argument of the operator, if any. + This option would be omitted for a left-unary operator. </para> </listitem> </varlistentry> @@ -69,8 +69,8 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class <term><replaceable class="parameter">type2</replaceable></term> <listitem> <para> - The type for the right-hand side of the operator, if any. This option would be - omitted for a left-unary operator. + The type of the right-hand argument of the operator, if any. + This option would be omitted for a right-unary operator. </para> </listitem> </varlistentry> @@ -78,7 +78,7 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class <term><replaceable class="parameter">com_op</replaceable></term> <listitem> <para> - The commutator for this operator. + The commutator of this operator. </para> </listitem> </varlistentry> @@ -110,7 +110,7 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class <term>HASHES</term> <listitem> <para> -Indicates this operator can support a hash-join algorithm. + Indicates this operator can support a hash join. </para> </listitem> </varlistentry> @@ -118,7 +118,8 @@ Indicates this operator can support a hash-join algorithm. <term><replaceable class="parameter">left_sort_op</replaceable></term> <listitem> <para> - Operator that sorts the left-hand data type of this operator. + If this operator can support a merge join, the + operator that sorts the left-hand data type of this operator. </para> </listitem> </varlistentry> @@ -126,7 +127,8 @@ Indicates this operator can support a hash-join algorithm. <term><replaceable class="parameter">right_sort_op</replaceable></term> <listitem> <para> - Operator that sorts the right-hand data type of this operator. + If this operator can support a merge join, the + operator that sorts the right-hand data type of this operator. </para> </listitem> </varlistentry> @@ -172,22 +174,56 @@ CREATE </para> <para> The operator <replaceable class="parameter">name</replaceable> - is a sequence of up to thirty two (32) characters in any combination - from the following: + is a sequence of up to NAMEDATALEN-1 (31 by default) characters + from the following list: <literallayout> -+ - * / < > = ~ ! @ # % ^ & | ` ? $ : ++ - * / < > = ~ ! @ # % ^ & | ` ? $ : </literallayout> + + There are a few restrictions on your choice of name: + <itemizedlist> + <listitem> + <para> + "$" and ":" cannot be defined as single-character operators, + although they can be part of a multi-character operator name. + </para> + </listitem> + <listitem> + <para> + "--" and "/*" cannot appear anywhere in an operator name, + since they will be taken as the start of a comment. + </para> + </listitem> + <listitem> + <para> + A multi-character operator name cannot end in "+" or "-", + unless the name also contains at least one of these characters: + <literallayout> +~ ! @ # % ^ & | ` ? $ : + </literallayout> + For example, <literal>@-</literal> is an allowed operator name, + but <literal>*-</literal> is not. + This restriction allows <productname>Postgres</productname> to + parse SQL-compliant queries without requiring spaces between tokens. + </para> + </listitem> + </itemizedlist> + <note> <para> - No alphabetic characters are allowed in an operator name. - This enables <productname>Postgres</productname> to parse SQL input - into tokens without requiring spaces between each token. + When working with non-SQL-standard operator names, you will usually + need to separate adjacent operators with spaces to avoid ambiguity. + For example, if you have defined a left-unary operator named "@", + you cannot write <literal>X*@Y</literal>; you must write + <literal>X* @Y</literal> to ensure that + <productname>Postgres</productname> reads it as two operator names + not one. </para> </note> </para> <para> - The operator "!=" is mapped to "<>" on input, so they are - therefore equivalent. + The operator "!=" is mapped to "<>" on input, so these two names + are always equivalent. </para> <para> At least one of LEFTARG and RIGHTARG must be defined. For @@ -196,11 +232,11 @@ CREATE unary operators only RIGHTARG should be defined. </para> <para> - Also, the + The <replaceable class="parameter">func_name</replaceable> procedure must have been previously defined using <command>CREATE FUNCTION</command> and must be defined to accept the correct number of arguments - (either one or two). + (either one or two) of the indicated types. </para> <para> The commutator operator should be identified if one exists, @@ -247,8 +283,6 @@ MYBOXES.description !== "0,0,1,1"::box does not yet have a commutator itself, then the commutator's entry is updated to have the newly created operator as its commutator. This applies to the negator, as well. - </para> - <para> This is to allow the definition of two operators that are the commutators or the negators of each other. The first operator should be defined without a commutator or negator @@ -258,7 +292,7 @@ MYBOXES.description !== "0,0,1,1"::box it also works to just have both operators refer to each other.) </para> <para> - The next three specifications are present to support the + The HASHES, SORT1, and SORT2 options are present to support the query optimizer in performing joins. <productname>Postgres</productname> can always evaluate a join (i.e., processing a clause with two tuple @@ -294,9 +328,8 @@ MYBOXES.description !== "0,0,1,1"::box be worth the complexity involved. </para> <para> - The last two pieces of the specification are present so - the query optimizer can estimate result sizes. If a - clause of the form: + The RESTRICT and JOIN options assist the query optimizer in estimating + result sizes. If a clause of the form: <programlisting> MYBOXES.description <<< "0,0,1,1"::box </programlisting> @@ -310,7 +343,7 @@ MYBOXES.description <<< "0,0,1,1"::box data types and returns a floating point number. The query optimizer simply calls this function, passing the parameter "0,0,1,1" and multiplies the result by the relation - size to get the desired expected number of instances. + size to get the expected number of instances. </para> <para> Similarly, when the operands of the operator both contain @@ -318,7 +351,7 @@ MYBOXES.description <<< "0,0,1,1"::box size of the resulting join. The function join_proc will return another floating point number which will be multiplied by the cardinalities of the two classes involved to - compute the desired expected result size. + compute the expected result size. </para> <para> The difference between the function diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml index 332464429cd79b3ba7d3eaf986deae491bb56b59..918d91a05cff7e39369792dec4ed3cca6865f90c 100644 --- a/doc/src/sgml/syntax.sgml +++ b/doc/src/sgml/syntax.sgml @@ -315,12 +315,11 @@ UNCOMMITTED UNNAMED <para> A <firstterm>comment</firstterm> - is an arbitrary sequence of characters following double dashes up to the end - of the line. We also support double-slashes as comments, e.g.: + is an arbitrary sequence of characters beginning with double dashes + and extending to the end of the line, e.g.: <programlisting> -- This is a standard SQL comment -// And this is another supported comment style, like C++ </programlisting> We also support C-style block comments, e.g.: @@ -331,6 +330,9 @@ We also support C-style block comments, e.g.: comment */ </programlisting> + +A comment beginning with "/*" extends to the first occurrence of "*/". + </para> </sect1> @@ -340,17 +342,22 @@ We also support C-style block comments, e.g.: <para> Names in SQL are sequences of less than NAMEDATALEN alphanumeric characters, starting with an alphabetic character. By default, NAMEDATALEN is set - to 32, but at the time the system is built, NAMEDATALEN can be changed + to 32 (but at the time the system is built, NAMEDATALEN can be changed by changing the <literal>#define</literal> in - src/backend/include/postgres.h. + src/backend/include/postgres.h). Underscore ("_") is considered an alphabetic character. </para> <para> - In some contexts, names may contain other characters if surrounded - by double quotes. For example, table or column names may contain otherwise - disallowed characters such as spaces, ampersands, etc. using this - technique. + Names containing other characters may be formed by surrounding them + with double quotes. For example, table or column names may contain + otherwise disallowed characters such as spaces, ampersands, etc. if + quoted. Quoting a name also makes it case-sensitive, + whereas unquoted names are always folded to lower case. For example, + the names <literal>FOO</literal>, <literal>foo</literal> + and <literal>"foo"</literal> are + considered the same by <productname>Postgres</productname>, but + <literal>"Foo"</literal> is a different name. </para> </sect1> diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index f972d6ead174f5602bcc5aafd5c7e3a513625947..64a389b7680d4748fd7a1acfcf3cd05a86a96001 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.67 2000/03/13 01:52:06 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.68 2000/03/18 18:03:09 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -87,10 +87,10 @@ static void addlit(char *ytext, int yleng); * and to eliminate parsing troubles for numeric strings. * Exclusive states: * <xb> binary numeric string - thomas 1997-11-16 - * <xc> extended C-style comments - tgl 1997-07-12 - * <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27 + * <xc> extended C-style comments - thomas 1997-07-12 + * <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27 * <xh> hexadecimal numeric string - thomas 1997-11-16 - * <xq> quoted strings - tgl 1997-07-30 + * <xq> quoted strings - thomas 1997-07-30 */ %x xb @@ -144,7 +144,7 @@ xdinside [^"]+ * have something like plus-slash-star, lex will think this is a 3-character * operator whereas we want to see it as a + operator and a comment start. * The solution is two-fold: - * 1. append {op_and_self}* to xcstart so that it matches as much text as + * 1. append {op_chars}* to xcstart so that it matches as much text as * {operator} would. Then the tie-breaker (first matching rule of same * length) ensures xcstart wins. We put back the extra stuff with yyless() * in case it contains a star-slash that should terminate the comment. @@ -154,7 +154,7 @@ xdinside [^"]+ * SQL92-style comments, which start with dash-dash, have similar interactions * with the operator rule. */ -xcstart \/\*{op_and_self}* +xcstart \/\*{op_chars}* xcstop \*+\/ xcinside ([^*]+)|(\*+[^/]) @@ -166,10 +166,19 @@ identifier {letter}{letter_or_digit}* typecast "::" -/* NB: if you change "self", fix the copy in the operator rule too! */ +/* + * "self" is the set of chars that should be returned as single-character + * tokens. "op_chars" is the set of chars that can make up "Op" tokens, + * which can be one or more characters long (but if a single-char token + * appears in the "self" set, it is not to be returned as an Op). Note + * that the sets overlap, but each has some chars that are not in the other. + * + * If you change either set, adjust the character lists appearing in the + * rule for "operator"! + */ self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|] -op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=] -operator {op_and_self}+ +op_chars [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=] +operator {op_chars}+ /* we no longer allow unary minus in numbers. * instead we pass it separately to parser. there it gets @@ -202,7 +211,7 @@ horiz_space [ \t\f] newline [\n\r] non_newline [^\n\r] -comment (("--"|"//"){non_newline}*) +comment ("--"{non_newline}*) whitespace ({space}|{comment}) @@ -220,7 +229,7 @@ other . /* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION. * AT&T lex does not properly handle C-style comments in this second lex block. - * So, put comments here. tgl - 1997-09-08 + * So, put comments here. thomas - 1997-09-08 * * Quoted strings must allow some special characters such as single-quote * and newline. @@ -329,23 +338,57 @@ other . {self} { return yytext[0]; } {operator} { - /* Check for embedded slash-star or dash-dash */ - char *slashstar = strstr((char*)yytext, "/*"); - char *dashdash = strstr((char*)yytext, "--"); + /* + * Check for embedded slash-star or dash-dash; those + * are comment starts, so operator must stop there. + * Note that slash-star or dash-dash at the first + * character will match a prior rule, not this one. + */ + int nchars = yyleng; + char *slashstar = strstr((char*)yytext, "/*"); + char *dashdash = strstr((char*)yytext, "--"); if (slashstar && dashdash) { + /* if both appear, take the first one */ if (slashstar > dashdash) slashstar = dashdash; } else if (!slashstar) slashstar = dashdash; - if (slashstar) + nchars = slashstar - ((char*)yytext); + + /* + * For SQL92 compatibility, '+' and '-' cannot be the + * last char of a multi-char operator unless the operator + * contains chars that are not in SQL92 operators. + * The idea is to lex '=-' as two operators, but not + * to forbid operator names like '?-' that could not be + * sequences of SQL92 operators. + */ + while (nchars > 1 && + (yytext[nchars-1] == '+' || + yytext[nchars-1] == '-')) + { + int ic; + + for (ic = nchars-2; ic >= 0; ic--) + { + if (strchr("~!@#&`?$:%^|", yytext[ic])) + break; + } + if (ic >= 0) + break; /* found a char that makes it OK */ + nchars--; /* else remove the +/-, and check again */ + } + + if (nchars < yyleng) { - int nchars = slashstar - ((char*)yytext); + /* Strip the unwanted chars from the token */ yyless(nchars); - /* If what we have left is only one char, and it's + /* + * If what we have left is only one char, and it's * one of the characters matching "self", then * return it as a character token the same way * that the "self" rule would have. @@ -355,8 +398,9 @@ other . return yytext[0]; } + /* Convert "!=" operator to "<>" for compatibility */ if (strcmp((char*)yytext, "!=") == 0) - yylval.str = pstrdup("<>"); /* compatibility */ + yylval.str = pstrdup("<>"); else yylval.str = pstrdup((char*)yytext); return Op; diff --git a/src/bin/psql/mainloop.c b/src/bin/psql/mainloop.c index 4f71f3e410526fccb728ce0a5d8ce4b52a1a0b1f..eadd50e94af520bdde96313ce3b4604b07294088 100644 --- a/src/bin/psql/mainloop.c +++ b/src/bin/psql/mainloop.c @@ -3,7 +3,7 @@ * * Copyright 2000 by PostgreSQL Global Development Group * - * $Header: /cvsroot/pgsql/src/bin/psql/mainloop.c,v 1.25 2000/03/13 13:46:32 petere Exp $ + * $Header: /cvsroot/pgsql/src/bin/psql/mainloop.c,v 1.26 2000/03/18 18:03:11 tgl Exp $ */ #include "postgres.h" #include "mainloop.h" @@ -318,8 +318,7 @@ MainLoop(FILE *source) } /* single-line comment? truncate line */ - else if ((line[i] == '-' && line[i + thislen] == '-') || - (line[i] == '/' && line[i + thislen] == '/')) + else if (line[i] == '-' && line[i + thislen] == '-') { line[i] = '\0'; /* remove comment */ break; diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index bcc8e6430e201c6c1829dc4893f6df9214ae375b..992b293085b3de09267af30a446fffe0f12355ba 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -12,7 +12,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.55 2000/03/18 05:44:21 tgl Exp $ + * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.56 2000/03/18 18:03:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -86,10 +86,10 @@ static struct _if_value { * and to eliminate parsing troubles for numeric strings. * Exclusive states: * <xb> binary numeric string - thomas 1997-11-16 - * <xc> extended C-style comments - tgl 1997-07-12 - * <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27 + * <xc> extended C-style comments - thomas 1997-07-12 + * <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27 * <xh> hexadecimal numeric string - thomas 1997-11-16 - * <xq> quoted strings - tgl 1997-07-30 + * <xq> quoted strings - thomas 1997-07-30 */ %x xb @@ -146,14 +146,16 @@ xdcqdq \\\" xdcother [^"] xdcinside ({xdcqq}|{xdcqdq}|{xdcother}) -/* C-Style Comments +/* C-style comments + * * The "extended comment" syntax closely resembles allowable operator syntax. * The tricky part here is to get lex to recognize a string starting with * slash-star as a comment, when interpreting it as an operator would produce - * a longer match --- remember lex will prefer a longer match! Also, if we - * have tor whereas we want to see it as a + operator and a comment start. + * a longer match --- remember lex will prefer a longer match! Also, if we + * have something like plus-slash-star, lex will think this is a 3-character + * operator whereas we want to see it as a + operator and a comment start. * The solution is two-fold: - * 1. append {op_and_self}* to xcstart so that it matches as much text as + * 1. append {op_chars}* to xcstart so that it matches as much text as * {operator} would. Then the tie-breaker (first matching rule of same * length) ensures xcstart wins. We put back the extra stuff with yyless() * in case it contains a star-slash that should terminate the comment. @@ -163,22 +165,31 @@ xdcinside ({xdcqq}|{xdcqdq}|{xdcother}) * SQL92-style comments, which start with dash-dash, have similar interactions * with the operator rule. */ -xcstart \/\*{op_and_self}* +xcstart \/\*{op_chars}* xcstop \*+\/ xcinside ([^*]+)|(\*+[^/]) digit [0-9] letter [\200-\377_A-Za-z] -letter_or_digit [\200-\377_A-Za-z0-9] +letter_or_digit [\200-\377_A-Za-z0-9] identifier {letter}{letter_or_digit}* typecast "::" -/* NB: if you change "self", fix the copy in the operator rule too! */ +/* + * "self" is the set of chars that should be returned as single-character + * tokens. "op_chars" is the set of chars that can make up "Op" tokens, + * which can be one or more characters long (but if a single-char token + * appears in the "self" set, it is not to be returned as an Op). Note + * that the sets overlap, but each has some chars that are not in the other. + * + * If you change either set, adjust the character lists appearing in the + * rule for "operator"! + */ self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|] -op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=] -operator {op_and_self}+ +op_chars [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=] +operator {op_chars}+ /* we no longer allow unary minus in numbers. * instead we pass it separately to parser. there it gets @@ -215,7 +226,7 @@ horiz_space [ \t\f] newline [\n\r] non_newline [^\n\r] -comment (("--"|"//"){non_newline}*) +comment ("--"{non_newline}*) whitespace ({space}|{comment}) @@ -250,7 +261,7 @@ cppline {space}*#(.*\\{line_end})*.* /* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION. * AT&T lex does not properly handle C-style comments in this second lex block. - * So, put comments here. tgl - 1997-09-08 + * So, put comments here. thomas - 1997-09-08 * * Quoted strings must allow some special characters such as single-quote * and newline. @@ -294,15 +305,16 @@ cppline {space}*#(.*\\{line_end})*.* mmerror(ET_ERROR, "Bad binary integer input!"); return ICONST; } -<xb><<EOF>> { mmerror(ET_ERROR, "Unterminated binary integer"); } <xh>{xhinside} | <xb>{xbinside} { addlit(yytext, yyleng); } <xh>{xhcat} | -<xb>{xbcat} { /* ignore */ +<xb>{xbcat} { + /* ignore */ } +<xb><<EOF>> { mmerror(ET_ERROR, "Unterminated binary integer"); } <SQL>{xhstart} { BEGIN(xh); @@ -367,23 +379,57 @@ cppline {space}*#(.*\\{line_end})*.* return yytext[0]; } <SQL>{operator} { - /* Check for embedded slash-star or dash-dash */ - char *slashstar = strstr((char*)yytext, "/*"); - char *dashdash = strstr((char*)yytext, "--"); + /* + * Check for embedded slash-star or dash-dash; those + * are comment starts, so operator must stop there. + * Note that slash-star or dash-dash at the first + * character will match a prior rule, not this one. + */ + int nchars = yyleng; + char *slashstar = strstr((char*)yytext, "/*"); + char *dashdash = strstr((char*)yytext, "--"); if (slashstar && dashdash) { + /* if both appear, take the first one */ if (slashstar > dashdash) slashstar = dashdash; } else if (!slashstar) slashstar = dashdash; - if (slashstar) + nchars = slashstar - ((char*)yytext); + + /* + * For SQL92 compatibility, '+' and '-' cannot be the + * last char of a multi-char operator unless the operator + * contains chars that are not in SQL92 operators. + * The idea is to lex '=-' as two operators, but not + * to forbid operator names like '?-' that could not be + * sequences of SQL92 operators. + */ + while (nchars > 1 && + (yytext[nchars-1] == '+' || + yytext[nchars-1] == '-')) + { + int ic; + + for (ic = nchars-2; ic >= 0; ic--) + { + if (strchr("~!@#&`?$:%^|", yytext[ic])) + break; + } + if (ic >= 0) + break; /* found a char that makes it OK */ + nchars--; /* else remove the +/-, and check again */ + } + + if (nchars < yyleng) { - int nchars = slashstar - ((char*)yytext); + /* Strip the unwanted chars from the token */ yyless(nchars); - /* If what we have left is only one char, and it's + /* + * If what we have left is only one char, and it's * one of the characters matching "self", then * return it as a character token the same way * that the "self" rule would have. @@ -393,8 +439,9 @@ cppline {space}*#(.*\\{line_end})*.* return yytext[0]; } + /* Convert "!=" operator to "<>" for compatibility */ if (strcmp((char*)yytext, "!=") == 0) - yylval.str = mm_strdup("<>"); /* compatability */ + yylval.str = mm_strdup("<>"); else yylval.str = mm_strdup((char*)yytext); return Op;