From 2c0556068fc308ed9cce06c85de7e42305d34b86 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <peter_e@gmx.net>
Date: Thu, 15 May 2003 15:50:21 +0000
Subject: [PATCH] Indexing support for pattern matching operations via separate
 operator class when lc_collate is not C.

---
 doc/src/sgml/charset.sgml                |  16 +--
 doc/src/sgml/indices.sgml                |  45 ++++++-
 doc/src/sgml/release.sgml                |   3 +-
 doc/src/sgml/runtime.sgml                |  29 ++---
 src/backend/access/nbtree/nbtcompare.c   |  11 +-
 src/backend/optimizer/path/indxpath.c    |  81 +++++++------
 src/backend/utils/adt/name.c             |  61 +++++++++-
 src/backend/utils/adt/selfuncs.c         | 121 +++----------------
 src/backend/utils/adt/varlena.c          | 145 ++++++++++++++++++++++-
 src/bin/initdb/initdb.sh                 |   9 +-
 src/include/catalog/catversion.h         |   4 +-
 src/include/catalog/pg_amop.h            |  42 ++++++-
 src/include/catalog/pg_amproc.h          |   6 +-
 src/include/catalog/pg_opclass.h         |   6 +-
 src/include/catalog/pg_operator.h        |  33 +++++-
 src/include/catalog/pg_proc.h            |  35 +++++-
 src/include/utils/builtins.h             |  16 ++-
 src/include/utils/selfuncs.h             |   3 +-
 src/test/regress/expected/opr_sanity.out |  19 +--
 src/test/regress/sql/opr_sanity.sql      |  10 +-
 20 files changed, 488 insertions(+), 207 deletions(-)

diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index 8c2af90770d..213f3a8f62b 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -1,4 +1,4 @@
-<!-- $Header: /cvsroot/pgsql/doc/src/sgml/charset.sgml,v 2.35 2003/04/15 13:26:54 petere Exp $ -->
+<!-- $Header: /cvsroot/pgsql/doc/src/sgml/charset.sgml,v 2.36 2003/05/15 15:50:18 petere Exp $ -->
 
 <chapter id="charset">
  <title>Localization</>
@@ -213,23 +213,13 @@ initdb --locale=sv_SE
        The <function>to_char</> family of functions
       </para>
      </listitem>
-
-     <listitem>
-      <para>
-       The <literal>LIKE</> and <literal>~</> operators for pattern
-       matching
-      </para>
-     </listitem>
     </itemizedlist>
    </para>
 
    <para>
     The only severe drawback of using the locale support in
-    <productname>PostgreSQL</> is its speed.  So use locales only if you
-    actually need it.  It should be noted in particular that selecting
-    a non-C locale disables index optimizations for <literal>LIKE</> and
-    <literal>~</> operators, which can make a huge difference in the
-    speed of searches that use those operators.
+    <productname>PostgreSQL</> is its speed.  So use locales only if
+    you actually need them.
    </para>
   </sect2>
 
diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml
index d900b941fa9..fcd7108a14c 100644
--- a/doc/src/sgml/indices.sgml
+++ b/doc/src/sgml/indices.sgml
@@ -1,4 +1,4 @@
-<!-- $Header: /cvsroot/pgsql/doc/src/sgml/indices.sgml,v 1.40 2003/03/25 16:15:36 petere Exp $ -->
+<!-- $Header: /cvsroot/pgsql/doc/src/sgml/indices.sgml,v 1.41 2003/05/15 15:50:18 petere Exp $ -->
 
 <chapter id="indexes">
  <title id="indexes-title">Indexes</title>
@@ -132,6 +132,19 @@ CREATE INDEX test1_id_index ON test1 (id);
    </simplelist>
   </para>
 
+  <para>
+   The optimizer can also use a B-tree index for queries involving the
+   pattern matching operators <literal>LIKE</>,
+   <literal>ILIKE</literal>, <literal>~</literal>, and
+   <literal>~*</literal>, <emphasis>if</emphasis> the pattern is
+   anchored to the beginning of the string, e.g., <literal>col LIKE
+   'foo%'</literal> or <literal>col ~ '^foo'</literal>, but not
+   <literal>col LIKE '%bar'</literal>.  However, if your server does
+   not use the C locale you will need to create the index with a
+   special operator class.  See <xref linkend="indexes-opclass">
+   below.
+  </para>
+
   <para>
    <indexterm>
     <primary>indexes</primary>
@@ -405,6 +418,36 @@ CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable>
       <literal>bigbox_ops</literal>.
      </para>
     </listitem>
+
+    <listitem>
+     <para>
+      The operator classes <literal>text_pattern_ops</literal>,
+      <literal>varchar_pattern_ops</literal>,
+      <literal>bpchar_pattern_ops</literal>, and
+      <literal>name_pattern_ops</literal> support B-tree indexes on
+      the types <type>text</type>, <type>varchar</type>,
+      <type>char</type>, and <type>name</type>, respectively.  The
+      difference from the ordinary operator classes is that the values
+      are compared strictly character by character rather than
+      according to the locale-specific collation rules.  This makes
+      these operator classes suitable for use by queries involving
+      pattern matching expressions (<literal>LIKE</literal> or POSIX
+      regular expressions) if the server does not use the standard
+      <quote>C</quote> locale.  As an example, you might index a
+      <type>varchar</type> column like this:
+<programlisting>
+CREATE INDEX test_index ON test_table (col varchar_pattern_ops);
+</programlisting>
+      If you do use the C locale, you should instead create an index
+      with the default operator class.  Also note that you should
+      create an index with the default operator class if you want
+      queries involving ordinary comparisons to use an index.  Such
+      queries cannot use the
+      <literal><replaceable>xxx</replaceable>_pattern_ops</literal>
+      operator classes.  It is possible, however, to create multiple
+      indexes on the same column with different operator classes.
+     </para>
+    </listitem>
    </itemizedlist>
   </para>
 
diff --git a/doc/src/sgml/release.sgml b/doc/src/sgml/release.sgml
index 9332ac499b4..9d9b758e89c 100644
--- a/doc/src/sgml/release.sgml
+++ b/doc/src/sgml/release.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/release.sgml,v 1.187 2003/05/14 03:25:59 tgl Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/release.sgml,v 1.188 2003/05/15 15:50:18 petere Exp $
 -->
 
 <appendix id="release">
@@ -24,6 +24,7 @@ CDATA means the content is "SGML-free", so you can write without
 worries about funny characters.
 -->
 <literallayout><![CDATA[
+Pattern matching operations can use indexes regardless of locale
 New frontend/backend protocol supports many long-requested features
 SET AUTOCOMMIT TO OFF is no longer supported
 Reimplementation of NUMERIC datatype for more speed
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index b79f8cff62e..f20a8931e3d 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.179 2003/05/14 03:26:00 tgl Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/runtime.sgml,v 1.180 2003/05/15 15:50:18 petere Exp $
 -->
 
 <Chapter Id="runtime">
@@ -133,26 +133,13 @@ postgres$ <userinput>initdb -D /usr/local/pgsql/data</userinput>
   </para>
 
   <para>
-   <command>initdb</command> also initializes the default locale<indexterm><primary>locale</></> for
-   the database cluster.  Normally, it will just take the locale
-   settings in the environment and apply them to the initialized
-   database.  It is possible to specify a different locale for the
-   database; more information about that can be found in <xref
-   linkend="locale">.  One surprise you might encounter while running
-   <command>initdb</command> is a notice similar to this:
-<screen>
-The database cluster will be initialized with locale de_DE.
-This locale setting will prevent the use of indexes for pattern matching
-operations.  If that is a concern, rerun initdb with the collation order
-set to "C".  For more information see the documentation.
-</screen>
-   This is intended to warn you that the currently selected locale
-   will cause indexes to be sorted in an order that prevents them from
-   being used for <literal>LIKE</> and regular-expression searches. If you need
-   good performance in such searches, you should set your current
-   locale to <literal>C</> and re-run <command>initdb</command>, e.g.,
-   by running <literal>initdb --lc-collate=C</literal>. The sort
-   order used within a particular database cluster is set by
+   <command>initdb</command> also initializes the default
+   locale<indexterm><primary>locale</></> for the database cluster.
+   Normally, it will just take the locale settings in the environment
+   and apply them to the initialized database.  It is possible to
+   specify a different locale for the database; more information about
+   that can be found in <xref linkend="locale">.  The sort order used
+   within a particular database cluster is set by
    <command>initdb</command> and cannot be changed later, short of
    dumping all data, rerunning <command>initdb</command>, and
    reloading the data. So it's important to make this choice correctly
diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c
index 2823ee4207d..f8c479677f9 100644
--- a/src/backend/access/nbtree/nbtcompare.c
+++ b/src/backend/access/nbtree/nbtcompare.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtcompare.c,v 1.44 2002/06/20 20:29:25 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtcompare.c,v 1.45 2003/05/15 15:50:18 petere Exp $
  *
  * NOTES
  *
@@ -149,3 +149,12 @@ btnamecmp(PG_FUNCTION_ARGS)
 
 	PG_RETURN_INT32(strncmp(NameStr(*a), NameStr(*b), NAMEDATALEN));
 }
+
+Datum
+btname_pattern_cmp(PG_FUNCTION_ARGS)
+{
+	Name		a = PG_GETARG_NAME(0);
+	Name		b = PG_GETARG_NAME(1);
+	/* bytewise comparison over the whole NAMEDATALEN buffer, no locale */
+	PG_RETURN_INT32(memcmp(NameStr(*a), NameStr(*b), NAMEDATALEN));
+}
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 8254c6b0391..e8ba0b67c11 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.137 2003/05/13 04:38:58 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.138 2003/05/15 15:50:18 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1797,14 +1797,13 @@ match_special_index_operator(Expr *clause, Oid opclass,
 		case OID_VARCHAR_LIKE_OP:
 		case OID_NAME_LIKE_OP:
 			/* the right-hand const is type text for all of these */
-			if (locale_is_like_safe())
-				isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like,
-								  &prefix, &rest) != Pattern_Prefix_None;
+			isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like,
+											   &prefix, &rest) != Pattern_Prefix_None;
 			break;
 
 		case OID_BYTEA_LIKE_OP:
 			isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like,
-								  &prefix, &rest) != Pattern_Prefix_None;
+											   &prefix, &rest) != Pattern_Prefix_None;
 			break;
 
 		case OID_TEXT_ICLIKE_OP:
@@ -1812,9 +1811,8 @@ match_special_index_operator(Expr *clause, Oid opclass,
 		case OID_VARCHAR_ICLIKE_OP:
 		case OID_NAME_ICLIKE_OP:
 			/* the right-hand const is type text for all of these */
-			if (locale_is_like_safe())
-				isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
-								  &prefix, &rest) != Pattern_Prefix_None;
+			isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
+											   &prefix, &rest) != Pattern_Prefix_None;
 			break;
 
 		case OID_TEXT_REGEXEQ_OP:
@@ -1822,9 +1820,8 @@ match_special_index_operator(Expr *clause, Oid opclass,
 		case OID_VARCHAR_REGEXEQ_OP:
 		case OID_NAME_REGEXEQ_OP:
 			/* the right-hand const is type text for all of these */
-			if (locale_is_like_safe())
-				isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex,
-								  &prefix, &rest) != Pattern_Prefix_None;
+			isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex,
+											   &prefix, &rest) != Pattern_Prefix_None;
 			break;
 
 		case OID_TEXT_ICREGEXEQ_OP:
@@ -1832,9 +1829,8 @@ match_special_index_operator(Expr *clause, Oid opclass,
 		case OID_VARCHAR_ICREGEXEQ_OP:
 		case OID_NAME_ICREGEXEQ_OP:
 			/* the right-hand const is type text for all of these */
-			if (locale_is_like_safe())
-				isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
-								  &prefix, &rest) != Pattern_Prefix_None;
+			isIndexable = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
+											   &prefix, &rest) != Pattern_Prefix_None;
 			break;
 
 		case OID_INET_SUB_OP:
@@ -1867,42 +1863,53 @@ match_special_index_operator(Expr *clause, Oid opclass,
 		case OID_TEXT_ICLIKE_OP:
 		case OID_TEXT_REGEXEQ_OP:
 		case OID_TEXT_ICREGEXEQ_OP:
-			if (!op_in_opclass(find_operator(">=", TEXTOID), opclass) ||
-				!op_in_opclass(find_operator("<", TEXTOID), opclass))
-				isIndexable = false;
+			if (lc_collate_is_c())
+				isIndexable = (op_in_opclass(find_operator(">=", TEXTOID), opclass)
+							   && op_in_opclass(find_operator("<", TEXTOID), opclass));
+			else
+				isIndexable = (op_in_opclass(find_operator("~>=~", TEXTOID), opclass)
+							   && op_in_opclass(find_operator("~<~", TEXTOID), opclass));
 			break;
 
 		case OID_BYTEA_LIKE_OP:
-			if (!op_in_opclass(find_operator(">=", BYTEAOID), opclass) ||
-				!op_in_opclass(find_operator("<", BYTEAOID), opclass))
-				isIndexable = false;
+			isIndexable = (op_in_opclass(find_operator(">=", BYTEAOID), opclass)
+						   && op_in_opclass(find_operator("<", BYTEAOID), opclass));
 			break;
 
 		case OID_BPCHAR_LIKE_OP:
 		case OID_BPCHAR_ICLIKE_OP:
 		case OID_BPCHAR_REGEXEQ_OP:
 		case OID_BPCHAR_ICREGEXEQ_OP:
-			if (!op_in_opclass(find_operator(">=", BPCHAROID), opclass) ||
-				!op_in_opclass(find_operator("<", BPCHAROID), opclass))
-				isIndexable = false;
+			if (lc_collate_is_c())
+				isIndexable = (op_in_opclass(find_operator(">=", BPCHAROID), opclass)
+							   && op_in_opclass(find_operator("<", BPCHAROID), opclass));
+			else
+				isIndexable = (op_in_opclass(find_operator("~>=~", BPCHAROID), opclass)
+							   && op_in_opclass(find_operator("~<~", BPCHAROID), opclass));
 			break;
 
 		case OID_VARCHAR_LIKE_OP:
 		case OID_VARCHAR_ICLIKE_OP:
 		case OID_VARCHAR_REGEXEQ_OP:
 		case OID_VARCHAR_ICREGEXEQ_OP:
-			if (!op_in_opclass(find_operator(">=", VARCHAROID), opclass) ||
-				!op_in_opclass(find_operator("<", VARCHAROID), opclass))
-				isIndexable = false;
+			if (lc_collate_is_c())
+				isIndexable = (op_in_opclass(find_operator(">=", VARCHAROID), opclass)
+							   && op_in_opclass(find_operator("<", VARCHAROID), opclass));
+			else
+				isIndexable = (op_in_opclass(find_operator("~>=~", VARCHAROID), opclass)
+							   && op_in_opclass(find_operator("~<~", VARCHAROID), opclass));
 			break;
 
 		case OID_NAME_LIKE_OP:
 		case OID_NAME_ICLIKE_OP:
 		case OID_NAME_REGEXEQ_OP:
 		case OID_NAME_ICREGEXEQ_OP:
-			if (!op_in_opclass(find_operator(">=", NAMEOID), opclass) ||
-				!op_in_opclass(find_operator("<", NAMEOID), opclass))
-				isIndexable = false;
+			if (lc_collate_is_c())
+				isIndexable = (op_in_opclass(find_operator(">=", NAMEOID), opclass)
+							   && op_in_opclass(find_operator("<", NAMEOID), opclass));
+			else
+				isIndexable = (op_in_opclass(find_operator("~>=~", NAMEOID), opclass)
+							   && op_in_opclass(find_operator("~<~", NAMEOID), opclass));
 			break;
 
 		case OID_INET_SUB_OP:
@@ -2039,6 +2046,7 @@ prefix_quals(Node *leftop, Oid expr_op,
 	List	   *result;
 	Oid			datatype;
 	Oid			oproid;
+	const char *oprname;
 	char	   *prefix;
 	Const	   *con;
 	Expr	   *expr;
@@ -2098,9 +2106,10 @@ prefix_quals(Node *leftop, Oid expr_op,
 	 */
 	if (pstatus == Pattern_Prefix_Exact)
 	{
-		oproid = find_operator("=", datatype);
+		oprname = (datatype == BYTEAOID || lc_collate_is_c() ? "=" : "~=~");
+		oproid = find_operator(oprname, datatype);
 		if (oproid == InvalidOid)
-			elog(ERROR, "prefix_quals: no = operator for type %u", datatype);
+			elog(ERROR, "prefix_quals: no operator %s for type %u", oprname, datatype);
 		con = string_to_const(prefix, datatype);
 		expr = make_opclause(oproid, BOOLOID, false,
 							 (Expr *) leftop, (Expr *) con);
@@ -2113,9 +2122,10 @@ prefix_quals(Node *leftop, Oid expr_op,
 	 *
 	 * We can always say "x >= prefix".
 	 */
-	oproid = find_operator(">=", datatype);
+	oprname = (datatype == BYTEAOID || lc_collate_is_c() ? ">=" : "~>=~");
+	oproid = find_operator(oprname, datatype);
 	if (oproid == InvalidOid)
-		elog(ERROR, "prefix_quals: no >= operator for type %u", datatype);
+		elog(ERROR, "prefix_quals: no operator %s for type %u", oprname, datatype);
 	con = string_to_const(prefix, datatype);
 	expr = make_opclause(oproid, BOOLOID, false,
 						 (Expr *) leftop, (Expr *) con);
@@ -2129,9 +2139,10 @@ prefix_quals(Node *leftop, Oid expr_op,
 	greaterstr = make_greater_string(con);
 	if (greaterstr)
 	{
-		oproid = find_operator("<", datatype);
+		oprname = (datatype == BYTEAOID || lc_collate_is_c() ? "<" : "~<~");
+		oproid = find_operator(oprname, datatype);
 		if (oproid == InvalidOid)
-			elog(ERROR, "prefix_quals: no < operator for type %u", datatype);
+			elog(ERROR, "prefix_quals: no operator %s for type %u", oprname, datatype);
 		expr = make_opclause(oproid, BOOLOID, false,
 							 (Expr *) leftop, (Expr *) greaterstr);
 		result = lappend(result, expr);
diff --git a/src/backend/utils/adt/name.c b/src/backend/utils/adt/name.c
index b7a56cb1cb1..37dca0b0c63 100644
--- a/src/backend/utils/adt/name.c
+++ b/src/backend/utils/adt/name.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/name.c,v 1.45 2003/05/09 21:19:49 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/name.c,v 1.46 2003/05/15 15:50:18 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -182,6 +182,65 @@ namege(PG_FUNCTION_ARGS)
 }
 
 
+/*
+ * comparison routines for LIKE indexing support
+ */
+
+Datum
+name_pattern_eq(PG_FUNCTION_ARGS)
+{
+	Name		arg1 = PG_GETARG_NAME(0);
+	Name		arg2 = PG_GETARG_NAME(1);
+	/* true iff all NAMEDATALEN bytes of the two names match */
+	PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) == 0);
+}
+
+Datum
+name_pattern_ne(PG_FUNCTION_ARGS)
+{
+	Name		arg1 = PG_GETARG_NAME(0);
+	Name		arg2 = PG_GETARG_NAME(1);
+	/* true iff any of the NAMEDATALEN bytes of the two names differ */
+	PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) != 0);
+}
+
+Datum
+name_pattern_lt(PG_FUNCTION_ARGS)
+{
+	Name		arg1 = PG_GETARG_NAME(0);
+	Name		arg2 = PG_GETARG_NAME(1);
+	/* true iff arg1 sorts before arg2 in bytewise (memcmp) order */
+	PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) < 0);
+}
+
+Datum
+name_pattern_le(PG_FUNCTION_ARGS)
+{
+	Name		arg1 = PG_GETARG_NAME(0);
+	Name		arg2 = PG_GETARG_NAME(1);
+	/* true iff arg1 sorts before or equal to arg2 in bytewise order */
+	PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) <= 0);
+}
+
+Datum
+name_pattern_gt(PG_FUNCTION_ARGS)
+{
+	Name		arg1 = PG_GETARG_NAME(0);
+	Name		arg2 = PG_GETARG_NAME(1);
+	/* true iff arg1 sorts after arg2 in bytewise (memcmp) order */
+	PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) > 0);
+}
+
+Datum
+name_pattern_ge(PG_FUNCTION_ARGS)
+{
+	Name		arg1 = PG_GETARG_NAME(0);
+	Name		arg2 = PG_GETARG_NAME(1);
+	/* true iff arg1 sorts after or equal to arg2 in bytewise order */
+	PG_RETURN_BOOL(memcmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) >= 0);
+}
+
+
 /* (see char.c for comparison/operation routines) */
 
 int
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 729d085c3ca..5ff4b1931da 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.136 2003/04/16 04:37:58 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.137 2003/05/15 15:50:18 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -180,8 +180,6 @@ static void get_join_vars(List *args, Var **var1, Var **var2);
 static Selectivity prefix_selectivity(Query *root, Var *var, Oid vartype,
 									  Const *prefix);
 static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
-static bool string_lessthan(const char *str1, const char *str2,
-				Oid datatype);
 static Oid	find_operator(const char *opname, Oid datatype);
 static Datum string_to_datum(const char *str, Oid datatype);
 static Const *string_to_const(const char *str, Oid datatype);
@@ -3619,51 +3617,21 @@ pattern_selectivity(Const *patt, Pattern_Type ptype)
 
 
 /*
- * We want to test whether the database's LC_COLLATE setting is safe for
- * LIKE/regexp index optimization.
+ * Try to generate a string greater than the given string or any
+ * string it is a prefix of.  If successful, return a palloc'd string;
+ * else return NULL.
  *
  * The key requirement here is that given a prefix string, say "foo",
  * we must be able to generate another string "fop" that is greater
- * than all strings "foobar" starting with "foo".  Unfortunately, a
- * non-C locale may have arbitrary collation rules in which "fop" >
- * "foo" is not sufficient to ensure "fop" > "foobar".	Until we can
- * come up with a more bulletproof way of generating the upper-bound
- * string, the optimization is disabled in all non-C locales.
+ * than all strings "foobar" starting with "foo".
  *
- * (In theory, locales other than C may be LIKE-safe so this function
- * could be different from lc_collate_is_c(), but in a different
- * theory, non-C locales are completely unpredictable so it's unlikely
- * to happen.)
+ * If we max out the righthand byte, truncate off the last character
+ * and start incrementing the next.  For example, if "z" were the last
+ * character in the sort order, then we could produce "foo" as a
+ * string greater than "fonz".
  *
- * Be sure to maintain the correspondence with the code in initdb.
- */
-bool
-locale_is_like_safe(void)
-{
-	return lc_collate_is_c();
-}
-
-/*
- * Try to generate a string greater than the given string or any string it is
- * a prefix of.  If successful, return a palloc'd string; else return NULL.
- *
- * To work correctly in non-ASCII locales with weird collation orders,
- * we cannot simply increment "foo" to "fop" --- we have to check whether
- * we actually produced a string greater than the given one.  If not,
- * increment the righthand byte again and repeat.  If we max out the righthand
- * byte, truncate off the last character and start incrementing the next.
- * For example, if "z" were the last character in the sort order, then we
- * could produce "foo" as a string greater than "fonz".
- *
- * This could be rather slow in the worst case, but in most cases we won't
- * have to try more than one or two strings before succeeding.
- *
- * XXX this is actually not sufficient, since it only copes with the case
- * where individual characters collate in an order different from their
- * numeric code assignments.  It does not handle cases where there are
- * cross-character effects, such as specially sorted digraphs, multiple
- * sort passes, etc.  For now, we just shut down the whole thing in locales
- * that do such things :-(
+ * The incremented string is not re-checked against the original, so
+ * it is only known to be greater under a bytewise comparison.
  */
 Const *
 make_greater_string(const Const *str_const)
@@ -3699,18 +3667,16 @@ make_greater_string(const Const *str_const)
 		/*
 		 * Try to generate a larger string by incrementing the last byte.
 		 */
-		while (*lastchar < (unsigned char) 255)
+		if (*lastchar < (unsigned char) 255)
 		{
+			Const	   *workstr_const;
+
 			(*lastchar)++;
-			if (string_lessthan(str, workstr, datatype))
-			{
-				/* Success! */
-				Const	   *workstr_const = string_to_const(workstr, datatype);
+			workstr_const = string_to_const(workstr, datatype);
 
-				pfree(str);
-				pfree(workstr);
-				return workstr_const;
-			}
+			pfree(str);
+			pfree(workstr);
+			return workstr_const;
 		}
 
 		/* restore last byte so we don't confuse pg_mbcliplen */
@@ -3736,57 +3702,6 @@ make_greater_string(const Const *str_const)
 	return (Const *) NULL;
 }
 
-/*
- * Test whether two strings are "<" according to the rules of the given
- * datatype.  We do this the hard way, ie, actually calling the type's
- * "<" operator function, to ensure we get the right result...
- */
-static bool
-string_lessthan(const char *str1, const char *str2, Oid datatype)
-{
-	Datum		datum1 = string_to_datum(str1, datatype);
-	Datum		datum2 = string_to_datum(str2, datatype);
-	bool		result;
-
-	switch (datatype)
-	{
-		case TEXTOID:
-			result = DatumGetBool(DirectFunctionCall2(text_lt,
-													  datum1, datum2));
-			break;
-
-		case BPCHAROID:
-			result = DatumGetBool(DirectFunctionCall2(bpcharlt,
-													  datum1, datum2));
-			break;
-
-		case VARCHAROID:
-			result = DatumGetBool(DirectFunctionCall2(varcharlt,
-													  datum1, datum2));
-			break;
-
-		case NAMEOID:
-			result = DatumGetBool(DirectFunctionCall2(namelt,
-													  datum1, datum2));
-			break;
-
-		case BYTEAOID:
-			result = DatumGetBool(DirectFunctionCall2(bytealt,
-													  datum1, datum2));
-			break;
-
-		default:
-			elog(ERROR, "string_lessthan: unexpected datatype %u", datatype);
-			result = false;
-			break;
-	}
-
-	pfree(DatumGetPointer(datum1));
-	pfree(DatumGetPointer(datum2));
-
-	return result;
-}
-
 /* See if there is a binary op of the given name for the given datatype */
 /* NB: we assume that only built-in system operators are searched for */
 static Oid
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 2a5f97ff028..6be21d241f1 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.97 2003/05/09 15:44:40 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.98 2003/05/15 15:50:19 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1050,6 +1050,149 @@ text_smaller(PG_FUNCTION_ARGS)
 	PG_RETURN_TEXT_P(result);
 }
 
+
+/*
+ * The following operators support byte-by-byte (memcmp) comparison
+ * of text data types, to allow building indexes suitable for LIKE
+ * clauses.
+ */
+
+static int
+internal_text_pattern_compare(text *arg1, text *arg2)
+{
+	int result;
+	/* bytewise compare of common prefix; on a tie, shorter sorts first */
+	result = memcmp(VARDATA(arg1), VARDATA(arg2),
+					Min(VARSIZE(arg1), VARSIZE(arg2)) - VARHDRSZ);
+	if (result != 0)
+		return result;
+	else if (VARSIZE(arg1) < VARSIZE(arg2))
+		return -1;
+	else if (VARSIZE(arg1) > VARSIZE(arg2))
+		return 1;
+	else
+		return 0;
+}
+
+
+Datum
+text_pattern_lt(PG_FUNCTION_ARGS)
+{
+	text	   *arg1 = PG_GETARG_TEXT_P(0);
+	text	   *arg2 = PG_GETARG_TEXT_P(1);
+	int			result;
+	/* bytewise ordering; true iff arg1 sorts strictly before arg2 */
+	result = internal_text_pattern_compare(arg1, arg2);
+
+	PG_FREE_IF_COPY(arg1, 0);
+	PG_FREE_IF_COPY(arg2, 1);
+
+	PG_RETURN_BOOL(result < 0);
+}
+
+
+Datum
+text_pattern_le(PG_FUNCTION_ARGS)
+{
+	text	   *arg1 = PG_GETARG_TEXT_P(0);
+	text	   *arg2 = PG_GETARG_TEXT_P(1);
+	int			result;
+	/* bytewise ordering; true iff arg1 sorts before or equal to arg2 */
+	result = internal_text_pattern_compare(arg1, arg2);
+
+	PG_FREE_IF_COPY(arg1, 0);
+	PG_FREE_IF_COPY(arg2, 1);
+
+	PG_RETURN_BOOL(result <= 0);
+}
+
+
+Datum
+text_pattern_eq(PG_FUNCTION_ARGS)
+{
+	text	   *arg1 = PG_GETARG_TEXT_P(0);
+	text	   *arg2 = PG_GETARG_TEXT_P(1);
+	int			result;
+	/* differing sizes cannot be equal, so skip the memcmp in that case */
+	if (VARSIZE(arg1) != VARSIZE(arg2))
+		result = 1;
+	else
+		result = internal_text_pattern_compare(arg1, arg2);
+
+	PG_FREE_IF_COPY(arg1, 0);
+	PG_FREE_IF_COPY(arg2, 1);
+
+	PG_RETURN_BOOL(result == 0);
+}
+
+
+Datum
+text_pattern_ge(PG_FUNCTION_ARGS)
+{
+	text	   *arg1 = PG_GETARG_TEXT_P(0);
+	text	   *arg2 = PG_GETARG_TEXT_P(1);
+	int			result;
+	/* bytewise ordering; true iff arg1 sorts after or equal to arg2 */
+	result = internal_text_pattern_compare(arg1, arg2);
+
+	PG_FREE_IF_COPY(arg1, 0);
+	PG_FREE_IF_COPY(arg2, 1);
+
+	PG_RETURN_BOOL(result >= 0);
+}
+
+
+Datum
+text_pattern_gt(PG_FUNCTION_ARGS)
+{
+	text	   *arg1 = PG_GETARG_TEXT_P(0);
+	text	   *arg2 = PG_GETARG_TEXT_P(1);
+	int			result;
+	/* bytewise ordering; true iff arg1 sorts strictly after arg2 */
+	result = internal_text_pattern_compare(arg1, arg2);
+
+	PG_FREE_IF_COPY(arg1, 0);
+	PG_FREE_IF_COPY(arg2, 1);
+
+	PG_RETURN_BOOL(result > 0);
+}
+
+
+Datum
+text_pattern_ne(PG_FUNCTION_ARGS)
+{
+	text	   *arg1 = PG_GETARG_TEXT_P(0);
+	text	   *arg2 = PG_GETARG_TEXT_P(1);
+	int			result;
+	/* differing sizes are always unequal, so skip the memcmp in that case */
+	if (VARSIZE(arg1) != VARSIZE(arg2))
+		result = 1;
+	else
+		result = internal_text_pattern_compare(arg1, arg2);
+
+	PG_FREE_IF_COPY(arg1, 0);
+	PG_FREE_IF_COPY(arg2, 1);
+
+	PG_RETURN_BOOL(result != 0);
+}
+
+
+Datum
+bttext_pattern_cmp(PG_FUNCTION_ARGS)
+{
+	text	   *arg1 = PG_GETARG_TEXT_P(0);
+	text	   *arg2 = PG_GETARG_TEXT_P(1);
+	int			result;
+	/* three-way result: negative, zero or positive in bytewise order */
+	result = internal_text_pattern_compare(arg1, arg2);
+
+	PG_FREE_IF_COPY(arg1, 0);
+	PG_FREE_IF_COPY(arg2, 1);
+
+	PG_RETURN_INT32(result);
+}
+
+
 /*-------------------------------------------------------------
  * byteaoctetlen
  *
diff --git a/src/bin/initdb/initdb.sh b/src/bin/initdb/initdb.sh
index 079ceb076b2..47290529dfb 100644
--- a/src/bin/initdb/initdb.sh
+++ b/src/bin/initdb/initdb.sh
@@ -27,7 +27,7 @@
 # Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 # Portions Copyright (c) 1994, Regents of the University of California
 #
-# $Header: /cvsroot/pgsql/src/bin/initdb/Attic/initdb.sh,v 1.188 2003/04/04 03:03:53 tgl Exp $
+# $Header: /cvsroot/pgsql/src/bin/initdb/Attic/initdb.sh,v 1.189 2003/05/15 15:50:19 petere Exp $
 #
 #-------------------------------------------------------------------------
 
@@ -478,13 +478,6 @@ else
     echo "    COLLATE:  `pg_getlocale COLLATE`${TAB}CTYPE:   `pg_getlocale CTYPE`${TAB}MESSAGES: `pg_getlocale MESSAGES`"
     echo "    MONETARY: `pg_getlocale MONETARY`${TAB}NUMERIC: `pg_getlocale NUMERIC`${TAB}TIME:     `pg_getlocale TIME`"
 fi
-
-# (Be sure to maintain the correspondence with locale_is_like_safe() in selfuncs.c.)
-if test x`pg_getlocale COLLATE` != xC && test x`pg_getlocale COLLATE` != xPOSIX; then
-    echo "This locale setting will prevent the use of indexes for pattern matching"
-    echo "operations.  If that is a concern, rerun $CMDNAME with the collation order"
-    echo "set to \"C\".  For more information see the documentation."
-fi
 echo
 
 
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index bb4b824c9bc..5accd1dc5cc 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catversion.h,v 1.193 2003/05/13 18:03:07 tgl Exp $
+ * $Id: catversion.h,v 1.194 2003/05/15 15:50:19 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200305131
+#define CATALOG_VERSION_NO	200305151
 
 #endif
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 445033ecd0d..b373ce15a22 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -16,7 +16,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_amop.h,v 1.47 2003/03/10 22:28:19 tgl Exp $
+ * $Id: pg_amop.h,v 1.48 2003/05/15 15:50:19 petere Exp $
  *
  * NOTES
  *	 the genbki.sh script reads this file and generates .bki
@@ -378,6 +378,46 @@ DATA(insert (	2002 3 f 1804 ));
 DATA(insert (	2002 4 f 1809 ));
 DATA(insert (	2002 5 f 1807 ));
 
+/*
+ *	btree text pattern
+ */
+
+DATA(insert (	2095 1 f 2314 ));
+DATA(insert (	2095 2 f 2315 ));
+DATA(insert (	2095 3 f 2316 ));
+DATA(insert (	2095 4 f 2317 ));
+DATA(insert (	2095 5 f 2318 ));
+
+/*
+ *	btree varchar pattern
+ */
+
+DATA(insert (	2096 1 f 2320 ));
+DATA(insert (	2096 2 f 2321 ));
+DATA(insert (	2096 3 f 2322 ));
+DATA(insert (	2096 4 f 2323 ));
+DATA(insert (	2096 5 f 2324 ));
+
+/*
+ *	btree bpchar pattern
+ */
+
+DATA(insert (	2097 1 f 2326 ));
+DATA(insert (	2097 2 f 2327 ));
+DATA(insert (	2097 3 f 2328 ));
+DATA(insert (	2097 4 f 2329 ));
+DATA(insert (	2097 5 f 2330 ));
+
+/*
+ *	btree name pattern
+ */
+
+DATA(insert (	2098 1 f 2332 ));
+DATA(insert (	2098 2 f 2333 ));
+DATA(insert (	2098 3 f 2334 ));
+DATA(insert (	2098 4 f 2335 ));
+DATA(insert (	2098 5 f 2336 ));
+
 
 /*
  *	hash index _ops
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h
index 6f4100a7490..ba15ac9a660 100644
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -14,7 +14,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_amproc.h,v 1.35 2002/06/20 20:29:44 momjian Exp $
+ * $Id: pg_amproc.h,v 1.36 2003/05/15 15:50:19 petere Exp $
  *
  * NOTES
  *	  the genbki.sh script reads this file and generates .bki
@@ -105,6 +105,10 @@ DATA(insert (	2000 1 1358 ));
 DATA(insert (	2002 1 1672 ));
 DATA(insert (	2003 1 1079 ));
 DATA(insert (	2039 1 1314 ));
+DATA(insert (	2095 1 2166 ));
+DATA(insert (	2096 1 2173 ));
+DATA(insert (	2097 1 2180 ));
+DATA(insert (	2098 1 2187 ));
 
 
 /* hash */
diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h
index bedd9d72e08..29c92f5ddfb 100644
--- a/src/include/catalog/pg_opclass.h
+++ b/src/include/catalog/pg_opclass.h
@@ -26,7 +26,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_opclass.h,v 1.47 2003/03/10 22:28:19 tgl Exp $
+ * $Id: pg_opclass.h,v 1.48 2003/05/15 15:50:19 petere Exp $
  *
  * NOTES
  *	  the genbki.sh script reads this file and generates .bki
@@ -140,5 +140,9 @@ DATA(insert OID = 2003 (	403		varchar_ops		PGNSP PGUID 1043 t 0 ));
 DATA(insert OID = 2004 (	405		varchar_ops		PGNSP PGUID 1043 t 0 ));
 DATA(insert OID = 2039 (	403		timestamp_ops	PGNSP PGUID 1114 t 0 ));
 DATA(insert OID = 2040 (	405		timestamp_ops	PGNSP PGUID 1114 t 0 ));
+DATA(insert OID = 2095 (	403		text_pattern_ops	PGNSP PGUID   25 f 0 ));
+DATA(insert OID = 2096 (	403		varchar_pattern_ops	PGNSP PGUID 1043 f 0 ));
+DATA(insert OID = 2097 (	403		bpchar_pattern_ops	PGNSP PGUID 1042 f 0 ));
+DATA(insert OID = 2098 (	403		name_pattern_ops	PGNSP PGUID   19 f 0 ));
 
 #endif   /* PG_OPCLASS_H */
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index 129db7f7602..2fe0da5fbe9 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_operator.h,v 1.112 2003/04/08 23:20:03 tgl Exp $
+ * $Id: pg_operator.h,v 1.113 2003/05/15 15:50:19 petere Exp $
  *
  * NOTES
  *	  the genbki.sh script reads this file and generates .bki
@@ -814,6 +814,37 @@ DATA(insert OID = 2066 (  "+"	   PGNSP PGUID b f 1114 1186 1114	 0	0 0 0 0 0 tim
 DATA(insert OID = 2067 (  "-"	   PGNSP PGUID b f 1114 1114 1186	 0	0 0 0 0 0 timestamp_mi - - ));
 DATA(insert OID = 2068 (  "-"	   PGNSP PGUID b f 1114 1186 1114	 0	0 0 0 0 0 timestamp_mi_span - - ));
 
+/* character-by-character (not collation order) comparison operators for character types */
+
+DATA(insert OID = 2314 ( "~<~"	PGNSP PGUID b f 25 25 16 2318 2317 0 0 0 0 text_pattern_lt scalarltsel scalarltjoinsel ));
+DATA(insert OID = 2315 ( "~<=~"	PGNSP PGUID b f 25 25 16 2317 2318 0 0 0 0 text_pattern_le scalarltsel scalarltjoinsel ));
+DATA(insert OID = 2316 ( "~=~"	PGNSP PGUID b t 25 25 16 2316 2319 2314 2314 2314 2318 text_pattern_eq eqsel eqjoinsel ));
+DATA(insert OID = 2317 ( "~>=~"	PGNSP PGUID b f 25 25 16 2315 2314 0 0 0 0 text_pattern_ge scalargtsel scalargtjoinsel ));
+DATA(insert OID = 2318 ( "~>~"	PGNSP PGUID b f 25 25 16 2314 2315 0 0 0 0 text_pattern_gt scalargtsel scalargtjoinsel ));
+DATA(insert OID = 2319 ( "~<>~"	PGNSP PGUID b f 25 25 16 2319 2316 0 0 0 0 text_pattern_ne neqsel neqjoinsel ));
+
+DATA(insert OID = 2320 ( "~<~"	PGNSP PGUID b f 1043 1043 16 2324 2323 0 0 0 0 varchar_pattern_lt scalarltsel scalarltjoinsel ));
+DATA(insert OID = 2321 ( "~<=~"	PGNSP PGUID b f 1043 1043 16 2323 2324 0 0 0 0 varchar_pattern_le scalarltsel scalarltjoinsel ));
+DATA(insert OID = 2322 ( "~=~"	PGNSP PGUID b t 1043 1043 16 2322 2325 2320 2320 2320 2324 varchar_pattern_eq eqsel eqjoinsel ));
+DATA(insert OID = 2323 ( "~>=~"	PGNSP PGUID b f 1043 1043 16 2321 2320 0 0 0 0 varchar_pattern_ge scalargtsel scalargtjoinsel ));
+DATA(insert OID = 2324 ( "~>~"	PGNSP PGUID b f 1043 1043 16 2320 2321 0 0 0 0 varchar_pattern_gt scalargtsel scalargtjoinsel ));
+DATA(insert OID = 2325 ( "~<>~"	PGNSP PGUID b f 1043 1043 16 2325 2322 0 0 0 0 varchar_pattern_ne neqsel neqjoinsel ));
+
+DATA(insert OID = 2326 ( "~<~"	PGNSP PGUID b f 1042 1042 16 2330 2329 0 0 0 0 bpchar_pattern_lt scalarltsel scalarltjoinsel ));
+DATA(insert OID = 2327 ( "~<=~"	PGNSP PGUID b f 1042 1042 16 2329 2330 0 0 0 0 bpchar_pattern_le scalarltsel scalarltjoinsel ));
+DATA(insert OID = 2328 ( "~=~"	PGNSP PGUID b t 1042 1042 16 2328 2331 2326 2326 2326 2330 bpchar_pattern_eq eqsel eqjoinsel ));
+DATA(insert OID = 2329 ( "~>=~"	PGNSP PGUID b f 1042 1042 16 2327 2326 0 0 0 0 bpchar_pattern_ge scalargtsel scalargtjoinsel ));
+DATA(insert OID = 2330 ( "~>~"	PGNSP PGUID b f 1042 1042 16 2326 2327 0 0 0 0 bpchar_pattern_gt scalargtsel scalargtjoinsel ));
+DATA(insert OID = 2331 ( "~<>~"	PGNSP PGUID b f 1042 1042 16 2331 2328 0 0 0 0 bpchar_pattern_ne neqsel neqjoinsel ));
+
+DATA(insert OID = 2332 ( "~<~"	PGNSP PGUID b f 19 19 16 2336 2335 0 0 0 0 name_pattern_lt scalarltsel scalarltjoinsel ));
+DATA(insert OID = 2333 ( "~<=~"	PGNSP PGUID b f 19 19 16 2335 2336 0 0 0 0 name_pattern_le scalarltsel scalarltjoinsel ));
+DATA(insert OID = 2334 ( "~=~"	PGNSP PGUID b t 19 19 16 2334 2337 2332 2332 2332 2336 name_pattern_eq eqsel eqjoinsel ));
+DATA(insert OID = 2335 ( "~>=~"	PGNSP PGUID b f 19 19 16 2333 2332 0 0 0 0 name_pattern_ge scalargtsel scalargtjoinsel ));
+DATA(insert OID = 2336 ( "~>~"	PGNSP PGUID b f 19 19 16 2332 2333 0 0 0 0 name_pattern_gt scalargtsel scalargtjoinsel ));
+DATA(insert OID = 2337 ( "~<>~"	PGNSP PGUID b f 19 19 16 2337 2334 0 0 0 0 name_pattern_ne neqsel neqjoinsel ));
+
+
 
 /*
  * function prototypes
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 33513ad1de0..f53af27f44f 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_proc.h,v 1.299 2003/05/13 18:03:07 tgl Exp $
+ * $Id: pg_proc.h,v 1.300 2003/05/15 15:50:19 petere Exp $
  *
  * NOTES
  *	  The script catalog/genbki.sh reads this file and generates .bki
@@ -3024,6 +3024,39 @@ DATA(insert OID = 2157 (  stddev			PGNSP PGUID 12 t f f f i 1 701 "700"  aggrega
 DATA(insert OID = 2158 (  stddev			PGNSP PGUID 12 t f f f i 1 701 "701"  aggregate_dummy - _null_ ));
 DATA(insert OID = 2159 (  stddev			PGNSP PGUID 12 t f f f i 1 1700 "1700"	aggregate_dummy - _null_ ));
 
+DATA(insert OID = 2160 ( text_pattern_lt     PGNSP PGUID 12 f f t f i 2 16 "25 25" text_pattern_lt - _null_ ));
+DATA(insert OID = 2161 ( text_pattern_le     PGNSP PGUID 12 f f t f i 2 16 "25 25" text_pattern_le - _null_ ));
+DATA(insert OID = 2162 ( text_pattern_eq     PGNSP PGUID 12 f f t f i 2 16 "25 25" text_pattern_eq - _null_ ));
+DATA(insert OID = 2163 ( text_pattern_ge     PGNSP PGUID 12 f f t f i 2 16 "25 25" text_pattern_ge - _null_ ));
+DATA(insert OID = 2164 ( text_pattern_gt     PGNSP PGUID 12 f f t f i 2 16 "25 25" text_pattern_gt - _null_ ));
+DATA(insert OID = 2165 ( text_pattern_ne     PGNSP PGUID 12 f f t f i 2 16 "25 25" text_pattern_ne - _null_ ));
+DATA(insert OID = 2166 ( bttext_pattern_cmp  PGNSP PGUID 12 f f t f i 2 23 "25 25" bttext_pattern_cmp - _null_ ));
+
+/* We use the same procedures here as above since the types are binary compatible. */
+DATA(insert OID = 2167 ( varchar_pattern_lt    PGNSP PGUID 12 f f t f i 2 16 "1043 1043" text_pattern_lt - _null_ ));
+DATA(insert OID = 2168 ( varchar_pattern_le    PGNSP PGUID 12 f f t f i 2 16 "1043 1043" text_pattern_le - _null_ ));
+DATA(insert OID = 2169 ( varchar_pattern_eq    PGNSP PGUID 12 f f t f i 2 16 "1043 1043" text_pattern_eq - _null_ ));
+DATA(insert OID = 2170 ( varchar_pattern_ge    PGNSP PGUID 12 f f t f i 2 16 "1043 1043" text_pattern_ge - _null_ ));
+DATA(insert OID = 2171 ( varchar_pattern_gt    PGNSP PGUID 12 f f t f i 2 16 "1043 1043" text_pattern_gt - _null_ ));
+DATA(insert OID = 2172 ( varchar_pattern_ne    PGNSP PGUID 12 f f t f i 2 16 "1043 1043" text_pattern_ne - _null_ ));
+DATA(insert OID = 2173 ( btvarchar_pattern_cmp PGNSP PGUID 12 f f t f i 2 23 "1043 1043" bttext_pattern_cmp - _null_ ));
+
+DATA(insert OID = 2174 ( bpchar_pattern_lt    PGNSP PGUID 12 f f t f i 2 16 "1042 1042" text_pattern_lt - _null_ ));
+DATA(insert OID = 2175 ( bpchar_pattern_le    PGNSP PGUID 12 f f t f i 2 16 "1042 1042" text_pattern_le - _null_ ));
+DATA(insert OID = 2176 ( bpchar_pattern_eq    PGNSP PGUID 12 f f t f i 2 16 "1042 1042" text_pattern_eq - _null_ ));
+DATA(insert OID = 2177 ( bpchar_pattern_ge    PGNSP PGUID 12 f f t f i 2 16 "1042 1042" text_pattern_ge - _null_ ));
+DATA(insert OID = 2178 ( bpchar_pattern_gt    PGNSP PGUID 12 f f t f i 2 16 "1042 1042" text_pattern_gt - _null_ ));
+DATA(insert OID = 2179 ( bpchar_pattern_ne    PGNSP PGUID 12 f f t f i 2 16 "1042 1042" text_pattern_ne - _null_ ));
+DATA(insert OID = 2180 ( btbpchar_pattern_cmp PGNSP PGUID 12 f f t f i 2 23 "1042 1042" bttext_pattern_cmp - _null_ ));
+
+DATA(insert OID = 2181 ( name_pattern_lt    PGNSP PGUID 12 f f t f i 2 16 "19 19" name_pattern_lt - _null_ ));
+DATA(insert OID = 2182 ( name_pattern_le    PGNSP PGUID 12 f f t f i 2 16 "19 19" name_pattern_le - _null_ ));
+DATA(insert OID = 2183 ( name_pattern_eq    PGNSP PGUID 12 f f t f i 2 16 "19 19" name_pattern_eq - _null_ ));
+DATA(insert OID = 2184 ( name_pattern_ge    PGNSP PGUID 12 f f t f i 2 16 "19 19" name_pattern_ge - _null_ ));
+DATA(insert OID = 2185 ( name_pattern_gt    PGNSP PGUID 12 f f t f i 2 16 "19 19" name_pattern_gt - _null_ ));
+DATA(insert OID = 2186 ( name_pattern_ne    PGNSP PGUID 12 f f t f i 2 16 "19 19" name_pattern_ne - _null_ ));
+DATA(insert OID = 2187 ( btname_pattern_cmp PGNSP PGUID 12 f f t f i 2 23 "19 19" btname_pattern_cmp - _null_ ));
+
 
 DATA(insert OID = 2212 (  regprocedurein	PGNSP PGUID 12 f f t f s 1 2202 "2275"	regprocedurein - _null_ ));
 DESCR("I/O");
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index a86c9a9ab82..9620483f2c2 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: builtins.h,v 1.216 2003/05/13 18:03:08 tgl Exp $
+ * $Id: builtins.h,v 1.217 2003/05/15 15:50:20 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -190,6 +190,12 @@ extern Datum namelt(PG_FUNCTION_ARGS);
 extern Datum namele(PG_FUNCTION_ARGS);
 extern Datum namegt(PG_FUNCTION_ARGS);
 extern Datum namege(PG_FUNCTION_ARGS);
+extern Datum name_pattern_eq(PG_FUNCTION_ARGS);
+extern Datum name_pattern_ne(PG_FUNCTION_ARGS);
+extern Datum name_pattern_lt(PG_FUNCTION_ARGS);
+extern Datum name_pattern_le(PG_FUNCTION_ARGS);
+extern Datum name_pattern_gt(PG_FUNCTION_ARGS);
+extern Datum name_pattern_ge(PG_FUNCTION_ARGS);
 extern int	namecpy(Name n1, Name n2);
 extern int	namestrcpy(Name name, const char *str);
 extern int	namestrcmp(Name name, const char *str);
@@ -219,6 +225,8 @@ extern Datum btabstimecmp(PG_FUNCTION_ARGS);
 extern Datum btcharcmp(PG_FUNCTION_ARGS);
 extern Datum btnamecmp(PG_FUNCTION_ARGS);
 extern Datum bttextcmp(PG_FUNCTION_ARGS);
+extern Datum btname_pattern_cmp(PG_FUNCTION_ARGS);
+extern Datum bttext_pattern_cmp(PG_FUNCTION_ARGS);
 
 /* float.c */
 extern int	extra_float_digits;
@@ -512,6 +520,12 @@ extern Datum text_gt(PG_FUNCTION_ARGS);
 extern Datum text_ge(PG_FUNCTION_ARGS);
 extern Datum text_larger(PG_FUNCTION_ARGS);
 extern Datum text_smaller(PG_FUNCTION_ARGS);
+extern Datum text_pattern_eq(PG_FUNCTION_ARGS);
+extern Datum text_pattern_ne(PG_FUNCTION_ARGS);
+extern Datum text_pattern_lt(PG_FUNCTION_ARGS);
+extern Datum text_pattern_le(PG_FUNCTION_ARGS);
+extern Datum text_pattern_gt(PG_FUNCTION_ARGS);
+extern Datum text_pattern_ge(PG_FUNCTION_ARGS);
 extern Datum textlen(PG_FUNCTION_ARGS);
 extern Datum textoctetlen(PG_FUNCTION_ARGS);
 extern Datum textpos(PG_FUNCTION_ARGS);
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 757c0e1e1ac..cea19f26930 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: selfuncs.h,v 1.12 2003/01/28 22:13:41 tgl Exp $
+ * $Id: selfuncs.h,v 1.13 2003/05/15 15:50:20 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -37,7 +37,6 @@ extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
 					 Pattern_Type ptype,
 					 Const **prefix,
 					 Const **rest);
-extern bool locale_is_like_safe(void);
 extern Const *make_greater_string(const Const *str_const);
 
 extern Datum eqsel(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 48831e911be..2573d2011c3 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -103,11 +103,13 @@ WHERE p1.oid != p2.oid AND
     (p1.proargtypes[0] < p2.proargtypes[0]);
  proargtypes | proargtypes 
 -------------+-------------
+          25 |        1042
           25 |        1043
+        1042 |        1043
         1114 |        1184
         1560 |        1562
         2277 |        2283
-(4 rows)
+(6 rows)
 
 SELECT DISTINCT p1.proargtypes[1], p2.proargtypes[1]
 FROM pg_proc AS p1, pg_proc AS p2
@@ -118,10 +120,13 @@ WHERE p1.oid != p2.oid AND
     (p1.proargtypes[1] < p2.proargtypes[1]);
  proargtypes | proargtypes 
 -------------+-------------
+          25 |        1042
+          25 |        1043
+        1042 |        1043
         1114 |        1184
         1560 |        1562
         2277 |        2283
-(3 rows)
+(6 rows)
 
 SELECT DISTINCT p1.proargtypes[2], p2.proargtypes[2]
 FROM pg_proc AS p1, pg_proc AS p2
@@ -332,7 +337,7 @@ WHERE p1.oprnegate = p2.oid AND
 SELECT p1.oid, p1.oprcode, p2.oid, p2.oprcode
 FROM pg_operator AS p1, pg_operator AS p2
 WHERE p1.oprlsortop = p2.oid AND
-    (p1.oprname != '=' OR p2.oprname != '<' OR
+    (p1.oprname NOT IN ('=', '~=~') OR p2.oprname NOT IN ('<', '~<~') OR
      p1.oprkind != 'b' OR p2.oprkind != 'b' OR
      p1.oprleft != p2.oprleft OR
      p1.oprleft != p2.oprright OR
@@ -345,7 +350,7 @@ WHERE p1.oprlsortop = p2.oid AND
 SELECT p1.oid, p1.oprcode, p2.oid, p2.oprcode
 FROM pg_operator AS p1, pg_operator AS p2
 WHERE p1.oprrsortop = p2.oid AND
-    (p1.oprname != '=' OR p2.oprname != '<' OR
+    (p1.oprname NOT IN ('=', '~=~') OR p2.oprname NOT IN ('<', '~<~') OR
      p1.oprkind != 'b' OR p2.oprkind != 'b' OR
      p1.oprright != p2.oprleft OR
      p1.oprright != p2.oprright OR
@@ -358,7 +363,7 @@ WHERE p1.oprrsortop = p2.oid AND
 SELECT p1.oid, p1.oprcode, p2.oid, p2.oprcode
 FROM pg_operator AS p1, pg_operator AS p2
 WHERE p1.oprltcmpop = p2.oid AND
-    (p1.oprname != '=' OR p2.oprname != '<' OR
+    (p1.oprname NOT IN ('=', '~=~') OR p2.oprname NOT IN ('<', '~<~') OR
      p1.oprkind != 'b' OR p2.oprkind != 'b' OR
      p1.oprleft != p2.oprleft OR
      p1.oprright != p2.oprright OR
@@ -371,7 +376,7 @@ WHERE p1.oprltcmpop = p2.oid AND
 SELECT p1.oid, p1.oprcode, p2.oid, p2.oprcode
 FROM pg_operator AS p1, pg_operator AS p2
 WHERE p1.oprgtcmpop = p2.oid AND
-    (p1.oprname != '=' OR p2.oprname != '>' OR
+    (p1.oprname NOT IN ('=', '~=~') OR p2.oprname NOT IN ('>', '~>~') OR
      p1.oprkind != 'b' OR p2.oprkind != 'b' OR
      p1.oprleft != p2.oprleft OR
      p1.oprright != p2.oprright OR
@@ -427,7 +432,7 @@ SELECT p1.oid, p1.oprname
 FROM pg_operator AS p1
 WHERE p1.oprcanhash AND NOT
     (p1.oprkind = 'b' AND p1.oprresult = 'bool'::regtype AND
-     p1.oprleft = p1.oprright AND p1.oprname = '=' AND p1.oprcom = p1.oid);
+     p1.oprleft = p1.oprright AND p1.oprname IN ('=', '~=~') AND p1.oprcom = p1.oid);
  oid | oprname 
 -----+---------
  353 | =
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql
index 4b07a9ab3f5..5be76aa3b41 100644
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -276,7 +276,7 @@ WHERE p1.oprnegate = p2.oid AND
 SELECT p1.oid, p1.oprcode, p2.oid, p2.oprcode
 FROM pg_operator AS p1, pg_operator AS p2
 WHERE p1.oprlsortop = p2.oid AND
-    (p1.oprname != '=' OR p2.oprname != '<' OR
+    (p1.oprname NOT IN ('=', '~=~') OR p2.oprname NOT IN ('<', '~<~') OR
      p1.oprkind != 'b' OR p2.oprkind != 'b' OR
      p1.oprleft != p2.oprleft OR
      p1.oprleft != p2.oprright OR
@@ -286,7 +286,7 @@ WHERE p1.oprlsortop = p2.oid AND
 SELECT p1.oid, p1.oprcode, p2.oid, p2.oprcode
 FROM pg_operator AS p1, pg_operator AS p2
 WHERE p1.oprrsortop = p2.oid AND
-    (p1.oprname != '=' OR p2.oprname != '<' OR
+    (p1.oprname NOT IN ('=', '~=~') OR p2.oprname NOT IN ('<', '~<~') OR
      p1.oprkind != 'b' OR p2.oprkind != 'b' OR
      p1.oprright != p2.oprleft OR
      p1.oprright != p2.oprright OR
@@ -296,7 +296,7 @@ WHERE p1.oprrsortop = p2.oid AND
 SELECT p1.oid, p1.oprcode, p2.oid, p2.oprcode
 FROM pg_operator AS p1, pg_operator AS p2
 WHERE p1.oprltcmpop = p2.oid AND
-    (p1.oprname != '=' OR p2.oprname != '<' OR
+    (p1.oprname NOT IN ('=', '~=~') OR p2.oprname NOT IN ('<', '~<~') OR
      p1.oprkind != 'b' OR p2.oprkind != 'b' OR
      p1.oprleft != p2.oprleft OR
      p1.oprright != p2.oprright OR
@@ -306,7 +306,7 @@ WHERE p1.oprltcmpop = p2.oid AND
 SELECT p1.oid, p1.oprcode, p2.oid, p2.oprcode
 FROM pg_operator AS p1, pg_operator AS p2
 WHERE p1.oprgtcmpop = p2.oid AND
-    (p1.oprname != '=' OR p2.oprname != '>' OR
+    (p1.oprname NOT IN ('=', '~=~') OR p2.oprname NOT IN ('>', '~>~') OR
      p1.oprkind != 'b' OR p2.oprkind != 'b' OR
      p1.oprleft != p2.oprleft OR
      p1.oprright != p2.oprright OR
@@ -355,7 +355,7 @@ SELECT p1.oid, p1.oprname
 FROM pg_operator AS p1
 WHERE p1.oprcanhash AND NOT
     (p1.oprkind = 'b' AND p1.oprresult = 'bool'::regtype AND
-     p1.oprleft = p1.oprright AND p1.oprname = '=' AND p1.oprcom = p1.oid);
+     p1.oprleft = p1.oprright AND p1.oprname IN ('=', '~=~') AND p1.oprcom = p1.oid);
 
 -- In 6.5 we accepted hashable array equality operators when the array element
 -- type is hashable.  However, what we actually need to make hashjoin work on
-- 
GitLab