From a44af6df859f38247317d1714c2d941961e42919 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Mon, 13 Aug 2007 01:18:47 +0000 Subject: [PATCH] Document that the regexp split functions ignore zero-length matches in certain corner cases. Per discussion, the code does what we want, but it really needs to be documented that these functions act differently from regexp_matches. --- doc/src/sgml/func.sgml | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 4e5f8f1148a..52858efc600 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.384 2007/08/11 03:56:24 tgl Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.385 2007/08/13 01:18:47 tgl Exp $ --> <chapter id="functions"> <title>Functions and Operators</title> @@ -3383,10 +3383,12 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; </para> <para> - <productname>PostgreSQL</productname>'s regular expressions are implemented - using a package written by Henry Spencer. Much of - the description of regular expressions below is copied verbatim from his - manual entry. + As the last example demonstrates, the regexp split functions ignore + zero-length matches that occur at the start or end of the string + or immediately after a previous match. This is contrary to the strict + definition of regexp matching that is implemented by + <function>regexp_matches</>, but is usually the most convenient behavior + in practice. Other software systems such as Perl use similar definitions. </para> <!-- derived from the re_syntax.n man page --> @@ -3394,6 +3396,13 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; <sect3 id="posix-syntax-details"> <title>Regular Expression Details</title> + <para> + <productname>PostgreSQL</productname>'s regular expressions are implemented + using a package written by Henry Spencer. Much of + the description of regular expressions below is copied verbatim from his + manual entry. + </para> + <para> Regular expressions (<acronym>RE</acronym>s), as defined in <acronym>POSIX</acronym> 1003.2, come in two forms: -- GitLab