From cf9b0fea5f6d1bcc9b2c66f5c30ecb04684a0919 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Wed, 17 Aug 2016 18:32:56 -0400 Subject: [PATCH] Implement regexp_match(), a simplified alternative to regexp_matches(). regexp_match() is like regexp_matches(), but it disallows the 'g' flag and in consequence does not need to return a set. Instead, it returns a simple text array value, or NULL if there's no match. Previously people usually got that behavior with a sub-select, but this way is considerably more efficient. Documentation adjusted so that regexp_match() is presented first and then regexp_matches() is introduced as a more complicated version. This is a bit historically revisionist but seems pedagogically better. Still TODO: extend contrib/citext to support this function. Emre Hasegeli, reviewed by David Johnston Discussion: <CAE2gYzy42sna2ME_e3y1KLQ-4UBrB-eVF0SWn8QG39sQSeVhEw@mail.gmail.com> --- doc/src/sgml/func.sgml | 155 +++++++++++++++------ src/backend/catalog/information_schema.sql | 2 +- src/backend/utils/adt/regexp.c | 137 ++++++++++++------ src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.h | 8 +- src/include/utils/builtins.h | 2 + src/test/regress/expected/regex.out | 28 ++++ src/test/regress/expected/strings.out | 4 +- src/test/regress/sql/regex.sql | 7 + 9 files changed, 252 insertions(+), 93 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 426e562b036..169a385a9cc 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -2036,6 +2036,23 @@ <entry><literal>'42.5'</literal></entry> </row> + <row> + <entry> + <indexterm> + <primary>regexp_match</primary> + </indexterm> + <literal><function>regexp_match(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</function></literal> + </entry> + <entry><type>text[]</type></entry> + <entry> + Return captured substring(s) resulting from the first match of a POSIX + regular expression to the <parameter>string</parameter>. See + <xref linkend="functions-posix-regexp"> for more information. + </entry> + <entry><literal>regexp_match('foobarbequebaz', '(bar)(beque)')</literal></entry> + <entry><literal>{bar,beque}</literal></entry> + </row> + <row> <entry> <indexterm> @@ -2045,12 +2062,12 @@ </entry> <entry><type>setof text[]</type></entry> <entry> - Return all captured substrings resulting from matching a POSIX regular - expression against the <parameter>string</parameter>. See + Return captured substring(s) resulting from matching a POSIX regular + expression to the <parameter>string</parameter>. See <xref linkend="functions-posix-regexp"> for more information. </entry> - <entry><literal>regexp_matches('foobarbequebaz', '(bar)(beque)')</literal></entry> - <entry><literal>{bar,beque}</literal></entry> + <entry><literal>regexp_matches('foobarbequebaz', 'ba.', 'g')</literal></entry> + <entry><literal>{bar}</literal><para><literal>{baz}</literal></para> (2 rows)</entry> </row> <row> @@ -4112,6 +4129,9 @@ substring('foobar' from '#"o_b#"%' for '#') <lineannotation>NULL</lineannotat <indexterm> <primary>regexp_replace</primary> </indexterm> + <indexterm> + <primary>regexp_match</primary> + </indexterm> <indexterm> <primary>regexp_matches</primary> </indexterm> @@ -4272,64 +4292,106 @@ regexp_replace('foobarbaz', 'b(..)', E'X\\1Y', 'g') </para> <para> - The <function>regexp_matches</> function returns a text array of - all of the captured substrings resulting from matching a POSIX - regular expression pattern. It has the syntax - <function>regexp_matches</function>(<replaceable>string</>, <replaceable>pattern</> - <optional>, <replaceable>flags</> </optional>). - The function can return no rows, one row, or multiple rows (see - the <literal>g</> flag below). If the <replaceable>pattern</> - does not match, the function returns no rows. If the pattern - contains no parenthesized subexpressions, then each row - returned is a single-element text array containing the substring - matching the whole pattern. If the pattern contains parenthesized - subexpressions, the function returns a text array whose - <replaceable>n</>'th element is the substring matching the - <replaceable>n</>'th parenthesized subexpression of the pattern - (not counting <quote>non-capturing</> parentheses; see below for - details). - The <replaceable>flags</> parameter is an optional text - string containing zero or more single-letter flags that change the - function's behavior. Flag <literal>g</> causes the function to find - each match in the string, not only the first one, and return a row for - each such match. Supported flags (though - not <literal>g</>) - are described in <xref linkend="posix-embedded-options-table">. + The <function>regexp_match</> function returns a text array of + captured substring(s) resulting from the first match of a POSIX + regular expression pattern to a string. It has the syntax + <function>regexp_match</function>(<replaceable>string</>, + <replaceable>pattern</> <optional>, <replaceable>flags</> </optional>). + If there is no match, the result is <literal>NULL</>. + If a match is found, and the <replaceable>pattern</> contains no + parenthesized subexpressions, then the result is a single-element text + array containing the substring matching the whole pattern. + If a match is found, and the <replaceable>pattern</> contains + parenthesized subexpressions, then the result is a text array + whose <replaceable>n</>'th element is the substring matching + the <replaceable>n</>'th parenthesized subexpression of + the <replaceable>pattern</> (not counting <quote>non-capturing</> + parentheses; see below for details). + The <replaceable>flags</> parameter is an optional text string + containing zero or more single-letter flags that change the function's + behavior. Supported flags are described + in <xref linkend="posix-embedded-options-table">. </para> <para> Some examples: <programlisting> -SELECT regexp_matches('foobarbequebaz', '(bar)(beque)'); - regexp_matches ----------------- +SELECT regexp_match('foobarbequebaz', 'bar.*que'); + regexp_match +-------------- + {barbeque} +(1 row) + +SELECT regexp_match('foobarbequebaz', '(bar)(beque)'); + regexp_match +-------------- {bar,beque} (1 row) +</programlisting> + In the common case where you just want the whole matching substring + or <literal>NULL</> for no match, write something like +<programlisting> +SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1]; + regexp_match +-------------- + barbeque +(1 row) +</programlisting> + </para> + + <para> + The <function>regexp_matches</> function returns a set of text arrays + of captured substring(s) resulting from matching a POSIX regular + expression pattern to a string. It has the same syntax as + <function>regexp_match</function>. + This function returns no rows if there is no match, one row if there is + a match and the <literal>g</> flag is not given, or <replaceable>N</> + rows if there are <replaceable>N</> matches and the <literal>g</> flag + is given. Each returned row is a text array containing the whole + matched substring or the substrings matching parenthesized + subexpressions of the <replaceable>pattern</>, just as described above + for <function>regexp_match</function>. + <function>regexp_matches</> accepts all the flags shown + in <xref linkend="posix-embedded-options-table">, plus + the <literal>g</> flag which commands it to return all matches, not + just the first one. + </para> + + <para> + Some examples: +<programlisting> + SELECT regexp_matches('foo', 'not there'); + regexp_matches +---------------- +(0 rows) SELECT regexp_matches('foobarbequebazilbarfbonk', '(b[^b]+)(b[^b]+)', 'g'); - regexp_matches + regexp_matches ---------------- {bar,beque} {bazil,barf} (2 rows) - -SELECT regexp_matches('foobarbequebaz', 'barbeque'); - regexp_matches ----------------- - {barbeque} -(1 row) </programlisting> </para> - <para> - It is possible to force <function>regexp_matches()</> to always - return one row by using a sub-select; this is particularly useful - in a <literal>SELECT</> target list when you want all rows - returned, even non-matching ones: + <tip> + <para> + In most cases <function>regexp_matches()</> should be used with + the <literal>g</> flag, since if you only want the first match, it's + easier and more efficient to use <function>regexp_match()</>. + However, <function>regexp_match()</> only exists + in <productname>PostgreSQL</> version 10 and up. When working in older + versions, a common trick is to place a <function>regexp_matches()</> + call in a sub-select, for example: <programlisting> SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab; </programlisting> - </para> + This produces a text array if there's a match, or <literal>NULL</> if + not, the same as <function>regexp_match()</> would do. Without the + sub-select, this query would produce no output at all for table rows + without a match, which is typically not the desired behavior. + </para> + </tip> <para> The <function>regexp_split_to_table</> function splits a string using a POSIX @@ -4408,6 +4470,7 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; zero-length matches that occur at the start or end of the string or immediately after a previous match. This is contrary to the strict definition of regexp matching that is implemented by + <function>regexp_match</> and <function>regexp_matches</>, but is usually the most convenient behavior in practice. Other software systems such as Perl use similar definitions. </para> @@ -5482,7 +5545,7 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); into the digits and the parts before and after them. We might try to do that like this: <screen> -SELECT regexp_matches('abc01234xyz', '(.*)(\d+)(.*)'); +SELECT regexp_match('abc01234xyz', '(.*)(\d+)(.*)'); <lineannotation>Result: </lineannotation><computeroutput>{abc0123,4,xyz}</computeroutput> </screen> That didn't work: the first <literal>.*</> is greedy so @@ -5490,14 +5553,14 @@ SELECT regexp_matches('abc01234xyz', '(.*)(\d+)(.*)'); match at the last possible place, the last digit. We might try to fix that by making it non-greedy: <screen> -SELECT regexp_matches('abc01234xyz', '(.*?)(\d+)(.*)'); +SELECT regexp_match('abc01234xyz', '(.*?)(\d+)(.*)'); <lineannotation>Result: </lineannotation><computeroutput>{abc,0,""}</computeroutput> </screen> That didn't work either, because now the RE as a whole is non-greedy and so it ends the overall match as soon as possible. We can get what we want by forcing the RE as a whole to be greedy: <screen> -SELECT regexp_matches('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); +SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); <lineannotation>Result: </lineannotation><computeroutput>{abc,01234,xyz}</computeroutput> </screen> Controlling the RE's overall greediness separately from its components' diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql index 18be08fead5..00550eb8044 100644 --- a/src/backend/catalog/information_schema.sql +++ b/src/backend/catalog/information_schema.sql @@ -2068,7 +2068,7 @@ CREATE VIEW triggers AS -- XXX strange hacks follow CAST( CASE WHEN pg_has_role(c.relowner, 'USAGE') - THEN (SELECT m[1] FROM regexp_matches(pg_get_triggerdef(t.oid), E'.{35,} WHEN \\((.+)\\) EXECUTE PROCEDURE') AS rm(m) LIMIT 1) + THEN (regexp_match(pg_get_triggerdef(t.oid), E'.{35,} WHEN \\((.+)\\) EXECUTE PROCEDURE'))[1] ELSE null END AS character_data) AS action_condition, CAST( diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index 5b216e0b721..bc5e34e222b 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -47,7 +47,7 @@ typedef struct pg_re_flags bool glob; /* do it globally (for each occurrence) */ } pg_re_flags; -/* cross-call state for regexp_matches(), also regexp_split() */ +/* cross-call state for regexp_match and regexp_split functions */ typedef struct regexp_matches_ctx { text *orig_str; /* data string in original TEXT form */ @@ -57,7 +57,7 @@ typedef struct regexp_matches_ctx /* so the number of entries in match_locs is nmatches * npatterns * 2 */ int *match_locs; /* 0-based character indexes */ int next_match; /* 0-based index of next match to process */ - /* workspace for build_regexp_matches_result() */ + /* workspace for build_regexp_match_result() */ Datum *elems; /* has npatterns elements */ bool *nulls; /* has npatterns elements */ } regexp_matches_ctx; @@ -107,13 +107,12 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */ /* Local functions */ static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern, - text *flags, + pg_re_flags *flags, Oid collation, - bool force_glob, bool use_subpatterns, bool ignore_degenerate); static void cleanup_regexp_matches(regexp_matches_ctx *matchctx); -static ArrayType *build_regexp_matches_result(regexp_matches_ctx *matchctx); +static ArrayType *build_regexp_match_result(regexp_matches_ctx *matchctx); static Datum build_regexp_split_result(regexp_matches_ctx *splitctx); @@ -350,7 +349,7 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len, /* - * parse_re_flags - parse the options argument of regexp_matches and friends + * parse_re_flags - parse the options argument of regexp_match and friends * * flags --- output argument, filled with desired options * opts --- TEXT object, or NULL for defaults @@ -840,9 +839,53 @@ similar_escape(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(result); } +/* + * regexp_match() + * Return the first substring(s) matching a pattern within a string. + */ +Datum +regexp_match(PG_FUNCTION_ARGS) +{ + text *orig_str = PG_GETARG_TEXT_PP(0); + text *pattern = PG_GETARG_TEXT_PP(1); + text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2); + pg_re_flags re_flags; + regexp_matches_ctx *matchctx; + + /* Determine options */ + parse_re_flags(&re_flags, flags); + /* User mustn't specify 'g' */ + if (re_flags.glob) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("regexp_match does not support the global option"), + errhint("Use the regexp_matches function instead."))); + + matchctx = setup_regexp_matches(orig_str, pattern, &re_flags, + PG_GET_COLLATION(), true, false); + + if (matchctx->nmatches == 0) + PG_RETURN_NULL(); + + Assert(matchctx->nmatches == 1); + + /* Create workspace that build_regexp_match_result needs */ + matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns); + matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns); + + PG_RETURN_DATUM(PointerGetDatum(build_regexp_match_result(matchctx))); +} + +/* This is separate to keep the opr_sanity regression test from complaining */ +Datum +regexp_match_no_flags(PG_FUNCTION_ARGS) +{ + return regexp_match(fcinfo); +} + /* * regexp_matches() - * Return a table of matches of a pattern within a string. + * Return a table of all matches of a pattern within a string. */ Datum regexp_matches(PG_FUNCTION_ARGS) @@ -854,18 +897,22 @@ regexp_matches(PG_FUNCTION_ARGS) { text *pattern = PG_GETARG_TEXT_PP(1); text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2); + pg_re_flags re_flags; MemoryContext oldcontext; funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + /* Determine options */ + parse_re_flags(&re_flags, flags); + /* be sure to copy the input string into the multi-call ctx */ matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern, - flags, + &re_flags, PG_GET_COLLATION(), - false, true, false); + true, false); - /* Pre-create workspace that build_regexp_matches_result needs */ + /* Pre-create workspace that build_regexp_match_result needs */ matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns); matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns); @@ -880,7 +927,7 @@ regexp_matches(PG_FUNCTION_ARGS) { ArrayType *result_ary; - result_ary = build_regexp_matches_result(matchctx); + result_ary = build_regexp_match_result(matchctx); matchctx->next_match++; SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary)); } @@ -899,28 +946,27 @@ regexp_matches_no_flags(PG_FUNCTION_ARGS) } /* - * setup_regexp_matches --- do the initial matching for regexp_matches() - * or regexp_split() + * setup_regexp_matches --- do the initial matching for regexp_match + * and regexp_split functions * * To avoid having to re-find the compiled pattern on each call, we do * all the matching in one swoop. The returned regexp_matches_ctx contains * the locations of all the substrings matching the pattern. * - * The three bool parameters have only two patterns (one for each caller) - * but it seems clearer to distinguish the functionality this way than to - * key it all off one "is_split" flag. + * The two bool parameters have only two patterns (one for matching, one for + * splitting) but it seems clearer to distinguish the functionality this way + * than to key it all off one "is_split" flag. */ static regexp_matches_ctx * -setup_regexp_matches(text *orig_str, text *pattern, text *flags, +setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags, Oid collation, - bool force_glob, bool use_subpatterns, + bool use_subpatterns, bool ignore_degenerate) { regexp_matches_ctx *matchctx = palloc0(sizeof(regexp_matches_ctx)); int orig_len; pg_wchar *wide_str; int wide_len; - pg_re_flags re_flags; regex_t *cpattern; regmatch_t *pmatch; int pmatch_len; @@ -937,21 +983,8 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags, wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1)); wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len); - /* determine options */ - parse_re_flags(&re_flags, flags); - if (force_glob) - { - /* user mustn't specify 'g' for regexp_split */ - if (re_flags.glob) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("regexp_split does not support the global option"))); - /* but we find all the matches anyway */ - re_flags.glob = true; - } - /* set up the compiled pattern */ - cpattern = RE_compile_and_cache(pattern, re_flags.cflags, collation); + cpattern = RE_compile_and_cache(pattern, re_flags->cflags, collation); /* do we want to remember subpatterns? */ if (use_subpatterns && cpattern->re_nsub > 0) @@ -970,7 +1003,7 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags, pmatch = palloc(sizeof(regmatch_t) * pmatch_len); /* the real output space (grown dynamically if needed) */ - array_len = re_flags.glob ? 256 : 32; + array_len = re_flags->glob ? 256 : 32; matchctx->match_locs = (int *) palloc(sizeof(int) * array_len); array_idx = 0; @@ -1018,7 +1051,7 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags, prev_match_end = pmatch[0].rm_eo; /* if not glob, stop after one match */ - if (!re_flags.glob) + if (!re_flags->glob) break; /* @@ -1057,10 +1090,10 @@ cleanup_regexp_matches(regexp_matches_ctx *matchctx) } /* - * build_regexp_matches_result - build output array for current match + * build_regexp_match_result - build output array for current match */ static ArrayType * -build_regexp_matches_result(regexp_matches_ctx *matchctx) +build_regexp_match_result(regexp_matches_ctx *matchctx) { Datum *elems = matchctx->elems; bool *nulls = matchctx->nulls; @@ -1114,16 +1147,27 @@ regexp_split_to_table(PG_FUNCTION_ARGS) { text *pattern = PG_GETARG_TEXT_PP(1); text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2); + pg_re_flags re_flags; MemoryContext oldcontext; funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + /* Determine options */ + parse_re_flags(&re_flags, flags); + /* User mustn't specify 'g' */ + if (re_flags.glob) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("regexp_split_to_table does not support the global option"))); + /* But we find all the matches anyway */ + re_flags.glob = true; + /* be sure to copy the input string into the multi-call ctx */ splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern, - flags, + &re_flags, PG_GET_COLLATION(), - true, false, true); + false, true); MemoryContextSwitchTo(oldcontext); funcctx->user_fctx = (void *) splitctx; @@ -1162,13 +1206,24 @@ Datum regexp_split_to_array(PG_FUNCTION_ARGS) { ArrayBuildState *astate = NULL; + pg_re_flags re_flags; regexp_matches_ctx *splitctx; + /* Determine options */ + parse_re_flags(&re_flags, PG_GETARG_TEXT_PP_IF_EXISTS(2)); + /* User mustn't specify 'g' */ + if (re_flags.glob) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("regexp_split_to_array does not support the global option"))); + /* But we find all the matches anyway */ + re_flags.glob = true; + splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0), PG_GETARG_TEXT_PP(1), - PG_GETARG_TEXT_PP_IF_EXISTS(2), + &re_flags, PG_GET_COLLATION(), - true, false, true); + false, true); while (splitctx->next_match <= splitctx->nmatches) { diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 82810c8fbae..fb356bf3cd8 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201608161 +#define CATALOG_VERSION_NO 201608171 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index af19c1a82b6..6fed7a0d198 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -1912,10 +1912,14 @@ DATA(insert OID = 2284 ( regexp_replace PGNSP PGUID 12 1 0 0 0 f f f f t f i DESCR("replace text using regexp"); DATA(insert OID = 2285 ( regexp_replace PGNSP PGUID 12 1 0 0 0 f f f f t f i s 4 0 25 "25 25 25 25" _null_ _null_ _null_ _null_ _null_ textregexreplace _null_ _null_ _null_ )); DESCR("replace text using regexp"); +DATA(insert OID = 3396 ( regexp_match PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_match_no_flags _null_ _null_ _null_ )); +DESCR("find first match for regexp"); +DATA(insert OID = 3397 ( regexp_match PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_match _null_ _null_ _null_ )); +DESCR("find first match for regexp"); DATA(insert OID = 2763 ( regexp_matches PGNSP PGUID 12 1 1 0 0 f f f f t t i s 2 0 1009 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_matches_no_flags _null_ _null_ _null_ )); -DESCR("find all match groups for regexp"); +DESCR("find match(es) for regexp"); DATA(insert OID = 2764 ( regexp_matches PGNSP PGUID 12 1 10 0 0 f f f f t t i s 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_matches _null_ _null_ _null_ )); -DESCR("find all match groups for regexp"); +DESCR("find match(es) for regexp"); DATA(insert OID = 2088 ( split_part PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 25 "25 25 23" _null_ _null_ _null_ _null_ _null_ split_text _null_ _null_ _null_ )); DESCR("split string by field_sep and return field_num"); DATA(insert OID = 2765 ( regexp_split_to_table PGNSP PGUID 12 1 1000 0 0 f f f f t t i s 2 0 25 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_split_to_table_no_flags _null_ _null_ _null_ )); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index a91be981b98..40e25c88247 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -628,6 +628,8 @@ extern Datum textregexsubstr(PG_FUNCTION_ARGS); extern Datum textregexreplace_noopt(PG_FUNCTION_ARGS); extern Datum textregexreplace(PG_FUNCTION_ARGS); extern Datum similar_escape(PG_FUNCTION_ARGS); +extern Datum regexp_match(PG_FUNCTION_ARGS); +extern Datum regexp_match_no_flags(PG_FUNCTION_ARGS); extern Datum regexp_matches(PG_FUNCTION_ARGS); extern Datum regexp_matches_no_flags(PG_FUNCTION_ARGS); extern Datum regexp_split_to_table(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out index af097193c52..79a7fa7a845 100644 --- a/src/test/regress/expected/regex.out +++ b/src/test/regress/expected/regex.out @@ -90,6 +90,34 @@ select substring('a' from '((a)+)'); a (1 row) +-- Test regexp_match() +select regexp_match('abc', ''); + regexp_match +-------------- + {""} +(1 row) + +select regexp_match('abc', 'bc'); + regexp_match +-------------- + {bc} +(1 row) + +select regexp_match('abc', 'd') is null; + ?column? +---------- + t +(1 row) + +select regexp_match('abc', '(B)(c)', 'i'); + regexp_match +-------------- + {b,c} +(1 row) + +select regexp_match('abc', 'Bd', 'ig'); -- error +ERROR: regexp_match does not support the global option +HINT: Use the regexp_matches function instead. -- Test lookahead constraints select regexp_matches('ab', 'a(?=b)b*'); regexp_matches diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 19708c32fdd..35cadb24aa1 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -681,9 +681,9 @@ SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', ERROR: invalid regexp option: "z" -- global option meaningless for regexp_split SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'g') AS foo; -ERROR: regexp_split does not support the global option +ERROR: regexp_split_to_table does not support the global option SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'g'); -ERROR: regexp_split does not support the global option +ERROR: regexp_split_to_array does not support the global option -- change NULL-display back \pset null '' -- E021-11 position expression diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql index 1028ca6dcdc..1361b625707 100644 --- a/src/test/regress/sql/regex.sql +++ b/src/test/regress/sql/regex.sql @@ -25,6 +25,13 @@ select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)'); select substring('a' from '((a))+'); select substring('a' from '((a)+)'); +-- Test regexp_match() +select regexp_match('abc', ''); +select regexp_match('abc', 'bc'); +select regexp_match('abc', 'd') is null; +select regexp_match('abc', '(B)(c)', 'i'); +select regexp_match('abc', 'Bd', 'ig'); -- error + -- Test lookahead constraints select regexp_matches('ab', 'a(?=b)b*'); select regexp_matches('a', 'a(?=b)b*'); -- GitLab