From f27976c85b1fb9002727cce65b9f9567e158f754 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Sun, 1 Feb 2004 06:27:48 +0000 Subject: [PATCH] Make length() disregard trailing spaces in char(n) values, per discussion some time ago and recent patch from Gavin Sherry. Update documentation to point out that trailing spaces are insignificant in char(n). --- doc/src/sgml/datatype.sgml | 20 ++++++++++++++++++-- src/backend/utils/adt/varchar.c | 16 +++++++++------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 7bbe676a9a1..bcfe1690588 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.140 2004/01/20 22:46:06 tgl Exp $ +$PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.141 2004/02/01 06:27:48 tgl Exp $ --> <chapter id="datatype"> @@ -901,6 +901,18 @@ CREATE TABLE <replaceable class="parameter">tablename</replaceable> ( management systems have it as well. </para> + <para> + Values of type <type>character</type> are physically padded + with spaces to the specified width <replaceable>n</>, and are + stored and displayed that way. However, the padding spaces are + treated as semantically insignificant. Trailing spaces are + disregarded when comparing two values of type <type>character</type>, + and they will be removed when converting a <type>character</type> value + to one of the other string types. Note that trailing spaces + <emphasis>are</> semantically significant in + <type>character varying</type> and <type>text</type> values. + </para> + <para> The storage requirement for data of these types is 4 bytes plus the actual string, and in case of <type>character</type> plus the @@ -922,7 +934,11 @@ CREATE TABLE <replaceable class="parameter">tablename</replaceable> ( <para> There are no performance differences between these three types, apart from the increased storage size when using the blank-padded - type. + type. While <type>character(<replaceable>n</>)</type> has performance + advantages in some other database systems, it has no such advantages in + <productname>PostgreSQL</productname>. In most situations + <type>text</type> or <type>character varying</type> should be used + instead. </para> </tip> diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 2c10ca1485a..6b26a91192c 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.103 2003/11/29 19:51:59 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.104 2004/02/01 06:27:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -510,14 +510,16 @@ Datum bpcharlen(PG_FUNCTION_ARGS) { BpChar *arg = PG_GETARG_BPCHAR_P(0); + int len; - /* optimization for single byte encoding */ - if (pg_database_encoding_max_length() <= 1) - PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ); + /* get number of bytes, ignoring trailing spaces */ + len = bcTruelen(arg); + + /* in multibyte encoding, convert to number of characters */ + if (pg_database_encoding_max_length() != 1) + len = pg_mbstrlen_with_len(VARDATA(arg), len); - PG_RETURN_INT32( - pg_mbstrlen_with_len(VARDATA(arg), VARSIZE(arg) - VARHDRSZ) - ); + PG_RETURN_INT32(len); } Datum -- GitLab