Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
P
postgres-lambda-diff
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Jakob Huber
postgres-lambda-diff
Commits
b87b52bf
Commit
b87b52bf
authored
16 years ago
by
Teodor Sigaev
Browse files
Options
Downloads
Patches
Plain Diff
Support of multibyte encoding for pg_trgm
parent
e4ffd143
No related branches found
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
contrib/pg_trgm/trgm.h
+9
-2
9 additions, 2 deletions
contrib/pg_trgm/trgm.h
contrib/pg_trgm/trgm_gin.c
+2
-2
2 additions, 2 deletions
contrib/pg_trgm/trgm_gin.c
contrib/pg_trgm/trgm_op.c
+150
-84
150 additions, 84 deletions
contrib/pg_trgm/trgm_op.c
with
161 additions
and
88 deletions
contrib/pg_trgm/trgm.h
+
9
−
2
View file @
b87b52bf
/*
/*
* $PostgreSQL: pgsql/contrib/pg_trgm/trgm.h,v 1.
9
2008/
05/17 01:28:21 adunstan
Exp $
* $PostgreSQL: pgsql/contrib/pg_trgm/trgm.h,v 1.
10
2008/
11/12 13:43:54 teodor
Exp $
*/
*/
#ifndef __TRGM_H__
#ifndef __TRGM_H__
#define __TRGM_H__
#define __TRGM_H__
...
@@ -31,7 +31,14 @@ typedef char trgm[3];
...
@@ -31,7 +31,14 @@ typedef char trgm[3];
*(((char*)(a))+2) = *(((char*)(b))+2); \
*(((char*)(a))+2) = *(((char*)(b))+2); \
} while(0);
} while(0);
#define TRGMINT(a) ( (*(((char*)(a))+2)<<16)+(*(((char*)(a))+1)<<8)+*(((char*)(a))+0) )
uint32
trgm2int
(
trgm
*
ptr
);
/*
 * ISPRINTABLECHAR(a): a points at one byte.  The byte must be ASCII; under
 * KEEPONLYALNUM it must additionally be alphanumeric or a space, otherwise
 * it must be printable according to isprint().
 *
 * ISPRINTABLETRGM(t): true when the three consecutive bytes starting at t
 * all satisfy ISPRINTABLECHAR.  Note that the argument t is not
 * parenthesized inside the cast, so pass a plain pointer expression.
 */
#ifdef KEEPONLYALNUM
#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
#else
#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
#endif
#define ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)t) ) && ISPRINTABLECHAR( ((char*)t)+1 ) && ISPRINTABLECHAR( ((char*)t)+2 ) )
typedef
struct
typedef
struct
{
{
...
...
This diff is collapsed.
Click to expand it.
contrib/pg_trgm/trgm_gin.c
+
2
−
2
View file @
b87b52bf
/*
/*
* $PostgreSQL: pgsql/contrib/pg_trgm/trgm_gin.c,v 1.
5
2008/
07/11 11:56:48
teodor Exp $
* $PostgreSQL: pgsql/contrib/pg_trgm/trgm_gin.c,v 1.
6
2008/
11/12 13:43:54
teodor Exp $
*/
*/
#include
"trgm.h"
#include
"trgm.h"
...
@@ -42,7 +42,7 @@ gin_extract_trgm(PG_FUNCTION_ARGS)
...
@@ -42,7 +42,7 @@ gin_extract_trgm(PG_FUNCTION_ARGS)
ptr
=
GETARR
(
trg
);
ptr
=
GETARR
(
trg
);
while
(
ptr
-
GETARR
(
trg
)
<
ARRNELEM
(
trg
))
while
(
ptr
-
GETARR
(
trg
)
<
ARRNELEM
(
trg
))
{
{
item
=
TRGMINT
(
ptr
);
item
=
trgm2int
(
ptr
);
entries
[
i
++
]
=
Int32GetDatum
(
item
);
entries
[
i
++
]
=
Int32GetDatum
(
item
);
ptr
++
;
ptr
++
;
...
...
This diff is collapsed.
Click to expand it.
contrib/pg_trgm/trgm_op.c
+
150
−
84
View file @
b87b52bf
/*
/*
* $PostgreSQL: pgsql/contrib/pg_trgm/trgm_op.c,v 1.1
0
2008/
05/17 01:28:21 adunstan
Exp $
* $PostgreSQL: pgsql/contrib/pg_trgm/trgm_op.c,v 1.1
1
2008/
11/12 13:43:54 teodor
Exp $
*/
*/
#include
"trgm.h"
#include
"trgm.h"
#include
<ctype.h>
#include
<ctype.h>
#include
"utils/array.h"
#include
"utils/array.h"
#include
"catalog/pg_type.h"
#include
"catalog/pg_type.h"
#include
"tsearch/ts_locale.h"
PG_MODULE_MAGIC
;
PG_MODULE_MAGIC
;
...
@@ -31,9 +32,6 @@ show_limit(PG_FUNCTION_ARGS)
...
@@ -31,9 +32,6 @@ show_limit(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT4
(
trgm_limit
);
PG_RETURN_FLOAT4
(
trgm_limit
);
}
}
#define WORDWAIT 0
#define INWORD 1
static
int
static
int
comp_trgm
(
const
void
*
a
,
const
void
*
b
)
comp_trgm
(
const
void
*
a
,
const
void
*
b
)
{
{
...
@@ -60,18 +58,119 @@ unique_array(trgm * a, int len)
...
@@ -60,18 +58,119 @@ unique_array(trgm * a, int len)
return
curend
+
1
-
a
;
return
curend
+
1
-
a
;
}
}
/*
 * iswordchr(c): decides whether the character at c is part of a word.
 * find_word() passes a char pointer to the start of the character, not a
 * character value.  Under KEEPONLYALNUM only alphabetic characters and
 * digits count; otherwise everything except whitespace does.
 * NOTE(review): t_isalpha/t_isdigit/t_isspace presumably come from
 * tsearch/ts_locale.h and are multibyte-aware -- confirm.
 */
#ifdef KEEPONLYALNUM
#define iswordchr(c) (t_isalpha(c) || t_isdigit(c))
#else
#define iswordchr(c) (!t_isspace(c))
#endif
/*
 * Locate the first word within the first lenstr bytes of str.
 *
 * Returns a pointer to the first byte of the word, or NULL when no word
 * character is found before the end of the range.  On success *endword is
 * set to the byte just past the word and *charlen to the number of
 * characters (not bytes) the word contains.  When NULL is returned neither
 * output is touched.
 */
static char *
find_word(char *str, int lenstr, char **endword, int *charlen)
{
	char	   *wordstart;
	char	   *wordstop;
	int			nchars;

	/* step over leading non-word characters, one whole character at a time */
	for (wordstart = str;
		 wordstart - str < lenstr && !iswordchr(wordstart);
		 wordstart += pg_mblen(wordstart))
		;

	/* ran off the end of the range: there is no word here */
	if (wordstart - str >= lenstr)
		return NULL;

	/* walk to the end of the word, counting characters as we go */
	nchars = 0;
	for (wordstop = wordstart;
		 wordstop - str < lenstr && iswordchr(wordstop);
		 wordstop += pg_mblen(wordstop))
		nchars++;

	*endword = wordstop;
	*charlen = nchars;

	return wordstart;
}
#ifdef USE_WIDE_UPPER_LOWER
/*
 * Store into *tptr the trigram for the bytelen-byte sequence at str.
 *
 * A sequence of exactly three bytes is copied unchanged.  Any other length
 * is reduced to three bytes by computing its CRC32 and keeping part of the
 * checksum.
 */
static void
cnt_trigram(trgm *tptr, char *str, int bytelen)
{
	pg_crc32	hash;

	/* exactly three bytes already fit a trgm: store them as they are */
	if (bytelen == 3)
	{
		CPTRGM(tptr, str);
		return;
	}

	INIT_CRC32(hash);
	COMP_CRC32(hash, str, bytelen);
	FIN_CRC32(hash);

	/*
	 * Keep only three bytes of the checksum and hope that is a good enough
	 * hash.  CPTRGM copies from the address of the checksum, so which three
	 * bytes of its value are kept depends on the platform byte order.
	 */
	CPTRGM(tptr, &hash);
}
#endif
/*
 * Append the trigrams of one (already padded) word to the output array.
 *
 * tptr    - next free slot in the output array
 * str     - the padded word
 * bytelen - length of the padded word in bytes
 * charlen - length of the padded word in characters
 *
 * Returns the slot following the last trigram written; that is tptr itself
 * when the word has fewer than three characters.
 */
static trgm *
make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
{
	char	   *cur = str;

	/* fewer than three characters cannot form a trigram */
	if (charlen < 3)
		return tptr;

#ifdef USE_WIDE_UPPER_LOWER
	if (pg_database_encoding_max_length() > 1)
	{
		/* byte lengths of the three characters in the sliding window */
		int			len1 = pg_mblen(str);
		int			len2 = pg_mblen(str + len1);
		int			len3 = pg_mblen(str + len1 + len2);

		for (;;)
		{
			int			winlen = len1 + len2 + len3;

			/* stop once the window would reach past the end of the word */
			if ((cur - str) + winlen > bytelen)
				break;

			cnt_trigram(tptr, cur, winlen);
			tptr++;

			/* slide the window forward by one character */
			cur += len1;
			len1 = len2;
			len2 = len3;
			len3 = pg_mblen(cur + len1 + len2);
		}

		return tptr;
	}
#endif

	/* single-byte path: every character is one byte */
	Assert(bytelen == charlen);

	/* number of trigrams = strlen - 2 */
	for (; cur - str < bytelen - 2; cur++, tptr++)
	{
		CPTRGM(tptr, cur);
	}

	return tptr;
}
TRGM
*
TRGM
*
generate_trgm
(
char
*
str
,
int
slen
)
generate_trgm
(
char
*
str
,
int
slen
)
{
{
TRGM
*
trg
;
TRGM
*
trg
;
char
*
buf
,
char
*
buf
;
*
sptr
,
*
bufptr
;
trgm
*
tptr
;
trgm
*
tptr
;
int
state
=
WORDWAIT
;
int
len
,
int
wl
,
charlen
,
len
;
bytelen
;
char
*
bword
,
*
eword
;
trg
=
(
TRGM
*
)
palloc
(
TRGMHDRSIZE
+
sizeof
(
trgm
)
*
(
slen
/
2
+
1
)
*
3
);
trg
=
(
TRGM
*
)
palloc
(
TRGMHDRSIZE
+
sizeof
(
trgm
)
*
(
slen
/
2
+
1
)
*
3
);
trg
->
flag
=
ARRKEY
;
trg
->
flag
=
ARRKEY
;
...
@@ -83,7 +182,6 @@ generate_trgm(char *str, int slen)
...
@@ -83,7 +182,6 @@ generate_trgm(char *str, int slen)
tptr
=
GETARR
(
trg
);
tptr
=
GETARR
(
trg
);
buf
=
palloc
(
sizeof
(
char
)
*
(
slen
+
4
));
buf
=
palloc
(
sizeof
(
char
)
*
(
slen
+
4
));
sptr
=
str
;
if
(
LPADDING
>
0
)
if
(
LPADDING
>
0
)
{
{
...
@@ -92,82 +190,29 @@ generate_trgm(char *str, int slen)
...
@@ -92,82 +190,29 @@ generate_trgm(char *str, int slen)
*
(
buf
+
1
)
=
' '
;
*
(
buf
+
1
)
=
' '
;
}
}
bufptr
=
buf
+
LPADDING
;
eword
=
str
;
while
(
sptr
-
str
<
slen
)
while
(
(
bword
=
find_word
(
eword
,
slen
-
(
eword
-
str
),
&
eword
,
&
charlen
))
!=
NULL
)
{
if
(
state
==
WORDWAIT
)
{
if
(
#ifdef KEEPONLYALNUM
isalnum
((
unsigned
char
)
*
sptr
)
#else
!
isspace
((
unsigned
char
)
*
sptr
)
#endif
)
{
{
*
bufptr
=
*
sptr
;
/* start put word in buffer */
#ifdef IGNORECASE
bufptr
++
;
bword
=
lowerstr_with_len
(
bword
,
eword
-
bword
);
state
=
INWORD
;
bytelen
=
strlen
(
bword
);
if
(
sptr
-
str
==
slen
-
1
/* last char */
)
goto
gettrg
;
}
}
else
{
if
(
#ifdef KEEPONLYALNUM
!
isalnum
((
unsigned
char
)
*
sptr
)
#else
#else
isspace
((
unsigned
char
)
*
sptr
)
bytelen
=
eword
-
bword
;
#endif
#endif
)
{
gettrg:
/* word in buffer, so count trigrams */
*
bufptr
=
' '
;
*
(
bufptr
+
1
)
=
' '
;
wl
=
bufptr
-
(
buf
+
LPADDING
)
-
2
+
LPADDING
+
RPADDING
;
if
(
wl
<=
0
)
{
bufptr
=
buf
+
LPADDING
;
state
=
WORDWAIT
;
sptr
++
;
continue
;
}
#ifdef IGNORECASE
memcpy
(
buf
+
LPADDING
,
bword
,
bytelen
);
do
{
/* lower word */
int
wwl
=
bufptr
-
buf
;
bufptr
=
buf
+
LPADDING
;
#ifdef IGNORECASE
while
(
bufptr
-
buf
<
wwl
)
pfree
(
bword
);
{
*
bufptr
=
tolower
((
unsigned
char
)
*
bufptr
);
bufptr
++
;
}
}
while
(
0
);
#endif
#endif
bufptr
=
buf
;
buf
[
LPADDING
+
bytelen
]
=
' '
;
/* set trigrams */
buf
[
LPADDING
+
bytelen
+
1
]
=
' '
;
while
(
bufptr
-
buf
<
wl
)
{
/*
CPTRGM
(
tptr
,
bufptr
);
* count trigrams
bufptr
++
;
*/
tptr
++
;
tptr
=
make_trigrams
(
tptr
,
buf
,
bytelen
+
LPADDING
+
RPADDING
,
}
charlen
+
LPADDING
+
RPADDING
);
bufptr
=
buf
+
LPADDING
;
state
=
WORDWAIT
;
}
else
{
*
bufptr
=
*
sptr
;
/* put in buffer */
bufptr
++
;
if
(
sptr
-
str
==
slen
-
1
)
goto
gettrg
;
}
}
sptr
++
;
}
}
pfree
(
buf
);
pfree
(
buf
);
...
@@ -186,6 +231,19 @@ generate_trgm(char *str, int slen)
...
@@ -186,6 +231,19 @@ generate_trgm(char *str, int slen)
return
trg
;
return
trg
;
}
}
/*
 * Pack the three bytes of a trigram into an integer.
 *
 * Each byte is read as unsigned.  The first byte lands in bits 16..23, the
 * second in bits 8..15 and the third in bits 0..7; the top eight bits of
 * the result are always zero.
 */
uint32
trgm2int(trgm *ptr)
{
	const unsigned char *bytes = (const unsigned char *) ptr;

	return ((uint32) bytes[0] << 16) |
		((uint32) bytes[1] << 8) |
		(uint32) bytes[2];
}
PG_FUNCTION_INFO_V1
(
show_trgm
);
PG_FUNCTION_INFO_V1
(
show_trgm
);
Datum
show_trgm
(
PG_FUNCTION_ARGS
);
Datum
show_trgm
(
PG_FUNCTION_ARGS
);
...
@@ -204,10 +262,18 @@ show_trgm(PG_FUNCTION_ARGS)
...
@@ -204,10 +262,18 @@ show_trgm(PG_FUNCTION_ARGS)
for
(
i
=
0
,
ptr
=
GETARR
(
trg
);
i
<
ARRNELEM
(
trg
);
i
++
,
ptr
++
)
for
(
i
=
0
,
ptr
=
GETARR
(
trg
);
i
<
ARRNELEM
(
trg
);
i
++
,
ptr
++
)
{
{
text
*
item
=
(
text
*
)
palloc
(
VARHDRSZ
+
3
);
text
*
item
=
(
text
*
)
palloc
(
VARHDRSZ
+
Max
(
12
,
pg_database_encoding_max_length
()
*
3
)
);
if
(
pg_database_encoding_max_length
()
>
1
&&
!
ISPRINTABLETRGM
(
ptr
)
)
{
snprintf
(
VARDATA
(
item
),
12
,
"0x%06x"
,
trgm2int
(
ptr
));
SET_VARSIZE
(
item
,
VARHDRSZ
+
strlen
(
VARDATA
(
item
)));
}
else
{
SET_VARSIZE
(
item
,
VARHDRSZ
+
3
);
SET_VARSIZE
(
item
,
VARHDRSZ
+
3
);
CPTRGM
(
VARDATA
(
item
),
ptr
);
CPTRGM
(
VARDATA
(
item
),
ptr
);
}
d
[
i
]
=
PointerGetDatum
(
item
);
d
[
i
]
=
PointerGetDatum
(
item
);
}
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment