Skip to content
Snippets Groups Projects
Commit 18d99bc2 authored by Peter Eisentraut's avatar Peter Eisentraut
Browse files

Update soundex to new fmgr interface and fix algorithm

parent baa3a09b
Branches
Tags
No related merge requests found
# #
# $Header: /cvsroot/pgsql/contrib/soundex/Attic/Makefile,v 1.7 2000/07/09 13:13:33 petere Exp $ # $Header: /cvsroot/pgsql/contrib/soundex/Attic/Makefile,v 1.8 2000/10/04 19:25:34 petere Exp $
# #
subdir = contrib/soundex subdir = contrib/soundex
top_builddir = ../.. top_builddir = ../..
include ../../src/Makefile.global include $(top_builddir)/src/Makefile.global
NAME := soundex NAME := soundex
SONAME := $(NAME)$(DLSUFFIX) SONAME := $(NAME)$(DLSUFFIX)
...@@ -14,7 +14,7 @@ CFLAGS += -I. $(CFLAGS_SL) ...@@ -14,7 +14,7 @@ CFLAGS += -I. $(CFLAGS_SL)
all: $(SONAME) $(NAME).sql all: $(SONAME) $(NAME).sql
$(NAME).sql: $(NAME).sql.in $(NAME).sql: $(NAME).sql.in
sed -e 's:MODULE_PATHNAME:$(datadir)/contrib/$(SONAME):g' < $< > $@ sed 's,@MODULE_FILENAME@,$(libdir)/contrib/$(SONAME),g' $< >$@
install: all installdirs install: all installdirs
$(INSTALL_SHLIB) $(SONAME) $(libdir)/contrib $(INSTALL_SHLIB) $(SONAME) $(libdir)/contrib
...@@ -28,7 +28,7 @@ uninstall: ...@@ -28,7 +28,7 @@ uninstall:
rm -f $(libdir)/contrib/$(SONAME) $(datadir)/contrib/$(NAME).sql $(docdir)/contrib/README.$(NAME) rm -f $(libdir)/contrib/$(SONAME) $(datadir)/contrib/$(NAME).sql $(docdir)/contrib/README.$(NAME)
clean distclean maintainer-clean: clean distclean maintainer-clean:
rm -f $(SONAME) $(NAME).sql rm -f $(SONAME) $(NAME).o $(NAME).sql
depend dep: depend dep:
$(CC) -MM -MG $(CFLAGS) *.c > depend $(CC) -MM -MG $(CFLAGS) *.c > depend
... ...
......
This directory contains a module that implements the "Soundex" code as
a PostgreSQL user-defined function. The Soundex system is a method of
matching similar-sounding names (or any words) to the same code. It
was initially used by the United States Census in 1880, 1900, and
1910, but it has little use beyond English names (or the English
pronunciation of names), and it is not a linguistic tool.
To install it, first configure the main source tree, then run "make"
followed by "make install" in this directory. Finally, load the function
definitions with psql:
psql -f PREFIX/share/contrib/soundex.sql
The following are some usage examples:
SELECT text_soundex('hello world!'); SELECT text_soundex('hello world!');
...@@ -50,4 +64,3 @@ WHERE text_sx_eq(nm,'john')\g ...@@ -50,4 +64,3 @@ WHERE text_sx_eq(nm,'john')\g
SELECT * SELECT *
from s from s
where s.nm #= 'john'; where s.nm #= 'john';
/*****************************************************************************/ /* $Header: /cvsroot/pgsql/contrib/soundex/Attic/soundex.c,v 1.7 2000/10/04 19:25:34 petere Exp $ */
/* soundex.c */ #include "postgres.h"
/*****************************************************************************/ #include "fmgr.h"
#include "utils/builtins.h"
#include <ctype.h> #include <ctype.h>
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include "postgres.h" /* for char16, etc. */
#include "utils/palloc.h" /* for palloc */ Datum
text_soundex(PG_FUNCTION_ARGS);
/* prototypes for soundex functions */ static void
text *text_soundex(text *t); soundex(const char *instr, char *outstr);
char *soundex(char *instr, char *outstr);
text * #define SOUNDEX_LEN 4
text_soundex(text *t)
{
text *new_t;
char outstr[6 + 1]; /* max length of soundex is 6 */
char *instr;
/* make a null-terminated string */ #define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
instr = palloc(VARSIZE(t) + 1); #define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
memcpy(instr, VARDATA(t), VARSIZE(t) - VARHDRSZ);
instr[VARSIZE(t) - VARHDRSZ] = (char) 0;
/* load soundex into outstr */
soundex(instr, outstr);
/* Now the outstr contains the soundex of instr */ #ifndef SOUNDEX_TEST
/* copy outstr to new_t */ /*
new_t = (text *) palloc(strlen(outstr) + VARHDRSZ); * SQL function: text_soundex(text) returns text
memset(new_t, 0, strlen(outstr) + 1); */
VARSIZE(new_t) = strlen(outstr) + VARHDRSZ; Datum
memcpy((void *) VARDATA(new_t), text_soundex(PG_FUNCTION_ARGS)
(void *) outstr, {
strlen(outstr)); char outstr[SOUNDEX_LEN + 1];
char *arg;
arg = _textout(PG_GETARG_TEXT_P(0));
/* free instr */ soundex(arg, outstr);
pfree(instr);
return (new_t); PG_RETURN_TEXT_P(_textin(outstr));
} }
#endif /* not SOUNDEX_TEST */
char *
soundex(char *instr, char *outstr)
{
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
char *table = "01230120022455012623010202"; static const char *soundex_table = "01230120022455012623010202";
int count = 0; #define soundex_code(letter) soundex_table[toupper(letter) - 'A']
static void
soundex(const char *instr, char *outstr)
{
int count;
AssertArg(instr);
AssertArg(outstr);
outstr[SOUNDEX_LEN] = '\0';
/* Skip leading non-alphabetic characters */
while (!isalpha(instr[0]) && instr[0]) while (!isalpha(instr[0]) && instr[0])
++instr; ++instr;
/* No string left */
if (!instr[0]) if (!instr[0])
{ /* Hey! Where'd the string go? */
outstr[0] = (char) 0;
return outstr;
}
if (toupper(instr[0]) == 'P' && toupper(instr[1]) == 'H')
{ {
instr[0] = 'F'; outstr[0] = (char) 0;
instr[1] = 'A'; return;
} }
/* Take the first letter as is */
*outstr++ = (char) toupper(*instr++); *outstr++ = (char) toupper(*instr++);
while (*instr && count < 5) count = 1;
while (*instr && count < SOUNDEX_LEN)
{ {
if (isalpha(*instr) && *instr != *(instr - 1)) if (isalpha(*instr) && soundex_code(*instr) != soundex_code(*(instr - 1)))
{ {
*outstr = table[toupper(instr[0]) - 'A']; *outstr = soundex_code(instr[0]);
if (*outstr != '0') if (*outstr != '0')
{ {
++outstr; ++outstr;
...@@ -83,6 +83,33 @@ soundex(char *instr, char *outstr) ...@@ -83,6 +83,33 @@ soundex(char *instr, char *outstr)
++instr; ++instr;
} }
*outstr = '\0'; /* Fill with 0's */
return (outstr); while (count < SOUNDEX_LEN)
{
*outstr = '0';
++outstr;
++count;
}
}
#ifdef SOUNDEX_TEST
/*
 * Command-line test driver (built only when SOUNDEX_TEST is defined):
 * prints the soundex code computed for argv[1].
 *
 * Returns 1 after writing a usage message to stderr when no argument is
 * supplied; otherwise prints "soundex(<input>) = <code>" and returns 0.
 */
int
main (int argc, char *argv[])
{
if (argc < 2)
{
fprintf(stderr, "usage: %s string\n", argv[0]);
return 1;
}
else
{
/* Room for SOUNDEX_LEN code characters plus the terminating NUL */
char output[SOUNDEX_LEN + 1];
soundex(argv[1], output);
printf("soundex(%s) = %s\n", argv[1], output);
return 0;
}
} }
#endif /* SOUNDEX_TEST */
CREATE FUNCTION text_soundex(text) RETURNS text CREATE FUNCTION text_soundex(text) RETURNS text
AS 'MODULE_PATHNAME' LANGUAGE 'c'; AS '@MODULE_FILENAME@', 'text_soundex' LANGUAGE 'newC';
-- soundex(text) is a second SQL-level name bound to the same C symbol
-- ('text_soundex') as the text_soundex(text) function.
CREATE FUNCTION soundex(text) RETURNS text
AS '@MODULE_FILENAME@', 'text_soundex' LANGUAGE 'newC';
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment