From 04e9704b9e77c57bb1b0a06876977c1b255376ed Mon Sep 17 00:00:00 2001 From: Teodor Sigaev <teodor@sigaev.ru> Date: Fri, 9 Jun 2006 13:25:59 +0000 Subject: [PATCH] The ispell dictionary can now read dictionaries in the MySpell format used by OpenOffice. Such dictionaries are available at <http://lingucomponent.openoffice.org/spell_dic.html>. The dictionary automatically recognizes the format of its files. Warning: the MySpell format has a limitation in its compound-word support: it is impossible to mark an affix as a compound-only affix. So for Norwegian, German, and similar languages it is recommended to use the original ispell format. For that reason I don't want to remove the my2ispell scripts; they have a workaround, at least for the Norwegian language. --- contrib/tsearch2/ispell/spell.c | 94 +++++++++++++++++++++++++++++++-- contrib/tsearch2/ispell/spell.h | 1 + 2 files changed, 92 insertions(+), 3 deletions(-) diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c index 223ae4a9ada..28f38eefd32 100644 --- a/contrib/tsearch2/ispell/spell.c +++ b/contrib/tsearch2/ispell/spell.c @@ -391,6 +391,7 @@ NIImportAffixes(IspellDict * Conf, const char *filename) char flagflags = 0; FILE *affix; int line=0; + int oldformat = 0; if (!(affix = fopen(filename, "r"))) return (1); @@ -412,6 +413,7 @@ NIImportAffixes(IspellDict * Conf, const char *filename) while (*s && t_isspace(s)) s++; if ( *s && pg_mblen(s) == 1 ) Conf->compoundcontrol = *s; + oldformat++; continue; } } @@ -419,12 +421,14 @@ NIImportAffixes(IspellDict * Conf, const char *filename) { suffixes = 1; prefixes = 0; + oldformat++; continue; } if (STRNCMP(tmpstr, "prefixes") == 0) { suffixes = 0; prefixes = 1; + oldformat++; continue; } if (STRNCMP(tmpstr, "flag") == 0) @@ -433,10 +437,11 @@ NIImportAffixes(IspellDict * Conf, const char *filename) flagflags = 0; while (*s && t_isspace(s)) s++; + oldformat++; /* allow only single-encoded flags */ - if ( pg_mblen(s) != 1 ) - continue; + if ( pg_mblen(s) != 1 ) + elog(ERROR,"Multiencoded flag at line %d: %s", 
line, s); if (*s == '*') { @@ -455,12 +460,22 @@ NIImportAffixes(IspellDict * Conf, const char *filename) /* allow only single-encoded flags */ if ( pg_mblen(s) != 1 ) { flagflags = 0; - continue; + elog(ERROR,"Multiencoded flag at line %d: %s", line, s); } flag = (unsigned char) *s; continue; } + if ( STRNCMP(str, "COMPOUNDFLAG") == 0 || STRNCMP(str, "COMPOUNDMIN") == 0 || + STRNCMP(str, "PFX")==0 || STRNCMP(str, "SFX")==0 ) { + + if ( oldformat ) + elog(ERROR,"Wrong affix file format"); + + fclose(affix); + return NIImportOOAffixes(Conf, filename); + + } if ((!suffixes) && (!prefixes)) continue; @@ -475,6 +490,79 @@ NIImportAffixes(IspellDict * Conf, const char *filename) return (0); } +int +NIImportOOAffixes(IspellDict * Conf, const char *filename) { + char str[BUFSIZ]; + char type[BUFSIZ]; + char sflag[BUFSIZ]; + char mask[BUFSIZ]; + char find[BUFSIZ]; + char repl[BUFSIZ]; + bool isSuffix = false; + int flag = 0; + char flagflags = 0; + FILE *affix; + int line=0; + int scanread = 0; + char scanbuf[BUFSIZ]; + + sprintf(scanbuf,"%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ/5, BUFSIZ/5, BUFSIZ/5, BUFSIZ/5); + + if (!(affix = fopen(filename, "r"))) + return (1); + Conf->compoundcontrol = '\t'; + + while (fgets(str, sizeof(str), affix)) + { + line++; + if ( *str == '\0' || t_isspace(str) || t_iseq(str,'#') ) + continue; + pg_verifymbstr( str, strlen(str), false); + + if ( STRNCMP(str, "COMPOUNDFLAG")==0 ) { + char *s = str+strlen("COMPOUNDFLAG"); + while (*s && t_isspace(s)) s++; + if ( *s && pg_mblen(s) == 1 ) + Conf->compoundcontrol = *s; + continue; + } + + scanread = sscanf(str, scanbuf, type, sflag, find, repl, mask); + + lowerstr(type); + if ( scanread<4 || (STRNCMP(type,"sfx") && STRNCMP(type,"pfx")) ) + continue; + + if ( scanread == 4 ) { + if ( strlen(sflag) != 1 ) + continue; + flag = *sflag; + isSuffix = (STRNCMP(type,"sfx")==0) ? 
true : false; + lowerstr(find); + if ( t_iseq(find,'y') ) + flagflags |= FF_CROSSPRODUCT; + else + flagflags = 0; + } else { + if ( strlen(sflag) != 1 || flag != *sflag || flag==0 ) + continue; + lowerstr(repl); + lowerstr(find); + lowerstr(mask); + if ( t_iseq(find,'0') ) + *find = '\0'; + if ( t_iseq(repl,'0') ) + *repl = '\0'; + + NIAddAffix(Conf, flag, flagflags, mask, find, repl, isSuffix ? FF_SUFFIX : FF_PREFIX); + } + } + + fclose(affix); + + return 0; +} + static int MergeAffix(IspellDict * Conf, int a1, int a2) { diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h index fc3240a1d8b..fe79888bf3e 100644 --- a/contrib/tsearch2/ispell/spell.h +++ b/contrib/tsearch2/ispell/spell.h @@ -121,6 +121,7 @@ typedef struct TSLexeme *NINormalizeWord(IspellDict * Conf, char *word); int NIImportAffixes(IspellDict * Conf, const char *filename); +int NIImportOOAffixes(IspellDict * Conf, const char *filename); int NIImportDictionary(IspellDict * Conf, const char *filename); int NIAddSpell(IspellDict * Conf, const char *word, const char *flag); -- GitLab