From 04e9704b9e77c57bb1b0a06876977c1b255376ed Mon Sep 17 00:00:00 2001
From: Teodor Sigaev <teodor@sigaev.ru>
Date: Fri, 9 Jun 2006 13:25:59 +0000
Subject: [PATCH] Now the ispell dictionary can read dictionaries in MySpell
 format, used by OpenOffice. Dictionaries are available at
 http://lingucomponent.openoffice.org/spell_dic.html The dictionary
 automatically recognizes the format of the files.

Warning: the MySpell format has a limitation in its compound
word support: it is impossible to mark an affix as a
compound-only affix. So for Norwegian, German, etc., it is
recommended to use the original ispell format.
For that reason I don't want to remove the my2ispell
scripts; they have a workaround, at least for the Norwegian language.
---
 contrib/tsearch2/ispell/spell.c | 94 +++++++++++++++++++++++++++++++--
 contrib/tsearch2/ispell/spell.h |  1 +
 2 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c
index 223ae4a9ada..28f38eefd32 100644
--- a/contrib/tsearch2/ispell/spell.c
+++ b/contrib/tsearch2/ispell/spell.c
@@ -391,6 +391,7 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
 	char		flagflags = 0;
 	FILE	   *affix;
 	int	line=0;
+	int	oldformat = 0;
 
 	if (!(affix = fopen(filename, "r")))
 		return (1);
@@ -412,6 +413,7 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
 				while (*s && t_isspace(s)) s++;
 				if ( *s && pg_mblen(s) == 1 ) 
 					Conf->compoundcontrol = *s;
+				oldformat++;
 				continue;
 			}
 		}
@@ -419,12 +421,14 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
 		{
 			suffixes = 1;
 			prefixes = 0;
+			oldformat++;
 			continue;
 		}
 		if (STRNCMP(tmpstr, "prefixes") == 0)
 		{
 			suffixes = 0;
 			prefixes = 1;
+			oldformat++;
 			continue;
 		}
 		if (STRNCMP(tmpstr, "flag") == 0)
@@ -433,10 +437,11 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
 			flagflags = 0;
 
 			while (*s && t_isspace(s)) s++;
+			oldformat++;
 
 			/* allow only single-encoded flags */
-			if ( pg_mblen(s) != 1 )
-				continue;			
+			if ( pg_mblen(s) != 1 ) 
+				elog(ERROR,"Multiencoded flag at line %d: %s", line, s);
 
 			if (*s == '*')
 			{
@@ -455,12 +460,22 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
 			/* allow only single-encoded flags */
 			if ( pg_mblen(s) != 1 ) {
 				flagflags = 0;
-				continue;
+				elog(ERROR,"Multiencoded flag at line %d: %s", line, s);
 			}
 
 			flag = (unsigned char) *s;
 			continue;
 		}
+		if ( STRNCMP(str, "COMPOUNDFLAG") == 0 || STRNCMP(str, "COMPOUNDMIN") == 0 || 
+					 STRNCMP(str, "PFX")==0 || STRNCMP(str, "SFX")==0 ) {
+
+			if ( oldformat ) 
+				elog(ERROR,"Wrong affix file format");
+
+			fclose(affix);
+			return NIImportOOAffixes(Conf, filename);
+			
+		}
 		if ((!suffixes) && (!prefixes))
 			continue;
 
@@ -475,6 +490,79 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
 	return (0);
 }
 
+/*
+ * Load a MySpell-format (OpenOffice) affix file into Conf.  A "COMPOUNDFLAG"
+ * line may set Conf->compoundcontrol; each SFX/PFX header line selects the
+ * current flag, and following rule lines of that flag go to NIAddAffix().
+ * Returns 1 if the file cannot be opened, otherwise 0.
+ */
+int
+NIImportOOAffixes(IspellDict * Conf, const char *filename) {
+	char		buf[BUFSIZ];
+	char		kind[BUFSIZ];
+	char		flagstr[BUFSIZ];
+	char		cond[BUFSIZ];
+	char		strip[BUFSIZ];
+	char		add[BUFSIZ];
+	char		fmt[BUFSIZ];
+	bool		suffix = false;
+	int			curflag = 0;
+	char		opts = 0;
+	int			lineno = 0;
+	int			nfields;
+	FILE	   *fp;
+
+	/* every %s conversion gets a width far below the size of its buffer */
+	sprintf(fmt,"%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ/5, BUFSIZ/5, BUFSIZ/5, BUFSIZ/5);
+	fp = fopen(filename, "r");
+	if (fp == NULL)
+		return (1);
+	Conf->compoundcontrol = '\t';
+
+	while (fgets(buf, sizeof(buf), fp))
+	{
+		lineno++;
+		if ( *buf == '\0' || t_isspace(buf) || t_iseq(buf,'#') )
+			continue;
+		pg_verifymbstr( buf, strlen(buf), false);
+
+		if ( STRNCMP(buf, "COMPOUNDFLAG")==0 ) {
+			char *p;
+			for (p = buf+strlen("COMPOUNDFLAG"); *p && t_isspace(p); p++)
+				;
+			if ( *p && pg_mblen(p) == 1 )
+				Conf->compoundcontrol = *p;
+			continue;
+		}
+
+		nfields = sscanf(buf, fmt, kind, flagstr, strip, add, cond);
+		lowerstr(kind);
+		if ( nfields<4 || (STRNCMP(kind,"sfx") && STRNCMP(kind,"pfx")) )
+			continue;
+		if ( strlen(flagstr) != 1 )
+			continue;			/* only one-byte flags are accepted */
+		if ( nfields == 4 ) {
+			/* header line: type, flag, cross-product mark, one unused field */
+			curflag = *flagstr;
+			suffix = (STRNCMP(kind,"sfx")==0);
+			lowerstr(strip);
+			opts = t_iseq(strip,'y') ? (opts | FF_CROSSPRODUCT) : 0;
+		} else if ( curflag != 0 && curflag == *flagstr ) {
+			/* rule line for the flag chosen by the most recent header */
+			lowerstr(add);
+			lowerstr(strip);
+			lowerstr(cond);
+			if ( t_iseq(strip,'0') )
+				*strip = '\0';
+			if ( t_iseq(add,'0') )
+				*add = '\0';
+			NIAddAffix(Conf, curflag, opts, cond, strip, add, suffix ? FF_SUFFIX : FF_PREFIX);
+		}
+	}
+	fclose(fp);
+	return 0;
+}
+
 static int
 MergeAffix(IspellDict * Conf, int a1, int a2)
 {
diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h
index fc3240a1d8b..fe79888bf3e 100644
--- a/contrib/tsearch2/ispell/spell.h
+++ b/contrib/tsearch2/ispell/spell.h
@@ -121,6 +121,7 @@ typedef struct
 
 TSLexeme   *NINormalizeWord(IspellDict * Conf, char *word);
 int			NIImportAffixes(IspellDict * Conf, const char *filename);
+int			NIImportOOAffixes(IspellDict * Conf, const char *filename);
 int			NIImportDictionary(IspellDict * Conf, const char *filename);
 
 int			NIAddSpell(IspellDict * Conf, const char *word, const char *flag);
-- 
GitLab