Skip to content
Snippets Groups Projects
Commit f5e8bfb4 authored by Teodor Sigaev's avatar Teodor Sigaev
Browse files

Utility for convert myspell dictionaries to ispell, full README will be later

parent 43f77c06
No related branches found
No related tags found
No related merge requests found
ZIPFILE=nb_NO
LANGUAGE=norsk
UNZIP=unzip -o
all: $(LANGUAGE).dict $(LANGUAGE).aff
$(ZIPFILE).aff: $(ZIPFILE).zip
$(UNZIP) $? $@
touch $@
# 1 Cleanup dictionary
# 2 remove " symbol
# 3 add compoundwords controlled flag to word which hasn't it, but
# has compound only suffixes
$(LANGUAGE).dict: $(ZIPFILE).zip
$(UNZIP) $? $(ZIPFILE).dic
grep -v -E '^[[:digit:]]+$$' < $(ZIPFILE).dic \
| grep -v '\.' \
| sed -e 's/"//g' \
| perl -pi -e 's|/(\S+)| $$q=$$1; ( $$q=~/[\\_`]/ && $$q!~/z/ ) ? "/$${q}z" : "/$${q}"|e' \
| sort \
> $@
#just convert affix file
$(LANGUAGE).aff: $(ZIPFILE).aff
grep -v -i zyzyzy $(ZIPFILE).aff \
| grep -v -i zyzyzy \
| perl -pi \
-e 's/^COMPOUNDFLAG\s+(\S+)/compoundwords controlled $$1/;' \
-e 's/^COMPOUNDMIN\s+(\d+)/compoundmin $$1/;' \
-e 's/^PFX\s+(\S+)\s+Y\s+\d+.*$$/ if ( !$$wasprf ) { $$wasprf=1; "prefixes\n\nflag $$1:" } else { "flag $$1:" } /e;' \
-e 's/^PFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc(" $$3 > $$2")/e;' \
-e 's/^(.*)SFX\s+(\S+)\s+([YN])\s+\d+.*$$/ $$flg=($$3 eq "Y") ? "*" : ""; $$flg="~$$flg" if length $$1; $$q=$$2; $$q="\\$$q" if $$q!~m#[a-zA-Z]#; if ( !$$wassfx ) { $$wassfx=1; "suffixes\n\nflag $$flg$$q:" } else { "flag $$flg$$q:" } /e;' \
-e 's/^.*SFX\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)/ uc(" $$3 > ".( ($$1 eq "0") ? "" : "-$$1,").( ($$2 eq "0") ? "" : "$$2") )/e;' \
-e 's/^(SET|TRY)/#$$1/' \
> $@
clean:
rm -rf $(ZIPFILE).aff $(ZIPFILE).dic $(LANGUAGE).dict $(LANGUAGE).aff
Utility for convert MySpell dictionary and affix from
myspell to ispell format.
Utility tested on nb_NO.zip and nn_NO.zip from
OpenOffice (http://lingucomponent.openoffice.org/download_dictionary.html)
usage:
For example, make norwegian dictionary and affix:
% cp nb_NO.zip my2ispell
% cd my2ispell
% gmake ZIPFILE=nb_NO LANGUAGE=norsk
Author: Teodor Sigaev <teodor@sigaev.ru>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment