new-words
diff new-words.sh @ 31:48ca8248e9cc
+esperanto normalization
author | Igor Chubin <igor@chub.in> |
---|---|
date | Tue Aug 17 21:35:57 2010 +0200 (2010-08-17) |
parents | c631833fa2be |
children | 753fb84437aa |
line diff
1.1 --- a/new-words.sh Mon Jun 21 19:46:58 2010 +0300
1.2 +++ b/new-words.sh Tue Aug 17 21:35:57 2010 +0200
1.3 @@ -113,7 +113,7 @@
1.4 {
1.5 tr ' ' '\n' | sed 's/--/ /g' \
1.6 | sed "s/'/__APOSTROPHE__/g" \
1.7 -| perl -MEncode -Mutf8 -n -e '$_ = decode( "utf8", $_);y/*\r,.:#@()+=—<>$;"?!|·[]^%&/ /; binmode STDOUT, ":utf8"; print if /^[[:alpha:]'"'"'_-]*$/'\
1.8 +| perl -MEncode -Mutf8 -n -e '$_ = decode( "utf8", $_);y/*\r,.:#@()+=—<>$;"?!|·[]^%&/ /; binmode STDOUT, ":utf8"; print if /^[[:alpha:] '"'"'_-]*$/'\
1.9 | sed "s/__APOSTROPHE__/'/g" \
1.10 | tr ' ' '\n' \
1.11 | tee "$1" \
1.12 @@ -284,6 +284,7 @@
1.13 if ( $ENV{LANGUAGE} eq "en" ) { return normalize_english(shift); }
1.14 elsif ( $ENV{LANGUAGE} eq "de" ) { return normalize_german(shift); }
1.15 elsif ( $ENV{LANGUAGE} eq "uk" ) { return normalize_ukrainian(shift); }
1.16 + elsif ( $ENV{LANGUAGE} eq "io" ) { return normalize_esperanto(shift); }
1.17 else { return shift ; }
1.18 }
1.19
1.20 @@ -294,6 +295,18 @@
1.21 return $_;
1.22 }
1.23
1.24 +sub normalize_esperanto($)
1.25 +{
1.26 + $_=lc(shift);
1.27 +# verbs
1.28 + s/i$//; s/is$//; s/os$//; s/as$//; s/us$//;
1.29 +
1.30 +# nouns
1.31 + s/j?n?$//;
1.32 +
1.33 + return $_;
1.34 +}
1.35 +
1.36 sub normalize_german($)
1.37 {
1.38 $_=lc(shift);