new-words

diff new-words.sh @ 31:48ca8248e9cc

+esperanto normalization
author Igor Chubin <igor@chub.in>
date Tue Aug 17 21:35:57 2010 +0200 (2010-08-17)
parents c631833fa2be
children 753fb84437aa
line diff
     1.1 --- a/new-words.sh	Mon Jun 21 19:46:58 2010 +0300
     1.2 +++ b/new-words.sh	Tue Aug 17 21:35:57 2010 +0200
     1.3 @@ -113,7 +113,7 @@
     1.4  {
     1.5  tr ' ' '\n' | sed 's/--/ /g' \
     1.6  | sed "s/'/__APOSTROPHE__/g" \
     1.7 -| perl -MEncode -Mutf8 -n -e '$_ = decode( "utf8", $_);y/*\r,.:#@()+=—<>$;"?!|·[]^%&/                        /; binmode STDOUT, ":utf8"; print if /^[[:alpha:]'"'"'_-]*$/'\
     1.8 +| perl -MEncode -Mutf8 -n -e '$_ = decode( "utf8", $_);y/*\r,.:#@()+=—<>$;"?!|·[]^%&/ /; binmode STDOUT, ":utf8"; print if /^[[:alpha:] '"'"'_-]*$/'\
     1.9  | sed "s/__APOSTROPHE__/'/g" \
    1.10  | tr ' ' '\n' \
    1.11  | tee "$1" \
    1.12 @@ -284,6 +284,7 @@
    1.13      if   ( $ENV{LANGUAGE} eq "en" ) { return normalize_english(shift); }
    1.14      elsif ( $ENV{LANGUAGE} eq "de" ) { return normalize_german(shift); }
    1.15      elsif ( $ENV{LANGUAGE} eq "uk" ) { return normalize_ukrainian(shift); }
    1.16 +    elsif ( $ENV{LANGUAGE} eq "io" ) { return normalize_esperanto(shift); }
    1.17      else { return shift ; }
    1.18  }
    1.19  
    1.20 @@ -294,6 +295,18 @@
    1.21      return $_;
    1.22  }
    1.23  
    1.24 +sub normalize_esperanto($)  # reduce one word to a rough stem by removing Esperanto inflection endings
    1.25 +{
    1.26 +    $_=lc(shift);  # lower-case the argument; all work below is done on (and overwrites) the global $_
    1.27 +# verbs: remove a trailing -i, then trailing -is, -os, -as, -us (infinitive and tense/mood endings)
    1.28 +    s/i$//; s/is$//; s/os$//; s/as$//; s/us$//;
    1.29 +# NOTE(review): the substitutions above run one after another on the same string, so more than one can fire (e.g. "pasi" -> "pas" -> "p"); confirm this is intended
    1.30 +# nouns: remove a trailing "j", "n" or "jn" (plural and/or accusative ending)
    1.31 +    s/j?n?$//;
    1.32 +# NOTE(review): the dispatcher in this changeset calls this sub for LANGUAGE "io"; ISO 639-1 uses "eo" for Esperanto ("io" is Ido) -- verify
    1.33 +    return $_;
    1.34 +}
    1.35 +
    1.36  sub normalize_german($)
    1.37  {
    1.38      $_=lc(shift);