# HG changeset patch # User Igor Chubin # Date 1282073757 -7200 # Node ID 48ca8248e9cc157b89a9757ca8333c25a94bfe41 # Parent 07d89c2505e75cf909e34388f6397250a9c618c7 +esperanto normalization diff -r 07d89c2505e7 -r 48ca8248e9cc en.sh --- a/en.sh Sun Jul 04 12:24:01 2010 +0200 +++ b/en.sh Tue Aug 17 21:35:57 2010 +0200 @@ -22,6 +22,9 @@ "uk") slovnyk "$1" ;; + "io") + vortaro "$1" + ;; esac } diff -r 07d89c2505e7 -r 48ca8248e9cc grep-sentences.pl --- a/grep-sentences.pl Sun Jul 04 12:24:01 2010 +0200 +++ b/grep-sentences.pl Tue Aug 17 21:35:57 2010 +0200 @@ -9,6 +9,8 @@ $regexp=decode("utf8",$regexp); $page=$ARGV[1]; +$number_of_printed = 0; +$number_of_printed_max = 10; #if (open(PAGE, "lynx -dump '$page'|")) { if (open(PAGE, "$page")) { binmode PAGE,":utf8"; @@ -28,6 +30,9 @@ s/\s*$//; s/\[[0-9]+\]//g; s/\s+/ /g; - print "$_.\n\n" if /\b$regexp\b/; + if (/\b$regexp\b/ and $number_of_printed < $number_of_printed_max ) { + print "$_.\n\n"; + $number_of_printed++; + } } } diff -r 07d89c2505e7 -r 48ca8248e9cc new-words.sh --- a/new-words.sh Sun Jul 04 12:24:01 2010 +0200 +++ b/new-words.sh Tue Aug 17 21:35:57 2010 +0200 @@ -113,7 +113,7 @@ { tr ' ' '\n' | sed 's/--/ /g' \ | sed "s/'/__APOSTROPHE__/g" \ -| perl -MEncode -Mutf8 -n -e '$_ = decode( "utf8", $_);y/*\r,.:#@()+=—<>$;"?!|·[]^%&/ /; binmode STDOUT, ":utf8"; print if /^[[:alpha:]'"'"'_-]*$/'\ +| perl -MEncode -Mutf8 -n -e '$_ = decode( "utf8", $_);y/*\r,.:#@()+=—<>$;"?!|·[]^%&/ /; binmode STDOUT, ":utf8"; print if /^[[:alpha:] '"'"'_-]*$/'\ | sed "s/__APOSTROPHE__/'/g" \ | tr ' ' '\n' \ | tee "$1" \ @@ -284,6 +284,7 @@ if ( $ENV{LANGUAGE} eq "en" ) { return normalize_english(shift); } elsif ( $ENV{LANGUAGE} eq "de" ) { return normalize_german(shift); } elsif ( $ENV{LANGUAGE} eq "uk" ) { return normalize_ukrainian(shift); } + elsif ( $ENV{LANGUAGE} eq "io" ) { return normalize_esperanto(shift); } else { return shift ; } } @@ -294,6 +295,18 @@ return $_; } +sub normalize_esperanto($) +{ + $_=lc(shift); +# verbs + s/i$//; s/is$//; s/os$//; s/as$//; s/us$//; + +# nouns + s/j?n?$//; + + return $_; +} + sub normalize_german($) { $_=lc(shift);