# HG changeset patch
# User Igor Chubin
# Date 1274023218 -10800
# Node ID 46e987f4636db8be64d8cd0c250897d2960d7dea
# Parent  190d4ac6b07cf63208fed4e79c054cde3cb331cf
part.pl script + german normalization support

diff -r 190d4ac6b07c -r 46e987f4636d en.sh
--- a/en.sh	Thu May 06 21:05:20 2010 +0300
+++ b/en.sh	Sun May 16 18:20:18 2010 +0300
@@ -1,7 +1,19 @@
 #!/bin/sh
 
+DICTD_SERVER="dictd.xdsl.by"
+DICTD_SERVER_ARGS="-h $DICTD_SERVER"
+DICT_NAME=mueller24
+
+
+myname="`echo $0 | sed s@.*/@@`"
+case $myname in
+    "de") DICT_NAME=deu-eng ; DICTD_SERVER_ARGS='' ;;
+    "pl") DICT_NAME=slovnyk_pl-ru ;;
+    "ru") DICT_NAME=ozhshv ;;
+esac
+
 (
-    dict -h dictd.xdsl.by -d mueller24 "$@"
+    dict $DICTD_SERVER_ARGS -d "$DICT_NAME" "$@"
     if [ -e "$ORIGINAL_TEXT" ]
     then
         echo  . . . . . . . . . . . . . . . . . . . . . . . . . ; echo
diff -r 190d4ac6b07c -r 46e987f4636d new-words.sh
--- a/new-words.sh	Thu May 06 21:05:20 2010 +0300
+++ b/new-words.sh	Sun May 16 18:20:18 2010 +0300
@@ -44,10 +44,13 @@
 
 LANGUAGE=en
 my_name="`echo $0 | sed s@.*/@@ | sed s/-.*// `"
-if echo "$1" | grep -q http://...wikipedia.org/wiki/
-then
-LANGUAGE="`echo $1 | sed s@http://@@ | sed s@.wikipedia.*@@`"
-fi
+for arg
+do
+    if echo "$arg" | grep -q http://...wikipedia.org/wiki/
+    then
+    LANGUAGE="`echo $arg | sed s@http://@@ | sed s@.wikipedia.*@@`"
+    fi
+done
 [ "${my_name}" = "new" ] || LANGUAGE="$my_name"
 if [ "$1" = "-l" ]
 then
@@ -179,11 +182,11 @@
 
 group_words()
 {
-    if [ "$LANGUAGE" != "en" ]
-    then
-        cat
-        return
-    fi
+    #if [ "$LANGUAGE" != "en" ]
+    #then
+    #    cat
+    #    return
+    #fi
     PERL_SCRIPT_TEMP_NAME=`mktemp /tmp/perl-group-words-XXXXXXXX`
     cat <<'PERL_SCRIPT' > $PERL_SCRIPT_TEMP_NAME
 #!/usr/bin/perl
@@ -211,9 +214,43 @@
 
 sub normalize($)
 {
+    if   ( $ENV{LANGUAGE} eq "en" ) { return normalize_english(shift); }
+    elsif ( $ENV{LANGUAGE} eq "de" ) { return normalize_german(shift); }
+    else { return shift ; }
+}
+
+sub normalize_german($)
+{
+    $_=lc(shift);
+
+    s/heit$//; s/keit$//; s/tum$//; s/ung$//; s/nis$//;s/schaft$//; s/ist$//;
+    s/en$//; s/er$//;
+
+    s/lich$//; s/ig$//;
+    s/al$//; s/isch$//;
+    s/ell$//; s/haft$//;
+
+    s/bar$//; s/sam$//; s/lich$//;
+
+    @prefixes=qw(
+        ab an auf aus bei dazwischen ein fest heraus her hinaus hin los mit nach voraus vorbei vor weg weiter zurück zusammen zu
+        be emp ent er ge miss ver zer durch über um unter wieder);
+    for $pref (@prefixes) {
+        s/^$pref//;
+    }
+
+
+    return $_;
+}
+
+sub normalize_english($)
+{
     $_=lc(shift);
 
     s/s$//;
+
+    s/ation$//; s/ness$//; s/ship$//; s/ally$//; s/ance$//;s/ity$//; s/ment$//;
+
     s/ed$//;
     s/en$//;
     s/er$//;
@@ -221,8 +258,7 @@
     s/ing$//;
 
     s/ism$//; s/ist$//; s/ful$//; s/able$//; s/ably$//;
-    s/ation$//; s/ness$//; s/ship$//; s/ally$//;
-    s/ment$//; s/ify$//; s/ity$//; s/fy$//; s/ly$//;
+    s/ify$//; s/fy$//; s/ly$//;
     s/ise$//; s/ize$//;
 
     s/e$//;
@@ -286,6 +322,7 @@
 PERL_SCRIPT
     export VOCABULARY
     export NEED_TO_USE_VOCABULARY_WHEN_SORT
+    export LANGUAGE
     perl $PERL_SCRIPT_TEMP_NAME
     rm $PERL_SCRIPT_TEMP_NAME
 }
diff -r 190d4ac6b07c -r 46e987f4636d part.pl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/part.pl	Sun May 16 18:20:18 2010 +0300
@@ -0,0 +1,50 @@
+#!/usr/bin/perl
+#
+# part.pl - print one part of the lines read from standard input.
+#
+# Usage: part.pl INTERVAL < input
+#
+# INTERVAL is measured in units of 1/TOTAL of the input:
+#   START-STOP/TOTAL    lines from START/TOTAL up to (not including) STOP/TOTAL
+#   START+LENGTH/TOTAL  shorthand for START-(START+LENGTH)/TOTAL
+#   START/TOTAL         shorthand for START+1/TOTAL
+#
+# Example: "part.pl 1-2/3" prints the middle third of its input.
+
+use strict;
+use warnings;
+
+my $usage = "Usage: $0 START[-STOP|+LENGTH]/TOTAL < input\n";
+
+my $interval = $ARGV[0];
+die $usage unless defined $interval;
+
+# Accept only non-negative numbers, so that the arithmetic below can neither
+# warn about non-numeric strings nor produce negative array indices.
+my $number = qr/\d+(?:\.\d+)?/;
+my ($start, $sign, $extent, $total) =
+    $interval =~ m{\A($number)(?:([-+])($number))?/($number)\z}
+    or die $usage;
+
+# TOTAL is the divisor below; zero would abort with "Illegal division by zero".
+die "$0: TOTAL must be greater than zero\n" unless $total > 0;
+
+# Work out where the interval ends, in the same 1/TOTAL units as START.
+my $stop;
+if    (!defined $sign) { $stop = $start + 1; }
+elsif ($sign eq '+')   { $stop = $start + $extent; }
+else                   { $stop = $extent; }
+
+# The interval comes from the command line, so the data can only be on STDIN.
+my @lines = <STDIN>;
+
+# Scale by the number of lines, not by the last index ($#lines): otherwise the
+# final line of the input can never be selected.  Multiplying before dividing
+# avoids the rounding error of computing count/TOTAL first.
+my $count = scalar @lines;
+my $first = int($count * $start / $total);
+my $last  = int($count * $stop / $total);
+$last = $count if $last > $count;
+
+# $last is exclusive; an empty or inverted interval prints nothing.
+print @lines[$first .. $last - 1] if $first < $last;