new-words
changeset 22:46e987f4636d
part.pl script + German normalization support
author | Igor Chubin <igor@chub.in> |
---|---|
date | Sun May 16 18:20:18 2010 +0300 (2010-05-16) |
parents | 190d4ac6b07c |
children | 4b9d13c78de2 |
files | en.sh new-words.sh part.pl |
line diff
1.1 --- a/en.sh Thu May 06 21:05:20 2010 +0300 1.2 +++ b/en.sh Sun May 16 18:20:18 2010 +0300 1.3 @@ -1,7 +1,19 @@ 1.4 #!/bin/sh 1.5 1.6 +DICTD_SERVER="dictd.xdsl.by" 1.7 +DICTD_SERVER_ARGS="-h $DICTD_SERVER" 1.8 +DICT_NAME=mueller24 1.9 + 1.10 + 1.11 +myname="`echo $0 | sed s@.*/@@`" 1.12 +case $myname in 1.13 + "de") DICT_NAME=deu-eng ; DICTD_SERVER_ARGS='' ;; 1.14 + "pl") DICT_NAME=slovnyk_pl-ru ;; 1.15 + "ru") DICT_NAME=ozhshv ;; 1.16 +esac 1.17 + 1.18 ( 1.19 - dict -h dictd.xdsl.by -d mueller24 "$@" 1.20 + dict $DICTD_SERVER_ARGS -d "$DICT_NAME" "$@" 1.21 if [ -e "$ORIGINAL_TEXT" ] 1.22 then 1.23 echo . . . . . . . . . . . . . . . . . . . . . . . . . ; echo
2.1 --- a/new-words.sh Thu May 06 21:05:20 2010 +0300 2.2 +++ b/new-words.sh Sun May 16 18:20:18 2010 +0300 2.3 @@ -44,10 +44,13 @@ 2.4 2.5 LANGUAGE=en 2.6 my_name="`echo $0 | sed s@.*/@@ | sed s/-.*// `" 2.7 -if echo "$1" | grep -q http://...wikipedia.org/wiki/ 2.8 -then 2.9 -LANGUAGE="`echo $1 | sed s@http://@@ | sed s@.wikipedia.*@@`" 2.10 -fi 2.11 +for arg 2.12 +do 2.13 + if echo "$arg" | grep -q http://...wikipedia.org/wiki/ 2.14 + then 2.15 + LANGUAGE="`echo $arg | sed s@http://@@ | sed s@.wikipedia.*@@`" 2.16 + fi 2.17 +done 2.18 [ "${my_name}" = "new" ] || LANGUAGE="$my_name" 2.19 if [ "$1" = "-l" ] 2.20 then 2.21 @@ -179,11 +182,11 @@ 2.22 2.23 group_words() 2.24 { 2.25 - if [ "$LANGUAGE" != "en" ] 2.26 - then 2.27 - cat 2.28 - return 2.29 - fi 2.30 + #if [ "$LANGUAGE" != "en" ] 2.31 + #then 2.32 + # cat 2.33 + # return 2.34 + #fi 2.35 PERL_SCRIPT_TEMP_NAME=`mktemp /tmp/perl-group-words-XXXXXXXX` 2.36 cat <<'PERL_SCRIPT' > $PERL_SCRIPT_TEMP_NAME 2.37 #!/usr/bin/perl 2.38 @@ -211,9 +214,43 @@ 2.39 2.40 sub normalize($) 2.41 { 2.42 + if ( $ENV{LANGUAGE} eq "en" ) { return normalize_english(shift); } 2.43 + elsif ( $ENV{LANGUAGE} eq "de" ) { return normalize_german(shift); } 2.44 + else { return shift ; } 2.45 +} 2.46 + 2.47 +sub normalize_german($) 2.48 +{ 2.49 + $_=lc(shift); 2.50 + 2.51 + s/heit$//; s/keit$//; s/tum$//; s/ung$//; s/nis$//;s/schaft$//; s/ist$//; 2.52 + s/en$//; s/er$//; 2.53 + 2.54 + s/lich$//; s/ig$//; 2.55 + s/al$//; s/isch$//; 2.56 + s/ell$//; s/haft$//; 2.57 + 2.58 + s/bar$//; s/sam$//; s/lich$//; 2.59 + 2.60 + @prefixes=qw( 2.61 + ab an auf aus bei dazwischen ein fest heraus her hinaus hin los mit nach voraus vorbei vor weg weiter zurück zusammen zu 2.62 + be emp ent er ge miss ver zer durch über um unter wieder); 2.63 + for $pref (@prefixes) { 2.64 + s/^$pref//; 2.65 + } 2.66 + 2.67 + 2.68 + return $_; 2.69 +} 2.70 + 2.71 +sub normalize_english($) 2.72 +{ 2.73 $_=lc(shift); 2.74 2.75 s/s$//; 2.76 + 2.77 + s/ation$//; s/ness$//; 
s/ship$//; s/ally$//; s/ance$//;s/ity$//; s/ment$//; 2.78 + 2.79 s/ed$//; 2.80 s/en$//; 2.81 s/er$//; 2.82 @@ -221,8 +258,7 @@ 2.83 s/ing$//; 2.84 2.85 s/ism$//; s/ist$//; s/ful$//; s/able$//; s/ably$//; 2.86 - s/ation$//; s/ness$//; s/ship$//; s/ally$//; 2.87 - s/ment$//; s/ify$//; s/ity$//; s/fy$//; s/ly$//; 2.88 + s/ify$//; s/fy$//; s/ly$//; 2.89 s/ise$//; s/ize$//; 2.90 2.91 s/e$//; 2.92 @@ -286,6 +322,7 @@ 2.93 PERL_SCRIPT 2.94 export VOCABULARY 2.95 export NEED_TO_USE_VOCABULARY_WHEN_SORT 2.96 + export LANGUAGE 2.97 perl $PERL_SCRIPT_TEMP_NAME 2.98 rm $PERL_SCRIPT_TEMP_NAME 2.99 }
#!/usr/bin/perl
#
# part.pl - print a proportional part of standard input.
#
# Usage:
#   part.pl START-STOP/TOTAL      lines from START/TOTAL up to STOP/TOTAL
#   part.pl START+LENGTH/TOTAL    same, with STOP = START + LENGTH
#
# The input is treated as TOTAL equal parts: "1-2/3" prints the middle
# third, "0+1/4" prints the first quarter.  An empty START or STOP counts
# as 0, and an interval with neither "-" nor "+" selects nothing.

use strict;
use warnings;

# Run only when executed as a script, so the subs below can be loaded with
# require/do (e.g. from a test) without touching STDIN.
main(@ARGV) unless caller;

# Entry point: validate the interval, read all of STDIN, print the slice.
sub main {
    my ($interval) = @_;

    # Parse first so a malformed interval fails before input is consumed.
    my @fractions = parse_interval($interval);

    my @lines = <STDIN>;
    my ( $first, $last ) = line_range( scalar @lines, @fractions );

    # An empty or reversed range yields an empty slice and prints nothing.
    print @lines[ $first .. $last - 1 ];
    return;
}

# Returns the one-line usage message used in error reports.
sub usage {
    return "usage: part.pl START-STOP/TOTAL | START+LENGTH/TOTAL\n";
}

# Splits an interval argument into (START, STOP, TOTAL).
# Dies with a readable message when the argument is missing, when a field
# is not a non-negative number, or when TOTAL is absent or zero (this used
# to surface as "Illegal division by zero").
sub parse_interval {
    my ($interval) = @_;
    die usage() unless defined $interval;

    # Greedy match: everything after the last "/" is TOTAL.
    my ( $range, $total_text ) =
      $interval =~ m{\A(.*)/(.*)\z} ? ( $1, $2 ) : ( $interval, '' );

    my $total = _number( $total_text, 'TOTAL' );
    die "part.pl: TOTAL must be greater than zero in '$interval'\n" . usage()
      if $total == 0;

    my ( $start, $stop );
    if ( $range =~ m{\A(.*)-(.*)\z} ) {
        # Copy the captures before _number runs its own regex.
        my ( $from, $to ) = ( $1, $2 );
        $start = _number( $from, 'START' );
        $stop  = _number( $to,   'STOP' );
    }
    elsif ( $range =~ m{\A(.*)\+(.*)\z} ) {
        my ( $from, $length ) = ( $1, $2 );
        $start = _number( $from, 'START' );
        $stop  = $start + _number( $length, 'LENGTH' );
    }
    else {
        # A bare START has no end point; keep the empty selection.
        $start = _number( $range, 'START' );
        $stop  = $start;
    }

    return ( $start, $stop, $total );
}

# Converts START/STOP fractions of TOTAL into array indices: the returned
# pair (FIRST, LAST) selects elements FIRST .. LAST-1 of LINE_COUNT lines.
sub line_range {
    my ( $line_count, $start, $stop, $total ) = @_;

    my ( $first, $last ) = map {
        # Multiply before dividing so STOP == TOTAL lands exactly on
        # LINE_COUNT instead of being rounded just below it.
        my $index = int( $line_count * $_ / $total );

        # Never index past the end of the input.
        $index > $line_count ? $line_count : $index;
    } $start, $stop;

    return ( $first, $last );
}

# Validates one numeric field; the empty string counts as 0.
sub _number {
    my ( $text, $label ) = @_;
    return 0 if $text eq '';
    die "part.pl: $label must be a non-negative number, got '$text'\n"
      . usage()
      unless $text =~ m{\A(?:\d+(?:\.\d*)?|\.\d+)\z};
    return $text + 0;
}