# HG changeset patch
# User Igor Chubin
# Date 1272477638 -10800
# Node ID 23f949c03f95a727aff931c93c66e2030a5f910c
# Parent 416394a87d9f93e0e1da87fceabbbaace37d3015
# NOTE(review): this copy was recovered from a dump in which all line breaks
# were collapsed and <...>-shaped tokens were dropped.  Line structure below
# is rebuilt so that every hunk matches the counts in its @@ header.
# NOTE(review): restored tokens: "<<HELP" on both "cat" lines, "<VOC>" in the
# vocabulary loop, "<STDIN>" for @lines.  The last one is inferred (the dump
# shows only "@lines=;") -- confirm against the repository.
# NOTE(review): indentation and column alignment were lost and are
# reconstructed conventionally; verify context lines before "hg import".
# NOTE(review): the User line presumably carried an e-mail address in angle
# brackets; it is not visible in the dump and has not been restored.
Usage information, oneliners prototype script, autoraising of the words that are similar to known

diff -r 416394a87d9f -r 23f949c03f95 new-words.sh
--- a/new-words.sh	Tue Apr 20 21:15:19 2010 +0400
+++ b/new-words.sh	Wed Apr 28 21:00:38 2010 +0300
@@ -1,5 +1,8 @@
 #!/bin/sh
-cat <<HELP > /dev/null
+
+show_usage()
+{
+cat <<HELP > /dev/stderr
 
 USAGE:
 
@@ -7,22 +10,26 @@
 
 SWITCHES:
 
-    -s          show text statistics and exit
+    -h          print this screen
+    -k          put higher words that are similar to the known words (only for English)
     -l lang     override language settings
+    -s          show the text statistics (percentage of known words and so on) and exit
 
-Поддержка нескольких языков:
+The language of the text can be specified also
+by name of the program new-words (correspondent link must be created before).
+For example, these calls are equivalent:
 
-    new-words -l lang URL
-
-Например, для немецких текстов:
-
+    de-words URL
     new-words -l de URL
 
-Или, предварительно создав соответствующую ссылку:
+HELP
+}
 
-    de-words URL
-
-HELP
+if [ "$1" = "-h" ]
+then
+    show_usage
+    exit 0
+fi
 
 WORK_DIR=~/.new-words/
 TEMP1=`mktemp /tmp/new-words-XXXXXXXXXX-temp1`
@@ -53,6 +60,13 @@
     shift
 fi
 
+NEED_TO_USE_VOCABULARY_WHEN_SORT=NO
+if [ "$1" = "-k" ]
+then
+    NEED_TO_USE_VOCABULARY_WHEN_SORT=YES
+    shift
+fi
+
 get_words()
 {
    tr ' ' '\n' | sed 's/--/ /g' \
@@ -219,7 +233,17 @@
     }
 }
 
-@lines=<>;
+our %Vocabulary;
+open(VOC, $ENV{VOCABULARY})
+    or die "Can't open VOCABULARY";
+while (<VOC>){
+    chomp;
+    #s/'//g;
+    $Vocabulary{normalize($_)}="1";
+}
+close(VOC);
+
+@lines=<STDIN>;
 for $L (@lines) {
     chomp($L);
     $l=$L;
@@ -227,11 +251,20 @@
     my ($a, $b)=split(/\s+/,$l,2);
     $group_weight{normalize($b)}+=$a;
 }
+if ($ENV{NEED_TO_USE_VOCABULARY_WHEN_SORT} eq "YES") {
+    for $k (keys %group_weight) {
+        if (defined($Vocabulary{$k})) {
+            $group_weight{$k} *= 2;
+        }
+    }
+}
 @lines2 = sort { compare($b,$a) } @lines;
 for $l (@lines2) {
     print "$l\n";
 }
 PERL_SCRIPT
+    export VOCABULARY
+    export NEED_TO_USE_VOCABULARY_WHEN_SORT
     perl $PERL_SCRIPT_TEMP_NAME
     rm $PERL_SCRIPT_TEMP_NAME
 }
diff -r 416394a87d9f -r 23f949c03f95 oneliners.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/oneliners.sh	Wed Apr 28 21:00:38 2010 +0300
@@ -0,0 +1,11 @@
+temp=/tmp/oneliner$$
+while read l
+do
+    dict -h dictd.xdsl.by -d mueller24 "$l" 2> /dev/null | perl -n -e 'print if ($y and not /^\s*$/) ; $y=1 if /'$l'/;' | grep -v _Syn | grep -v _Ant > $temp
+    if [ `wc -l $temp | awk '{print $1}'` = 1 ]
+    then
+        echo -n $l
+        cat $temp
+    fi
+done
+