# HG changeset patch # User Igor Chubin # Date 1270492495 -10800 # Node ID 7e3a52db54adfd165c330be3f3e2dfa9b611e052 # Parent 35eeaf2620ce807c255c3520c6934ba58844fb51 Среднее количество слов и неизвестных слов (*10) в предложении в статистике. Пример использования: LANG KNOWN% UNKNOWN% KNOWN TOTAL WPS UWPS*10 en 89.8 10.2 167021 185840 21 21 diff -r 35eeaf2620ce -r 7e3a52db54ad new-words.sh --- a/new-words.sh Sun Apr 04 20:11:22 2010 +0300 +++ b/new-words.sh Mon Apr 05 21:34:55 2010 +0300 @@ -75,10 +75,14 @@ total_unknown="`cat $after|awk '{s=s+$1}END{print s}'`" total_known="`echo $total-$total_unknown|bc`" percentage="`echo '100*('$total-$total_unknown')'/$total | bc -l | sed 's/\\.\(.\).*/.\1/'`" + #sentences="`cat $after | perl -e 'local $/; $_=<>; s@http://[a-zA-Z&_.:/0-9%?=,\#+()\[\]~-]*@@g; s@\n@@g; s@(Mr|Mrs)\.@\1POINT@g; @sentences=split /\\./;print $#sentences;'`" + sentences="`cat $ORIGINAL_TEXT | perl -e 'local $/; $_=<>; s/[^.]//msg; print length($_);'`" + + if [ "$STAT_ONLY" = "YES" ] then - echo "LANG KNOWN% UNKNOWN% KNOWN TOTAL" - echo "$LANGUAGE $percentage `echo \(100-$percentage\) | bc -l` $total_known $total" + echo "LANG KNOWN% UNKNOWN% KNOWN TOTAL WPS UWPS*10" + echo "$LANGUAGE $percentage `echo \(100-$percentage\) | bc -l` $total_known $total `echo $total/$sentences|bc` `echo 10*$total_unknown/$sentences|bc` " rm $after return 0 else