new-words

diff new-words-py.sh @ 44:7eb1a8c3eade

Options -2 and -3 are now supported by new-words.py
author Igor Chubin <igor@chub.in>
date Fri Jan 28 21:45:58 2011 +0100 (2011-01-28)
parents d532e7b52ab2
children 5f90e44eecfc
line diff
     1.1 --- a/new-words-py.sh	Fri Jan 28 12:40:58 2011 +0200
     1.2 +++ b/new-words-py.sh	Fri Jan 28 21:45:58 2011 +0100
     1.3 @@ -123,39 +123,6 @@
     1.4    exit 0
     1.5  fi
     1.6  
     1.7 -two_and_three_words()
     1.8 -{
     1.9 -    if [ $GROUP_WORDS_BY_THREE = NO -a $GROUP_WORDS_BY_TWO = NO ]
    1.10 -    then 
    1.11 -        cat
    1.12 -    else
    1.13 -        cat 
    1.14 -
    1.15 -    export GROUP_WORDS_BY_THREE
    1.16 -    export GROUP_WORDS_BY_TWO
    1.17 -    PERL_SCRIPT_TEMP_NAME=`mktemp /tmp/perl-two-and-three-XXXXXXXX`
    1.18 -    cat <<'PERL_SCRIPT' > $PERL_SCRIPT_TEMP_NAME
    1.19 -#!/usr/bin/perl
    1.20 -local $/;
    1.21 -$words=<>;
    1.22 -$words=~ s@[!?;,:#1-9".]@ @g;
    1.23 -$words =~ s@\s+@ @g;
    1.24 -@words = split /\s+/, $words;
    1.25 -for ($i=0; $i<$#words-3;$i++) {
    1.26 -    my ($a,$b,$c)= ($words[$i],$words[$i+1],$words[$i+2]);
    1.27 -    if ($ENV{GROUP_WORDS_BY_THREE} eq "YES" and ($a && $b && $c)) {
    1.28 -        print "${a}_${b}_${c}\n";
    1.29 -    };  
    1.30 -    if ($ENV{GROUP_WORDS_BY_TWO} eq "YES" and ($a && $b)) {
    1.31 -        print "${a}_${b}\n";
    1.32 -    };
    1.33 -}
    1.34 -PERL_SCRIPT
    1.35 -    perl $PERL_SCRIPT_TEMP_NAME "$ORIGINAL_TEXT"
    1.36 -    rm $PERL_SCRIPT_TEMP_NAME
    1.37 -    fi
    1.38 -}
    1.39 -
    1.40  text_from_url()
    1.41  {
    1.42  lynx -dump "$1" | perl -p -e 's@http://[a-zA-Z&_.:/0-9%?=,#+()\[\]~-]*@@'
    1.43 @@ -171,6 +138,9 @@
    1.44  }
    1.45  get_words_group_words_add_stat()
    1.46  {
    1.47 +    STAT_ONLY="$STAT_ONLY" \
    1.48 +    GROUP_WORDS_BY_TWO="$GROUP_WORDS_BY_TWO" \
    1.49 +    GROUP_WORDS_BY_THREE="$GROUP_WORDS_BY_THREE" \
    1.50      $NEW_WORDS_PY -l "$LANGUAGE" -f get_words_group_words_add_stat "$1"
    1.51  }
    1.52  
    1.53 @@ -282,8 +252,8 @@
    1.54  fi \
    1.55     | part $PART_TO_PROCESS \
    1.56     | tee $ORIGINAL_TEXT \
    1.57 -   | two_and_three_words \
    1.58 -   | STAT_ONLY="$STAT_ONLY" get_words_group_words_add_stat \
    1.59 +   | \
    1.60 +    get_words_group_words_add_stat \
    1.61     | tee "$TEMP1" > "$TEMP2"
    1.62  
    1.63  if [ "$STAT_ONLY" = "YES" ]