new-words
diff new-words-py.sh @ 44:7eb1a8c3eade
-2 and -3 are now supported by new-words.py
author | Igor Chubin <igor@chub.in> |
---|---|
date | Fri Jan 28 21:45:58 2011 +0100 (2011-01-28) |
parents | d532e7b52ab2 |
children | 5f90e44eecfc |
line diff
```
1.1 --- a/new-words-py.sh Fri Jan 28 12:40:58 2011 +0200
1.2 +++ b/new-words-py.sh Fri Jan 28 21:45:58 2011 +0100
1.3 @@ -123,39 +123,6 @@
1.4 exit 0
1.5 fi
1.6
1.7 -two_and_three_words()
1.8 -{
1.9 - if [ $GROUP_WORDS_BY_THREE = NO -a $GROUP_WORDS_BY_TWO = NO ]
1.10 - then
1.11 - cat
1.12 - else
1.13 - cat
1.14 -
1.15 - export GROUP_WORDS_BY_THREE
1.16 - export GROUP_WORDS_BY_TWO
1.17 - PERL_SCRIPT_TEMP_NAME=`mktemp /tmp/perl-two-and-three-XXXXXXXX`
1.18 - cat <<'PERL_SCRIPT' > $PERL_SCRIPT_TEMP_NAME
1.19 -#!/usr/bin/perl
1.20 -local $/;
1.21 -$words=<>;
1.22 -$words=~ s@[!?;,:#1-9".]@ @g;
1.23 -$words =~ s@\s+@ @g;
1.24 -@words = split /\s+/, $words;
1.25 -for ($i=0; $i<$#words-3;$i++) {
1.26 - my ($a,$b,$c)= ($words[$i],$words[$i+1],$words[$i+2]);
1.27 - if ($ENV{GROUP_WORDS_BY_THREE} eq "YES" and ($a && $b && $c)) {
1.28 - print "${a}_${b}_${c}\n";
1.29 - };
1.30 - if ($ENV{GROUP_WORDS_BY_TWO} eq "YES" and ($a && $b)) {
1.31 - print "${a}_${b}\n";
1.32 - };
1.33 -}
1.34 -PERL_SCRIPT
1.35 - perl $PERL_SCRIPT_TEMP_NAME "$ORIGINAL_TEXT"
1.36 - rm $PERL_SCRIPT_TEMP_NAME
1.37 - fi
1.38 -}
1.39 -
1.40 text_from_url()
1.41 {
1.42 lynx -dump "$1" | perl -p -e 's@http://[a-zA-Z&_.:/0-9%?=,#+()\[\]~-]*@@'
1.43 @@ -171,6 +138,9 @@
1.44 }
1.45 get_words_group_words_add_stat()
1.46 {
1.47 + STAT_ONLY="$STAT_ONLY" \
1.48 + GROUP_WORDS_BY_TWO="$GROUP_WORDS_BY_TWO" \
1.49 + GROUP_WORDS_BY_THREE="$GROUP_WORDS_BY_THREE" \
1.50 $NEW_WORDS_PY -l "$LANGUAGE" -f get_words_group_words_add_stat "$1"
1.51 }
1.52
1.53 @@ -282,8 +252,8 @@
1.54 fi \
1.55 | part $PART_TO_PROCESS \
1.56 | tee $ORIGINAL_TEXT \
1.57 - | two_and_three_words \
1.58 - | STAT_ONLY="$STAT_ONLY" get_words_group_words_add_stat \
1.59 + | \
1.60 + get_words_group_words_add_stat \
1.61 | tee "$TEMP1" > "$TEMP2"
1.62
1.63 if [ "$STAT_ONLY" = "YES" ]
```