new-words
diff new-words-py.sh @ 54:e25de9ea9184
new-words.py is almost ready
author | Igor Chubin <igor@chub.in> |
---|---|
date | Tue Nov 01 20:19:18 2011 +0100 (2011-11-01) |
parents | 4e931db74618 |
children | 2a1a25e61872 |
line diff
1.1 --- a/new-words-py.sh Fri Mar 25 22:35:44 2011 +0200 1.2 +++ b/new-words-py.sh Tue Nov 01 20:19:18 2011 +0100 1.3 @@ -49,9 +49,6 @@ 1.4 1.5 NEW_WORDS_PY=/home/igor/hg/new-words/new-words.py 1.6 WORK_DIR=~/.new-words/ 1.7 -TEMP1=`mktemp /tmp/new-words-temp1.XXXXXXXXXX` 1.8 -TEMP2=`mktemp /tmp/new-words-temp2.XXXXXXXXXX` 1.9 -export ORIGINAL_TEXT=`mktemp /tmp/new-words-orig.XXXXXXXXXX` 1.10 editor=${EDITOR:-vim} 1.11 1.12 # language detection 1.13 @@ -133,74 +130,31 @@ 1.14 exit 0 1.15 fi 1.16 1.17 -text_from_url() 1.18 -{ 1.19 -lynx -dump "$1" | perl -p -e 's@http://[a-zA-Z&_.:/0-9%?=,#+()\[\]~-]*@@' 1.20 -} 1.21 - 1.22 -add_marks() 1.23 -{ 1.24 - $NEW_WORDS_PY -l "$LANGUAGE" -f add_notes "$1" 1.25 -} 1.26 -remove_marks() 1.27 -{ 1.28 - $NEW_WORDS_PY -l "$LANGUAGE" -f remove_notes "$1" 1.29 -} 1.30 get_words_group_words_add_stat() 1.31 { 1.32 + [ "$PART_TO_PROCESS" == "" ] || PART_TO_PROCESS="-p $PART_TO_PROCESS" 1.33 + [ "$ALLOWED_WORDS_FILENAME" = "" ] || ALLOWED_WORDS_FILENAME="-f $ALLOWED_WORDS_FILENAME" 1.34 + [ "$SHOW_RANGE_PERCENTAGE" = "" ] || SHOW_RANGE_PERCENTAGE="-R $SHOW_RANGE_PERCENTAGE" 1.35 + [ "$NON_INTERACTIVE_MODE" = YES ] && non_interactive="-n" 1.36 + [ "$STAT_ONLY" = YES ] && stat_only="-s" 1.37 + [ "$COMPRESSED_WORDLIST" = YES ] && compressed_wordlist="-c" 1.38 + [ "$FILTER_WORDS" = NO ] && filter_words="-N" 1.39 + [ "$GROUP_WORDS_BY_TWO" = YES ] && group_words_by_two="-2" 1.40 + [ "$GROUP_WORDS_BY_THREE" = YES ] && group_words_by_three="-3" 1.41 + 1.42 SHOW_RANGE="$SHOW_RANGE" \ 1.43 - SHOW_RANGE_PERCENTAGE="$SHOW_RANGE_PERCENTAGE" \ 1.44 - COMPRESSED_WORDLIST="$COMPRESSED_WORDLIST" \ 1.45 - GROUP_WORDS_BY_TWO="$GROUP_WORDS_BY_TWO" \ 1.46 - GROUP_WORDS_BY_THREE="$GROUP_WORDS_BY_THREE" \ 1.47 - STAT_ONLY="$STAT_ONLY" \ 1.48 WORDS_GROUPING="$WORDS_GROUPING" \ 1.49 - FILTER_WORDS="$FILTER_WORDS" \ 1.50 - ALLOWED_WORDS_FILENAME="$ALLOWED_WORDS_FILENAME" \ 1.51 - $NEW_WORDS_PY -l "$LANGUAGE" -f get_words_group_words_add_stat "$1" 1.52 -} 1.53 - 1.54 -part() 1.55 -{ 1.56 - PERL_SCRIPT_TEMP_NAME=`mktemp /tmp/perl-part-XXXXXXXX` 1.57 - cat <<'PERL_SCRIPT' > $PERL_SCRIPT_TEMP_NAME 1.58 -#!/usr/bin/perl 1.59 - 1.60 -my @lines=<STDIN>; 1.61 -my $lines=$#lines; 1.62 -my $interval=$ARGV[0]; 1.63 -if (not $interval) { 1.64 - print @lines; 1.65 -} 1.66 -else { 1.67 - my ($start,$stop,$total); 1.68 - if ($interval =~ m@(.*)/(.*)@) { 1.69 - $start = $1; 1.70 - $total = $2; 1.71 - } 1.72 - else { 1.73 - $start=$interval; 1.74 - $total=0; 1.75 - } 1.76 - if ($start =~ m@(.*)-(.*)@) { 1.77 - $start = $1; 1.78 - $stop = $2; 1.79 - } 1.80 - if ($start =~ m@(.*)\+(.*)@) { 1.81 - $start = $1; 1.82 - $stop = $start+$2; 1.83 - } 1.84 - 1.85 - $start=int($lines/$total*$start); 1.86 - $stop=int($lines/$total*$stop); 1.87 - 1.88 - for($i=$start;$i<$stop;$i++){ 1.89 - print $lines[$i]; 1.90 - } 1.91 -} 1.92 -PERL_SCRIPT 1.93 - perl $PERL_SCRIPT_TEMP_NAME "$1" 1.94 - rm $PERL_SCRIPT_TEMP_NAME 1.95 + $NEW_WORDS_PY -l "$LANGUAGE" \ 1.96 + $SHOW_RANGE_PERCENTAGE \ 1.97 + $PART_TO_PROCESS \ 1.98 + $ALLOWED_WORDS_FILENAME \ 1.99 + $non_interactive \ 1.100 + $stat_only \ 1.101 + $compressed_wordlist \ 1.102 + $filter_words \ 1.103 + $group_words_by_two \ 1.104 + $group_words_by_three \ 1.105 + -X get_words_group_words_add_stat "$1" 1.106 } 1.107 1.108 if [ "$TAGS_LIST_ONLY" = "YES" ] 1.109 @@ -232,68 +186,30 @@ 1.110 exit 0 1.111 fi 1.112 1.113 -mkdir -p $WORK_DIR 1.114 -oldpwd="$PWD" 1.115 -cd $WORK_DIR 1.116 -if [ "$MERGE_TAGGED_WORDS" = "YES" ] 1.117 -then 1.118 - VOC_FILES='' 1.119 - for i in $MERGE_THIS_TAGS 1.120 - do 1.121 - f=`tag_file_name $i` 1.122 - [ -e "$f" ] && VOC_FILES="${VOC_FILES} $f" 1.123 - done 1.124 - if [ -z "$VOC_FILES" ] 1.125 - then 1.126 - echo Unknown tags: $MERGE_THIS_TAGS > /dev/stderr 1.127 - else 1.128 - cat $VOC_FILES 1.129 - fi 1.130 -elif [ "$MERGE_ALL_TAGGED" = "YES" ] 1.131 -then 1.132 - cat ${LANGUAGE}_*.txt 1.133 -elif echo "$1" | grep -q http: 1.134 -then 1.135 - text_from_url "$1" 1.136 -elif [ "$#" != 0 ] 1.137 -then 1.138 - if echo $1 | grep -q ^/ 1.139 - then 1.140 - cat "$1" 1.141 - else 1.142 - cat "$oldpwd/$1" 1.143 - fi 1.144 -else 1.145 - cat 1.146 -fi \ 1.147 - | part $PART_TO_PROCESS \ 1.148 - | tee $ORIGINAL_TEXT \ 1.149 - | \ 1.150 - get_words_group_words_add_stat \ 1.151 - | tee "$TEMP1" > "$TEMP2" 1.152 +get_words_group_words_add_stat "$1" 1.153 1.154 -if [ "$STAT_ONLY" = "YES" ] 1.155 -then 1.156 - cat "$TEMP1" 1.157 -elif [ "$NON_INTERACTIVE_MODE" = "YES" ] 1.158 -then 1.159 - cat "$TEMP1" 1.160 -else 1.161 - if [ `wc -l "$TEMP2" | awk '{print $1}'` != 0 ] 1.162 - then 1.163 - [ "$DONT_ADD_MARKS" = "YES" ] || add_marks "$TEMP2" 1.164 - if [ "$editor" = vim ] 1.165 - then 1.166 - vim -c 'set keywordprg='"$LANGUAGE" -c 'set iskeyword=@,48-57,/,.,-,_,+,,,#,$,%,~,=,48-255' "$TEMP2" < /dev/tty > /dev/tty 1.167 - else 1.168 - $editor "$TEMP2" 1.169 - fi 1.170 - remove_marks "$TEMP2" 1.171 +#mkdir -p $WORK_DIR 1.172 +#oldpwd="$PWD" 1.173 +#cd $WORK_DIR 1.174 +#if [ "$MERGE_TAGGED_WORDS" = "YES" ] 1.175 +#then 1.176 +# VOC_FILES='' 1.177 +# for i in $MERGE_THIS_TAGS 1.178 +# do 1.179 +# f=`tag_file_name $i` 1.180 +# [ -e "$f" ] && VOC_FILES="${VOC_FILES} $f" 1.181 +# done 1.182 +# if [ -z "$VOC_FILES" ] 1.183 +# then 1.184 +# echo Unknown tags: $MERGE_THIS_TAGS > /dev/stderr 1.185 +# else 1.186 +# cat $VOC_FILES 1.187 +# fi 1.188 +#elif [ "$MERGE_ALL_TAGGED" = "YES" ] 1.189 +#then 1.190 +# cat ${LANGUAGE}_*.txt 1.191 +#else 1.192 +# cat 1.193 +#fi 1.194 1.195 - vocabulary="$VOCABULARY" 1.196 - [ -n "$TAG_NAME" ] && vocabulary="`tag_file_name $TAG_NAME`" 1.197 - diff "$TEMP1" "$TEMP2" | awk '{print $3}' | sort -u >> "$vocabulary" 1.198 - fi 1.199 -fi 1.200 1.201 -rm -f "$TEMP1" "$TEMP2" "${TEMP1}-full" "$ORIGINAL_TEXT"