new-words
diff new-words.sh @ 25:d1eb7dc37feb
minifix: -p pages
author | Igor Chubin <igor@chub.in> |
---|---|
date | Mon May 17 14:48:34 2010 +0300 (2010-05-17) |
parents | 1318aa5898ee |
children | 4a10c0f4510c |
line diff
```diff
1.1 --- a/new-words.sh Mon May 17 12:14:44 2010 +0300
1.2 +++ b/new-words.sh Mon May 17 14:48:34 2010 +0300
1.3 @@ -15,6 +15,7 @@
1.4 -l lang override language settings
1.5 -n non-interactive mode (don't run vi)
1.6 -m don't add marks (and don't save marks added by user)
1.7 + -p pages work with specified pages only (pages = start-stop/total )
1.8 -s show the text statistics (percentage of known words and so on) and exit
1.9
1.10 The language of the text can be specified also
1.11 @@ -59,7 +60,8 @@
1.12 NEED_TO_USE_VOCABULARY_WHEN_SORT=NO
1.13 DONT_ADD_MARKS=NO
1.14 NON_INTERACTIVE_MODE=NO
1.15 -while getopts l:skmnp opt
1.16 +PART_TO_PROCESS=''
1.17 +while getopts l:skmnp: opt
1.18 do
1.19 case "$opt" in
1.20 s) STAT_ONLY=YES;;
1.21 @@ -67,7 +69,7 @@
1.22 l) LANGUAGE="$OPTARG";;
1.23 m) DONT_ADD_MARKS=YES;;
1.24 n) NON_INTERACTIVE_MODE=YES;;
1.25 - p) PAGES="$OPTARG";;
1.26 + p) PART_TO_PROCESS="$OPTARG";;
1.27 \?) # unknown flag
1.28 show_usage
1.29 exit 1;;
1.30 @@ -424,6 +426,49 @@
1.31 rm $PERL_SCRIPT_TEMP_NAME
1.32 }
1.33
1.34 +part()
1.35 +{
1.36 + PERL_SCRIPT_TEMP_NAME=`mktemp /tmp/perl-part-XXXXXXXX`
1.37 + cat <<'PERL_SCRIPT' > $PERL_SCRIPT_TEMP_NAME
1.38 +#!/usr/bin/perl
1.39 +
1.40 +my @lines=<STDIN>;
1.41 +my $lines=$#lines;
1.42 +my $interval=$ARGV[0];
1.43 +if (not $interval) {
1.44 + print @lines;
1.45 +}
1.46 +else {
1.47 + my ($start,$stop,$total);
1.48 + if ($interval =~ m@(.*)/(.*)@) {
1.49 + $start = $1;
1.50 + $total = $2;
1.51 + }
1.52 + else {
1.53 + $start=$interval;
1.54 + $total=0;
1.55 + }
1.56 + if ($start =~ m@(.*)-(.*)@) {
1.57 + $start = $1;
1.58 + $stop = $2;
1.59 + }
1.60 + if ($start =~ m@(.*)\+(.*)@) {
1.61 + $start = $1;
1.62 + $stop = $start+$2;
1.63 + }
1.64 +
1.65 + $start=int($lines/$total*$start);
1.66 + $stop=int($lines/$total*$stop);
1.67 +
1.68 + for($i=$start;$i<$stop;$i++){
1.69 + print $lines[$i];
1.70 + }
1.71 +}
1.72 +PERL_SCRIPT
1.73 + perl $PERL_SCRIPT_TEMP_NAME "$1"
1.74 + rm $PERL_SCRIPT_TEMP_NAME
1.75 +}
1.76 +
1.77 mkdir -p $WORK_DIR
1.78 oldpwd="$PWD"
1.79 cd $WORK_DIR
1.80 @@ -441,6 +486,7 @@
1.81 else
1.82 cat
1.83 fi \
1.84 + | part $PART_TO_PROCESS \
1.85 | tee $ORIGINAL_TEXT \
1.86 | get_words ${TEMP1}-full \
1.87 | group_words \
```