# HG changeset patch
# User Igor Chubin
# Date 1274096914 -10800
# Node ID d1eb7dc37feb7fb9b4b4e2a42176b9a71ffa6e7e
# Parent 1318aa5898ee54beafb49fe2e5096f1aec4d5215
minifix: -p pages

diff -r 1318aa5898ee -r d1eb7dc37feb new-words.sh
--- a/new-words.sh	Mon May 17 12:14:44 2010 +0300
+++ b/new-words.sh	Mon May 17 14:48:34 2010 +0300
@@ -15,6 +15,7 @@
     -l lang override language settings
     -n non-interactive mode (don't run vi)
     -m don't add marks (and don't save marks added by user)
+    -p pages work with specified pages only (pages = start-stop/total )
     -s show the text statistics (percentage of known words and so on) and exit
 
 The language of the text can be specified also
@@ -59,7 +60,8 @@
 NEED_TO_USE_VOCABULARY_WHEN_SORT=NO
 DONT_ADD_MARKS=NO
 NON_INTERACTIVE_MODE=NO
-while getopts l:skmnp opt
+PART_TO_PROCESS=''
+while getopts l:skmnp: opt
 do
     case "$opt" in
         s) STAT_ONLY=YES;;
@@ -67,7 +69,7 @@
         l) LANGUAGE="$OPTARG";;
         m) DONT_ADD_MARKS=YES;;
         n) NON_INTERACTIVE_MODE=YES;;
-        p) PAGES="$OPTARG";;
+        p) PART_TO_PROCESS="$OPTARG";;
         \?) # unknown flag
             show_usage
             exit 1;;
@@ -424,6 +426,49 @@
     rm $PERL_SCRIPT_TEMP_NAME
 }
 
+part()
+{
+    PERL_SCRIPT_TEMP_NAME=`mktemp /tmp/perl-part-XXXXXXXX`
+    cat <<'PERL_SCRIPT' > $PERL_SCRIPT_TEMP_NAME
+#!/usr/bin/perl
+
+my @lines=<STDIN>;
+my $lines=$#lines;
+my $interval=$ARGV[0];
+if (not $interval) {
+    print @lines;
+}
+else {
+    my ($start,$stop,$total);
+    if ($interval =~ m@(.*)/(.*)@) {
+        $start = $1;
+        $total = $2;
+    }
+    else {
+        $start=$interval;
+        $total=0;
+    }
+    if ($start =~ m@(.*)-(.*)@) {
+        $start = $1;
+        $stop = $2;
+    }
+    if ($start =~ m@(.*)\+(.*)@) {
+        $start = $1;
+        $stop = $start+$2;
+    }
+
+    $start=int($lines/$total*$start);
+    $stop=int($lines/$total*$stop);
+
+    for($i=$start;$i<$stop;$i++){
+        print $lines[$i];
+    }
+}
+PERL_SCRIPT
+    perl $PERL_SCRIPT_TEMP_NAME "$1"
+    rm $PERL_SCRIPT_TEMP_NAME
+}
+
 mkdir -p $WORK_DIR
 oldpwd="$PWD"
 cd $WORK_DIR
@@ -441,6 +486,7 @@
 else
     cat
 fi \
+    | part $PART_TO_PROCESS \
     | tee $ORIGINAL_TEXT \
     | get_words ${TEMP1}-full \
     | group_words \
diff -r 1318aa5898ee -r d1eb7dc37feb part.pl
--- a/part.pl	Mon May 17 12:14:44 2010 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-#!/usr/bin/perl
-
-my @lines=<STDIN>;
-my $lines=$#lines;
-my $interval=$ARGV[0];
-my ($start,$stop,$total);
-if ($interval =~ m@(.*)/(.*)@) {
-    $start = $1;
-    $total = $2;
-}
-else {
-    $start=$interval;
-    $total=0;
-}
-if ($start =~ m@(.*)-(.*)@) {
-    $start = $1;
-    $stop = $2;
-}
-if ($start =~ m@(.*)\+(.*)@) {
-    $start = $1;
-    #if ($2 =~ /%$/) {
-    #}
-    #else {
-    $stop = $start+$2;
-    #}
-}
-#print "start = $start\n";
-#print "stop = $stop\n";
-#print "total = $total\n";
-
-$start=int($lines/$total*$start);
-$stop=int($lines/$total*$stop);
-
-#print "start = $start\n";
-#print "stop = $stop\n";
-#print "total = $total\n";
-for($i=$start;$i<$stop;$i++){
-    print $lines[$i];
-}