new-words

changeset 25:d1eb7dc37feb

minifix: -p pages
author Igor Chubin <igor@chub.in>
date Mon May 17 14:48:34 2010 +0300 (2010-05-17)
parents 1318aa5898ee
children 4a10c0f4510c
files new-words.sh part.pl
line diff
     1.1 --- a/new-words.sh	Mon May 17 12:14:44 2010 +0300
     1.2 +++ b/new-words.sh	Mon May 17 14:48:34 2010 +0300
     1.3 @@ -15,6 +15,7 @@
     1.4      -l lang     override language settings
     1.5      -n          non-interactive mode (don't run vi)
     1.6      -m          don't add marks (and don't save marks added by user)
     1.7 +    -p pages    work with specified pages only (pages = start-stop/total)
     1.8      -s          show the text statistics (percentage of known words and so on) and exit
     1.9  
    1.10  The language of the text can be specified also
    1.11 @@ -59,7 +60,8 @@
    1.12  NEED_TO_USE_VOCABULARY_WHEN_SORT=NO
    1.13  DONT_ADD_MARKS=NO
    1.14  NON_INTERACTIVE_MODE=NO
    1.15 -while getopts l:skmnp opt
    1.16 +PART_TO_PROCESS=''
    1.17 +while getopts l:skmnp: opt
    1.18  do
    1.19      case "$opt" in
    1.20        s)  STAT_ONLY=YES;;
    1.21 @@ -67,7 +69,7 @@
    1.22        l)  LANGUAGE="$OPTARG";;
    1.23        m)  DONT_ADD_MARKS=YES;;
    1.24        n)  NON_INTERACTIVE_MODE=YES;;
    1.25 -      p)  PAGES="$OPTARG";;
    1.26 +      p)  PART_TO_PROCESS="$OPTARG";;
    1.27        \?)       # unknown flag
    1.28            show_usage
    1.29            exit 1;;
    1.30 @@ -424,6 +426,49 @@
    1.31      rm $PERL_SCRIPT_TEMP_NAME
    1.32  }
    1.33  
    1.34 +part()
    1.35 +{
    1.36 +    PERL_SCRIPT_TEMP_NAME=`mktemp /tmp/perl-part-XXXXXXXX`
    1.37 +    cat <<'PERL_SCRIPT' > $PERL_SCRIPT_TEMP_NAME
    1.38 +#!/usr/bin/perl
    1.39 +
    1.40 +my @lines=<STDIN>;
    1.41 +my $lines=$#lines;
    1.42 +my $interval=$ARGV[0];
    1.43 +if (not $interval) {
    1.44 +    print @lines;
    1.45 +}
    1.46 +else {
    1.47 +    my ($start,$stop,$total);
    1.48 +    if ($interval =~ m@(.*)/(.*)@) {
    1.49 +        $start = $1;
    1.50 +        $total = $2;
    1.51 +    }
    1.52 +    else {
    1.53 +        $start=$interval;
    1.54 +        $total=0;
    1.55 +    }
    1.56 +    if ($start =~ m@(.*)-(.*)@) {
    1.57 +        $start = $1;
    1.58 +        $stop = $2;
    1.59 +    }
    1.60 +    if ($start =~ m@(.*)\+(.*)@) {
    1.61 +        $start = $1;
    1.62 +        $stop = $start+$2;
    1.63 +    }
    1.64 +
    1.65 +    $start=int($lines/$total*$start);
    1.66 +    $stop=int($lines/$total*$stop);
    1.67 +
    1.68 +    for($i=$start;$i<$stop;$i++){
    1.69 +        print $lines[$i];
    1.70 +    }
    1.71 +}
    1.72 +PERL_SCRIPT
    1.73 +    perl $PERL_SCRIPT_TEMP_NAME "$1"
    1.74 +    rm $PERL_SCRIPT_TEMP_NAME
    1.75 +}
    1.76 +
    1.77  mkdir -p $WORK_DIR
    1.78  oldpwd="$PWD"
    1.79  cd $WORK_DIR
    1.80 @@ -441,6 +486,7 @@
    1.81  else 
    1.82      cat
    1.83  fi \
    1.84 +   | part $PART_TO_PROCESS \
    1.85     | tee $ORIGINAL_TEXT \
    1.86     | get_words ${TEMP1}-full \
    1.87     | group_words \
     2.1 --- a/part.pl	Mon May 17 12:14:44 2010 +0300
     2.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.3 @@ -1,39 +0,0 @@
     2.4 -#!/usr/bin/perl
     2.5 -
     2.6 -my @lines=<STDIN>;
     2.7 -my $lines=$#lines;
     2.8 -my $interval=$ARGV[0];
     2.9 -my ($start,$stop,$total);
    2.10 -if ($interval =~ m@(.*)/(.*)@) {
    2.11 -    $start = $1;
    2.12 -    $total = $2;
    2.13 -}
    2.14 -else {
    2.15 -    $start=$interval;
    2.16 -    $total=0;
    2.17 -}
    2.18 -if ($start =~ m@(.*)-(.*)@) {
    2.19 -    $start = $1;
    2.20 -    $stop = $2;
    2.21 -}
    2.22 -if ($start =~ m@(.*)\+(.*)@) {
    2.23 -    $start = $1;
    2.24 -    #if ($2 =~ /%$/) {
    2.25 -    #}
    2.26 -    #else {
    2.27 -    $stop = $start+$2;
    2.28 -    #}
    2.29 -}
    2.30 -#print "start = $start\n";
    2.31 -#print "stop = $stop\n";
    2.32 -#print "total = $total\n";
    2.33 -
    2.34 -$start=int($lines/$total*$start);
    2.35 -$stop=int($lines/$total*$stop);
    2.36 -
    2.37 -#print "start = $start\n";
    2.38 -#print "stop = $stop\n";
    2.39 -#print "total = $total\n";
    2.40 -for($i=$start;$i<$stop;$i++){
    2.41 -    print $lines[$i];
    2.42 -}