new-words

diff grep-sentences.pl @ 45:5f90e44eecfc

new-words.py: turn words filtering and grouping on and off
author Igor Chubin <igor@chub.in>
date Fri Feb 04 06:18:50 2011 +0100 (2011-02-04)
parents c631833fa2be
children 3a61988109a8
line diff
     1.1 --- a/grep-sentences.pl	Mon Jun 21 19:46:58 2010 +0300
     1.2 +++ b/grep-sentences.pl	Fri Feb 04 06:18:50 2011 +0100
     1.3 @@ -9,6 +9,8 @@
     1.4  $regexp=decode("utf8",$regexp);
     1.5  
     1.6  $page=$ARGV[1];
     1.7 +$number_of_printed = 0;
     1.8 +$number_of_printed_max = 10;
     1.9  #if (open(PAGE, "lynx -dump '$page'|")) {
    1.10  if (open(PAGE, "$page")) {
    1.11      binmode PAGE,":utf8";
    1.12 @@ -28,6 +30,9 @@
    1.13          s/\s*$//;
    1.14          s/\[[0-9]+\]//g;
    1.15          s/\s+/ /g;
    1.16 -        print "$_.\n\n" if /\b$regexp\b/;
    1.17 +        if (/\b$regexp\b/ and $number_of_printed < $number_of_printed_max ) {
    1.18 +            print "$_.\n\n";
    1.19 +            $number_of_printed++;
    1.20 +        }
    1.21      }
    1.22  }