new-words
annotate grep-sentences.pl @ 17:35eeaf2620ce
minifix
| author | Igor Chubin <igor@chub.in> |
|---|---|
| date | Sun Apr 04 20:11:22 2010 +0300 (2010-04-04) |
| parents | d1851ee29a1a |
| children | 416394a87d9f |
| rev | line source |
|---|---|
| igor@5 | 1 #!/usr/bin/perl |
| igor@5 | 2 |
| igor@5 | 3 |
| igor@6 | 4 $regexp=$ARGV[0]; |
| igor@6 | 5 $page=$ARGV[1]; |
| igor@5 | 6 #if (open(PAGE, "lynx -dump '$page'|")) { |
| igor@5 | 7 if (open(PAGE, "$page")) { |
| igor@5 | 8 local $/; |
| igor@5 | 9 $text=<PAGE>; |
| igor@5 | 10 $text =~ s@http://[a-zA-Z&_.:/0-9%?=,\#+()\[\]~-]*@@g; |
| igor@5 | 11 $text =~ s@\n@@g; |
| igor@15 | 12 $text =~ s@(Mr|Mrs)\.@\1POINT@g; |
| igor@5 | 13 @sentences=split /\./, $text; |
| igor@5 | 14 for (@sentences) { |
| igor@15 | 15 s@(Mr|Mrs)POINT@\1.@g; |
| igor@5 | 16 s/^\s*//; |
| igor@5 | 17 s/\s*$//; |
| igor@5 | 18 s/\[[0-9]+\]//g; |
| igor@5 | 19 s/\s+/ /g; |
| igor@5 | 20 print "$_.\n\n" if /\b$regexp\b/; |
| igor@5 | 21 } |
| igor@5 | 22 } |
