new-words
annotate grep-sentences.pl @ 28:7db7bbf96fad
temporary files names
| author | Igor Chubin <igor@chub.in> | 
|---|---|
| date | Tue Jun 15 05:46:50 2010 +0300 (2010-06-15) | 
| parents | 416394a87d9f | 
| children | c631833fa2be | 
| rev | line source | 
|---|---|
#!/usr/bin/perl
#
# grep-sentences.pl REGEXP FILE
#
# Print every sentence of FILE that contains a whole-word match of REGEXP.
# FILE is plain text (pass "-" to read standard input).  To search a web
# page, dump it to text first, e.g.  lynx -dump URL > FILE.
#
# Each matching sentence is printed followed by "." and an empty line.
# Note that the original terminator is not preserved: sentences that ended
# in "!" or "?" are printed with a trailing "." as well.

use strict;
use warnings;

# split_sentences($text) -> list of cleaned-up sentences
#
# Strips http(s) URLs, joins lines, splits the text on ".", "!" and "?",
# removes "[12]"-style reference markers, collapses runs of whitespace and
# trims each sentence.  The dots of a few common abbreviations
# (Mr., Mrs., viz., e.g., i.e.) do not end a sentence.
# Empty fragments are dropped.  Returns an empty list for undefined input.
sub split_sentences {
    my ($text) = @_;
    return if !defined $text;

    # Stand-in for the dot of a protected abbreviation while splitting.
    # NUL is used because it does not occur in ordinary text.
    my $dot = "\x00";

    $text =~ s{https?://[a-zA-Z&_.:/0-9%?=,\#+()\[\]~-]*}{}g;

    # A line break separates words, so it becomes a space (runs of
    # whitespace are collapsed below); deleting it would glue words.
    $text =~ tr/\n/ /;

    $text =~ s{(Mr|Mrs|viz)\.}{$1$dot}g;
    $text =~ s{e\.g\.}{e${dot}g$dot}g;
    $text =~ s{i\.e\.}{i${dot}e$dot}g;

    my @sentences;
    for my $sentence (split /[.!?]/, $text) {
        $sentence =~ tr/\x00/./;          # restore protected dots
        $sentence =~ s/\[[0-9]+\]//g;     # drop reference markers like [12]
        $sentence =~ s/\s+/ /g;
        # Trim last, so that removing a marker at either end of the
        # sentence does not leave a stray space behind.
        $sentence =~ s/^\s+//;
        $sentence =~ s/\s+$//;
        next if $sentence eq '';
        push @sentences, $sentence;
    }
    return @sentences;
}

# grep_sentences($regexp, $text) -> list of matching sentences
#
# Returns the sentences of $text (see split_sentences) in which $regexp
# matches between word boundaries.  $regexp is a Perl regular expression;
# dies with a readable message if it does not compile.
sub grep_sentences {
    my ($regexp, $text) = @_;

    # Compiling with qr// groups the user's pattern, so that the word
    # boundaries apply to the whole of "foo|bar" and not to one branch each.
    my $user_re = eval { qr/$regexp/ }
        or die "Invalid regular expression '$regexp': $@";
    my $word_re = qr/\b$user_re\b/;

    return grep { $_ =~ $word_re } split_sentences($text);
}

# main($regexp, $page): command-line entry point.
#
# Reads the whole of $page and prints the matching sentences.  Prints a
# usage message and returns if an argument is missing; dies if $page
# cannot be opened.
sub main {
    my ($regexp, $page) = @_;

    if (!defined $regexp || !defined $page) {
        warn "Usage: $0 REGEXP FILE\n";
        return;
    }

    my $fh;
    if ($page eq '-') {
        $fh = \*STDIN;
    }
    else {
        # Three-argument open: the file name is never interpreted as a
        # mode or a command, whatever characters it contains.
        open $fh, '<', $page
            or die "Cannot open '$page': $!\n";
    }

    my $text = do { local $/; <$fh> };    # slurp the whole input
    close $fh if $page ne '-';

    print "$_.\n\n" for grep_sentences($regexp, $text);
    return;
}

# Run only when executed as a script, not when loaded by other code.
main(@ARGV) unless caller;
