new-words

view grep-sentences.pl @ 28:7db7bbf96fad

temporary files names
author Igor Chubin <igor@chub.in>
date Tue Jun 15 05:46:50 2010 +0300 (2010-06-15)
parents 416394a87d9f
children c631833fa2be
line source
#!/usr/bin/perl
#
# grep-sentences.pl - print the sentences of a text file that contain a word.
#
# Usage: grep-sentences.pl REGEXP FILE
#
# FILE is read whole and split into sentences on '.', '!' and '?'.  Every
# sentence that matches /\bREGEXP\b/ is printed followed by "." and an empty
# line.  REGEXP is used as a regular expression, not as a literal string.

use strict;
use warnings;

# Split a text into cleaned-up sentences.
#
# Takes the whole text as one string and returns the list of sentences,
# without their terminating punctuation.  URLs and "[12]"-style numeric
# reference markers are removed and runs of whitespace are reduced to a
# single space.
sub split_sentences {
    my ($text) = @_;

    # Drop URLs first: their dots would otherwise be taken for sentence ends.
    $text =~ s{https?://[a-zA-Z&_.:/0-9%?=,\#+()\[\]~-]*}{}g;

    # A line break separates words, so turn it into a space instead of
    # deleting it (deleting would glue "a\ncat" into "acat").
    $text =~ tr/\n/ /;

    # Hide the dots of known abbreviations behind a NUL byte so that the
    # split below does not end a sentence at "Mr.", "e.g." and the like.
    $text =~ s{(Mr|Mrs|viz)\.}{$1\0}g;
    $text =~ s{e\.g\.}{e\0g\0}g;
    $text =~ s{i\.e\.}{i\0e\0}g;

    my @sentences;
    for my $sentence (split /[.!?]/, $text) {
        $sentence =~ tr/\0/./;             # bring the abbreviation dots back
        $sentence =~ s/\[[0-9]+\]//g;      # numeric reference markers
        $sentence =~ s/\s+/ /g;

        # Trim last: removing a leading "[1]" can expose a new leading space.
        # After the collapse above any remaining edge blank is a single space.
        $sentence =~ s/\A //;
        $sentence =~ s/ \z//;

        push @sentences, $sentence;
    }

    return @sentences;
}

# Print every sentence of the file $page that matches /\b$regexp\b/.
#
# Each match is written to the currently selected output handle as the
# sentence, a ".", and an empty line.  Dies when the file cannot be opened
# (or when $regexp is not a valid regular expression).
sub main {
    my ($regexp, $page) = @_;

    # Three-argument open: the file name is never interpreted as a mode or
    # as a command to run, whatever characters it contains.
    open my $fh, '<', $page
        or die "$0: cannot open '$page': $!\n";
    my $text = do { local $/; <$fh> };
    close $fh;
    $text = '' unless defined $text;

    my $pattern = qr/\b$regexp\b/;
    for my $sentence (split_sentences($text)) {
        print "$sentence.\n\n" if $sentence =~ $pattern;
    }

    return;
}

if (@ARGV >= 2) {
    main(@ARGV[0, 1]);
}
else {
    # Without both arguments the script has always been a no-op with exit
    # status 0; keep that, but tell the user what is expected.
    warn "Usage: $0 REGEXP FILE\n";
}