#!/usr/bin/perl
#
# Print example sentences containing a given word or pattern.
#
# Usage: SCRIPT REGEXP FILE...
#
#   REGEXP  Perl regular expression (UTF-8 encoded on the command line).
#           It is matched as a whole word, i.e. wrapped in \b(...)\b.
#   FILE    One or more UTF-8 plain-text files; "-" reads standard input.
#           (An earlier revision fed pages through `lynx -dump`; the script
#           now expects text that has already been dumped to a file.)
#
# Each file is split into sentences at '.', '!' and '?'.  Up to
# $number_of_printed_max matching sentences (counted across all files) are
# printed, each followed by a blank line, with the first match highlighted
# using ANSI escape sequences.  The script exits as soon as the limit is hit.

use strict;
use warnings;
use utf8;
use Encode;

binmode STDIN,  ':utf8';
binmode STDOUT, ':utf8';

# Maximum number of sentences printed over the whole run.
my $number_of_printed_max = 10;

# ANSI escapes wrapped around the highlighted match (bold magenta / reset).
my $mark_start = "\e[35;1m";
my $mark_stop  = "\e[0m";

die "Usage: $0 REGEXP FILE...\n" unless @ARGV;

# Command-line arguments arrive as bytes; decode so the pattern matches
# against the decoded (character) text read from the files.
my $regexp = decode('utf8', shift @ARGV);

# Compile once.  The user's pattern is grouped so that a top-level
# alternation ("foo|bar") is bounded by \b on both sides as a whole, and so
# that the same pattern is used for both selecting and highlighting.
my $word_re = qr/\b($regexp)\b/;

my $number_of_printed = 0;

for my $page (@ARGV) {
    my $fh;
    if ($page eq '-') {
        $fh = \*STDIN;
    }
    elsif (!open $fh, '<:utf8', $page) {
        # Keep going with the remaining files, but say what was skipped.
        warn "$0: cannot open '$page': $!\n";
        next;
    }

    # Slurp the whole file: sentences may span several lines.
    my $text = do { local $/; <$fh> };
    close $fh unless $page eq '-';
    next unless defined $text;

    for my $sentence (split_sentences($text)) {
        # Highlight only the first match; a failed substitution means the
        # sentence does not contain the pattern at all.
        next unless $sentence =~ s/$word_re/$mark_start$1$mark_stop/;

        # The original terminator was consumed by the split, so every
        # printed sentence is closed with a full stop.
        print "$sentence.\n\n";

        exit 0 if ++$number_of_printed >= $number_of_printed_max;
    }
}

# Split a block of text into a list of cleaned-up sentences.
#
# URLs are removed first (their dots would otherwise be taken for sentence
# ends) and a few common abbreviations are shielded from the split by
# temporarily replacing their dots with the placeholder "POINT".
# Numeric reference markers such as "[12]" are dropped and runs of
# whitespace are collapsed to a single space.
sub split_sentences {
    my ($text) = @_;

    $text =~ s{https?://[a-zA-Z&_.:/0-9%?=,\#+()\[\]~-]*}{}g;

    # Join lines with a space so words on adjacent lines are not glued
    # together; surplus whitespace is collapsed below.
    $text =~ s/\n/ /g;

    $text =~ s/(Mr|Mrs|viz)\./${1}POINT/g;
    $text =~ s/e\.g\./ePOINTgPOINT/g;
    $text =~ s/i\.e\./iPOINTePOINT/g;

    my @sentences;
    for my $sentence (split /[.!?]/, $text) {
        # Undo the abbreviation shielding.
        $sentence =~ s/iPOINTePOINT/i.e./g;
        $sentence =~ s/ePOINTgPOINT/e.g./g;
        $sentence =~ s/(Mr|Mrs|viz)POINT/$1./g;

        $sentence =~ s/\[[0-9]+\]//g;    # footnote/link markers like [3]
        $sentence =~ s/\s+/ /g;
        $sentence =~ s/^\s*//;
        $sentence =~ s/\s*$//;

        push @sentences, $sentence;
    }

    return @sentences;
}