#!/usr/bin/perl
#
# Print the sentences of a text file that contain a match for a regular
# expression, at most $NUMBER_OF_PRINTED_MAX of them.
#
# Usage: <script> REGEXP FILE
#
#   REGEXP  Perl regular expression, UTF-8 encoded on the command line.
#           It is matched as a whole between word boundaries.
#   FILE    Path of a UTF-8 encoded text file.
#
# Every matching sentence is written to STDOUT followed by "." and an
# empty line.

use strict;
use warnings;
use utf8;
use Encode;

binmode STDIN,  ":utf8";
binmode STDOUT, ":utf8";

# Upper bound on the number of sentences printed in one run.
my $NUMBER_OF_PRINTED_MAX = 10;

# Break $text into sentences and return them as a list of cleaned strings.
#
# URLs and newlines are removed first, the text is split on '.', '!' and
# '?', and each piece has footnote markers such as "[12]" removed and its
# whitespace normalised.  The terminating punctuation is not part of the
# returned sentences.
sub split_into_sentences {
    my ($text) = @_;

    # URLs contain dots and question marks that would otherwise be taken
    # for sentence ends.
    $text =~ s{https?://[a-zA-Z&_.:/0-9%?=,\#+()\[\]~-]*}{}g;

    # A newline becomes a blank so that the last word of a line is not
    # glued to the first word of the next one.
    $text =~ s{\n}{ }g;

    # Hide the dots of a few abbreviations behind a placeholder so that
    # the split below does not cut a sentence in the middle.
    $text =~ s{(Mr|Mrs|viz)\.}{${1}POINT}g;
    $text =~ s{e\.g\.}{ePOINTgPOINT}g;
    $text =~ s{i\.e\.}{iPOINTePOINT}g;

    my @sentences;
    for my $sentence (split /[.!?]/, $text) {
        # Put the abbreviation dots back.
        $sentence =~ s{iPOINTePOINT}{i.e.}g;
        $sentence =~ s{ePOINTgPOINT}{e.g.}g;
        $sentence =~ s{(Mr|Mrs|viz)POINT}{$1.}g;

        $sentence =~ s{\[[0-9]+\]}{}g;    # footnote markers like [3]
        $sentence =~ s{\s+}{ }g;

        # Trim last: removing a marker can expose blanks at either end.
        $sentence =~ s{\A\s+}{};
        $sentence =~ s{\s+\z}{};

        push @sentences, $sentence;
    }
    return @sentences;
}

# Return, in document order, the first $max sentences of $text that match
# $regexp between word boundaries.
#
# $regexp is used as a regular expression, not as a literal string.  It is
# wrapped in a group so that an alternation such as "cat|dog" is bounded
# by \b on both sides as a whole.
sub find_matching_sentences {
    my ($text, $regexp, $max) = @_;

    my $pattern = qr/\b(?:$regexp)\b/;

    my @found;
    for my $sentence (split_into_sentences($text)) {
        last if @found >= $max;
        push @found, $sentence if $sentence =~ $pattern;
    }
    return @found;
}

# Entry point: read FILE, print the matching sentences.
#
# Returns true on success.  On a usage error or when FILE cannot be
# opened, a message is written to STDERR and a false value is returned.
sub main {
    my ($regexp, $page) = @_;

    if (!defined $regexp or !defined $page) {
        warn "Usage: $0 REGEXP FILE\n";
        return 0;
    }

    # Command-line arguments arrive as bytes; turn them into characters
    # so that they match the decoded file content.
    $regexp = decode("utf8", $regexp);

    # Three-argument open: $page is always treated as a plain file name,
    # never as a mode or a command.
    my $fh;
    if (!open $fh, '<:utf8', $page) {
        warn "Cannot open '$page': $!\n";
        return 0;
    }
    my $text = do { local $/; <$fh> };
    close $fh;
    $text = '' if !defined $text;

    my @matches =
        find_matching_sentences($text, $regexp, $NUMBER_OF_PRINTED_MAX);
    for my $sentence (@matches) {
        print "$sentence.\n\n";
    }
    return 1;
}

main(@ARGV);