# HG changeset patch
# User Igor Chubin
# Date 1320436249 -3600
# Node ID 3a61988109a8464da01436d1888492d19542c5e6
# Parent 0799785cf194707033e97b56930c20a3ef8789da
grep-sentences.pl: color support

diff -r 0799785cf194 -r 3a61988109a8 grep-sentences.pl
--- a/grep-sentences.pl	Fri Nov 04 20:48:34 2011 +0100
+++ b/grep-sentences.pl	Fri Nov 04 20:50:49 2011 +0100
@@ -9,30 +9,39 @@
 $regexp=decode("utf8",$regexp);
 
 $page=$ARGV[1];
+shift @ARGV;
 $number_of_printed = 0;
 $number_of_printed_max = 10;
 #if (open(PAGE, "lynx -dump '$page'|")) {
-if (open(PAGE, "$page")) {
-    binmode PAGE,":utf8";
-    local $/;
-    $text=<PAGE>;
-    $text =~ s@http://[a-zA-Z&_.:/0-9%?=,\#+()\[\]~-]*@@g;
-    $text =~ s@\n@@g;
-    $text =~ s@(Mr|Mrs|viz)\.@\1POINT@g;
-    $text =~ s@e\.g\.@ePOINTgPOINT@g;
-    $text =~ s@i\.e\.@iPOINTePOINT@g;
-    @sentences=split /[.!?]/, $text;
-    for (@sentences) {
-        s@iPOINTePOINT@i\.e\.@g;
-        s@ePOINTgPOINT@e\.g\.@g;
-        s@(Mr|Mrs|viz)POINT@\1.@g;
-        s/^\s*//;
-        s/\s*$//;
-        s/\[[0-9]+\]//g;
-        s/\s+/ /g;
-        if (/\b$regexp\b/ and $number_of_printed < $number_of_printed_max ) {
-            print "$_.\n\n";
-            $number_of_printed++;
+for $page (@ARGV) {
+    if (open(PAGE, "$page")) {
+        binmode PAGE,":utf8";
+        local $/;
+        $text=<PAGE>;
+        $text =~ s@http://[a-zA-Z&_.:/0-9%?=,\#+()\[\]~-]*@@g;
+        $text =~ s@\n@@g;
+        $text =~ s@(Mr|Mrs|viz)\.@\1POINT@g;
+        $text =~ s@e\.g\.@ePOINTgPOINT@g;
+        $text =~ s@i\.e\.@iPOINTePOINT@g;
+        @sentences=split /[.!?]/, $text;
+        for (@sentences) {
+            s@iPOINTePOINT@i\.e\.@g;
+            s@ePOINTgPOINT@e\.g\.@g;
+            s@(Mr|Mrs|viz)POINT@\1.@g;
+            s/^\s*//;
+            s/\s*$//;
+            s/\[[0-9]+\]//g;
+            s/\s+/ /g;
+            if (/\b$regexp\b/ and $number_of_printed < $number_of_printed_max ) {
+                $mark_start = "\e[35;1m";
+                $mark_stop = "\e[0m";
+                s/\b($regexp)\b/$mark_start$1$mark_stop/;
+                print "$_.\n\n";
+                $number_of_printed++;
+            }
+            if ($number_of_printed >= $number_of_printed_max) {
+                exit(0);
+            };
         }
     }
 }