new-words
diff grep-sentences.pl @ 30:07d89c2505e7
lingvo en->ru by Andrii Grytsenko
author | Igor Chubin <igor@chub.in> |
---|---|
date | Sun Jul 04 12:24:01 2010 +0200 (2010-07-04) |
parents | 7db7bbf96fad |
children | 48ca8248e9cc |
line diff
```diff
1.1 --- a/grep-sentences.pl Tue Jun 15 05:46:50 2010 +0300
1.2 +++ b/grep-sentences.pl Sun Jul 04 12:24:01 2010 +0200
1.3 @@ -1,10 +1,17 @@
1.4  #!/usr/bin/perl
1.5
1.6 +use utf8;
1.7 +use Encode;
1.8 +binmode STDIN,":utf8";
1.9 +binmode STDOUT,":utf8";
1.10
1.11  $regexp=$ARGV[0];
1.12 +$regexp=decode("utf8",$regexp);
1.13 +
1.14  $page=$ARGV[1];
1.15  #if (open(PAGE, "lynx -dump '$page'|")) {
1.16  if (open(PAGE, "$page")) {
1.17 +    binmode PAGE,":utf8";
1.18      local $/;
1.19      $text=<PAGE>;
1.20      $text =~ s@http://[a-zA-Z&_.:/0-9%?=,\#+()\[\]~-]*@@g;
```