new-words

diff grep-sentences.pl @ 30:07d89c2505e7

lingvo en->ru by Andrii Grytsenko
author Igor Chubin <igor@chub.in>
date Sun Jul 04 12:24:01 2010 +0200 (2010-07-04)
parents 7db7bbf96fad
children 48ca8248e9cc
line diff
     1.1 --- a/grep-sentences.pl	Tue Jun 15 05:46:50 2010 +0300
     1.2 +++ b/grep-sentences.pl	Sun Jul 04 12:24:01 2010 +0200
     1.3 @@ -1,10 +1,17 @@
     1.4  #!/usr/bin/perl
     1.5  
     1.6 +use utf8;
     1.7 +use Encode;
     1.8 +binmode STDIN,":utf8";
     1.9 +binmode STDOUT,":utf8";
    1.10  
    1.11  $regexp=$ARGV[0];
    1.12 +$regexp=decode("utf8",$regexp);
    1.13 +
    1.14  $page=$ARGV[1];
    1.15  #if (open(PAGE, "lynx -dump '$page'|")) {
    1.16  if (open(PAGE, "$page")) {
    1.17 +    binmode PAGE,":utf8";
    1.18      local $/;
    1.19      $text=<PAGE>;
    1.20      $text =~ s@http://[a-zA-Z&_.:/0-9%?=,\#+()\[\]~-]*@@g;