new-words

diff grep-sentences.pl @ 29:c631833fa2be

Minor fixes related to Unicode support
author Igor Chubin <igor@chub.in>
date Mon Jun 21 19:46:58 2010 +0300 (2010-06-21)
parents 7db7bbf96fad
children 48ca8248e9cc
line diff
     1.1 --- a/grep-sentences.pl	Tue Jun 15 05:46:50 2010 +0300
     1.2 +++ b/grep-sentences.pl	Mon Jun 21 19:46:58 2010 +0300
     1.3 @@ -1,10 +1,17 @@
     1.4  #!/usr/bin/perl
     1.5  
     1.6 +use utf8;
     1.7 +use Encode;
     1.8 +binmode STDIN,":utf8";
     1.9 +binmode STDOUT,":utf8";
    1.10  
    1.11  $regexp=$ARGV[0];
    1.12 +$regexp=decode("utf8",$regexp);
    1.13 +
    1.14  $page=$ARGV[1];
    1.15  #if (open(PAGE, "lynx -dump '$page'|")) {
    1.16  if (open(PAGE, "$page")) {
    1.17 +    binmode PAGE,":utf8";
    1.18      local $/;
    1.19      $text=<PAGE>;
    1.20      $text =~ s@http://[a-zA-Z&_.:/0-9%?=,\#+()\[\]~-]*@@g;