new-words

diff lingvo-en-ru.pl @ 44:7eb1a8c3eade

-2 and -3 are now supported by new-words.py
author Igor Chubin <igor@chub.in>
date Fri Jan 28 21:45:58 2011 +0100 (2011-01-28)
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/lingvo-en-ru.pl	Fri Jan 28 21:45:58 2011 +0100
     1.3 @@ -0,0 +1,49 @@
     1.4 +#!/usr/bin/perl
     1.5 +
     1.6 +# written by Andrii Grytsenko, 2010
     1.7 +
     1.8 +use Locale::Recode;
     1.9 +use LWP::UserAgent;
    1.10 +use strict;
    1.11 +
    1.12 +
    1.13 +# Return the lines of $text (an HTML page) that carry dictionary markup: the
    1.14 +# dictionary name, translations, abbreviations and comments. Always returns a
    1.15 +# defined string ('' when nothing matches or when $text is undef).
    1.16 +sub parse_html {
    1.17 +    my $text = shift;
    1.18 +    return '' unless defined $text;
    1.19 +    $text =~ s/></>\n</g;    # one tag per line, so each element is tested alone
    1.20 +    my $wanted = qr/lol-dict-name|span class=.(?:translation|Abbrev.|comment.)/;
    1.21 +    return join '', map { "$_\n" } grep { /$wanted/ } split /\n/, $text;
    1.22 +}
    1.23 +
    1.24 +# Print every line of $text to the currently selected output handle with all
    1.25 +# HTML tags removed. Prints nothing when $text is undef or empty.
    1.26 +sub print_html {
    1.27 +    my $text = shift;
    1.28 +    return unless defined $text;
    1.29 +    # Strip tags line by line so a stray '<' cannot swallow the following lines.
    1.30 +    print map { (my $plain = $_) =~ s/<[^>]+>//g; "$plain\n" } split /\n/, $text;
    1.31 +}
    1.32 +
    1.33 +
    1.34 +# Main: look up $ARGV[0] in the Lingvo en-ru online dictionary and print the
    1.35 +# translation lines, recoded from UTF-8 to koi8-r.
    1.36 +my $word = $ARGV[0];
    1.37 +die "Usage: $0 WORD\n" unless defined $word and length $word;
    1.38 +my $cd = Locale::Recode->new (from => 'UTF-8', to => 'koi8-r');
    1.39 +die $cd->getError if $cd->getError;
    1.40 +my $ua = LWP::UserAgent->new;
    1.41 +$ua->agent("Firefox/0.1 ");
    1.42 +# Percent-encode the word so spaces, '/', '?', '#' or non-ASCII bytes cannot alter the URL.
    1.43 +(my $escaped = $word) =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord $1)/ge;
    1.44 +my $res = $ua->request(HTTP::Request->new(GET => "http://lingvo.abbyyonline.com/en/en-ru/$escaped"));
    1.45 +if ($res->is_success) {
    1.46 +    my $text = $res->content;
    1.47 +    $cd->recode($text) or warn $cd->getError, "\n";    # UTF-8 -> koi8-r, converts $text in place
    1.48 +    print "Translate for word $word\n";
    1.49 +    print_html(parse_html($text));
    1.50 +} else {
    1.51 +    print $res->status_line, "\n";
    1.52 +}