new-words
diff lingvo-en-ru.pl @ 36:f95804355b0f
compressed wordlist support
author | Igor Chubin <igor@chub.in> |
---|---|
date | Sat Jan 01 19:47:39 2011 +0100 (2011-01-01) |
parents | |
children | |
line diff
#!/usr/bin/perl

# lingvo-en-ru.pl -- look up a word in the ABBYY Lingvo online en-ru
# dictionary and print the dictionary lines of the result page as plain text.
#
# written by Andrii Grytsenko, 2010
#
# Usage: lingvo-en-ru.pl WORD
#
# The page is fetched over HTTP, recoded from UTF-8 to KOI8-R, reduced to the
# lines that carry dictionary markup, and printed with the HTML tags removed.

use strict;
use warnings;

use Locale::Recode;
use LWP::UserAgent;


# Keep only the lines of an HTML page that carry dictionary content.
#
# Every "><" boundary is first broken onto a new line so that adjacent tags
# end up on separate lines; a line is then kept when it matches one of the
# markers tested below (translation / Abbrev / comment spans, or
# "lol-dict-name").
#
# Takes the page text. Returns the kept lines, each terminated by "\n", or an
# empty string when nothing matched (or when no text was given).
sub parse_html {
    my $text   = shift;
    my $result = '';    # defined from the start, so "no match" is '' not undef

    return $result unless defined $text;

    $text =~ s/></>\n</g;
    foreach my $line (split /\n/, $text) {
        next unless ( $line =~ /span class=.translation/
                   or $line =~ /lol-dict-name/
                   or $line =~ /span class=.Abbrev./
                   or $line =~ /span class=.comment./ );
        $result .= $line . "\n";
    }
    return $result;
}

# Print the given text to STDOUT line by line with all HTML tags removed.
#
# Takes the text to print; prints nothing when it is undefined or empty.
sub print_html {
    my $text = shift;

    return unless defined $text;

    foreach my $line (split /\n/, $text) {
        $line =~ s/<[^>]+>//g;    # delete all html tags
        print "$line\n";
    }
    return;
}


my $word = $ARGV[0];
die "Usage: $0 WORD\n" unless defined $word and length $word;

my $cd = Locale::Recode->new(from => 'UTF-8', to => 'koi8-r');
die $cd->getError if $cd->getError;

my $ua = LWP::UserAgent->new;
# NOTE(review): per the LWP::UserAgent documentation a trailing space makes
# LWP append its default "libwww-perl/#.###" product token -- confirm that
# this is intended.
$ua->agent("Firefox/0.1 ");

# Percent-encode everything outside the RFC 3986 "unreserved" set so that
# spaces, slashes, '?', '#' and non-ASCII bytes in the word cannot alter or
# break the request URL. The word is reduced to UTF-8 bytes first in case it
# arrived as a decoded character string (e.g. perl run with -CA).
my $escaped = $word;
utf8::encode($escaped) if utf8::is_utf8($escaped);
$escaped =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord $1)/ge;

my $req = HTTP::Request->new(GET => "http://lingvo.abbyyonline.com/en/en-ru/$escaped");

my $res = $ua->request($req);

if ($res->is_success) {
    my $text = $res->content;
    # Change the encoding from UTF-8 to koi8-r, in place. A failed conversion
    # is reported but not fatal: the lookup stays best-effort and prints
    # whatever text is available.
    $cd->recode($text)
        or warn "recode to koi8-r failed: " . $cd->getError . "\n";
    print "Translate for word $word\n";
    $text = parse_html($text);
    print_html($text);
} else {
    print $res->status_line, "\n";
}