new-words
annotate lingvo-en-ru.pl @ 31:48ca8248e9cc
+esperanto normalization
author | Igor Chubin <igor@chub.in> |
---|---|
date | Tue Aug 17 21:35:57 2010 +0200 (2010-08-17) |
parents | |
children |
rev | line source |
---|---|
igor@30 | 1 #!/usr/bin/perl |
igor@30 | 2 |
igor@30 | 3 # written by Andrii Grytsenko, 2010 |
igor@30 | 4 |
igor@30 | 5 use Locale::Recode; |
igor@30 | 6 use LWP::UserAgent; |
igor@30 | 7 use strict; |
igor@30 | 8 |
igor@30 | 9 |
# Extract the dictionary-relevant lines from a fetched result page.
#
# Takes the page HTML as one string, inserts a newline between every adjacent
# "><" pair so that tags end up on separate lines, and keeps only the lines
# that mention a translation span, the dictionary name ("lol-dict-name"),
# an Abbrev span or a comment span.
#
# Returns the kept lines, each terminated by "\n", joined into one string.
# The result is always a defined string: it is empty when the input is
# undefined or when no line matches.
sub parse_html {
    my $text = shift;
    return '' unless defined $text;

    # Break the markup up so the filter below can work tag by tag.
    $text =~ s/></>\n</g;

    my $result = '';
    foreach my $line (split(/\n/, $text)) {
        # The "." after "class=" stands for the quote character, whichever
        # kind the page uses.
        $result .= $line . "\n"
            if (   $line =~ /span class=.translation/
                or $line =~ /lol-dict-name/
                or $line =~ /span class=.Abbrev./
                or $line =~ /span class=.comment./ );
    }
    return $result;
}
igor@30 | 20 |
# Print text with all HTML tags removed.
#
# Takes a string of newline-separated lines, deletes every "<...>" sequence
# from each line and prints what is left, followed by "\n", to the currently
# selected output handle. Undefined or empty input prints nothing.
# The return value carries no information.
sub print_html {
    my $text = shift;
    return unless defined $text;

    foreach my $line (split(/\n/, $text)) {
        $line =~ s/<[^>]+>//g;    # delete all html tags
        print "$line\n";
    }
    return;
}
igor@30 | 29 |
igor@30 | 30 |
# Main script: look up the word given as the first command-line argument in
# the English-Russian online dictionary and print the result as plain text.

# Converter applied to the fetched page: UTF-8 in, koi8-r out.
my $cd = Locale::Recode->new (from => 'UTF-8', to => 'koi8-r');
die $cd->getError if $cd->getError;

my $ua = LWP::UserAgent->new;
$ua->agent("Firefox/0.1 ");

my $word = $ARGV[0];
die "Usage: $0 WORD\n" unless defined $word && length $word;

# Percent-encode everything outside the RFC 3986 "unreserved" set so that a
# word containing spaces, '/', '?', '#' or non-ASCII bytes still forms a valid
# single path segment. The original $word is kept for the message below.
my $escaped_word = $word;
utf8::encode($escaped_word) if utf8::is_utf8($escaped_word);    # escape bytes, not characters
$escaped_word =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord($1))/ge;

my $req = HTTP::Request->new(GET => "http://lingvo.abbyyonline.com/en/en-ru/$escaped_word");

my $res = $ua->request($req);

if ($res->is_success) {
    my $text = $res->content;
    # Convert the page in place from UTF-8 to koi8-r. A failed conversion is
    # reported but not fatal: the text is still printed as far as possible.
    $cd->recode($text)
        or warn "recode to koi8-r failed: " . $cd->getError . "\n";
    print "Translate for word $word\n";
    $text = parse_html($text);
    print_html($text);
} else {
    # Request failed: show the HTTP status line instead of a translation.
    print $res->status_line, "\n";
}