new-words

diff grep-sentences.pl @ 27:0a80b2fa3ed8

initial tagging support
author Igor Chubin <igor@chub.in>
date Fri May 21 19:53:24 2010 +0300 (2010-05-21)
parents c6efd17741aa
children 7db7bbf96fad
line diff
     1.1 --- a/grep-sentences.pl	Sun Apr 04 14:12:35 2010 +0300
     1.2 +++ b/grep-sentences.pl	Fri May 21 19:53:24 2010 +0300
     1.3 @@ -10,7 +10,7 @@
     1.4      $text =~ s@http://[a-zA-Z&_.:/0-9%?=,\#+()\[\]~-]*@@g;
     1.5      $text =~ s@\n@@g;
     1.6      $text =~ s@(Mr|Mrs)\.@\1POINT@g;
     1.7 -    @sentences=split /\./, $text;
     1.8 +    @sentences=split /[.!?]/, $text;
     1.9      for (@sentences) {
    1.10           s@(Mr|Mrs)POINT@\1.@g;
    1.11          s/^\s*//;