# HG changeset patch
# User Igor Chubin
# Date 1270379555 -10800
# Node ID c6efd17741aaf1761e6a0eb00a28ea4893838624
# Parent 9b18c7efe31cfd03045313a0c0595c4b532a72fa
Точка, следующая за Mr./Mrs. не считается окончанием предложения

diff -r 9b18c7efe31c -r c6efd17741aa grep-sentences.pl
--- a/grep-sentences.pl	Sun Apr 04 12:54:46 2010 +0300
+++ b/grep-sentences.pl	Sun Apr 04 14:12:35 2010 +0300
@@ -9,8 +9,10 @@
 $text=;
 $text =~ s@http://[a-zA-Z&_.:/0-9%?=,\#+()\[\]~-]*@@g;
 $text =~ s@\n@@g;
+ $text =~ s@(Mr|Mrs)\.@\1POINT@g;
 @sentences=split /\./, $text;
 for (@sentences) {
+ s@(Mr|Mrs)POINT@\1.@g;
 s/^\s*//;
 s/\s*$//;
 s/\[[0-9]+\]//g;