new-words

view new-words.sh @ 33:720a701b2ba9

-S and -N keys
author Igor Chubin <igor@chub.in>
date Sun Dec 12 10:04:06 2010 +0100 (2010-12-12)
parents 753fb84437aa
children 3827cce83602
line source
1 #!/bin/bash
# Print the command-line help.
# Arguments: none
# Outputs:   the usage text on standard error (nothing on standard output)
show_usage()
{
# ">&2" duplicates the already-open descriptor; opening the /dev/stderr path
# again can fail when stderr is a socket or belongs to another user.
cat <<'HELP' >&2

USAGE:

    new-words [ -l lang ] [ -s ] [ ARG ]

SWITCHES:

    -h          print this screen
    -k          put higher words that are similar to the known words (only for English)
    -l lang     override language settings
    -n          non-interactive mode (don't run vi)
    -N          turn off known words filtering
    -a          don't add marks (and don't save marks added by user)
    -p pages    work with specified pages only (pages = start-stop/total )
    -s          show the text statistics (percentage of known words and so on) and exit
    -S          show your vocabulary statistics (number of words and word groups)
    -t tag      tag known words with tag
    -T          show list of active tags
    -m tag      merge the words tagged with "tag" into the main vocabulary
    -M          merge the words tagged with any tag into the main vocabulary
    -r tag      remove subvocabulary for the "tag"
    -2 -3       find 2 and 3 words' sequences

The language of the text can be specified also
by name of the program new-words (correspondent link must be created before).
For example, these calls are equivalent:

    de-words URL
    new-words -l de URL

HELP
}
# "-h" as the very first argument prints the help and stops before any
# temporary file is created.
if [ "$1" = "-h" ]
then
    show_usage
    exit 0
fi

# Directory holding the vocabularies, tag files and notes.
WORK_DIR=~/.new-words/

# Scratch files shared by the processing pipeline.  Each assignment is checked
# on its own: "export VAR=$(mktemp ...)" would hide a mktemp failure.
TEMP1=$(mktemp /tmp/new-words-temp1.XXXXXXXXXX) || exit 1
TEMP2=$(mktemp /tmp/new-words-temp2.XXXXXXXXXX) || exit 1
ORIGINAL_TEXT=$(mktemp /tmp/new-words-orig.XXXXXXXXXX) || exit 1
export ORIGINAL_TEXT
editor=${EDITOR:-vim}

# Remove the scratch files (and the "-full"/"-full2" files derived from TEMP1).
cleanup_temp_files()
{
    rm -f -- "$TEMP1" "$TEMP2" "${TEMP1}-full" "${TEMP1}-full2" "$ORIGINAL_TEXT"
}
# The script has several early exits (-S, -T, -r, unknown option); the trap
# makes sure none of them leaves files behind in /tmp.
trap cleanup_temp_files EXIT
# language detection

# Print the language code of a Wikipedia article URL.
# Arguments: $1 - candidate URL
# Outputs:   the host prefix (e.g. "de" for de.wikipedia.org) on stdout
# Returns:   0 when $1 is a Wikipedia article URL, 1 otherwise
language_from_url()
{
    # Both http and https are accepted, and codes longer than two letters.
    local wikipedia_re='^https?://([a-z][a-z-]*)\.wikipedia\.org/wiki/'
    if [[ "$1" =~ $wikipedia_re ]]
    then
        printf '%s\n' "${BASH_REMATCH[1]}"
        return 0
    fi
    return 1
}

# Print the part of a program name that precedes the first "-".
# Arguments: $1 - program path (normally $0)
# Outputs:   e.g. "de" for /usr/bin/de-words, "new" for new-words
program_name_prefix()
{
    local base_name=${1##*/}
    printf '%s\n' "${base_name%%-*}"
}

LANGUAGE=en
my_name=$(program_name_prefix "$0")
for arg
do
    if detected_language=$(language_from_url "$arg")
    then
        LANGUAGE="$detected_language"
    fi
done
# A program name other than new-* (a de-words style link) overrides the URL.
[ "${my_name}" = "new" ] || LANGUAGE="$my_name"
#----------------------------------------------------
# command line options processing

STAT_ONLY=NO
NEED_TO_USE_VOCABULARY_WHEN_SORT=NO
DONT_ADD_MARKS=NO
NON_INTERACTIVE_MODE=NO
PART_TO_PROCESS=''
GROUP_WORDS_BY_THREE=NO
GROUP_WORDS_BY_TWO=NO
TAG_NAME=''
MERGE_THIS_TAGS=''
TAGS_LIST_ONLY=NO
MERGE_TAGGED_WORDS=NO
MERGE_ALL_TAGGED=NO
DONT_ADD_MARKLINES=NO
FILTER_WORDS=YES
SHOW_VOC_STAT=NO
# Initialised explicitly: a REMOVE_TAG=YES inherited from the environment
# must not switch the script into tag-removal mode.
REMOVE_TAG=NO

# Translate the command-line switches into the mode variables above.
# Arguments: the script's arguments ("$@")
# Globals:   writes the mode variables, LANGUAGE and OPTIND
# Exits:     0 after printing the help for -h, 1 on an unknown switch
parse_options()
{
    local opt
    OPTIND=1
    while getopts hl:sSkanNp:t:Tm:Mr:23 opt
    do
        case "$opt" in
            h) show_usage
               exit 0;;
            s) STAT_ONLY=YES;;
            S) SHOW_VOC_STAT=YES;;
            k) NEED_TO_USE_VOCABULARY_WHEN_SORT=YES;;
            l) LANGUAGE="$OPTARG";;
            a) DONT_ADD_MARKS=YES;;
            n) NON_INTERACTIVE_MODE=YES;;
            N) FILTER_WORDS=NO;;
            p) PART_TO_PROCESS="$OPTARG";;
            t) TAG_NAME="$OPTARG";;
            T) TAGS_LIST_ONLY="YES";;
            m) DONT_ADD_MARKLINES="YES"; MERGE_TAGGED_WORDS="YES"; MERGE_THIS_TAGS="$TAG_NAME $OPTARG";;
            M) DONT_ADD_MARKLINES="YES"; MERGE_ALL_TAGGED="YES";;
            r) REMOVE_TAG="YES"; TAG_NAME="$TAG_NAME $OPTARG";;
            2) GROUP_WORDS_BY_TWO=YES;;
            3) GROUP_WORDS_BY_THREE=YES;;
            \?) # unknown flag
               show_usage
               exit 1;;
        esac
    done
}

parse_options "$@"
shift $((OPTIND - 1))

# A "-l lang" pair is also accepted after the regular switches.
if [ "$1" = "-l" ]
then
    LANGUAGE="$2"
    shift 2
fi

VOCABULARY=${LANGUAGE}.txt
NOTES_FILE=notes-${LANGUAGE}.txt

if [ "${SHOW_VOC_STAT}" = "YES" ]
then
    # Run the script on the vocabulary itself with filtering off; the fifth
    # field of the first output line is "<groups/words>".
    "$0" -l "${LANGUAGE}" -N -n "${WORK_DIR}/${VOCABULARY}" | head -1 | awk '{print $5}' | tr -d "<>"
    exit 0
fi
122 #----------------------------------------------------
# Turn running text into a frequency list of the words that survive the
# known-words filter.
# Arguments: $1 - file that receives every extracted word, one per line,
#                 before filtering (add_stat counts the total from it)
# Input:     text on stdin
# Outputs:   "count word" lines on stdout, highest count first
# Globals:   exports FILTER_WORDS for grep_v_english_perl
get_words()
{
    export FILTER_WORDS
    # Tabs are split like spaces: a line still containing a tab fails the
    # letters-only check in the perl stage and all its words would be lost.
    # Apostrophes travel through the perl stage as a placeholder.
    tr ' \t' '\n\n' \
    | sed -e 's/--/ /g' -e "s/'/__APOSTROPHE__/g" \
    | perl -MEncode -Mutf8 -n -e '$_ = decode( "utf8", $_);y/*\r,.:#@()+=—<>$;"?!|·[]^%&/ /; binmode STDOUT, ":utf8"; print if /^[[:alpha:] '"'"'_-]*$/' \
    | sed "s/__APOSTROPHE__/'/g" \
    | tr ' ' '\n' \
    | tee "$1" \
    | grep_v_english_perl \
    | sort | uniq -c | awk '{if ($2!="") print;}' | sort -rn
}
# Prepend a statistics header to the grouped word list and insert "# NN"
# coverage marks, or print only the text statistics when STAT_ONLY=YES.
# Arguments: $1 - file with every word of the text (written by get_words)
# Input:     output of group_words on stdin ("# groups N" + "count word" lines)
# Globals:   reads DONT_ADD_MARKLINES, STAT_ONLY, LANGUAGE, ORIGINAL_TEXT
# Outputs:   the annotated list (or the statistics table) on stdout
# Files:     uses "${1}2" as a scratch copy of stdin and removes it
add_stat()
{
    if [ "$DONT_ADD_MARKLINES" = "YES" ]
    then
        cat
        return
    fi

    local before="$1"
    local after="${before}2"
    local total total_unknown total_known percentage sentences groups words
    local mark_script

    cat > "$after"

    total=$(wc -w < "$before" | awk '{print $1}')
    total=${total:-0}
    # "s+0" prints 0 instead of an empty string when the list is empty.
    total_unknown=$(awk '{s=s+$1}END{print s+0}' "$after")
    total_known=$((total - total_unknown))

    # An empty text has no percentage; avoid handing a division by zero to bc.
    if [ "$total" -gt 0 ]
    then
        percentage=$(echo "100*($total-$total_unknown)/$total" | bc -l | sed 's/\.\(.\).*/.\1/')
    else
        percentage=0
    fi

    # Sentences are approximated by the number of periods in the text.
    sentences=$(tr -cd '.' < "$ORIGINAL_TEXT" | wc -c | awk '{print $1}')
    sentences=${sentences:-0}
    # A text without any period is counted as a single sentence.
    [ "$sentences" -gt 0 ] || sentences=1

    if [ "$STAT_ONLY" = "YES" ]
    then
        echo "LANG KNOWN% UNKNOWN% KNOWN TOTAL WPS UWPS*10"
        echo "$LANGUAGE $percentage $(echo "(100-$percentage)" | bc -l) $total_known $total $((total / sentences)) $((10 * total_unknown / sentences)) "
        rm -f -- "$after"
        return 0
    else
        groups=$(awk '/# groups/ {print $3}' "$after")
        words=$(grep -vc '^#' "$after")
        echo "# $LANGUAGE, $percentage, <$total_known/$total>, <$groups/$words>"
    fi

    mark_script=$(mktemp /tmp/perl-grep-v-english-XXXXXXXX) || { rm -f -- "$after"; return 1; }
    cat <<'PERL_SCRIPT' > "$mark_script"
my $total=shift(@ARGV);
my $total_known=shift(@ARGV);
my $s=0;

# Nothing to mark in an empty text; also avoids dividing by zero below.
if ($total <= 0) {
    while(<>) {
        next if /^#\s*groups\s*/;
        print;
    }
    exit(0);
}

# Marks step by 5 below 90 per cent and by 1 from 90 per cent on.
my $mark_line=int($total_known*100/$total/5)*5;
if ($mark_line>=90) {
    $mark_line=int($total_known*100/$total)+1;
} else { $mark_line +=5; };

while(<>)
{
    next if /^#\s*groups\s*/;
    print;
    /^\s*([0-9]*)\s*/;
    $s+=$1;
    if (($total_known+$s)*100/$total>=$mark_line) {
        print "# $mark_line\n";
        if ($mark_line>=90) { $mark_line+=1; } else { $mark_line +=5; };
    }
}
PERL_SCRIPT
    perl "$mark_script" "$total" "$total_known" "$after"
    rm -f -- "$mark_script" "$after"
}
# Pass the text through and, when -2 / -3 was given, append every sequence of
# two / three neighbouring words, joined with "_", one sequence per line.
# Input:     text on stdin (copied to stdout unchanged)
# Globals:   reads GROUP_WORDS_BY_TWO, GROUP_WORDS_BY_THREE; the sequences are
#            built from the file named by ORIGINAL_TEXT, not from stdin
two_and_three_words()
{
    if [ "$GROUP_WORDS_BY_THREE" = NO ] && [ "$GROUP_WORDS_BY_TWO" = NO ]
    then
        cat
    else
        cat

        export GROUP_WORDS_BY_THREE
        export GROUP_WORDS_BY_TWO
        local sequence_script
        sequence_script=$(mktemp /tmp/perl-two-and-three-XXXXXXXX) || return 1
        cat <<'PERL_SCRIPT' > "$sequence_script"
#!/usr/bin/perl
local $/;
$words=<>;
$words=~ s@[!?;,:#1-9".]@ @g;
$words =~ s@\s+@ @g;
@words = split /\s+/, $words;
# Visit every position that still has a right neighbour.  The old bound
# ($i < $#words-3) silently dropped the sequences at the end of the text;
# the truth checks below cover the missing third word at the last position.
for ($i=0; $i<$#words; $i++) {
    my ($a,$b,$c)= ($words[$i],$words[$i+1],$words[$i+2]);
    if ($ENV{GROUP_WORDS_BY_THREE} eq "YES" and ($a && $b && $c)) {
        print "${a}_${b}_${c}\n";
    };
    if ($ENV{GROUP_WORDS_BY_TWO} eq "YES" and ($a && $b)) {
        print "${a}_${b}\n";
    };
}
PERL_SCRIPT
        perl "$sequence_script" "$ORIGINAL_TEXT"
        rm -f -- "$sequence_script"
    fi
}
# Drop every input line that is exactly a word of the vocabulary file.
# Input:     one word per line on stdin
# Globals:   reads VOCABULARY (created empty when it does not exist)
# Outputs:   the remaining lines on stdout
# Returns:   0, also when every line was filtered out
grep_v_english()
{
    [ -e "$VOCABULARY" ] || touch "$VOCABULARY"
    # The vocabulary is used as a list of fixed strings.  The previous version
    # built an egrep command line from the file content and ran it through
    # eval, so a quote or "$(...)" in the vocabulary was executed as code.
    # grep exits 1 when nothing is left; that is not an error here.
    grep -Fxv -f "$VOCABULARY" || [ "$?" -eq 1 ]
}
# Drop every input line that is a known word: a line of the main vocabulary
# or of the vocabulary of one of the active tags.
# Input:     one word per line on stdin
# Globals:   reads FILTER_WORDS ("NO" passes everything through), VOCABULARY
#            and TAG_NAME (space-separated tag list); exports VOCABULARY,
#            VOC_FILES and FILTER_WORDS for the perl helper
# Outputs:   the unknown words on stdout
grep_v_english_perl()
{
    local filter_script tag
    filter_script=$(mktemp /tmp/perl-grep-v-english-XXXXXXXX) || return 1
    cat <<'PERL_SCRIPT' > "$filter_script"
if ($ENV{FILTER_WORDS} eq "NO") {
    while(<>) { print; }
    exit(0);
}

$voc_files=$ENV{VOC_FILES};
$voc_files=~s@^ @@;
for $voc_file (split /\s+/,$voc_files) {
    if (open(VOC, "<", $voc_file)) {
        while (<VOC>){
            chomp;
            #s/'//g;
            $voc{$_}="1";
        }
        close(VOC);
    }
}
while(<>) {
    chomp;
    if (not defined($voc{$_})) { print "$_\n"; }
}
PERL_SCRIPT
    [ -e "$VOCABULARY" ] || touch "$VOCABULARY"
    # The helper reads its switches from the environment, so FILTER_WORDS is
    # exported here too instead of relying on the caller having done it.
    export VOCABULARY VOC_FILES FILTER_WORDS
    VOC_FILES=$VOCABULARY
    # TAG_NAME is a space-separated list; word splitting is intended.
    for tag in $TAG_NAME
    do
        VOC_FILES="${VOC_FILES} $(tag_file_name "$tag")"
    done
    perl "$filter_script"
    rm -f -- "$filter_script"
}
# Sort the "count word" list so that words sharing a normalized form (a crude
# stem) stay together and the heaviest groups come first.
# Input:     "count word" lines on stdin
# Globals:   reads and exports VOCABULARY, NOTES_FILE, LANGUAGE and
#            NEED_TO_USE_VOCABULARY_WHEN_SORT ("YES" doubles the weight of
#            groups whose normalized form is in the vocabulary)
# Outputs:   a "# groups N" line followed by the reordered input lines
# Files:     creates NOTES_FILE when missing; VOCABULARY must be readable
group_words()
{
    #if [ "$LANGUAGE" != "en" ]
    #then
    #    cat
    #    return
    #fi
    local group_script
    group_script=$(mktemp /tmp/perl-group-words-XXXXXXXX) || return 1
    cat <<'PERL_SCRIPT' > "$group_script"
#!/usr/bin/perl

use Encode;
use utf8;

eval {
# http://stackoverflow.com/questions/251694/how-can-i-check-if-i-have-a-perl-module-before-using-it
    require String::Similarity;
    String::Similarity->import();
};
unless($@)
{
    our $HAVE_String_Similarity=1;
}


# Read the notes file into a hash: word => note text.
sub load_notes_dict()
{
    my %dict;
    if (open(NOTES, $ENV{NOTES_FILE})) {
        while(<NOTES>) {
            $_ = decode( "utf8", $_);
            chomp;
            s/^\s+//;
            my ($a,$b)=split /\s+/,$_,2;
            $dict{$a}=$b;
        }
    }
    return %dict;
}

# Looks up a precomputed similarity; nothing visible here fills %Similarity.
sub similar($$){
    my $a=shift;
    my $b=shift;
    if ($HAVE_String_Similarity) {
        return $Similarity{"$a $b"};
    }
    else {
        return 0;
    }
}


# Language-specific normalization; unknown languages keep the word as is.
sub normalize_without_linked($)
{
    if ( $ENV{LANGUAGE} eq "en" ) { return normalize_english(shift); }
    elsif ( $ENV{LANGUAGE} eq "de" ) { return normalize_german(shift); }
    elsif ( $ENV{LANGUAGE} eq "uk" ) { return normalize_ukrainian(shift); }
    elsif ( $ENV{LANGUAGE} eq "io" ) { return normalize_esperanto(shift); }
    else { return shift ; }
}

# Normalization that also follows the "@word" links found in the notes.
sub normalize_with_linked($)
{
    my $word = normalize_without_linked(shift);
    #return $word;
    if ($linked_words{$word}) {
        return $linked_words{$word};
    }
    else {
        return $word;
    }
}

sub normalize($)
{
    return normalize_with_linked(shift);
}

sub normalize_ukrainian($)
{
    $_=lc(shift);
    s/[юіоеуаи]$//g;
    return $_;
}

sub normalize_esperanto($)
{
    $_=lc(shift);
# verbs
    s/i$//; s/is$//; s/os$//; s/as$//; s/us$//;

# nouns
    s/j?n?$//;

    return $_;
}

sub normalize_german($)
{
    # The stemmer is loaded on first use only, so the other languages work on
    # systems where Lingua::Stem::Snowball is not installed.
    require Lingua::Stem::Snowball;
    my @stems = Lingua::Stem::Snowball::stem('de', \@_);
    return $stems[0];
}

# Hand-written German normalization; not called by normalize_without_linked.
sub normalize_german_($)
{
    $_=lc(shift);

    s/heit$//; s/keit$//; s/tum$//; s/ung$//; s/nis$//;s/schaft$//; s/ist$//;
    s/en$//; s/er$//;

    s/lich$//; s/ig$//;
    s/al$//; s/isch$//;
    s/ell$//; s/haft$//;

    s/bar$//; s/sam$//; s/lich$//;

    @prefixes=qw(
        ab an auf aus bei dazwischen ein fest heraus her hinaus hin los mit nach voraus vorbei vor weg weiter zurück zusammen zu
        be emp ent er ge miss ver zer durch über um unter wieder);
    @prefixes=();
    for $pref (@prefixes) {
        s/^$pref//;
    }


    return $_;
}

sub normalize_english($)
{
    $_=lc(shift);

    s/s$//;

    s/ation$//; s/ness$//; s/ship$//; s/ally$//; s/ance$//;s/ity$//; s/ment$//;

    s/ed$//;
    s/en$//;
    s/er$//;
    s/est$//;
    s/ing$//;

    s/ism$//; s/ist$//; s/ful$//; s/able$//; s/ably$//;
    s/ify$//; s/fy$//; s/ly$//;
    s/ise$//; s/ize$//;

    s/e$//;
    return $_;
}


# Order two "count word" lines: by group weight, then by normalized form,
# then by the count of the individual word.
sub compare($$)
{
    my $a=shift;
    my $b=shift;
    $a =~ s/^\s*//;
    $b =~ s/^\s*//;
    my ($a1, $a2)= split /\s+/,$a,2;
    my ($b1, $b2)= split /\s+/,$b,2;

    my $cmp = $group_weight{normalize($a2)} <=> $group_weight{normalize($b2)};

    if ($cmp) {
        return $cmp;
    }
    else {
        if (normalize($a2) ne normalize($b2)) {
            return normalize($a2) cmp normalize($b2);
        }
        else {
            return $a1 <=> $b1;
        }
    }
}

# Debug logging, switched off by the leading return.
sub log_($)
{
    return;
    open(LOG, ">>", "/tmp/log1");
    print LOG $_[0];
    close(LOG);
}

# Build normalized-word => normalized-target for notes containing "@target".
sub find_linked_words($)
{
    my %linked_words;
    my $dict = shift;
    log_("1");
    log_(join(" ", keys(%$dict)));

    for $key (keys(%$dict)) {
        $val = $dict->{$key};
        log_($key."\n");
        if ($val =~ /\@([a-z]*)/) {
            $linked_words{normalize($key)} = normalize($1);
            log_(normalize($key)." = ".normalize($1)."\n");
        }
    }
    return %linked_words;
}

our %dict = load_notes_dict();
our %linked_words = find_linked_words(\%dict);

our %Vocabulary;
open(VOC, "<", $ENV{VOCABULARY})
    or die "Can't open VOCABULARY";
# Decode like STDIN below: otherwise non-ASCII vocabulary words are byte
# strings that never equal the character strings used as group keys.
binmode VOC, ":utf8";
while (<VOC>){
    chomp;
    #s/'//g;
    $Vocabulary{normalize($_)}="1";
}
close(VOC);

binmode STDIN,":utf8";
@lines=<STDIN>;
for $L (@lines) {
    chomp($L);
    #$L = decode( "utf8", $L);
    $l=$L;
    $l =~ s/^\s*//;
    my ($a, $b)=split(/\s+/,$l,2);
    $group_weight{normalize($b)}+=$a;
}
if ($ENV{NEED_TO_USE_VOCABULARY_WHEN_SORT} eq "YES") {
    for $k (keys %group_weight) {
        if (defined($Vocabulary{$k})) {
            $group_weight{$k} *= 2;
        }
    }
}
@lines2 = sort { compare($b,$a) } @lines;
binmode STDOUT, ":utf8";
print "# groups ".scalar(keys(%group_weight))."\n";
for $l (@lines2) {
    print "$l\n";
}
PERL_SCRIPT
    export VOCABULARY
    export NEED_TO_USE_VOCABULARY_WHEN_SORT
    export LANGUAGE
    [ -e "$NOTES_FILE" ] || touch "$NOTES_FILE"
    export NOTES_FILE
    perl "$group_script"
    rm -f -- "$group_script"
}
# Fetch a web page as plain text with the URLs stripped.
# Arguments: $1 - URL of the page
# Outputs:   the "lynx -dump" rendering on stdout, with every http/https URL
#            removed (previously only the first http:// URL of each line)
text_from_url()
{
    lynx -dump "$1" | perl -p -e 's@https?://[a-zA-Z&_.:/0-9%?=,#+()\[\]~-]*@@g'
}
# Append the user's note to every "count word" line of a word list whose word
# has a note in the notes file.  The list is rewritten in place.
# Arguments: $1 - word list file (lines starting with "#" are left alone)
# Globals:   reads and exports NOTES_FILE (created empty when missing)
add_marks()
{
    local marks_script
    marks_script=$(mktemp /tmp/perl-grep-v-english-XXXXXXXX) || return 1
    cat <<'PERL_SCRIPT' > "$marks_script"
use Encode;

# Read the notes file into a hash: word => note text.
sub load_notes_dict()
{
    my %dict;
    if (open(NOTES, $ENV{NOTES_FILE})) {
        while(<NOTES>) {
            $_ = decode( "utf8", $_);
            chomp;
            s/^\s+//;
            my ($a,$b)=split /\s+/,$_,2;
            $dict{$a}=$b;
        }
    }
    return %dict;
}

%dict = load_notes_dict();

$file = $ARGV[0];
if (open(F, "<", $file)) {
    @lines=<F>;
    close(F);
    for (@lines) {$_ = decode( "utf8", $_);};

    if (open(F, ">", $file)) {
        binmode F, ":utf8";
        for (@lines) {
            # Take the word from this line only: after a failed match $1
            # still holds the word of an earlier line.
            my $name;
            $name = $1 if /^\s*\S+\s+(\S+)/;
            if (not /^#/ and defined($name) and defined($dict{$name})) {
                chomp;
                $mark=$dict{$name};
                # The note starts in column 31; lines that are already that
                # long still get one space so the note is not glued to the
                # word (remove_marks needs the gap to split them again).
                my $pad = 30-length($_);
                $pad = 1 if $pad < 1;
                $space=" "x$pad;
                print F "$_$space$mark\n";
            }
            else {
                print F "$_";
            }
        }
        close(F);
    }
}
PERL_SCRIPT
    [ -e "$NOTES_FILE" ] || touch "$NOTES_FILE"
    export NOTES_FILE
    perl "$marks_script" "$1"
    rm -f -- "$marks_script"
}
# Strip the notes from an edited word list and, unless DONT_ADD_MARKS=YES,
# save them back to the notes file.
# Arguments: $1 - word list file; rewritten in place without the notes
# Globals:   reads and exports NOTES_FILE (created empty when missing) and
#            DONT_ADD_MARKS
# Notes file: entries for words that are not in the list are kept, unchanged
#            notes stay where they are, new and changed notes are appended in
#            sorted order; entries without a note text are dropped.
remove_marks()
{
    local unmark_script
    unmark_script=$(mktemp /tmp/perl-grep-v-english-XXXXXXXX) || return 1
    cat <<'PERL_SCRIPT' > "$unmark_script"
$file = $ARGV[0];
our %dict;
if (open(F, "<", $file)) {
    @lines=<F>;
    close(F);

    if (open(F, ">", $file)) {
        for (@lines) {
            chomp;
            if (not /^#/ and m/(\s+)(\S+)(\s+)(\S+)(\s+)(.*)/) {
                my $name=$4;
                my $comment=$6;
                $dict{$name}=$comment;
                print F "$1$2$3$4\n";
            }
            else {
                print F "$_\n";
            }
        }
        close(F);
    }
}
if (($ENV{DONT_ADD_MARKS} ne "YES") and open(NOTES, "<", $ENV{NOTES_FILE})) {
    @lines=<NOTES>;
    close(NOTES);

    if (open(NOTES, ">", $ENV{NOTES_FILE})) {
        for (@lines) {
            chomp;
            s/^\s+//;
            my ($a,$b)=split /\s+/,$_,2;
            # Entries without a note text carry no information.
            next unless defined($a) and defined($b) and $b ne "";
            # The old test "not defined(...) || (...)" parsed as
            # "not (... || ...)" and called a non-existent unset(); the
            # intended logic is spelled out here.
            if (not defined($dict{$a})) {
                print NOTES "$_\n";
            }
            elsif ($dict{$a} eq $b) {
                print NOTES "$_\n";
                delete $dict{$a};
            }
        }
        for (sort keys %dict) {
            $mark=$dict{$_};
            # At least one space, so long words keep their note separate.
            my $pad = 30-length($_);
            $pad = 1 if $pad < 1;
            $space=" "x$pad;
            print NOTES "$_$space$mark\n";
        }
        close(NOTES);
    }
}
PERL_SCRIPT
    [ -e "$NOTES_FILE" ] || touch "$NOTES_FILE"
    export NOTES_FILE
    export DONT_ADD_MARKS
    perl "$unmark_script" "$1"
    rm -f -- "$unmark_script"
}
# Pass through only the requested part of the input.
# Arguments: $1 - "start-stop/total" or "start+count/total": the input is
#                 treated as "total" pages of equal length and pages
#                 start (inclusive) to stop (exclusive) are printed;
#                 empty or "0" prints everything
# Input:     text on stdin
# Outputs:   the selected lines on stdout
# Returns:   non-zero when a range is given without a total
part()
{
    local part_script
    part_script=$(mktemp /tmp/perl-part-XXXXXXXX) || return 1
    cat <<'PERL_SCRIPT' > "$part_script"
#!/usr/bin/perl

my @lines=<STDIN>;
# Number of lines; $#lines (the last index) made the last line unreachable.
my $lines=scalar(@lines);
my $interval=$ARGV[0];
if (not $interval) {
    print @lines;
}
else {
    my ($start,$stop,$total);
    if ($interval =~ m@(.*)/(.*)@) {
        $start = $1;
        $total = $2;
    }
    else {
        $start=$interval;
        $total=0;
    }
    if ($start =~ m@(.*)-(.*)@) {
        $start = $1;
        $stop = $2;
    }
    if ($start =~ m@(.*)\+(.*)@) {
        $start = $1;
        $stop = $start+$2;
    }

    # Without a positive total the page size is undefined; say so instead of
    # dying with "Illegal division by zero".
    if (not $total > 0) {
        die "part: total number of pages is missing in '$interval'\n";
    }

    $start=int($lines/$total*$start);
    $stop=int($lines/$total*$stop);

    for($i=$start;$i<$stop;$i++){
        print $lines[$i];
    }
}
PERL_SCRIPT
    perl "$part_script" "$1"
    local status=$?
    rm -f -- "$part_script"
    return "$status"
}
# Print the names of the tags that have a vocabulary file for the current
# language, one per line.
# Globals:   reads WORK_DIR and LANGUAGE
# Outputs:   for every file ${LANGUAGE}_<tag>.txt in WORK_DIR the <tag> part;
#            nothing when WORK_DIR does not exist
list_tags()
{
    # Subshell: the caller's working directory is left alone.
    (
        cd "${WORK_DIR}" 2>/dev/null || exit 0
        for tag_file in "${LANGUAGE}"_*.txt
        do
            # An unmatched pattern stays literal; skip it.
            [ -e "$tag_file" ] || continue
            tag=${tag_file#"${LANGUAGE}_"}
            printf '%s\n' "${tag%.txt}"
        done
    )
}

if [ "$TAGS_LIST_ONLY" = "YES" ]
then
    list_tags
    exit 0
fi
# Print the name of the vocabulary file that belongs to a tag.
# Arguments: $1 - tag name
# Globals:   reads LANGUAGE
# Outputs:   "<LANGUAGE>_<tag>.txt" on stdout
tag_file_name()
{
    printf '%s\n' "${LANGUAGE}_${1}.txt"
}
# Delete the vocabulary files of the tags listed in TAG_NAME, in the current
# directory.
# Globals:   reads TAG_NAME (space-separated list); uses tag_file_name
# Outputs:   one "Tag '...' removed" / "Unknown tag '...'" line per tag
remove_tags()
{
    local tag tag_file
    local -a tags
    # Split without pathname expansion: a tag such as "*" must not be
    # replaced by the file names in the directory.
    read -r -a tags <<< "$TAG_NAME"
    for tag in "${tags[@]}"
    do
        # Skip names that could reach other files.  The old check tested the
        # undefined variable TAGNAME and therefore never fired.
        case "$tag" in
            *[/*?]*) continue;;
        esac
        tag_file=$(tag_file_name "$tag")
        if [ -e "$tag_file" ]
        then
            rm -f -- "$tag_file" && echo Tag "'$tag'" removed
        else
            echo Unknown tag "'$tag'"
        fi
    done
}

if [ "$REMOVE_TAG" = "YES" ]
then
    # Never delete relative to whatever directory the user happens to be in.
    cd "${WORK_DIR}" || exit 1
    remove_tags
    exit 0
fi
mkdir -p "$WORK_DIR"
oldpwd="$PWD"
# Vocabularies and notes are addressed relative to the work directory.
cd "$WORK_DIR" || exit 1

# Pick the text source (tagged vocabularies, a URL, a file or stdin) and feed
# it through the pipeline.  The finished word list lands in both TEMP1 (kept
# as the reference) and TEMP2 (the copy the user edits).
if [ "$MERGE_TAGGED_WORDS" = "YES" ]
then
    merge_files=()
    # MERGE_THIS_TAGS is a space-separated list; word splitting is intended.
    for i in $MERGE_THIS_TAGS
    do
        f=$(tag_file_name "$i")
        [ -e "$f" ] && merge_files+=("$f")
    done
    if [ "${#merge_files[@]}" -eq 0 ]
    then
        echo Unknown tags: $MERGE_THIS_TAGS >&2
    else
        cat "${merge_files[@]}"
    fi
elif [ "$MERGE_ALL_TAGGED" = "YES" ]
then
    cat "${LANGUAGE}"_*.txt
elif [[ "$1" == http://* || "$1" == https://* ]]
then
    # https:// URLs used to fall through to the file branch.
    text_from_url "$1"
elif [ "$#" != 0 ]
then
    if [[ "$1" == /* ]]
    then
        cat "$1"
    else
        # Relative names refer to the directory the user started in.
        cat "$oldpwd/$1"
    fi
else
    cat
fi \
    | part "$PART_TO_PROCESS" \
    | tee "$ORIGINAL_TEXT" \
    | two_and_three_words \
    | get_words "${TEMP1}-full" \
    | group_words \
    | add_stat "${TEMP1}-full" \
    | tee "$TEMP1" > "$TEMP2"
# Show the result.  In interactive mode the list is opened in the editor;
# every word on a line the user deleted or changed is appended to the
# vocabulary (or to the tag vocabulary when -t was given).
if [ "$STAT_ONLY" = "YES" ]
then
    cat "$TEMP1"
elif [ "$NON_INTERACTIVE_MODE" = "YES" ]
then
    cat "$TEMP1"
else
    if [ -s "$TEMP2" ]
    then
        [ "$DONT_ADD_MARKS" = "YES" ] || add_marks "$TEMP2"
        if [ "$editor" = vim ]
        then
            vim -c 'set keywordprg='"$LANGUAGE" -c 'set iskeyword=@,48-57,/,.,-,_,+,,,#,$,%,~,=,48-255' "$TEMP2" < /dev/tty > /dev/tty
        else
            # Unquoted on purpose: EDITOR may carry options ("emacs -nw").
            $editor "$TEMP2"
        fi
        remove_marks "$TEMP2"

        vocabulary="$VOCABULARY"
        # Unquoted on purpose: tag_file_name uses the first word of TAG_NAME.
        [ -n "$TAG_NAME" ] && vocabulary="$(tag_file_name $TAG_NAME)"
        # Only real word lines count.  Hunk headers ("3,5d2"), "---"
        # separators and deleted "# NN" mark lines used to end up in the
        # vocabulary as blank lines or numbers.
        diff "$TEMP1" "$TEMP2" \
            | awk '($1 == "<" || $1 == ">") && $2 !~ /^#/ && $3 != "" {print $3}' \
            | sort -u >> "$vocabulary"
    fi
fi

rm -f -- "$TEMP1" "$TEMP2" "${TEMP1}-full" "$ORIGINAL_TEXT"