new-words

view new-words.sh @ 32:753fb84437aa

link between words support; link is created using @
author Igor Chubin <igor@chub.in>
date Fri Nov 05 20:07:46 2010 +0100 (2010-11-05)
parents 48ca8248e9cc
children 720a701b2ba9
line source
1 #!/bin/bash
# Print the help screen.
# Arguments: none.
# Outputs:   the usage text on standard error; nothing on standard output.
show_usage()
{
  # ">&2" duplicates the descriptor that is already open.  Re-opening
  # /dev/stderr by name would truncate a log file that stderr has been
  # redirected to.  The delimiter is quoted so the text is taken literally.
  cat <<'HELP' >&2

USAGE:

new-words [ -l lang ] [ -s ] [ ARG ]

SWITCHES:

-h print this screen
-k put higher words that are similar to the known words (only for English)
-l lang override language settings
-n non-interactive mode (don't run vi)
-a don't add marks (and don't save marks added by user)
-p pages work with specified pages only (pages = start-stop/total )
-s show the text statistics (percentage of known words and so on) and exit
-t tag tag known words with tag
-T show list of active tags
-m tag merge the words tagged with "tag" into the main vocabulary
-M merge the words tagged with any tag into the main vocabulary
-r tag remove subvocabulary for the "tag"
-2 -3 find 2 and 3 words' sequences

The language of the text can be specified also
by name of the program new-words (correspondent link must be created before).
For example, these calls are equivalent:

de-words URL
new-words -l de URL

HELP
}
# "-h" as the very first argument prints the help and stops before any
# scratch file is created.
if [ "$1" = "-h" ]; then
  show_usage
  exit 0
fi

# Directory the script changes into before it reads or writes the
# vocabulary and notes files.
WORK_DIR=~/.new-words/

# Scratch files used by the processing pipeline.
TEMP1=$(mktemp /tmp/new-words-temp1.XXXXXXXXXX) || exit 1
TEMP2=$(mktemp /tmp/new-words-temp2.XXXXXXXXXX) || exit 1
ORIGINAL_TEXT=$(mktemp /tmp/new-words-orig.XXXXXXXXXX) || exit 1
export ORIGINAL_TEXT

# Remove the scratch files on every exit path, including the early "exit"
# statements used by the tag-listing and tag-removal modes.
trap 'rm -f -- "$TEMP1" "$TEMP2" "${TEMP1}-full" "$ORIGINAL_TEXT"' EXIT

# Editor used in interactive mode; $EDITOR wins, vim is the fallback.
editor=${EDITOR:-vim}
# language detection
#
# English is the default.  A Wikipedia article URL among the arguments
# selects the language of its host prefix.  When the program is started
# under a name other than "new-..." (for example "de-words"), the part of
# that name before the first "-" overrides both.
LANGUAGE=en

# Basename of $0 up to the first "-", computed without spawning echo/sed
# and without exposing $0 to word splitting or globbing.
my_name=${0##*/}
my_name=${my_name%%-*}

for arg; do
  # "..." deliberately matches any three characters: a two-letter
  # language code followed by the dot.
  if printf '%s\n' "$arg" | grep -Eq 'https?://...wikipedia.org/wiki/'; then
    LANGUAGE=$(printf '%s\n' "$arg" | sed -E -e 's@https?://@@' -e 's@.wikipedia.*@@')
  fi
done

[ "${my_name}" = "new" ] || LANGUAGE="$my_name"
#----------------------------------------------------
# command line options processing
#
# Every mode flag gets an explicit default so that a value inherited from
# the environment can never switch a mode on.  REMOVE_TAG in particular
# guards a code path that deletes files.
STAT_ONLY=NO
NEED_TO_USE_VOCABULARY_WHEN_SORT=NO
DONT_ADD_MARKS=NO
NON_INTERACTIVE_MODE=NO
PART_TO_PROCESS=''
GROUP_WORDS_BY_THREE=NO
GROUP_WORDS_BY_TWO=NO
TAG_NAME=''
MERGE_THIS_TAGS=''
TAGS_LIST_ONLY=NO
MERGE_TAGGED_WORDS=NO
MERGE_ALL_TAGGED=NO
DONT_ADD_MARKLINES=NO
REMOVE_TAG=NO

while getopts hl:skanp:t:Tm:Mr:23 opt; do
  case "$opt" in
    h) show_usage
       exit 0;;
    s) STAT_ONLY=YES;;
    k) NEED_TO_USE_VOCABULARY_WHEN_SORT=YES;;
    l) LANGUAGE="$OPTARG";;
    a) DONT_ADD_MARKS=YES;;
    n) NON_INTERACTIVE_MODE=YES;;
    p) PART_TO_PROCESS="$OPTARG";;
    t) TAG_NAME="$OPTARG";;
    T) TAGS_LIST_ONLY="YES";;
    m) DONT_ADD_MARKLINES="YES"; MERGE_TAGGED_WORDS="YES"; MERGE_THIS_TAGS="$TAG_NAME $OPTARG";;
    M) DONT_ADD_MARKLINES="YES"; MERGE_ALL_TAGGED="YES";;
    r) REMOVE_TAG="YES"; TAG_NAME="$TAG_NAME $OPTARG";;
    2) GROUP_WORDS_BY_TWO=YES;;
    3) GROUP_WORDS_BY_THREE=YES;;
    \?) # unknown flag
       show_usage
       exit 1;;
  esac
done
shift $((OPTIND - 1))

# "-l lang" is also accepted right after the option list.
if [ "$1" = "-l" ]; then
  LANGUAGE="$2"
  shift 2
fi

# File names derived from the language; they are used relative to the
# directory the script changes into later.
VOCABULARY=${LANGUAGE}.txt
NOTES_FILE=notes-${LANGUAGE}.txt
110 #----------------------------------------------------
# Turn the text on stdin into a frequency list of words that are not in
# the vocabulary.
# Arguments: $1 - file that receives every accepted word, one per line
# Outputs:   "count word" lines on stdout, highest count first
get_words()
{
  local all_words_file="$1"

  # 1. one token per line; "--" separates words; apostrophes are hidden
  #    from the Perl filter behind a placeholder
  # 2. Perl blanks out punctuation and keeps only lines built from letters,
  #    spaces, apostrophes, underscores and hyphens
  # 3. apostrophes are restored and the tokens are split again
  # 4. the complete word stream is saved, known words are filtered out and
  #    the rest is counted; counts without a word are dropped
  tr ' ' '\n' \
    | sed -e 's/--/ /g' -e "s/'/__APOSTROPHE__/g" \
    | perl -MEncode -Mutf8 -n -e '$_ = decode( "utf8", $_);y/*\r,.:#@()+=—<>$;"?!|·[]^%&/ /; binmode STDOUT, ":utf8"; print if /^[[:alpha:] '"'"'_-]*$/' \
    | sed "s/__APOSTROPHE__/'/g" \
    | tr ' ' '\n' \
    | tee "$all_words_file" \
    | grep_v_english_perl \
    | sort \
    | uniq -c \
    | awk '$2 != ""' \
    | sort -rn
}
# Prefix the unknown-word list with a statistics header and insert
# "# NN" coverage marks, or print only the statistics.
# Arguments: $1 - file with every word of the text (written by get_words)
# Input:     "count word" lines of the unknown words on stdin
# Globals:   DONT_ADD_MARKLINES, STAT_ONLY, LANGUAGE, ORIGINAL_TEXT (read)
# Outputs:   stdin unchanged when DONT_ADD_MARKLINES is YES; two
#            statistics lines when STAT_ONLY is YES; otherwise the header
#            line followed by the list with marks
add_stat()
{
  if [ "$DONT_ADD_MARKLINES" = "YES" ]; then
    cat
    return
  fi

  local before="$1"
  local after="${before}2"
  local total total_unknown total_known percentage sentences script

  # stdin is needed twice (sum of counts, then the marking pass)
  cat > "$after"

  total=$(wc -w < "$before")
  total=$((total + 0))
  # "s + 0" prints 0 instead of an empty string for an empty list
  total_unknown=$(awk '{ s += $1 } END { print s + 0 }' "$after")
  total_known=$((total - total_unknown))

  # Known words in percent, cut to one decimal.  An empty text has nothing
  # to divide by and is reported as 0.
  if [ "$total" -gt 0 ]; then
    percentage=$(echo "100*($total-$total_unknown)/$total" | bc -l | sed 's/\.\(.\).*/.\1/')
  else
    percentage=0
  fi

  # The number of "." characters in the original text is the sentence count.
  sentences=$(tr -cd '.' < "$ORIGINAL_TEXT" | wc -c)
  sentences=$((sentences + 0))

  if [ "$STAT_ONLY" = "YES" ]; then
    local unknown_percentage=0 wps=0 uwps=0
    if [ "$total" -gt 0 ]; then
      unknown_percentage=$(echo "(100-$percentage)" | bc -l)
    fi
    # A text without any period would otherwise divide by zero.
    if [ "$sentences" -gt 0 ]; then
      wps=$((total / sentences))
      uwps=$((10 * total_unknown / sentences))
    fi
    echo "LANG KNOWN% UNKNOWN% KNOWN TOTAL WPS UWPS*10"
    echo "$LANGUAGE $percentage $unknown_percentage $total_known $total $wps $uwps "
    rm -f -- "$after"
    return 0
  fi

  echo "# $LANGUAGE, $percentage, <$total_known/$total>"

  if [ "$total" -eq 0 ]; then
    # No words at all: the marking pass would divide by zero.
    cat "$after"
    rm -f -- "$after"
    return 0
  fi

  script=$(mktemp /tmp/perl-grep-v-english-XXXXXXXX) || return 1
  cat <<'PERL_SCRIPT' > "$script"
# Copy the list and print "# NN" after the line at which known words plus
# the unknown words listed so far reach NN percent of the text.  Marks are
# 5 percent apart below 90 and 1 percent apart from 90 on.
my $total=shift(@ARGV);
my $total_known=shift(@ARGV);
my $s=0;
my $mark_line=int($total_known*100/$total/5)*5;
if ($mark_line>=90) {
    $mark_line=int($total_known*100/$total)+1;
} else { $mark_line +=5; };
while(<>)
{
    print;
    /^\s*([0-9]*)\s*/;
    $s+=$1;
    if (($total_known+$s)*100/$total>=$mark_line) {
        print "# $mark_line\n";
        if ($mark_line>=90) { $mark_line+=1; } else { $mark_line +=5; };
    }
}
PERL_SCRIPT
  perl "$script" "$total" "$total_known" "$after"
  rm -f -- "$script"
  rm -f -- "$after"
}
# Pass stdin through and, when requested, append word pairs and triples
# taken from the original text as extra "words" joined with "_".
# Globals: GROUP_WORDS_BY_TWO, GROUP_WORDS_BY_THREE (read, exported),
#          ORIGINAL_TEXT (read)
# Outputs: stdin unchanged, followed by one n-gram per line
two_and_three_words()
{
  # stdin is always copied first
  cat

  if [ "$GROUP_WORDS_BY_THREE" != "YES" ] && [ "$GROUP_WORDS_BY_TWO" != "YES" ]; then
    return
  fi

  export GROUP_WORDS_BY_THREE
  export GROUP_WORDS_BY_TWO
  local script
  script=$(mktemp /tmp/perl-two-and-three-XXXXXXXX) || return 1
  cat <<'PERL_SCRIPT' > "$script"
#!/usr/bin/perl
local $/;
$words=<>;
$words=~ s@[!?;,:#1-9".]@ @g;
$words =~ s@\s+@ @g;
@words = split /\s+/, $words;
# Visit every position.  Elements past the end of the list are undefined
# and fail the truth test below, so the pairs and triples at the very end
# of the text are produced as well.
for ($i=0; $i<=$#words; $i++) {
    my ($a,$b,$c)= ($words[$i],$words[$i+1],$words[$i+2]);
    if ($ENV{GROUP_WORDS_BY_THREE} eq "YES" and ($a && $b && $c)) {
        print "${a}_${b}_${c}\n";
    };
    if ($ENV{GROUP_WORDS_BY_TWO} eq "YES" and ($a && $b)) {
        print "${a}_${b}\n";
    };
}
PERL_SCRIPT
  perl "$script" "$ORIGINAL_TEXT"
  rm -f -- "$script"
}
# Filter out of stdin every line that is exactly a word of the vocabulary.
# Globals: VOCABULARY (read; the file is created when it does not exist)
# Outputs: the lines of stdin that are not vocabulary words
grep_v_english()
{
  [ -e "$VOCABULARY" ] || touch "$VOCABULARY"

  # The vocabulary is data: it is handed to grep as a list of fixed
  # strings and is never evaluated by the shell.  awk emits one
  # whitespace-separated entry per line and drops blank lines, which as
  # patterns would remove empty input lines.
  # grep exits with 1 when it selects no line; that is not an error here.
  grep -Fxv -f <(awk '{ for (i = 1; i <= NF; i++) print $i }' "$VOCABULARY") \
    || [ "$?" -eq 1 ]
}
# Print the lines of stdin that are listed neither in the main vocabulary
# nor in the vocabulary of any tag named in TAG_NAME.
# Globals: VOCABULARY (read, exported; created when missing),
#          TAG_NAME (read), VOC_FILES (written, exported)
grep_v_english_perl()
{
  [ -e "$VOCABULARY" ] || touch "$VOCABULARY"

  # Space-separated list of word files handed to Perl via the environment.
  export VOCABULARY VOC_FILES
  VOC_FILES=$VOCABULARY
  for i in $TAG_NAME; do
    VOC_FILES="${VOC_FILES} $(tag_file_name $i)"
  done

  PERL_SCRIPT_TEMP_NAME=$(mktemp /tmp/perl-grep-v-english-XXXXXXXX)
  cat > $PERL_SCRIPT_TEMP_NAME <<'PERL_SCRIPT'
# Load every readable word file into a lookup table ...
my %known;
(my $file_list = $ENV{VOC_FILES}) =~ s@^ @@;
foreach my $path (split /\s+/, $file_list) {
    open(VOC, $path) or next;
    while (my $entry = <VOC>) {
        chomp $entry;
        $known{$entry} = "1";
    }
}
# ... and echo only the input lines that are not in it.
while (my $candidate = <>) {
    chomp $candidate;
    print "$candidate\n" unless defined $known{$candidate};
}
PERL_SCRIPT
  perl $PERL_SCRIPT_TEMP_NAME
  rm $PERL_SCRIPT_TEMP_NAME
}
# Reorder the "count word" list on stdin so that related word forms end
# up next to each other and the heaviest groups come first.
# Globals: VOCABULARY, LANGUAGE, NEED_TO_USE_VOCABULARY_WHEN_SORT,
#          NOTES_FILE (read, exported; NOTES_FILE is created when missing)
# Outputs: the input lines, unchanged in content, in the new order
group_words()
{
  local script
  script=$(mktemp /tmp/perl-group-words-XXXXXXXX) || return 1
  cat <<'PERL_SCRIPT' > "$script"
#!/usr/bin/perl

use Encode;
use utf8;

# The Snowball stemmer is used for German only.  It is loaded at run time
# so that a missing module does not stop the script for other languages.
my $have_snowball = eval { require Lingua::Stem::Snowball; 1 } ? 1 : 0;

my %linked_words;   # normalized word => normalized word it is linked to
my %group_weight;   # group name => sum of the counts of its members
my %normalized;     # cache: word => group name

# Read the notes file into a hash: word => text noted for it.
sub load_notes_dict
{
    my %dict;
    if (open(my $notes, '<', $ENV{NOTES_FILE})) {
        while (my $line = <$notes>) {
            $line = decode( "utf8", $line);
            chomp $line;
            $line =~ s/^\s+//;
            my ($word, $note) = split /\s+/, $line, 2;
            next unless defined $word;
            $dict{$word} = $note;
        }
        close($notes);
    }
    return %dict;
}

sub normalize_ukrainian
{
    my $word = lc(shift);
    $word =~ s/[юіоеуаи]$//g;
    return $word;
}

sub normalize_esperanto
{
    my $word = lc(shift);
    # verbs
    for my $ending (qw(i is os as us)) { $word =~ s/$ending$//; }
    # nouns
    $word =~ s/j?n?$//;
    return $word;
}

# Suffix stripping used for German when the Snowball module is missing.
sub normalize_german_
{
    my $word = lc(shift);

    for my $suffix (qw(heit keit tum ung nis schaft ist
                       en er
                       lich ig
                       al isch
                       ell haft
                       bar sam lich)) {
        $word =~ s/$suffix$//;
    }

    my @prefixes=qw(
        ab an auf aus bei dazwischen ein fest heraus her hinaus hin los mit nach voraus vorbei vor weg weiter zurück zusammen zu
        be emp ent er ge miss ver zer durch über um unter wieder);
    # Emptying the list switches prefix stripping off.
    @prefixes=();
    for my $pref (@prefixes) {
        $word =~ s/^$pref//;
    }

    return $word;
}

sub normalize_german
{
    my ($word) = @_;
    return normalize_german_($word) unless $have_snowball;
    my @stems = Lingua::Stem::Snowball::stem('de', [ $word ]);
    return $stems[0];
}

# The substitutions run in this order, each at most once.
sub normalize_english
{
    my $word = lc(shift);
    for my $suffix (qw(s
                       ation ness ship ally ance ity ment
                       ed en er est ing
                       ism ist ful able ably
                       ify fy ly
                       ise ize
                       e)) {
        $word =~ s/$suffix$//;
    }
    return $word;
}

# Language-specific normal form; the word is returned as it is for a
# language without rules.
sub normalize_without_linked
{
    my ($word) = @_;
    my $language = $ENV{LANGUAGE};
    if    ( $language eq "en" ) { return normalize_english($word); }
    elsif ( $language eq "de" ) { return normalize_german($word); }
    elsif ( $language eq "uk" ) { return normalize_ukrainian($word); }
    # "io" is what this script has always accepted; "eo" is added as a
    # second name for the same rules.
    elsif ( $language eq "io" or $language eq "eo" ) { return normalize_esperanto($word); }
    else { return $word; }
}

# Normal form, replaced by its link target when the notes define one.
sub normalize_with_linked
{
    my $word = normalize_without_linked(shift);
    if ($linked_words{$word}) {
        return $linked_words{$word};
    }
    return $word;
}

# Group name of a word.  Results are cached because the sort comparator
# asks for the same words again and again.  Must not be called before
# %linked_words is filled.
sub normalize
{
    my ($word) = @_;
    $word = '' unless defined $word;
    $normalized{$word} = normalize_with_linked($word)
        unless exists $normalized{$word};
    return $normalized{$word};
}

# Order two "count word" lines: by group weight, then by group name, then
# by the count of the line itself.
sub compare
{
    my ($x, $y) = @_;
    $x =~ s/^\s*//;
    $y =~ s/^\s*//;
    my ($x_count, $x_word) = split /\s+/, $x, 2;
    my ($y_count, $y_word) = split /\s+/, $y, 2;
    my ($x_group, $y_group) = (normalize($x_word), normalize($y_word));

    return $group_weight{$x_group} <=> $group_weight{$y_group}
        || $x_group cmp $y_group
        || $x_count <=> $y_count;
}

# A note that contains "@word" links the noted word to "word".  Both sides
# are stored in their language-specific normal form.  Any run of letters
# after "@" is accepted.
sub find_linked_words
{
    my ($dict) = @_;
    my %links;
    for my $key (keys(%$dict)) {
        my $val = $dict->{$key};
        next unless defined $val;
        if ($val =~ /\@([[:alpha:]]+)/) {
            $links{normalize_without_linked($key)} = normalize_without_linked($1);
        }
    }
    return %links;
}

my %dict = load_notes_dict();
%linked_words = find_linked_words(\%dict);

# Known words are decoded like the input lines, so that both are
# normalized the same way.
my %Vocabulary;
open(my $voc, '<', $ENV{VOCABULARY})
    or die "Can't open VOCABULARY";
while (my $entry = <$voc>) {
    chomp $entry;
    $Vocabulary{normalize(decode( "utf8", $entry))}="1";
}
close($voc);

binmode STDIN,":utf8";
my @lines=<STDIN>;
chomp(@lines);
for my $line (@lines) {
    (my $trimmed = $line) =~ s/^\s*//;
    my ($count, $word)=split(/\s+/,$trimmed,2);
    $group_weight{normalize($word)}+=$count;
}
# With -k, groups that contain a known word weigh twice as much.
if ($ENV{NEED_TO_USE_VOCABULARY_WHEN_SORT} eq "YES") {
    for my $k (keys %group_weight) {
        if (defined($Vocabulary{$k})) {
            $group_weight{$k} *= 2;
        }
    }
}
binmode STDOUT, ":utf8";
for my $line (sort { compare($b,$a) } @lines) {
    print "$line\n";
}
PERL_SCRIPT
  export VOCABULARY
  export NEED_TO_USE_VOCABULARY_WHEN_SORT
  export LANGUAGE
  [ -e "$NOTES_FILE" ] || touch "$NOTES_FILE"
  export NOTES_FILE
  perl "$script"
  rm -f -- "$script"
}
# Print the text of a web page with the URLs taken out of it.
# Arguments: $1 - address passed to "lynx -dump"
# Outputs:   the dump on stdout, minus every http:// and https:// URL
text_from_url()
{
  # /g removes all URLs of a line, not just the first one.
  lynx -dump "$1" | perl -p -e 's@https?://[a-zA-Z&_.:/0-9%?=,#+()\[\]~-]*@@g'
}
# Append the note stored for a word to the line of that word, in place.
# Arguments: $1 - file with "count word" lines; it is rewritten
# Globals:   NOTES_FILE (read, exported; created when missing)
add_marks()
{
  local script
  script=$(mktemp /tmp/perl-grep-v-english-XXXXXXXX) || return 1
  cat <<'PERL_SCRIPT' > "$script"
use Encode;

# Read the notes file into a hash: word => text noted for it.
sub load_notes_dict
{
    my %dict;
    if (open(my $notes, '<', $ENV{NOTES_FILE})) {
        while (my $line = <$notes>) {
            $line = decode( "utf8", $line);
            chomp $line;
            $line =~ s/^\s+//;
            my ($word, $note) = split /\s+/, $line, 2;
            next unless defined $word;
            $dict{$word} = $note;
        }
        close($notes);
    }
    return %dict;
}

my %dict = load_notes_dict();

my $file = $ARGV[0];
if (open(my $in, '<', $file)) {
    my @lines = map { decode( "utf8", $_) } <$in>;
    close($in);

    if (open(my $out, '>', $file)) {
        binmode $out, ":utf8";
        for my $line (@lines) {
            # The word is the second field.  The match is tested, so a line
            # that does not match can never reuse the word of an earlier line.
            if ($line !~ /^#/
                and $line =~ /^\s*\S+\s+(\S+)/
                and defined($dict{$1})) {
                my $mark = $dict{$1};
                chomp $line;
                # Notes start in column 31.  A longer line still gets one
                # blank so that the note is not glued to the word.
                my $pad = 30 - length($line);
                $pad = 1 if $pad < 1;
                print $out $line, " " x $pad, $mark, "\n";
            }
            else {
                print $out $line;
            }
        }
        close($out);
    }
}
PERL_SCRIPT
  [ -e "$NOTES_FILE" ] || touch "$NOTES_FILE"
  export NOTES_FILE
  perl "$script" "$1"
  rm -f -- "$script"
}
# Strip the notes from the word list and store them in the notes file.
# Arguments: $1 - file with "count word [note]" lines; it is rewritten
#                 without the notes
# Globals:   NOTES_FILE (read, exported; created when missing),
#            DONT_ADD_MARKS (read, exported; YES leaves the notes file alone)
remove_marks()
{
  local script
  script=$(mktemp /tmp/perl-grep-v-english-XXXXXXXX) || return 1
  cat <<'PERL_SCRIPT' > "$script"
my $file = $ARGV[0];
my %dict;    # word => note found behind it in the word list

if (open(my $in, '<', $file)) {
    my @lines = <$in>;
    close($in);

    if (open(my $out, '>', $file)) {
        for my $line (@lines) {
            chomp $line;
            # Anchored at the start of the line: indent, count, gap, word,
            # gap, note.  Nothing in front of the count can get lost.
            if ($line !~ /^#/
                and $line =~ m/^(\s*)(\S+)(\s+)(\S+)(\s+)(.*)$/) {
                $dict{$4} = $6;
                print $out "$1$2$3$4\n";
            }
            else {
                print $out "$line\n";
            }
        }
        close($out);
    }
}

if (($ENV{DONT_ADD_MARKS} ne "YES") and open(my $notes_in, '<', $ENV{NOTES_FILE})) {
    my @notes = <$notes_in>;
    close($notes_in);

    if (open(my $notes_out, '>', $ENV{NOTES_FILE})) {
        for my $line (@notes) {
            chomp $line;
            $line =~ s/^\s+//;
            my ($word, $note) = split /\s+/, $line, 2;
            # An old entry is kept when its word carries no note in the
            # word list and the entry has note text.
            next if defined($word) and defined($dict{$word});
            next if !defined($note) or $note eq "";
            print $notes_out "$line\n";
        }
        # Sorted, so that the file does not get reshuffled on every run.
        for my $word (sort keys %dict) {
            my $pad = 30 - length($word);
            $pad = 1 if $pad < 1;    # never glue the note to the word
            print $notes_out $word, " " x $pad, $dict{$word}, "\n";
        }
        close($notes_out);
    }
}
PERL_SCRIPT
  [ -e "$NOTES_FILE" ] || touch "$NOTES_FILE"
  export NOTES_FILE
  export DONT_ADD_MARKS
  perl "$script" "$1"
  rm -f -- "$script"
}
# Copy only the requested pages of stdin to stdout.
# Arguments: $1 - page range "start-stop/total", "start+count/total" or
#                 "page/total"; empty (or "0") copies everything
# The input is cut into "total" equal parts by line count; the parts are
# numbered from 0, and "stop" is the first part that is left out.
part()
{
  local script
  script=$(mktemp /tmp/perl-part-XXXXXXXX) || return 1
  cat <<'PERL_SCRIPT' > "$script"
#!/usr/bin/perl

my @lines = <STDIN>;
my $count = scalar(@lines);     # number of lines, not the last index
my $interval = $ARGV[0];

if (not $interval) {
    print @lines;
    exit 0;
}

my ($start, $stop, $total);
if ($interval =~ m@(.*)/(.*)@) {
    ($start, $total) = ($1, $2);
}
else {
    ($start, $total) = ($interval, 0);
}
if ($start =~ m@(.*)-(.*)@) {
    ($start, $stop) = ($1, $2);
}
if ($start =~ m@(.*)\+(.*)@) {
    my $length = $2;
    $start = $1;
    $stop = $start + $length;
}
# A single page number selects exactly that page.
$stop = $start + 1 unless defined $stop;

if ($total <= 0) {
    # Without the number of parts there is nothing to divide by.
    print STDERR "new-words: page range must be given as start-stop/total (got '$interval')\n";
    exit 1;
}

# Multiply before dividing, so that whole results stay exact.
my $first = int($count * $start / $total);
my $last  = int($count * $stop / $total);
$first = 0 if $first < 0;
$last = $count if $last > $count;

print @lines[$first .. $last - 1] if $last > $first;
PERL_SCRIPT
  perl "$script" "$1"
  rm -f -- "$script"
}
# -T: print the names of the tags that have a vocabulary file for the
# current language, one per line, and stop.
if [ "$TAGS_LIST_ONLY" = "YES" ]; then
  # Without the work directory there is nothing to list; do not report
  # files of whatever directory the script happens to be in.
  cd "${WORK_DIR}" || exit 1
  for tag_file in "${LANGUAGE}"_*.txt; do
    # an unmatched pattern stays as it is and names no file
    [ -e "$tag_file" ] || continue
    tag_file=${tag_file#*_}             # drop everything up to the first "_"
    printf '%s\n' "${tag_file%.txt}"    # drop the literal ".txt"
  done
  exit 0
fi
# Print the name of the vocabulary file that belongs to a tag.
# Arguments: $1 - tag name
# Globals:   LANGUAGE (read)
# Outputs:   "<LANGUAGE>_<tag>.txt" on stdout
tag_file_name()
{
  printf '%s_%s.txt\n' "$LANGUAGE" "$1"
}
# -r: delete the vocabulary file of every tag named in TAG_NAME and stop.
if [ "$REMOVE_TAG" = "YES" ]; then
  # Never delete relative to some other directory.
  cd "${WORK_DIR}" || exit 1

  # Split the list without glob expansion, so that a tag such as "*"
  # reaches the check below as it was typed.
  read -r -a tags_to_remove <<< "$TAG_NAME"
  for i in "${tags_to_remove[@]}"; do
    # Skip names that could leave the work directory or match several
    # files.  The tag under test is $i.
    case "$i" in
      *[/*?]*) continue;;
    esac
    f=$(tag_file_name "$i")
    if [ -e "$f" ]; then
      rm -f -- "$f" && echo Tag "'$i'" removed
    else
      echo Unknown tag "'$i'"
    fi
  done
  exit 0
fi
# Everything below works inside the work directory.  Stop when it cannot
# be entered instead of creating vocabulary files somewhere else.
mkdir -p "$WORK_DIR" || exit 1
oldpwd="$PWD"
cd "$WORK_DIR" || exit 1

# Select the input text, then run it through the processing chain.  The
# result ends up twice: in TEMP1 (reference copy) and in TEMP2 (the copy
# that is edited later).
if [ "$MERGE_TAGGED_WORDS" = "YES" ]; then
  # -m: the words of the named tags are the input
  merge_files=()
  for i in $MERGE_THIS_TAGS; do
    f=$(tag_file_name "$i")
    [ -e "$f" ] && merge_files+=("$f")
  done
  if [ "${#merge_files[@]}" -eq 0 ]; then
    echo Unknown tags: $MERGE_THIS_TAGS >&2
  else
    cat -- "${merge_files[@]}"
  fi
elif [ "$MERGE_ALL_TAGGED" = "YES" ]; then
  # -M: the words of every tag of this language are the input
  cat "${LANGUAGE}"_*.txt
elif printf '%s\n' "$1" | grep -Eq 'https?:'; then
  text_from_url "$1"
elif [ "$#" != 0 ]; then
  # A relative file name refers to the directory the user started in.
  case "$1" in
    /*) cat -- "$1";;
    *)  cat -- "$oldpwd/$1";;
  esac
else
  cat
fi \
  | part "$PART_TO_PROCESS" \
  | tee "$ORIGINAL_TEXT" \
  | two_and_three_words \
  | get_words "${TEMP1}-full" \
  | group_words \
  | add_stat "${TEMP1}-full" \
  | tee "$TEMP1" > "$TEMP2"
# Statistics and non-interactive mode only print the result.  Otherwise
# the list is opened in the editor and every line the user deleted is
# recorded as a known word.
if [ "$STAT_ONLY" = "YES" ] || [ "$NON_INTERACTIVE_MODE" = "YES" ]; then
  cat "$TEMP1"
elif [ -s "$TEMP2" ]; then
  [ "$DONT_ADD_MARKS" = "YES" ] || add_marks "$TEMP2"
  if [ "$editor" = vim ]; then
    # stdin and stdout may be pipes; vim needs the terminal
    vim -c 'set keywordprg='"$LANGUAGE" -c 'set iskeyword=@,48-57,/,.,-,_,+,,,#,$,%,~,=,48-255' "$TEMP2" < /dev/tty > /dev/tty
  else
    # unquoted on purpose: $EDITOR may carry arguments
    $editor "$TEMP2"
  fi
  remove_marks "$TEMP2"

  vocabulary="$VOCABULARY"
  [ -n "$TAG_NAME" ] && vocabulary=$(tag_file_name "$TAG_NAME")
  # Only "< count word" lines of the diff are deleted words.  Change
  # headers, "---" separators, "> ..." lines and deleted "# ..." mark
  # lines carry no known word and must not reach the vocabulary.
  diff "$TEMP1" "$TEMP2" \
    | awk '$1 == "<" && $2 != "#" && $3 != "" { print $3 }' \
    | sort -u >> "$vocabulary"
fi

rm -f -- "$TEMP1" "$TEMP2" "${TEMP1}-full" "$ORIGINAL_TEXT"