new-words

view new-words.sh @ 2:68722cd6faff

Добавление пометок к словам.

Справа от слова можно поставить пометку,
которая попадает в файл ~/.new-words/notes-LANG.txt.
Потом, при появлении этого слова,
она вспоминается и выводится вместе с ним.
author igor@book.xt.vpn
date Fri Feb 26 21:41:27 2010 +0200 (2010-02-26)
parents 4c9076f87241
children c703b8898696
line source
#!/bin/sh
#
# new-words: list the words of a text (URL, file or stdin) that are not yet
# in the per-language vocabulary, let the user edit that list, and remember
# the words the user deleted as "known".
#
# The here-document below is sent to /dev/null: it only serves as an in-file
# usage note (multi-language support via "-l lang" or a "<lang>-words" link).
cat <<HELP > /dev/null
Поддержка нескольких языков:
new-words -l lang URL
Например, для немецких текстов:
new-words -l de URL
Или, предварительно создав соответствующую ссылку:
de-words URL
HELP

WORK_DIR=~/.new-words/
TEMP1=`mktemp /tmp/news-words-XXXXXXXXXX`
TEMP2=`mktemp /tmp/news-words-XXXXXXXXXX`
# Do not leave the scratch files behind when the script stops early.
trap 'rm -f "$TEMP1" "$TEMP2"' EXIT
trap 'exit 1' HUP INT TERM
editor=${EDITOR:-vim}

# The language defaults to English.  When the script is started through a
# link called "<lang>-words", the part of the name before the first "-" is
# the language; the plain name "new-words" keeps the default.
LANGUAGE=en
my_name=${0##*/}
my_name=${my_name%%-*}
[ "${my_name}" = "new" ] || LANGUAGE="$my_name"

# An explicit "-l lang" overrides whatever the script name suggested.
if [ "$1" = "-l" ]
then
    if [ "$#" -lt 2 ]
    then
        echo "Usage: ${0##*/} [-l lang] [URL|FILE]" >&2
        exit 1
    fi
    LANGUAGE="$2"
    shift 2
fi

# Both files live in $WORK_DIR; the names are relative on purpose.
VOCABULARY=${LANGUAGE}.txt
NOTES_FILE=notes-${LANGUAGE}.txt
# get_words: read text on stdin and print "count word" lines for every word
# that grep_v_english_perl lets through, most frequent first.
#
# Stages, in order: one token per line; "--" becomes a separator; punctuation
# is deleted; tokens are split again; known words are dropped; only purely
# alphanumeric lines are kept; occurrences are counted; lines without a word
# are discarded; the result is sorted by descending count.
#
# NOTE(review): inside the tr set, ".-:" is a character range, so "/" and the
# digits 0-9 are deleted as well -- confirm this is intended.
# NOTE(review): the set contains the non-ASCII characters "—" and "·"; a tr
# that works on bytes would delete their individual bytes -- verify with
# non-Latin input.
get_words()
{
    tr ' ' '\n' |
        sed 's/--/ /g' |
        tr -d '*\r,.-:#@()+=—<>$;"?!|·[]^%&'"'" |
        tr ' ' '\n' |
        grep_v_english_perl |
        grep -x '[[:alnum:]]*' |
        sort |
        uniq -c |
        awk '$2 != ""' |
        sort -rn
}
# grep_v_english: copy stdin to stdout, dropping every line that is a word
# listed in the file $VOCABULARY (created empty when missing).
#
# Apostrophes are removed from the vocabulary entries before comparison.
# The vocabulary is expected to hold one word per line; entries are compared
# literally against whole input lines.  Nothing read from the file is ever
# passed to the shell, so odd characters in it cannot break or hijack the
# command.
grep_v_english()
{
    [ -e "$VOCABULARY" ] || touch "$VOCABULARY"
    VOCABULARY="$VOCABULARY" awk -v apostrophe="'" '
        BEGIN {
            vocabulary = ENVIRON["VOCABULARY"]
            while ((getline known_word < vocabulary) > 0) {
                gsub(apostrophe, "", known_word)
                known[known_word] = 1
            }
            close(vocabulary)
        }
        !($0 in known)
    '
}
51 grep_v_english_perl()
52 {
53 PERL_SCRIPT_TEMP_NAME=`mktemp /tmp/perl-grep-v-english-XXXXXXXX`
54 cat <<'PERL_SCRIPT' > $PERL_SCRIPT_TEMP_NAME
55 open(VOC, $ENV{VOCABULARY})
56 or die "Can't open VOCABULARY";
57 while (<VOC>){
58 chomp;
59 s/'//g;
60 $voc{$_}="1";
61 }
62 while(<>) {
63 chomp;
64 if (not defined($voc{$_})) { print "$_\n"; }
65 }
66 PERL_SCRIPT
67 [ -e "$VOCABULARY" ] || touch "$VOCABULARY"
68 export VOCABULARY
69 perl $PERL_SCRIPT_TEMP_NAME
70 rm $PERL_SCRIPT_TEMP_NAME
71 }
# text_from_url URL: print the page at URL as plain text (rendered by
# "lynx -dump") with every http:// and https:// link removed, so that the
# pieces of link addresses are not counted as words.
text_from_url()
{
    lynx -dump "$1" | perl -p -e 's@https?://[a-zA-Z&_.:/0-9%?=,#+()\[\]~-]*@@g'
}
78 add_marks()
79 {
80 PERL_SCRIPT_TEMP_NAME=`mktemp /tmp/perl-grep-v-english-XXXXXXXX`
81 cat <<'PERL_SCRIPT' > $PERL_SCRIPT_TEMP_NAME
82 $file = $ARGV[0];
83 our $dict;
84 if (open(NOTES, $ENV{NOTES_FILE})) {
85 while(<NOTES>) {
86 chomp;
87 s/^\s+//;
88 my ($a,$b)=split /\s+/,$_,2;
89 $dict{$a}=$b;
90 }
91 }
92 if (open(F, $file)) {
93 @lines=<F>;
94 close(F);
96 if (open(F, ">$file")) {
97 for (@lines) {
98 m/\s+\S+\s+(\S+)/;
99 $name=$1;
100 if (defined($dict{$name})) {
101 chomp;
102 $mark=$dict{$name};
103 $space=" "x(30-length($_));
104 print F "$_$space$mark\n";
105 }
106 else {
107 print F "$_";
108 }
109 }
110 close(F);
111 }
112 }
113 PERL_SCRIPT
114 [ -e "$NOTES_FILE" ] || touch "$NOTES_FILE"
115 export NOTES_FILE
116 perl $PERL_SCRIPT_TEMP_NAME "$1"
117 rm $PERL_SCRIPT_TEMP_NAME
118 }
120 remove_marks()
121 {
122 PERL_SCRIPT_TEMP_NAME=`mktemp /tmp/perl-grep-v-english-XXXXXXXX`
123 cat <<'PERL_SCRIPT' > $PERL_SCRIPT_TEMP_NAME
124 $file = $ARGV[0];
125 our %dict;
126 if (open(F, $file)) {
127 @lines=<F>;
128 close(F);
130 if (open(F, ">$file")) {
131 for (@lines) {
132 chomp;
133 if (m/(\s+)(\S+)(\s+)(\S+)(\s+)(.*)/) {
134 my $name=$4;
135 my $comment=$6;
136 $dict{$name}=$comment;
137 print F "$1$2$3$4\n";
138 }
139 else {
140 print F "$_\n";
141 }
142 }
143 }
144 }
145 if (open(NOTES, $ENV{NOTES_FILE})) {
146 @lines=<NOTES>;
147 close(NOTES);
149 if (open(NOTES, ">".$ENV{NOTES_FILE})) {
150 for (@lines) {
151 chomp;
152 s/^\s+//;
153 my ($a,$b)=split /\s+/,$_,2;
154 if (not defined($dict{$a}) || ($dict{$a} eq $b)) {
155 print NOTES "$_\n";
156 if (defined($dict{$a})) { unset($dict{$a}); }
157 }
158 }
159 for (keys %dict) {
160 $mark=$dict{$_};
161 $space=" "x(30-length($_));
162 print NOTES "$_$space$mark\n";
163 }
164 }
165 }
166 PERL_SCRIPT
167 [ -e "$NOTES_FILE" ] || touch "$NOTES_FILE"
168 export NOTES_FILE
169 perl $PERL_SCRIPT_TEMP_NAME "$1"
170 rm $PERL_SCRIPT_TEMP_NAME
171 }
# Main flow: collect the unknown words of the input, show them (with notes)
# in an editor, then record the words the user deleted as known.

# An input file name is relative to the directory the user started in, so
# remember that directory before moving into the work directory.
START_DIR=`pwd`
mkdir -p "$WORK_DIR"
cd "$WORK_DIR" || exit 1

# $TEMP1 keeps the untouched word list, $TEMP2 is the copy the user edits.
case "$1" in
*http:*|*https:*)
    text_from_url "$1" | get_words | tee "$TEMP1" > "$TEMP2"
    ;;
*)
    if [ "$#" != 0 ]
    then
        INPUT_FILE="$1"
        case "$INPUT_FILE" in
        /*)
            ;;
        *)
            # Prefer the file as seen from the starting directory; otherwise
            # fall back to the name as given (relative to the work directory).
            if [ -e "$START_DIR/$INPUT_FILE" ]
            then
                INPUT_FILE="$START_DIR/$INPUT_FILE"
            fi
            ;;
        esac
        cat "$INPUT_FILE" | get_words | tee "$TEMP1" > "$TEMP2"
    else
        get_words | tee "$TEMP1" > "$TEMP2"
    fi
    ;;
esac

add_marks "$TEMP2"
if [ "$editor" = vim ]
then
    # stdin may be the text being analysed, so vim is attached to the
    # terminal explicitly.
    vim -c 'set keywordprg='"$LANGUAGE" -c 'set iskeyword=@,48-57,/,.,-,_,+,,,#,$,%,~,=' "$TEMP2" < /dev/tty > /dev/tty
else
    # Left unquoted so that $EDITOR may carry its own arguments.
    $editor "$TEMP2"
fi
remove_marks "$TEMP2"

# The third field of a diff "<"/">" line is the word of a "count word" entry.
# Header lines such as "3d2" or "---" have no third field and are skipped so
# that no empty entries end up in the vocabulary.
diff "$TEMP1" "$TEMP2" | awk '$3 != "" {print $3}' | sort -u >> "$VOCABULARY"
rm -f "$TEMP1" "$TEMP2"