new-words

view new-words-py.sh @ 67:87bb1c5e6616

added de script to misc/
author Igor Chubin <igor@chub.in>
date Wed Mar 28 15:54:30 2012 +0200 (2012-03-28)
parents e25de9ea9184
children
line source
#!/bin/bash

# This wrapper is retired: it only tells the user to run new-words.py
# directly and then stops with a failure status, so nothing below this
# point is executed.
# The notice goes to stderr so that it is not mistaken for data by anything
# capturing or piping the script's stdout.
cat >&2 <<'EOF'
Please, use the script no more.
You can execute new-words.py directly.
EOF
exit 1
#######################################
# Print the command-line help for new-words.
# Arguments: none
# Outputs:   writes the usage text to stderr; nothing on stdout
#######################################
show_usage() {
  # '>&2' duplicates the existing stderr descriptor. Re-opening /dev/stderr
  # would truncate a redirected log file and may fail when stderr cannot be
  # re-opened. The quoted delimiter keeps the help text literal.
  cat >&2 <<'HELP'
USAGE:
new-words [ -l lang ] [ -s ] [ ARG ]
SWITCHES:
-h print this screen
-c show compressed wordlist: one word per group
-f file show only words related to the words in the file
-G turn off word grouping
-k put higher words that are similar to the known words (only for English)
-l lang override language settings
-n non-interactive mode (don't run vi)
-N turn off known words filtering
-a don't add marks (and don't save marks added by user)
-p pages work with specified pages only (pages = start-stop/total )
-s show the text statistics (percentage of known words and so on) and exit
-S show your vocabulary statistics (number of words and word groups)
-t tag tag known words with tag
-T show list of active tags
-m tag merge the words tagged with "tag" into the main vocabulary
-M merge the words tagged with any tag into the main vocabulary
-d tag delete subvocabulary for the "tag"
-r RANGE show only first RANGE words
-R RANGE show only words lower than RANGE percent
-2 -3 find 2 and 3 words' sequences
The language of the text can be specified also
by name of the program new-words (correspondent link must be created before).
For example, these calls are equivalent:
de-words URL
new-words -l de URL
HELP
}
# A leading -h prints the help screen and ends the run successfully,
# before any other option is looked at.
case "$1" in
  -h)
    show_usage
    exit 0
    ;;
esac
# Path of the Python implementation. The historical location is kept as the
# default, but it can be overridden through the environment.
NEW_WORDS_PY=${NEW_WORDS_PY:-/home/igor/hg/new-words/new-words.py}
WORK_DIR=~/.new-words/
editor=${EDITOR:-vim}

# language detection

#######################################
# Work out the default language of the text.
# Order of precedence (lowest to highest):
#   1. "en"
#   2. the two-character host prefix of a Wikipedia article URL found among
#      the arguments (the last matching argument wins); http and https
#   3. the program name up to its first '-', unless that prefix is "new"
#      (so a link named de-words selects "de")
# Arguments: $1 - program path (normally $0); remaining - command-line args
# Outputs:   the language code on stdout
#######################################
detect_language() {
  local prog=$1
  shift
  local lang=en
  local arg name
  # Kept in a variable so the regex is not subject to quoting surprises.
  local wiki_re='https?://([^/.][^/.])\.wikipedia\.org/wiki/'

  for arg in "$@"; do
    if [[ "$arg" =~ $wiki_re ]]; then
      lang=${BASH_REMATCH[1]}
    fi
  done

  name=${prog##*/}   # strip the directory part
  name=${name%%-*}   # keep what precedes the first '-'
  [[ "$name" == "new" ]] || lang=$name

  printf '%s\n' "$lang"
}

my_name=${0##*/}
my_name=${my_name%%-*}
LANGUAGE=$(detect_language "$0" "$@")
#----------------------------------------------------
# command line options processing

#######################################
# Reset every option variable to its default and then apply the switches
# found in the given argument list.
# Globals:   writes the option variables listed below, LANGUAGE (only for
#            -l) and OPTIND (left pointing at the first non-option
#            argument so the caller can shift)
# Arguments: the command-line arguments to parse
# Returns:   exits 0 after printing help for -h, exits 1 on an unknown
#            switch or a missing option argument
#######################################
parse_options() {
  STAT_ONLY=NO
  NEED_TO_USE_VOCABULARY_WHEN_SORT=NO
  DONT_ADD_MARKS=NO
  NON_INTERACTIVE_MODE=NO
  PART_TO_PROCESS=''
  GROUP_WORDS_BY_THREE=NO
  GROUP_WORDS_BY_TWO=NO
  TAG_NAME=''
  MERGE_THIS_TAGS=''
  TAGS_LIST_ONLY=NO
  MERGE_TAGGED_WORDS=NO
  MERGE_ALL_TAGGED=NO
  DONT_ADD_MARKLINES=NO
  FILTER_WORDS=YES
  SHOW_VOC_STAT=NO
  COMPRESSED_WORDLIST=NO
  WORDS_GROUPING=YES
  ALLOWED_WORDS_FILENAME=''
  # These three were previously left unset, so values exported in the
  # caller's environment (e.g. REMOVE_TAG=YES) leaked into the script.
  REMOVE_TAG=NO
  SHOW_RANGE=''
  SHOW_RANGE_PERCENTAGE=''

  local opt
  OPTIND=1   # getopts keeps state between runs; start from the first word
  while getopts 'hGcf:l:sSkanNp:t:Tm:Md:r:R:23' opt; do
    case "$opt" in
      h)
        # Documented switch; accepted anywhere, not only as the first word.
        show_usage
        exit 0
        ;;
      c) COMPRESSED_WORDLIST=YES ;;
      f) ALLOWED_WORDS_FILENAME="$OPTARG" ;;
      G) WORDS_GROUPING=NO ;;
      s) STAT_ONLY=YES ;;
      S) SHOW_VOC_STAT=YES ;;
      k) NEED_TO_USE_VOCABULARY_WHEN_SORT=YES ;;
      l) LANGUAGE="$OPTARG" ;;
      a) DONT_ADD_MARKS=YES ;;
      n) NON_INTERACTIVE_MODE=YES ;;
      N) FILTER_WORDS=NO ;;
      p) PART_TO_PROCESS="$OPTARG" ;;
      t) TAG_NAME="$OPTARG" ;;
      T) TAGS_LIST_ONLY="YES" ;;
      m)
        DONT_ADD_MARKLINES="YES"
        MERGE_TAGGED_WORDS="YES"
        # A tag given earlier with -t is merged as well.
        MERGE_THIS_TAGS="$TAG_NAME $OPTARG"
        ;;
      M)
        DONT_ADD_MARKLINES="YES"
        MERGE_ALL_TAGGED="YES"
        ;;
      d)
        REMOVE_TAG="YES"
        # -d may be repeated; tags accumulate as a space-separated list.
        TAG_NAME="$TAG_NAME $OPTARG"
        ;;
      r) SHOW_RANGE="$OPTARG" ;;
      R) SHOW_RANGE_PERCENTAGE="$OPTARG" ;;
      2) GROUP_WORDS_BY_TWO=YES ;;
      3) GROUP_WORDS_BY_THREE=YES ;;
      \?) # unknown flag
        show_usage
        exit 1
        ;;
    esac
  done
}

parse_options "$@"
shift $((OPTIND - 1))
# A "-l lang" pair that follows the parsed switches still selects the language.
case "$1" in
  -l)
    LANGUAGE="$2"
    shift 2
    ;;
esac

# File names derived from the selected language.
NOTES_FILE="notes-${LANGUAGE}.txt"
VOCABULARY="${LANGUAGE}.txt"
# -S: vocabulary statistics.
# Re-runs this script on the vocabulary file itself with known-word
# filtering off (-N) and in non-interactive mode (-n), then prints the
# fifth whitespace-separated field of the first output line with any
# '<' and '>' characters removed.
# $0 and the path are quoted so that locations containing whitespace work.
if [[ "${SHOW_VOC_STAT}" == "YES" ]]; then
  "$0" -l "${LANGUAGE}" -N -n "${WORK_DIR}/${VOCABULARY}" \
    | head -1 \
    | awk '{print $5}' \
    | tr -d "<>"
  exit 0
fi
#######################################
# Run new-words.py with the switches that correspond to the parsed options.
# The argument vector is built in a local array, so values containing
# whitespace (for instance the -f file name) stay single arguments and the
# global option variables are left unmodified.
# Globals:   reads NEW_WORDS_PY, LANGUAGE, SHOW_RANGE, SHOW_RANGE_PERCENTAGE,
#            PART_TO_PROCESS, ALLOWED_WORDS_FILENAME, NON_INTERACTIVE_MODE,
#            STAT_ONLY, COMPRESSED_WORDLIST, FILTER_WORDS,
#            GROUP_WORDS_BY_TWO, GROUP_WORDS_BY_THREE, WORDS_GROUPING
# Arguments: $1 - passed through unchanged as the last argument
# Returns:   the exit status of new-words.py
#######################################
get_words_group_words_add_stat() {
  local -a args=(-l "$LANGUAGE")

  # Switches that carry a value are added only when the value is non-empty.
  if [[ -n "$SHOW_RANGE" ]]; then
    args+=(-r "$SHOW_RANGE")
  fi
  if [[ -n "$SHOW_RANGE_PERCENTAGE" ]]; then
    args+=(-R "$SHOW_RANGE_PERCENTAGE")
  fi
  if [[ -n "$PART_TO_PROCESS" ]]; then
    args+=(-p "$PART_TO_PROCESS")
  fi
  if [[ -n "$ALLOWED_WORDS_FILENAME" ]]; then
    args+=(-f "$ALLOWED_WORDS_FILENAME")
  fi

  # Boolean switches.
  if [[ "$NON_INTERACTIVE_MODE" == YES ]]; then
    args+=(-n)
  fi
  if [[ "$STAT_ONLY" == YES ]]; then
    args+=(-s)
  fi
  if [[ "$COMPRESSED_WORDLIST" == YES ]]; then
    args+=(-c)
  fi
  if [[ "$FILTER_WORDS" == NO ]]; then
    args+=(-N)
  fi
  if [[ "$GROUP_WORDS_BY_TWO" == YES ]]; then
    args+=(-2)
  fi
  if [[ "$GROUP_WORDS_BY_THREE" == YES ]]; then
    args+=(-3)
  fi
  if [[ "$WORDS_GROUPING" == NO ]]; then
    args+=(-G)
  fi

  "$NEW_WORDS_PY" "${args[@]}" -X get_words_group_words_add_stat "$1"
}
#######################################
# Print the tags that have a sub-vocabulary for the current language, one
# per line. A tag is the part of a "<LANGUAGE>_<tag>.txt" file name in
# WORK_DIR between the language prefix and the ".txt" suffix.
# Globals:   reads WORK_DIR, LANGUAGE
# Arguments: none
# Outputs:   tag names on stdout; nothing when no such file exists
#######################################
list_active_tags() {
  local path name
  for path in "${WORK_DIR}"/"${LANGUAGE}"_*.txt; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e "$path" ]] || continue
    name=${path##*/}
    name=${name#"${LANGUAGE}_"}
    printf '%s\n' "${name%.txt}"
  done
}

# -T: list the active tags and stop.
if [[ "$TAGS_LIST_ONLY" == "YES" ]]; then
  list_active_tags
  exit 0
fi
# Print the name of the sub-vocabulary file that belongs to a tag.
# Globals:   reads LANGUAGE
# Arguments: $1 - tag name
# Outputs:   "<LANGUAGE>_<tag>.txt" on stdout
tag_file_name() {
  printf '%s_%s.txt\n' "${LANGUAGE}" "${1}"
}
#######################################
# Delete the sub-vocabulary files of the tags listed in TAG_NAME.
# Tag names containing '/', '*' or '?' are skipped silently so that a tag
# can never address a file outside WORK_DIR or act as a glob pattern.
# Globals:   reads TAG_NAME (whitespace-separated tags), WORK_DIR;
#            changes the current directory to WORK_DIR
# Arguments: none
# Outputs:   one line per processed tag on stdout
# Returns:   1 if WORK_DIR cannot be entered, 0 otherwise
#######################################
remove_tags() {
  local tag file
  local -a tags=()

  # Split on whitespace without glob expansion. read returns non-zero at
  # end of input when -d '' is used; the array is filled regardless.
  read -r -d '' -a tags <<< "$TAG_NAME"

  # Never delete relative to whatever directory we happened to start in.
  cd -- "${WORK_DIR}" || return 1

  for tag in "${tags[@]}"; do
    case "$tag" in
      *[/*?]*) continue ;;
    esac
    file=$(tag_file_name "$tag")
    if [[ -e "$file" ]]; then
      rm -f -- "$file" && echo "Tag '$tag' removed"
    else
      echo "Unknown tag '$tag'"
    fi
  done
  return 0
}

# -d: remove the requested tags and stop.
if [[ "$REMOVE_TAG" == "YES" ]]; then
  remove_tags
  exit
fi
# Default action when no earlier mode has exited: process the first
# remaining command-line argument.
get_words_group_words_add_stat "${1}"
199 #mkdir -p $WORK_DIR
200 #oldpwd="$PWD"
201 #cd $WORK_DIR
202 #if [ "$MERGE_TAGGED_WORDS" = "YES" ]
203 #then
204 # VOC_FILES=''
205 # for i in $MERGE_THIS_TAGS
206 # do
207 # f=`tag_file_name $i`
208 # [ -e "$f" ] && VOC_FILES="${VOC_FILES} $f"
209 # done
210 # if [ -z "$VOC_FILES" ]
211 # then
212 # echo Unknown tags: $MERGE_THIS_TAGS > /dev/stderr
213 # else
214 # cat $VOC_FILES
215 # fi
216 #elif [ "$MERGE_ALL_TAGGED" = "YES" ]
217 #then
218 # cat ${LANGUAGE}_*.txt
219 #else
220 # cat
221 #fi