new-words: 846240941452 new-words-py.sh

new-words

view new-words-py.sh @ 68:846240941452

added -C key: compress to lines; fixed bug with #90-line

author	Igor Chubin <igor@chub.in>
date	Sun Sep 23 16:07:29 2012 +0300 (2012-09-23)
parents	e25de9ea9184
children

line source

#!/bin/bash
#
# Deprecated shell front end for new-words.py.
#
# The script only prints a deprecation notice and terminates with status 1.
# Because of the unconditional `exit 1` below, every statement that follows
# it in this file is unreachable.

# The notice accompanies a failing exit status, so it is a diagnostic and is
# written to stderr. The quoted delimiter keeps the text literal.
cat >&2 <<'EOF'
Please, use the script no more.
You can execute new-words.py directly.
EOF
exit 1

#######################################
# Print the command-line help screen.
# Arguments: none
# Outputs:   the help text on stderr; nothing is written to stdout
#######################################
show_usage() {
  # `>&2` duplicates the descriptor that is already open. Re-opening
  # /dev/stderr by path truncates the target when stderr is redirected to a
  # regular file and can fail when the descriptor cannot be re-opened.
  # The delimiter is quoted so the text is printed literally.
  cat >&2 <<'HELP'

USAGE:

new-words [ -l lang ] [ -s ] [ ARG ]

SWITCHES:

-h print this screen
-c show compressed wordlist: one word per group
-f file show only words related to the words in the file
-G turn off word grouping
-k put higher words that are similar to the known words (only for English)
-l lang override language settings
-n non-interactive mode (don't run vi)
-N turn off known words filtering
-a don't add marks (and don't save marks added by user)
-p pages work with specified pages only (pages = start-stop/total )
-s show the text statistics (percentage of known words and so on) and exit
-S show your vocabulary statistics (number of words and word groups)
-t tag tag known words with tag
-T show list of active tags
-m tag merge the words tagged with "tag" into the main vocabulary
-M merge the words tagged with any tag into the main vocabulary
-d tag delete subvocabulary for the "tag"
-r RANGE show only first RANGE words
-R RANGE show only words lower than RANGE percent
-2 -3 find 2 and 3 words' sequences

The language of the text can be specified also
by name of the program new-words (correspondent link must be created before).
For example, these calls are equivalent:

de-words URL
new-words -l de URL

HELP
}

# Help is served before regular option parsing: when the first argument is
# exactly "-h", print the usage screen and finish with status 0.
case "$1" in
  -h)
    show_usage
    exit 0
    ;;
esac

# Path of the Python implementation and of the per-user data directory.
NEW_WORDS_PY=/home/igor/hg/new-words/new-words.py
WORK_DIR=~/.new-words/
# Editor command: $EDITOR when it is set and non-empty, otherwise vim.
editor=${EDITOR:-vim}

# language detection

#######################################
# Derive a language code from a Wikipedia article URL.
# Arguments: $1 - a command-line argument to inspect
# Outputs:   $1 with the first "http://" removed and everything from the
#            character preceding "wikipedia" onwards cut off
#            (http://de.wikipedia.org/wiki/Haus -> de)
# Returns:   0 when $1 contains "http://", any three characters and then
#            "wikipedia.org/wiki/"; 1 otherwise (nothing is printed)
#######################################
_wikipedia_language() {
  # Kept in a variable so that [[ =~ ]] treats it as a regular expression.
  local wiki_re='http://...wikipedia.org/wiki/'
  [[ "$1" =~ $wiki_re ]] || return 1
  # printf with a quoted argument passes the URL through unchanged; an
  # unquoted echo would word-split it and expand glob characters such as
  # "?" or "*".
  printf '%s\n' "$1" | sed -e 's@http://@@' -e 's@.wikipedia.*@@'
}

# NOTE(review): LANGUAGE is also the name of an environment variable read by
# GNU gettext; if the caller exports it, the value assigned here is what
# child processes will see - confirm this is acceptable.
LANGUAGE=en

# Name the script was started under, without the directory part and without
# everything from the first "-" on ("/usr/bin/de-words" -> "de").
my_name=${0##*/}
my_name=${my_name%%-*}

# A Wikipedia URL among the arguments selects its language; when several
# are given, the last one is used.
for arg; do
  if detected_language=$(_wikipedia_language "$arg"); then
    LANGUAGE=$detected_language
  fi
done

# A program name other than "new-..." overrides whatever was detected above.
[ "${my_name}" = "new" ] || LANGUAGE="$my_name"

#----------------------------------------------------
# command line options processing

# Defaults for every setting the switches below can change.
STAT_ONLY=NO
NEED_TO_USE_VOCABULARY_WHEN_SORT=NO
DONT_ADD_MARKS=NO
NON_INTERACTIVE_MODE=NO
PART_TO_PROCESS=''
GROUP_WORDS_BY_THREE=NO
GROUP_WORDS_BY_TWO=NO
TAG_NAME=''
MERGE_THIS_TAGS=''
TAGS_LIST_ONLY=NO
MERGE_TAGGED_WORDS=NO
MERGE_ALL_TAGGED=NO
DONT_ADD_MARKLINES=NO
FILTER_WORDS=YES
SHOW_VOC_STAT=NO
COMPRESSED_WORDLIST=NO
WORDS_GROUPING=YES
ALLOWED_WORDS_FILENAME=''
# Set only by -d / -r / -R. They are initialised here as well so that a
# same-named variable inherited from the environment cannot switch on tag
# removal or range limiting.
REMOVE_TAG=NO
SHOW_RANGE=''
SHOW_RANGE_PERCENTAGE=''

while getopts Gcf:l:sSkanNp:t:Tm:Md:r:R:23 opt; do
  case "$opt" in
    c) COMPRESSED_WORDLIST=YES ;;
    f) ALLOWED_WORDS_FILENAME="$OPTARG" ;;
    G) WORDS_GROUPING=NO ;;
    s) STAT_ONLY=YES ;;
    S) SHOW_VOC_STAT=YES ;;
    k) NEED_TO_USE_VOCABULARY_WHEN_SORT=YES ;;
    l) LANGUAGE="$OPTARG" ;;
    a) DONT_ADD_MARKS=YES ;;
    n) NON_INTERACTIVE_MODE=YES ;;
    N) FILTER_WORDS=NO ;;
    p) PART_TO_PROCESS="$OPTARG" ;;
    t) TAG_NAME="$OPTARG" ;;
    T) TAGS_LIST_ONLY="YES" ;;
    # -m merges the given tag together with any tag named by an earlier -t.
    m)
      DONT_ADD_MARKLINES="YES"
      MERGE_TAGGED_WORDS="YES"
      MERGE_THIS_TAGS="$TAG_NAME $OPTARG"
      ;;
    M)
      DONT_ADD_MARKLINES="YES"
      MERGE_ALL_TAGGED="YES"
      ;;
    # -d may be repeated; the tags are collected in a space-separated list.
    d)
      REMOVE_TAG="YES"
      TAG_NAME="$TAG_NAME $OPTARG"
      ;;
    r) SHOW_RANGE="$OPTARG" ;;
    R) SHOW_RANGE_PERCENTAGE="$OPTARG" ;;
    2) GROUP_WORDS_BY_TWO=YES ;;
    3) GROUP_WORDS_BY_THREE=YES ;;
    \?) # unknown flag
      show_usage
      exit 1
      ;;
  esac
done
# Drop the parsed switches; shell arithmetic replaces the external `expr`.
shift $((OPTIND - 1))

# A "-l lang" pair that follows the parsed switches is accepted as well.
if [ "$1" = "-l" ]; then
  LANGUAGE="$2"
  shift 2
fi

129

# Names of the per-language vocabulary and notes files.
VOCABULARY=${LANGUAGE}.txt
NOTES_FILE=notes-${LANGUAGE}.txt

# -S: run this script again on the vocabulary file itself with known-words
# filtering switched off (-N) and without the editor (-n); print the fifth
# field of the first output line with the "<" and ">" characters removed.
# $0 and the path are quoted so that directories containing spaces work.
if [ "${SHOW_VOC_STAT}" = "YES" ]; then
  "$0" -l "${LANGUAGE}" -N -n "${WORK_DIR}/${VOCABULARY}" \
    | head -n 1 \
    | awk '{print $5}' \
    | tr -d "<>"
  exit 0
fi

138

#######################################
# Run new-words.py with the options collected from the command line.
# Globals (read only):
#   NEW_WORDS_PY, LANGUAGE, SHOW_RANGE, SHOW_RANGE_PERCENTAGE,
#   PART_TO_PROCESS, ALLOWED_WORDS_FILENAME, NON_INTERACTIVE_MODE,
#   STAT_ONLY, COMPRESSED_WORDLIST, FILTER_WORDS, GROUP_WORDS_BY_TWO,
#   GROUP_WORDS_BY_THREE, WORDS_GROUPING
# Arguments: $1 - passed on unchanged as the last argument
# Returns:   the exit status of new-words.py
#######################################
get_words_group_words_add_stat() {
  # The command line is assembled in an array so that each value stays one
  # argument even when it contains spaces or glob characters. The global
  # settings are no longer rewritten, so the function can be called twice.
  local -a py_args=(-l "$LANGUAGE")

  # Switches that carry a value are added only when the value is non-empty.
  [[ -z "$SHOW_RANGE" ]] || py_args+=(-r "$SHOW_RANGE")
  [[ -z "$SHOW_RANGE_PERCENTAGE" ]] || py_args+=(-R "$SHOW_RANGE_PERCENTAGE")
  [[ -z "$PART_TO_PROCESS" ]] || py_args+=(-p "$PART_TO_PROCESS")
  [[ -z "$ALLOWED_WORDS_FILENAME" ]] || py_args+=(-f "$ALLOWED_WORDS_FILENAME")

  # Plain flags.
  [[ "$NON_INTERACTIVE_MODE" != YES ]] || py_args+=(-n)
  [[ "$STAT_ONLY" != YES ]] || py_args+=(-s)
  [[ "$COMPRESSED_WORDLIST" != YES ]] || py_args+=(-c)
  [[ "$FILTER_WORDS" != NO ]] || py_args+=(-N)
  [[ "$GROUP_WORDS_BY_TWO" != YES ]] || py_args+=(-2)
  [[ "$GROUP_WORDS_BY_THREE" != YES ]] || py_args+=(-3)
  [[ "$WORDS_GROUPING" != NO ]] || py_args+=(-G)

  "$NEW_WORDS_PY" "${py_args[@]}" -X get_words_group_words_add_stat "$1"
}

167

#######################################
# Print the names of the active tags, one per line. A tag is active when a
# file named "${LANGUAGE}_<tag>.txt" exists in WORK_DIR.
# Globals:   WORK_DIR, LANGUAGE (read)
# Outputs:   tag names on stdout
# Returns:   1 when WORK_DIR cannot be entered, 0 otherwise
# Note:      changes the current directory to WORK_DIR.
#######################################
list_active_tags() {
  local voc_file tag
  cd "${WORK_DIR}" || return 1
  # The glob is iterated directly, so names containing spaces stay intact.
  for voc_file in "${LANGUAGE}"_*.txt; do
    # Without a match the pattern itself is returned; skip it.
    [[ -e "$voc_file" ]] || continue
    # Strip the exact "<language>_" prefix, which stays correct when the
    # language code itself contains an underscore.
    tag=${voc_file#"${LANGUAGE}_"}
    printf '%s\n' "${tag%.txt}"
  done
}

# -T: print the list of active tags and stop.
if [ "$TAGS_LIST_ONLY" = "YES" ]; then
  list_active_tags || exit 1
  exit 0
fi

174

#######################################
# Print the name of the file that stores the words tagged with $1.
# Globals:   LANGUAGE (read)
# Arguments: $1 - tag name
# Outputs:   "<language>_<tag>.txt" on stdout
#######################################
tag_file_name() {
  printf '%s\n' "${LANGUAGE}_${1}.txt"
}

#######################################
# Delete the sub-vocabulary files of the tags listed in TAG_NAME.
# Globals:   WORK_DIR, LANGUAGE, TAG_NAME (read)
# Outputs:   one line per processed tag on stdout: "Tag '<tag>' removed" or
#            "Unknown tag '<tag>'"; refused tags produce no output
# Returns:   1 when WORK_DIR cannot be entered, 0 otherwise
# Note:      changes the current directory to WORK_DIR.
#######################################
remove_tags() {
  local -a tags
  local tag voc_file
  # Never delete relative to whatever directory we happen to be in.
  cd "${WORK_DIR}" || return 1
  # Split the whitespace-separated list without pathname expansion. read
  # reports failure because the input has no NUL delimiter, but the array
  # is filled nevertheless.
  read -r -d '' -a tags <<< "$TAG_NAME" || true
  for tag in "${tags[@]}"; do
    # Refuse tags containing a path separator or glob character. The check
    # has to look at the loop variable; it previously tested a variable
    # that is never set, so nothing was ever refused.
    [[ "$tag" == *[/*?]* ]] && continue
    voc_file=$(tag_file_name "$tag")
    if [[ -e "$voc_file" ]]; then
      rm -f -- "$voc_file" && printf "Tag '%s' removed\n" "$tag"
    else
      printf "Unknown tag '%s'\n" "$tag"
    fi
  done
  return 0
}

# -d: delete the requested sub-vocabularies and stop.
if [ "$REMOVE_TAG" = "YES" ]; then
  remove_tags || exit 1
  exit 0
fi

196

# Default action: hand the first remaining argument to new-words.py.
get_words_group_words_add_stat "${1}"

198

199 #mkdir -p $WORK_DIR

200 #oldpwd="$PWD"

201 #cd $WORK_DIR

202 #if [ "$MERGE_TAGGED_WORDS" = "YES" ]

203 #then

204 # VOC_FILES=''

205 # for i in $MERGE_THIS_TAGS

206 # do

207 # f=`tag_file_name $i`

208 # [ -e "$f" ] && VOC_FILES="${VOC_FILES} $f"

209 # done

210 # if [ -z "$VOC_FILES" ]

211 # then

212 # echo Unknown tags: $MERGE_THIS_TAGS > /dev/stderr

213 # else

214 # cat $VOC_FILES

215 # fi

216 #elif [ "$MERGE_ALL_TAGGED" = "YES" ]

217 #then

218 # cat ${LANGUAGE}_*.txt

219 #else

220 # cat

221 #fi

222

223