new-words
changeset 50:4e931db74618
filtering by wordlist
author | Igor Chubin <igor@chub.in> |
---|---|
date | Fri Mar 25 22:35:44 2011 +0200 (2011-03-25) |
parents | 00286f6bfa85 |
children | 74e05d4436ee |
files | new-words-py.sh new-words.py new-words.sh |
line diff
```
1.1 --- a/new-words-py.sh Wed Feb 09 21:08:23 2011 +0200
1.2 +++ b/new-words-py.sh Fri Mar 25 22:35:44 2011 +0200
1.3 @@ -12,6 +12,7 @@
1.4
1.5 -h print this screen
1.6 -c show compressed wordlist: one word per group
1.7 + -f file show only words related to the words in the file
1.8 -G turn off word grouping
1.9 -k put higher words that are similar to the known words (only for English)
1.10 -l lang override language settings
1.11 @@ -86,10 +87,12 @@
1.12 SHOW_VOC_STAT=NO
1.13 COMPRESSED_WORDLIST=NO
1.14 WORDS_GROUPING=YES
1.15 -while getopts Gcl:sSkanNp:t:Tm:Md:r:R:23 opt
1.16 +ALLOWED_WORDS_FILENAME=''
1.17 +while getopts Gcf:l:sSkanNp:t:Tm:Md:r:R:23 opt
1.18 do
1.19 case "$opt" in
1.20 c) COMPRESSED_WORDLIST=YES;;
1.21 + f) ALLOWED_WORDS_FILENAME="$OPTARG";;
1.22 G) WORDS_GROUPING=NO;;
1.23 s) STAT_ONLY=YES;;
1.24 S) SHOW_VOC_STAT=YES;;
1.25 @@ -153,6 +156,7 @@
1.26 STAT_ONLY="$STAT_ONLY" \
1.27 WORDS_GROUPING="$WORDS_GROUPING" \
1.28 FILTER_WORDS="$FILTER_WORDS" \
1.29 + ALLOWED_WORDS_FILENAME="$ALLOWED_WORDS_FILENAME" \
1.30 $NEW_WORDS_PY -l "$LANGUAGE" -f get_words_group_words_add_stat "$1"
1.31 }
1.32
```
```
2.1 --- a/new-words.py Wed Feb 09 21:08:23 2011 +0200
2.2 +++ b/new-words.py Fri Mar 25 22:35:44 2011 +0200
2.3 @@ -544,6 +544,20 @@
2.4   linked_words = find_linked_words(notes)
2.5   normalizator = Normalizator(config['language'], linked_words)
2.6
2.7 + # filter words by allowed_words_filter
2.8 + if os.environ.get('ALLOWED_WORDS_FILENAME', ''):
2.9 +     allowed_words_filename = os.environ.get('ALLOWED_WORDS_FILENAME', '')
2.10 +     normalized_allowed_words = [
2.11 +         normalizator.normalize(w.rstrip('\n'))
2.12 +         for w in readlines_from_file(allowed_words_filename)
2.13 +     ]
2.14 +
2.15 +     result = {}
2.16 +     for w, wn in words.iteritems():
2.17 +         if normalizator.normalize(w) in normalized_allowed_words:
2.18 +             result[w] = wn
2.19 +     words = result
2.20 +
2.21   words_with_freq = []
2.22   for k in sorted(words.keys(), key=lambda k: words[k], reverse=True):
2.23       words_with_freq.append((words[k], k))
```
```
3.1 --- a/new-words.sh Wed Feb 09 21:08:23 2011 +0200
3.2 +++ b/new-words.sh Fri Mar 25 22:35:44 2011 +0200
3.3 @@ -12,6 +12,7 @@
3.4
3.5 -h print this screen
3.6 -c show compressed wordlist: one word per group
3.7 + -f file show only words that are related to the words from the file
3.8 -k put higher words that are similar to the known words (only for English)
3.9 -l lang override language settings
3.10 -n non-interactive mode (don't run vi)
3.11 @@ -81,6 +82,7 @@
3.12 FILTER_WORDS=YES
3.13 SHOW_VOC_STAT=NO
3.14 COMPRESSED_WORDLIST=NO
3.15 +ALLOWED_WORDS_FILENAME=''
3.16 while getopts cl:sSkanNp:t:Tm:Mr:23 opt
3.17 do
3.18 case "$opt" in
```