new-words
diff new-words.py @ 50:4e931db74618
filtering by wordlist
author | Igor Chubin <igor@chub.in> |
---|---|
date | Fri Mar 25 22:35:44 2011 +0200 (2011-03-25) |
parents | 00286f6bfa85 |
children | 74e05d4436ee |
line diff
```
1.1  --- a/new-words.py Wed Feb 09 21:08:23 2011 +0200
1.2  +++ b/new-words.py Fri Mar 25 22:35:44 2011 +0200
1.3  @@ -544,6 +544,20 @@
1.4       linked_words = find_linked_words(notes)
1.5       normalizator = Normalizator(config['language'], linked_words)
1.6
1.7  +    # filter words by allowed_words_filter
1.8  +    if os.environ.get('ALLOWED_WORDS_FILENAME', ''):
1.9  +        allowed_words_filename = os.environ.get('ALLOWED_WORDS_FILENAME', '')
1.10 +        normalized_allowed_words = [
1.11 +            normalizator.normalize(w.rstrip('\n'))
1.12 +            for w in readlines_from_file(allowed_words_filename)
1.13 +        ]
1.14 +
1.15 +        result = {}
1.16 +        for w, wn in words.iteritems():
1.17 +            if normalizator.normalize(w) in normalized_allowed_words:
1.18 +                result[w] = wn
1.19 +        words = result
1.20 +
1.21      words_with_freq = []
1.22      for k in sorted(words.keys(), key=lambda k: words[k], reverse=True):
1.23          words_with_freq.append((words[k], k))
```