new-words

diff new-words.py @ 50:4e931db74618

filtering by wordlist
author Igor Chubin <igor@chub.in>
date Fri Mar 25 22:35:44 2011 +0200 (2011-03-25)
parents 00286f6bfa85
children 74e05d4436ee
line diff
     1.1 --- a/new-words.py	Wed Feb 09 21:08:23 2011 +0200
     1.2 +++ b/new-words.py	Fri Mar 25 22:35:44 2011 +0200
     1.3 @@ -544,6 +544,20 @@
     1.4      linked_words = find_linked_words(notes)
     1.5      normalizator = Normalizator(config['language'], linked_words)
     1.6  
     1.7 +    # keep only the words whose normalized form is listed in the file named by $ALLOWED_WORDS_FILENAME
     1.8 +    if os.environ.get('ALLOWED_WORDS_FILENAME', ''):
     1.9 +        allowed_words_filename = os.environ.get('ALLOWED_WORDS_FILENAME', '')
    1.10 +        normalized_allowed_words = [
    1.11 +            normalizator.normalize(w.rstrip('\n')) 
    1.12 +            for w in readlines_from_file(allowed_words_filename)
    1.13 +        ]
    1.14 +
    1.15 +        result = {}
    1.16 +        for w, wn in words.iteritems():
    1.17 +            if normalizator.normalize(w) in normalized_allowed_words:
    1.18 +                result[w] = wn
    1.19 +        words = result
    1.20 +
    1.21      words_with_freq = []
    1.22      for k in sorted(words.keys(), key=lambda k: words[k], reverse=True):
    1.23          words_with_freq.append((words[k], k))