# HG changeset patch
# User Igor Chubin <igor@chub.in>
# Date 1301085344 -7200
# Node ID 4e931db74618878956d60815edc4f37020572c78
# Parent  00286f6bfa8580737e9a35281e2cf06ff7fb5d14
filtering by wordlist: add -f option to show only words related to those listed in a file
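
The option reads the given file as one word per line, normalizes both the
listed words and the candidate words with the current Normalizator, and
keeps only the candidates whose normalized form matches.

A usage sketch (invocation details and file names are illustrative):

    $ cat known-words.txt
    dog
    cat
    run

    $ ./new-words-py.sh -f known-words.txt article.txt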

diff -r 00286f6bfa85 -r 4e931db74618 new-words-py.sh
--- a/new-words-py.sh	Wed Feb 09 21:08:23 2011 +0200
+++ b/new-words-py.sh	Fri Mar 25 22:35:44 2011 +0200
@@ -12,6 +12,7 @@
 
     -h          print this screen
     -c          show compressed wordlist: one word per group
+    -f file     show only words related to the words in the file
     -G          turn off word grouping
     -k          put higher words that are similar to the known words (only for English)
     -l lang     override language settings
@@ -86,10 +87,12 @@
 SHOW_VOC_STAT=NO
 COMPRESSED_WORDLIST=NO
 WORDS_GROUPING=YES
-while getopts Gcl:sSkanNp:t:Tm:Md:r:R:23 opt
+ALLOWED_WORDS_FILENAME=''
+while getopts Gcf:l:sSkanNp:t:Tm:Md:r:R:23 opt
 do
     case "$opt" in
       c)  COMPRESSED_WORDLIST=YES;;
+      f)  ALLOWED_WORDS_FILENAME="$OPTARG";;
       G)  WORDS_GROUPING=NO;;
       s)  STAT_ONLY=YES;;
       S)  SHOW_VOC_STAT=YES;;
@@ -153,6 +156,7 @@
     STAT_ONLY="$STAT_ONLY" \
     WORDS_GROUPING="$WORDS_GROUPING" \
     FILTER_WORDS="$FILTER_WORDS" \
+    ALLOWED_WORDS_FILENAME="$ALLOWED_WORDS_FILENAME" \
     $NEW_WORDS_PY -l "$LANGUAGE" -f get_words_group_words_add_stat "$1"
 }
 
diff -r 00286f6bfa85 -r 4e931db74618 new-words.py
--- a/new-words.py	Wed Feb 09 21:08:23 2011 +0200
+++ b/new-words.py	Fri Mar 25 22:35:44 2011 +0200
@@ -544,6 +544,20 @@
     linked_words = find_linked_words(notes)
     normalizator = Normalizator(config['language'], linked_words)
 
+    # optionally restrict output to words listed in the file named by ALLOWED_WORDS_FILENAME
+    allowed_words_filename = os.environ.get('ALLOWED_WORDS_FILENAME', '')
+    if allowed_words_filename:
+        normalized_allowed_words = set(
+            normalizator.normalize(w.rstrip('\n'))
+            for w in readlines_from_file(allowed_words_filename)
+        )
+
+        result = {}
+        for w, wn in words.iteritems():
+            if normalizator.normalize(w) in normalized_allowed_words:
+                result[w] = wn
+        words = result
+
     words_with_freq = []
     for k in sorted(words.keys(), key=lambda k: words[k], reverse=True):
         words_with_freq.append((words[k], k))
diff -r 00286f6bfa85 -r 4e931db74618 new-words.sh
--- a/new-words.sh	Wed Feb 09 21:08:23 2011 +0200
+++ b/new-words.sh	Fri Mar 25 22:35:44 2011 +0200
@@ -12,6 +12,7 @@
 
     -h          print this screen
     -c          show compressed wordlist: one word per group
+    -f file     show only words related to the words in the file
     -k          put higher words that are similar to the known words (only for English)
     -l lang     override language settings
     -n          non-interactive mode (don't run vi)
@@ -81,6 +82,7 @@
 FILTER_WORDS=YES
 SHOW_VOC_STAT=NO
 COMPRESSED_WORDLIST=NO
+ALLOWED_WORDS_FILENAME=''
 while getopts cl:sSkanNp:t:Tm:Mr:23 opt
 do
     case "$opt" in