# HG changeset patch
# User Igor Chubin <igor@chub.in>
# Date 1295789152 -3600
# Node ID a598e0d25784559ddc8d5b33532712a9bb9a6ed2
# Parent  adbc809d39242cc78d735fafc32f3adcbe1c2519
add_notes (add_marks) + remove_notes (remove_marks) implemented in python

diff -r adbc809d3924 -r a598e0d25784 new-words-py.sh
--- a/new-words-py.sh	Sat Jan 22 23:42:31 2011 +0100
+++ b/new-words-py.sh	Sun Jan 23 14:25:52 2011 +0100
@@ -578,6 +578,16 @@
 
 add_marks()
 {
+   # Add notes to the word file "$1": Python filter when OLD_STYLE=NO, else add_marks_OLD.
+   if [ "$OLD_STYLE" = NO ]; then
+    $NEW_WORDS_PY -l "$LANGUAGE" -f add_notes "$1"
+   else
+    add_marks_OLD "$@"
+   fi
+}
+
+add_marks_OLD()
+{
     PERL_SCRIPT_TEMP_NAME=`mktemp /tmp/perl-grep-v-english-XXXXXXXX`
     cat <<'PERL_SCRIPT' > $PERL_SCRIPT_TEMP_NAME
 use Encode;
@@ -632,6 +642,16 @@
 
 remove_marks()
 {
+   # Strip notes from the word file "$1": Python filter when OLD_STYLE=NO, else remove_marks_OLD.
+   if [ "$OLD_STYLE" = NO ]; then
+    $NEW_WORDS_PY -l "$LANGUAGE" -f remove_notes "$1"
+   else
+    remove_marks_OLD "$@"
+   fi
+}
+
+remove_marks_OLD()
+{
     PERL_SCRIPT_TEMP_NAME=`mktemp /tmp/perl-grep-v-english-XXXXXXXX`
     cat <<'PERL_SCRIPT' > $PERL_SCRIPT_TEMP_NAME
 $file = $ARGV[0];
diff -r adbc809d3924 -r a598e0d25784 new-words.py
--- a/new-words.py	Sat Jan 22 23:42:31 2011 +0100
+++ b/new-words.py	Sun Jan 23 14:25:52 2011 +0100
@@ -177,13 +177,82 @@
 def load_notes(files):
     notes = {}
     for filename in files:
-        with open(filename) as f:
+        with codecs.open(filename, "r", "utf-8") as f:  # decode each notes file as UTF-8
             for line in f.readlines():
                 (word, note) = re.split('\s+', line.rstrip('\n'), maxsplit=1)
                 notes.setdefault(word, {})
                 notes[word][filename] = note
     return notes
 
+def add_notes(lines, notes):
+    """Append the note from the primary notes file to each word line.
+
+    Lines starting with '#' and lines whose word has no note in that file
+    pass through unchanged; no input line is ever dropped.
+    """
+    notes_filename = notes_filenames()[0]
+    result = []
+    for line in lines:
+        match_object = None
+        if not line.startswith('#'):
+            # The word is the second whitespace-separated field of the line.
+            match_object = re.search(r'^\s*\S+\s+(\S+)', line)
+        if match_object:
+            word = match_object.group(1)
+            note = notes.get(word, {}).get(notes_filename)
+            if note is not None:
+                logging.debug(word)
+                logging.debug(line)
+                line = "%-30s %s\n" % (line.rstrip('\n'), note)
+                logging.debug(line)
+        result.append(line)
+    return result
+
+def remove_notes(lines, notes_group):
+    """Strip trailing notes from word lines and store them in the notes file.
+
+    Matching lines are cut back to their first two fields; every note found
+    is merged into the primary notes file through save_notes().
+    """
+    notes_filename = notes_filenames()[0]
+    # Start from the notes already recorded for the primary notes file.
+    notes = dict((word, per_file[notes_filename])
+                 for (word, per_file) in notes_group.items()
+                 if notes_filename in per_file)
+
+    result = []
+    for raw_line in lines:
+        stripped = raw_line.rstrip('\n')
+        found = re.match('(\s+)(\S+)(\s+)(\S+)(\s+)(.*)', stripped)
+        if not found:
+            result.append(stripped + "\n")
+            continue
+        # Keep indent, first field, separator and word; the rest is the note.
+        result.append("".join(found.groups()[:4]) + "\n")
+        notes[found.group(4)] = found.group(6)
+
+    save_notes(notes_filename, notes)
+    return result
+
+def save_notes(filename, notes):
+    """Rewrite *filename* so it holds every note in *notes* (word -> note).
+
+    Existing entries keep their position; words not yet present are appended.
+    """
+    lines = []
+    saved_words = set()  # set gives O(1) membership tests below
+    with codecs.open(filename, "r", "utf-8") as f:
+        for line in f.readlines():
+            word = re.split(r'\s+', line.rstrip('\n'), maxsplit=1)[0]  # note-less lines must not crash
+            if word in notes:
+                line = "%-29s %s\n" % (word, notes[word])
+                saved_words.add(word)
+            lines.append(line)
+    lines += ["%-29s %s\n" % (w, notes[w]) for w in notes if w not in saved_words]
+    with codecs.open(filename, "w", "utf-8") as f:
+        f.writelines(lines)
+
+
 def print_words_sorted(words_freq):
     for k in sorted(words_freq.keys(), key=lambda k: words_freq[k], reverse=True):
         codecs.getwriter("utf-8")(sys.stdout).write("%10s %s\n" % (words_freq[k], k))
@@ -221,11 +290,9 @@
     for word in notes.keys():
         for note in notes[word].values():
             if "@" in note:
-                logging.debug("%s %s" % (word, note))
                 result = re.search(r'\@(\S*)', note)
                 if result:
                     main_word = result.group(1)
-                    logging.debug("%s %s" % (word, main_word))
                     if main_word:
                         linked_words[word] = main_word
     return linked_words
@@ -261,7 +328,6 @@
     lines = readlines_from_stdin()
     notes = load_notes(notes_filenames())
     linked_words = find_linked_words(notes)
-    logging.debug(linked_words)
     normalizator = Normalizator(config['language'], linked_words)
 
     wgw = find_wordgroups_weights(lines, normalizator)
@@ -271,14 +337,32 @@
                 reverse=True):
         codecs.getwriter("utf-8")(sys.stdout).write(line)
 
+def filter_add_notes(args):
+    """Rewrite the file args[0] with the known notes added to its lines."""
+    path = args[0]
+    original = readlines_from_file(path)
+    annotated = add_notes(original, load_notes(notes_filenames()))
+    with codecs.open(path, "w", "utf-8") as out:
+        out.write("".join(annotated))
+
+def filter_remove_notes(args):
+    """Rewrite the file args[0] with notes stripped by remove_notes()."""
+    path = args[0]
+    original = readlines_from_file(path)
+    cleaned = remove_notes(original, load_notes(notes_filenames()))
+    with codecs.open(path, "w", "utf-8") as out:
+        out.write("".join(cleaned))
+
 (options, args) = parser.parse_args()
 if options.language:
     config['language'] = options.language
 
 if options.function:
     function_names = {
-        'get_words' : filter_get_words,
+        'get_words' :   filter_get_words,
         'group_words' : filter_group_words,
+        'add_notes' :   filter_add_notes,
+        'remove_notes': filter_remove_notes,
     }
     if options.function in function_names:
         function_names[options.function](args)