# HG changeset patch
# User Igor Chubin <igor@chub.in>
# Date 1296797965 -3600
# Node ID bf0aa8e3c1ce0186a3d9502f74818fe205bd3d70
# Parent  5f90e44eecfc432e59518c1ef7c3cf48538e8797
misc scripts added: categorized.pl; nw-shell; zubrator.py

diff -r 5f90e44eecfc -r bf0aa8e3c1ce misc/categorized.pl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/categorized.pl	Fri Feb 04 06:39:25 2011 +0100
@@ -0,0 +1,66 @@
+# Reads dictionary lines from the files named on the command line (or from
+# stdin), sorts them into buckets by the grammatical marker found on each
+# line, and prints every bucket, sorted, under a wiki-style heading.
+use strict;
+use warnings;
+
+my @n_m=();
+my @n_n=();
+my @n_f=();
+my @adv=();
+my @modadv=();
+my @adj=();
+my @sub=();
+my @verb=();
+my @sich=();
+my @cj=();
+my @pron=();
+my @prp=();
+my @rest=();
+
+# The first matching test wins, so the order of the tests matters (e.g.
+# "mod adv" must be tried before the plain "adv" that would also match it).
+while(<>) {
+    if    (/adj/) { push @adj, $_; }
+    elsif (/sub/) { push @sub, $_; }
+    elsif (/[| ]v[it][: ]/) { push @verb, $_; }
+    elsif (/\(sich\)/) { push @sich, $_; }
+    elsif (/[| ]m[- :]/) { push @n_m, $_; }
+    elsif (/[| ]n[- :]/) { push @n_n, $_; }
+    elsif (/[| ]f[- :]/) { push @n_f, $_; }
+    elsif (/mod adv/) { push @modadv, $_; }
+    elsif (/adv/) { push @adv, $_; }
+    elsif (/prp[: ]/) { push @prp, $_; }
+    elsif (/cj[: ]/) { push @cj, $_; }
+    elsif (/pron[: ]/) { push @pron, $_; }
+    else  { push @rest, $_; }
+}
+
+print "== Существительные ==\n";
+print "=== Мужской род ===\n";
+print sort(@n_m);
+print "\n=== Средний род ===\n";
+print sort(@n_n);
+print "\n=== Женский род ===\n";
+print sort(@n_f);
+print "\n\n\n== Глаголы ==\n";
+print sort(@verb);
+# Entries marked "(sich)" are listed in their own subsection.
+print "\n\n=== Возвратные глаголы ===\n";
+print sort(@sich);
+print "\n\n\n== Прилагательные ==\n";
+print sort(@adj);
+print "\n\n=== Субстантивированные прилагательные и причастия ===\n";
+print sort(@sub);
+print "\n\n\n== Местоимения ==\n";
+print sort(@pron);
+print "\n\n\n== Наречия ==\n";
+print sort(@adv);
+print "\n\n=== Модальные наречия ===\n";
+print sort(@modadv);
+print "\n\n\n== Предлоги ==\n";
+print sort(@prp);
+print "\n\n\n== Союзы ==\n";
+print sort(@cj);
+print "\n\n\n== Прочее ==\n";
+print sort(@rest);
diff -r 5f90e44eecfc -r bf0aa8e3c1ce misc/nw-shell
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/nw-shell	Fri Feb 04 06:39:25 2011 +0100
@@ -0,0 +1,17 @@
+# Interactive lookup shell: every word typed at the prompt is passed to the
+# "de" command.  The output of new-words-py.sh for "$1" (minus "#" lines) is
+# given to rlwrap as its completion word list.
+TEMP=`mktemp /tmp/nw-shell-XXXXXX` || exit 1
+# Remove the temporary word list on exit, including after HUP/INT/TERM.
+trap 'rm -f "$TEMP"' EXIT
+trap 'exit 1' HUP INT TERM
+~/hg/new-words/new-words-py.sh -n -l de "$1" | grep -v '^#' > "$TEMP"
+rlwrap -f "$TEMP" sh -c '
+    while true;
+    do
+        printf "\033[01;33m > " ;
+        # Reset the colour before leaving on end of input (Ctrl-D).
+        read word options || { printf "\033[00m" ; exit ; } ;
+        printf "\033[00m" ;
+        de $word;
+    done'
diff -r 5f90e44eecfc -r bf0aa8e3c1ce misc/zubrator.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/zubrator.py	Fri Feb 04 06:39:25 2011 +0100
@@ -0,0 +1,231 @@
+#!/usr/bin/python
+"""Interactive quiz that drills question/answer pairs read from a file.
+
+Usage: zubrator.py FILE
+
+Each line of FILE holds a question and its answer, separated by the first
+space (the built-in tables suggest German nouns and their articles, e.g.
+"Auskunft die").  A question is printed, the answer is read from stdin, and
+the weight of a question grows after a wrong answer and shrinks after a
+correct one, so that missed questions come up more often.  An empty line or
+end of input (Ctrl-D) ends the session.
+"""
+
+from __future__ import print_function
+
+import random
+import sys
+
+# TODO:
+# * persistent weight dict
+# * log
+# * stats (top5, time_total, time_last, correct_answers_rate_total, correct_answers_rate_last)
+
+# DONE:
+# * correct quit (ctrl d)
+
+# Color name used when showing each known answer.
+ANSWER_COLORS = {
+    'der':  'Blue',
+    'das':  'Green',
+    'die':  'Red',
+}
+
+# Terminal escape sequence for each color name.
+COLOR_CODES = {
+    'Gray':     '\033[1;30m',
+    'Red':      '\033[1;31m',
+    'Green':    '\033[1;32m',
+    'Yellow':   '\033[1;33m',
+    'Blue':     '\033[1;34m',
+    'Magenta':  '\033[1;35m',
+    'Cyan':     '\033[1;36m',
+    'White':    '\033[1;37m',
+    'Crimson':  '\033[1;38m',
+    'Highlighted_Red':      '\033[1;41m',
+    'Highlighted_Green':    '\033[1;42m',
+    'Highlighted_Brown':    '\033[1;43m',
+    'Highlighted_Blue':     '\033[1;44m',
+    'Highlighted_Magenta':  '\033[1;45m',
+    'Highlighted_Cyan':     '\033[1;46m',
+    'Highlighted_Gray':     '\033[1;47m',
+    'Highlighted_Crimson':  '\033[1;48m',
+}
+NORMAL_COLOR_CODE = '\033[1;m'
+
+# Starting weights that differ from the default of 1.
+SAVED_WEIGHT = {
+    'Auskunft'  : 2,
+}
+
+# Statistics are printed every time this many questions were answered.
+STATS_INTERVAL = 20
+
+
+def color_for_answer(answer):
+    """Return the color name for *answer*, or 'Normal' if it has none."""
+    return ANSWER_COLORS.get(answer, 'Normal')
+
+
+def colorprint(string, color=None):
+    """Print *string*, wrapped in the escape codes of *color* if it is known.
+
+    None, 'Normal' and unknown color names print the string unchanged.
+    """
+    if not color or color not in COLOR_CODES:
+        print(string)
+    else:
+        print("%s%s%s" % (COLOR_CODES[color], string, NORMAL_COLOR_CODE))
+
+
+def wrandom(dict):
+    """Return a random key of *dict*, using the values as weights.
+
+    A key is picked with probability proportional to its value as long as
+    no value is negative.  The parameter name shadows the builtin; it is
+    kept so that keyword callers keep working.
+
+    Raises ValueError if *dict* is empty.
+    """
+    if not dict:
+        raise ValueError("wrandom() needs at least one weighted item")
+    n = random.uniform(0, sum(dict.values()))
+    # Walk the keys in sorted order, subtracting weights until n falls
+    # inside the current key's share; the last key is the fallback.
+    for item in sorted(dict):
+        if n < dict[item]:
+            break
+        n -= dict[item]
+    return item
+
+
+def set_weight(weight, word, new_weight):
+    """Set weight[word] to *new_weight* without changing the sum of weights.
+
+    The difference is taken in equal parts from (or given to) every other
+    entry.  *weight* is modified in place and also returned.
+
+    NOTE(review): nothing keeps the other entries from dropping below zero
+    after repeated increases; wrandom() assumes non-negative weights.
+
+    Raises Exception if *weight* has fewer than two entries or if the sum
+    changed by more than 0.0001, and KeyError if *word* is not a key.
+    """
+    if len(weight) <= 1:
+        raise Exception("Can't set weight; weight dictionary is too small; need at least two members")
+    sum_before = sum(weight.values())
+    delta = (new_weight - weight[word])*1.0/(len(weight)-1)
+    for k in weight:
+        if k != word:
+            weight[k] -= delta
+    weight[word] = new_weight
+    sum_after = sum(weight.values())
+    if abs(sum_before-sum_after)> 0.0001:
+        raise Exception("%s != %s ; function set_weight works incorrectly" % (sum_before, sum_after))
+    return weight
+
+
+def _rate(part, whole):
+    """Return part/whole as a float, or 0.0 when *whole* is zero."""
+    if not whole:
+        return 0.0
+    return 1.0*part/whole
+
+
+def print_stats(stats, weight, correct_answer):
+    """Print the question/error counters and the five heaviest questions.
+
+    *stats* needs the keys 'total_questions', 'last_questions',
+    'total_errors' and 'last_errors'.  Error rates are shown as 0.00 when
+    the matching question counter is zero.
+    """
+    print("------------------------")
+    print("total questions = %s" % stats['total_questions'])
+    print("last questions = %s" % stats['last_questions'])
+    print("total errors = %s (%.2f)" % (
+        stats['total_errors'],
+        _rate(stats['total_errors'], stats['total_questions'])))
+    print("last errors = %s (%.2f)" % (
+        stats['last_errors'],
+        _rate(stats['last_errors'], stats['last_questions'])))
+    print("top 5 questions:")
+    for question in sorted(weight, key=weight.get, reverse=True)[:5]:
+        colorprint(
+            "    %s %s %5.2f" % (correct_answer[question], question, weight[question]),
+            color_for_answer(correct_answer[question])
+            )
+
+    print("------------------------")
+
+
+def load_answers(filename):
+    """Return a dict mapping each question in *filename* to its answer.
+
+    A line is split at its first space; lines without a space are skipped.
+    """
+    correct_answer = {}
+    with open(filename) as f:
+        for line in f:
+            line = line.rstrip('\n')
+            try:
+                (q, a) = line.split(' ', 1)
+            except ValueError:
+                # No space on the line, so there is no pair to store.
+                continue
+            correct_answer[q] = a
+    return correct_answer
+
+
+def main():
+    """Run the quiz on the file named by sys.argv[1]; return the exit code."""
+    if len(sys.argv) < 2:
+        sys.stderr.write("usage: %s FILE\n" % sys.argv[0])
+        return 2
+    correct_answer = load_answers(sys.argv[1])
+    if len(correct_answer) < 2:
+        # set_weight() refuses to rebalance fewer than two entries.
+        sys.stderr.write("%s: need at least two question/answer lines\n" % sys.argv[1])
+        return 1
+
+    weight = {}
+    for word in correct_answer:
+        weight[word] = SAVED_WEIGHT.get(word, 1)
+
+    stats = {
+        'total_errors'      :0,
+        'last_errors'       :0,
+        'total_questions'   :0,
+        'last_questions'    :0,
+        }
+
+    while True:
+        question = wrandom(weight)
+        colorprint(question, 'Yellow')
+        answer = sys.stdin.readline().rstrip('\n')
+        if not answer:
+            # End of input and an empty line both end the session.
+            break
+        expected = correct_answer[question]
+        if answer != expected:
+            # Show the right answer and ask this question more often.
+            colorprint("%s %s" % (expected, question), color_for_answer(expected))
+            set_weight(weight, question, weight[question]*1.5)
+            stats['total_errors'] += 1
+            stats['last_errors'] += 1
+        else:
+            set_weight(weight, question, weight[question]*0.8)
+
+        stats['total_questions'] += 1
+        stats['last_questions'] += 1
+
+        if stats['last_questions'] == STATS_INTERVAL:
+            print_stats(stats, weight, correct_answer)
+            stats['last_questions'] = 0
+            stats['last_errors'] = 0
+
+        print()
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())