# HG changeset patch
# User Igor Chubin
# Date 1296797965 -3600
# Node ID bf0aa8e3c1ce0186a3d9502f74818fe205bd3d70
# Parent 5f90e44eecfc432e59518c1ef7c3cf48538e8797
misc scripts added: categorized.pl; mw-shell; zubrator.py

diff -r 5f90e44eecfc -r bf0aa8e3c1ce misc/categorized.pl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/categorized.pl	Fri Feb 04 06:39:25 2011 +0100
@@ -0,0 +1,71 @@
+# Sort the lines read from the files named on the command line (or from STDIN)
+# into part-of-speech groups and print every group, sorted, under a
+# MediaWiki-style heading.  The headings are Russian: nouns (masculine, neuter,
+# feminine), verbs, adjectives, substantivized adjectives and participles,
+# pronouns, adverbs, modal adverbs, prepositions, conjunctions, other.
+#
+# A line goes to the first group whose pattern it matches, so the order of the
+# tests in the loop matters (e.g. "mod adv" has to be tried before "adv").
+
+use strict;
+use warnings;
+
+my @n_m=();
+my @n_n=();
+my @n_f=();
+my @adv=();
+my @modadv=();
+my @adj=();
+my @sub=();
+my @verb=();
+my @sich=();    # "(sich)" lines that did not match the verb pattern
+my @cj=();
+my @pron=();
+my @prp=();
+my @rest=();
+
+while(<>) {
+    if (/adj/) { push @adj, $_; }
+    elsif (/sub/) { push @sub, $_; }
+    elsif (/[| ]v[it][: ]/) { push @verb, $_; }
+    elsif (/\(sich\)/) { push @sich, $_; }
+    elsif (/[| ]m[- :]/) { push @n_m, $_; }
+    elsif (/[| ]n[- :]/) { push @n_n, $_; }
+    elsif (/[| ]f[- :]/) { push @n_f, $_; }
+    elsif (/mod adv/) { push @modadv, $_; }
+    elsif (/adv/) { push @adv, $_; }
+    elsif (/prp[: ]/) { push @prp, $_; }
+    elsif (/cj[: ]/) { push @cj, $_; }
+    elsif (/pron[: ]/) { push @pron, $_; }
+    else { push @rest, $_; }
+}
+
+print "== Существительные ==\n";
+print "=== Мужской род ===\n";
+print sort(@n_m);
+print "\n=== Средний род ===\n";
+print sort(@n_n);
+print "\n=== Женский род ===\n";
+print sort(@n_f);
+print "\n\n\n== Глаголы ==\n";
+print sort(@verb);
+# "(sich)" lines get their own subsection so that they are not lost
+print "\n\n=== Возвратные глаголы ===\n";
+print sort(@sich);
+print "\n\n\n== Прилагательные ==\n";
+print sort(@adj);
+print "\n\n=== Субстантивированные прилагательные и причастия ===\n";
+print sort(@sub);
+print "\n\n\n== Местоимения ==\n";
+print sort(@pron);
+print "\n\n\n== Наречия ==\n";
+print sort(@adv);
+print "\n\n=== Модальные наречия ===\n";
+print sort(@modadv);
+print "\n\n\n== Предлоги ==\n";
+print sort(@prp);
+print "\n\n\n== Союзы ==\n";
+print sort(@cj);
+print "\n\n\n== Прочее ==\n";
+print sort(@rest);
+
diff -r 5f90e44eecfc -r bf0aa8e3c1ce misc/nw-shell
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/nw-shell	Fri Feb 04 06:39:25 2011 +0100
@@ -0,0 +1,16 @@
+# Interactive word-lookup prompt.  The output of new-words-py.sh for the file
+# "$1" (minus its "#" lines) is saved to a temporary file and handed to
+# rlwrap -f as the completion word list; every word typed at the prompt is
+# passed to the "de" command.
+TEMP=`mktemp /tmp/nw-shell-XXXXXX`
+~/hg/new-words/new-words-py.sh -n -l de "$1" | grep -v '^#' > "$TEMP"
+rlwrap -f "$TEMP" sh -c '
+    while true;
+    do
+        printf "\033[01;33m > " ;
+        # at end of input reset the colour first, then leave
+        read word options || { printf "\033[00m" ; exit; } ;
+        printf "\033[00m" ;
+        de $word;
+    done'
+rm "$TEMP"
diff -r 5f90e44eecfc -r bf0aa8e3c1ce misc/zubrator.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/zubrator.py	Fri Feb 04 06:39:25 2011 +0100
@@ -0,0 +1,210 @@
+#!/usr/bin/python
+"""Console trainer that asks questions and checks the typed answers.
+
+Usage: zubrator.py FILE
+
+Every line of FILE is "QUESTION ANSWER": the text before the first space
+is the question, the rest of the line is the expected answer.  Questions are
+drawn at random in proportion to their weight; a wrong answer raises the
+weight of the question, a right answer lowers it.  Statistics are printed
+after every 20 questions.  End of input (Ctrl-D) ends the session.
+"""
+
+from __future__ import print_function
+
+import random
+import sys
+
+# TODO:
+# * persistent weight dict
+# * log
+# * stats (top5, time_total, time_last, correct_answers_rate_total, correct_answers_rate_last)
+
+# DONE:
+# * correct quit (ctrl d)
+
+def color_for_answer(answer):
+    """Return the colorprint() colour name for *answer*.
+
+    'der', 'das' and 'die' map to 'Blue', 'Green' and 'Red'; every other
+    answer maps to 'Normal'.
+    """
+    color_table = {
+        'der': 'Blue',
+        'das': 'Green',
+        'die': 'Red',
+    }
+    return color_table.get(answer, 'Normal')
+
+def colorprint(string, color=None):
+    """Print *string*, wrapped in the escape sequence of *color* if known.
+
+    None, 'Normal' and names missing from the table print the plain text.
+    """
+    color_table = {
+        'Gray': '\033[1;30m',
+        'Red': '\033[1;31m',
+        'Green': '\033[1;32m',
+        'Yellow': '\033[1;33m',
+        'Blue': '\033[1;34m',
+        'Magenta': '\033[1;35m',
+        'Cyan': '\033[1;36m',
+        'White': '\033[1;37m',
+        'Crimson': '\033[1;38m',
+        'Highlighted_Red': '\033[1;41m',
+        'Highlighted_Green': '\033[1;42m',
+        'Highlighted_Brown': '\033[1;43m',
+        'Highlighted_Blue': '\033[1;44m',
+        'Highlighted_Magenta': '\033[1;45m',
+        'Highlighted_Cyan': '\033[1;46m',
+        'Highlighted_Gray': '\033[1;47m',
+        'Highlighted_Crimson': '\033[1;48m',
+    }
+    normal_color_code = '\033[1;m'
+    if color in color_table:
+        print("%s%s%s" % (color_table[color], string, normal_color_code))
+    else:
+        print(string)
+
+def wrandom(dict):
+    """Return a random key of *dict*, picked in proportion to its weight.
+
+    *dict* maps keys to numeric weights (the name shadows the builtin; it is
+    kept so that existing callers keep working).  Raises ValueError if
+    *dict* is empty.
+    """
+    if not dict:
+        raise ValueError("Can't pick from an empty weight dictionary")
+    total = sum(dict.values())
+    n = random.uniform(0, total)
+
+    # Walk the keys in sorted order and subtract the weight of every key
+    # that is passed over; stop at the key whose share contains n.  If the
+    # loop runs out, the last key is returned.
+    for key in sorted(dict.keys()):
+        item = key
+        if n < dict[key]:
+            break
+        n -= dict[key]
+
+    return item
+
+def set_weight(weight, word, new_weight):
+    """Set weight[word] to *new_weight*, keeping the sum of weights constant.
+
+    The difference is spread evenly over all other entries.  *weight* is
+    changed in place and also returned.
+
+    Raises Exception if *weight* has fewer than two entries, or if the sum
+    of the weights changed by more than 0.0001.
+
+    NOTE(review): nothing stops the other entries from dropping below zero
+    when one weight keeps growing; wrandom() does not expect negative
+    weights.
+    """
+    if len(weight) <= 1:
+        raise Exception("Can't set weight; weight dictionary is too small; need at least two members")
+    sum_before = sum(weight.values())
+    w_before = weight[word]
+    w_after = new_weight
+    delta = (w_after - w_before)*1.0/(len(weight)-1)
+    for k in weight.keys():
+        if k == word:
+            weight[k] = w_after
+        else:
+            weight[k] -= delta
+    sum_after = sum(weight.values())
+    if abs(sum_before-sum_after)> 0.0001:
+        raise Exception("%s != %s ; function set_weight works incorrectly" % (sum_before, sum_after))
+    return weight
+
+def print_stats(stats, weight, correct_answer):
+    """Print the question and error counters and the five heaviest questions.
+
+    *stats* needs the keys 'total_questions', 'last_questions',
+    'total_errors' and 'last_errors'.  An error rate is shown as 0.00 while
+    the matching question counter is still zero.
+    """
+    def rate(errors, questions):
+        # avoid ZeroDivisionError before the first question is counted
+        if not questions:
+            return 0.0
+        return 1.0*errors/questions
+
+    print("------------------------")
+    print("total questions = %s" % stats['total_questions'])
+    print("last questions = %s" % stats['last_questions'])
+    print("total errors = %s (%.2f)" % (stats['total_errors'], rate(stats['total_errors'], stats['total_questions'])))
+    print("last errors = %s (%.2f)" % (stats['last_errors'], rate(stats['last_errors'], stats['last_questions'])))
+    print("top 5 questions:")
+    for question in sorted(weight.keys(),key=lambda x: weight[x], reverse=True)[:5]:
+        colorprint(
+            " %s %s %5.2f" % (correct_answer[question], question, weight[question]),
+            color_for_answer(correct_answer[question])
+        )
+
+    print("------------------------")
+
+if len(sys.argv) < 2:
+    sys.exit("usage: %s FILE" % sys.argv[0])
+filename = sys.argv[1]
+correct_answer = {}
+with open(filename) as f:
+    for line in f:
+        line = line.rstrip('\n')
+        try:
+            (q, a) = line.split(' ', 1)
+        except ValueError:
+            # no space in the line, so it is not a "question answer" pair
+            continue
+        correct_answer[q] = a
+
+# set_weight() needs at least two entries to move weight between
+if len(correct_answer) < 2:
+    sys.exit("%s: need at least two 'question answer' lines" % filename)
+
+saved_weight = {
+    'Auskunft' : 2,
+}
+
+weight = {}
+for word in correct_answer.keys():
+    weight[word] = saved_weight.get(word, 1)
+
+stats = {
+    'total_errors' :0,
+    'last_errors' :0,
+    'total_questions' :0,
+    'last_questions' :0,
+    }
+
+while 1:
+    question = wrandom(weight)
+    colorprint(question, 'Yellow')
+    line = sys.stdin.readline()
+    if not line:
+        # readline() returns '' only at end of input (Ctrl-D); an empty
+        # answer arrives as '\n' and is checked like any other answer
+        break
+    answer = line.rstrip('\n')
+    if answer != correct_answer[question]:
+        colorprint(
+            "%s %s" % (correct_answer[question], question),
+            color_for_answer(correct_answer[question])
+        )
+        weight = set_weight(weight, question, weight[question]*1.5)
+        stats['total_errors'] += 1
+        stats['last_errors'] += 1
+    else:
+        weight = set_weight(weight, question, weight[question]*0.8)
+
+    stats['total_questions'] += 1
+    stats['last_questions'] += 1
+
+    if stats['last_questions'] == 20:
+        print_stats(stats, weight, correct_answer)
+        stats['last_questions'] = 0
+        stats['last_errors'] = 0
+
+    print()
+