new-words

changeset 46:bf0aa8e3c1ce

misc scripts added: categorized.pl; nw-shell; zubrator.py
author Igor Chubin <igor@chub.in>
date Fri Feb 04 06:39:25 2011 +0100 (2011-02-04)
parents 5f90e44eecfc
children d708e2c1bad8
files misc/categorized.pl misc/nw-shell misc/zubrator.py
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/misc/categorized.pl	Fri Feb 04 06:39:25 2011 +0100
     1.3 @@ -0,0 +1,55 @@
     1.4 +my @n_m=();     # nouns, masculine
     1.5 +my @n_n=();     # nouns, neuter
     1.6 +my @n_f=();     # nouns, feminine
     1.7 +my @adv=();     # adverbs
     1.8 +my @modadv=();  # modal adverbs
     1.9 +my @adj=();     # adjectives
    1.10 +my @sub=();     # substantivized adjectives and participles
    1.11 +my @verb=();    # verbs
    1.12 +my @cj=();      # conjunctions
    1.13 +my @pron=();    # pronouns
    1.14 +my @prp=();     # prepositions
    1.15 +my @rest=();    # lines that matched none of the patterns
    1.16 +
    1.17 +while(<>) {  # file each input line under the first pattern it matches; the order of the tests matters
    1.18 +    if    (/adj/) { push @adj, $_; }
    1.19 +    elsif (/sub/) { push @sub, $_; }
    1.20 +    elsif (/[| ]v[it][: ]/) { push @verb, $_; }
    1.21 +    elsif (/\(sich\)/) { push @verb, $_; }  # was @sich, which is never declared or printed, so these lines were silently lost
    1.22 +    elsif (/[| ]m[- :]/) { push @n_m, $_; }
    1.23 +    elsif (/[| ]n[- :]/) { push @n_n, $_; }
    1.24 +    elsif (/[| ]f[- :]/) { push @n_f, $_; }
    1.25 +    elsif (/mod adv/) { push @modadv, $_; }  # tested before /adv/, which would also match "mod adv"
    1.26 +    elsif (/adv/) { push @adv, $_; }
    1.27 +    elsif (/prp[: ]/) { push @prp, $_; }
    1.28 +    elsif (/cj[: ]/) { push @cj, $_; }
    1.29 +    elsif (/pron[: ]/) { push @pron, $_; }
    1.30 +    else  { push @rest, $_; }
    1.31 +}
    1.32 +
    1.33 +print "== Существительные ==\n";  # heading: "Nouns"
    1.34 +print "=== Мужской род ===\n";  # subheading: "Masculine gender"
    1.35 +print sort(@n_m);
    1.36 +print "\n=== Средний род ===\n";  # subheading: "Neuter gender"
    1.37 +print sort(@n_n);
    1.38 +print "\n=== Женский род ===\n";  # subheading: "Feminine gender"
    1.39 +print sort(@n_f);
    1.40 +print "\n\n\n== Глаголы ==\n";  # heading: "Verbs"; it used to close with "===", unlike every other "==" heading
    1.41 +print sort(@verb);
    1.42 +print "\n\n\n== Прилагательные ==\n";  # heading: "Adjectives"
    1.43 +print sort(@adj);
    1.44 +print "\n\n=== Субстантивированные прилагательные и причастия ===\n";  # subheading: "Substantivized adjectives and participles"
    1.45 +print sort(@sub);
    1.46 +print "\n\n\n== Местоимения ==\n";  # heading: "Pronouns"
    1.47 +print sort(@pron);
    1.48 +print "\n\n\n== Наречия ==\n";  # heading: "Adverbs"
    1.49 +print sort(@adv);
    1.50 +print "\n\n=== Модальные наречия ===\n";  # subheading: "Modal adverbs"
    1.51 +print sort(@modadv);
    1.52 +print "\n\n\n== Предлоги ==\n";  # heading: "Prepositions"
    1.53 +print sort(@prp);
    1.54 +print "\n\n\n== Союзы ==\n";  # heading: "Conjunctions"
    1.55 +print sort(@cj);
    1.56 +print "\n\n\n== Прочее ==\n";  # heading: "Other"
    1.57 +print sort(@rest);
    1.58 +
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/misc/nw-shell	Fri Feb 04 06:39:25 2011 +0100
     2.3 @@ -0,0 +1,11 @@
     2.4 +TEMP=`mktemp /tmp/nw-shell-XXXXXX`  # scratch file handed to rlwrap -f below
     2.5 +~/hg/new-words/new-words-py.sh -n -l de "$1" | grep -v '^#' > "$TEMP"  # keep only the lines that do not start with "#"
     2.6 +rlwrap -f "$TEMP" sh -c '
     2.7 +    while true; 
     2.8 +    do 
     2.9 +        printf "\033[01;33m > " ;
    2.10 +        read word options || { printf "\033[00m" ; exit; } ;  # reset the colour BEFORE exiting; it was unreachable after "exit"
    2.11 +        printf "\033[00m" ;
    2.12 +        de $word; 
    2.13 +    done'
    2.14 +rm "$TEMP"  # quoted, like the other uses of $TEMP above
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/misc/zubrator.py	Fri Feb 04 06:39:25 2011 +0100
     3.3 @@ -0,0 +1,150 @@
     3.4 +#!/usr/bin/python
     3.5 +
     3.6 +import random
     3.7 +import sys
     3.8 +
     3.9 +# TODO:
    3.10 +# * persistent weight dict
    3.11 +# * log
    3.12 +# * stats (top5, time_total, time_last, correct_answers_rate_total, correct_answers_rate_last)
    3.13 +
    3.14 +# DONE:
    3.15 +# * correct quit (ctrl d)
    3.16 +
    3.17 +def color_for_answer(answer):  # map an answer string to one of the colour names colorprint() accepts
    3.18 +    color_table = {
    3.19 +        'der':  'Blue',
    3.20 +        'das':  'Green',
    3.21 +        'die':  'Red',
    3.22 +    }
    3.23 +    if not answer in color_table:
    3.24 +        return 'Normal'  # any answer other than der/das/die gets no colour
    3.25 +    else:
    3.26 +        return color_table[answer]
    3.27 +
    3.28 +def colorprint(string, color=None):  # print string, wrapped in the escape sequence for the named colour if it is known
    3.29 +    color_table = {  # colour name -> escape sequence emitted before the text
    3.30 +        'Gray':     '\033[1;30m',
    3.31 +        'Red':      '\033[1;31m',
    3.32 +        'Green':    '\033[1;32m',
    3.33 +        'Yellow':   '\033[1;33m',
    3.34 +        'Blue':     '\033[1;34m',
    3.35 +        'Magenta':  '\033[1;35m',
    3.36 +        'Cyan':     '\033[1;36m',
    3.37 +        'White':    '\033[1;37m',
    3.38 +        'Crimson':  '\033[1;38m',
    3.39 +        'Highlighted_Red':      '\033[1;41m',
    3.40 +        'Highlighted_Green':    '\033[1;42m',
    3.41 +        'Highlighted_Brown':    '\033[1;43m',
    3.42 +        'Highlighted_Blue':     '\033[1;44m',
    3.43 +        'Highlighted_Magenta':  '\033[1;45m',
    3.44 +        'Highlighted_Cyan':     '\033[1;46m',
    3.45 +        'Highlighted_Gray':     '\033[1;47m',
    3.46 +        'Highlighted_Crimson':  '\033[1;48m',
    3.47 +    }
    3.48 +    normal_color_code = '\033[1;m'  # escape sequence emitted after the text
    3.49 +    if not color or color == 'Normal' or not color in color_table:  # no colour, 'Normal', or an unknown name
    3.50 +        print string  # printed undecorated (Python 2 print statement)
    3.51 +    else:
    3.52 +        print "%s%s%s" % (color_table[color], string, normal_color_code)
    3.53 +
    3.54 +def wrandom(dict):  # pick one key at random, weighted by its value (assumes the values are non-negative weights)
    3.55 +    total = sum(dict.values())
    3.56 +    n = random.uniform(0, total)  # random point somewhere in the summed weight range
    3.57 +
    3.58 +    for key in sorted(dict.keys()):  # walk the keys in sorted order, using up n by each key's weight
    3.59 +        item = key
    3.60 +        if n < dict[key]:  # the point falls inside this key's share
    3.61 +            break
    3.62 +        n -= dict[key]
    3.63 +
    3.64 +    return item  # the last key if the loop never broke; NOTE(review): item is unbound when dict is empty
    3.65 +
    3.66 +def set_weight(weight, word, new_weight):  # set weight[word] to new_weight and shift all other weights so the total stays the same
    3.67 +    if len(weight) <= 1:  # the change has to be spread over at least one other key
    3.68 +        raise Exception("Can't set weight; weight dictionary is too small; need at least two members")
    3.69 +    sum_before = sum(weight.values())
    3.70 +    w_before = weight[word]  # raises KeyError if word is not in the dictionary
    3.71 +    w_after = new_weight
    3.72 +    delta = (w_after - w_before)*1.0/(len(weight)-1)  # equal share of the change for every other key; *1.0 forces float division
    3.73 +    for k in weight.keys():
    3.74 +        if k == word:
    3.75 +            weight[k] = w_after
    3.76 +        else:
    3.77 +            weight[k] -= delta  # NOTE(review): nothing here keeps the other weights from dropping below zero
    3.78 +    sum_after = sum(weight.values())
    3.79 +    if abs(sum_before-sum_after)> 0.0001:  # self-check: the total must be unchanged up to rounding
    3.80 +        raise Exception("%s != %s ; function set_weight works incorrectly" % (sum_before, sum_after))
    3.81 +    return weight  # the same dict object that was passed in, modified in place
    3.82 +
    3.83 +def print_stats(stats, weight, correct_answer):  # print the question/error counters and the five highest-weighted questions
    3.84 +    print "------------------------"
    3.85 +    print "total questions = %s" % stats['total_questions']
    3.86 +    print "last questions = %s" % stats['last_questions']
    3.87 +    print "total errors = %s (%.2f)" % (stats['total_errors'], 1.0*stats['total_errors']/stats['total_questions'])  # error rate; total_questions must be non-zero
    3.88 +    print "last errors = %s (%.2f)" % (stats['last_errors'], 1.0*stats['last_errors']/stats['last_questions'])  # error rate; last_questions must be non-zero
    3.89 +    print "top 5 questions:"
    3.90 +    for question in sorted(weight.keys(),key=lambda x: weight[x], reverse=True)[:5]:  # highest weight first
    3.91 +        colorprint(
    3.92 +            "    %s %s %5.2f" % (correct_answer[question], question, weight[question]),
    3.93 +            color_for_answer(correct_answer[question])
    3.94 +            )
    3.95 +
    3.96 +    print "------------------------"
    3.97 +
    3.98 +filename = sys.argv[1]  # path of the question file, taken from the first command-line argument
    3.99 +correct_answer = {}  # question -> expected answer
   3.100 +with open(filename) as f:
   3.101 +    for line in f:  # iterate the file directly instead of reading it all in with readlines()
   3.102 +        line = line.rstrip('\n')
   3.103 +        try:
   3.104 +            (q, a) = line.split(' ', 1)  # text before the first space is the question, the rest is the answer
   3.105 +            correct_answer[q] = a
   3.106 +        except ValueError:  # line has no space, so skip it; the old bare except also swallowed Ctrl-C and real errors
   3.107 +            pass
   3.108 +
   3.109 +saved_weight = {  # hard-coded starting weights; words not listed here start at 1
   3.110 +    'Auskunft'  : 2,
   3.111 +}
   3.112 +
   3.113 +weight = {}  # word -> current selection weight, one entry per loaded question
   3.114 +for word in correct_answer.keys():
   3.115 +    if word in saved_weight:
   3.116 +        weight[word] = saved_weight[word]
   3.117 +    else:
   3.118 +        weight[word] = 1
   3.119 +
   3.120 +stats = {  # running counters; the last_* pair is zeroed after each statistics report
   3.121 +    'total_errors'      :0,
   3.122 +    'last_errors'       :0,
   3.123 +    'total_questions'   :0,
   3.124 +    'last_questions'    :0,
   3.125 +    }
   3.126 +
   3.127 +while 1:  # quiz loop: pick a weighted-random question, read the answer, adjust its weight
   3.128 +    question = wrandom(weight)
   3.129 +    colorprint(question, 'Yellow')
   3.130 +    answer = sys.stdin.readline().rstrip('\n')
   3.131 +    if not answer:  # empty at end of input (Ctrl-D); NOTE(review): a blank line is also empty here, so it quits too
   3.132 +        break
   3.133 +    if answer != correct_answer[question]:
   3.134 +        colorprint(  # show the expected answer in its colour
   3.135 +            "%s %s" % (correct_answer[question], question),
   3.136 +            color_for_answer(correct_answer[question])
   3.137 +            )
   3.138 +        weight = set_weight(weight, question, weight[question]*1.5)  # wrong answer: multiply this question's weight by 1.5
   3.139 +        stats['total_errors'] += 1
   3.140 +        stats['last_errors'] += 1
   3.141 +    else:
   3.142 +        weight = set_weight(weight, question, weight[question]*0.8)  # right answer: multiply this question's weight by 0.8
   3.143 +
   3.144 +    stats['total_questions'] += 1
   3.145 +    stats['last_questions'] += 1
   3.146 +
   3.147 +    if stats['last_questions'] == 20:  # report after every 20 questions, then reset the last_* counters
   3.148 +        print_stats(stats, weight, correct_answer)
   3.149 +        stats['last_questions'] = 0
   3.150 +        stats['last_errors'] = 0
   3.151 +
   3.152 +    print
   3.153 +