new-words

diff misc/zubrator.py @ 49:00286f6bfa85

experimental: when -c specified, use dictionary for compression
author Igor Chubin <igor@chub.in>
date Wed Feb 09 21:08:23 2011 +0200 (2011-02-09)
parents
children abd4080ee583
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/misc/zubrator.py	Wed Feb 09 21:08:23 2011 +0200
     1.3 @@ -0,0 +1,150 @@
     1.4 +#!/usr/bin/python
     1.5 +
     1.6 +import random
     1.7 +import sys
     1.8 +
     1.9 +# TODO:
    1.10 +# * persistent weight dict
    1.11 +# * log
    1.12 +# * stats (top5, time_total, time_last, correct_answers_rate_total, correct_answers_rate_last)
    1.13 +
    1.14 +# DONE:
    1.15 +# * correct quit (ctrl d)
    1.16 +
    1.17 +def color_for_answer(answer):
    1.18 +    color_table = {
    1.19 +        'der':  'Blue',
    1.20 +        'das':  'Green',
    1.21 +        'die':  'Red',
    1.22 +    }
    1.23 +    if not answer in color_table:
    1.24 +        return 'Normal'
    1.25 +    else:
    1.26 +        return color_table[answer]
    1.27 +
    1.28 +def colorprint(string, color=None):
    1.29 +    color_table = {
    1.30 +        'Gray':     '\033[1;30m',
    1.31 +        'Red':      '\033[1;31m',
    1.32 +        'Green':    '\033[1;32m',
    1.33 +        'Yellow':   '\033[1;33m',
    1.34 +        'Blue':     '\033[1;34m',
    1.35 +        'Magenta':  '\033[1;35m',
    1.36 +        'Cyan':     '\033[1;36m',
    1.37 +        'White':    '\033[1;37m',
    1.38 +        'Crimson':  '\033[1;38m',
    1.39 +        'Highlighted_Red':      '\033[1;41m',
    1.40 +        'Highlighted_Green':    '\033[1;42m',
    1.41 +        'Highlighted_Brown':    '\033[1;43m',
    1.42 +        'Highlighted_Blue':     '\033[1;44m',
    1.43 +        'Highlighted_Magenta':  '\033[1;45m',
    1.44 +        'Highlighted_Cyan':     '\033[1;46m',
    1.45 +        'Highlighted_Gray':     '\033[1;47m',
    1.46 +        'Highlighted_Crimson':  '\033[1;48m',
    1.47 +    }
    1.48 +    normal_color_code = '\033[1;m'
    1.49 +    if not color or color == 'Normal' or not color in color_table:
    1.50 +        print string
    1.51 +    else:
    1.52 +        print "%s%s%s" % (color_table[color], string, normal_color_code)
    1.53 +
    1.54 +def wrandom(dict):
    1.55 +    total = sum(dict.values())
    1.56 +    n = random.uniform(0, total)
    1.57 +
    1.58 +    for key in sorted(dict.keys()):
    1.59 +        item = key
    1.60 +        if n < dict[key]:
    1.61 +            break
    1.62 +        n -= dict[key]
    1.63 +
    1.64 +    return item
    1.65 +
    1.66 +def set_weight(weight, word, new_weight):
    1.67 +    if len(weight) <= 1:
    1.68 +        raise Exception("Can't set weight; weight dictionary is too small; need at least two members")
    1.69 +    sum_before = sum(weight.values())
    1.70 +    w_before = weight[word]
    1.71 +    w_after = new_weight
    1.72 +    delta = (w_after - w_before)*1.0/(len(weight)-1)
    1.73 +    for k in weight.keys():
    1.74 +        if k == word:
    1.75 +            weight[k] = w_after
    1.76 +        else:
    1.77 +            weight[k] -= delta
    1.78 +    sum_after = sum(weight.values())
    1.79 +    if abs(sum_before-sum_after)> 0.0001:
    1.80 +        raise Exception("%s != %s ; function set_weight works incorrectly" % (sum_before, sum_after))
    1.81 +    return weight
    1.82 +
    1.83 +def print_stats(stats, weight, correct_answer):
    1.84 +    print "------------------------"
    1.85 +    print "total questions = %s" % stats['total_questions']
    1.86 +    print "last questions = %s" % stats['last_questions']
    1.87 +    print "total errors = %s (%.2f)" % (stats['total_errors'], 1.0*stats['total_errors']/stats['total_questions'])
    1.88 +    print "last errors = %s (%.2f)" % (stats['last_errors'], 1.0*stats['last_errors']/stats['last_questions'])
    1.89 +    print "top 5 questions:"
    1.90 +    for question in sorted(weight.keys(),key=lambda x: weight[x], reverse=True)[:5]:
    1.91 +        colorprint(
    1.92 +            "    %s %s %5.2f" % (correct_answer[question], question, weight[question]),
    1.93 +            color_for_answer(correct_answer[question])
    1.94 +            )
    1.95 +
    1.96 +    print "------------------------"
    1.97 +
    1.98 +filename = sys.argv[1]
    1.99 +correct_answer = {}
   1.100 +with open(filename) as f:
   1.101 +    for line in f.readlines():
   1.102 +        line = line.rstrip('\n')
   1.103 +        try:
   1.104 +            (q, a) = line.split(' ', 1)
   1.105 +            correct_answer[q] = a
   1.106 +        except:
   1.107 +            pass
   1.108 +
   1.109 +saved_weight = {
   1.110 +    'Auskunft'  : 2,
   1.111 +}
   1.112 +
   1.113 +weight = {}
   1.114 +for word in correct_answer.keys():
   1.115 +    if word in saved_weight:
   1.116 +        weight[word] = saved_weight[word]
   1.117 +    else:
   1.118 +        weight[word] = 1
   1.119 +
   1.120 +stats = {
   1.121 +    'total_errors'      :0,
   1.122 +    'last_errors'       :0,
   1.123 +    'total_questions'   :0,
   1.124 +    'last_questions'    :0,
   1.125 +    }
   1.126 +
   1.127 +while 1:
   1.128 +    question = wrandom(weight)
   1.129 +    colorprint(question, 'Yellow')
   1.130 +    answer = sys.stdin.readline().rstrip('\n')
   1.131 +    if not answer:
   1.132 +        break
   1.133 +    if answer != correct_answer[question]:
   1.134 +        colorprint(
   1.135 +            "%s %s" % (correct_answer[question], question),
   1.136 +            color_for_answer(correct_answer[question])
   1.137 +            )
   1.138 +        weight = set_weight(weight, question, weight[question]*1.5)
   1.139 +        stats['total_errors'] += 1
   1.140 +        stats['last_errors'] += 1
   1.141 +    else:
   1.142 +        weight = set_weight(weight, question, weight[question]*0.8)
   1.143 +
   1.144 +    stats['total_questions'] += 1
   1.145 +    stats['last_questions'] += 1
   1.146 +
   1.147 +    if stats['last_questions'] == 20:
   1.148 +        print_stats(stats, weight, correct_answer)
   1.149 +        stats['last_questions'] = 0
   1.150 +        stats['last_errors'] = 0
   1.151 +
   1.152 +    print
   1.153 +