new-words
view misc/zubrator.py @ 47:d708e2c1bad8
compressed wordlist support
author | Igor Chubin <igor@chub.in> |
---|---|
date | Mon Feb 07 21:21:17 2011 +0200 (2011-02-07) |
parents | |
children | abd4080ee583 |
line source
1 #!/usr/bin/python
3 import random
4 import sys
# TODO:
# * persistent weight dict
# * log
# * stats (top5, time_total, time_last, correct_answers_rate_total, correct_answers_rate_last)

# DONE:
# * correct quit (ctrl d)
def color_for_answer(answer):
    """Return the color name used to display *answer*.

    'der' maps to 'Blue', 'das' to 'Green' and 'die' to 'Red'.  Any other
    value (including an empty string or None) yields 'Normal'.
    """
    color_table = {
        'der': 'Blue',
        'das': 'Green',
        'die': 'Red',
    }
    # dict.get replaces the "if not answer in ..." / else pair.
    return color_table.get(answer, 'Normal')
def colorprint(string, color=None):
    """Print *string* followed by a newline, optionally wrapped in ANSI colors.

    Parameters:
        string: the text to print.
        color:  a key of the color table below (e.g. 'Red', 'Yellow',
                'Highlighted_Blue').  A false value, 'Normal', or any name
                that is not in the table prints the text without escapes.
    """
    color_table = {
        'Gray': '\033[1;30m',
        'Red': '\033[1;31m',
        'Green': '\033[1;32m',
        'Yellow': '\033[1;33m',
        'Blue': '\033[1;34m',
        'Magenta': '\033[1;35m',
        'Cyan': '\033[1;36m',
        'White': '\033[1;37m',
        'Crimson': '\033[1;38m',
        'Highlighted_Red': '\033[1;41m',
        'Highlighted_Green': '\033[1;42m',
        'Highlighted_Brown': '\033[1;43m',
        'Highlighted_Blue': '\033[1;44m',
        'Highlighted_Magenta': '\033[1;45m',
        'Highlighted_Cyan': '\033[1;46m',
        'Highlighted_Gray': '\033[1;47m',
        'Highlighted_Crimson': '\033[1;48m',
    }
    normal_color_code = '\033[1;m'
    # 'Normal' is not a key of color_table, so the membership test alone
    # covers it; the truthiness test comes first so that a false (possibly
    # unhashable) color never reaches the dictionary lookup.
    if color and color in color_table:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s%s%s" % (color_table[color], string, normal_color_code))
    else:
        print(string)
def wrandom(dict):
    """Pick a random key of a {key: weight} mapping, biased by the weights.

    A number is drawn uniformly from [0, sum of weights] and the keys are
    walked in sorted order, subtracting each weight until the number falls
    below the current key's weight.  For non-negative weights this selects a
    key with probability proportional to its weight.  If no key matches
    (e.g. because of floating-point rounding) the last key in sorted order
    is returned.

    Parameters:
        dict: mapping from key to numeric weight.  The name shadows the
              builtin but is kept so existing keyword callers keep working.

    Returns:
        One of the mapping's keys.

    Raises:
        ValueError: if the mapping is empty (previously this surfaced as an
                    UnboundLocalError on the return statement).
    """
    if not dict:
        raise ValueError("wrandom() needs a non-empty weight dictionary")

    total = sum(dict.values())
    n = random.uniform(0, total)

    item = None
    for key in sorted(dict):
        item = key
        if n < dict[key]:
            break
        n -= dict[key]

    return item
def set_weight(weight, word, new_weight):
    """Set the weight of *word* while keeping the sum of all weights constant.

    The change applied to *word* is split evenly among all other entries
    with the opposite sign.  The dictionary is modified in place.

    Parameters:
        weight:     mapping from word to numeric weight, at least two entries.
        word:       key whose weight is replaced; must already be in *weight*.
        new_weight: the new weight for *word*.

    Returns:
        The same *weight* dictionary that was passed in.

    Raises:
        ValueError:   if *weight* has fewer than two entries.
        KeyError:     if *word* is not a key of *weight*.
        RuntimeError: if the sum of weights changed by more than 0.0001
                      (internal sanity check).

    NOTE(review): nothing here prevents the other entries from becoming
    negative after repeated increases of a single word.
    """
    if len(weight) <= 1:
        # ValueError is a subclass of Exception, so handlers written for the
        # previous generic Exception still catch it.
        raise ValueError("Can't set weight; weight dictionary is too small; need at least two members")

    sum_before = sum(weight.values())
    # float() keeps true division on Python 2 as well.
    delta = (new_weight - weight[word]) / float(len(weight) - 1)

    # Only existing keys are reassigned, so the dict size never changes
    # during iteration.
    for k in weight:
        if k == word:
            weight[k] = new_weight
        else:
            weight[k] -= delta

    sum_after = sum(weight.values())
    if abs(sum_before - sum_after) > 0.0001:
        raise RuntimeError("%s != %s ; function set_weight works incorrectly" % (sum_before, sum_after))
    return weight
def print_stats(stats, weight, correct_answer):
    """Print question/error counters and the five highest-weighted questions.

    Parameters:
        stats:          dict with the keys 'total_questions',
                        'last_questions', 'total_errors' and 'last_errors'.
        weight:         mapping from question to numeric weight.
        correct_answer: mapping from question to its correct answer.

    Error rates are printed as 0.00 when the matching question counter is
    zero, instead of raising ZeroDivisionError.
    """
    def rate(errors, questions):
        # Guard against division by zero when nothing has been asked yet.
        if not questions:
            return 0.0
        return 1.0 * errors / questions

    separator = "------------------------"

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(separator)
    print("total questions = %s" % stats['total_questions'])
    print("last questions = %s" % stats['last_questions'])
    print("total errors = %s (%.2f)" % (
        stats['total_errors'],
        rate(stats['total_errors'], stats['total_questions'])))
    print("last errors = %s (%.2f)" % (
        stats['last_errors'],
        rate(stats['last_errors'], stats['last_questions'])))
    print("top 5 questions:")
    for question in sorted(weight, key=weight.get, reverse=True)[:5]:
        colorprint(
            " %s %s %5.2f" % (correct_answer[question], question, weight[question]),
            color_for_answer(correct_answer[question])
        )

    print(separator)
# ---------------------------------------------------------------------------
# Quiz script.
#
# Reads a wordlist whose lines have the form "<question> <answer>" (split on
# the first space), then repeatedly prints a weighted-random question and
# compares the line typed on stdin with the stored answer.  A wrong answer
# multiplies the question's weight by 1.5, a right one by 0.8.
# ---------------------------------------------------------------------------
if len(sys.argv) < 2:
    sys.exit("usage: %s WORDLIST" % sys.argv[0])
filename = sys.argv[1]

correct_answer = {}
with open(filename) as f:
    for line in f:
        # Lines without a space carry no answer and are skipped, as before;
        # partition() makes that explicit instead of relying on a bare
        # "except: pass" around a failed tuple unpacking.
        q, sep, a = line.rstrip('\n').partition(' ')
        if sep:
            correct_answer[q] = a

saved_weight = {
    'Auskunft': 2,
}

# Every word starts at weight 1 unless a saved weight exists for it.
weight = dict((word, saved_weight.get(word, 1)) for word in correct_answer)

# wrandom() needs at least one entry and set_weight() at least two; fail
# early with a readable message rather than inside the quiz loop.
if len(weight) < 2:
    sys.exit("%s: need at least two 'question answer' lines" % filename)

stats = {
    'total_errors': 0,
    'last_errors': 0,
    'total_questions': 0,
    'last_questions': 0,
}

while True:
    question = wrandom(weight)
    colorprint(question, 'Yellow')
    answer = sys.stdin.readline().rstrip('\n')
    # readline() returns '' at end of input (Ctrl-D); an empty line is also
    # '' once its newline is stripped, so both end the quiz.
    if not answer:
        break
    if answer != correct_answer[question]:
        colorprint(
            "%s %s" % (correct_answer[question], question),
            color_for_answer(correct_answer[question])
        )
        weight = set_weight(weight, question, weight[question] * 1.5)
        stats['total_errors'] += 1
        stats['last_errors'] += 1
    else:
        weight = set_weight(weight, question, weight[question] * 0.8)

    stats['total_questions'] += 1
    stats['last_questions'] += 1

    # Print a report after every 20 questions and restart the "last" counters.
    if stats['last_questions'] == 20:
        print_stats(stats, weight, correct_answer)
        stats['last_questions'] = 0
        stats['last_errors'] = 0

# Finish with a blank line; print("") emits just a newline on Python 2 and 3.
print("")