new-words

annotate misc/zubrator.py @ 47:d708e2c1bad8

compressed wordlist support
author Igor Chubin <igor@chub.in>
date Mon Feb 07 21:21:17 2011 +0200 (2011-02-07)
parents
children abd4080ee583
rev   line source
igor@46 1 #!/usr/bin/python
igor@46 2
igor@46 3 import random
igor@46 4 import sys
igor@46 5
igor@46 6 # TODO:
igor@46 7 # * persistent weight dict
igor@46 8 # * log
igor@46 9 # * stats (top5, time_total, time_last, correct_answers_rate_total, correct_answers_rate_last)
igor@46 10
igor@46 11 # DONE:
igor@46 12 # * correct quit (ctrl d)
igor@46 13
def color_for_answer(answer):
    """Return the name of the color used to display *answer*.

    The answers 'der', 'das' and 'die' are shown in 'Blue', 'Green' and
    'Red' respectively; every other answer gets 'Normal'.
    """
    article_colors = {
        'der': 'Blue',
        'das': 'Green',
        'die': 'Red',
    }
    # Fall back to 'Normal' for anything that has no dedicated color.
    return article_colors.get(answer, 'Normal')
igor@46 24
def colorprint(string, color=None):
    """Print *string* to stdout, wrapped in terminal color codes if requested.

    Parameters:
        string: the text to print.
        color:  a key of the color table below (e.g. 'Red', 'Yellow').
                A false value, 'Normal', or any name missing from the
                table prints the text without escape codes.

    The colored form is the table's escape sequence, then the text, then
    the '\\033[1;m' sequence, followed by a newline.
    """
    color_table = {
        'Gray': '\033[1;30m',
        'Red': '\033[1;31m',
        'Green': '\033[1;32m',
        'Yellow': '\033[1;33m',
        'Blue': '\033[1;34m',
        'Magenta': '\033[1;35m',
        'Cyan': '\033[1;36m',
        'White': '\033[1;37m',
        'Crimson': '\033[1;38m',
        'Highlighted_Red': '\033[1;41m',
        'Highlighted_Green': '\033[1;42m',
        'Highlighted_Brown': '\033[1;43m',
        'Highlighted_Blue': '\033[1;44m',
        'Highlighted_Magenta': '\033[1;45m',
        'Highlighted_Cyan': '\033[1;46m',
        'Highlighted_Gray': '\033[1;47m',
        'Highlighted_Crimson': '\033[1;48m',
    }
    normal_color_code = '\033[1;m'
    # 'Normal' is not a key of color_table, so the membership test alone
    # covers it; the truthiness check keeps None/'' from being looked up.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    if color and color in color_table:
        print("%s%s%s" % (color_table[color], string, normal_color_code))
    else:
        print(string)
igor@46 50
def wrandom(dict):
    """Return a randomly chosen key of *dict*, using its values as weights.

    A point is drawn uniformly from [0, sum of all values] and the keys
    are walked in sorted order, subtracting each key's weight until the
    point falls inside a key's share. If no share matches (e.g. due to
    rounding), the last key in sorted order is returned.

    Parameters:
        dict: mapping of key -> numeric weight. The name shadows the
              builtin but is kept because it is part of the signature.
              Presumably weights are non-negative; this is not checked.

    Raises:
        ValueError: if the mapping is empty, since there is nothing to pick.
    """
    if not dict:
        raise ValueError("wrandom() needs a non-empty weight dictionary")

    total = sum(dict.values())
    n = random.uniform(0, total)

    item = None
    for key in sorted(dict):
        item = key
        if n < dict[key]:
            break
        # The point lies beyond this key's share; move on to the next one.
        n -= dict[key]

    return item
igor@46 62
def set_weight(weight, word, new_weight):
    """Give *word* the weight *new_weight* while keeping the total constant.

    The difference between the old and the new weight of *word* is spread
    evenly over all other entries, so the sum of all weights stays the same.
    The dictionary is modified in place and also returned.

    Raises:
        Exception: if the dictionary has fewer than two entries, or if the
                   sum of weights changed by more than 0.0001.
        KeyError:  if *word* is not a key of *weight*.
    """
    other_count = len(weight) - 1
    if other_count < 1:
        raise Exception("Can't set weight; weight dictionary is too small; need at least two members")

    total_before = sum(weight.values())
    # Amount every other entry gives up (or gains) to balance the change.
    share = (new_weight - weight[word])*1.0/other_count

    for key in weight:
        if key != word:
            weight[key] -= share
    weight[word] = new_weight

    total_after = sum(weight.values())
    if abs(total_before-total_after) > 0.0001:
        raise Exception("%s != %s ; function set_weight works incorrectly" % (total_before, total_after))
    return weight
igor@46 79
def print_stats(stats, weight, correct_answer):
    """Print a summary of the quiz statistics and the five heaviest questions.

    Parameters:
        stats:          dict with the keys 'total_questions',
                        'last_questions', 'total_errors' and 'last_errors'.
        weight:         dict mapping question -> current weight.
        correct_answer: dict mapping question -> expected answer.

    Error rates are printed as errors divided by questions; when the
    question count is zero the rate is shown as 0.00 instead of failing
    with a division by zero.
    """
    total_questions = stats['total_questions']
    last_questions = stats['last_questions']
    # Guard the divisions: a counter may still be (or just have been reset to) 0.
    total_rate = 1.0*stats['total_errors']/total_questions if total_questions else 0.0
    last_rate = 1.0*stats['last_errors']/last_questions if last_questions else 0.0

    print("------------------------")
    print("total questions = %s" % total_questions)
    print("last questions = %s" % last_questions)
    print("total errors = %s (%.2f)" % (stats['total_errors'], total_rate))
    print("last errors = %s (%.2f)" % (stats['last_errors'], last_rate))
    print("top 5 questions:")
    # Highest weight first; only the first five entries are shown.
    for question in sorted(weight, key=weight.get, reverse=True)[:5]:
        colorprint(
            " %s %s %5.2f" % (correct_answer[question], question, weight[question]),
            color_for_answer(correct_answer[question])
        )

    print("------------------------")
igor@46 94
# Script body: load the word list named on the command line, then quiz the
# user until end of input (Ctrl-D) or an empty answer.
if len(sys.argv) < 2:
    sys.exit("usage: %s WORDLIST_FILE" % sys.argv[0])
filename = sys.argv[1]

# Each usable line is split at its first space: the part before it is the
# question, the rest of the line is the expected answer.
correct_answer = {}
with open(filename) as f:
    for line in f:
        line = line.rstrip('\n')
        parts = line.split(' ', 1)
        if len(parts) != 2:
            # No space on this line, so there is no question/answer pair; skip it.
            continue
        (q, a) = parts
        correct_answer[q] = a

saved_weight = {
    'Auskunft' : 2,
}

# Every word starts with weight 1 unless a saved weight exists for it.
weight = {}
for word in correct_answer:
    weight[word] = saved_weight.get(word, 1)

# Re-weighting after an answer needs at least two entries to shift weight between.
if len(weight) < 2:
    sys.exit("%s: need at least two 'question answer' lines" % filename)

stats = {
    'total_errors' :0,
    'last_errors' :0,
    'total_questions' :0,
    'last_questions' :0,
}

while True:
    question = wrandom(weight)
    colorprint(question, 'Yellow')
    answer = sys.stdin.readline().rstrip('\n')
    # readline() returns '' at end of input; an empty line also ends the quiz.
    if not answer:
        break
    expected = correct_answer[question]
    if answer != expected:
        # Show the right answer and make this question more likely to come up.
        colorprint(
            "%s %s" % (expected, question),
            color_for_answer(expected)
        )
        weight = set_weight(weight, question, weight[question]*1.5)
        stats['total_errors'] += 1
        stats['last_errors'] += 1
    else:
        # Answered correctly: make this question less likely to come up.
        weight = set_weight(weight, question, weight[question]*0.8)

    stats['total_questions'] += 1
    stats['last_questions'] += 1

    # Report after every 20 questions and restart the "last" counters.
    if stats['last_questions'] == 20:
        print_stats(stats, weight, correct_answer)
        stats['last_questions'] = 0
        stats['last_errors'] = 0

    print("")
igor@46 150