new-words
changeset 46:bf0aa8e3c1ce
misc scripts added: categorized.pl; nw-shell; zubrator.py
author | Igor Chubin <igor@chub.in> |
---|---|
date | Fri Feb 04 06:39:25 2011 +0100 (2011-02-04) |
parents | 5f90e44eecfc |
children | d708e2c1bad8 |
files | misc/categorized.pl misc/nw-shell misc/zubrator.py |
line diff
# misc/categorized.pl
#
# Read lines from the files named on the command line (or from STDIN),
# put each line into a bucket chosen by the first tag pattern it matches,
# and print every bucket, sorted, under wiki-style section headings.

use strict;
use warnings;

my (@n_m, @n_n, @n_f);   # printed under the masculine / neuter / feminine noun headings
my (@adv, @modadv);      # adverbs and modal adverbs
my (@adj, @sub);         # adjectives and substantivized adjectives/participles
my (@verb, @sich);       # lines tagged vi/vt, and lines containing "(sich)"
my (@cj, @pron, @prp);   # conjunctions, pronouns, prepositions
my @rest;                # everything that matched no pattern

# Classification rules, tried in order; the first matching pattern wins,
# so /mod adv/ has to stay ahead of the more general /adv/.
my @rules = (
    [ qr/adj/,           \@adj    ],
    [ qr/sub/,           \@sub    ],
    [ qr/[| ]v[it][: ]/, \@verb   ],
    [ qr/\(sich\)/,      \@sich   ],
    [ qr/[| ]m[- :]/,    \@n_m    ],
    [ qr/[| ]n[- :]/,    \@n_n    ],
    [ qr/[| ]f[- :]/,    \@n_f    ],
    [ qr/mod adv/,       \@modadv ],
    [ qr/adv/,           \@adv    ],
    [ qr/prp[: ]/,       \@prp    ],
    [ qr/cj[: ]/,        \@cj     ],
    [ qr/pron[: ]/,      \@pron   ],
);

while (my $line = <>) {
    my $bucket = \@rest;
    for my $rule (@rules) {
        if ($line =~ $rule->[0]) {
            $bucket = $rule->[1];
            last;
        }
    }
    push @$bucket, $line;
}

print "== Существительные ==\n";
print "=== Мужской род ===\n";
print sort @n_m;
print "\n=== Средний род ===\n";
print sort @n_n;
print "\n=== Женский род ===\n";
print sort @n_f;
# Heading markup fixed: it used to open with "==" and close with "===".
print "\n\n\n== Глаголы ==\n";
print sort @verb;
# The "(sich)" lines used to be collected and then never printed.
print "\n\n=== Возвратные глаголы ===\n";
print sort @sich;
print "\n\n\n== Прилагательные ==\n";
print sort @adj;
print "\n\n=== Субстантивированные прилагательные и причастия ===\n";
print sort @sub;
print "\n\n\n== Местоимения ==\n";
print sort @pron;
print "\n\n\n== Наречия ==\n";
print sort @adv;
print "\n\n=== Модальные наречия ===\n";
print sort @modadv;
print "\n\n\n== Предлоги ==\n";
print sort @prp;
print "\n\n\n== Союзы ==\n";
print sort @cj;
print "\n\n\n== Прочее ==\n";
print sort @rest;
# misc/nw-shell
#
# Interactive prompt that runs `de` on each word typed by the user.
# The output of new-words-py.sh for "$1" (minus its '#' comment lines) is
# written to a temporary file and handed to rlwrap via -f.

TEMP=$(mktemp /tmp/nw-shell-XXXXXX) || exit 1

# Remove the temporary file however the script ends; turning the common
# termination signals into a plain exit makes the EXIT trap run for them too.
trap 'rm -f "$TEMP"' EXIT
trap 'exit 1' HUP INT TERM

~/hg/new-words/new-words-py.sh -n -l de "$1" | grep -v '^#' > "$TEMP"

rlwrap -f "$TEMP" sh -c '
    while true
    do
        printf "\033[01;33m > "
        # Reset the colour BEFORE leaving on EOF; the reset used to sit
        # after "exit" and was never reached.
        read -r word options || { printf "\033[00m"; exit; }
        printf "\033[00m"
        de "$word"
    done'
#!/usr/bin/python
# misc/zubrator.py
"""Weighted question/answer drill for the terminal.

Usage: zubrator.py FILE

Each line of FILE is split at its first space into a question and its
answer. Questions are drawn at random in proportion to their weight; a
wrong answer raises the weight of that question, a right answer lowers it.

Every print call below takes exactly one argument so that the file parses
and behaves the same on Python 2 and Python 3.
"""

import random
import sys

# TODO:
# * persistent weight dict
# * log
# * stats (top5, time_total, time_last, correct_answers_rate_total, correct_answers_rate_last)

# DONE:
# * correct quit (ctrl d)

# Colour name used to display each known answer.
_ANSWER_COLORS = {
    'der': 'Blue',
    'das': 'Green',
    'die': 'Red',
}

# Terminal escape sequence for each supported colour name.
_COLOR_CODES = {
    'Gray': '\033[1;30m',
    'Red': '\033[1;31m',
    'Green': '\033[1;32m',
    'Yellow': '\033[1;33m',
    'Blue': '\033[1;34m',
    'Magenta': '\033[1;35m',
    'Cyan': '\033[1;36m',
    'White': '\033[1;37m',
    'Crimson': '\033[1;38m',
    'Highlighted_Red': '\033[1;41m',
    'Highlighted_Green': '\033[1;42m',
    'Highlighted_Brown': '\033[1;43m',
    'Highlighted_Blue': '\033[1;44m',
    'Highlighted_Magenta': '\033[1;45m',
    'Highlighted_Cyan': '\033[1;46m',
    'Highlighted_Gray': '\033[1;47m',
    'Highlighted_Crimson': '\033[1;48m',
}
_COLOR_RESET = '\033[1;m'

# Initial weights for particular words; every other word starts at 1.
saved_weight = {
    'Auskunft': 2,
}

# Number of questions between two statistics reports.
_REPORT_EVERY = 20


def color_for_answer(answer):
    """Return the colour name for *answer*, or 'Normal' if it has none."""
    return _ANSWER_COLORS.get(answer, 'Normal')


def colorprint(string, color=None):
    """Print *string*, wrapped in the escape codes for *color* when known.

    A missing, 'Normal' or unknown colour prints the string unchanged.
    """
    code = _COLOR_CODES.get(color) if color else None
    if code is None:
        print(string)
    else:
        print("%s%s%s" % (code, string, _COLOR_RESET))


def wrandom(dict):
    """Return a random key of *dict*, chosen in proportion to its value.

    The parameter keeps its original name (which shadows the builtin inside
    this function only) so that existing keyword callers keep working.

    Raises:
        ValueError: if the dictionary is empty.
    """
    if not dict:
        raise ValueError("wrandom() needs a non-empty weight dictionary")
    remaining = random.uniform(0, sum(dict.values()))

    chosen = None
    # Walk the keys in sorted order, consuming the drawn number weight by
    # weight; if no key claims it, the last key is returned.
    for key in sorted(dict):
        chosen = key
        if remaining < dict[key]:
            break
        remaining -= dict[key]

    return chosen


def set_weight(weight, word, new_weight):
    """Set ``weight[word]`` to *new_weight*, keeping the total unchanged.

    The difference is spread evenly over all other entries. *weight* is
    modified in place and also returned.

    NOTE(review): the other entries are lowered without any lower bound, so
    repeated increases of one word can push them below zero.

    Raises:
        ValueError: if *weight* has fewer than two entries.
        KeyError: if *word* is not in *weight*.
        RuntimeError: if the total changed (internal consistency check).
    """
    if len(weight) <= 1:
        raise ValueError("Can't set weight; weight dictionary is too small; need at least two members")
    sum_before = sum(weight.values())
    delta = (new_weight - weight[word]) * 1.0 / (len(weight) - 1)
    for key in weight:
        if key == word:
            weight[key] = new_weight
        else:
            weight[key] -= delta
    sum_after = sum(weight.values())
    if abs(sum_before - sum_after) > 0.0001:
        raise RuntimeError("%s != %s ; function set_weight works incorrectly" % (sum_before, sum_after))
    return weight


def _error_rate(errors, questions):
    """Return errors/questions as a float, or 0.0 when nothing was asked."""
    if not questions:
        return 0.0
    return 1.0 * errors / questions


def print_stats(stats, weight, correct_answer):
    """Print question/error counters and the five heaviest questions.

    *stats* must provide the keys 'total_questions', 'last_questions',
    'total_errors' and 'last_errors'. Zero question counts are reported
    with an error rate of 0.00 instead of raising ZeroDivisionError.
    """
    print("------------------------")
    print("total questions = %s" % stats['total_questions'])
    print("last questions = %s" % stats['last_questions'])
    print("total errors = %s (%.2f)" % (
        stats['total_errors'],
        _error_rate(stats['total_errors'], stats['total_questions']),
    ))
    print("last errors = %s (%.2f)" % (
        stats['last_errors'],
        _error_rate(stats['last_errors'], stats['last_questions']),
    ))
    print("top 5 questions:")
    for question in sorted(weight, key=weight.get, reverse=True)[:5]:
        colorprint(
            " %s %s %5.2f" % (correct_answer[question], question, weight[question]),
            color_for_answer(correct_answer[question]),
        )

    print("------------------------")


def _load_answers(filename):
    """Read *filename* into a {question: answer} dictionary.

    Each line is split at its first space; lines without a space are
    skipped, as before.
    """
    answers = {}
    with open(filename) as f:
        for line in f:
            question, sep, answer = line.rstrip('\n').partition(' ')
            if sep:
                answers[question] = answer
    return answers


def main(argv=None):
    """Run the interactive drill; return the process exit status."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.stderr.write("usage: %s FILE\n" % (argv[0] if argv else 'zubrator.py'))
        return 2

    correct_answer = _load_answers(argv[1])
    if len(correct_answer) < 2:
        # set_weight() cannot rebalance fewer than two entries.
        sys.stderr.write("%s: need at least two 'question answer' lines\n" % argv[1])
        return 1

    weight = {word: saved_weight.get(word, 1) for word in correct_answer}

    stats = {
        'total_errors': 0,
        'last_errors': 0,
        'total_questions': 0,
        'last_questions': 0,
    }

    while True:
        question = wrandom(weight)
        colorprint(question, 'Yellow')
        answer = sys.stdin.readline().rstrip('\n')
        # End of input (ctrl-d) and an empty line both end the session.
        if not answer:
            break
        if answer != correct_answer[question]:
            colorprint(
                "%s %s" % (correct_answer[question], question),
                color_for_answer(correct_answer[question]),
            )
            set_weight(weight, question, weight[question] * 1.5)
            stats['total_errors'] += 1
            stats['last_errors'] += 1
        else:
            set_weight(weight, question, weight[question] * 0.8)

        stats['total_questions'] += 1
        stats['last_questions'] += 1

        if stats['last_questions'] == _REPORT_EVERY:
            print_stats(stats, weight, correct_answer)
            stats['last_questions'] = 0
            stats['last_errors'] = 0

        print("")

    return 0


if __name__ == '__main__':
    sys.exit(main())