new-words

annotate new-words.py @ 49:00286f6bfa85

experimental: when -c specified, use dictionary for compression
author Igor Chubin <igor@chub.in>
date Wed Feb 09 21:08:23 2011 +0200 (2011-02-09)
parents 7194bdb56475
children 4e931db74618
rev   line source
igor@37 1 #!/usr/bin/env python
igor@38 2 # -*- coding: utf-8 -*-
igor@37 3
igor@40 4 from __future__ import with_statement
igor@38 5 import codecs
igor@49 6 import difflib
igor@38 7 import logging
igor@38 8 import os
igor@37 9 import optparse
igor@38 10 import re
igor@38 11 import subprocess
igor@38 12 import sys
igor@38 13 import Stemmer
igor@42 14 try:
igor@42 15 import psyco
igor@42 16 psyco.full()
igor@42 17 except:
igor@42 18 pass
igor@38 19
# Runtime settings shared by the functions below.
config = {
    # Per-user directory that holds the vocabulary and notes files.
    # expanduser() uses $HOME when it is set and falls back to the password
    # database otherwise, so an unset HOME no longer raises KeyError here.
    'config_directory': os.path.join(os.path.expanduser('~'), '.new-words'),
    # Default language of the text; replaced by the -l/--language option.
    'language': 'en',
}

# All debug output of the script is appended to this fixed log file.
logging.basicConfig(filename='/tmp/new-words-py.log', level=logging.DEBUG)
igor@38 26
class Normalizator:
    """Reduces words to a normal form and picks a representative of a group.

    A word is first followed through the user-defined links in
    ``linked_words`` (word -> main word) and then stemmed with the stemmer
    from the ``Stemmer`` module selected for the given language.
    """

    def __init__(self, language, linked_words=None):
        """Create a normalizator for ``language``.

        language     -- one of 'de', 'en', 'ru', 'uk'; any other value
                        raises KeyError.
        linked_words -- optional mapping word -> main word.  A fresh empty
                        dict is used when omitted, so instances never share
                        one default mapping.
        """
        stemmer_algorithm = {
            'de': 'german',
            'en': 'english',
            'ru': 'russian',
            'uk': 'ukrainian',
        }
        self.stemmer = Stemmer.Stemmer(stemmer_algorithm[language])
        if linked_words is None:
            linked_words = {}
        self.linked_words = linked_words

    def normalize(self, word):
        """Return the stem of ``word`` after resolving its links.

        Links are followed until a word without a link is reached; a cycle
        of links stops the walk at the first word that would be revisited.
        The resulting word is lower-cased before it is stemmed.
        """
        visited = set()
        while word in self.linked_words and word not in visited:
            visited.add(word)
            word = self.linked_words[word]
        return self.stemmer.stemWord(word.lower())

    def best_word_from_group(self, wordpairs_group):
        """Return the text to show for a group of (count, word) pairs.

        The shortest word of the group is chosen; among equally short words
        the one with the highest count wins (the first one on a tie).  The
        choice is then checked with ``dictionary_suggestions``:

        * exactly one suggestion for the word: the word is returned as is;
        * the word ends in 'et' / 't': it is retried with the final 't'
          replaced by 'n' / 'en'; a retry that yields exactly one
          suggestion is returned;
        * otherwise the word is returned followed by all collected
          suggestions (space separated, most similar first), or followed by
          '_' when there are no suggestions.

        Raises ValueError when ``wordpairs_group`` is empty.
        """
        minimal_length = min(len(pair[1]) for pair in wordpairs_group)
        shortest_pairs = [
            pair for pair in wordpairs_group if len(pair[1]) == minimal_length]
        # max() keeps the first of several pairs with the same count, which
        # is what a stable descending sort followed by [0] would pick.
        best_match = max(shortest_pairs, key=lambda pair: pair[0])[1]

        suggestions = self.dictionary_suggestions(best_match)
        if len(suggestions) == 1:
            return best_match

        # NOTE(review): the ending rules below look German-specific but are
        # applied whatever the configured language is -- confirm intent.
        corrected_best_match = best_match
        if best_match[-2:] == 'et':
            word = best_match[:-1] + "n"
            sugg = self.dictionary_suggestions(word)
            if len(sugg) == 1:
                return word
            suggestions = suggestions + sugg
            corrected_best_match = best_match[:-2]

        # A word ending in 'et' also ends in 't', so it passes through this
        # branch as well and ends up trimmed by one character only.
        # Slicing (instead of indexing) keeps an empty word from raising.
        if best_match[-1:] == 't':
            word = best_match[:-1] + "en"
            sugg = self.dictionary_suggestions(word)
            if len(sugg) == 1:
                return word
            suggestions = suggestions + sugg
            corrected_best_match = best_match[:-1]

        # A word that starts in lower case only keeps the suggestions that
        # start in lower case too.  [:1] tolerates empty strings.
        if corrected_best_match[:1].lower() == corrected_best_match[:1]:
            suggestions = [x for x in suggestions if x[:1].lower() == x[:1]]

        if not suggestions:
            return best_match + "_"

        def similarity(candidate):
            # Similarity of a suggestion to the trimmed word, ignoring case.
            return difflib.SequenceMatcher(
                None,
                candidate.lower(),
                corrected_best_match.lower()).ratio()

        ranked = sorted(suggestions, key=similarity, reverse=True)
        return best_match + " " + " ".join(ranked)

    def dictionary_suggestions(self, word):
        """Return the output lines of the external ``de-variants`` command.

        The command is run with ``word`` as its only argument; its standard
        output is decoded as UTF-8 and returned as a list of lines without
        the trailing newline.  Raises OSError when the command cannot be
        started.
        """
        process = subprocess.Popen(
            ["de-variants", word],
            stdout=subprocess.PIPE)
        # communicate() reads the pipe to the end, closes it and waits for
        # the child, so neither a descriptor nor a zombie is left behind.
        output = process.communicate()[0]
        lines = output.decode('utf-8').split('\n')
        if lines[-1] == '':
            # Either no output at all or the piece after the final newline.
            lines.pop()
        return lines
igor@49 113
igor@49 114
# Command line options of the script.  Every option stores its value in its
# own attribute of the parsed options object.
parser = optparse.OptionParser()

parser.add_option(
    "-a", "--no-marks",
    help="don't add marks (and don't save marks added by user)",
    action="store_true",
    dest="no_marks")

parser.add_option(
    "-c", "--compressed",
    help="show compressed wordlist: one word per group",
    action="store_true",
    dest="compressed")

# This option used to share dest="compressed" with -c, so passing -k
# silently switched on the compressed word list instead.
parser.add_option(
    "-k", "--known-words",
    help="put higher words that are similar to the known words (only for English)",
    action="store_true",
    dest="known_words")

parser.add_option(
    "-l", "--language",
    help="specify language of text",
    action="store",
    dest="language")

parser.add_option(
    "-f", "--function",
    help="filter through subsystem [INTERNAL]",
    action="store",
    dest="function")

parser.add_option(
    "-m", "--merge-tag",
    help="merge words tagged with specified tag into the main vocabulary",
    action="store",
    dest="merge_tag")

parser.add_option(
    "-M", "--merge-tagged",
    help="merge words tagged with ANY tag into the main vocabulary",
    action="store_true",
    dest="merge_tagged")

parser.add_option(
    "-n", "--non-interactive",
    help="non-interactive mode (don't run vi)",
    action="store_true",
    dest="non_interactive")

parser.add_option(
    "-N", "--no-filter",
    help="switch off known words filtering",
    action="store_true",
    dest="no_filter")

parser.add_option(
    "-p", "--pages",
    help="work with specified pages only (pages = start-stop/total )",
    action="store",
    dest="pages")

parser.add_option(
    "-d", "--delete-tag",
    help="delete subvocabulary of specified tag",
    action="store",
    dest="delete_tag")

parser.add_option(
    "-s", "--text-stats",
    help="show the text statistics (percentage of known words and so on) and exit",
    action="store_true",
    dest="text_stats")

parser.add_option(
    "-S", "--voc-stats",
    help="show your vocabulary statistics (number of words and word groups)",
    action="store_true",
    dest="voc_stats")

parser.add_option(
    "-t", "--tag",
    help="tag known words with tag",
    action="store",
    dest="tag")

# The help text here used to be a copy of the one for -t/--tag.
parser.add_option(
    "-T", "--show-tags",
    help="show tags",
    action="store_true",
    dest="show_tags")

parser.add_option(
    "-2", "--two-words",
    help="find 2 words' sequences",
    action="store_true",
    dest="two_words")

parser.add_option(
    "-3", "--three-words",
    help="find 3 words' sequences",
    action="store_true",
    dest="three_words")
igor@37 218
def readlines_from_file(filename):
    """
    Returns the lines of the UTF-8 encoded file `filename` as a list of
    unicode strings; every line keeps its line ending.
    """
    with codecs.open(filename, "r", "utf-8") as source:
        return source.readlines()
igor@38 225
def readlines_from_stdin():
    """
    Returns everything available on standard input, decoded as UTF-8,
    as a list of lines (line endings are kept).
    """
    utf8_reader = codecs.getreader("utf-8")
    stdin_stream = utf8_reader(sys.stdin)
    return stdin_stream.readlines()
igor@38 228
# Separator between two words: a run of non-word characters.  The lookahead
# forbids a run to *start* with an apostrophe, so "don't" stays one word;
# '_' is a word character for \W anyway and therefore never separates.
# Compiled once instead of on every call.
_WORD_SEPARATOR_RE = re.compile(r"(?!['_])(?:\W)+", flags=re.UNICODE)


def words_from_line(line):
    """
    Splits `line` into words.

    Trailing newlines are removed first.  The result can contain empty
    strings, e.g. when the line starts or ends with a separator.
    """
    return _WORD_SEPARATOR_RE.split(line.rstrip('\n'))
igor@38 234
def get_words(lines, group_by=(1,)):
    """
    Returns hash of words in a file
    word => number

    Single words are always counted.  When `group_by` contains 2 (or 3),
    every sequence of two (three) consecutive words is counted as well,
    under the key "w1_w2" ("w1_w2_w3").  Empty tokens and tokens made of
    the digits 0-9 only are skipped and do not interrupt a sequence;
    sequences also continue across line boundaries.
    """
    result = {}
    count_pairs = 2 in group_by
    count_triples = 3 in group_by
    # Sliding window over the last three accepted words; c is the newest.
    (a, b, c) = ("", "", "")
    for line in lines:
        for word in words_from_line(line):
            if re.match('[0-9]*$', word):
                continue
            result[word] = result.get(word, 0) + 1

            # Move the window *before* counting the sequences.  Counting
            # first made every sequence lag behind the current word, so the
            # last two pairs and the last triple were never counted.
            (a, b, c) = (b, c, word)
            if count_pairs and b != "":
                pair = "%s_%s" % (b, c)
                result[pair] = result.get(pair, 0) + 1
            if count_triples and "" not in (a, b, c):
                triple = "%s_%s_%s" % (a, b, c)
                result[triple] = result.get(triple, 0) + 1

    logging.debug(result)
    return result
igor@38 261
def load_vocabulary():
    """
    Returns the word counts of the vocabulary file of the configured
    language, i.e. <config_directory>/<language>.txt
    """
    vocabulary_path = "%s/%s.txt" % (
        config['config_directory'], config['language'])
    vocabulary_lines = readlines_from_file(vocabulary_path)
    return get_words(vocabulary_lines)
igor@38 264
def notes_filenames():
    """
    Returns the list of notes files of the configured language; at the
    moment it is the single file <config_directory>/notes-<language>.txt
    """
    directory = config['config_directory']
    language = config['language']
    return ["%s/notes-%s.txt" % (directory, language)]
igor@38 267
def load_notes(files):
    """
    Reads the notes from the UTF-8 encoded `files`.

    Every line has the form "<word><whitespace><note>".  Returns the hash
    word => {filename => note}

    Lines that do not contain both parts (blank lines, a word without a
    note) are skipped; they used to abort the whole run with ValueError.
    """
    notes = {}
    for filename in files:
        with codecs.open(filename, "r", "utf-8") as f:
            for line in f.readlines():
                fields = re.split(r'\s+', line.rstrip('\n'), maxsplit=1)
                if len(fields) != 2:
                    continue
                (word, note) = fields
                notes.setdefault(word, {})[filename] = note
    return notes
igor@38 277
def add_notes(lines, notes):
    """
    Returns a copy of `lines` where the note of a word, taken from the
    first notes file, is appended to the line of that word.

    Lines starting with '#' and lines without a matching note are copied
    unchanged.
    """
    notes_filename = notes_filenames()[0]
    annotated = []
    for line in lines:
        if not line.startswith('#'):
            found = re.search('^\s*\S+\s*(\S+)', line)
            if found and notes_filename in notes.get(found.group(1), {}):
                note = notes[found.group(1)][notes_filename]
                line = "%-30s %s\n" % (line.rstrip('\n'), note)
        annotated.append(line)
    return annotated
igor@39 298
def remove_notes(lines, notes_group):
    """
    Strips the notes from `lines` and saves them to the first notes file.

    A line of the form "<space><count><space><word><space><note>" is cut
    down to its first four parts and its note replaces the stored note of
    the word; any other line is only normalized to end with one newline.
    Returns the list of stripped lines.
    """
    notes_filename = notes_filenames()[0]
    # Start from the notes that are already stored in the first notes file.
    notes = dict(
        (word, per_file[notes_filename])
        for (word, per_file) in notes_group.items()
        if notes_filename in per_file)

    cleaned = []
    for raw_line in lines:
        text = raw_line.rstrip('\n')
        parsed = re.match('(\s+)(\S+)(\s+)(\S+)(\s+)(.*)', text)
        if parsed is None:
            cleaned.append(text + "\n")
            continue
        cleaned.append("".join(parsed.group(1, 2, 3, 4)) + "\n")
        notes[parsed.group(4)] = parsed.group(6)

    save_notes(notes_filename, notes)
    return cleaned
igor@39 324
def save_notes(filename, notes):
    """
    Writes `notes` (hash word => note) into the notes file `filename`.

    The file is read first: the line of a word that is in `notes` is
    replaced by the new note, every other line is kept as it is (blank
    lines and words without a note included; they used to raise
    ValueError).  Words that are not in the file yet are appended.
    The file has to exist already.
    """
    lines = []
    saved_words = set()
    with codecs.open(filename, "r", "utf-8") as f:
        for line in f.readlines():
            # The word is everything in front of the first whitespace.
            word = re.split(r'\s+', line.rstrip('\n'), maxsplit=1)[0]
            if word in notes:
                line = "%-29s %s\n" % (word, notes[word])
                saved_words.add(word)
            lines.append(line)

    for word in notes:
        if word not in saved_words:
            lines.append("%-29s %s\n" % (word, notes[word]))

    with codecs.open(filename, "w", "utf-8") as f:
        f.writelines(lines)
igor@39 342
igor@39 343
def substract_dictionary(dict1, dict2):
    """
    returns a new dictionary with the items of dict1
    whose keys are not in dict2
    """
    return dict(
        (key, value)
        for (key, value) in dict1.items()
        if key not in dict2)
igor@38 353
def dump_words(words, filename):
    """
    Writes every word of the hash `words` (word => number) into the file
    `filename`, one per line, repeated as many times as its number says.
    """
    with codecs.open(filename, "w+", "utf-8") as target:
        for (word, count) in words.items():
            target.write(("%s\n" % word) * count)
igor@38 358
def error_message(text):
    """
    Prints `text`, followed by a newline, to standard error.

    Standard output is the stream the word lists are written to, so the
    error text is kept out of it.
    """
    sys.stderr.write("%s\n" % (text,))
igor@38 361
def find_wordgroups_weights(word_pairs, normalizator):
    """
    Returns the hash normalized word => sum of the numbers of all
    (number, word) pairs whose word has this normalized form.
    """
    weight = {}
    for (count, word) in word_pairs:
        group = normalizator.normalize(word)
        weight[group] = weight.get(group, 0) + count
    return weight
igor@38 369
def find_linked_words(notes):
    """
    Returns the hash word => main word, built from the notes that contain
    a link of the form "@mainword".

    `notes` is the hash word => {filename => note}.  Only the first '@' of
    a note is looked at; an '@' that is not followed by a word is ignored.
    """
    linked_words = {}
    for (word, notes_by_file) in notes.items():
        for note in notes_by_file.values():
            link = re.search(r'\@(\S*)', note)
            if link is not None and link.group(1):
                linked_words[word] = link.group(1)
    return linked_words
igor@38 381
def compare_word_pairs(pair1, pair2, wgw, normalizator, linked_words):
    """
    Three-way comparison (-1, 0 or 1) of two (number, word) pairs.

    The pairs are ordered by the weight of their word group (`wgw`,
    normalized word => weight), then by the normalized word itself and
    finally by their own number.  `linked_words` is not used.
    """
    def three_way(left, right):
        return (left > right) - (left < right)

    (num1, word1) = pair1
    (num2, word2) = pair2
    stem1 = normalizator.normalize(word1)
    stem2 = normalizator.normalize(word2)

    for (left, right) in ((wgw[stem1], wgw[stem2]), (stem1, stem2)):
        outcome = three_way(left, right)
        if outcome != 0:
            return outcome
    return three_way(int(num1), int(num2))
igor@38 398
igor@47 399
def print_words_sorted(
        word_pairs,
        stats,
        normalizator,
        print_stats=True,
        stats_only=False,
        compressed_wordlist=False,
        show_range=0,
        show_range_percentage=0,
        ):
    """
    Writes the word list (or only the statistics) to standard output,
    encoded as UTF-8.

    word_pairs            -- (number, word) pairs in the order to print;
                             the words of one group have to be adjacent
    stats                 -- hash with the text statistics
    normalizator          -- object that finds the group of a word
    print_stats           -- start the list with a "# ..." summary line
    stats_only            -- write a table with the statistics and return
    compressed_wordlist   -- write one line per word group instead of one
                             line per word
    show_range            -- stop after this many printed lines (0: no limit)
    show_range_percentage -- stop when this percentage of known words is
                             reached (0: no limit)

    A "# <level>" line is written whenever the percentage of known words
    passes a level.  Raises ZeroDivisionError when stats['total'] is 0 and
    `word_pairs` is not empty.
    """
    # The writer keeps no state, so one instance serves every write.
    out = codecs.getwriter("utf-8")(sys.stdout)

    if stats_only:
        titles = ["LANG", "KNOWN%", "UNKNOWN%", "KNOWN", "TOTAL", "WPS", "UWPS*10"]
        out.write(" ".join(["%-10s" % x for x in titles]) + "\n")
        out.write(
            " ".join([
                "%(language)-10s",
                "%(percentage)-10.2f",
                "%(percentage_unknown)-10.2f",
                # The integer columns are 11 wide and follow each other
                # without a separator; that equals the 10+1 of the titles.
                "%(total_known)-11d"
                "%(total)-11d"
                "%(wps)-11d"
                "%(uwps)-11d"
                ]) % stats + "\n")
        return

    if print_stats:
        out.write(
            "# %(language)s, %(percentage)-7.2f, <%(total_known)s/%(total)s>, <%(groups)s/%(words)s>\n" % stats)

    # Levels at which a marker is written: multiples of 5 above the current
    # percentage, then single steps from 90 on.
    first_level = int(float(stats['percentage'])) // 5 * 5 + 5
    level_lines = list(range(first_level, 95, 5)) + list(range(90, 102))
    known = int(stats['total_known'])
    total = int(stats['total'])
    old_normalized_word = None
    words_of_this_group = []
    printed_words = 0

    def write_group(group):
        # One line for the whole group: summed number and best word.
        out.write("%10s %s\n" % (
            sum(x[0] for x in group),
            normalizator.best_word_from_group(group)))

    for word_pair in word_pairs:
        normalized_word = normalizator.normalize(word_pair[1])
        if old_normalized_word and old_normalized_word != normalized_word:
            # A new group starts here: finish the previous one.
            if compressed_wordlist:
                write_group(words_of_this_group)
                printed_words += 1
            words_of_this_group = []

        old_normalized_word = normalized_word
        words_of_this_group.append(word_pair)

        if not compressed_wordlist:
            out.write("%10s %s\n" % word_pair)
            printed_words += 1

        known += word_pair[0]
        known_percentage = 100.0 * known / total
        if level_lines and known_percentage >= level_lines[0]:
            current_level = level_lines[0]
            # Skip all the levels passed at once and report the last one.
            while level_lines and known_percentage > level_lines[0]:
                current_level = level_lines[0]
                level_lines = level_lines[1:]
            out.write("# %s\n" % current_level)

        if show_range > 0 and printed_words >= show_range:
            break
        if show_range_percentage > 0 and known_percentage >= show_range_percentage:
            break

    # The group that was still open when the loop ended used to be lost.
    # It is only held back when the line limit is reached already.
    range_exhausted = show_range > 0 and printed_words >= show_range
    if compressed_wordlist and words_of_this_group and not range_exhausted:
        write_group(words_of_this_group)
igor@48 480
def filter_add_notes(args):
    """
    Rewrites the file args[0] in place, appending the stored notes
    to the lines of its words.
    """
    target = args[0]
    annotated = add_notes(
        readlines_from_file(target),
        load_notes(notes_filenames()))
    with codecs.open(target, "w", "utf-8") as out:
        out.write("".join(annotated))
igor@39 488
def filter_remove_notes(args):
    """
    Rewrites the file args[0] in place without the notes; the notes found
    in it are saved to the notes file on the way.
    """
    target = args[0]
    stripped = remove_notes(
        readlines_from_file(target),
        load_notes(notes_filenames()))
    with codecs.open(target, "w", "utf-8") as out:
        out.write("".join(stripped))
igor@39 496
def _env_is_yes(name):
    """Tells whether the environment variable `name` is set to 'YES'."""
    return os.environ.get(name) == 'YES'


def _env_int(name):
    """Returns the environment variable `name` as int; 0 if unset or empty."""
    value = os.environ.get(name, '')
    if value == '':
        return 0
    return int(value)


def filter_get_words_group_words_add_stat(args):
    """
    Reads a text from standard input and prints its words, most frequent
    first, together with the statistics of the text.  `args` is not used.

    The behaviour is controlled by environment variables:
    GROUP_WORDS_BY_TWO, GROUP_WORDS_BY_THREE -- count word sequences too
    STAT_ONLY             -- print only the statistics
    COMPRESSED_WORDLIST   -- print one line per word group
    FILTER_WORDS          -- leave out the words of the vocabulary
    WORDS_GROUPING        -- keep the words of one group together
    SHOW_RANGE, SHOW_RANGE_PERCENTAGE -- limits of the printed list
    The switches are on when they are set to 'YES'; the limits are
    integers (ValueError otherwise).
    """
    vocabulary = load_vocabulary()
    notes = load_notes(notes_filenames())
    lines = readlines_from_stdin()

    group_by = [1]
    if _env_is_yes('GROUP_WORDS_BY_TWO'):
        group_by.append(2)
    if _env_is_yes('GROUP_WORDS_BY_THREE'):
        group_by.append(3)
    words = get_words(lines, group_by)

    stats_only = _env_is_yes('STAT_ONLY')
    compressed_wordlist = _env_is_yes('COMPRESSED_WORDLIST')
    show_range = _env_int('SHOW_RANGE')
    show_range_percentage = _env_int('SHOW_RANGE_PERCENTAGE')

    stats = {}
    stats['total'] = sum(words.values())
    if _env_is_yes('FILTER_WORDS'):
        words = substract_dictionary(words, vocabulary)

    stats['total_unknown'] = sum(words.values())
    stats['total_known'] = stats['total'] - stats['total_unknown']
    if stats['total'] > 0:
        stats['percentage'] = 100.0*stats['total_known']/stats['total']
        stats['percentage_unknown'] = 100.0-100.0*stats['total_known']/stats['total']
    else:
        # Empty input: report zeros instead of dividing by zero.
        stats['percentage'] = 0.0
        stats['percentage_unknown'] = 0.0
    stats['groups'] = 0
    stats['words'] = len(words)
    stats['sentences'] = 0  #FIXME
    stats['wps'] = 0  #FIXME
    stats['uwps'] = 0  #FIXME
    stats['language'] = config['language']

    linked_words = find_linked_words(notes)
    normalizator = Normalizator(config['language'], linked_words)

    words_with_freq = [
        (words[k], k)
        for k in sorted(words.keys(), key=lambda k: words[k], reverse=True)]

    wgw = find_wordgroups_weights(words_with_freq, normalizator)
    if _env_is_yes('WORDS_GROUPING'):
        def group_sort_key(pair):
            # Same order as compare_word_pairs(): weight of the group, the
            # normalized word, the number of the word itself.  As a key the
            # word is normalized once instead of at every comparison.
            stem = normalizator.normalize(pair[1])
            return (wgw[stem], stem, int(pair[0]))

        words_with_freq = sorted(
            words_with_freq,
            key=group_sort_key,
            reverse=True)

    print_words_sorted(
        words_with_freq,
        stats,
        normalizator,
        stats_only=stats_only,
        compressed_wordlist=compressed_wordlist,
        show_range=show_range,
        show_range_percentage=show_range_percentage,
        )
igor@40 567
# Script entry: parse the command line and run the requested filter.
(options, args) = parser.parse_args()
if options.language:
    config['language'] = options.language

if options.function:
    # Names accepted by -f/--function and the filters behind them.
    function_names = {
        'add_notes': filter_add_notes,
        'remove_notes': filter_remove_notes,
        'get_words_group_words_add_stat': filter_get_words_group_words_add_stat,
    }
    if options.function in function_names:
        function_names[options.function](args)
    else:
        error_message("Unknown function %s.\nAvailable functions:\n%s" % (
            options.function, "".join([" "+x for x in sorted(function_names.keys())])))
        sys.exit(1)
igor@37 584
igor@37 585
igor@37 586
igor@37 587
igor@38 588 #os.system("vim")
igor@37 589