# HG changeset patch # User Igor Chubin # Date 1348405649 -10800 # Node ID 8462409414524eabb9df1fad6e333d42b9f6c930 # Parent 87bb1c5e66161db3ce9904cdd6ee85bf164f02dd added -C key: compress to lines; fixed bug with #90-line diff -r 87bb1c5e6616 -r 846240941452 new-words.py --- a/new-words.py Wed Mar 28 15:54:30 2012 +0200 +++ b/new-words.py Sun Sep 23 16:07:29 2012 +0300 @@ -137,6 +137,12 @@ dest="compressed") parser.add_option( + "-C", "--compressed-to-line", + help="show compressed wordlist: all words of the group in a line", + action="store_true", + dest="compressed_to_line") + +parser.add_option( "-k", "--known-words", help="put higher words that are similar to the known words (only for English)", action="store_true", @@ -465,6 +471,7 @@ print_stats=True, stats_only=False, compressed_wordlist=False, + compressed_to_line = False, show_range=0, show_range_percentage=0, ): @@ -498,10 +505,14 @@ result.append( "# %(language)s, %(percentage)-7.2f, <%(total_known)s/%(total)s>, <%(groups)s/%(words)s>\n" % stats) - level_lines = range(int(float(stats['percentage']))/5*5+5,95,5)+range(90,102) known = int(stats['total_known']) total = int(stats['total']) - current_level = 0 + level_lines = range(int(float(stats['percentage']))/5*5+5,90,5)+range(90,102) + if 100.0*known/total >= level_lines[0]: + current_level = level_lines[0] + while 100.0*known/total > level_lines[0]: + current_level = level_lines[0] + level_lines = level_lines[1:] old_normalized_word = None words_of_this_group = [] printed_words = 0 @@ -514,7 +525,10 @@ sum(x[0] for x in words_of_this_group), normalizator.best_word_from_group(words_of_this_group) ) - result.append("%10s %s\n" % compressed_word_pair) + if compressed_to_line: + result.append("%10s %s %s\n" % (compressed_word_pair + (" ".join(y for x,y in words_of_this_group if y not in compressed_word_pair),))) + else: + result.append("%10s %s\n" % compressed_word_pair) printed_words += 1 words_of_this_group = [] @@ -660,9 +674,11 @@ stats_only = True compressed_wordlist = False - if 'compressed' in config: + if 'compressed' in config or 'compressed_to_line' in config: compressed_wordlist = True + compressed_to_line = 'compressed_to_line' in config + if 'show_range' in config: show_range = int(config['show_range']) else: @@ -724,6 +740,7 @@ normalizator, stats_only=stats_only, compressed_wordlist=compressed_wordlist, + compressed_to_line=compressed_to_line, show_range=show_range, show_range_percentage=show_range_percentage, ) @@ -804,6 +821,9 @@ if options.compressed: config['compressed'] = True +if options.compressed_to_line: + config['compressed_to_line'] = True + if options.no_filter: config['no_filter'] = True