new-words
diff new-words.py @ 68:846240941452
added -C option: compress each word group to a single line; fixed bug with #90-line
author | Igor Chubin <igor@chub.in> |
---|---|
date | Sun Sep 23 16:07:29 2012 +0300 (2012-09-23) |
parents | 5a003076eb11 |
children |
line diff
1.1 --- a/new-words.py Tue Mar 27 14:09:25 2012 +0200 1.2 +++ b/new-words.py Sun Sep 23 16:07:29 2012 +0300 1.3 @@ -137,6 +137,12 @@ 1.4 dest="compressed") 1.5 1.6 parser.add_option( 1.7 + "-C", "--compressed-to-line", 1.8 + help="show compressed wordlist: all words of the group in a line", 1.9 + action="store_true", 1.10 + dest="compressed_to_line") 1.11 + 1.12 +parser.add_option( 1.13 "-k", "--known-words", 1.14 help="put higher words that are similar to the known words (only for English)", 1.15 action="store_true", 1.16 @@ -465,6 +471,7 @@ 1.17 print_stats=True, 1.18 stats_only=False, 1.19 compressed_wordlist=False, 1.20 + compressed_to_line = False, 1.21 show_range=0, 1.22 show_range_percentage=0, 1.23 ): 1.24 @@ -498,10 +505,14 @@ 1.25 result.append( 1.26 "# %(language)s, %(percentage)-7.2f, <%(total_known)s/%(total)s>, <%(groups)s/%(words)s>\n" % stats) 1.27 1.28 - level_lines = range(int(float(stats['percentage']))/5*5+5,95,5)+range(90,102) 1.29 known = int(stats['total_known']) 1.30 total = int(stats['total']) 1.31 - current_level = 0 1.32 + level_lines = range(int(float(stats['percentage']))/5*5+5,90,5)+range(90,102) 1.33 + if 100.0*known/total >= level_lines[0]: 1.34 + current_level = level_lines[0] 1.35 + while 100.0*known/total > level_lines[0]: 1.36 + current_level = level_lines[0] 1.37 + level_lines = level_lines[1:] 1.38 old_normalized_word = None 1.39 words_of_this_group = [] 1.40 printed_words = 0 1.41 @@ -514,7 +525,10 @@ 1.42 sum(x[0] for x in words_of_this_group), 1.43 normalizator.best_word_from_group(words_of_this_group) 1.44 ) 1.45 - result.append("%10s %s\n" % compressed_word_pair) 1.46 + if compressed_to_line: 1.47 + result.append("%10s %s %s\n" % (compressed_word_pair + (" ".join(y for x,y in words_of_this_group if y not in compressed_word_pair),))) 1.48 + else: 1.49 + result.append("%10s %s\n" % compressed_word_pair) 1.50 printed_words += 1 1.51 words_of_this_group = [] 1.52 1.53 @@ -660,9 +674,11 @@ 1.54 stats_only = True 1.55 1.56 
compressed_wordlist = False 1.57 - if 'compressed' in config: 1.58 + if 'compressed' in config or 'compressed_to_line' in config: 1.59 compressed_wordlist = True 1.60 1.61 + compressed_to_line = 'compressed_to_line' in config 1.62 + 1.63 if 'show_range' in config: 1.64 show_range = int(config['show_range']) 1.65 else: 1.66 @@ -724,6 +740,7 @@ 1.67 normalizator, 1.68 stats_only=stats_only, 1.69 compressed_wordlist=compressed_wordlist, 1.70 + compressed_to_line=compressed_to_line, 1.71 show_range=show_range, 1.72 show_range_percentage=show_range_percentage, 1.73 ) 1.74 @@ -804,6 +821,9 @@ 1.75 if options.compressed: 1.76 config['compressed'] = True 1.77 1.78 +if options.compressed_to_line: 1.79 + config['compressed_to_line'] = True 1.80 + 1.81 if options.no_filter: 1.82 config['no_filter'] = True 1.83