new-words

changeset 68:846240941452 tip

Added -C (--compressed-to-line) option: show all words of a group on one line; fixed a bug with the #90 level line
author Igor Chubin <igor@chub.in>
date Sun Sep 23 16:07:29 2012 +0300 (2012-09-23)
parents 87bb1c5e6616
children
files new-words.py
line diff
     1.1 --- a/new-words.py	Wed Mar 28 15:54:30 2012 +0200
     1.2 +++ b/new-words.py	Sun Sep 23 16:07:29 2012 +0300
     1.3 @@ -137,6 +137,12 @@
     1.4      dest="compressed")
     1.5  
     1.6  parser.add_option(
     1.7 +    "-C", "--compressed-to-line",
     1.8 +    help="show compressed wordlist: all words of the group in a line",
     1.9 +    action="store_true",
    1.10 +    dest="compressed_to_line")
    1.11 +
    1.12 +parser.add_option(
    1.13      "-k", "--known-words",
    1.14      help="put higher words that are similar to the known words (only for English)",
    1.15      action="store_true",
    1.16 @@ -465,6 +471,7 @@
    1.17          print_stats=True,
    1.18          stats_only=False,
    1.19          compressed_wordlist=False,
    1.20 +        compressed_to_line = False,
    1.21          show_range=0,
    1.22          show_range_percentage=0,
    1.23          ):
    1.24 @@ -498,10 +505,14 @@
    1.25          result.append(
    1.26              "# %(language)s, %(percentage)-7.2f, <%(total_known)s/%(total)s>, <%(groups)s/%(words)s>\n" % stats)
    1.27  
    1.28 -    level_lines = range(int(float(stats['percentage']))/5*5+5,95,5)+range(90,102)
    1.29      known = int(stats['total_known'])
    1.30      total = int(stats['total'])
    1.31 -    current_level = 0
    1.32 +    level_lines = range(int(float(stats['percentage']))/5*5+5,90,5)+range(90,102)
    1.33 +    if 100.0*known/total >= level_lines[0]:
    1.34 +        current_level = level_lines[0]
    1.35 +        while 100.0*known/total > level_lines[0]:
    1.36 +            current_level = level_lines[0]
    1.37 +            level_lines = level_lines[1:]
    1.38      old_normalized_word = None
    1.39      words_of_this_group = []
    1.40      printed_words = 0
    1.41 @@ -514,7 +525,10 @@
    1.42                      sum(x[0] for x in words_of_this_group),
    1.43                      normalizator.best_word_from_group(words_of_this_group)
    1.44                      )
    1.45 -                result.append("%10s %s\n" % compressed_word_pair)
    1.46 +                if compressed_to_line:
    1.47 +                    result.append("%10s %s %s\n" % (compressed_word_pair + (" ".join(y for x,y in words_of_this_group if y not in compressed_word_pair),)))
    1.48 +                else:
    1.49 +                    result.append("%10s %s\n" % compressed_word_pair)
    1.50                  printed_words += 1
    1.51              words_of_this_group = []
    1.52  
    1.53 @@ -660,9 +674,11 @@
    1.54          stats_only = True
    1.55  
    1.56      compressed_wordlist = False
    1.57 -    if 'compressed' in config:
    1.58 +    if 'compressed' in config or 'compressed_to_line' in config:
    1.59          compressed_wordlist = True
    1.60  
    1.61 +    compressed_to_line = 'compressed_to_line' in config
    1.62 +
    1.63      if 'show_range' in config:
    1.64          show_range = int(config['show_range'])
    1.65      else:
    1.66 @@ -724,6 +740,7 @@
    1.67          normalizator,
    1.68          stats_only=stats_only,
    1.69          compressed_wordlist=compressed_wordlist,
    1.70 +        compressed_to_line=compressed_to_line,
    1.71          show_range=show_range,
    1.72          show_range_percentage=show_range_percentage,
    1.73          )
    1.74 @@ -804,6 +821,9 @@
    1.75  if options.compressed:
    1.76      config['compressed'] = True
    1.77  
    1.78 +if options.compressed_to_line:
    1.79 +    config['compressed_to_line'] = True
    1.80 +
    1.81  if options.no_filter:
    1.82      config['no_filter'] = True
    1.83