new-words

diff new-words.py @ 48:7194bdb56475
New feature: -r and -R can specify the number of words (or the percentage) to show
author: Igor Chubin <igor@chub.in>
date: Tue Feb 08 20:35:38 2011 +0200 (2011-02-08)
parents: d708e2c1bad8
children: 00286f6bfa85
     1.1 --- a/new-words.py	Mon Feb 07 21:21:17 2011 +0200
     1.2 +++ b/new-words.py	Tue Feb 08 20:35:38 2011 +0200
     1.3 @@ -115,10 +115,10 @@
     1.4      dest="pages")
     1.5  
     1.6  parser.add_option(
     1.7 -    "-r", "--remove-tag",
     1.8 -    help="remove subvocabulary of specified tag",
     1.9 +    "-d", "--delete-tag",
    1.10 +    help="delete subvocabulary of specified tag",
    1.11      action="store",
    1.12 -    dest="remove_tag")
    1.13 +    dest="delete_tag")
    1.14  
    1.15  parser.add_option(
    1.16      "-s", "--text-stats",
    1.17 @@ -337,7 +337,16 @@
    1.18              return cmp(int(num1), int(num2))
    1.19  
    1.20  
    1.21 -def print_words_sorted(word_pairs, stats, normalizator, print_stats=True, stats_only=False, compressed_wordlist=False):
    1.22 +def print_words_sorted(
    1.23 +        word_pairs,
    1.24 +        stats,
    1.25 +        normalizator,
    1.26 +        print_stats=True,
    1.27 +        stats_only=False,
    1.28 +        compressed_wordlist=False,
    1.29 +        show_range=0,
    1.30 +        show_range_percentage=0,
    1.31 +        ):
    1.32      if stats_only:
    1.33          codecs.getwriter("utf-8")(sys.stdout).write(
    1.34              " ".join([
    1.35 @@ -372,6 +381,7 @@
    1.36      current_level = 0
    1.37      old_normalized_word = None
    1.38      words_of_this_group = []
    1.39 +    printed_words = 0
    1.40      for word_pair in word_pairs:
    1.41  
    1.42          normalized_word = normalizator.normalize(word_pair[1])
    1.43 @@ -384,6 +394,7 @@
    1.44                  )
    1.45              if compressed_wordlist:
    1.46                  codecs.getwriter("utf-8")(sys.stdout).write("%10s %s\n" % compressed_word_pair)
    1.47 +                printed_words += 1
    1.48              words_of_this_group = []
    1.49  
    1.50          old_normalized_word = normalized_word
    1.51 @@ -391,6 +402,7 @@
    1.52  
    1.53          if not compressed_wordlist:
    1.54              codecs.getwriter("utf-8")(sys.stdout).write("%10s %s\n" % word_pair)
    1.55 +            printed_words += 1
    1.56  
    1.57  
    1.58          known += word_pair[0]
    1.59 @@ -401,6 +413,11 @@
    1.60                  level_lines = level_lines[1:]
    1.61              codecs.getwriter("utf-8")(sys.stdout).write("# %s\n" % current_level)
    1.62  
    1.63 +        if show_range >0 and printed_words >= show_range:
    1.64 +            break
    1.65 +        if show_range_percentage >0 and 100.0*known/total >= show_range_percentage:
    1.66 +            break
    1.67 +
    1.68  def filter_add_notes(args):
    1.69      lines = readlines_from_file(args[0])
    1.70      notes = load_notes(notes_filenames())
    1.71 @@ -422,6 +439,7 @@
    1.72      notes = load_notes(notes_filenames())
    1.73      lines = readlines_from_stdin()
    1.74      group_by = [1]
    1.75 +
    1.76      if 'GROUP_WORDS_BY_TWO' in os.environ and os.environ['GROUP_WORDS_BY_TWO'] == 'YES':
    1.77          group_by.append(2)
    1.78      if 'GROUP_WORDS_BY_THREE' in os.environ and os.environ['GROUP_WORDS_BY_THREE'] == 'YES':
    1.79 @@ -435,6 +453,17 @@
    1.80      if 'COMPRESSED_WORDLIST' in os.environ and os.environ['COMPRESSED_WORDLIST'] == 'YES':
    1.81          compressed_wordlist = True
    1.82  
    1.83 +    show_range = os.environ.get('SHOW_RANGE', '')
    1.84 +    if show_range != '':
    1.85 +        show_range = int(show_range)
    1.86 +    else:
    1.87 +        show_range = 0
    1.88 +    show_range_percentage = os.environ.get('SHOW_RANGE_PERCENTAGE', '')
    1.89 +    if show_range_percentage != '':
    1.90 +        show_range_percentage = int(show_range_percentage)
    1.91 +    else:
    1.92 +        show_range_percentage = 0
    1.93 +
    1.94  
    1.95      stats = {}
    1.96      stats['total'] = sum(words[x] for x in words.keys())
    1.97 @@ -471,7 +500,9 @@
    1.98          stats,
    1.99          normalizator,
   1.100          stats_only=stats_only,
   1.101 -        compressed_wordlist=compressed_wordlist
   1.102 +        compressed_wordlist=compressed_wordlist,
   1.103 +        show_range=show_range,
   1.104 +        show_range_percentage=show_range_percentage,
   1.105          )
   1.106  
   1.107  (options, args) = parser.parse_args()
author	Igor Chubin <igor@chub.in>
date	Tue Feb 08 20:35:38 2011 +0200 (2011-02-08)
parents	d708e2c1bad8
children	00286f6bfa85