new-words
diff new-words.py @ 48:7194bdb56475
new feature: -r and -R can specify number of words (or percentage) to show
author | Igor Chubin <igor@chub.in> |
---|---|
date | Tue Feb 08 20:35:38 2011 +0200 (2011-02-08) |
parents | d708e2c1bad8 |
children | 00286f6bfa85 |
line diff
1.1 --- a/new-words.py Mon Feb 07 21:21:17 2011 +0200
1.2 +++ b/new-words.py Tue Feb 08 20:35:38 2011 +0200
1.3 @@ -115,10 +115,10 @@
1.4 dest="pages")
1.5
1.6 parser.add_option(
1.7 - "-r", "--remove-tag",
1.8 - help="remove subvocabulary of specified tag",
1.9 + "-d", "--delete-tag",
1.10 + help="delete subvocabulary of specified tag",
1.11 action="store",
1.12 - dest="remove_tag")
1.13 + dest="delete_tag")
1.14
1.15 parser.add_option(
1.16 "-s", "--text-stats",
1.17 @@ -337,7 +337,16 @@
1.18 return cmp(int(num1), int(num2))
1.19
1.20
1.21 -def print_words_sorted(word_pairs, stats, normalizator, print_stats=True, stats_only=False, compressed_wordlist=False):
1.22 +def print_words_sorted(
1.23 + word_pairs,
1.24 + stats,
1.25 + normalizator,
1.26 + print_stats=True,
1.27 + stats_only=False,
1.28 + compressed_wordlist=False,
1.29 + show_range=0,
1.30 + show_range_percentage=0,
1.31 + ):
1.32 if stats_only:
1.33 codecs.getwriter("utf-8")(sys.stdout).write(
1.34 " ".join([
1.35 @@ -372,6 +381,7 @@
1.36 current_level = 0
1.37 old_normalized_word = None
1.38 words_of_this_group = []
1.39 + printed_words = 0
1.40 for word_pair in word_pairs:
1.41
1.42 normalized_word = normalizator.normalize(word_pair[1])
1.43 @@ -384,6 +394,7 @@
1.44 )
1.45 if compressed_wordlist:
1.46 codecs.getwriter("utf-8")(sys.stdout).write("%10s %s\n" % compressed_word_pair)
1.47 + printed_words += 1
1.48 words_of_this_group = []
1.49
1.50 old_normalized_word = normalized_word
1.51 @@ -391,6 +402,7 @@
1.52
1.53 if not compressed_wordlist:
1.54 codecs.getwriter("utf-8")(sys.stdout).write("%10s %s\n" % word_pair)
1.55 + printed_words += 1
1.56
1.57
1.58 known += word_pair[0]
1.59 @@ -401,6 +413,11 @@
1.60 level_lines = level_lines[1:]
1.61 codecs.getwriter("utf-8")(sys.stdout).write("# %s\n" % current_level)
1.62
1.63 + if show_range >0 and printed_words >= show_range:
1.64 + break
1.65 + if show_range_percentage >0 and 100.0*known/total >= show_range_percentage:
1.66 + break
1.67 +
1.68 def filter_add_notes(args):
1.69 lines = readlines_from_file(args[0])
1.70 notes = load_notes(notes_filenames())
1.71 @@ -422,6 +439,7 @@
1.72 notes = load_notes(notes_filenames())
1.73 lines = readlines_from_stdin()
1.74 group_by = [1]
1.75 +
1.76 if 'GROUP_WORDS_BY_TWO' in os.environ and os.environ['GROUP_WORDS_BY_TWO'] == 'YES':
1.77 group_by.append(2)
1.78 if 'GROUP_WORDS_BY_THREE' in os.environ and os.environ['GROUP_WORDS_BY_THREE'] == 'YES':
1.79 @@ -435,6 +453,17 @@
1.80 if 'COMPRESSED_WORDLIST' in os.environ and os.environ['COMPRESSED_WORDLIST'] == 'YES':
1.81 compressed_wordlist = True
1.82
1.83 + show_range = os.environ.get('SHOW_RANGE', '')
1.84 + if show_range != '':
1.85 + show_range = int(show_range)
1.86 + else:
1.87 + show_range = 0
1.88 + show_range_percentage = os.environ.get('SHOW_RANGE_PERCENTAGE', '')
1.89 + if show_range_percentage != '':
1.90 + show_range_percentage = int(show_range_percentage)
1.91 + else:
1.92 + show_range_percentage = 0
1.93 +
1.94
1.95 stats = {}
1.96 stats['total'] = sum(words[x] for x in words.keys())
1.97 @@ -471,7 +500,9 @@
1.98 stats,
1.99 normalizator,
1.100 stats_only=stats_only,
1.101 - compressed_wordlist=compressed_wordlist
1.102 + compressed_wordlist=compressed_wordlist,
1.103 + show_range=show_range,
1.104 + show_range_percentage=show_range_percentage,
1.105 )
1.106
1.107 (options, args) = parser.parse_args()