new-words

diff new-words.py @ 43:d532e7b52ab2

Support for the -s option in new-words.py

new-words-py.sh -s now works the same way as in new-words.sh.
(The WPS and UWPS fields are not calculated yet; for now they are always reported as 0.)
author Igor Chubin <igor@chub.in>
date Fri Jan 28 12:40:58 2011 +0200 (2011-01-28)
parents 3ec83a7cc544
children 7eb1a8c3eade
line diff
     1.1 --- a/new-words.py	Mon Jan 24 06:31:42 2011 +0100
     1.2 +++ b/new-words.py	Fri Jan 28 12:40:58 2011 +0200
     1.3 @@ -318,12 +318,32 @@
     1.4  
     1.5  def print_words_sorted(word_pairs, stats, print_stats=True, stats_only=False):
     1.6      if stats_only:
     1.7 -        codecs.getwriter("utf-8")(sys.stdout).write("stat_only")
     1.8 +        codecs.getwriter("utf-8")(sys.stdout).write(
     1.9 +            " ".join([
    1.10 +                "%-10s" % x for x in [
    1.11 +                "LANG",
    1.12 +                "KNOWN%",
    1.13 +                "UNKNOWN%",
    1.14 +                "KNOWN",
    1.15 +                "TOTAL",
    1.16 +                "WPS",
    1.17 +                "UWPS*10"
    1.18 +                ]]) + "\n")
    1.19 +        codecs.getwriter("utf-8")(sys.stdout).write(
    1.20 +            " ".join([
    1.21 +                "%(language)-10s",
    1.22 +                "%(percentage)-10.2f",
    1.23 +                "%(percentage_unknown)-10.2f",
    1.24 +                "%(total_known)-11d"
    1.25 +                "%(total)-11d"
    1.26 +                "%(wps)-11d"
    1.27 +                "%(uwps)-11d"
    1.28 +                ]) % stats + "\n")
    1.29          return
    1.30  
    1.31      if print_stats:
    1.32          codecs.getwriter("utf-8")(sys.stdout).write(
    1.33 -            "# %(language)s, %(percentage)s, <%(total_known)s/%(total)s>, <%(groups)s/%(words)s>\n" % stats)
    1.34 +            "# %(language)s, %(percentage)-7.2f, <%(total_known)s/%(total)s>, <%(groups)s/%(words)s>\n" % stats)
    1.35  
    1.36      level_lines = range(int(float(stats['percentage']))/5*5+5,95,5)+range(90,102)
    1.37      known = int(stats['total_known'])
    1.38 @@ -360,6 +380,9 @@
    1.39      notes = load_notes(notes_filenames())
    1.40      lines = readlines_from_stdin()
    1.41      words = get_words(lines)
    1.42 +    stats_only = False
    1.43 +    if 'STAT_ONLY' in os.environ and os.environ['STAT_ONLY'] == 'YES':
    1.44 +        stats_only = True
    1.45  
    1.46      stats = {}
    1.47      stats['total'] = sum(words[x] for x in words.keys())
    1.48 @@ -367,10 +390,13 @@
    1.49  
    1.50      stats['total_unknown'] = sum(words[x] for x in words.keys())
    1.51      stats['total_known'] = stats['total'] - stats['total_unknown']
    1.52 -    stats['percentage'] = "%7.2f"%(100.0*stats['total_known']/stats['total'])
    1.53 +    stats['percentage'] = 100.0*stats['total_known']/stats['total']
    1.54 +    stats['percentage_unknown'] = 100.0-100.0*stats['total_known']/stats['total']
    1.55      stats['groups'] = 0
    1.56      stats['words'] = len(words)
    1.57 -    stats['sentences'] = 0 #FIXME
    1.58 +    stats['sentences'] = 0  #FIXME
    1.59 +    stats['wps'] = 0        #FIXME
    1.60 +    stats['uwps'] = 0       #FIXME
    1.61      stats['language'] = config['language']
    1.62  
    1.63      linked_words = find_linked_words(notes)
    1.64 @@ -386,7 +412,7 @@
    1.65                  cmp=lambda x,y:compare_word_pairs(x,y, wgw, normalizator, linked_words),
    1.66                  reverse=True)
    1.67  
    1.68 -    print_words_sorted(word_pairs, stats)
    1.69 +    print_words_sorted(word_pairs, stats, stats_only=stats_only)
    1.70  
    1.71  (options, args) = parser.parse_args()
    1.72  if options.language: