new-words

changeset 43:d532e7b52ab2
Support for the -s option in new-words.py

The -s option of new-words-py.sh now works the same way as in new-words.sh.
(The WPS and UWPS fields are not calculated yet; they are currently hard-coded to 0.)
author: Igor Chubin <igor@chub.in>
date: Fri Jan 28 12:40:58 2011 +0200 (2011-01-28)
parents: 3ec83a7cc544
children: 7eb1a8c3eade
files: new-words-py.sh new-words.py
     1.1 --- a/new-words-py.sh	Mon Jan 24 06:31:42 2011 +0100
     1.2 +++ b/new-words-py.sh	Fri Jan 28 12:40:58 2011 +0200
     1.3 @@ -283,7 +283,7 @@
     1.4     | part $PART_TO_PROCESS \
     1.5     | tee $ORIGINAL_TEXT \
     1.6     | two_and_three_words \
     1.7 -   | get_words_group_words_add_stat \
     1.8 +   | STAT_ONLY="$STAT_ONLY" get_words_group_words_add_stat \
     1.9     | tee "$TEMP1" > "$TEMP2"
    1.10  
    1.11  if [ "$STAT_ONLY" = "YES" ]

     2.1 --- a/new-words.py	Mon Jan 24 06:31:42 2011 +0100
     2.2 +++ b/new-words.py	Fri Jan 28 12:40:58 2011 +0200
     2.3 @@ -318,12 +318,32 @@
     2.4  
     2.5  def print_words_sorted(word_pairs, stats, print_stats=True, stats_only=False):
     2.6      if stats_only:
     2.7 -        codecs.getwriter("utf-8")(sys.stdout).write("stat_only")
     2.8 +        codecs.getwriter("utf-8")(sys.stdout).write(
     2.9 +            " ".join([
    2.10 +                "%-10s" % x for x in [
    2.11 +                "LANG",
    2.12 +                "KNOWN%",
    2.13 +                "UNKNOWN%",
    2.14 +                "KNOWN",
    2.15 +                "TOTAL",
    2.16 +                "WPS",
    2.17 +                "UWPS*10"
    2.18 +                ]]) + "\n")
    2.19 +        codecs.getwriter("utf-8")(sys.stdout).write(
    2.20 +            " ".join([
    2.21 +                "%(language)-10s",
    2.22 +                "%(percentage)-10.2f",
    2.23 +                "%(percentage_unknown)-10.2f",
    2.24 +                "%(total_known)-11d"
    2.25 +                "%(total)-11d"
    2.26 +                "%(wps)-11d"
    2.27 +                "%(uwps)-11d"
    2.28 +                ]) % stats + "\n")
    2.29          return
    2.30  
    2.31      if print_stats:
    2.32          codecs.getwriter("utf-8")(sys.stdout).write(
    2.33 -            "# %(language)s, %(percentage)s, <%(total_known)s/%(total)s>, <%(groups)s/%(words)s>\n" % stats)
    2.34 +            "# %(language)s, %(percentage)-7.2f, <%(total_known)s/%(total)s>, <%(groups)s/%(words)s>\n" % stats)
    2.35  
    2.36      level_lines = range(int(float(stats['percentage']))/5*5+5,95,5)+range(90,102)
    2.37      known = int(stats['total_known'])
    2.38 @@ -360,6 +380,9 @@
    2.39      notes = load_notes(notes_filenames())
    2.40      lines = readlines_from_stdin()
    2.41      words = get_words(lines)
    2.42 +    stats_only = False
    2.43 +    if 'STAT_ONLY' in os.environ and os.environ['STAT_ONLY'] == 'YES':
    2.44 +        stats_only = True
    2.45  
    2.46      stats = {}
    2.47      stats['total'] = sum(words[x] for x in words.keys())
    2.48 @@ -367,10 +390,13 @@
    2.49  
    2.50      stats['total_unknown'] = sum(words[x] for x in words.keys())
    2.51      stats['total_known'] = stats['total'] - stats['total_unknown']
    2.52 -    stats['percentage'] = "%7.2f"%(100.0*stats['total_known']/stats['total'])
    2.53 +    stats['percentage'] = 100.0*stats['total_known']/stats['total']
    2.54 +    stats['percentage_unknown'] = 100.0-100.0*stats['total_known']/stats['total']
    2.55      stats['groups'] = 0
    2.56      stats['words'] = len(words)
    2.57 -    stats['sentences'] = 0 #FIXME
    2.58 +    stats['sentences'] = 0  #FIXME
    2.59 +    stats['wps'] = 0        #FIXME
    2.60 +    stats['uwps'] = 0       #FIXME
    2.61      stats['language'] = config['language']
    2.62  
    2.63      linked_words = find_linked_words(notes)
    2.64 @@ -386,7 +412,7 @@
    2.65                  cmp=lambda x,y:compare_word_pairs(x,y, wgw, normalizator, linked_words),
    2.66                  reverse=True)
    2.67  
    2.68 -    print_words_sorted(word_pairs, stats)
    2.69 +    print_words_sorted(word_pairs, stats, stats_only=stats_only)
    2.70  
    2.71  (options, args) = parser.parse_args()
    2.72  if options.language:
author	Igor Chubin <igor@chub.in>
date	Fri Jan 28 12:40:58 2011 +0200 (2011-01-28)
parents	3ec83a7cc544
children	7eb1a8c3eade
files	new-words-py.sh new-words.py