# HG changeset patch # User Igor Chubin # Date 1296211258 -7200 # Node ID d532e7b52ab22d45f691d22f344dd92cad9d6e96 # Parent 3ec83a7cc544d26e0ca43adcb2ed3c2a13eb1164 Add support for the -s (statistics only) option to new-words.py. Now new-words-py.sh -s works the same way as new-words.sh -s. (The WPS and UWPS fields are not calculated yet; they are always printed as 0.) diff -r 3ec83a7cc544 -r d532e7b52ab2 new-words-py.sh --- a/new-words-py.sh Mon Jan 24 06:31:42 2011 +0100 +++ b/new-words-py.sh Fri Jan 28 12:40:58 2011 +0200 @@ -283,7 +283,7 @@ | part $PART_TO_PROCESS \ | tee $ORIGINAL_TEXT \ | two_and_three_words \ - | get_words_group_words_add_stat \ + | STAT_ONLY="$STAT_ONLY" get_words_group_words_add_stat \ | tee "$TEMP1" > "$TEMP2" if [ "$STAT_ONLY" = "YES" ] diff -r 3ec83a7cc544 -r d532e7b52ab2 new-words.py --- a/new-words.py Mon Jan 24 06:31:42 2011 +0100 +++ b/new-words.py Fri Jan 28 12:40:58 2011 +0200 @@ -318,12 +318,32 @@ def print_words_sorted(word_pairs, stats, print_stats=True, stats_only=False): if stats_only: - codecs.getwriter("utf-8")(sys.stdout).write("stat_only") + codecs.getwriter("utf-8")(sys.stdout).write( + " ".join([ + "%-10s" % x for x in [ + "LANG", + "KNOWN%", + "UNKNOWN%", + "KNOWN", + "TOTAL", + "WPS", + "UWPS*10" + ]]) + "\n") + codecs.getwriter("utf-8")(sys.stdout).write( + " ".join([ + "%(language)-10s", + "%(percentage)-10.2f", + "%(percentage_unknown)-10.2f", + "%(total_known)-11d" + "%(total)-11d" + "%(wps)-11d" + "%(uwps)-11d" + ]) % stats + "\n") return if print_stats: codecs.getwriter("utf-8")(sys.stdout).write( - "# %(language)s, %(percentage)s, <%(total_known)s/%(total)s>, <%(groups)s/%(words)s>\n" % stats) + "# %(language)s, %(percentage)-7.2f, <%(total_known)s/%(total)s>, <%(groups)s/%(words)s>\n" % stats) level_lines = range(int(float(stats['percentage']))/5*5+5,95,5)+range(90,102) known = int(stats['total_known']) @@ -360,6 +380,9 @@ notes = load_notes(notes_filenames()) lines = readlines_from_stdin() words = get_words(lines) + stats_only = False + if 'STAT_ONLY' in os.environ
and os.environ['STAT_ONLY'] == 'YES': + stats_only = True stats = {} stats['total'] = sum(words[x] for x in words.keys()) @@ -367,10 +390,13 @@ stats['total_unknown'] = sum(words[x] for x in words.keys()) stats['total_known'] = stats['total'] - stats['total_unknown'] - stats['percentage'] = "%7.2f"%(100.0*stats['total_known']/stats['total']) + stats['percentage'] = 100.0*stats['total_known']/stats['total'] + stats['percentage_unknown'] = 100.0-100.0*stats['total_known']/stats['total'] stats['groups'] = 0 stats['words'] = len(words) - stats['sentences'] = 0 #FIXME + stats['sentences'] = 0 #FIXME + stats['wps'] = 0 #FIXME + stats['uwps'] = 0 #FIXME stats['language'] = config['language'] linked_words = find_linked_words(notes) @@ -386,7 +412,7 @@ cmp=lambda x,y:compare_word_pairs(x,y, wgw, normalizator, linked_words), reverse=True) - print_words_sorted(word_pairs, stats) + print_words_sorted(word_pairs, stats, stats_only=stats_only) (options, args) = parser.parse_args() if options.language: