new-words
changeset 43:d532e7b52ab2
-s key support in new-words.py
Now new-words-py.sh -s works in the same way as new-words.sh.
(WPS and UWPS fields are not calculated correctly yet).
| field | value |
|---|---|
| author | Igor Chubin <igor@chub.in> |
| date | Fri Jan 28 12:40:58 2011 +0200 (2011-01-28) |
| parents | 3ec83a7cc544 |
| children | 7eb1a8c3eade |
| files | new-words-py.sh new-words.py |
line diff
```
 1.1 --- a/new-words-py.sh Mon Jan 24 06:31:42 2011 +0100
 1.2 +++ b/new-words-py.sh Fri Jan 28 12:40:58 2011 +0200
 1.3 @@ -283,7 +283,7 @@
 1.4      | part $PART_TO_PROCESS \
 1.5      | tee $ORIGINAL_TEXT \
 1.6      | two_and_three_words \
 1.7 -    | get_words_group_words_add_stat \
 1.8 +    | STAT_ONLY="$STAT_ONLY" get_words_group_words_add_stat \
 1.9      | tee "$TEMP1" > "$TEMP2"
1.10
1.11  if [ "$STAT_ONLY" = "YES" ]
```
```
 2.1 --- a/new-words.py Mon Jan 24 06:31:42 2011 +0100
 2.2 +++ b/new-words.py Fri Jan 28 12:40:58 2011 +0200
 2.3 @@ -318,12 +318,32 @@
 2.4
 2.5  def print_words_sorted(word_pairs, stats, print_stats=True, stats_only=False):
 2.6      if stats_only:
 2.7 -        codecs.getwriter("utf-8")(sys.stdout).write("stat_only")
 2.8 +        codecs.getwriter("utf-8")(sys.stdout).write(
 2.9 +            " ".join([
2.10 +                "%-10s" % x for x in [
2.11 +                "LANG",
2.12 +                "KNOWN%",
2.13 +                "UNKNOWN%",
2.14 +                "KNOWN",
2.15 +                "TOTAL",
2.16 +                "WPS",
2.17 +                "UWPS*10"
2.18 +                ]]) + "\n")
2.19 +        codecs.getwriter("utf-8")(sys.stdout).write(
2.20 +            " ".join([
2.21 +                "%(language)-10s",
2.22 +                "%(percentage)-10.2f",
2.23 +                "%(percentage_unknown)-10.2f",
2.24 +                "%(total_known)-11d"
2.25 +                "%(total)-11d"
2.26 +                "%(wps)-11d"
2.27 +                "%(uwps)-11d"
2.28 +                ]) % stats + "\n")
2.29          return
2.30
2.31      if print_stats:
2.32          codecs.getwriter("utf-8")(sys.stdout).write(
2.33 -            "# %(language)s, %(percentage)s, <%(total_known)s/%(total)s>, <%(groups)s/%(words)s>\n" % stats)
2.34 +            "# %(language)s, %(percentage)-7.2f, <%(total_known)s/%(total)s>, <%(groups)s/%(words)s>\n" % stats)
2.35
2.36      level_lines = range(int(float(stats['percentage']))/5*5+5,95,5)+range(90,102)
2.37      known = int(stats['total_known'])
2.38 @@ -360,6 +380,9 @@
2.39      notes = load_notes(notes_filenames())
2.40      lines = readlines_from_stdin()
2.41      words = get_words(lines)
2.42 +    stats_only = False
2.43 +    if 'STAT_ONLY' in os.environ and os.environ['STAT_ONLY'] == 'YES':
2.44 +        stats_only = True
2.45
2.46      stats = {}
2.47      stats['total'] = sum(words[x] for x in words.keys())
2.48 @@ -367,10 +390,13 @@
2.49
2.50      stats['total_unknown'] = sum(words[x] for x in words.keys())
2.51      stats['total_known'] = stats['total'] - stats['total_unknown']
2.52 -    stats['percentage'] = "%7.2f"%(100.0*stats['total_known']/stats['total'])
2.53 +    stats['percentage'] = 100.0*stats['total_known']/stats['total']
2.54 +    stats['percentage_unknown'] = 100.0-100.0*stats['total_known']/stats['total']
2.55      stats['groups'] = 0
2.56      stats['words'] = len(words)
2.57 -    stats['sentences'] = 0 #FIXME
2.58 +    stats['sentences'] = 0 #FIXME
2.59 +    stats['wps'] = 0 #FIXME
2.60 +    stats['uwps'] = 0 #FIXME
2.61      stats['language'] = config['language']
2.62
2.63      linked_words = find_linked_words(notes)
2.64 @@ -386,7 +412,7 @@
2.65          cmp=lambda x,y:compare_word_pairs(x,y, wgw, normalizator, linked_words),
2.66          reverse=True)
2.67
2.68 -    print_words_sorted(word_pairs, stats)
2.69 +    print_words_sorted(word_pairs, stats, stats_only=stats_only)
2.70
2.71  (options, args) = parser.parse_args()
2.72  if options.language:
```