new-words
diff new-words.py @ 67:87bb1c5e6616
added de script to misc/
author | Igor Chubin <igor@chub.in> |
---|---|
date | Wed Mar 28 15:54:30 2012 +0200 (2012-03-28) |
parents | 1b8b30ad7c95 |
children | 846240941452 |
line diff
```
1.1 --- a/new-words.py Sat Nov 12 14:03:20 2011 +0100
1.2 +++ b/new-words.py Wed Mar 28 15:54:30 2012 +0200
1.3 @@ -36,7 +36,10 @@
1.4              'it' : 'italian',
1.5              'uk' : 'ukrainian',
1.6              }
1.7 -        self.stemmer = Stemmer.Stemmer(stemmer_algorithm[language])
1.8 +        try:
1.9 +            self.stemmer = Stemmer.Stemmer(stemmer_algorithm[language])
1.10 +        except:
1.11 +            self.stemmer = None
1.12          self.linked_words = linked_words
1.13 
1.14      def normalize(self, word):
1.15 @@ -44,7 +47,10 @@
1.16          while word in self.linked_words and not word in word_chain:
1.17              word_chain.append(word)
1.18              word = self.linked_words[word]
1.19 -        return self.stemmer.stemWord(word.lower())
1.20 +        if self.stemmer:
1.21 +            return self.stemmer.stemWord(word.lower())
1.22 +        else:
1.23 +            return word.lower()
1.24 
1.25      def best_word_from_group(self, wordpairs_group):
1.26          """Returns the word that is the most relevant to the wordpairs_group.
1.27 @@ -239,6 +245,12 @@
1.28      dest="vocabulary_filename")
1.29 
1.30  parser.add_option(
1.31 +    "-w", "--web",
1.32 +    help="Web browser version",
1.33 +    action="store_true",
1.34 +    dest="web")
1.35 +
1.36 +parser.add_option(
1.37      "-2", "--two-words",
1.38      help="find 2 words' sequences",
1.39      action="store_true",
1.40 @@ -571,17 +583,60 @@
1.41              result += [lines[i]]
1.42      return result
1.43 
1.44 +def web_editor(output):
1.45 +    from twisted.internet import reactor
1.46 +    from twisted.web.server import Site
1.47 +    from twisted.web.static import File
1.48 +    from twisted.web.resource import Resource
1.49 +    import json
1.50 +
1.51 +    word_list = []
1.52 +
1.53 +    for o in output:
1.54 +        a = re.split('\s+', o.strip(), 2)
1.55 +        a = a + ['']*(3-len(a))
1.56 +        word_list.append({'number':a[0], 'word':a[1], 'comment':a[2]})
1.57 +
1.58 +    print "Loaded ", len(word_list)
1.59 +
1.60 +    new_words_html = "/home/igor/hg/new-words/web"
1.61 +
1.62 +    class JSONPage(Resource):
1.63 +        isLeaf = True
1.64 +        def render_GET(self, request):
1.65 +            return json.dumps({"word_list": word_list})
1.66 +
1.67 +    class SaveJSON(Resource):
1.68 +        isLeaf = True
1.69 +        def render_POST(self, request):
1.70 +            print json.loads(request.args["selected_words"][0])
1.71 +            return json.dumps({"status": "ok"})
1.72 +
1.73 +    json_page = JSONPage()
1.74 +    save_json = SaveJSON()
1.75 +
1.76 +    resource = File(new_words_html)
1.77 +    resource.putChild("json", json_page)
1.78 +    resource.putChild("save", save_json)
1.79 +
1.80 +    factory = Site(resource)
1.81 +    reactor.listenTCP(8880, factory)
1.82 +    reactor.run()
1.83 +
1.84 +
1.85  def filter_get_words_group_words_add_stat(args):
1.86      vocabulary = load_vocabulary()
1.87      notes = load_notes(notes_filenames())
1.88 
1.89 +    input_lines = []
1.90      if len(args) > 0:
1.91 -        if 'http://' in args[0]:
1.92 -            input_lines = readlines_from_url(args[0])
1.93 -        else:
1.94 -            input_lines = readlines_from_file(args[0])
1.95 +        for arg in args:
1.96 +            if 'http://' in arg:
1.97 +                input_lines += readlines_from_url(arg)
1.98 +            else:
1.99 +                input_lines += readlines_from_file(arg)
1.100      else:
1.101 -        input_lines = readlines_from_stdin()
1.102 +        input_lines += readlines_from_stdin()
1.103 
1.104      if len(input_lines) == 0:
1.105          print >> sys.stderr, "Nothing to do, standard input is empty, exiting."
1.106 @@ -676,6 +731,8 @@
1.107 
1.108      if ('non_interactive' in config or 'text_stats' in config):
1.109          codecs.getwriter("utf-8")(sys.stdout).write("".join(output))
1.110 +    elif config.get('web', False):
1.111 +        web_editor(output)
1.112      else:
1.113          (_, temp1) = tempfile.mkstemp(prefix='new-word')
1.114          (_, temp2) = tempfile.mkstemp(prefix='new-word')
1.115 @@ -759,6 +816,9 @@
1.116  if options.no_words_grouping:
1.117      config['no_words_grouping'] = True
1.118 
1.119 +if options.web:
1.120 +    config['web'] = True
1.121 +
1.122  filter_get_words_group_words_add_stat(args)
1.123 
1.124  #if options.function:
```