# HG changeset patch # User Igor Chubin # Date 1332850165 -7200 # Node ID 5a003076eb11745b9826e00c536f5969a70d6bb3 # Parent c3adf6452eda6173fad805255218bfd59d594e6d -w for web support (alpha) diff -r c3adf6452eda -r 5a003076eb11 new-words.py --- a/new-words.py Sat Nov 12 14:03:54 2011 +0100 +++ b/new-words.py Tue Mar 27 14:09:25 2012 +0200 @@ -36,7 +36,10 @@ 'it' : 'italian', 'uk' : 'ukrainian', } - self.stemmer = Stemmer.Stemmer(stemmer_algorithm[language]) + try: + self.stemmer = Stemmer.Stemmer(stemmer_algorithm[language]) + except: + self.stemmer = None self.linked_words = linked_words def normalize(self, word): @@ -44,7 +47,10 @@ while word in self.linked_words and not word in word_chain: word_chain.append(word) word = self.linked_words[word] - return self.stemmer.stemWord(word.lower()) + if self.stemmer: + return self.stemmer.stemWord(word.lower()) + else: + return word.lower() def best_word_from_group(self, wordpairs_group): """Returns the word that is the most relevant to the wordpairs_group. @@ -239,6 +245,12 @@ dest="vocabulary_filename") parser.add_option( + "-w", "--web", + help="Web browser version", + action="store_true", + dest="web") + +parser.add_option( "-2", "--two-words", help="find 2 words' sequences", action="store_true", @@ -571,17 +583,60 @@ result += [lines[i]] return result +def web_editor(output): + from twisted.internet import reactor + from twisted.web.server import Site + from twisted.web.static import File + from twisted.web.resource import Resource + import json + + word_list = [] + + for o in output: + a = re.split('\s+', o.strip(), 2) + a = a + ['']*(3-len(a)) + word_list.append({'number':a[0], 'word':a[1], 'comment':a[2]}) + + print "Loaded ", len(word_list) + + new_words_html = "/home/igor/hg/new-words/web" + + class JSONPage(Resource): + isLeaf = True + def render_GET(self, request): + return json.dumps({"word_list": word_list}) + + class SaveJSON(Resource): + isLeaf = True + def render_POST(self, request): + print json.loads(request.args["selected_words"][0]) + return json.dumps({"status": "ok"}) + + json_page = JSONPage() + save_json = SaveJSON() + + resource = File(new_words_html) + resource.putChild("json", json_page) + resource.putChild("save", save_json) + + factory = Site(resource) + reactor.listenTCP(8880, factory) + reactor.run() + + def filter_get_words_group_words_add_stat(args): vocabulary = load_vocabulary() notes = load_notes(notes_filenames()) + input_lines = [] if len(args) > 0: - if 'http://' in args[0]: - input_lines = readlines_from_url(args[0]) - else: - input_lines = readlines_from_file(args[0]) + for arg in args: + if 'http://' in arg: + input_lines += readlines_from_url(arg) + else: + input_lines += readlines_from_file(arg) else: - input_lines = readlines_from_stdin() + input_lines += readlines_from_stdin() if len(input_lines) == 0: print >> sys.stderr, "Nothing to do, standard input is empty, exiting." @@ -676,6 +731,8 @@ if ('non_interactive' in config or 'text_stats' in config): codecs.getwriter("utf-8")(sys.stdout).write("".join(output)) + elif config.get('web', False): + web_editor(output) else: (_, temp1) = tempfile.mkstemp(prefix='new-word') (_, temp2) = tempfile.mkstemp(prefix='new-word') @@ -759,6 +816,9 @@ if options.no_words_grouping: config['no_words_grouping'] = True +if options.web: + config['web'] = True + filter_get_words_group_words_add_stat(args) #if options.function: