new-words

diff new-words.py @ 67:87bb1c5e6616

added de script to misc/
author Igor Chubin <igor@chub.in>
date Wed Mar 28 15:54:30 2012 +0200 (2012-03-28)
parents 1b8b30ad7c95
children 846240941452
line diff
     1.1 --- a/new-words.py	Sat Nov 12 14:03:20 2011 +0100
     1.2 +++ b/new-words.py	Wed Mar 28 15:54:30 2012 +0200
     1.3 @@ -36,7 +36,10 @@
     1.4              'it' : 'italian',
     1.5              'uk' : 'ukrainian',
     1.6          }
     1.7 -        self.stemmer = Stemmer.Stemmer(stemmer_algorithm[language])
     1.8 +        try:
     1.9 +            self.stemmer = Stemmer.Stemmer(stemmer_algorithm[language])
    1.10 +        except:
    1.11 +            self.stemmer = None
    1.12          self.linked_words = linked_words
    1.13  
    1.14      def normalize(self, word):
    1.15 @@ -44,7 +47,10 @@
    1.16          while word in self.linked_words and not word in word_chain:
    1.17              word_chain.append(word)
    1.18              word = self.linked_words[word]
    1.19 -        return self.stemmer.stemWord(word.lower())
    1.20 +        if self.stemmer:
    1.21 +            return self.stemmer.stemWord(word.lower())
    1.22 +        else:
    1.23 +            return word.lower()
    1.24  
    1.25      def best_word_from_group(self, wordpairs_group):
    1.26          """Returns the word that is the most relevant to the wordpairs_group.
    1.27 @@ -239,6 +245,12 @@
    1.28      dest="vocabulary_filename")
    1.29  
    1.30  parser.add_option(
    1.31 +    "-w", "--web",
    1.32 +    help="Web browser version",
    1.33 +    action="store_true",
    1.34 +    dest="web")
    1.35 +
    1.36 +parser.add_option(
    1.37      "-2", "--two-words",
    1.38      help="find 2 words' sequences",
    1.39      action="store_true",
    1.40 @@ -571,17 +583,60 @@
    1.41              result += [lines[i]]
    1.42      return result
    1.43  
    1.44 +def web_editor(output):
    1.45 +    from twisted.internet import reactor
    1.46 +    from twisted.web.server import Site
    1.47 +    from twisted.web.static import File
    1.48 +    from twisted.web.resource import Resource
    1.49 +    import json
    1.50 +
    1.51 +    word_list = []
    1.52 +
    1.53 +    for o in output:
    1.54 +        a = re.split('\s+', o.strip(), 2)
    1.55 +        a = a + ['']*(3-len(a))
    1.56 +        word_list.append({'number':a[0], 'word':a[1], 'comment':a[2]})
    1.57 +    
    1.58 +    print "Loaded ", len(word_list)
    1.59 +
    1.60 +    new_words_html = "/home/igor/hg/new-words/web"
    1.61 +
    1.62 +    class JSONPage(Resource):
    1.63 +        isLeaf = True
    1.64 +        def render_GET(self, request):
    1.65 +            return json.dumps({"word_list": word_list})
    1.66 +
    1.67 +    class SaveJSON(Resource):
    1.68 +        isLeaf = True
    1.69 +        def render_POST(self, request):
    1.70 +            print json.loads(request.args["selected_words"][0])
    1.71 +            return json.dumps({"status": "ok"})
    1.72 +
    1.73 +    json_page = JSONPage()
    1.74 +    save_json = SaveJSON()
    1.75 +
    1.76 +    resource = File(new_words_html)
    1.77 +    resource.putChild("json", json_page)
    1.78 +    resource.putChild("save", save_json)
    1.79 +
    1.80 +    factory = Site(resource)
    1.81 +    reactor.listenTCP(8880, factory)
    1.82 +    reactor.run()
    1.83 +
    1.84 +
    1.85  def filter_get_words_group_words_add_stat(args):
    1.86      vocabulary = load_vocabulary()
    1.87      notes = load_notes(notes_filenames())
    1.88  
    1.89 +    input_lines = []
    1.90      if len(args) > 0:
    1.91 -        if 'http://' in args[0]:
    1.92 -            input_lines = readlines_from_url(args[0])
    1.93 -        else:
    1.94 -            input_lines = readlines_from_file(args[0])
    1.95 +        for arg in args:
    1.96 +            if 'http://' in arg:
    1.97 +                input_lines += readlines_from_url(arg)
    1.98 +            else:
    1.99 +                input_lines += readlines_from_file(arg)
   1.100      else:
   1.101 -        input_lines = readlines_from_stdin()
   1.102 +        input_lines += readlines_from_stdin()
   1.103  
   1.104      if len(input_lines) == 0:
   1.105          print >> sys.stderr, "Nothing to do, standard input is empty, exiting."
   1.106 @@ -676,6 +731,8 @@
   1.107  
   1.108      if ('non_interactive' in config or 'text_stats' in config):
   1.109          codecs.getwriter("utf-8")(sys.stdout).write("".join(output))
   1.110 +    elif config.get('web', False):
   1.111 +        web_editor(output)
   1.112      else:
   1.113          (_, temp1) = tempfile.mkstemp(prefix='new-word')
   1.114          (_, temp2) = tempfile.mkstemp(prefix='new-word')
   1.115 @@ -759,6 +816,9 @@
   1.116  if options.no_words_grouping:
   1.117      config['no_words_grouping'] = True
   1.118  
   1.119 +if options.web:
   1.120 +    config['web'] = True
   1.121 +
   1.122  filter_get_words_group_words_add_stat(args)
   1.123  
   1.124  #if options.function: