From 330f535a10243886ae4735827131a6d533127fa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Scharffe?= Date: Tue, 3 Nov 2015 16:29:07 -0500 Subject: [PATCH 1/3] updating GloVe links and adding 840B corpus --- README.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 3bfa491..2678428 100644 --- a/README.md +++ b/README.md @@ -29,14 +29,15 @@ Please feel free to submit additions to this list through a pull request. | [Google News](GoogleNews-vectors-negative300.bin.gz) | 300 |Google News (100B) | 3M | Google | word2vec | negative sampling | BoW - ~5| [link](http://code.google.com/p/word2vec/) | | [Freebase IDs](https://docs.google.com/file/d/0B7XkCwpI5KDYaDBDQm1tZGNDRHc/edit?usp=sharing) | 1000 | Gooogle News (100B) | 1.4M | Google | word2vec, skip-gram | ? | BoW - ~10 | [link](http://code.google.com/p/word2vec/) | | [Freebase names](https://docs.google.com/file/d/0B7XkCwpI5KDYeFdmcVltWkhtbmM/edit?usp=sharing) | 1000 | Gooogle News (100B) | 1.4M | Google | word2vec, skip-gram | ? | BoW - ~10 | [link](http://code.google.com/p/word2vec/) | -| [Wikipedia+Gigaword 5](http://www-nlp.stanford.edu/data/glove.6B.50d.txt.gz) | 50 | Wikipedia+Gigaword 5 (6B) | 400,000 | GloVe | GloVe | AdaGrad | 10+10 | [link](http://nlp.stanford.edu/projects/glove/) | -| [Wikipedia+Gigaword 5](http://www-nlp.stanford.edu/data/glove.6B.100d.txt.gz) | 100 | Wikipedia+Gigaword 5 (6B) | 400,000 | GloVe | GloVe | AdaGrad | 10+10 | [link](http://nlp.stanford.edu/projects/glove/) | -| [Wikipedia+Gigaword 5](http://www-nlp.stanford.edu/data/glove.6B.200d.txt.gz) | 200 | Wikipedia+Gigaword 5 (6B) | 400,000 | GloVe | GloVe | AdaGrad | 10+10 | [link](http://nlp.stanford.edu/projects/glove/) | -| [Wikipedia+Gigaword 5](http://www-nlp.stanford.edu/data/glove.6B.300d.txt.gz) | 300 | Wikipedia+Gigaword 5 (6B) | 400,000 | GloVe | GloVe | AdaGrad | 10+10 | [link](http://nlp.stanford.edu/projects/glove/) | -| [Common Crawl 42B](http://www-nlp.stanford.edu/data/glove.42B.300d.txt.gz) | 300 | Common Crawl (42B) | ~2M | GloVe | GloVe | GloVe | AdaGrad | [link](http://nlp.stanford.edu/projects/glove/) | -| [Twitter (2B Tweets)](http://www-nlp.stanford.edu/data/glove.twitter.27B.25d.txt.gz) | 25 | Twitter (27B) | ? | GloVe | GloVe | GloVe | AdaGrad | [link](http://nlp.stanford.edu/projects/glove/) | -| [Twitter (2B Tweets)](http://www-nlp.stanford.edu/data/glove.twitter.27B.50d.txt.gz) | 50 | Twitter (27B) | ? | GloVe | GloVe | GloVe | AdaGrad | [link](http://nlp.stanford.edu/projects/glove/) | -| [Twitter (2B Tweets)](http://www-nlp.stanford.edu/data/glove.twitter.27B.100d.txt.gz) | 100 | Twitter (27B) | ? | GloVe | GloVe | GloVe | AdaGrad | [link](http://nlp.stanford.edu/projects/glove/) | -| [Twitter (2B Tweets)](http://www-nlp.stanford.edu/data/glove.twitter.27B.200d.txt.gz) | 200 | Twitter (27B) | ? | GloVe | GloVe | GloVe | AdaGrad | [link](http://nlp.stanford.edu/projects/glove/) | +| [Wikipedia+Gigaword 5](http://nlp.stanford.edu/data/glove.6B.zip) | 50 | Wikipedia+Gigaword 5 (6B) | 400,000 | GloVe | GloVe | AdaGrad | 10+10 | [link](http://nlp.stanford.edu/projects/glove/) | +| [Wikipedia+Gigaword 5](http://nlp.stanford.edu/data/glove.6B.zip) | 100 | Wikipedia+Gigaword 5 (6B) | 400,000 | GloVe | GloVe | AdaGrad | 10+10 | [link](http://nlp.stanford.edu/projects/glove/) | +| [Wikipedia+Gigaword 5](http://nlp.stanford.edu/data/glove.6B.zip) | 200 | Wikipedia+Gigaword 5 (6B) | 400,000 | GloVe | GloVe | AdaGrad | 10+10 | [link](http://nlp.stanford.edu/projects/glove/) | +| [Wikipedia+Gigaword 5](http://nlp.stanford.edu/data/glove.6B.zip) | 300 | Wikipedia+Gigaword 5 (6B) | 400,000 | GloVe | GloVe | AdaGrad | 10+10 | [link](http://nlp.stanford.edu/projects/glove/) | +| [Common Crawl 42B](http://nlp.stanford.edu/data/glove.42B.300d.zip) | 300 | Common Crawl (42B) | 1.9M | GloVe | GloVe | GloVe | AdaGrad | [link](http://nlp.stanford.edu/projects/glove/) | +| [Common Crawl 840B](http://nlp.stanford.edu/data/glove.840B.300d.zip) | 300 | Common Crawl (840B) | 2.2M | GloVe | GloVe | GloVe | AdaGrad | [link](http://nlp.stanford.edu/projects/glove/) | +| [Twitter (2B Tweets)](http://www-nlp.stanford.edu/data/glove.twitter.27B.zip) | 25 | Twitter (27B) | ? | GloVe | GloVe | GloVe | AdaGrad | [link](http://nlp.stanford.edu/projects/glove/) | +| [Twitter (2B Tweets)](http://www-nlp.stanford.edu/data/glove.twitter.27B.zip) | 50 | Twitter (27B) | ? | GloVe | GloVe | GloVe | AdaGrad | [link](http://nlp.stanford.edu/projects/glove/) | +| [Twitter (2B Tweets)](http://www-nlp.stanford.edu/data/glove.twitter.27B.zip) | 100 | Twitter (27B) | ? | GloVe | GloVe | GloVe | AdaGrad | [link](http://nlp.stanford.edu/projects/glove/) | +| [Twitter (2B Tweets)](http://www-nlp.stanford.edu/data/glove.twitter.27B.zip) | 200 | Twitter (27B) | ? | GloVe | GloVe | GloVe | AdaGrad | [link](http://nlp.stanford.edu/projects/glove/) | | [Wikipedia dependency](http://u.cs.biu.ac.il/~yogo/data/syntemb/deps.words.bz2) | 300 | Wikipedia (?) | 174,015 | Levy \& Goldberg | word2vec modified | word2vec | syntactic dependencies | [link](https://levyomer.wordpress.com/2014/04/25/dependency-based-word-embeddings/) | | [DBPedia vectors](https://github.com/idio/wiki2vec/raw/master/torrents/enwiki-gensim-word2vec-1000-nostem-10cbow.torrent) | 1000 | Wikipedia (?) | ? | wiki2vec | word2vec | word2vec, skip-gram | BoW, 10 | [link](https://github.com/idio/wiki2vec#prebuilt-models) | From 71b331a435ef9959ca2a6d2ccd89f8232fbdc8bc Mon Sep 17 00:00:00 2001 From: lechatpito Date: Tue, 24 Nov 2015 17:22:41 +0000 Subject: [PATCH 2/3] Adding a path argument for the service --- word2vec-api.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/word2vec-api.py b/word2vec-api.py index 6c2c4d7..d586043 100644 --- a/word2vec-api.py +++ b/word2vec-api.py @@ -89,11 +89,6 @@ def pageNotFound(error): def raiseError(error): return error -api.add_resource(N_Similarity, '/word2vec/n_similarity') -api.add_resource(Similarity, '/word2vec/similarity') -api.add_resource(MostSimilar, '/word2vec/most_similar') -api.add_resource(Model, '/word2vec/model') - if __name__ == '__main__': global model @@ -103,13 +98,20 @@ def raiseError(error): p.add_argument("--binary", help="Specifies the loaded model is binary") p.add_argument("--host", help="Host name (default: localhost)") p.add_argument("--port", help="Port (default: 5000)") + p.add_argument("--path", help="Path (default: /word2vec)") args = p.parse_args() model_path = args.model if args.model else "./model.bin.gz" binary = True if args.binary else False host = args.host if args.host else "localhost" + path = args.path if args.path else "/word2vec" port = int(args.port) if args.port else 5000 if not args.model: - print "Usage: word2vec-apy.py --model path/to/the/model [--host host --port 1234]" + print "Usage: word2vec-apy.py --model path/to/the/model [--host host --path /path --port 1234]" model = w.load_word2vec_format(model_path, binary=binary) + api.add_resource(N_Similarity, path+'/n_similarity') + api.add_resource(Similarity, path+'/similarity') + api.add_resource(MostSimilar, path+'/most_similar') + api.add_resource(Model, path+'/model') + app.run(host=host, port=port) From a1274803497f9eb22533af0579c26042b948d118 Mon Sep 17 00:00:00 2001 From: lechatpito Date: Tue, 24 Nov 2015 20:28:03 +0000 Subject: [PATCH 3/3] adding requirements.txt --- requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5731a8c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +Flask==0.10.1 +Flask-RESTful==0.2.12 +gensim==0.12.3 +