├── .gitignore ├── LICENSE ├── README.md ├── dataset └── .gitkeep ├── docs └── images │ ├── ELMo.png │ ├── elmo_classification_accuracy.png │ ├── lstm_model.png │ └── model.png ├── elmo_experiment └── notebooks │ └── elmo_text_classification_on_imdb.ipynb └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # 
mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # idea 107 | .idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Junya Kamura 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # elmo_experiments 2 | This repository contains experiments with ELMo, a deep contextualized word representation, implemented with Keras. 3 | 4 | 5 | 6 | 7 | ELMo produces contextualized word representations using character-based word representations and bidirectional LSTMs. 8 | TensorFlow Hub provides an ELMo module that was trained on the 1 Billion Word Benchmark. 
9 | 10 | # Getting started 11 | ``` 12 | $ pip install -r requirements.txt 13 | ``` 14 | NOTE: If you want to use TensorFlow with CPU-only support, you need to install it yourself. 15 | 16 | # Experiments 17 | ## Text classification on IMDB 18 | This experiment performs text classification with ELMo, LSTM, and word embeddings on the IMDB dataset. 19 | It evaluates ELMo text classification and compares it to an LSTM model. 20 | 21 | https://github.com/kamujun/elmo_experiments/blob/master/elmo_experiment/notebooks/elmo_text_classification_on_imdb.ipynb 22 | 23 | ## Result 24 | ELMo's 1st epoch gets the best score. 25 | 26 | 27 | -------------------------------------------------------------------------------- /dataset/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kamujun/elmo_experiments/6a67249e180a72b5ff7083f2480e78142e11935f/dataset/.gitkeep -------------------------------------------------------------------------------- /docs/images/ELMo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kamujun/elmo_experiments/6a67249e180a72b5ff7083f2480e78142e11935f/docs/images/ELMo.png -------------------------------------------------------------------------------- /docs/images/elmo_classification_accuracy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kamujun/elmo_experiments/6a67249e180a72b5ff7083f2480e78142e11935f/docs/images/elmo_classification_accuracy.png -------------------------------------------------------------------------------- /docs/images/lstm_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kamujun/elmo_experiments/6a67249e180a72b5ff7083f2480e78142e11935f/docs/images/lstm_model.png -------------------------------------------------------------------------------- /docs/images/model.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kamujun/elmo_experiments/6a67249e180a72b5ff7083f2480e78142e11935f/docs/images/model.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.2.2 2 | appnope==0.1.0 3 | astor==0.6.2 4 | backcall==0.1.0 5 | bleach==1.5.0 6 | colorama==0.3.9 7 | cycler==0.10.0 8 | decorator==4.3.0 9 | entrypoints==0.2.3 10 | gast==0.2.0 11 | grpcio==1.12.0 12 | h5py==2.8.0rc1 13 | html5lib==0.9999999 14 | ipykernel==4.8.2 15 | ipython==6.4.0 16 | ipython-genutils==0.2.0 17 | ipywidgets==7.2.1 18 | jedi==0.12.0 19 | Jinja2==2.10 20 | joblib==0.11 21 | jsonschema==2.6.0 22 | jupyter==1.0.0 23 | jupyter-client==5.2.3 24 | jupyter-console==5.2.0 25 | jupyter-core==4.4.0 26 | Keras==2.1.6 27 | kiwisolver==1.0.1 28 | Markdown==2.6.11 29 | MarkupSafe==1.0 30 | matplotlib==2.2.2 31 | memory-profiler==0.52.0 32 | mistune==0.8.3 33 | nbconvert==5.3.1 34 | nbformat==4.4.0 35 | notebook==5.5.0 36 | numpy==1.14.3 37 | pandas==0.23.0 38 | pandocfilters==1.4.2 39 | parso==0.2.1 40 | pexpect==4.5.0 41 | pickleshare==0.7.4 42 | prompt-toolkit==1.0.15 43 | protobuf==3.5.2.post1 44 | psutil==5.4.5 45 | ptyprocess==0.5.2 46 | pydot==1.2.4 47 | Pygments==2.2.0 48 | pyparsing==2.2.0 49 | python-dateutil==2.7.3 50 | pytz==2018.4 51 | pywinpty==0.5.1 52 | PyYAML==3.12 53 | pyzmq==17.0.0 54 | qtconsole==4.3.1 55 | scipy==1.1.0 56 | seaborn==0.8.1 57 | Send2Trash==1.5.0 58 | simplegeneric==0.8.1 59 | six==1.11.0 60 | tensorboard==1.8.0 61 | tensorflow-gpu==1.8.0 62 | tensorflow-hub==0.1.0 63 | termcolor==1.1.0 64 | terminado==0.8.1 65 | testpath==0.3.1 66 | tornado==5.0.2 67 | traitlets==4.3.2 68 | wcwidth==0.1.7 69 | Werkzeug==0.14.1 70 | widgetsnbextension==3.2.1 71 | --------------------------------------------------------------------------------