├── .editorconfig ├── .github └── ISSUE_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── CONTRIBUTING.rst ├── HISTORY.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── docs ├── Makefile ├── authors.rst ├── conf.py ├── contributing.rst ├── demo.jpg ├── history.rst ├── index.rst ├── installation.rst ├── make.bat ├── readme.rst └── usage.rst ├── examples ├── config.json ├── fake_pytorch_model.bin ├── labels.txt ├── model_store │ └── .gitkeep ├── serve.ipynb ├── serve_pretrained.ipynb ├── start_demo.ipynb └── torchserve.config ├── lit_ner ├── __init__.py ├── lit_ner.py ├── serve.py ├── serve_pretrained.py └── utils.py ├── requirements.txt ├── requirements_dev.txt ├── setup.cfg ├── setup.py ├── tests ├── __init__.py └── test_lit_ner.py └── tox.ini /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | * lit-NER version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 14 | If there was a crash, please include the traceback here. 15 | ``` 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # IDE settings 105 | .vscode/ -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Config file for automatic testing at travis-ci.com 2 | 3 | language: python 4 | python: 5 | - 3.8 6 | - 3.7 7 | - 3.6 8 | - 3.5 9 | 10 | # Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors 11 | install: pip install -U tox-travis 12 | 13 | # Command to run tests, e.g. python setup.py test 14 | script: tox 15 | 16 | 17 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Development Lead 6 | ---------------- 7 | 8 | * Ceyda Cinarel <15624271+cceyda@users.noreply.github.com> 9 | 10 | Contributors 11 | ------------ 12 | 13 | None yet. Why not be the first? 14 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Contributing 5 | ============ 6 | 7 | Contributions are welcome, and they are greatly appreciated! Every little bit 8 | helps, and credit will always be given. 9 | 10 | You can contribute in many ways: 11 | 12 | Types of Contributions 13 | ---------------------- 14 | 15 | Report Bugs 16 | ~~~~~~~~~~~ 17 | 18 | Report bugs at https://github.com/cceyda/lit_ner/issues. 19 | 20 | If you are reporting a bug, please include: 21 | 22 | * Your operating system name and version. 23 | * Any details about your local setup that might be helpful in troubleshooting. 24 | * Detailed steps to reproduce the bug. 25 | 26 | Fix Bugs 27 | ~~~~~~~~ 28 | 29 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help 30 | wanted" is open to whoever wants to implement it. 31 | 32 | Implement Features 33 | ~~~~~~~~~~~~~~~~~~ 34 | 35 | Look through the GitHub issues for features. Anything tagged with "enhancement" 36 | and "help wanted" is open to whoever wants to implement it. 37 | 38 | Write Documentation 39 | ~~~~~~~~~~~~~~~~~~~ 40 | 41 | lit-NER could always use more documentation, whether as part of the 42 | official lit-NER docs, in docstrings, or even on the web in blog posts, 43 | articles, and such. 
44 | 45 | Submit Feedback 46 | ~~~~~~~~~~~~~~~ 47 | 48 | The best way to send feedback is to file an issue at https://github.com/cceyda/lit_ner/issues. 49 | 50 | If you are proposing a feature: 51 | 52 | * Explain in detail how it would work. 53 | * Keep the scope as narrow as possible, to make it easier to implement. 54 | * Remember that this is a volunteer-driven project, and that contributions 55 | are welcome :) 56 | 57 | Get Started! 58 | ------------ 59 | 60 | Ready to contribute? Here's how to set up `lit_ner` for local development. 61 | 62 | 1. Fork the `lit_ner` repo on GitHub. 63 | 2. Clone your fork locally:: 64 | 65 | $ git clone git@github.com:your_name_here/lit_ner.git 66 | 67 | 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: 68 | 69 | $ mkvirtualenv lit_ner 70 | $ cd lit_ner/ 71 | $ python setup.py develop 72 | 73 | 4. Create a branch for local development:: 74 | 75 | $ git checkout -b name-of-your-bugfix-or-feature 76 | 77 | Now you can make your changes locally. 78 | 79 | 5. When you're done making changes, check that your changes pass flake8 and the 80 | tests, including testing other Python versions with tox:: 81 | 82 | $ flake8 lit_ner tests 83 | $ python setup.py test or pytest 84 | $ tox 85 | 86 | To get flake8 and tox, just pip install them into your virtualenv. 87 | 88 | 6. Commit your changes and push your branch to GitHub:: 89 | 90 | $ git add . 91 | $ git commit -m "Your detailed description of your changes." 92 | $ git push origin name-of-your-bugfix-or-feature 93 | 94 | 7. Submit a pull request through the GitHub website. 95 | 96 | Pull Request Guidelines 97 | ----------------------- 98 | 99 | Before you submit a pull request, check that it meets these guidelines: 100 | 101 | 1. The pull request should include tests. 102 | 2. If the pull request adds functionality, the docs should be updated. Put 103 | your new functionality into a function with a docstring, and add the 104 | feature to the list in README.rst. 105 | 3. The pull request should work for Python 3.5, 3.6, 3.7 and 3.8, and for PyPy. Check 106 | https://travis-ci.com/cceyda/lit_ner/pull_requests 107 | and make sure that the tests pass for all supported Python versions. 108 | 109 | Tips 110 | ---- 111 | 112 | To run a subset of tests:: 113 | 114 | 115 | $ python -m unittest tests.test_lit_ner 116 | 117 | Deploying 118 | --------- 119 | 120 | A reminder for the maintainers on how to deploy. 121 | Make sure all your changes are committed (including an entry in HISTORY.rst). 122 | Then run:: 123 | 124 | $ bump2version patch # possible: major / minor / patch 125 | $ git push 126 | $ git push --tags 127 | 128 | Travis will then deploy to PyPI if tests pass. 129 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | History 3 | ======= 4 | 5 | 0.1.0 (2020-09-18) 6 | ------------------ 7 | 8 | * First release on PyPI. 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache Software License 2.0 2 | 3 | Copyright (c) 2020, Ceyda Cinarel 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 
7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | 17 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS.rst 2 | include CONTRIBUTING.rst 3 | include HISTORY.rst 4 | include LICENSE 5 | include README.rst 6 | 7 | recursive-include tests * 8 | recursive-exclude * __pycache__ 9 | recursive-exclude * *.py[co] 10 | 11 | recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | 4 | define BROWSER_PYSCRIPT 5 | import os, webbrowser, sys 6 | 7 | from urllib.request import pathname2url 8 | 9 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 10 | endef 11 | export BROWSER_PYSCRIPT 12 | 13 | define PRINT_HELP_PYSCRIPT 14 | import re, sys 15 | 16 | for line in sys.stdin: 17 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 18 | if match: 19 | target, help = match.groups() 20 | print("%-20s %s" % (target, help)) 21 | endef 22 | export PRINT_HELP_PYSCRIPT 23 | 24 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 25 | 26 | help: 27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 28 | 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 30 | 31 | clean-build: ## remove build artifacts 32 | rm -fr build/ 33 | rm -fr dist/ 34 | rm -fr .eggs/ 35 | find . -name '*.egg-info' -exec rm -fr {} + 36 | find . -name '*.egg' -exec rm -f {} + 37 | 38 | clean-pyc: ## remove Python file artifacts 39 | find . -name '*.pyc' -exec rm -f {} + 40 | find . -name '*.pyo' -exec rm -f {} + 41 | find . -name '*~' -exec rm -f {} + 42 | find . -name '__pycache__' -exec rm -fr {} + 43 | 44 | clean-test: ## remove test and coverage artifacts 45 | rm -fr .tox/ 46 | rm -f .coverage 47 | rm -fr htmlcov/ 48 | rm -fr .pytest_cache 49 | 50 | lint: ## check style with flake8 51 | flake8 lit_ner tests 52 | 53 | test: ## run tests quickly with the default Python 54 | python setup.py test 55 | 56 | test-all: ## run tests on every Python version with tox 57 | tox 58 | 59 | coverage: ## check code coverage quickly with the default Python 60 | coverage run --source lit_ner setup.py test 61 | coverage report -m 62 | coverage html 63 | $(BROWSER) htmlcov/index.html 64 | 65 | docs: ## generate Sphinx HTML documentation, including API docs 66 | rm -f docs/lit_ner.rst 67 | rm -f docs/modules.rst 68 | sphinx-apidoc -o docs/ lit_ner 69 | $(MAKE) -C docs clean 70 | $(MAKE) -C docs html 71 | $(BROWSER) docs/_build/html/index.html 72 | 73 | servedocs: docs ## compile the docs watching for changes 74 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 
75 | 76 | release: dist ## package and upload a release 77 | twine upload dist/* 78 | 79 | dist: clean ## builds source and wheel package 80 | python setup.py sdist 81 | python setup.py bdist_wheel 82 | ls -l dist 83 | 84 | install: clean ## install the package to the active Python's site-packages 85 | python setup.py install 86 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | lit-NER 3 | ======= 4 | 5 | Streamlit demo for HuggingFace NER models 6 | 7 | Public demo_ 8 | 9 | .. _demo: https://share.streamlit.io/cceyda/lit-ner/public/lit_ner.py 10 | 11 | .. image:: docs/demo.jpg 12 | :width: 200 13 | :alt: Demo view 14 | 15 | ***** 16 | Requirements 17 | ***** 18 | 19 | spacy>=2.3.2 20 | 21 | streamlit>=0.62.1 22 | 23 | transformers>=3.1.0 24 | 25 | torchserve_ 26 | 27 | .. _torchserve: http://pytorch.org/serve/install.html 28 | 29 | (might also work with lower versions...not tested) 30 | 31 | ***** 32 | How To 33 | ***** 34 | 35 | :code:`git clone https://github.com/cceyda/lit-NER.git` 36 | 37 | Serve a model using torchserve 38 | ######## 39 | 40 | If you don't have a NER model use :code:`examples/serve_pretrained.ipynb` 41 | 42 | OR 43 | 44 | If you have a pretrained model use :code:`examples/serve.ipynb` 45 | 46 | Start the Streamlit Demo 47 | ######## 48 | 49 | :code:`examples/start_demo.ipynb` 50 | 51 | 52 | Features 53 | -------- 54 | 55 | * TODO 56 | 57 | Credits 58 | ------- 59 | 60 | This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. 61 | 62 | * Free software: Apache Software License 2.0 63 | 64 | .. _Cookiecutter: https://github.com/audreyr/cookiecutter 65 | .. _`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage 66 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = lit_ner 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # lit_ner documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Jun 9 13:47:02 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 
11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another 16 | # directory, add these directories to sys.path here. If the directory is 17 | # relative to the documentation root, use os.path.abspath to make it 18 | # absolute, like shown here. 19 | # 20 | import os 21 | import sys 22 | 23 | sys.path.insert(0, os.path.abspath("..")) 24 | 25 | import lit_ner 26 | 27 | # -- General configuration --------------------------------------------- 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | # 31 | # needs_sphinx = '1.0' 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 35 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ["_templates"] 39 | 40 | # The suffix(es) of source filenames. 41 | # You can specify multiple suffix as a list of string: 42 | # 43 | # source_suffix = ['.rst', '.md'] 44 | source_suffix = ".rst" 45 | 46 | # The master toctree document. 47 | master_doc = "index" 48 | 49 | # General information about the project. 50 | project = "lit-NER" 51 | copyright = "2020, Ceyda Cinarel" 52 | author = "Ceyda Cinarel" 53 | 54 | # The version info for the project you're documenting, acts as replacement 55 | # for |version| and |release|, also used in various other places throughout 56 | # the built documents. 57 | # 58 | # The short X.Y version. 59 | version = lit_ner.__version__ 60 | # The full version, including alpha/beta/rc tags. 61 | release = lit_ner.__version__ 62 | 63 | # The language for content autogenerated by Sphinx. Refer to documentation 64 | # for a list of supported languages. 65 | # 66 | # This is also used if you do content translation via gettext catalogs. 67 | # Usually you set "language" from the command line for these cases. 68 | language = None 69 | 70 | # List of patterns, relative to source directory, that match files and 71 | # directories to ignore when looking for source files. 72 | # This patterns also effect to html_static_path and html_extra_path 73 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 74 | 75 | # The name of the Pygments (syntax highlighting) style to use. 76 | pygments_style = "sphinx" 77 | 78 | # If true, `todo` and `todoList` produce output, else they produce nothing. 79 | todo_include_todos = False 80 | 81 | 82 | # -- Options for HTML output ------------------------------------------- 83 | 84 | # The theme to use for HTML and HTML Help pages. See the documentation for 85 | # a list of builtin themes. 86 | # 87 | html_theme = "alabaster" 88 | 89 | # Theme options are theme-specific and customize the look and feel of a 90 | # theme further. For a list of options available for each theme, see the 91 | # documentation. 92 | # 93 | # html_theme_options = {} 94 | 95 | # Add any paths that contain custom static files (such as style sheets) here, 96 | # relative to this directory. They are copied after the builtin static files, 97 | # so a file named "default.css" will overwrite the builtin "default.css". 98 | html_static_path = ["_static"] 99 | 100 | 101 | # -- Options for HTMLHelp output --------------------------------------- 102 | 103 | # Output file base name for HTML help builder. 
104 | htmlhelp_basename = "lit_nerdoc" 105 | 106 | 107 | # -- Options for LaTeX output ------------------------------------------ 108 | 109 | latex_elements = { 110 | # The paper size ('letterpaper' or 'a4paper'). 111 | # 112 | # 'papersize': 'letterpaper', 113 | # The font size ('10pt', '11pt' or '12pt'). 114 | # 115 | # 'pointsize': '10pt', 116 | # Additional stuff for the LaTeX preamble. 117 | # 118 | # 'preamble': '', 119 | # Latex figure (float) alignment 120 | # 121 | # 'figure_align': 'htbp', 122 | } 123 | 124 | # Grouping the document tree into LaTeX files. List of tuples 125 | # (source start file, target name, title, author, documentclass 126 | # [howto, manual, or own class]). 127 | latex_documents = [ 128 | (master_doc, "lit_ner.tex", "lit-NER Documentation", "Ceyda Cinarel", "manual"), 129 | ] 130 | 131 | 132 | # -- Options for manual page output ------------------------------------ 133 | 134 | # One entry per manual page. List of tuples 135 | # (source start file, name, description, authors, manual section). 136 | man_pages = [(master_doc, "lit_ner", "lit-NER Documentation", [author], 1)] 137 | 138 | 139 | # -- Options for Texinfo output ---------------------------------------- 140 | 141 | # Grouping the document tree into Texinfo files. List of tuples 142 | # (source start file, target name, title, author, 143 | # dir menu entry, description, category) 144 | texinfo_documents = [ 145 | ( 146 | master_doc, 147 | "lit_ner", 148 | "lit-NER Documentation", 149 | author, 150 | "lit_ner", 151 | "One line description of project.", 152 | "Miscellaneous", 153 | ), 154 | ] 155 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cceyda/lit-NER/4a43c193c571c310376e63c4239d80bc94814db2/docs/demo.jpg -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../HISTORY.rst 2 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to lit-NER's documentation! 2 | ====================================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | :caption: Contents: 7 | 8 | readme 9 | installation 10 | usage 11 | modules 12 | contributing 13 | authors 14 | history 15 | 16 | Indices and tables 17 | ================== 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | 8 | Stable release 9 | -------------- 10 | 11 | To install lit-NER, run this command in your terminal: 12 | 13 | .. code-block:: console 14 | 15 | $ pip install lit_ner 16 | 17 | This is the preferred method to install lit-NER, as it will always install the most recent stable release. 
18 | 19 | If you don't have `pip`_ installed, this `Python installation guide`_ can guide 20 | you through the process. 21 | 22 | .. _pip: https://pip.pypa.io 23 | .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ 24 | 25 | 26 | From sources 27 | ------------ 28 | 29 | The sources for lit-NER can be downloaded from the `Github repo`_. 30 | 31 | You can either clone the public repository: 32 | 33 | .. code-block:: console 34 | 35 | $ git clone git://github.com/cceyda/lit_ner 36 | 37 | Or download the `tarball`_: 38 | 39 | .. code-block:: console 40 | 41 | $ curl -OJL https://github.com/cceyda/lit_ner/tarball/master 42 | 43 | Once you have a copy of the source, you can install it with: 44 | 45 | .. code-block:: console 46 | 47 | $ python setup.py install 48 | 49 | 50 | .. _Github repo: https://github.com/cceyda/lit_ner 51 | .. _tarball: https://github.com/cceyda/lit_ner/tarball/master 52 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=lit_ner 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed, 20 | echo.then set the SPHINXBUILD environment variable to point to the full 21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the 22 | echo.Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../README.rst 2 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Usage 3 | ===== 4 | 5 | To use lit-NER in a project:: 6 | 7 | import lit_ner 8 | -------------------------------------------------------------------------------- /examples/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_name":"dslim/bert-base-NER" 3 | } -------------------------------------------------------------------------------- /examples/fake_pytorch_model.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cceyda/lit-NER/4a43c193c571c310376e63c4239d80bc94814db2/examples/fake_pytorch_model.bin -------------------------------------------------------------------------------- /examples/labels.txt: -------------------------------------------------------------------------------- 1 | MISC 2 | PER 3 | ORG 4 | LOC 5 | O -------------------------------------------------------------------------------- /examples/model_store/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cceyda/lit-NER/4a43c193c571c310376e63c4239d80bc94814db2/examples/model_store/.gitkeep -------------------------------------------------------------------------------- /examples/serve.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TorchServe\n", 8 | "\n", 9 | "Run this file to serve a custom NER model\n", 10 | "\n", 11 | "You can change the ports @ torchserve.config -> also change lit_ner.py API_URL" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "#Change these\n", 21 | "model_dir = \"/mnt/outputs/ner_bert/serve/en\"\n", 22 | "model_store_dir = \"/mnt/pretrained/model_store\"\n", 23 | "\n", 24 | "#Make sure these exist\n", 25 | "serialized = model_dir + \"/pytorch_model.bin\"\n", 26 | "extras = f\"{model_dir}/labels.txt,{model_dir}/vocab.txt,{model_dir}/vocab.txt,{model_dir}/special_tokens_map.json,{model_dir}/tokenizer_config.json,{model_dir}/config.json\"" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "!torch-model-archiver --model-name lit_ner_model \\\n", 36 | "--version 0.1 \\\n", 37 | "--serialized-file \"$serialized\" \\\n", 38 | "--handler ../lit_ner/serve.py \\\n", 39 | "--runtime python3 \\\n", 40 | "--extra-files \"$extras\" \\\n", 41 | "--export-path \"$model_store_dir\" \\\n", 42 | "# --force " 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "#start server on the foreground\n", 52 | "!torchserve --start --ncs --model-store \"$model_store_dir\" --models ner_model=lit_ner_model.mar --ts-config ./torchserve.config --foreground" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "# HELP" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "!torchserve --stop" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 
75 | "outputs": [], 76 | "source": [ 77 | "!torchserve --help" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "!torch-model-archiver --help" 87 | ] 88 | } 89 | ], 90 | "metadata": { 91 | "kernelspec": { 92 | "display_name": "Python3-singularity-c", 93 | "language": "python3", 94 | "name": "python3_singularity" 95 | }, 96 | "language_info": { 97 | "codemirror_mode": { 98 | "name": "ipython", 99 | "version": 3 100 | }, 101 | "file_extension": ".py", 102 | "mimetype": "text/x-python", 103 | "name": "python", 104 | "nbconvert_exporter": "python", 105 | "pygments_lexer": "ipython3", 106 | "version": "3.6.9" 107 | }, 108 | "widgets": { 109 | "application/vnd.jupyter.widget-state+json": { 110 | "state": {}, 111 | "version_major": 2, 112 | "version_minor": 0 113 | } 114 | } 115 | }, 116 | "nbformat": 4, 117 | "nbformat_minor": 4 118 | } 119 | -------------------------------------------------------------------------------- /examples/serve_pretrained.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TorchServe\n", 8 | "\n", 9 | "Run this file to locally serve a pretrained NER model available @ https://huggingface.co/models?search=ner&filter=token-classification\n", 10 | "You can change which model is served @ config.json\n", 11 | "You can change the ports @ torchserve.config -> also change lit_ner.py API_URL" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "model_store_dir = \"./model_store\"\n", 21 | "serialized = \"./fake_pytorch_model.bin\" # just a fake file because --serialized-file is a required argument\n", 22 | "model_config = \"./config.json\"" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "!torch-model-archiver --model-name lit_ner_model \\\n", 32 | "--version 0.1 \\\n", 33 | "--serialized-file \"$serialized\" \\\n", 34 | "--handler ../lit_ner/serve_pretrained.py \\\n", 35 | "--runtime python3 \\\n", 36 | "--extra-files \"$model_config\" \\\n", 37 | "--export-path \"$model_store_dir\" \\\n", 38 | "# --force " 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "#start server on the foreground\n", 48 | "!torchserve --start --ncs --model-store \"$model_store_dir\" --models ner_model=lit_ner_model.mar --ts-config ./torchserve.config --foreground" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "# HELP" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "!torchserve --stop" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "!torchserve --help" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "!torch-model-archiver --help" 83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "kernelspec": { 88 | "display_name": "Python3-singularity-c", 89 | "language": "python3", 90 | "name": "python3_singularity" 91 | }, 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython", 95 | 
"version": 3 96 | }, 97 | "file_extension": ".py", 98 | "mimetype": "text/x-python", 99 | "name": "python", 100 | "nbconvert_exporter": "python", 101 | "pygments_lexer": "ipython3", 102 | "version": "3.6.9" 103 | }, 104 | "widgets": { 105 | "application/vnd.jupyter.widget-state+json": { 106 | "state": {}, 107 | "version_major": 2, 108 | "version_minor": 0 109 | } 110 | } 111 | }, 112 | "nbformat": 4, 113 | "nbformat_minor": 4 114 | } 115 | -------------------------------------------------------------------------------- /examples/start_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Run Streamlit Demo\n", 8 | "\n", 9 | "If you have different labels change ./labels.txt to have colored output" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "!streamlit run ../lit_ner/lit_ner.py \\\n", 19 | "# --server.port 7864" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [] 28 | } 29 | ], 30 | "metadata": { 31 | "kernelspec": { 32 | "display_name": "Python3-singularity-c", 33 | "language": "python3", 34 | "name": "python3_singularity" 35 | }, 36 | "language_info": { 37 | "codemirror_mode": { 38 | "name": "ipython", 39 | "version": 3 40 | }, 41 | "file_extension": ".py", 42 | "mimetype": "text/x-python", 43 | "name": "python", 44 | "nbconvert_exporter": "python", 45 | "pygments_lexer": "ipython3", 46 | "version": "3.6.9" 47 | }, 48 | "widgets": { 49 | "application/vnd.jupyter.widget-state+json": { 50 | "state": {}, 51 | "version_major": 2, 52 | "version_minor": 0 53 | } 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 4 58 | } 59 | -------------------------------------------------------------------------------- /examples/torchserve.config: -------------------------------------------------------------------------------- 1 | inference_address=http://127.0.0.1:7863 2 | management_address=http://127.0.0.1:8863 3 | metrics_address=http://127.0.0.1:8864 4 | number_of_gpu=1 5 | #install_py_dep_per_model=true 6 | #enable_metrics_api=false -------------------------------------------------------------------------------- /lit_ner/__init__.py: -------------------------------------------------------------------------------- 1 | """Top-level package for lit-NER.""" 2 | 3 | __author__ = """Ceyda Cinarel""" 4 | __email__ = "15624271+cceyda@users.noreply.github.com" 5 | __version__ = "0.1.0" 6 | -------------------------------------------------------------------------------- /lit_ner/lit_ner.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import streamlit as st 4 | from spacy import displacy 5 | 6 | import httpx 7 | from utils import hf_ents_to_displacy_format, make_color_palette 8 | from httpx import HTTPError 9 | import random 10 | 11 | # Modify these 12 | API_URL = "http://127.0.0.1:7863/predictions/" 13 | MODEL_NAME = "ner_model" 14 | LOCAL = False 15 | 16 | # from https://github.com/explosion/spacy-streamlit/util.py#L26 17 | WRAPPER = """
{}
""" 18 | 19 | if not LOCAL: 20 | API_URL = "https://api-inference.huggingface.co/models/" 21 | MODEL_NAME = "dslim/bert-base-NER" 22 | API_URL = st.sidebar.text_input("API URL", API_URL) 23 | MODEL_NAME = st.sidebar.text_input("MODEL NAME", MODEL_NAME) 24 | st.write(f"API endpoint: {API_URL}{MODEL_NAME}") 25 | 26 | 27 | def raise_on_not200(response): 28 | if response.status_code != 200: 29 | st.write("There was an error!") 30 | st.write(response) 31 | 32 | 33 | client = httpx.Client(timeout=1000, event_hooks={"response": [raise_on_not200]}) 34 | 35 | 36 | @st.cache(allow_output_mutation=True) 37 | def get_colormap(): 38 | with open("./labels.txt", "r") as f: 39 | labels = f.read().splitlines() 40 | color_map = make_color_palette(labels) 41 | return color_map 42 | 43 | 44 | def add_colormap(labels): 45 | for label in labels: 46 | if label not in color_map: 47 | rand_color = "#"+"%06x" % random.randint(0, 0xFFFFFF) 48 | color_map[label]=rand_color 49 | return color_map 50 | 51 | 52 | def sanitize_input(input_): 53 | clean = str(input_) 54 | return clean 55 | 56 | 57 | def predict(model, input_): 58 | res = client.post(API_URL + model, json=input_) 59 | return res.json() 60 | 61 | 62 | def display(bert_ents): 63 | bert_doc = hf_ents_to_displacy_format(bert_ents, ignore_entities=["O"]) 64 | labels = list(set([a["label"] for a in bert_doc["ents"]])) 65 | color_map = add_colormap(labels) 66 | html = displacy.render(bert_doc, manual=True, style="ent", options={"colors": color_map}) 67 | 68 | html = html.replace("\n", " ") 69 | st.write(WRAPPER.format(html), unsafe_allow_html=True) 70 | 71 | 72 | color_map = get_colormap() 73 | 74 | st.header("NER") 75 | input_ = st.text_input("Input", "My name is Ceyda and I live in Seoul, Korea.") 76 | input_ = sanitize_input(input_) 77 | bert_ents = predict(MODEL_NAME, input_) 78 | if bert_ents: 79 | if isinstance(bert_ents, dict) and "error" in bert_ents: 80 | st.write(bert_ents) 81 | else: 82 | display(bert_ents) 83 | st.write(bert_ents) 84 | -------------------------------------------------------------------------------- /lit_ner/serve.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import re 4 | from abc import ABC 5 | 6 | import numpy as np 7 | import torch 8 | from ts.torch_handler.base_handler import BaseHandler 9 | 10 | from transformers import ( 11 | AutoConfig, 12 | AutoModelForTokenClassification, 13 | AutoTokenizer, 14 | pipeline, 15 | ) 16 | 17 | # from ts.metrics.dimension import Dimension 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | def get_labels(path): 22 | if path: 23 | with open(path, "r") as f: 24 | labels = f.read().splitlines() 25 | if "O" not in labels: 26 | labels = ["O"] + labels 27 | return labels 28 | else: 29 | return [ 30 | "O", 31 | "B-MISC", 32 | "I-MISC", 33 | "B-PER", 34 | "I-PER", 35 | "B-ORG", 36 | "I-ORG", 37 | "B-LOC", 38 | "I-LOC", 39 | ] 40 | 41 | 42 | # Returns grouped_entities=False 43 | class TransformersClassifierHandler(BaseHandler, ABC): 44 | """ 45 | Transformers text classifier handler class. This handler takes a text (string) and 46 | as input and returns the classification text based on the serialized transformers checkpoint. 
47 | """ 48 | 49 | def __init__(self): 50 | super(TransformersClassifierHandler, self).__init__() 51 | self._batch_size = 0 52 | self.initialized = False 53 | 54 | def initialize(self, ctx): 55 | self.manifest = ctx.manifest 56 | self.metrics = ctx.metrics 57 | 58 | logger.info(f"Manifest: {self.manifest}") 59 | 60 | properties = ctx.system_properties 61 | self._batch_size = properties["batch_size"] 62 | 63 | logger.info(f"properties: {properties}") 64 | 65 | model_dir = properties.get("model_dir") 66 | self.device = torch.device( 67 | "cuda:" + str(properties.get("gpu_id")) 68 | if torch.cuda.is_available() 69 | else "cpu" 70 | ) 71 | 72 | labels = get_labels(os.path.join(model_dir, "labels.txt")) 73 | label_map = {i: label for i, label in enumerate(labels)} 74 | num_labels = len(labels) 75 | 76 | config = AutoConfig.from_pretrained( 77 | os.path.join(model_dir, "config.json"), 78 | num_labels=num_labels, 79 | id2label=label_map, 80 | label2id={label: i for i, label in enumerate(labels)}, 81 | ) 82 | 83 | # Read model serialize/pt file 84 | self.model = AutoModelForTokenClassification.from_pretrained( 85 | model_dir, config=config 86 | ) 87 | self.tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=True) 88 | 89 | self.nlp = pipeline( 90 | "ner", 91 | model=self.model, 92 | tokenizer=self.tokenizer, 93 | ignore_labels=[], 94 | grouped_entities=True, 95 | # ignore_subwords=True, 96 | device=self.device.index, 97 | ) 98 | 99 | logger.debug( 100 | "Transformer model from path {0} loaded successfully".format(model_dir) 101 | ) 102 | 103 | self.initialized = True 104 | 105 | def preprocess(self, data): 106 | """Very basic preprocessing code - only tokenizes. 107 | Extend with your own preprocessing steps as needed. 108 | """ 109 | logger.info(f"Received data: {data}") 110 | processed_sentences = [] 111 | for d in data: 112 | text = d.get("data") 113 | if text is None: 114 | text = d.get("body") 115 | if isinstance(text, (bytes, bytearray)): 116 | sentence = text.decode("utf-8") 117 | else: 118 | sentence = text 119 | logger.info("Received text: '%s'", sentence) 120 | 121 | # Modify this with your preprocessing 122 | num_separated = [s.strip() for s in re.split("(\d+)", sentence)] 123 | digit_processed = " ".join(num_separated) 124 | processed_sentences.append(digit_processed) 125 | 126 | return processed_sentences 127 | 128 | def inference(self, inputs): 129 | """ 130 | Predict the class of a text using a trained transformer model. 131 | """ 132 | # NOTE: This makes the assumption that your model expects text to be tokenized 133 | # with "input_ids" and "token_type_ids" - which is true for some popular transformer models, e.g. bert. 134 | # If your transformer model expects different tokenization, adapt this code to suit 135 | # its expected input format. 
136 | ents = self.nlp(inputs) 137 | if len(inputs) == 1: 138 | ents = [ents] 139 | return ents 140 | 141 | def postprocess(self, inference_output): 142 | # TODO: Add any needed post-processing of the model predictions here 143 | 144 | return inference_output 145 | 146 | 147 | _service = TransformersClassifierHandler() 148 | 149 | 150 | def handle(data, context): 151 | try: 152 | 153 | if not _service.initialized: 154 | _service.initialize(context) 155 | 156 | if data is None: 157 | return None 158 | 159 | data = _service.preprocess(data) 160 | data = _service.inference(data) 161 | data = _service.postprocess(data) 162 | 163 | return data 164 | except Exception as e: 165 | raise e 166 | -------------------------------------------------------------------------------- /lit_ner/serve_pretrained.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import re 4 | from abc import ABC 5 | 6 | import numpy as np 7 | import torch 8 | from ts.torch_handler.base_handler import BaseHandler 9 | 10 | from transformers import ( 11 | AutoConfig, 12 | AutoModelForTokenClassification, 13 | AutoTokenizer, 14 | pipeline, 15 | ) 16 | import json 17 | 18 | # from ts.metrics.dimension import Dimension 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def get_config(path): 23 | if path: 24 | with open(path, "r") as f: 25 | config = json.load(f) 26 | return config 27 | else: 28 | return {"model_name": "dslim/bert-base-NER"} 29 | 30 | 31 | # Returns grouped_entities=True 32 | class TransformersClassifierHandler(BaseHandler, ABC): 33 | """ 34 | Transformers text classifier handler class. This handler takes a text (string) and 35 | as input and returns the classification text based on the serialized transformers checkpoint. 36 | """ 37 | 38 | def __init__(self): 39 | super(TransformersClassifierHandler, self).__init__() 40 | self._batch_size = 0 41 | self.initialized = False 42 | 43 | def initialize(self, ctx): 44 | self.manifest = ctx.manifest 45 | self.metrics = ctx.metrics 46 | 47 | logger.info(f"Manifest: {self.manifest}") 48 | 49 | properties = ctx.system_properties 50 | self._batch_size = properties["batch_size"] 51 | 52 | logger.info(f"properties: {properties}") 53 | 54 | model_dir = properties.get("model_dir") 55 | self.device = torch.device( 56 | "cuda:" + str(properties.get("gpu_id")) 57 | if torch.cuda.is_available() 58 | else "cpu" 59 | ) 60 | 61 | config = get_config(os.path.join(model_dir, "config.json")) 62 | model_name = config["model_name"] 63 | 64 | # Read model serialize/pt file 65 | self.model = AutoModelForTokenClassification.from_pretrained(model_name) 66 | self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True) 67 | 68 | self.nlp = pipeline( 69 | "ner", 70 | model=self.model, 71 | tokenizer=self.tokenizer, 72 | ignore_labels=[], 73 | grouped_entities=True, 74 | # ignore_subwords=True, 75 | device=self.device.index, 76 | ) 77 | 78 | logger.debug( 79 | "Transformer model from path {0} loaded successfully".format(model_dir) 80 | ) 81 | 82 | self.initialized = True 83 | 84 | def preprocess(self, data): 85 | """Very basic preprocessing code - only tokenizes. 86 | Extend with your own preprocessing steps as needed. 
87 | """ 88 | logger.info(f"Received data: {data}") 89 | processed_sentences = [] 90 | for d in data: 91 | text = d.get("data") 92 | if text is None: 93 | text = d.get("body") 94 | if isinstance(text, (bytes, bytearray)): 95 | sentence = text.decode("utf-8") 96 | else: 97 | sentence = text 98 | logger.info("Received text: '%s'", sentence) 99 | 100 | # Modify this with your preprocessing 101 | num_separated = [s.strip() for s in re.split("(\d+)", sentence)] 102 | digit_processed = " ".join(num_separated) 103 | processed_sentences.append(digit_processed) 104 | 105 | return processed_sentences 106 | 107 | def inference(self, inputs): 108 | """ 109 | Predict the class of a text using a trained transformer model. 110 | """ 111 | # NOTE: This makes the assumption that your model expects text to be tokenized 112 | # with "input_ids" and "token_type_ids" - which is true for some popular transformer models, e.g. bert. 113 | # If your transformer model expects different tokenization, adapt this code to suit 114 | # its expected input format. 115 | ents = self.nlp(inputs) 116 | if len(inputs) == 1: 117 | ents = [ents] 118 | return ents 119 | 120 | def postprocess(self, inference_output): 121 | # TODO: Add any needed post-processing of the model predictions here 122 | 123 | return inference_output 124 | 125 | 126 | _service = TransformersClassifierHandler() 127 | 128 | 129 | def handle(data, context): 130 | try: 131 | 132 | if not _service.initialized: 133 | _service.initialize(context) 134 | 135 | if data is None: 136 | return None 137 | 138 | data = _service.preprocess(data) 139 | data = _service.inference(data) 140 | data = _service.postprocess(data) 141 | 142 | return data 143 | except Exception as e: 144 | raise e 145 | -------------------------------------------------------------------------------- /lit_ner/utils.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | 3 | 4 | def rgb2hex(r, g, b): 5 | return "#{:02x}{:02x}{:02x}".format(int(r * 255), int(g * 255), int(b * 255)) 6 | 7 | 8 | def make_color_palette(labels): 9 | color_palette = sns.color_palette(n_colors=len(labels)) 10 | color_map = {x: rgb2hex(*y) for x, y in zip(labels, color_palette)} 11 | return color_map 12 | 13 | 14 | def hf_ents_to_displacy_format(ents, ignore_entities=[]): 15 | 16 | s_ents = {} 17 | s_ents["text"] = " ".join([e["word"] for e in ents]) 18 | spacy_ents = [] 19 | start_pointer = 0 20 | if "entity_group" in ents[0]: 21 | entity_key = "entity_group" 22 | else: 23 | entity_key = "entity" 24 | for i, ent in enumerate(ents): 25 | if ent[entity_key] not in ignore_entities: 26 | spacy_ents.append( 27 | { 28 | "start": start_pointer, 29 | "end": start_pointer + len(ent["word"]), 30 | "label": ent[entity_key], 31 | } 32 | ) 33 | start_pointer = start_pointer + len(ent["word"]) + 1 34 | s_ents["ents"] = spacy_ents 35 | s_ents["title"] = None 36 | return s_ents 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | spacy>=2.3.2 2 | streamlit>=0.62.1 3 | transformers>=3.1.0 4 | httpx>=0.15.3 5 | seaborn>=0.10.1 -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | pip==19.2.3 2 | bump2version==0.5.11 3 | wheel==0.33.6 4 | watchdog==0.9.0 5 | flake8==3.7.8 6 | tox==3.14.0 7 | coverage==4.5.4 8 | 
Sphinx==1.8.5 9 | twine==1.14.0 10 | 11 | spacy>=2.3.2 12 | streamlit>=0.62.1 13 | torchserve>=0.2.0 14 | torch-model-archiver>=0.2.0 15 | transformers>=3.1.0 16 | 17 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.0 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | [bumpversion:file:lit_ner/__init__.py] 11 | search = __version__ = '{current_version}' 12 | replace = __version__ = '{new_version}' 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [flake8] 18 | exclude = docs 19 | 20 | [aliases] 21 | # Define setup.py command aliases here 22 | 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """The setup script.""" 4 | 5 | from setuptools import setup, find_packages 6 | 7 | with open("README.rst") as readme_file: 8 | readme = readme_file.read() 9 | 10 | with open("HISTORY.rst") as history_file: 11 | history = history_file.read() 12 | 13 | requirements = ["A>=1", "B>=2"] 14 | 15 | setup_requirements = [] 16 | 17 | test_requirements = [] 18 | 19 | setup( 20 | author="Ceyda Cinarel", 21 | author_email="15624271+cceyda@users.noreply.github.com", 22 | python_requires=">=3.5", 23 | classifiers=[ 24 | "Development Status :: 2 - Pre-Alpha", 25 | "Intended Audience :: Developers", 26 | "License :: OSI Approved :: Apache Software License", 27 | "Natural Language :: English", 28 | "Programming Language :: Python :: 3", 29 | "Programming Language :: Python :: 3.5", 30 | "Programming Language :: Python :: 3.6", 31 | "Programming Language :: Python :: 3.7", 32 | "Programming Language :: Python :: 3.8", 33 | ], 34 | description="Streamlit demo for HuggingFace NER models", 35 | install_requires=requirements, 36 | license="Apache Software License 2.0", 37 | long_description=readme + "\n\n" + history, 38 | include_package_data=True, 39 | keywords="lit_ner", 40 | name="lit_ner", 41 | packages=find_packages(include=["lit_ner", "lit_ner.*"]), 42 | setup_requires=setup_requirements, 43 | test_suite="tests", 44 | tests_require=test_requirements, 45 | url="https://github.com/cceyda/lit_ner", 46 | version="0.1.0", 47 | zip_safe=False, 48 | ) 49 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Unit test package for lit_ner.""" 2 | -------------------------------------------------------------------------------- /tests/test_lit_ner.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Tests for `lit_ner` package.""" 4 | 5 | 6 | import unittest 7 | 8 | from lit_ner import lit_ner 9 | 10 | 11 | class TestLit_ner(unittest.TestCase): 12 | """Tests for `lit_ner` package.""" 13 | 14 | def setUp(self): 15 | """Set up test fixtures, if any.""" 16 | 17 | def tearDown(self): 18 | """Tear down test fixtures, if any.""" 19 | 20 | def test_000_something(self): 21 | """Test something.""" 22 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py35, 
py36, py37, py38, flake8 3 | 4 | [travis] 5 | python = 6 | 3.8: py38 7 | 3.7: py37 8 | 3.6: py36 9 | 3.5: py35 10 | 11 | [testenv:flake8] 12 | basepython = python 13 | deps = flake8 14 | commands = flake8 lit_ner tests 15 | 16 | [testenv] 17 | setenv = 18 | PYTHONPATH = {toxinidir} 19 | 20 | commands = python setup.py test 21 | --------------------------------------------------------------------------------
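
A quick smoke test of a served model, outside the Streamlit app: the notebooks in examples/ start a TorchServe endpoint at the inference address set in examples/torchserve.config (http://127.0.0.1:7863), and lit_ner/lit_ner.py queries it at /predictions/ner_model. The sketch below posts a sentence to that endpoint with httpx, mirroring the predict() helper in lit_ner/lit_ner.py. It assumes the endpoint is already running locally with the default ports and model name; the printed fields ("entity_group", "word", "score") reflect the grouped-entities output of the transformers NER pipeline used by the handlers and are illustrative rather than a guaranteed schema for every model.

.. code-block:: python

    # Minimal sketch: query a locally served lit-NER model directly with httpx,
    # mirroring predict() in lit_ner/lit_ner.py. Assumes the TorchServe endpoint
    # from examples/serve*.ipynb is running with the ports in examples/torchserve.config.
    import httpx

    API_URL = "http://127.0.0.1:7863/predictions/"  # inference_address in torchserve.config
    MODEL_NAME = "ner_model"                        # registered as ner_model=lit_ner_model.mar


    def predict(text: str):
        # TorchServe hands the JSON body to the handler's preprocess() step.
        response = httpx.post(API_URL + MODEL_NAME, json=text, timeout=60)
        response.raise_for_status()
        return response.json()


    if __name__ == "__main__":
        ents = predict("My name is Ceyda and I live in Seoul, Korea.")
        # With grouped_entities=True the pipeline returns one dict per entity span,
        # e.g. {"entity_group": "PER", "word": "Ceyda", "score": ...} (assumed shape).
        for ent in ents:
            print(ent.get("entity_group", ent.get("entity")), ent.get("word"), ent.get("score"))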