├── .dockerignore ├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── build_sassc.sh ├── config ├── nginx.conf └── supervisor.conf ├── displacy_service ├── __init__.py ├── parse.py ├── scripts │ ├── __init__.py │ ├── app.py │ └── download.py └── server.py ├── displacy_service_tests ├── __init__.py ├── test_parse.py └── test_server.py ├── docker ├── all │ └── Dockerfile ├── de │ └── Dockerfile ├── en │ ├── Dockerfile │ ├── Dockerfile.lg │ └── Dockerfile.md ├── es │ └── Dockerfile ├── fr │ └── Dockerfile ├── it │ └── Dockerfile ├── nl │ └── Dockerfile └── pt │ └── Dockerfile ├── frontend ├── Makefile ├── _data.json ├── _layout.jade ├── _mixins.jade ├── assets │ ├── css │ │ ├── _base.sass │ │ ├── _displacy-theme.sass │ │ ├── _ui.sass │ │ └── style.sass │ ├── img │ │ ├── icons.svg │ │ └── preview.jpg │ └── js │ │ ├── displacy.js │ │ └── main.js ├── index.jade ├── package-lock.json └── package.json ├── requirements.txt ├── setup.py └── start.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | *.swp 3 | *.swo 4 | .DS_STORE 5 | *.md 6 | env 7 | .idea 8 | *.egg-info 9 | !README.md 10 | coverage.xml 11 | .coverage 12 | frontend/node_modules 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | frontend/node_modules 92 | .DS_Store 93 | .venv 94 | 95 | .idea/ 96 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | 3 | language: generic 4 | 5 | services: 6 | - docker 7 | 8 | before_install: 9 | - docker login -u="$DOCKERUSER" -p="$DOCKERPASS" 10 | - docker login -u="$HEROKUUSER" -p="$HEROKUPASS" $HEROKUREPO 11 | 12 | install: 13 | - travis_wait 30 docker build -t jgontrum/spacyapi:base_v2 . 14 | - travis_wait 30 docker build -t jgontrum/spacyapi:en_v2 -f docker/en/Dockerfile . 15 | - travis_wait 30 docker build -t jgontrum/spacyapi:en_v2_lg -f docker/en/Dockerfile.lg . 16 | - travis_wait 30 docker build -t jgontrum/spacyapi:en_v2_md -f docker/en/Dockerfile.md . 17 | - travis_wait 30 docker build -t jgontrum/spacyapi:de_v2 -f docker/de/Dockerfile . 18 | - travis_wait 30 docker build -t jgontrum/spacyapi:es_v2 -f docker/es/Dockerfile . 19 | - travis_wait 30 docker build -t jgontrum/spacyapi:fr_v2 -f docker/fr/Dockerfile . 20 | - travis_wait 30 docker build -t jgontrum/spacyapi:nl_v2 -f docker/nl/Dockerfile . 21 | - travis_wait 30 docker build -t jgontrum/spacyapi:pt_v2 -f docker/pt/Dockerfile . 22 | - travis_wait 30 docker build -t jgontrum/spacyapi:it_v2 -f docker/it/Dockerfile . 23 | - travis_wait 30 docker build -t jgontrum/spacyapi:all_v2 -f docker/all/Dockerfile . 24 | 25 | after_success: 26 | - docker push jgontrum/spacyapi:base_v2 27 | - docker push jgontrum/spacyapi:en_v2 28 | - docker push jgontrum/spacyapi:en_v2_lg 29 | - docker push jgontrum/spacyapi:en_v2_md 30 | - docker push jgontrum/spacyapi:de_v2 31 | - docker push jgontrum/spacyapi:es_v2 32 | - docker push jgontrum/spacyapi:fr_v2 33 | - docker push jgontrum/spacyapi:pt_v2 34 | - docker push jgontrum/spacyapi:it_v2 35 | - docker push jgontrum/spacyapi:nl_v2 36 | - docker push jgontrum/spacyapi:all_v2 37 | - docker tag jgontrum/spacyapi:en_v2 registry.heroku.com/spacy-en/web 38 | - docker push registry.heroku.com/spacy-en/web 39 | - docker tag jgontrum/spacyapi:de_v2 registry.heroku.com/spacy-de/web 40 | - docker push registry.heroku.com/spacy-de/web 41 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | LABEL maintainer="gontrum@me.com" 3 | LABEL version="0.2" 4 | LABEL description="Base image, containing no language models." 5 | 6 | # Install the required packages 7 | RUN apt-get update && apt-get install -y \ 8 | build-essential \ 9 | libssl-dev \ 10 | supervisor \ 11 | curl \ 12 | nginx && \ 13 | apt-get -q clean -y && rm -rf /var/lib/apt/lists/* && rm -f /var/cache/apt/*.bin 14 | 15 | # Install node for the frontend 16 | RUN curl -sL https://deb.nodesource.com/setup_10.x | bash - && \ 17 | apt-get install -y nodejs &&\ 18 | apt-get -q clean -y && rm -rf /var/lib/apt/lists/* && rm -f /var/cache/apt/*.bin 19 | 20 | # Copy and set up the app 21 | COPY . /app 22 | 23 | # Build SASSC 24 | RUN bash /app/build_sassc.sh 25 | 26 | # Build app 27 | RUN cd /app/frontend && make clean && make 28 | RUN cd /app && make clean && make 29 | 30 | # Configure nginx & supervisor 31 | RUN mv /app/config/nginx.conf /etc/nginx/sites-available/default &&\ 32 | echo "daemon off;" >> /etc/nginx/nginx.conf && \ 33 | mv /app/config/supervisor.conf /etc/supervisor/conf.d/ 34 | 35 | ENV PORT 80 36 | EXPOSE 80 37 | CMD ["bash", "/app/start.sh"] 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (C) 2016 ExplosionAI UG (haftungsbeschränkt) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean start build-and-push test 2 | 3 | PYTHON3=python3.6 4 | 5 | all: env/bin/python 6 | 7 | env/bin/python: 8 | $(PYTHON3) -m venv env 9 | env/bin/pip install --upgrade pip 10 | env/bin/pip install wheel 11 | env/bin/pip install -r requirements.txt 12 | env/bin/python setup.py develop 13 | 14 | clean: 15 | rm -rfv bin develop-eggs dist downloads eggs env parts .cache .scannerwork 16 | rm -fv .DS_Store .coverage .installed.cfg bootstrap.py .coverage 17 | find . -name '*.pyc' -exec rm -fv {} \; 18 | find . -name '*.pyo' -exec rm -fv {} \; 19 | find . -depth -name '*.egg-info' -exec rm -rfv {} \; 20 | find . -depth -name '__pycache__' -exec rm -rfv {} \; 21 | 22 | test: env/bin/python 23 | languages=en env/bin/download_models 24 | env/bin/py.test displacy_service_tests 25 | 26 | start: env/bin/python 27 | env/bin/run_server 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # spaCy API Docker 2 | 3 | **Ready-to-use Docker images for the [spaCy NLP library](https://github.com/explosion/spaCy).** 4 | 5 | --- 6 | **[spaCy API Docker](https://github.com/jgontrum/spacy-api-docker) is being sponsored by the following tool; please help to support us by taking a look and signing up to a free trial** 7 | 8 | 9 | [GitAds](https://tracking.gitads.io/?repo=spacy-api-docker) 10 | --- 11 | 12 | ### Features 13 | 14 | - Use the awesome spaCy NLP framework with other programming languages. 15 | - Better scaling: One NLP - multiple services. 16 | - Build using the official [spaCy REST services](https://github.com/explosion/spacy-services). 17 | - Dependency parsing visualisation with [displaCy](https://demos.explosion.ai/displacy/). 18 | - Docker images for **English**, **German**, **Spanish**, **Italian**, **Dutch** and **French**. 19 | - Automated builds to stay up to date with spaCy. 20 | - Current spaCy version: 2.0.16 21 | 22 | Please note that this is a completely new API and is incompatible with the previous one. If you still need them, use `jgontrum/spacyapi:en-legacy` or `jgontrum/spacyapi:de-legacy`. 23 | 24 | _Documentation, API- and frontend code based upon [spaCy REST services](https://github.com/explosion/spacy-services) by [Explosion AI](https://explosion.ai)._ 25 | 26 | --- 27 | 28 | ## Images 29 | 30 | | Image | Description | 31 | | --------------------------- | ----------------------------------------------------------------- | 32 | | jgontrum/spacyapi:base_v2 | Base image for spaCy 2.0, containing no language model | 33 | | jgontrum/spacyapi:en_v2 | English language model, spaCy 2.0 | 34 | | jgontrum/spacyapi:de_v2 | German language model, spaCy 2.0 | 35 | | jgontrum/spacyapi:es_v2 | Spanish language model, spaCy 2.0 | 36 | | jgontrum/spacyapi:fr_v2 | French language model, spaCy 2.0 | 37 | | jgontrum/spacyapi:pt_v2 | Portuguese language model, spaCy 2.0 | 38 | | jgontrum/spacyapi:it_v2 | Italian language model, spaCy 2.0 | 39 | | jgontrum/spacyapi:nl_v2 | Dutch language model, spaCy 2.0 | 40 | | jgontrum/spacyapi:all_v2 | Contains EN, DE, ES, PT, NL, IT and FR language models, spaCy 2.0 | 41 | | _OLD RELEASES_ | | 42 | | jgontrum/spacyapi:base | Base image, containing no language model | 43 | | jgontrum/spacyapi:latest | English language model | 44 | | jgontrum/spacyapi:en | English language model | 45 | | jgontrum/spacyapi:de | German language model | 46 | | jgontrum/spacyapi:es | Spanish language model | 47 | | jgontrum/spacyapi:fr | French language model | 48 | | jgontrum/spacyapi:all | Contains EN, DE, ES and FR language models | 49 | | jgontrum/spacyapi:en-legacy | Old API with English model | 50 | | jgontrum/spacyapi:de-legacy | Old API with German model | 51 | 52 | --- 53 | 54 | ## Usage 55 | 56 | `docker run -p "127.0.0.1:8080:80" jgontrum/spacyapi:en_v2` 57 | 58 | All models are loaded at start up time. Depending on the model size and server 59 | performance, this can take a few minutes. 60 | 61 | The displaCy frontend is available at `/ui`. 62 | 63 | ### Docker Compose 64 | 65 | ```json 66 | version: '2' 67 | 68 | services: 69 | spacyapi: 70 | image: jgontrum/spacyapi:en_v2 71 | ports: 72 | - "127.0.0.1:8080:80" 73 | restart: always 74 | 75 | ``` 76 | 77 | ### Running Tests 78 | 79 | In order to run unit tests locally `pytest` is included. 80 | 81 | `docker run -it jgontrum/spacyapi:en_v2 app/env/bin/pytest app/displacy_service_tests` 82 | 83 | ### Special Cases 84 | 85 | The API includes rudimentary support for specifying [special cases](https://spacy.io/usage/linguistic-features#special-cases) 86 | for your deployment. Currently only basic special cases are supported; for example, in the spaCy parlance: 87 | 88 | ```python 89 | tokenizer.add_special_case("isn't", [{ORTH: "isn't"}]) 90 | ``` 91 | 92 | They can be supplied in an environment variable corresponding to the desired language model. For example, `en_special_cases` 93 | or `en_core_web_lg_special_cases`. They are configured as a single comma-delimited string, such as `"isn't,doesn't,won't"`. 94 | 95 | Use the following syntax to specify basic special case rules, such as for preserving contractions: 96 | 97 | `docker run -p "127.0.0.1:8080:80" -e en_special_cases="isn't,doesn't" jgontrum/spacyapi:en_v2` 98 | 99 | You can also configure this in a `.env` file if using `docker-compose` as above. 100 | 101 | --- 102 | 103 | ## REST API Documentation 104 | 105 | ### `GET` `/ui/` 106 | 107 | displaCy frontend is available here. 108 | 109 | --- 110 | 111 | ### `POST` `/dep` 112 | 113 | Example request: 114 | 115 | ```json 116 | { 117 | "text": "They ate the pizza with anchovies", 118 | "model": "en", 119 | "collapse_punctuation": 0, 120 | "collapse_phrases": 1 121 | } 122 | ``` 123 | 124 | | Name | Type | Description | 125 | | ---------------------- | ------- | -------------------------------------------------------- | 126 | | `text` | string | text to be parsed | 127 | | `model` | string | identifier string for a model installed on the server | 128 | | `collapse_punctuation` | boolean | Merge punctuation onto the preceding token? | 129 | | `collapse_phrases` | boolean | Merge noun chunks and named entities into single tokens? | 130 | 131 | Example request using the Python [Requests library](http://docs.python-requests.org/en/master/): 132 | 133 | ```python 134 | import json 135 | import requests 136 | 137 | url = "http://localhost:8000/dep" 138 | message_text = "They ate the pizza with anchovies" 139 | headers = {'content-type': 'application/json'} 140 | d = {'text': message_text, 'model': 'en'} 141 | 142 | response = requests.post(url, data=json.dumps(d), headers=headers) 143 | r = response.json() 144 | ``` 145 | 146 | Example response: 147 | 148 | ```json 149 | { 150 | "arcs": [ 151 | { "dir": "left", "start": 0, "end": 1, "label": "nsubj" }, 152 | { "dir": "right", "start": 1, "end": 2, "label": "dobj" }, 153 | { "dir": "right", "start": 1, "end": 3, "label": "prep" }, 154 | { "dir": "right", "start": 3, "end": 4, "label": "pobj" }, 155 | { "dir": "left", "start": 2, "end": 3, "label": "prep" } 156 | ], 157 | "words": [ 158 | { "tag": "PRP", "text": "They" }, 159 | { "tag": "VBD", "text": "ate" }, 160 | { "tag": "NN", "text": "the pizza" }, 161 | { "tag": "IN", "text": "with" }, 162 | { "tag": "NNS", "text": "anchovies" } 163 | ] 164 | } 165 | ``` 166 | 167 | | Name | Type | Description | 168 | | ------- | ------- | ------------------------------------------ | 169 | | `arcs` | array | data to generate the arrows | 170 | | `dir` | string | direction of arrow (`"left"` or `"right"`) | 171 | | `start` | integer | offset of word the arrow starts **on** | 172 | | `end` | integer | offset of word the arrow ends **on** | 173 | | `label` | string | dependency label | 174 | | `words` | array | data to generate the words | 175 | | `tag` | string | part-of-speech tag | 176 | | `text` | string | token | 177 | 178 | --- 179 | 180 | Curl command: 181 | 182 | ``` 183 | curl -s localhost:8000/dep -d '{"text":"Pastafarians are smarter than people with Coca Cola bottles.", "model":"en"}' 184 | ``` 185 | 186 | ```json 187 | { 188 | "arcs": [ 189 | { 190 | "dir": "left", 191 | "end": 1, 192 | "label": "nsubj", 193 | "start": 0 194 | }, 195 | { 196 | "dir": "right", 197 | "end": 2, 198 | "label": "acomp", 199 | "start": 1 200 | }, 201 | { 202 | "dir": "right", 203 | "end": 3, 204 | "label": "prep", 205 | "start": 2 206 | }, 207 | { 208 | "dir": "right", 209 | "end": 4, 210 | "label": "pobj", 211 | "start": 3 212 | }, 213 | { 214 | "dir": "right", 215 | "end": 5, 216 | "label": "prep", 217 | "start": 4 218 | }, 219 | { 220 | "dir": "right", 221 | "end": 6, 222 | "label": "pobj", 223 | "start": 5 224 | } 225 | ], 226 | "words": [ 227 | { 228 | "tag": "NNPS", 229 | "text": "Pastafarians" 230 | }, 231 | { 232 | "tag": "VBP", 233 | "text": "are" 234 | }, 235 | { 236 | "tag": "JJR", 237 | "text": "smarter" 238 | }, 239 | { 240 | "tag": "IN", 241 | "text": "than" 242 | }, 243 | { 244 | "tag": "NNS", 245 | "text": "people" 246 | }, 247 | { 248 | "tag": "IN", 249 | "text": "with" 250 | }, 251 | { 252 | "tag": "NNS", 253 | "text": "Coca Cola bottles." 254 | } 255 | ] 256 | } 257 | ``` 258 | 259 | --- 260 | 261 | ### `POST` `/ent` 262 | 263 | Example request: 264 | 265 | ```json 266 | { 267 | "text": "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously.", 268 | "model": "en" 269 | } 270 | ``` 271 | 272 | | Name | Type | Description | 273 | | ------- | ------ | ----------------------------------------------------- | 274 | | `text` | string | text to be parsed | 275 | | `model` | string | identifier string for a model installed on the server | 276 | 277 | Example request using the Python [Requests library](http://docs.python-requests.org/en/master/): 278 | 279 | ```python 280 | import json 281 | import requests 282 | 283 | url = "http://localhost:8000/ent" 284 | message_text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously." 285 | headers = {'content-type': 'application/json'} 286 | d = {'text': message_text, 'model': 'en'} 287 | 288 | response = requests.post(url, data=json.dumps(d), headers=headers) 289 | r = response.json() 290 | ``` 291 | 292 | Example response: 293 | 294 | ```json 295 | [ 296 | { "end": 20, "start": 5, "type": "PERSON" }, 297 | { "end": 67, "start": 61, "type": "ORG" }, 298 | { "end": 75, "start": 71, "type": "DATE" } 299 | ] 300 | ``` 301 | 302 | | Name | Type | Description | 303 | | ------- | ------- | ------------------------------------------ | 304 | | `end` | integer | character offset the entity ends **after** | 305 | | `start` | integer | character offset the entity starts **on** | 306 | | `type` | string | entity type | 307 | 308 | ``` 309 | curl -s localhost:8000/ent -d '{"text":"Pastafarians are smarter than people with Coca Cola bottles.", "model":"en"}' 310 | ``` 311 | 312 | ```json 313 | [ 314 | { 315 | "end": 12, 316 | "start": 0, 317 | "text": "Pastafarians", 318 | "type": "NORP" 319 | }, 320 | { 321 | "end": 51, 322 | "start": 42, 323 | "text": "Coca Cola", 324 | "type": "ORG" 325 | } 326 | ] 327 | ``` 328 | 329 | --- 330 | 331 | ### `POST` `/sents` 332 | 333 | Example request: 334 | 335 | ```json 336 | { 337 | "text": "In 2012 I was a mediocre developer. But today I am at least a bit better.", 338 | "model": "en" 339 | } 340 | ``` 341 | 342 | | Name | Type | Description | 343 | | ------- | ------ | ----------------------------------------------------- | 344 | | `text` | string | text to be parsed | 345 | | `model` | string | identifier string for a model installed on the server | 346 | 347 | Example request using the Python [Requests library](http://docs.python-requests.org/en/master/): 348 | 349 | ```python 350 | import json 351 | import requests 352 | 353 | url = "http://localhost:8000/sents" 354 | message_text = "In 2012 I was a mediocre developer. But today I am at least a bit better." 355 | headers = {'content-type': 'application/json'} 356 | d = {'text': message_text, 'model': 'en'} 357 | 358 | response = requests.post(url, data=json.dumps(d), headers=headers) 359 | r = response.json() 360 | ``` 361 | 362 | Example response: 363 | 364 | ```json 365 | ["In 2012 I was a mediocre developer.", "But today I am at least a bit better."] 366 | ``` 367 | 368 | --- 369 | 370 | ### `POST` `/sents_dep` 371 | 372 | Combination of `/sents` and `/dep`, returns sentences and dependency parses 373 | 374 | Example request: 375 | 376 | ```json 377 | { 378 | "text": "In 2012 I was a mediocre developer. But today I am at least a bit better.", 379 | "model": "en" 380 | } 381 | ``` 382 | 383 | | Name | Type | Description | 384 | | ------- | ------ | ----------------------------------------------------- | 385 | | `text` | string | text to be parsed | 386 | | `model` | string | identifier string for a model installed on the server | 387 | 388 | Example request using the Python [Requests library](http://docs.python-requests.org/en/master/): 389 | 390 | ```python 391 | import json 392 | import requests 393 | 394 | url = "http://localhost:8000/sents_dep" 395 | message_text = "In 2012 I was a mediocre developer. But today I am at least a bit better." 396 | headers = {'content-type': 'application/json'} 397 | d = {'text': message_text, 'model': 'en'} 398 | 399 | response = requests.post(url, data=json.dumps(d), headers=headers) 400 | r = response.json() 401 | ``` 402 | 403 | Example response: 404 | 405 | ```json 406 | [ 407 | { 408 | "sentence": "In 2012 I was a mediocre developer.", 409 | "dep_parse": { 410 | "arcs": [ 411 | { 412 | "dir": "left", 413 | "end": 3, 414 | "label": "prep", 415 | "start": 0, 416 | "text": "In" 417 | }, 418 | { 419 | "dir": "right", 420 | "end": 1, 421 | "label": "pobj", 422 | "start": 0, 423 | "text": "2012" 424 | }, 425 | { 426 | "dir": "left", 427 | "end": 3, 428 | "label": "nsubj", 429 | "start": 2, 430 | "text": "I" 431 | }, 432 | { 433 | "dir": "left", 434 | "end": 6, 435 | "label": "det", 436 | "start": 4, 437 | "text": "a" 438 | }, 439 | { 440 | "dir": "left", 441 | "end": 6, 442 | "label": "amod", 443 | "start": 5, 444 | "text": "mediocre" 445 | }, 446 | { 447 | "dir": "right", 448 | "end": 6, 449 | "label": "attr", 450 | "start": 3, 451 | "text": "developer" 452 | }, 453 | { 454 | "dir": "right", 455 | "end": 7, 456 | "label": "punct", 457 | "start": 3, 458 | "text": "." 459 | } 460 | ], 461 | "words": [ 462 | { 463 | "tag": "IN", 464 | "text": "In" 465 | }, 466 | { 467 | "tag": "CD", 468 | "text": "2012" 469 | }, 470 | { 471 | "tag": "PRP", 472 | "text": "I" 473 | }, 474 | { 475 | "tag": "VBD", 476 | "text": "was" 477 | }, 478 | { 479 | "tag": "DT", 480 | "text": "a" 481 | }, 482 | { 483 | "tag": "JJ", 484 | "text": "mediocre" 485 | }, 486 | { 487 | "tag": "NN", 488 | "text": "developer" 489 | }, 490 | { 491 | "tag": ".", 492 | "text": "." 493 | } 494 | ] 495 | } 496 | }, 497 | { 498 | "sentence": "But today I am at least a bit better.", 499 | "dep_parse": { 500 | "arcs": [ 501 | { 502 | "dir": "left", 503 | "end": 11, 504 | "label": "cc", 505 | "start": 8, 506 | "text": "But" 507 | }, 508 | { 509 | "dir": "left", 510 | "end": 11, 511 | "label": "npadvmod", 512 | "start": 9, 513 | "text": "today" 514 | }, 515 | { 516 | "dir": "left", 517 | "end": 11, 518 | "label": "nsubj", 519 | "start": 10, 520 | "text": "I" 521 | }, 522 | { 523 | "dir": "left", 524 | "end": 13, 525 | "label": "advmod", 526 | "start": 12, 527 | "text": "at" 528 | }, 529 | { 530 | "dir": "left", 531 | "end": 15, 532 | "label": "advmod", 533 | "start": 13, 534 | "text": "least" 535 | }, 536 | { 537 | "dir": "left", 538 | "end": 15, 539 | "label": "det", 540 | "start": 14, 541 | "text": "a" 542 | }, 543 | { 544 | "dir": "left", 545 | "end": 16, 546 | "label": "npadvmod", 547 | "start": 15, 548 | "text": "bit" 549 | }, 550 | { 551 | "dir": "right", 552 | "end": 16, 553 | "label": "acomp", 554 | "start": 11, 555 | "text": "better" 556 | }, 557 | { 558 | "dir": "right", 559 | "end": 17, 560 | "label": "punct", 561 | "start": 11, 562 | "text": "." 563 | } 564 | ], 565 | "words": [ 566 | { 567 | "tag": "CC", 568 | "text": "But" 569 | }, 570 | { 571 | "tag": "NN", 572 | "text": "today" 573 | }, 574 | { 575 | "tag": "PRP", 576 | "text": "I" 577 | }, 578 | { 579 | "tag": "VBP", 580 | "text": "am" 581 | }, 582 | { 583 | "tag": "IN", 584 | "text": "at" 585 | }, 586 | { 587 | "tag": "JJS", 588 | "text": "least" 589 | }, 590 | { 591 | "tag": "DT", 592 | "text": "a" 593 | }, 594 | { 595 | "tag": "NN", 596 | "text": "bit" 597 | }, 598 | { 599 | "tag": "RBR", 600 | "text": "better" 601 | }, 602 | { 603 | "tag": ".", 604 | "text": "." 605 | } 606 | ] 607 | } 608 | } 609 | ] 610 | ``` 611 | 612 | ### `GET` `/models` 613 | 614 | List the names of models installed on the server. 615 | 616 | Example request: 617 | 618 | ``` 619 | GET /models 620 | ``` 621 | 622 | Example response: 623 | 624 | ```json 625 | ["en", "de"] 626 | ``` 627 | 628 | --- 629 | 630 | ### `GET` `/{model}/schema` 631 | 632 | Example request: 633 | 634 | ``` 635 | GET /en/schema 636 | ``` 637 | 638 | | Name | Type | Description | 639 | | ------- | ------ | ----------------------------------------------------- | 640 | | `model` | string | identifier string for a model installed on the server | 641 | 642 | Example response: 643 | 644 | ```json 645 | { 646 | "dep_types": ["ROOT", "nsubj"], 647 | "ent_types": ["PERSON", "LOC", "ORG"], 648 | "pos_types": ["NN", "VBZ", "SP"] 649 | } 650 | ``` 651 | 652 | --- 653 | 654 | ### `GET` `/version` 655 | 656 | Show the used spaCy version. 657 | 658 | Example request: 659 | 660 | ``` 661 | GET /version 662 | ``` 663 | 664 | Example response: 665 | 666 | ```json 667 | { 668 | "spacy": "2.2.4" 669 | } 670 | ``` 671 | -------------------------------------------------------------------------------- /build_sassc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p /tmp/sass 4 | cd /tmp/sass 5 | git clone https://github.com/sass/sassc.git 6 | . sassc/script/bootstrap 7 | make -C sassc -j4 8 | PREFIX="/usr" make -C sassc install 9 | cd 10 | rm -rf /tmp/sass 11 | 12 | -------------------------------------------------------------------------------- /config/nginx.conf: -------------------------------------------------------------------------------- 1 | server { 2 | listen PORT default_server; 3 | charset utf-8; 4 | 5 | location /ui/ { 6 | proxy_pass http://127.0.0.1:8080/; 7 | } 8 | 9 | location / { 10 | proxy_pass http://127.0.0.1:8000/; 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /config/supervisor.conf: -------------------------------------------------------------------------------- 1 | [program:api] 2 | command = /usr/bin/make --directory /app start 3 | 4 | [program:frontend] 5 | command = /usr/bin/make --directory /app/frontend start 6 | 7 | [program:nginx] 8 | command = /usr/sbin/nginx 9 | -------------------------------------------------------------------------------- /displacy_service/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jgontrum/spacy-api-docker/249f96a2387741bc1037eab8318966a284cceeec/displacy_service/__init__.py -------------------------------------------------------------------------------- /displacy_service/parse.py: -------------------------------------------------------------------------------- 1 | class Parse(object): 2 | def __init__(self, nlp, text, collapse_punctuation, collapse_phrases): 3 | self.doc = nlp(text) 4 | if collapse_punctuation: 5 | spans = [] 6 | for word in self.doc[:-1]: 7 | if word.is_punct: 8 | continue 9 | if not word.nbor(1).is_punct: 10 | continue 11 | start = word.i 12 | end = word.i + 1 13 | while end < len(self.doc) and self.doc[end].is_punct: 14 | end += 1 15 | span = self.doc[start: end] 16 | spans.append( 17 | (span.start_char, span.end_char, word.tag_, word.lemma_, word.ent_type_) 18 | ) 19 | for span_props in spans: 20 | self.doc.merge(*span_props) 21 | 22 | if collapse_phrases: 23 | for np in list(self.doc.noun_chunks): 24 | np.merge(np.root.tag_, np.root.lemma_, np.root.ent_type_) 25 | 26 | def to_json(self): 27 | words = [{'text': w.text, 'tag': w.tag_} for w in self.doc] 28 | arcs = [] 29 | for word in self.doc: 30 | if word.i < word.head.i: 31 | arcs.append( 32 | { 33 | 'start': word.i, 34 | 'end': word.head.i, 35 | 'label': word.dep_, 36 | 'text': str(word), 37 | 'dir': 'left' 38 | }) 39 | elif word.i > word.head.i: 40 | arcs.append( 41 | { 42 | 'start': word.head.i, 43 | 'end': word.i, 44 | 'label': word.dep_, 45 | 'text': str(word), 46 | 'dir': 'right' 47 | }) 48 | return {'words': words, 'arcs': arcs} 49 | 50 | 51 | class Entities(object): 52 | def __init__(self, nlp, text): 53 | self.doc = nlp(text) 54 | 55 | def to_json(self): 56 | return [ 57 | { 58 | 'start': ent.start_char, 59 | 'end': ent.end_char, 60 | 'type': ent.label_, 61 | 'text': str(ent) 62 | } for ent in self.doc.ents 63 | ] 64 | 65 | 66 | class Sentences(object): 67 | def __init__(self, nlp, text): 68 | self.doc = nlp(text) 69 | 70 | def to_json(self): 71 | sents = [sent.string.strip() for sent in self.doc.sents] 72 | return sents 73 | 74 | 75 | class SentencesDependencies(object): 76 | def __init__(self, nlp, text, collapse_punctuation, collapse_phrases): 77 | 78 | self.doc = nlp(text) 79 | 80 | if collapse_punctuation: 81 | spans = [] 82 | for word in self.doc[:-1]: 83 | if word.is_punct: 84 | continue 85 | if not word.nbor(1).is_punct: 86 | continue 87 | start = word.i 88 | end = word.i + 1 89 | while end < len(self.doc) and self.doc[end].is_punct: 90 | end += 1 91 | span = self.doc[start: end] 92 | spans.append( 93 | (span.start_char, span.end_char, word.tag_, word.lemma_, word.ent_type_) 94 | ) 95 | for span_props in spans: 96 | self.doc.merge(*span_props) 97 | 98 | if collapse_phrases: 99 | for np in list(self.doc.noun_chunks): 100 | np.merge(np.root.tag_, np.root.lemma_, np.root.ent_type_) 101 | 102 | def to_json(self): 103 | sents = [] 104 | for sent in self.doc.sents: 105 | words = [{'text': w.text, 'tag': w.tag_} for w in sent] 106 | arcs = [] 107 | for word in sent: 108 | if word.i < word.head.i: 109 | arcs.append( 110 | { 111 | 'start': word.i, 112 | 'end': word.head.i, 113 | 'label': word.dep_, 114 | 'text': str(word), 115 | 'dir': 'left' 116 | }) 117 | elif word.i > word.head.i: 118 | arcs.append( 119 | { 120 | 'start': word.head.i, 121 | 'end': word.i, 122 | 'label': word.dep_, 123 | 'text': str(word), 124 | 'dir': 'right' 125 | }) 126 | 127 | sents.append({'sentence': sent.string.strip(), 128 | 'dep_parse': {'words': words, 129 | 'arcs': arcs}}) 130 | return sents 131 | -------------------------------------------------------------------------------- /displacy_service/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jgontrum/spacy-api-docker/249f96a2387741bc1037eab8318966a284cceeec/displacy_service/scripts/__init__.py -------------------------------------------------------------------------------- /displacy_service/scripts/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from spacy.symbols import ORTH 4 | 5 | from wsgiref import simple_server 6 | 7 | from displacy_service.server import APP, MODELS, get_model 8 | 9 | 10 | def run(): 11 | for model in MODELS: 12 | print("Load model ", model) 13 | loaded_model = get_model(model) 14 | special_cases_str = os.getenv(f"{model}_special_cases", "") 15 | if special_cases_str: 16 | for special_case in special_cases_str.split(','): 17 | loaded_model.tokenizer.add_special_case( 18 | special_case, 19 | [{ORTH: special_case}] 20 | ) 21 | 22 | print("Loaded all models. Starting HTTP server.") 23 | httpd = simple_server.make_server('0.0.0.0', 8000, APP) 24 | httpd.serve_forever() 25 | -------------------------------------------------------------------------------- /displacy_service/scripts/download.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from spacy.cli import download 5 | 6 | 7 | def download_models(): 8 | languages = os.getenv("languages", "en").split() 9 | for lang in languages: 10 | download(model=lang, direct=False) 11 | 12 | print("Updating frontend settings...") 13 | frontend_settings = json.load(open("frontend/_data.json")) 14 | 15 | frontend_settings['index']['languages'] = { 16 | l: l for l in languages 17 | } 18 | frontend_settings['index']['default_language'] = languages[0] 19 | 20 | json.dump(frontend_settings, open("frontend/_data.json", "w"), 21 | sort_keys=True, 22 | indent=2) 23 | 24 | print("Done!") 25 | -------------------------------------------------------------------------------- /displacy_service/server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import falcon 3 | import spacy 4 | import json 5 | import os 6 | 7 | from spacy.symbols import ENT_TYPE, TAG, DEP 8 | import spacy.about 9 | import spacy.util 10 | 11 | from .parse import Parse, Entities, Sentences, SentencesDependencies 12 | 13 | 14 | MODELS = os.getenv("languages", "").split() 15 | 16 | _models = {} 17 | 18 | 19 | def get_model(model_name): 20 | if model_name not in _models: 21 | _models[model_name] = spacy.load(model_name) 22 | return _models[model_name] 23 | 24 | 25 | def get_dep_types(model): 26 | '''List the available dep labels in the model.''' 27 | labels = [] 28 | for label_id in model.parser.moves.freqs[DEP]: 29 | labels.append(model.vocab.strings[label_id]) 30 | return labels 31 | 32 | 33 | def get_ent_types(model): 34 | '''List the available entity types in the model.''' 35 | labels = [] 36 | for label_id in model.entity.moves.freqs[ENT_TYPE]: 37 | labels.append(model.vocab.strings[label_id]) 38 | return labels 39 | 40 | 41 | def get_pos_types(model): 42 | '''List the available part-of-speech tags in the model.''' 43 | labels = [] 44 | for label_id in model.tagger.moves.freqs[TAG]: 45 | labels.append(model.vocab.strings[label_id]) 46 | return labels 47 | 48 | 49 | class ModelsResource(object): 50 | """List the available models. 51 | 52 | test with: curl -s localhost:8000/models 53 | """ 54 | 55 | def on_get(self, req, resp): 56 | try: 57 | output = list(MODELS) 58 | resp.body = json.dumps(output, sort_keys=True, indent=2) 59 | resp.content_type = 'text/string' 60 | resp.append_header('Access-Control-Allow-Origin', "*") 61 | resp.status = falcon.HTTP_200 62 | except Exception as e: 63 | raise falcon.HTTPInternalServerError( 64 | 'Models retrieval failed', 65 | '{}'.format(e)) 66 | 67 | 68 | class VersionResource(object): 69 | """Return the used spacy / api version 70 | 71 | test with: curl -s localhost:8000/version 72 | """ 73 | 74 | def on_get(self, req, resp): 75 | try: 76 | resp.body = json.dumps({ 77 | "spacy": spacy.about.__version__ 78 | }, sort_keys=True, indent=2) 79 | resp.content_type = 'text/string' 80 | resp.append_header('Access-Control-Allow-Origin', "*") 81 | resp.status = falcon.HTTP_200 82 | except Exception as e: 83 | raise falcon.HTTPInternalServerError( 84 | 'Version retrieval failed', 85 | '{}'.format(e)) 86 | 87 | 88 | class SchemaResource(object): 89 | """Describe the annotation scheme of a model. 90 | 91 | This does not appear to work with later spacy 92 | versions. 93 | """ 94 | 95 | def on_get(self, req, resp, model_name): 96 | try: 97 | model = get_model(model_name) 98 | output = { 99 | 'dep_types': get_dep_types(model), 100 | 'ent_types': get_ent_types(model), 101 | 'pos_types': get_pos_types(model) 102 | } 103 | 104 | resp.body = json.dumps(output, sort_keys=True, indent=2) 105 | resp.content_type = 'text/string' 106 | resp.append_header('Access-Control-Allow-Origin', "*") 107 | resp.status = falcon.HTTP_200 108 | except Exception as e: 109 | raise falcon.HTTPBadRequest( 110 | 'Schema construction failed', 111 | '{}'.format(e)) 112 | 113 | 114 | class DepResource(object): 115 | """Parse text and return displacy's expected JSON output. 116 | 117 | test with: curl -s localhost:8000/dep -d '{"text":"Pastafarians are smarter than people with Coca Cola bottles."}' 118 | """ 119 | 120 | def on_post(self, req, resp): 121 | req_body = req.bounded_stream.read() 122 | json_data = json.loads(req_body.decode('utf8')) 123 | text = json_data.get('text') 124 | model_name = json_data.get('model', 'en') 125 | collapse_punctuation = json_data.get('collapse_punctuation', True) 126 | collapse_phrases = json_data.get('collapse_phrases', True) 127 | 128 | try: 129 | model = get_model(model_name) 130 | parse = Parse(model, text, collapse_punctuation, collapse_phrases) 131 | resp.body = json.dumps(parse.to_json(), sort_keys=True, indent=2) 132 | resp.content_type = 'text/string' 133 | resp.append_header('Access-Control-Allow-Origin', "*") 134 | resp.status = falcon.HTTP_200 135 | except Exception as e: 136 | raise falcon.HTTPBadRequest( 137 | 'Dependency parsing failed', 138 | '{}'.format(e)) 139 | 140 | 141 | class EntResource(object): 142 | """Parse text and return displaCy ent's expected output.""" 143 | 144 | def on_post(self, req, resp): 145 | req_body = req.bounded_stream.read() 146 | json_data = json.loads(req_body.decode('utf8')) 147 | text = json_data.get('text') 148 | model_name = json_data.get('model', 'en') 149 | try: 150 | model = get_model(model_name) 151 | entities = Entities(model, text) 152 | resp.body = json.dumps(entities.to_json(), sort_keys=True, 153 | indent=2) 154 | resp.content_type = 'text/string' 155 | resp.append_header('Access-Control-Allow-Origin', "*") 156 | resp.status = falcon.HTTP_200 157 | except Exception as e: 158 | raise falcon.HTTPBadRequest( 159 | 'Text parsing failed', 160 | '{}'.format(e)) 161 | 162 | 163 | class SentsResources(object): 164 | """Returns sentences""" 165 | 166 | def on_post(self, req, resp): 167 | req_body = req.bounded_stream.read() 168 | json_data = json.loads(req_body.decode('utf8')) 169 | text = json_data.get('text') 170 | model_name = json_data.get('model', 'en') 171 | 172 | try: 173 | model = get_model(model_name) 174 | sentences = Sentences(model, text) 175 | resp.body = json.dumps(sentences.to_json(), sort_keys=True, 176 | indent=2) 177 | resp.content_type = 'text/string' 178 | resp.append_header('Access-Control-Allow-Origin', "*") 179 | resp.status = falcon.HTTP_200 180 | except Exception as e: 181 | raise falcon.HTTPBadRequest( 182 | 'Sentence tokenization failed', 183 | '{}'.format(e)) 184 | 185 | 186 | class SentsDepResources(object): 187 | """Returns sentences and dependency parses""" 188 | 189 | def on_post(self, req, resp): 190 | req_body = req.bounded_stream.read() 191 | json_data = json.loads(req_body.decode('utf8')) 192 | text = json_data.get('text') 193 | model_name = json_data.get('model', 'en') 194 | collapse_punctuation = json_data.get('collapse_punctuation', False) 195 | collapse_phrases = json_data.get('collapse_phrases', False) 196 | 197 | try: 198 | model = get_model(model_name) 199 | sentences = SentencesDependencies(model, 200 | text, 201 | collapse_punctuation=collapse_punctuation, 202 | collapse_phrases=collapse_phrases) 203 | 204 | resp.body = json.dumps(sentences.to_json(), 205 | sort_keys=True, 206 | indent=2) 207 | resp.content_type = 'text/string' 208 | resp.append_header('Access-Control-Allow-Origin', "*") 209 | resp.status = falcon.HTTP_200 210 | except Exception as e: 211 | raise falcon.HTTPBadRequest( 212 | 'Sentence tokenization and Dependency parsing failed', 213 | '{}'.format(e)) 214 | 215 | 216 | APP = falcon.API() 217 | APP.add_route('/dep', DepResource()) 218 | APP.add_route('/ent', EntResource()) 219 | APP.add_route('/sents', SentsResources()) 220 | APP.add_route('/sents_dep', SentsDepResources()) 221 | APP.add_route('/{model_name}/schema', SchemaResource()) 222 | APP.add_route('/models', ModelsResource()) 223 | APP.add_route('/version', VersionResource()) 224 | -------------------------------------------------------------------------------- /displacy_service_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jgontrum/spacy-api-docker/249f96a2387741bc1037eab8318966a284cceeec/displacy_service_tests/__init__.py -------------------------------------------------------------------------------- /displacy_service_tests/test_parse.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import spacy 4 | from displacy_service.parse import Parse 5 | 6 | 7 | @pytest.fixture(scope="session") 8 | def nlp(): 9 | return spacy.load('en') 10 | 11 | 12 | def test_parse_to_json(nlp): 13 | parse = Parse(nlp, u'Hello, this is a parse.', False, False) 14 | json_model = parse.to_json() 15 | assert len(json_model['words']) == 7 16 | assert len(json_model['arcs']) == 6 17 | 18 | 19 | def test_collapse_punct(nlp): 20 | parse = Parse(nlp, u'Hello, this is a parse.', True, False) 21 | json_model = parse.to_json() 22 | assert len(json_model['words']) == 5 23 | assert len(json_model['arcs']) == 4 24 | assert [w['text'] for w in json_model['words']] == [u'Hello,', u'this', u'is', u'a', u'parse.'] 25 | 26 | 27 | def test_collapse_phrases(nlp): 28 | parse = Parse(nlp, u'This example is a parse.', False, True) 29 | json_model = parse.to_json() 30 | assert len(json_model['words']) == 4 31 | assert len(json_model['arcs']) == 3 32 | assert [w['text'] for w in json_model['words']] == [u'This example', u'is', u'a parse', u'.'] 33 | -------------------------------------------------------------------------------- /displacy_service_tests/test_server.py: -------------------------------------------------------------------------------- 1 | import falcon.testing 2 | import pytest 3 | import json 4 | 5 | from displacy_service.server import APP, MODELS 6 | 7 | 8 | model = MODELS[0] 9 | 10 | 11 | @pytest.fixture() 12 | def api(): 13 | return falcon.testing.TestClient(APP) 14 | 15 | 16 | def test_deps(api): 17 | result = api.simulate_post( 18 | path='/dep', 19 | body='{{"text": "This is a test.", "model": "{model}", "collapse_punctuation": false, "collapse_phrases": false}}'.format(model=model) 20 | ) 21 | result = json.loads(result.text) 22 | words = [w['text'] for w in result['words']] 23 | assert words == ["This", "is", "a", "test", "."] 24 | 25 | 26 | def test_ents(api): 27 | result = api.simulate_post( 28 | path='/ent', 29 | body='{{"text": "What a great company Google is.", "model": "{model}"}}'.format(model=model)) 30 | ents = json.loads(result.text) 31 | assert ents == [ 32 | {"start": 21, "end": 27, "type": "ORG", "text": "Google"}] 33 | 34 | 35 | def test_sents(api): 36 | sentences = api.simulate_post( 37 | path='/sents', 38 | body='{{"text": "This a test that should split into sentences! This is the second. Is this the third?", "model": "{model}"}}'.format(model=model) 39 | ) 40 | 41 | assert sentences.json == ['This a test that should split into sentences!', 'This is the second.', 'Is this the third?'] 42 | 43 | 44 | def test_sents_dep(api): 45 | sentence_parse = api.simulate_post( 46 | path='/sents_dep', 47 | body='{{"text": "This a test that should split into sentences! This is the second. Is this the third?", "model": "{model}", "collapse_punctuation": false, "collapse_phrases": false}}'.format(model=model) 48 | ) 49 | sentences = [sp["sentence"] for sp in sentence_parse.json] 50 | assert sentences == [ 51 | "This a test that should split into sentences!", 52 | "This is the second.", 53 | "Is this the third?", 54 | ] 55 | words = [[w["text"] for w in sp["dep_parse"]["words"]] for sp in sentence_parse.json] 56 | assert words == [ 57 | ["This", "a", "test", "that", "should", "split", "into", "sentences", "!"], 58 | ["This", "is", "the", "second", "."], 59 | ["Is", "this", "the", "third", "?"], 60 | ] 61 | 62 | 63 | @pytest.mark.parametrize('endpoint, expected_message', [ 64 | ('/dep', 'Dependency parsing failed'), 65 | ('/ent', 'Text parsing failed'), 66 | ('/sents', 'Sentence tokenization failed'), 67 | ('/sents_dep', 'Sentence tokenization and Dependency parsing failed'), 68 | ]) 69 | def test_bad_model_error_handling(endpoint, expected_message, api): 70 | response = api.simulate_post( 71 | path=endpoint, 72 | body='{"text": "Here is some text for testing.", "model": "fake_model"}' 73 | ) 74 | assert expected_message == response.json['title'] 75 | assert "Can't find model 'fake_model'." in response.json["description"] 76 | -------------------------------------------------------------------------------- /docker/all/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jgontrum/spacyapi:base_v2 2 | 3 | ENV languages "en de es fr it nl pt" 4 | RUN cd /app && env/bin/download_models 5 | -------------------------------------------------------------------------------- /docker/de/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jgontrum/spacyapi:base_v2 2 | 3 | ENV languages "de" 4 | RUN cd /app && env/bin/download_models 5 | -------------------------------------------------------------------------------- /docker/en/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jgontrum/spacyapi:base_v2 2 | 3 | ENV languages "en" 4 | RUN cd /app && env/bin/download_models 5 | -------------------------------------------------------------------------------- /docker/en/Dockerfile.lg: -------------------------------------------------------------------------------- 1 | FROM jgontrum/spacyapi:base_v2 2 | 3 | ENV languages "en_core_web_lg" 4 | RUN cd /app && env/bin/download_models 5 | -------------------------------------------------------------------------------- /docker/en/Dockerfile.md: -------------------------------------------------------------------------------- 1 | FROM jgontrum/spacyapi:base_v2 2 | 3 | ENV languages "en_core_web_md" 4 | RUN cd /app && env/bin/download_models 5 | -------------------------------------------------------------------------------- /docker/es/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jgontrum/spacyapi:base_v2 2 | 3 | ENV languages "es" 4 | RUN cd /app && env/bin/download_models 5 | -------------------------------------------------------------------------------- /docker/fr/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jgontrum/spacyapi:base_v2 2 | 3 | ENV languages "fr" 4 | RUN cd /app && env/bin/download_models 5 | -------------------------------------------------------------------------------- /docker/it/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jgontrum/spacyapi:base_v2 2 | 3 | ENV languages "it" 4 | RUN cd /app && env/bin/download_models 5 | -------------------------------------------------------------------------------- /docker/nl/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jgontrum/spacyapi:base_v2 2 | 3 | ENV languages "nl" 4 | RUN cd /app && env/bin/download_models 5 | -------------------------------------------------------------------------------- /docker/pt/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM jgontrum/spacyapi:base_v2 2 | 3 | ENV languages "pt" 4 | RUN cd /app && env/bin/download_models 5 | -------------------------------------------------------------------------------- /frontend/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all clean run 2 | 3 | all: node_modules 4 | 5 | node_modules: 6 | npm install 7 | 8 | clean: 9 | rm -rfv node_modules/ 10 | 11 | start: node_modules 12 | NODE_ENV=production node_modules/harp/bin/harp server --port 8080 13 | -------------------------------------------------------------------------------- /frontend/_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "index": { 3 | "api": "/dep", 4 | "default_language": "", 5 | "description": "Visualise spaCy's guess at the syntactic structure of a sentence. Arrows point from children to heads, and are labelled by their relation type.", 6 | "github": "explosion/displacy", 7 | "languages": {}, 8 | "readmore": "https://explosion.ai/blog/displacy-js-nlp-visualizer", 9 | "scripts": [ 10 | "displacy", 11 | "main" 12 | ], 13 | "title": "displaCy Dependency Visualizer" 14 | } 15 | } -------------------------------------------------------------------------------- /frontend/_layout.jade: -------------------------------------------------------------------------------- 1 | //- ---------------------------------- 2 | //- 💥 DEMOS > LAYOUT 3 | //- ---------------------------------- 4 | 5 | include _mixins 6 | 7 | doctype html 8 | html(lang="en") 9 | 10 | head 11 | title #{title ? title + " | " : ""} Demos | Explosion AI 12 | 13 | meta(http-equiv="content-type" content="text/html; charset=utf-8") 14 | meta(name="viewport" content="width=device-width, initial-scale=1.0") 15 | meta(name="referrer" content="always") 16 | link(rel="shortcut icon" href="/assets/img/favicon.ico") 17 | 18 | link(rel="stylesheet" href="assets/css/style.css") 19 | 20 | if stylesheets 21 | each stylesheet in stylesheets 22 | link(rel="stylesheet" href="assets/css/#{stylesheet}.css") 23 | 24 | if partial("../_sidebar") 25 | link(rel="stylesheet" href="/assets/css/sidebar.css") 26 | 27 | meta(name="twitter:card" content="summary_large_image") 28 | meta(name="twitter:site" content="@" + twitter) 29 | meta(name="twitter:title" content=title) 30 | meta(name="twitter:description" content=description) 31 | meta(name="twitter:image" content="#{demos_url}/displacy/assets/img/preview.jpg") 32 | 33 | meta(property="og:type" content="website") 34 | meta(property="og:site_name" content=sitename) 35 | meta(property="og:url" content=url) 36 | meta(property="og:title" content=title) 37 | meta(property="og:description" content=description) 38 | meta(property="og:image" content="#{demos_url}/displacy/assets/img/preview.jpg") 39 | 40 | script var api = '#{api}'; 41 | 42 | body 43 | 44 | !=partial("../_sidebar") 45 | 46 | main.o-main 47 | !=yield 48 | 49 | if scripts 50 | each script in scripts 51 | script(src="assets/js/#{script}.js") 52 | -------------------------------------------------------------------------------- /frontend/_mixins.jade: -------------------------------------------------------------------------------- 1 | //- ---------------------------------- 2 | //- 💥 MIXINS 3 | //- ---------------------------------- 4 | 5 | //- Error 6 | 7 | mixin error(message) 8 | #error.c-alert.c-alert--error.u-text-small(onclick="this.classList.add('c-alert--is-closing'); setTimeout(function() { this.style.display = 'none'; this.classList.remove('c-alert--is-closing') }.bind(this), 2500)")=message 9 | 10 | 11 | //- Icon 12 | 13 | mixin icon(name, size) 14 | svg.o-icon(aria-hidden="true" viewBox="0 0 20 20" width="20" height="20" fill="currentColor")&attributes(attributes) 15 | use(xlink:href="assets/img/icons.svg#icon-#{name}") 16 | 17 | 18 | //- Input 19 | 20 | mixin input(placeholder, label) 21 | menu.c-input.c-input--full.u-text-medium 22 | if label 23 | label.c-input__label(for=id)=label 24 | 25 | input.c-input__field(type="text" placeholder=placeholder autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" onfocus="this.select()")&attributes(attributes) 26 | 27 | block 28 | 29 | 30 | //- Textarea 31 | 32 | mixin textarea(placeholder, label) 33 | menu.c-input.c-input--full.u-text-medium 34 | if label 35 | label.c-input__label.u-label(for=id)=label 36 | 37 | textarea.c-input__field.c-input__field--area(type="text" placeholder=placeholder autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" onfocus="this.select()")&attributes(attributes) 38 | 39 | block 40 | 41 | 42 | //- Search button 43 | 44 | mixin search(size) 45 | button.c-input__button(class=size ? "c-input__button--#{size}" : "" role="button")&attributes(attributes) 46 | +icon("search").c-input__button__icon 47 | +icon("spinner").c-input__button__spinner 48 | 49 | 50 | //- Dropdown 51 | 52 | mixin dropdown(type, name, label, options, checked) 53 | menu.c-input&attributes(attributes) 54 | label.c-input__label(for=name)=label 55 | +icon("arrow-down").c-input__icon 56 | input.c-dropdown__trigger(id=name type="checkbox" aria-hidden="true") 57 | 58 | ul.c-dropdown.u-text-small 59 | each option, id in options 60 | li 61 | input.c-dropdown__trigger(name=name value=id id="#{name}-#{id}" type=type checked=(checked.indexOf(id) != -1)) 62 | label.c-dropdown__option(for="#{name}-#{id}")=option 63 | +icon("check").c-dropdown__option__icon 64 | 65 | 66 | //- Button 67 | 68 | mixin button(icon, label) 69 | a.c-input(role="button" aria-label=label data-tooltip=label)&attributes(attributes) 70 | 71 | if icon 72 | +icon(icon).c-input__button 73 | 74 | block 75 | 76 | 77 | //- External Link 78 | 79 | mixin a(url, trusted) 80 | a(href=url target="_blank" rel=(!trusted) ? "noopener nofollow" : "")&attributes(attributes) 81 | block 82 | -------------------------------------------------------------------------------- /frontend/assets/css/_base.sass: -------------------------------------------------------------------------------- 1 | //- ---------------------------------- 2 | //- 💥 DEMOS > BASE 3 | //- ---------------------------------- 4 | 5 | //- Variables 6 | 7 | $font-primary: "Sailec", Helvetica, Arial, sans-serif !default 8 | $font-secondary: "Input Mono Compressed", Consolas, "Andale Mono", Menlo, Monaco, Courier, monospace !default 9 | 10 | $color-front: #1e1935 11 | $color-back: #fff 12 | $color-subtle: #ddd 13 | 14 | 15 | //- Mixins 16 | 17 | @mixin size($width, $height: $width) 18 | width: $width 19 | height: $height 20 | 21 | @mixin position($position, $pos-y, $pos-x, $pos-y-value, $pos-x-value) 22 | position: $position 23 | #{$pos-y}: $pos-y-value 24 | #{$pos-x}: $pos-x-value 25 | 26 | 27 | //- Reset 28 | 29 | * 30 | box-sizing: border-box 31 | padding: 0 32 | margin: 0 33 | border: 0 34 | outline: 0 35 | -webkit-font-smoothing: antialiased 36 | 37 | html 38 | font-family: sans-serif 39 | -ms-text-size-adjust: 100% 40 | -webkit-text-size-adjust: 100% 41 | 42 | @media(min-width: 1200px) 43 | font-size: 16px 44 | 45 | @media(max-width: 1199px) 46 | font-size: 12px 47 | 48 | body 49 | @include size(100%) 50 | background: $color-back 51 | color: $color-front 52 | font: normal normal 1rem/#{1.5} $font-primary 53 | padding: 0 54 | margin: 0 55 | overflow: auto 56 | min-height: 100vh 57 | 58 | a 59 | color: inherit 60 | text-decoration: none 61 | 62 | ul, ol 63 | list-style: none 64 | -------------------------------------------------------------------------------- /frontend/assets/css/_displacy-theme.sass: -------------------------------------------------------------------------------- 1 | //- ---------------------------------- 2 | //- 💥 DISPLACY THEME 3 | //- ---------------------------------- 4 | 5 | $displacy: ( text: #ffffff, arrow: #87908a, arrowhead: #ffffff, label: #87908a, tag: #a6e22d, tag-ent: #8e7dff, tag-verb: #e00084, tag-noun: #fd9720, tag-punct: #87908a) 6 | 7 | .displacy-word 8 | font-size: 1.25rem 9 | color: map-get($displacy, text) 10 | 11 | .displacy-tag 12 | font: 1rem $font-secondary 13 | color: map-get($displacy, tag) 14 | 15 | .displacy-label 16 | font: 0.85rem $font-secondary 17 | color: map-get($displacy, label) 18 | 19 | .displacy-arc 20 | color: map-get($displacy, arrow) 21 | 22 | .displacy-arrowhead 23 | color: map-get($displacy, arrowhead) 24 | 25 | @each $format, $tags in (spacy: (NN: tag-noun, VB: tag-verb, ".": tag-punct, ",": tag-punct), google: (NOUN: tag-noun, VERB: tag-verb, "PUNCT": tag-punct)) 26 | @each $tag, $color in $tags 27 | [data-format="#{$format}"] .displacy-tag[data-tag^="#{$tag}"] 28 | color: map-get($displacy, $color) 29 | -------------------------------------------------------------------------------- /frontend/assets/css/_ui.sass: -------------------------------------------------------------------------------- 1 | //- ---------------------------------- 2 | //- 💥 UI 3 | //- ---------------------------------- 4 | 5 | $theme-bg: $color-back !default 6 | $theme-bg2: $color-back !default 7 | $theme-color: $color-front !default 8 | 9 | $field-border-radius: 0.5rem !default 10 | $color-error: #f44d61 !default 11 | 12 | 13 | //- Utilities 14 | 15 | .u-text-small.u-text-small 16 | font-size: 0.85rem 17 | 18 | .u-text-medium.u-text-medium 19 | font-size: 1.25rem 20 | 21 | .u-text-large.u-text-large 22 | font-size: 1.75rem 23 | 24 | .u-label 25 | font: normal bold 1em $font-primary 26 | 27 | .u-code.u-code 28 | font-family: $font-secondary 29 | 30 | .u-heading-1 31 | font: normal bold 2.75rem/#{1.375} $font-primary 32 | margin-bottom: 1rem 33 | 34 | .u-heading-2 35 | font: normal bold 2rem/#{1.375} $font-primary 36 | margin-bottom: 0.5rem 37 | 38 | 39 | //- Base 40 | 41 | .o-main 42 | width: 100% 43 | 44 | .o-container 45 | flex: 1 46 | position: relative 47 | background: inherit 48 | width: 100% 49 | 50 | .o-banner 51 | flex: 0 0 1 52 | background: $theme-bg 53 | color: $theme-color 54 | padding: 2rem 3rem 55 | width: 100% 56 | 57 | .o-form 58 | display: flex 59 | flex-flow: row wrap 60 | 61 | .o-block 62 | margin-bottom: 2.5rem 63 | 64 | a 65 | border-bottom: 1px solid 66 | 67 | .o-button 68 | display: inline-block 69 | background: $theme-color 70 | box-shadow: 2px 2px 0 $theme-bg2 71 | color: $theme-bg2 72 | padding: 0.75em 1em 73 | border-radius: $field-border-radius 74 | 75 | 76 | //- Components: Input 77 | 78 | .c-input 79 | display: flex 80 | align-items: center 81 | flex-flow: row wrap 82 | position: relative 83 | background: $theme-bg2 84 | padding: 0.5rem 1rem 85 | margin: 0 0.75rem 0.75rem 0 86 | border-color: $theme-bg2 87 | border-radius: $field-border-radius 88 | width: auto 89 | 90 | &.c-input--full 91 | flex: 1 92 | 93 | .c-input__label 94 | display: flex 95 | align-items: center 96 | justify-content: space-between 97 | cursor: pointer 98 | width: 100% 99 | user-select: none 100 | 101 | .c-input__field 102 | flex: 1 103 | font: inherit 104 | background: transparent 105 | color: inherit 106 | line-height: 1 107 | width: 100% 108 | 109 | &.c-input__field--area 110 | padding: 1rem 0.5rem 111 | line-height: 1.375 112 | min-height: 10rem 113 | resize: vertical 114 | 115 | &::placeholder 116 | color: rgba($theme-color, 0.5) 117 | 118 | ::selection 119 | background: $theme-color 120 | color: $theme-bg2 121 | 122 | .c-input__icon 123 | margin-left: 0.75em 124 | 125 | .c-input__button 126 | font-size: inherit 127 | background: transparent 128 | color: inherit 129 | cursor: pointer 130 | 131 | &.c-input__button--large 132 | font-size: 1.5em 133 | 134 | .c-input__button__icon 135 | @include size(1.25em) 136 | 137 | .loading & 138 | display: none 139 | 140 | .c-input__button__spinner 141 | @include size(1.15em) 142 | display: none 143 | animation: spinner 0.5s linear infinite 144 | 145 | .loading & 146 | display: inline-block 147 | 148 | @keyframes spinner 149 | to 150 | transform: rotate(360deg) 151 | 152 | 153 | //- Components: Dropdown 154 | 155 | .c-dropdown 156 | @include position(absolute, top, left, calc(100% - #{$field-border-radius}), 0) 157 | background: $theme-bg2 158 | display: none 159 | width: 100% 160 | z-index: 100 161 | user-select: none 162 | list-style: none 163 | border-bottom-left-radius: $field-border-radius 164 | border-bottom-right-radius: $field-border-radius 165 | max-height: 50vh 166 | overflow-y: scroll 167 | 168 | .c-dropdown__trigger:checked + & 169 | display: block 170 | z-index: 10 171 | 172 | .c-dropdown__option 173 | display: flex 174 | align-items: center 175 | justify-content: space-between 176 | color: $theme-color 177 | cursor: pointer 178 | padding: 0.75rem 179 | border-top: 1px solid darken($theme-bg2, 10) 180 | line-height: 1.25 181 | width: 100% 182 | 183 | .c-dropdown__trigger:checked + & 184 | font-weight: bold 185 | 186 | .c-dropdown__option__icon 187 | opacity: 1 188 | 189 | .c-dropdown__option__icon 190 | flex: 1.25rem 0 0 191 | opacity: 0 192 | 193 | .c-dropdown__trigger 194 | display: none 195 | 196 | 197 | //- Components: Tooltips 198 | 199 | [data-tooltip] 200 | position: relative 201 | cursor: default 202 | 203 | &:after 204 | @include position(absolute, top, left, 125%, 50%) 205 | display: inline-block 206 | background: $theme-bg 207 | border-radius: 2px 208 | color: $theme-color 209 | content: attr(data-tooltip) 210 | font: normal normal 0.85rem $font-primary 211 | opacity: 0 212 | padding: 0.25em 0.5em 213 | transform: translateX(-50%) translateY(-2px) 214 | transition: opacity 0.1s ease-out, transform 0.1s ease-out 215 | visibility: hidden 216 | white-space: nowrap 217 | z-index: 200 218 | 219 | &:hover:after 220 | opacity: 1 221 | transform: translateX(-50%) translateY(0) 222 | visibility: visible 223 | 224 | 225 | //- Components: Alerts 226 | 227 | .c-alert 228 | @include position(fixed, bottom, right, 1rem, 1rem) 229 | padding: 1rem 230 | z-index: 100 231 | color: $color-back 232 | background: $color-front 233 | font-weight: bold 234 | cursor: pointer 235 | line-height: 1 236 | display: none 237 | 238 | &:before 239 | content: '\00d7' 240 | display: inline-block 241 | font-size: 1.5em 242 | margin-right: 0.5em 243 | transition: transform 0.05s ease 244 | vertical-align: middle 245 | 246 | &:hover:before 247 | transform: scale(1.25) translateY(0.025em) 248 | 249 | &.c-alert--is-closing 250 | transform: translateX(150%) 251 | transition: transform 0.2s ease 252 | 253 | &.c-alert--error 254 | background: $color-error 255 | 256 | 257 | 258 | //- Selection 259 | 260 | ::selection 261 | background: $theme-bg 262 | color: $theme-color 263 | -------------------------------------------------------------------------------- /frontend/assets/css/style.sass: -------------------------------------------------------------------------------- 1 | //- ---------------------------------- 2 | //- 💥 DEMOS > DISPLACY DEP 3 | //- ---------------------------------- 4 | 5 | //- Variables 6 | 7 | $theme-bg: #272822 8 | $theme-bg2: desaturate(darken($theme-bg, 5), 10) 9 | $theme-color: #eee 10 | 11 | 12 | //- Imports 13 | 14 | @import base 15 | @import ui 16 | 17 | @import displacy-theme 18 | 19 | 20 | //- Container 21 | 22 | body 23 | background: $theme-bg 24 | 25 | .o-main 26 | display: flex 27 | flex-flow: column nowrap 28 | background: $theme-bg 29 | overflow-x: hidden 30 | 31 | .o-banner 32 | padding: 1rem 100px 0 1rem 33 | 34 | .o-container 35 | overflow-x: auto 36 | -------------------------------------------------------------------------------- /frontend/assets/img/icons.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | arrow-right 5 | 6 | 7 | 8 | arrow-down 9 | 10 | 11 | 12 | github 13 | 14 | 15 | 16 | twitter 17 | 18 | 19 | 20 | search 21 | 22 | 23 | 24 | spinner 25 | 26 | 27 | 28 | check 29 | 30 | 31 | 32 | download 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /frontend/assets/img/preview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jgontrum/spacy-api-docker/249f96a2387741bc1037eab8318966a284cceeec/frontend/assets/img/preview.jpg -------------------------------------------------------------------------------- /frontend/assets/js/displacy.js: -------------------------------------------------------------------------------- 1 | //- ---------------------------------- 2 | //- 💥 DISPLACY 3 | //- ---------------------------------- 4 | 5 | 'use strict'; 6 | 7 | class displaCy { 8 | constructor (api, options) { 9 | this.api = api; 10 | this.container = typeof(options.container) == 'string' ? document.querySelector(options.container || '#displacy') : options.container; 11 | 12 | this.format = options.format || 'spacy'; 13 | this.defaultText = options.defaultText || 'Hello World.'; 14 | this.defaultModel = options.defaultModel || 'en'; 15 | this.collapsePunct = (options.collapsePunct != undefined) ? options.collapsePunct : true; 16 | this.collapsePhrase = (options.collapsePhrase != undefined) ? options.collapsePhrase : true; 17 | 18 | this.onStart = options.onStart || false; 19 | this.onSuccess = options.onSuccess || false; 20 | this.onError = options.onError || false; 21 | 22 | this.distance = options.distance || 200; 23 | this.offsetX = options.offsetX || 50; 24 | this.arrowSpacing = options.arrowSpacing || 20; 25 | this.arrowWidth = options.arrowWidth || 10; 26 | this.arrowStroke = options.arrowStroke || 2; 27 | this.wordSpacing = options.wordSpacing || 75; 28 | this.font = options.font || 'inherit'; 29 | this.color = options.color || '#000000'; 30 | this.bg = options.bg || '#ffffff'; 31 | } 32 | 33 | parse(text = this.defaultText, model = this.defaultModel, settings = {}) { 34 | if(typeof this.onStart === 'function') this.onStart(); 35 | 36 | let xhr = new XMLHttpRequest(); 37 | xhr.open('POST', this.api, true); 38 | xhr.setRequestHeader('Content-type', 'text/plain'); 39 | xhr.onreadystatechange = () => { 40 | if(xhr.readyState === 4 && xhr.status === 200) { 41 | if(typeof this.onSuccess === 'function') this.onSuccess(); 42 | this.render(JSON.parse(xhr.responseText), settings, text); 43 | } 44 | 45 | else if(xhr.status !== 200) { 46 | if(typeof this.onError === 'function') this.onError(xhr.statusText); 47 | } 48 | } 49 | 50 | xhr.onerror = () => { 51 | xhr.abort(); 52 | if(typeof this.onError === 'function') this.onError(); 53 | } 54 | 55 | xhr.send(JSON.stringify({ text, model, 56 | collapse_punctuation: (settings.collapsePunct != undefined) ? settings.collapsePunct : this.collapsePunct, 57 | collapse_phrases: (settings.collapsePhrase != undefined) ? settings.collapsePhrase : this.collapsePhrase 58 | })); 59 | } 60 | 61 | render(parse, settings = {}, text) { 62 | parse = this.handleConversion(parse); 63 | 64 | if(text) console.log(`%c💥 JSON for "${text}"\n%c${JSON.stringify(parse)}`, 'font: bold 16px/2 arial, sans-serif', 'font: 13px/1.5 Consolas, "Andale Mono", Menlo, Monaco, Courier, monospace'); 65 | 66 | this.levels = [...new Set(parse.arcs.map(({ end, start }) => end - start).sort((a, b) => a - b))]; 67 | this.highestLevel = this.levels.indexOf(this.levels.slice(-1)[0]) + 1; 68 | this.offsetY = this.distance / 2 * this.highestLevel; 69 | 70 | const width = this.offsetX + parse.words.length * this.distance; 71 | const height = this.offsetY + 3 * this.wordSpacing; 72 | 73 | this.container.innerHTML = ''; 74 | this.container.appendChild(this._el('svg', { 75 | id: 'displacy-svg', 76 | classnames: [ 'displacy' ], 77 | attributes: [ 78 | [ 'width', width ], 79 | [ 'height', height ], 80 | [ 'viewBox', `0 0 ${width} ${height}`], 81 | [ 'preserveAspectRatio', 'xMinYMax meet' ], 82 | [ 'data-format', this.format ] 83 | ], 84 | style: [ 85 | [ 'color', settings.color || this.color ], 86 | [ 'background', settings.bg || this.bg ], 87 | [ 'fontFamily', settings.font || this.font ] 88 | ], 89 | children: [ 90 | ...this.renderWords(parse.words), 91 | ...this.renderArrows(parse.arcs) 92 | ] 93 | })); 94 | } 95 | 96 | renderWords(words) { 97 | return (words.map(( { text, tag, data = [] }, i) => this._el('text', { 98 | classnames: [ 'displacy-token' ], 99 | attributes: [ 100 | ['fill', 'currentColor'], 101 | ['data-tag', tag], 102 | ['text-anchor', 'middle'], 103 | ['y', this.offsetY + this.wordSpacing], 104 | ...data.map(([attr, value]) => (['data-' + attr.replace(' ', '-'), value])) 105 | ], 106 | children: [ 107 | this._el('tspan', { 108 | classnames: [ 'displacy-word' ], 109 | attributes: [ 110 | ['x', this.offsetX + i * this.distance], 111 | ['fill', 'currentColor'], 112 | ['data-tag', tag] 113 | ], 114 | text: text 115 | }), 116 | this._el('tspan', { 117 | classnames: [ 'displacy-tag' ], 118 | attributes: [ 119 | ['x', this.offsetX + i * this.distance], 120 | ['dy', '2em'], 121 | ['fill', 'currentColor'], 122 | ['data-tag', tag] 123 | ], 124 | text: tag 125 | }) 126 | ] 127 | }))); 128 | } 129 | 130 | renderArrows(arcs) { 131 | return arcs.map(({ label, end, start, dir, data = [] }, i) => { 132 | const rand = Math.random().toString(36).substr(2, 8); 133 | const level = this.levels.indexOf(end - start) + 1; 134 | const startX = this.offsetX + start * this.distance + this.arrowSpacing * (this.highestLevel - level) / 4; 135 | const startY = this.offsetY; 136 | const endpoint = this.offsetX + (end - start) * this.distance + start * this.distance - this.arrowSpacing * (this.highestLevel - level) / 4; 137 | 138 | let curve = this.offsetY - level * this.distance / 2; 139 | if(curve == 0 && this.levels.length > 5) curve = -this.distance; 140 | 141 | return this._el('g', { 142 | classnames: [ 'displacy-arrow' ], 143 | attributes: [ 144 | [ 'data-dir', dir ], 145 | [ 'data-label', label ], 146 | ...data.map(([attr, value]) => (['data-' + attr.replace(' ', '-'), value])) 147 | ], 148 | children: [ 149 | this._el('path', { 150 | id: 'arrow-' + rand, 151 | classnames: [ 'displacy-arc' ], 152 | attributes: [ 153 | [ 'd', `M${startX},${startY} C${startX},${curve} ${endpoint},${curve} ${endpoint},${startY}`], 154 | [ 'stroke-width', this.arrowStroke + 'px' ], 155 | [ 'fill', 'none' ], 156 | [ 'stroke', 'currentColor' ], 157 | [ 'data-dir', dir ], 158 | [ 'data-label', label ] 159 | ] 160 | }), 161 | 162 | this._el('text', { 163 | attributes: [ 164 | [ 'dy', '1em' ] 165 | ], 166 | children: [ 167 | this._el('textPath', { 168 | xlink: '#arrow-' + rand, 169 | classnames: [ 'displacy-label' ], 170 | attributes: [ 171 | [ 'startOffset', '50%' ], 172 | [ 'fill', 'currentColor' ], 173 | [ 'text-anchor', 'middle' ], 174 | [ 'data-label', label ], 175 | [ 'data-dir', dir ] 176 | ], 177 | text: label 178 | }) 179 | ] 180 | }), 181 | 182 | this._el('path', { 183 | classnames: [ 'displacy-arrowhead' ], 184 | attributes: [ 185 | [ 'd', `M${(dir == 'left') ? startX : endpoint},${startY + 2} L${(dir == 'left') ? startX - this.arrowWidth + 2 : endpoint + this.arrowWidth - 2},${startY - this.arrowWidth} ${(dir == 'left') ? startX + this.arrowWidth - 2 : endpoint - this.arrowWidth + 2},${startY - this.arrowWidth}` ], 186 | [ 'fill', 'currentColor' ], 187 | [ 'data-label', label ], 188 | [ 'data-dir', dir ] 189 | ] 190 | }) 191 | ] 192 | }); 193 | }); 194 | } 195 | 196 | handleConversion(parse) { 197 | switch(this.format) { 198 | case 'spacy': return parse; break; 199 | case 'google': return({ 200 | words: parse.map(({ text: { content: text }, partOfSpeech: { tag }} ) => ({ text, tag })), 201 | arcs: parse.map(({ dependencyEdge: { label, headTokenIndex: j }}, i) => (i != j) ? ({ label, start: Math.min(i, j), end: Math.max(i, j), dir: (j > i) ? 'left' : 'right' }) : null).filter(word => word != null) 202 | }); break; 203 | default: return parse; 204 | } 205 | } 206 | 207 | _el(tag, options) { 208 | const { classnames = [], attributes = [], style = [], children = [], text, id, xlink } = options; 209 | const ns = 'http://www.w3.org/2000/svg'; 210 | const nsx = 'http://www.w3.org/1999/xlink'; 211 | const el = document.createElementNS(ns, tag); 212 | 213 | classnames.forEach(name => el.classList.add(name)); 214 | attributes.forEach(([attr, value]) => el.setAttribute(attr, value)); 215 | style.forEach(([ prop, value ]) => el.style[prop] = value); 216 | if(xlink) el.setAttributeNS(nsx, 'xlink:href', xlink); 217 | if(text) el.appendChild(document.createTextNode(text)); 218 | if(id) el.id = id; 219 | children.forEach(child => el.appendChild(child)); 220 | return el; 221 | } 222 | } 223 | -------------------------------------------------------------------------------- /frontend/assets/js/main.js: -------------------------------------------------------------------------------- 1 | //- ---------------------------------- 2 | //- 💥 DISPLACY DEMO 3 | //- ---------------------------------- 4 | 5 | 'use strict'; 6 | 7 | { 8 | const defaultText = 'displaCy uses JavaScript, SVG and CSS to show you how computers understand language'; 9 | const defaultModel = 'en'; 10 | const loading = () => document.body.classList.toggle('loading'); 11 | const onError = (err) => $('#error').style.display = 'block'; 12 | 13 | const displacy = new displaCy(api, { 14 | container: '#displacy', 15 | engine: 'spacy', 16 | defaultText: defaultText, 17 | defaultModel: defaultModel, 18 | collapsePunct: true, 19 | collapsePhrase: true, 20 | distance: 200, 21 | offsetX: 150, 22 | arrowSpacing: 10, 23 | arrowWidth: 8, 24 | arrowStroke: 2, 25 | wordSpacing: 40, 26 | font: 'inherit', 27 | color: '#f5f4f0', 28 | bg: '#272822', 29 | onStart: loading, 30 | onSuccess: loading 31 | }); 32 | 33 | 34 | // UI 35 | 36 | const $ = document.querySelector.bind(document); 37 | 38 | 39 | // First Run 40 | 41 | document.addEventListener('DOMContentLoaded', () => { 42 | const text = getQueryVar('text') || getQueryVar('full') || getQueryVar('manual') || getQueryVar('steps') || defaultText; 43 | const model = getQueryVar('model') || defaultModel; 44 | const collapsePunct = (getQueryVar('cpu')) ? (getQueryVar('cpu') == 0 ? 0 : 1) : 1; 45 | const collapsePhrase = (getQueryVar('cph')) ? (getQueryVar('cph') == 0 ? 0 : 1) : 1; 46 | 47 | const args = [text, model, { collapsePhrase, collapsePunct }]; 48 | 49 | if (getQueryVar('text')) updateView(...args); 50 | if (getQueryVar('full') || getQueryVar('manual') || getQueryVar('steps')) updateURL(...args); 51 | }); 52 | 53 | 54 | // Run Demo 55 | 56 | const run = ( 57 | text = $('#input').value || defaultText, 58 | model = $('[name="model"]:checked').value || defaultModel, 59 | settings = { 60 | collapsePunct: $('#settings-punctuation').checked, 61 | collapsePhrase: $('#settings-phrases').checked 62 | }) => { 63 | displacy.parse(text, model, settings); 64 | updateView(text, model, settings); 65 | updateURL(text, model, settings); 66 | } 67 | 68 | 69 | // UI Event Listeners 70 | 71 | $('#submit').addEventListener('click', ev => run()); 72 | $('#input').addEventListener('keydown', ev => (event.keyCode == 13) && run()); 73 | $('#download').addEventListener('click', ev => $('#download').setAttribute('href', downloadSVG()).click()); 74 | 75 | 76 | // Update View 77 | 78 | const updateView = (text, model, settings) => { 79 | $('#input').value = text; 80 | $(`[value="${model}"]`).checked = true; 81 | $('#settings-punctuation').checked = settings.collapsePunct; 82 | $('#settings-phrases').checked = settings.collapsePhrase; 83 | } 84 | 85 | 86 | // Update URL 87 | 88 | const updateURL = (text, model, settings) => { 89 | const url = [ 90 | 'text=' + encodeURIComponent(text), 91 | 'model=' + encodeURIComponent(model), 92 | 'cpu=' + (settings.collapsePunct ? 1 : 0), 93 | 'cph=' + (settings.collapsePhrase ? 1 : 0) 94 | ]; 95 | 96 | history.pushState({ text, model, settings }, null, '?' + url.join('&')); 97 | } 98 | 99 | // Get URL Query Variables 100 | 101 | const getQueryVar = (key) => { 102 | const query = window.location.search.substring(1); 103 | const params = query.split('&').map(param => param.split('=')); 104 | 105 | for (let param of params) 106 | if (param[0] == key) return decodeURIComponent(param[1]); 107 | return false; 108 | } 109 | 110 | 111 | // Download SVG 112 | 113 | const downloadSVG = () => { 114 | const serializer = new XMLSerializer(); 115 | return ($('#displacy-svg')) ? 'data:image/svg+xml;charset=utf-8,' + encodeURIComponent('\r\n' + serializer.serializeToString($('#displacy-svg'))) : false; 116 | } 117 | } -------------------------------------------------------------------------------- /frontend/index.jade: -------------------------------------------------------------------------------- 1 | //- ---------------------------------- 2 | //- 💥 DEMOS > DISPLACY 3 | //- ---------------------------------- 4 | 5 | include _mixins 6 | 7 | header.o-banner.o-form 8 | +input("Your sentence here...")#input 9 | +search#submit 10 | 11 | +dropdown("radio", "model", "Model", languages, default_language) 12 | 13 | +dropdown("checkbox", "settings", "Settings", { punctuation: "Collapse Punctuation", phrases: "Collapse Phrases" }, [ "punctuation", "phrases" ]) 14 | 15 | +button("download", "Download")#download(download="displacy.svg") 16 | 17 | .o-container#displacy 18 | 19 | +error("Oops, something went wrong. Please try again!") 20 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "displacy-demo", 3 | "version": "1.0.2", 4 | "description": "An open-source NLP visualiser for the modern web", 5 | "main": "index.jade", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/explosion/displacy.git" 12 | }, 13 | "keywords": [ 14 | "nlp", 15 | "visualizer", 16 | "spacy" 17 | ], 18 | "author": "Ines Montani", 19 | "license": "MIT", 20 | "bugs": { 21 | "url": "https://github.com/explosion/displacy/issues" 22 | }, 23 | "homepage": "https://github.com/explosion/displacy#readme", 24 | "devDependencies": { 25 | "harp": "^0.29.0" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | spacy==2.2.* 2 | falcon==2.0.0 3 | pytest 4 | requests==2.21.0 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='displacy_service', 5 | version='0.1', 6 | description='REST microservice for Explosion AI\'s interactive demos ' + 7 | 'and visualisers.', 8 | author='explosion.ai & Johannes Gontrum', 9 | author_email='gontrum@me.com', 10 | include_package_data=True, 11 | license='MIT', 12 | entry_points={ 13 | 'console_scripts': [ 14 | 'download_models = displacy_service.scripts.download:download_models', 15 | 'run_server = displacy_service.scripts.app:run' 16 | ] 17 | } 18 | ) 19 | -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sed -i "s/PORT/$PORT/g" /etc/nginx/sites-enabled/default 4 | supervisord -n 5 | --------------------------------------------------------------------------------