├── .dockerignore
├── .gitignore
├── .travis.yml
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── build_sassc.sh
├── config
├── nginx.conf
└── supervisor.conf
├── displacy_service
├── __init__.py
├── parse.py
├── scripts
│ ├── __init__.py
│ ├── app.py
│ └── download.py
└── server.py
├── displacy_service_tests
├── __init__.py
├── test_parse.py
└── test_server.py
├── docker
├── all
│ └── Dockerfile
├── de
│ └── Dockerfile
├── en
│ ├── Dockerfile
│ ├── Dockerfile.lg
│ └── Dockerfile.md
├── es
│ └── Dockerfile
├── fr
│ └── Dockerfile
├── it
│ └── Dockerfile
├── nl
│ └── Dockerfile
└── pt
│ └── Dockerfile
├── frontend
├── Makefile
├── _data.json
├── _layout.jade
├── _mixins.jade
├── assets
│ ├── css
│ │ ├── _base.sass
│ │ ├── _displacy-theme.sass
│ │ ├── _ui.sass
│ │ └── style.sass
│ ├── img
│ │ ├── icons.svg
│ │ └── preview.jpg
│ └── js
│ │ ├── displacy.js
│ │ └── main.js
├── index.jade
├── package-lock.json
└── package.json
├── requirements.txt
├── setup.py
└── start.sh
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 | *.swp
3 | *.swo
4 | .DS_STORE
5 | *.md
6 | env
7 | .idea
8 | *.egg-info
9 | !README.md
10 | coverage.xml
11 | .coverage
12 | frontend/node_modules
13 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # Spyder project settings
86 | .spyderproject
87 |
88 | # Rope project settings
89 | .ropeproject
90 |
91 | frontend/node_modules
92 | .DS_Store
93 | .venv
94 |
95 | .idea/
96 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: required
2 |
3 | language: generic
4 |
5 | services:
6 | - docker
7 |
8 | before_install:
9 | - docker login -u="$DOCKERUSER" -p="$DOCKERPASS"
10 | - docker login -u="$HEROKUUSER" -p="$HEROKUPASS" $HEROKUREPO
11 |
12 | install:
13 | - travis_wait 30 docker build -t jgontrum/spacyapi:base_v2 .
14 | - travis_wait 30 docker build -t jgontrum/spacyapi:en_v2 -f docker/en/Dockerfile .
15 | - travis_wait 30 docker build -t jgontrum/spacyapi:en_v2_lg -f docker/en/Dockerfile.lg .
16 | - travis_wait 30 docker build -t jgontrum/spacyapi:en_v2_md -f docker/en/Dockerfile.md .
17 | - travis_wait 30 docker build -t jgontrum/spacyapi:de_v2 -f docker/de/Dockerfile .
18 | - travis_wait 30 docker build -t jgontrum/spacyapi:es_v2 -f docker/es/Dockerfile .
19 | - travis_wait 30 docker build -t jgontrum/spacyapi:fr_v2 -f docker/fr/Dockerfile .
20 | - travis_wait 30 docker build -t jgontrum/spacyapi:nl_v2 -f docker/nl/Dockerfile .
21 | - travis_wait 30 docker build -t jgontrum/spacyapi:pt_v2 -f docker/pt/Dockerfile .
22 | - travis_wait 30 docker build -t jgontrum/spacyapi:it_v2 -f docker/it/Dockerfile .
23 | - travis_wait 30 docker build -t jgontrum/spacyapi:all_v2 -f docker/all/Dockerfile .
24 |
25 | after_success:
26 | - docker push jgontrum/spacyapi:base_v2
27 | - docker push jgontrum/spacyapi:en_v2
28 | - docker push jgontrum/spacyapi:en_v2_lg
29 | - docker push jgontrum/spacyapi:en_v2_md
30 | - docker push jgontrum/spacyapi:de_v2
31 | - docker push jgontrum/spacyapi:es_v2
32 | - docker push jgontrum/spacyapi:fr_v2
33 | - docker push jgontrum/spacyapi:pt_v2
34 | - docker push jgontrum/spacyapi:it_v2
35 | - docker push jgontrum/spacyapi:nl_v2
36 | - docker push jgontrum/spacyapi:all_v2
37 | - docker tag jgontrum/spacyapi:en_v2 registry.heroku.com/spacy-en/web
38 | - docker push registry.heroku.com/spacy-en/web
39 | - docker tag jgontrum/spacyapi:de_v2 registry.heroku.com/spacy-de/web
40 | - docker push registry.heroku.com/spacy-de/web
41 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.6
2 | LABEL maintainer="gontrum@me.com"
3 | LABEL version="0.2"
4 | LABEL description="Base image, containing no language models."
5 |
6 | # Install the required packages
7 | RUN apt-get update && apt-get install -y \
8 | build-essential \
9 | libssl-dev \
10 | supervisor \
11 | curl \
12 | nginx && \
13 | apt-get -q clean -y && rm -rf /var/lib/apt/lists/* && rm -f /var/cache/apt/*.bin
14 |
15 | # Install node for the frontend
16 | RUN curl -sL https://deb.nodesource.com/setup_10.x | bash - && \
17 | apt-get install -y nodejs &&\
18 | apt-get -q clean -y && rm -rf /var/lib/apt/lists/* && rm -f /var/cache/apt/*.bin
19 |
20 | # Copy and set up the app
21 | COPY . /app
22 |
23 | # Build SASSC
24 | RUN bash /app/build_sassc.sh
25 |
26 | # Build app
27 | RUN cd /app/frontend && make clean && make
28 | RUN cd /app && make clean && make
29 |
30 | # Configure nginx & supervisor
31 | RUN mv /app/config/nginx.conf /etc/nginx/sites-available/default &&\
32 | echo "daemon off;" >> /etc/nginx/nginx.conf && \
33 | mv /app/config/supervisor.conf /etc/supervisor/conf.d/
34 |
35 | ENV PORT 80
36 | EXPOSE 80
37 | CMD ["bash", "/app/start.sh"]
38 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (C) 2016 ExplosionAI UG (haftungsbeschränkt)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: clean start build-and-push test
2 |
3 | PYTHON3=python3.6
4 |
5 | all: env/bin/python
6 |
7 | env/bin/python:
8 | $(PYTHON3) -m venv env
9 | env/bin/pip install --upgrade pip
10 | env/bin/pip install wheel
11 | env/bin/pip install -r requirements.txt
12 | env/bin/python setup.py develop
13 |
14 | clean:
15 | rm -rfv bin develop-eggs dist downloads eggs env parts .cache .scannerwork
16 | rm -fv .DS_Store .coverage .installed.cfg bootstrap.py .coverage
17 | find . -name '*.pyc' -exec rm -fv {} \;
18 | find . -name '*.pyo' -exec rm -fv {} \;
19 | find . -depth -name '*.egg-info' -exec rm -rfv {} \;
20 | find . -depth -name '__pycache__' -exec rm -rfv {} \;
21 |
22 | test: env/bin/python
23 | languages=en env/bin/download_models
24 | env/bin/py.test displacy_service_tests
25 |
26 | start: env/bin/python
27 | env/bin/run_server
28 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # spaCy API Docker
2 |
3 | **Ready-to-use Docker images for the [spaCy NLP library](https://github.com/explosion/spaCy).**
4 |
5 | ---
6 | **[spaCy API Docker](https://github.com/jgontrum/spacy-api-docker) is being sponsored by the following tool; please help to support us by taking a look and signing up to a free trial**
7 |
8 |
9 | [
](https://tracking.gitads.io/?repo=spacy-api-docker)
10 | ---
11 |
12 | ### Features
13 |
14 | - Use the awesome spaCy NLP framework with other programming languages.
15 | - Better scaling: One NLP - multiple services.
16 | - Built using the official [spaCy REST services](https://github.com/explosion/spacy-services).
17 | - Dependency parsing visualisation with [displaCy](https://demos.explosion.ai/displacy/).
18 | - Docker images for **English**, **German**, **Spanish**, **Italian**, **Dutch**, **Portuguese** and **French**.
19 | - Automated builds to stay up to date with spaCy.
20 | - Current spaCy version: 2.0.16
21 |
22 | Please note that this is a completely new API and is incompatible with the previous one. If you still need the old API, use `jgontrum/spacyapi:en-legacy` or `jgontrum/spacyapi:de-legacy`.
23 |
24 | _Documentation, API- and frontend code based upon [spaCy REST services](https://github.com/explosion/spacy-services) by [Explosion AI](https://explosion.ai)._
25 |
26 | ---
27 |
28 | ## Images
29 |
30 | | Image | Description |
31 | | --------------------------- | ----------------------------------------------------------------- |
32 | | jgontrum/spacyapi:base_v2 | Base image for spaCy 2.0, containing no language model |
33 | | jgontrum/spacyapi:en_v2 | English language model, spaCy 2.0 |
34 | | jgontrum/spacyapi:de_v2 | German language model, spaCy 2.0 |
35 | | jgontrum/spacyapi:es_v2 | Spanish language model, spaCy 2.0 |
36 | | jgontrum/spacyapi:fr_v2 | French language model, spaCy 2.0 |
37 | | jgontrum/spacyapi:pt_v2 | Portuguese language model, spaCy 2.0 |
38 | | jgontrum/spacyapi:it_v2 | Italian language model, spaCy 2.0 |
39 | | jgontrum/spacyapi:nl_v2 | Dutch language model, spaCy 2.0 |
40 | | jgontrum/spacyapi:all_v2 | Contains EN, DE, ES, PT, NL, IT and FR language models, spaCy 2.0 |
41 | | _OLD RELEASES_ | |
42 | | jgontrum/spacyapi:base | Base image, containing no language model |
43 | | jgontrum/spacyapi:latest | English language model |
44 | | jgontrum/spacyapi:en | English language model |
45 | | jgontrum/spacyapi:de | German language model |
46 | | jgontrum/spacyapi:es | Spanish language model |
47 | | jgontrum/spacyapi:fr | French language model |
48 | | jgontrum/spacyapi:all | Contains EN, DE, ES and FR language models |
49 | | jgontrum/spacyapi:en-legacy | Old API with English model |
50 | | jgontrum/spacyapi:de-legacy | Old API with German model |
51 |
52 | ---
53 |
54 | ## Usage
55 |
56 | `docker run -p "127.0.0.1:8080:80" jgontrum/spacyapi:en_v2`
57 |
58 | All models are loaded at start up time. Depending on the model size and server
59 | performance, this can take a few minutes.
60 |
61 | The displaCy frontend is available at `/ui`.
62 |
63 | ### Docker Compose
64 |
65 | ```yaml
66 | version: '2'
67 |
68 | services:
69 | spacyapi:
70 | image: jgontrum/spacyapi:en_v2
71 | ports:
72 | - "127.0.0.1:8080:80"
73 | restart: always
74 |
75 | ```
76 |
77 | ### Running Tests
78 |
79 | `pytest` is included in the images, so the unit tests can be run directly inside a container:
80 |
81 | `docker run -it jgontrum/spacyapi:en_v2 app/env/bin/pytest app/displacy_service_tests`
82 |
83 | ### Special Cases
84 |
85 | The API includes rudimentary support for specifying [special cases](https://spacy.io/usage/linguistic-features#special-cases)
86 | for your deployment. Currently only basic special cases are supported; for example, in the spaCy parlance:
87 |
88 | ```python
89 | tokenizer.add_special_case("isn't", [{ORTH: "isn't"}])
90 | ```
91 |
92 | They can be supplied in an environment variable corresponding to the desired language model. For example, `en_special_cases`
93 | or `en_core_web_lg_special_cases`. They are configured as a single comma-delimited string, such as `"isn't,doesn't,won't"`.
94 |
95 | Use the following syntax to specify basic special case rules, such as for preserving contractions:
96 |
97 | `docker run -p "127.0.0.1:8080:80" -e en_special_cases="isn't,doesn't" jgontrum/spacyapi:en_v2`
98 |
99 | You can also configure this in a `.env` file if using `docker-compose` as above.
100 |
101 | ---
102 |
103 | ## REST API Documentation
104 |
105 | ### `GET` `/ui/`
106 |
107 | displaCy frontend is available here.
108 |
109 | ---
110 |
111 | ### `POST` `/dep`
112 |
113 | Example request:
114 |
115 | ```json
116 | {
117 | "text": "They ate the pizza with anchovies",
118 | "model": "en",
119 | "collapse_punctuation": 0,
120 | "collapse_phrases": 1
121 | }
122 | ```
123 |
124 | | Name | Type | Description |
125 | | ---------------------- | ------- | -------------------------------------------------------- |
126 | | `text` | string | text to be parsed |
127 | | `model` | string | identifier string for a model installed on the server |
128 | | `collapse_punctuation` | boolean | Merge punctuation onto the preceding token? |
129 | | `collapse_phrases` | boolean | Merge noun chunks and named entities into single tokens? |
130 |
131 | Example request using the Python [Requests library](http://docs.python-requests.org/en/master/):
132 |
133 | ```python
134 | import json
135 | import requests
136 |
137 | url = "http://localhost:8000/dep"
138 | message_text = "They ate the pizza with anchovies"
139 | headers = {'content-type': 'application/json'}
140 | d = {'text': message_text, 'model': 'en'}
141 |
142 | response = requests.post(url, data=json.dumps(d), headers=headers)
143 | r = response.json()
144 | ```
145 |
146 | Example response:
147 |
148 | ```json
149 | {
150 | "arcs": [
151 | { "dir": "left", "start": 0, "end": 1, "label": "nsubj" },
152 | { "dir": "right", "start": 1, "end": 2, "label": "dobj" },
153 | { "dir": "right", "start": 1, "end": 3, "label": "prep" },
154 | { "dir": "right", "start": 3, "end": 4, "label": "pobj" },
155 | { "dir": "left", "start": 2, "end": 3, "label": "prep" }
156 | ],
157 | "words": [
158 | { "tag": "PRP", "text": "They" },
159 | { "tag": "VBD", "text": "ate" },
160 | { "tag": "NN", "text": "the pizza" },
161 | { "tag": "IN", "text": "with" },
162 | { "tag": "NNS", "text": "anchovies" }
163 | ]
164 | }
165 | ```
166 |
167 | | Name | Type | Description |
168 | | ------- | ------- | ------------------------------------------ |
169 | | `arcs` | array | data to generate the arrows |
170 | | `dir` | string | direction of arrow (`"left"` or `"right"`) |
171 | | `start` | integer | offset of word the arrow starts **on** |
172 | | `end` | integer | offset of word the arrow ends **on** |
173 | | `label` | string | dependency label |
174 | | `words` | array | data to generate the words |
175 | | `tag` | string | part-of-speech tag |
176 | | `text` | string | token |
177 |
178 | ---
179 |
180 | Curl command:
181 |
182 | ```
183 | curl -s localhost:8000/dep -d '{"text":"Pastafarians are smarter than people with Coca Cola bottles.", "model":"en"}'
184 | ```
185 |
186 | ```json
187 | {
188 | "arcs": [
189 | {
190 | "dir": "left",
191 | "end": 1,
192 | "label": "nsubj",
193 | "start": 0
194 | },
195 | {
196 | "dir": "right",
197 | "end": 2,
198 | "label": "acomp",
199 | "start": 1
200 | },
201 | {
202 | "dir": "right",
203 | "end": 3,
204 | "label": "prep",
205 | "start": 2
206 | },
207 | {
208 | "dir": "right",
209 | "end": 4,
210 | "label": "pobj",
211 | "start": 3
212 | },
213 | {
214 | "dir": "right",
215 | "end": 5,
216 | "label": "prep",
217 | "start": 4
218 | },
219 | {
220 | "dir": "right",
221 | "end": 6,
222 | "label": "pobj",
223 | "start": 5
224 | }
225 | ],
226 | "words": [
227 | {
228 | "tag": "NNPS",
229 | "text": "Pastafarians"
230 | },
231 | {
232 | "tag": "VBP",
233 | "text": "are"
234 | },
235 | {
236 | "tag": "JJR",
237 | "text": "smarter"
238 | },
239 | {
240 | "tag": "IN",
241 | "text": "than"
242 | },
243 | {
244 | "tag": "NNS",
245 | "text": "people"
246 | },
247 | {
248 | "tag": "IN",
249 | "text": "with"
250 | },
251 | {
252 | "tag": "NNS",
253 | "text": "Coca Cola bottles."
254 | }
255 | ]
256 | }
257 | ```
258 |
259 | ---
260 |
261 | ### `POST` `/ent`
262 |
263 | Example request:
264 |
265 | ```json
266 | {
267 | "text": "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously.",
268 | "model": "en"
269 | }
270 | ```
271 |
272 | | Name | Type | Description |
273 | | ------- | ------ | ----------------------------------------------------- |
274 | | `text` | string | text to be parsed |
275 | | `model` | string | identifier string for a model installed on the server |
276 |
277 | Example request using the Python [Requests library](http://docs.python-requests.org/en/master/):
278 |
279 | ```python
280 | import json
281 | import requests
282 |
283 | url = "http://localhost:8000/ent"
284 | message_text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
285 | headers = {'content-type': 'application/json'}
286 | d = {'text': message_text, 'model': 'en'}
287 |
288 | response = requests.post(url, data=json.dumps(d), headers=headers)
289 | r = response.json()
290 | ```
291 |
292 | Example response:
293 |
294 | ```json
295 | [
296 | { "end": 20, "start": 5, "type": "PERSON" },
297 | { "end": 67, "start": 61, "type": "ORG" },
298 | { "end": 75, "start": 71, "type": "DATE" }
299 | ]
300 | ```
301 |
302 | | Name | Type | Description |
303 | | ------- | ------- | ------------------------------------------ |
304 | | `end` | integer | character offset the entity ends **after** |
305 | | `start` | integer | character offset the entity starts **on** |
306 | | `type` | string | entity type |
307 |
308 | ```
309 | curl -s localhost:8000/ent -d '{"text":"Pastafarians are smarter than people with Coca Cola bottles.", "model":"en"}'
310 | ```
311 |
312 | ```json
313 | [
314 | {
315 | "end": 12,
316 | "start": 0,
317 | "text": "Pastafarians",
318 | "type": "NORP"
319 | },
320 | {
321 | "end": 51,
322 | "start": 42,
323 | "text": "Coca Cola",
324 | "type": "ORG"
325 | }
326 | ]
327 | ```
328 |
329 | ---
330 |
331 | ### `POST` `/sents`
332 |
333 | Example request:
334 |
335 | ```json
336 | {
337 | "text": "In 2012 I was a mediocre developer. But today I am at least a bit better.",
338 | "model": "en"
339 | }
340 | ```
341 |
342 | | Name | Type | Description |
343 | | ------- | ------ | ----------------------------------------------------- |
344 | | `text` | string | text to be parsed |
345 | | `model` | string | identifier string for a model installed on the server |
346 |
347 | Example request using the Python [Requests library](http://docs.python-requests.org/en/master/):
348 |
349 | ```python
350 | import json
351 | import requests
352 |
353 | url = "http://localhost:8000/sents"
354 | message_text = "In 2012 I was a mediocre developer. But today I am at least a bit better."
355 | headers = {'content-type': 'application/json'}
356 | d = {'text': message_text, 'model': 'en'}
357 |
358 | response = requests.post(url, data=json.dumps(d), headers=headers)
359 | r = response.json()
360 | ```
361 |
362 | Example response:
363 |
364 | ```json
365 | ["In 2012 I was a mediocre developer.", "But today I am at least a bit better."]
366 | ```
367 |
368 | ---
369 |
370 | ### `POST` `/sents_dep`
371 |
372 | Combination of `/sents` and `/dep`: returns each sentence together with its dependency parse.
373 |
374 | Example request:
375 |
376 | ```json
377 | {
378 | "text": "In 2012 I was a mediocre developer. But today I am at least a bit better.",
379 | "model": "en"
380 | }
381 | ```
382 |
383 | | Name | Type | Description |
384 | | ------- | ------ | ----------------------------------------------------- |
385 | | `text` | string | text to be parsed |
386 | | `model` | string | identifier string for a model installed on the server |
387 |
388 | Example request using the Python [Requests library](http://docs.python-requests.org/en/master/):
389 |
390 | ```python
391 | import json
392 | import requests
393 |
394 | url = "http://localhost:8000/sents_dep"
395 | message_text = "In 2012 I was a mediocre developer. But today I am at least a bit better."
396 | headers = {'content-type': 'application/json'}
397 | d = {'text': message_text, 'model': 'en'}
398 |
399 | response = requests.post(url, data=json.dumps(d), headers=headers)
400 | r = response.json()
401 | ```
402 |
403 | Example response:
404 |
405 | ```json
406 | [
407 | {
408 | "sentence": "In 2012 I was a mediocre developer.",
409 | "dep_parse": {
410 | "arcs": [
411 | {
412 | "dir": "left",
413 | "end": 3,
414 | "label": "prep",
415 | "start": 0,
416 | "text": "In"
417 | },
418 | {
419 | "dir": "right",
420 | "end": 1,
421 | "label": "pobj",
422 | "start": 0,
423 | "text": "2012"
424 | },
425 | {
426 | "dir": "left",
427 | "end": 3,
428 | "label": "nsubj",
429 | "start": 2,
430 | "text": "I"
431 | },
432 | {
433 | "dir": "left",
434 | "end": 6,
435 | "label": "det",
436 | "start": 4,
437 | "text": "a"
438 | },
439 | {
440 | "dir": "left",
441 | "end": 6,
442 | "label": "amod",
443 | "start": 5,
444 | "text": "mediocre"
445 | },
446 | {
447 | "dir": "right",
448 | "end": 6,
449 | "label": "attr",
450 | "start": 3,
451 | "text": "developer"
452 | },
453 | {
454 | "dir": "right",
455 | "end": 7,
456 | "label": "punct",
457 | "start": 3,
458 | "text": "."
459 | }
460 | ],
461 | "words": [
462 | {
463 | "tag": "IN",
464 | "text": "In"
465 | },
466 | {
467 | "tag": "CD",
468 | "text": "2012"
469 | },
470 | {
471 | "tag": "PRP",
472 | "text": "I"
473 | },
474 | {
475 | "tag": "VBD",
476 | "text": "was"
477 | },
478 | {
479 | "tag": "DT",
480 | "text": "a"
481 | },
482 | {
483 | "tag": "JJ",
484 | "text": "mediocre"
485 | },
486 | {
487 | "tag": "NN",
488 | "text": "developer"
489 | },
490 | {
491 | "tag": ".",
492 | "text": "."
493 | }
494 | ]
495 | }
496 | },
497 | {
498 | "sentence": "But today I am at least a bit better.",
499 | "dep_parse": {
500 | "arcs": [
501 | {
502 | "dir": "left",
503 | "end": 11,
504 | "label": "cc",
505 | "start": 8,
506 | "text": "But"
507 | },
508 | {
509 | "dir": "left",
510 | "end": 11,
511 | "label": "npadvmod",
512 | "start": 9,
513 | "text": "today"
514 | },
515 | {
516 | "dir": "left",
517 | "end": 11,
518 | "label": "nsubj",
519 | "start": 10,
520 | "text": "I"
521 | },
522 | {
523 | "dir": "left",
524 | "end": 13,
525 | "label": "advmod",
526 | "start": 12,
527 | "text": "at"
528 | },
529 | {
530 | "dir": "left",
531 | "end": 15,
532 | "label": "advmod",
533 | "start": 13,
534 | "text": "least"
535 | },
536 | {
537 | "dir": "left",
538 | "end": 15,
539 | "label": "det",
540 | "start": 14,
541 | "text": "a"
542 | },
543 | {
544 | "dir": "left",
545 | "end": 16,
546 | "label": "npadvmod",
547 | "start": 15,
548 | "text": "bit"
549 | },
550 | {
551 | "dir": "right",
552 | "end": 16,
553 | "label": "acomp",
554 | "start": 11,
555 | "text": "better"
556 | },
557 | {
558 | "dir": "right",
559 | "end": 17,
560 | "label": "punct",
561 | "start": 11,
562 | "text": "."
563 | }
564 | ],
565 | "words": [
566 | {
567 | "tag": "CC",
568 | "text": "But"
569 | },
570 | {
571 | "tag": "NN",
572 | "text": "today"
573 | },
574 | {
575 | "tag": "PRP",
576 | "text": "I"
577 | },
578 | {
579 | "tag": "VBP",
580 | "text": "am"
581 | },
582 | {
583 | "tag": "IN",
584 | "text": "at"
585 | },
586 | {
587 | "tag": "JJS",
588 | "text": "least"
589 | },
590 | {
591 | "tag": "DT",
592 | "text": "a"
593 | },
594 | {
595 | "tag": "NN",
596 | "text": "bit"
597 | },
598 | {
599 | "tag": "RBR",
600 | "text": "better"
601 | },
602 | {
603 | "tag": ".",
604 | "text": "."
605 | }
606 | ]
607 | }
608 | }
609 | ]
610 | ```
611 |
612 | ### `GET` `/models`
613 |
614 | List the names of models installed on the server.
615 |
616 | Example request:
617 |
618 | ```
619 | GET /models
620 | ```
621 |
622 | Example response:
623 |
624 | ```json
625 | ["en", "de"]
626 | ```
627 |
628 | ---
629 |
630 | ### `GET` `/{model}/schema`
631 |
632 | Example request:
633 |
634 | ```
635 | GET /en/schema
636 | ```
637 |
638 | | Name | Type | Description |
639 | | ------- | ------ | ----------------------------------------------------- |
640 | | `model` | string | identifier string for a model installed on the server |
641 |
642 | Example response:
643 |
644 | ```json
645 | {
646 | "dep_types": ["ROOT", "nsubj"],
647 | "ent_types": ["PERSON", "LOC", "ORG"],
648 | "pos_types": ["NN", "VBZ", "SP"]
649 | }
650 | ```
651 |
652 | ---
653 |
654 | ### `GET` `/version`
655 |
656 | Show the spaCy version in use.
657 |
658 | Example request:
659 |
660 | ```
661 | GET /version
662 | ```
663 |
664 | Example response:
665 |
666 | ```json
667 | {
668 | "spacy": "2.2.4"
669 | }
670 | ```
671 |
--------------------------------------------------------------------------------
/build_sassc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | mkdir -p /tmp/sass
4 | cd /tmp/sass
5 | git clone https://github.com/sass/sassc.git
6 | . sassc/script/bootstrap
7 | make -C sassc -j4
8 | PREFIX="/usr" make -C sassc install
9 | cd
10 | rm -rf /tmp/sass
11 |
12 |
--------------------------------------------------------------------------------
/config/nginx.conf:
--------------------------------------------------------------------------------
1 | server {
2 | listen PORT default_server;
3 | charset utf-8;
4 |
5 | location /ui/ {
6 | proxy_pass http://127.0.0.1:8080/;
7 | }
8 |
9 | location / {
10 | proxy_pass http://127.0.0.1:8000/;
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/config/supervisor.conf:
--------------------------------------------------------------------------------
1 | [program:api]
2 | command = /usr/bin/make --directory /app start
3 |
4 | [program:frontend]
5 | command = /usr/bin/make --directory /app/frontend start
6 |
7 | [program:nginx]
8 | command = /usr/sbin/nginx
9 |
--------------------------------------------------------------------------------
/displacy_service/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jgontrum/spacy-api-docker/249f96a2387741bc1037eab8318966a284cceeec/displacy_service/__init__.py
--------------------------------------------------------------------------------
/displacy_service/parse.py:
--------------------------------------------------------------------------------
class Parse(object):
    """Run a pipeline over a text and expose its dependency parse as JSON data.

    Args:
        nlp: callable that turns ``text`` into a document object.
        text: the text to parse.
        collapse_punctuation: when truthy, every non-punctuation token that is
            directly followed by punctuation is merged with the run of
            punctuation tokens after it; the merged token keeps the word's
            tag, lemma and entity type.
        collapse_phrases: when truthy, every noun chunk of the document is
            merged into one token carrying its root's tag, lemma and entity
            type.
    """

    def __init__(self, nlp, text, collapse_punctuation, collapse_phrases):
        self.doc = nlp(text)

        if collapse_punctuation:
            pending = []
            # The last token is left out of the scan: it has no right-hand
            # neighbour to inspect.
            for token in self.doc[:-1]:
                # Only a non-punctuation token directly followed by
                # punctuation starts a span to collapse.
                if token.is_punct or not token.nbor(1).is_punct:
                    continue
                first = token.i
                stop = token.i + 1
                # Extend the span over the whole run of trailing punctuation.
                while stop < len(self.doc) and self.doc[stop].is_punct:
                    stop += 1
                chunk = self.doc[first: stop]
                pending.append((
                    chunk.start_char,
                    chunk.end_char,
                    token.tag_,
                    token.lemma_,
                    token.ent_type_,
                ))
            # Merging happens only after the scan, using the character
            # offsets collected above.
            for merge_args in pending:
                self.doc.merge(*merge_args)

        if collapse_phrases:
            # The chunks are materialised into a list before any merge runs.
            for phrase in list(self.doc.noun_chunks):
                root = phrase.root
                phrase.merge(root.tag_, root.lemma_, root.ent_type_)

    def to_json(self):
        """Return ``{'words': [...], 'arcs': [...]}`` for the whole document.

        Each word entry holds the token text and tag.  Each arc connects a
        token with its head: ``start`` is the smaller token index, ``end`` the
        larger one, and ``dir`` is ``'left'`` when the token precedes its head
        and ``'right'`` when it follows it.
        """
        words = [{'text': token.text, 'tag': token.tag_} for token in self.doc]

        arcs = []
        for token in self.doc:
            own_index = token.i
            head_index = token.head.i
            if own_index == head_index:
                # A token that is its own head produces no arc.
                continue
            if own_index < head_index:
                start, end, direction = own_index, head_index, 'left'
            else:
                start, end, direction = head_index, own_index, 'right'
            arcs.append({
                'start': start,
                'end': end,
                'label': token.dep_,
                'text': str(token),
                'dir': direction,
            })

        return {'words': words, 'arcs': arcs}
49 |
50 |
class Entities(object):
    """Run a pipeline over a text and expose the entities found in it.

    Args:
        nlp: callable that turns ``text`` into a document object.
        text: the text to analyse.
    """

    def __init__(self, nlp, text):
        self.doc = nlp(text)

    def to_json(self):
        """Return one dict per entity in ``self.doc.ents``.

        Each dict carries the entity's ``start`` and ``end`` character
        offsets, its label as ``type`` and its string form as ``text``.
        """
        found = []
        for entity in self.doc.ents:
            found.append({
                'start': entity.start_char,
                'end': entity.end_char,
                'type': entity.label_,
                'text': str(entity),
            })
        return found
64 |
65 |
class Sentences(object):
    """Run a pipeline over a text and expose its sentences.

    Args:
        nlp: callable that turns ``text`` into a document object.
        text: the text to split.
    """

    def __init__(self, nlp, text):
        self.doc = nlp(text)

    def to_json(self):
        """Return the sentences of ``self.doc.sents`` as a list of strings.

        Leading and trailing whitespace is stripped from each sentence.
        """
        stripped = []
        for sentence in self.doc.sents:
            stripped.append(sentence.string.strip())
        return stripped
73 |
74 |
class SentencesDependencies(object):
    """Run a pipeline over a text and expose one dependency parse per sentence.

    Args:
        nlp: callable that turns ``text`` into a document object.
        text: the text to parse.
        collapse_punctuation: when truthy, every non-punctuation token that is
            directly followed by punctuation is merged with the run of
            punctuation tokens after it; the merged token keeps the word's
            tag, lemma and entity type.
        collapse_phrases: when truthy, every noun chunk of the document is
            merged into one token carrying its root's tag, lemma and entity
            type.
    """

    def __init__(self, nlp, text, collapse_punctuation, collapse_phrases):
        self.doc = nlp(text)

        if collapse_punctuation:
            pending = []
            # The last token is left out of the scan: it has no right-hand
            # neighbour to inspect.
            for token in self.doc[:-1]:
                # Only a non-punctuation token directly followed by
                # punctuation starts a span to collapse.
                if token.is_punct or not token.nbor(1).is_punct:
                    continue
                first = token.i
                stop = token.i + 1
                # Extend the span over the whole run of trailing punctuation.
                while stop < len(self.doc) and self.doc[stop].is_punct:
                    stop += 1
                chunk = self.doc[first: stop]
                pending.append((
                    chunk.start_char,
                    chunk.end_char,
                    token.tag_,
                    token.lemma_,
                    token.ent_type_,
                ))
            # Merging happens only after the scan, using the character
            # offsets collected above.
            for merge_args in pending:
                self.doc.merge(*merge_args)

        if collapse_phrases:
            # The chunks are materialised into a list before any merge runs.
            for phrase in list(self.doc.noun_chunks):
                root = phrase.root
                phrase.merge(root.tag_, root.lemma_, root.ent_type_)

    def to_json(self):
        """Return a list with one entry per sentence of the document.

        Every entry has the stripped sentence text under ``sentence`` and a
        ``dep_parse`` dict with the sentence's ``words`` and ``arcs``.  Arc
        offsets are the tokens' ``i`` / ``head.i`` values exactly as reported
        by the token objects; they are not rebased to the sentence start.
        """
        result = []
        for sentence in self.doc.sents:
            words = [{'text': token.text, 'tag': token.tag_}
                     for token in sentence]

            arcs = []
            for token in sentence:
                own_index = token.i
                head_index = token.head.i
                if own_index == head_index:
                    # A token that is its own head produces no arc.
                    continue
                if own_index < head_index:
                    start, end, direction = own_index, head_index, 'left'
                else:
                    start, end, direction = head_index, own_index, 'right'
                arcs.append({
                    'start': start,
                    'end': end,
                    'label': token.dep_,
                    'text': str(token),
                    'dir': direction,
                })

            result.append({
                'sentence': sentence.string.strip(),
                'dep_parse': {'words': words, 'arcs': arcs},
            })
        return result
131 |
--------------------------------------------------------------------------------
/displacy_service/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jgontrum/spacy-api-docker/249f96a2387741bc1037eab8318966a284cceeec/displacy_service/scripts/__init__.py
--------------------------------------------------------------------------------
/displacy_service/scripts/app.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from spacy.symbols import ORTH
4 |
5 | from wsgiref import simple_server
6 |
7 | from displacy_service.server import APP, MODELS, get_model
8 |
9 |
def run():
    """Load every configured model, register special cases, start the API.

    For each name in ``MODELS`` the model is loaded through ``get_model`` and
    the environment variable ``<model>_special_cases`` is read.  Its value is
    a comma-delimited list of strings; each one is registered on the model's
    tokenizer as a special case consisting of that exact string.

    After all models are loaded, a ``wsgiref`` server for ``APP`` is started
    on ``0.0.0.0:8000``; ``serve_forever()`` is then called, so this function
    does not return on its own.
    """
    for model in MODELS:
        print("Load model ", model)
        loaded_model = get_model(model)
        special_cases_str = os.getenv(f"{model}_special_cases", "")
        for raw_case in special_cases_str.split(','):
            # Tolerate spaces around entries ("isn't, doesn't") and stray or
            # trailing commas: an entry with surrounding whitespace or an
            # empty entry is not what the user meant to register.
            special_case = raw_case.strip()
            if not special_case:
                continue
            loaded_model.tokenizer.add_special_case(
                special_case,
                [{ORTH: special_case}]
            )

    print("Loaded all models. Starting HTTP server.")
    httpd = simple_server.make_server('0.0.0.0', 8000, APP)
    httpd.serve_forever()
25 |
--------------------------------------------------------------------------------
/displacy_service/scripts/download.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 |
4 | from spacy.cli import download
5 |
6 |
def download_models():
    """Download the configured models and record them in the frontend settings.

    The whitespace-separated ``languages`` environment variable (default
    ``"en"``) lists the models to download. ``frontend/_data.json`` is then
    rewritten so that ``index.languages`` maps every model name to itself and
    ``index.default_language`` is the first name, or ``""`` when the variable
    is set but contains no names.
    """
    languages = os.getenv("languages", "en").split()
    for lang in languages:
        download(model=lang, direct=False)

    print("Updating frontend settings...")
    settings_path = "frontend/_data.json"
    # Context managers make sure both handles are closed (and the write is
    # flushed) instead of relying on garbage collection.
    with open(settings_path) as settings_file:
        frontend_settings = json.load(settings_file)

    frontend_settings['index']['languages'] = {
        lang: lang for lang in languages
    }
    # A blank "languages" value yields an empty list; avoid the IndexError.
    frontend_settings['index']['default_language'] = (
        languages[0] if languages else ""
    )

    with open(settings_path, "w") as settings_file:
        json.dump(frontend_settings, settings_file,
                  sort_keys=True,
                  indent=2)

    print("Done!")
25 |
--------------------------------------------------------------------------------
/displacy_service/server.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import falcon
3 | import spacy
4 | import json
5 | import os
6 |
7 | from spacy.symbols import ENT_TYPE, TAG, DEP
8 | import spacy.about
9 | import spacy.util
10 |
11 | from .parse import Parse, Entities, Sentences, SentencesDependencies
12 |
13 |
# Names of the models to serve: the whitespace-separated value of the
# "languages" environment variable (an empty list when it is unset).
MODELS = os.getenv("languages", "").split()

# Cache mapping model name -> loaded model; filled on demand by get_model().
_models = {}
17 |
18 |
def get_model(model_name):
    """Return the model cached under ``model_name``, loading it on first use.

    A name missing from the module-level ``_models`` cache is loaded with
    ``spacy.load`` and stored there; later calls reuse the cached object.
    Errors raised by ``spacy.load`` propagate to the caller.
    """
    if model_name in _models:
        return _models[model_name]
    loaded = spacy.load(model_name)
    _models[model_name] = loaded
    return loaded
23 |
24 |
def get_dep_types(model):
    '''Return the dependency labels known to the model's parser.

    Every id found in ``model.parser.moves.freqs[DEP]`` is resolved through
    ``model.vocab.strings``; the results are returned as a list.
    '''
    return [model.vocab.strings[label_id]
            for label_id in model.parser.moves.freqs[DEP]]
31 |
32 |
def get_ent_types(model):
    '''Return the entity types known to the model's entity recogniser.

    Every id found in ``model.entity.moves.freqs[ENT_TYPE]`` is resolved
    through ``model.vocab.strings``; the results are returned as a list.
    '''
    return [model.vocab.strings[label_id]
            for label_id in model.entity.moves.freqs[ENT_TYPE]]
39 |
40 |
def get_pos_types(model):
    '''Return the part-of-speech tags known to the model's tagger.

    Every id found in ``model.tagger.moves.freqs[TAG]`` is resolved through
    ``model.vocab.strings``; the results are returned as a list.
    '''
    return [model.vocab.strings[label_id]
            for label_id in model.tagger.moves.freqs[TAG]]
47 |
48 |
class ModelsResource(object):
    """Expose the names of the configured models.

    test with: curl -s localhost:8000/models
    """

    def on_get(self, req, resp):
        """Answer with the ``MODELS`` list serialised as indented JSON.

        Raises:
            falcon.HTTPInternalServerError: if building the response fails.
        """
        try:
            payload = json.dumps(list(MODELS), sort_keys=True, indent=2)
            resp.body = payload
            resp.content_type = 'text/string'
            resp.append_header('Access-Control-Allow-Origin', "*")
            resp.status = falcon.HTTP_200
        except Exception as err:
            raise falcon.HTTPInternalServerError(
                'Models retrieval failed',
                str(err))
66 |
67 |
class VersionResource(object):
    """Report the spaCy version the service runs on.

    test with: curl -s localhost:8000/version
    """

    def on_get(self, req, resp):
        """Answer with ``{"spacy": <spacy.about.__version__>}`` as indented JSON.

        Raises:
            falcon.HTTPInternalServerError: if building the response fails.
        """
        try:
            versions = {"spacy": spacy.about.__version__}
            resp.body = json.dumps(versions, sort_keys=True, indent=2)
            resp.content_type = 'text/string'
            resp.append_header('Access-Control-Allow-Origin', "*")
            resp.status = falcon.HTTP_200
        except Exception as err:
            raise falcon.HTTPInternalServerError(
                'Version retrieval failed',
                str(err))
86 |
87 |
class SchemaResource(object):
    """Describe the annotation scheme of a model.

    This does not appear to work with later spacy
    versions.
    """

    def on_get(self, req, resp, model_name):
        """Answer with the model's dependency, entity and POS label lists.

        Args:
            model_name: name of the model to describe, passed to ``get_model``.

        Raises:
            falcon.HTTPBadRequest: if loading the model or collecting its
                labels fails.
        """
        try:
            nlp = get_model(model_name)
            dep_types = get_dep_types(nlp)
            ent_types = get_ent_types(nlp)
            pos_types = get_pos_types(nlp)
            schema = {
                'dep_types': dep_types,
                'ent_types': ent_types,
                'pos_types': pos_types,
            }

            resp.body = json.dumps(schema, sort_keys=True, indent=2)
            resp.content_type = 'text/string'
            resp.append_header('Access-Control-Allow-Origin', "*")
            resp.status = falcon.HTTP_200
        except Exception as err:
            raise falcon.HTTPBadRequest(
                'Schema construction failed',
                str(err))
112 |
113 |
class DepResource(object):
    """Parse text and return displacy's expected JSON output.

    test with: curl -s localhost:8000/dep -d '{"text":"Pastafarians are smarter than people with Coca Cola bottles."}'
    """

    def on_post(self, req, resp):
        """Parse the posted text and answer with the ``Parse`` JSON.

        The request body is a JSON object with ``text`` and the optional keys
        ``model`` (default ``'en'``), ``collapse_punctuation`` and
        ``collapse_phrases`` (both default ``True``).

        Raises:
            falcon.HTTPBadRequest: if the body cannot be decoded, the model
                cannot be loaded or parsing fails.
        """
        try:
            # Reading and decoding happen inside the try block so that a
            # malformed or non-object body yields a 400 with this endpoint's
            # error title instead of an unhandled server error.
            req_body = req.bounded_stream.read()
            json_data = json.loads(req_body.decode('utf8'))
            text = json_data.get('text')
            model_name = json_data.get('model', 'en')
            collapse_punctuation = json_data.get('collapse_punctuation', True)
            collapse_phrases = json_data.get('collapse_phrases', True)

            model = get_model(model_name)
            parse = Parse(model, text, collapse_punctuation, collapse_phrases)
            resp.body = json.dumps(parse.to_json(), sort_keys=True, indent=2)
            resp.content_type = 'text/string'
            resp.append_header('Access-Control-Allow-Origin', "*")
            resp.status = falcon.HTTP_200
        except Exception as e:
            raise falcon.HTTPBadRequest(
                'Dependency parsing failed',
                '{}'.format(e))
139 |
140 |
class EntResource(object):
    """Parse text and return displaCy ent's expected output."""

    def on_post(self, req, resp):
        """Run entity extraction on the posted text and answer with JSON.

        The request body is a JSON object with ``text`` and an optional
        ``model`` key (default ``'en'``).

        Raises:
            falcon.HTTPBadRequest: if the body cannot be decoded, the model
                cannot be loaded or processing fails.
        """
        try:
            # Reading and decoding happen inside the try block so that a
            # malformed or non-object body yields a 400 with this endpoint's
            # error title instead of an unhandled server error.
            req_body = req.bounded_stream.read()
            json_data = json.loads(req_body.decode('utf8'))
            text = json_data.get('text')
            model_name = json_data.get('model', 'en')

            model = get_model(model_name)
            entities = Entities(model, text)
            resp.body = json.dumps(entities.to_json(), sort_keys=True,
                                   indent=2)
            resp.content_type = 'text/string'
            resp.append_header('Access-Control-Allow-Origin', "*")
            resp.status = falcon.HTTP_200
        except Exception as e:
            raise falcon.HTTPBadRequest(
                'Text parsing failed',
                '{}'.format(e))
161 |
162 |
class SentsResources(object):
    """Returns sentences"""

    def on_post(self, req, resp):
        """Split the posted text into sentences and answer with JSON.

        The request body is a JSON object with ``text`` and an optional
        ``model`` key (default ``'en'``).

        Raises:
            falcon.HTTPBadRequest: if the body cannot be decoded, the model
                cannot be loaded or processing fails.
        """
        try:
            # Reading and decoding happen inside the try block so that a
            # malformed or non-object body yields a 400 with this endpoint's
            # error title instead of an unhandled server error.
            req_body = req.bounded_stream.read()
            json_data = json.loads(req_body.decode('utf8'))
            text = json_data.get('text')
            model_name = json_data.get('model', 'en')

            model = get_model(model_name)
            sentences = Sentences(model, text)
            resp.body = json.dumps(sentences.to_json(), sort_keys=True,
                                   indent=2)
            resp.content_type = 'text/string'
            resp.append_header('Access-Control-Allow-Origin', "*")
            resp.status = falcon.HTTP_200
        except Exception as e:
            raise falcon.HTTPBadRequest(
                'Sentence tokenization failed',
                '{}'.format(e))
184 |
185 |
class SentsDepResources(object):
    """Returns sentences and dependency parses"""

    def on_post(self, req, resp):
        """Answer with each sentence of the posted text plus its parse.

        The request body is a JSON object with ``text`` and the optional keys
        ``model`` (default ``'en'``), ``collapse_punctuation`` and
        ``collapse_phrases`` (both default ``False``).

        Raises:
            falcon.HTTPBadRequest: if the body cannot be decoded, the model
                cannot be loaded or processing fails.
        """
        try:
            # Reading and decoding happen inside the try block so that a
            # malformed or non-object body yields a 400 with this endpoint's
            # error title instead of an unhandled server error.
            req_body = req.bounded_stream.read()
            json_data = json.loads(req_body.decode('utf8'))
            text = json_data.get('text')
            model_name = json_data.get('model', 'en')
            collapse_punctuation = json_data.get('collapse_punctuation', False)
            collapse_phrases = json_data.get('collapse_phrases', False)

            model = get_model(model_name)
            sentences = SentencesDependencies(model,
                                              text,
                                              collapse_punctuation=collapse_punctuation,
                                              collapse_phrases=collapse_phrases)

            resp.body = json.dumps(sentences.to_json(),
                                   sort_keys=True,
                                   indent=2)
            resp.content_type = 'text/string'
            resp.append_header('Access-Control-Allow-Origin', "*")
            resp.status = falcon.HTTP_200
        except Exception as e:
            raise falcon.HTTPBadRequest(
                'Sentence tokenization and Dependency parsing failed',
                '{}'.format(e))
214 |
215 |
# WSGI application object; each route below is bound to one resource instance.
APP = falcon.API()
# POST endpoints: the resources read a JSON request body.
APP.add_route('/dep', DepResource())
APP.add_route('/ent', EntResource())
APP.add_route('/sents', SentsResources())
APP.add_route('/sents_dep', SentsDepResources())
# GET endpoints; {model_name} is passed to SchemaResource.on_get as an argument.
APP.add_route('/{model_name}/schema', SchemaResource())
APP.add_route('/models', ModelsResource())
APP.add_route('/version', VersionResource())
224 |
--------------------------------------------------------------------------------
/displacy_service_tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jgontrum/spacy-api-docker/249f96a2387741bc1037eab8318966a284cceeec/displacy_service_tests/__init__.py
--------------------------------------------------------------------------------
/displacy_service_tests/test_parse.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import spacy
4 | from displacy_service.parse import Parse
5 |
6 |
@pytest.fixture(scope="session")
def nlp():
    """Session-scoped fixture: the model that ``spacy.load('en')`` returns."""
    english_model = spacy.load('en')
    return english_model
10 |
11 |
def test_parse_to_json(nlp):
    """With both collapse flags off the sample yields 7 words and 6 arcs."""
    result = Parse(nlp, u'Hello, this is a parse.', False, False).to_json()
    assert len(result['words']) == 7
    assert len(result['arcs']) == 6
17 |
18 |
def test_collapse_punct(nlp):
    """Collapsing punctuation attaches ',' and '.' to the preceding words."""
    expected_texts = [u'Hello,', u'this', u'is', u'a', u'parse.']
    result = Parse(nlp, u'Hello, this is a parse.', True, False).to_json()
    assert len(result['words']) == 5
    assert len(result['arcs']) == 4
    assert [entry['text'] for entry in result['words']] == expected_texts
25 |
26 |
def test_collapse_phrases(nlp):
    """Collapsing phrases merges each noun chunk into a single word entry."""
    expected_texts = [u'This example', u'is', u'a parse', u'.']
    result = Parse(nlp, u'This example is a parse.', False, True).to_json()
    assert len(result['words']) == 4
    assert len(result['arcs']) == 3
    assert [entry['text'] for entry in result['words']] == expected_texts
33 |
--------------------------------------------------------------------------------
/displacy_service_tests/test_server.py:
--------------------------------------------------------------------------------
1 | import falcon.testing
2 | import pytest
3 | import json
4 |
5 | from displacy_service.server import APP, MODELS
6 |
7 |
# Model name used by the request tests below: the first entry of MODELS.
# NOTE: raises IndexError at import time when MODELS is empty.
model = MODELS[0]
9 |
10 |
@pytest.fixture()
def api():
    """Provide a falcon test client bound to the service's ``APP``."""
    client = falcon.testing.TestClient(APP)
    return client
14 |
15 |
def test_deps(api):
    """POST /dep with collapsing disabled returns one word entry per token."""
    payload = '{{"text": "This is a test.", "model": "{model}", "collapse_punctuation": false, "collapse_phrases": false}}'.format(model=model)
    response = api.simulate_post(path='/dep', body=payload)
    parsed = json.loads(response.text)
    texts = [entry['text'] for entry in parsed['words']]
    assert texts == ["This", "is", "a", "test", "."]
24 |
25 |
def test_ents(api):
    """POST /ent reports "Google" as an ORG entity with its character span."""
    payload = '{{"text": "What a great company Google is.", "model": "{model}"}}'.format(model=model)
    response = api.simulate_post(path='/ent', body=payload)
    expected = [
        {"start": 21, "end": 27, "type": "ORG", "text": "Google"}]
    assert json.loads(response.text) == expected
33 |
34 |
def test_sents(api):
    """POST /sents splits the sample text into its three sentences."""
    payload = '{{"text": "This a test that should split into sentences! This is the second. Is this the third?", "model": "{model}"}}'.format(model=model)
    response = api.simulate_post(path='/sents', body=payload)

    expected = ['This a test that should split into sentences!', 'This is the second.', 'Is this the third?']
    assert response.json == expected
42 |
43 |
def test_sents_dep(api):
    """POST /sents_dep returns every sentence together with its own words."""
    payload = '{{"text": "This a test that should split into sentences! This is the second. Is this the third?", "model": "{model}", "collapse_punctuation": false, "collapse_phrases": false}}'.format(model=model)
    response = api.simulate_post(path='/sents_dep', body=payload)

    expected_sentences = [
        "This a test that should split into sentences!",
        "This is the second.",
        "Is this the third?",
    ]
    assert [item["sentence"] for item in response.json] == expected_sentences

    expected_words = [
        ["This", "a", "test", "that", "should", "split", "into", "sentences", "!"],
        ["This", "is", "the", "second", "."],
        ["Is", "this", "the", "third", "?"],
    ]
    actual_words = [
        [entry["text"] for entry in item["dep_parse"]["words"]]
        for item in response.json
    ]
    assert actual_words == expected_words
61 |
62 |
@pytest.mark.parametrize('endpoint, expected_message', [
    ('/dep', 'Dependency parsing failed'),
    ('/ent', 'Text parsing failed'),
    ('/sents', 'Sentence tokenization failed'),
    ('/sents_dep', 'Sentence tokenization and Dependency parsing failed'),
])
def test_bad_model_error_handling(endpoint, expected_message, api):
    """An unknown model yields the endpoint's error title and spaCy's message."""
    payload = '{"text": "Here is some text for testing.", "model": "fake_model"}'
    response = api.simulate_post(path=endpoint, body=payload)
    error = response.json
    assert expected_message == error['title']
    assert "Can't find model 'fake_model'." in error["description"]
76 |
--------------------------------------------------------------------------------
/docker/all/Dockerfile:
--------------------------------------------------------------------------------
FROM jgontrum/spacyapi:base_v2

# Whitespace-separated model names for this image (the Python code reads the
# "languages" environment variable). Uses the key=value form of ENV; the
# legacy "ENV key value" form is discouraged by Docker.
ENV languages="en de es fr it nl pt"
RUN cd /app && env/bin/download_models
5 |
--------------------------------------------------------------------------------
/docker/de/Dockerfile:
--------------------------------------------------------------------------------
FROM jgontrum/spacyapi:base_v2

# Model name for this image (the Python code reads the "languages"
# environment variable). Uses the key=value form of ENV; the legacy
# "ENV key value" form is discouraged by Docker.
ENV languages="de"
RUN cd /app && env/bin/download_models
5 |
--------------------------------------------------------------------------------
/docker/en/Dockerfile:
--------------------------------------------------------------------------------
FROM jgontrum/spacyapi:base_v2

# Model name for this image (the Python code reads the "languages"
# environment variable). Uses the key=value form of ENV; the legacy
# "ENV key value" form is discouraged by Docker.
ENV languages="en"
RUN cd /app && env/bin/download_models
5 |
--------------------------------------------------------------------------------
/docker/en/Dockerfile.lg:
--------------------------------------------------------------------------------
FROM jgontrum/spacyapi:base_v2

# Model name for this image (the Python code reads the "languages"
# environment variable). Uses the key=value form of ENV; the legacy
# "ENV key value" form is discouraged by Docker.
ENV languages="en_core_web_lg"
RUN cd /app && env/bin/download_models
5 |
--------------------------------------------------------------------------------
/docker/en/Dockerfile.md:
--------------------------------------------------------------------------------
FROM jgontrum/spacyapi:base_v2

# Model name for this image (the Python code reads the "languages"
# environment variable). Uses the key=value form of ENV; the legacy
# "ENV key value" form is discouraged by Docker.
ENV languages="en_core_web_md"
RUN cd /app && env/bin/download_models
5 |
--------------------------------------------------------------------------------
/docker/es/Dockerfile:
--------------------------------------------------------------------------------
FROM jgontrum/spacyapi:base_v2

# Model name for this image (the Python code reads the "languages"
# environment variable). Uses the key=value form of ENV; the legacy
# "ENV key value" form is discouraged by Docker.
ENV languages="es"
RUN cd /app && env/bin/download_models
5 |
--------------------------------------------------------------------------------
/docker/fr/Dockerfile:
--------------------------------------------------------------------------------
FROM jgontrum/spacyapi:base_v2

# Model name for this image (the Python code reads the "languages"
# environment variable). Uses the key=value form of ENV; the legacy
# "ENV key value" form is discouraged by Docker.
ENV languages="fr"
RUN cd /app && env/bin/download_models
5 |
--------------------------------------------------------------------------------
/docker/it/Dockerfile:
--------------------------------------------------------------------------------
FROM jgontrum/spacyapi:base_v2

# Model name for this image (the Python code reads the "languages"
# environment variable). Uses the key=value form of ENV; the legacy
# "ENV key value" form is discouraged by Docker.
ENV languages="it"
RUN cd /app && env/bin/download_models
5 |
--------------------------------------------------------------------------------
/docker/nl/Dockerfile:
--------------------------------------------------------------------------------
FROM jgontrum/spacyapi:base_v2

# Model name for this image (the Python code reads the "languages"
# environment variable). Uses the key=value form of ENV; the legacy
# "ENV key value" form is discouraged by Docker.
ENV languages="nl"
RUN cd /app && env/bin/download_models
5 |
--------------------------------------------------------------------------------
/docker/pt/Dockerfile:
--------------------------------------------------------------------------------
FROM jgontrum/spacyapi:base_v2

# Model name for this image (the Python code reads the "languages"
# environment variable). Uses the key=value form of ENV; the legacy
# "ENV key value" form is discouraged by Docker.
ENV languages="pt"
RUN cd /app && env/bin/download_models
5 |
--------------------------------------------------------------------------------
/frontend/Makefile:
--------------------------------------------------------------------------------
# `start` is a command rather than a file, so it is declared phony; the
# previous list named a `run` target that this Makefile does not define.
.PHONY: all clean start

all: node_modules

# Install the npm dependencies.
node_modules:
	npm install

clean:
	rm -rfv node_modules/

# Serve the frontend with harp on port 8080.
start: node_modules
	NODE_ENV=production node_modules/harp/bin/harp server --port 8080
13 |
--------------------------------------------------------------------------------
/frontend/_data.json:
--------------------------------------------------------------------------------
1 | {
2 | "index": {
3 | "api": "/dep",
4 | "default_language": "",
5 | "description": "Visualise spaCy's guess at the syntactic structure of a sentence. Arrows point from children to heads, and are labelled by their relation type.",
6 | "github": "explosion/displacy",
7 | "languages": {},
8 | "readmore": "https://explosion.ai/blog/displacy-js-nlp-visualizer",
9 | "scripts": [
10 | "displacy",
11 | "main"
12 | ],
13 | "title": "displaCy Dependency Visualizer"
14 | }
15 | }
--------------------------------------------------------------------------------
/frontend/_layout.jade:
--------------------------------------------------------------------------------
1 | //- ----------------------------------
2 | //- 💥 DEMOS > LAYOUT
3 | //- ----------------------------------
4 |
5 | include _mixins
6 |
7 | doctype html
8 | html(lang="en")
9 |
10 | head
11 | title #{title ? title + " | " : ""} Demos | Explosion AI
12 |
13 | meta(http-equiv="content-type" content="text/html; charset=utf-8")
14 | meta(name="viewport" content="width=device-width, initial-scale=1.0")
15 | meta(name="referrer" content="always")
16 | link(rel="shortcut icon" href="/assets/img/favicon.ico")
17 |
18 | link(rel="stylesheet" href="assets/css/style.css")
19 |
20 | if stylesheets
21 | each stylesheet in stylesheets
22 | link(rel="stylesheet" href="assets/css/#{stylesheet}.css")
23 |
24 | if partial("../_sidebar")
25 | link(rel="stylesheet" href="/assets/css/sidebar.css")
26 |
27 | meta(name="twitter:card" content="summary_large_image")
28 | meta(name="twitter:site" content="@" + twitter)
29 | meta(name="twitter:title" content=title)
30 | meta(name="twitter:description" content=description)
31 | meta(name="twitter:image" content="#{demos_url}/displacy/assets/img/preview.jpg")
32 |
33 | meta(property="og:type" content="website")
34 | meta(property="og:site_name" content=sitename)
35 | meta(property="og:url" content=url)
36 | meta(property="og:title" content=title)
37 | meta(property="og:description" content=description)
38 | meta(property="og:image" content="#{demos_url}/displacy/assets/img/preview.jpg")
39 |
40 | script var api = '#{api}';
41 |
42 | body
43 |
44 | !=partial("../_sidebar")
45 |
46 | main.o-main
47 | !=yield
48 |
49 | if scripts
50 | each script in scripts
51 | script(src="assets/js/#{script}.js")
52 |
--------------------------------------------------------------------------------
/frontend/_mixins.jade:
--------------------------------------------------------------------------------
1 | //- ----------------------------------
2 | //- 💥 MIXINS
3 | //- ----------------------------------
4 |
5 | //- Error
6 |
7 | mixin error(message)
8 | #error.c-alert.c-alert--error.u-text-small(onclick="this.classList.add('c-alert--is-closing'); setTimeout(function() { this.style.display = 'none'; this.classList.remove('c-alert--is-closing') }.bind(this), 2500)")=message
9 |
10 |
11 | //- Icon
12 |
13 | mixin icon(name, size)
14 | svg.o-icon(aria-hidden="true" viewBox="0 0 20 20" width="20" height="20" fill="currentColor")&attributes(attributes)
15 | use(xlink:href="assets/img/icons.svg#icon-#{name}")
16 |
17 |
18 | //- Input
19 |
20 | mixin input(placeholder, label)
21 | menu.c-input.c-input--full.u-text-medium
22 | if label
23 | label.c-input__label(for=id)=label
24 |
25 | input.c-input__field(type="text" placeholder=placeholder autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" onfocus="this.select()")&attributes(attributes)
26 |
27 | block
28 |
29 |
30 | //- Textarea
31 |
32 | mixin textarea(placeholder, label)
33 | menu.c-input.c-input--full.u-text-medium
34 | if label
35 | label.c-input__label.u-label(for=id)=label
36 |
37 | textarea.c-input__field.c-input__field--area(type="text" placeholder=placeholder autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false" onfocus="this.select()")&attributes(attributes)
38 |
39 | block
40 |
41 |
42 | //- Search button
43 |
44 | mixin search(size)
45 | button.c-input__button(class=size ? "c-input__button--#{size}" : "" role="button")&attributes(attributes)
46 | +icon("search").c-input__button__icon
47 | +icon("spinner").c-input__button__spinner
48 |
49 |
50 | //- Dropdown
51 |
52 | mixin dropdown(type, name, label, options, checked)
53 | menu.c-input&attributes(attributes)
54 | label.c-input__label(for=name)=label
55 | +icon("arrow-down").c-input__icon
56 | input.c-dropdown__trigger(id=name type="checkbox" aria-hidden="true")
57 |
58 | ul.c-dropdown.u-text-small
59 | each option, id in options
60 | li
61 | input.c-dropdown__trigger(name=name value=id id="#{name}-#{id}" type=type checked=(checked.indexOf(id) != -1))
62 | label.c-dropdown__option(for="#{name}-#{id}")=option
63 | +icon("check").c-dropdown__option__icon
64 |
65 |
66 | //- Button
67 |
68 | mixin button(icon, label)
69 | a.c-input(role="button" aria-label=label data-tooltip=label)&attributes(attributes)
70 |
71 | if icon
72 | +icon(icon).c-input__button
73 |
74 | block
75 |
76 |
77 | //- External Link
78 |
79 | mixin a(url, trusted)
80 | a(href=url target="_blank" rel=(!trusted) ? "noopener nofollow" : "")&attributes(attributes)
81 | block
82 |
--------------------------------------------------------------------------------
/frontend/assets/css/_base.sass:
--------------------------------------------------------------------------------
1 | //- ----------------------------------
2 | //- 💥 DEMOS > BASE
3 | //- ----------------------------------
4 |
5 | //- Variables
6 |
7 | $font-primary: "Sailec", Helvetica, Arial, sans-serif !default
8 | $font-secondary: "Input Mono Compressed", Consolas, "Andale Mono", Menlo, Monaco, Courier, monospace !default
9 |
10 | $color-front: #1e1935
11 | $color-back: #fff
12 | $color-subtle: #ddd
13 |
14 |
15 | //- Mixins
16 |
17 | @mixin size($width, $height: $width)
18 | width: $width
19 | height: $height
20 |
21 | @mixin position($position, $pos-y, $pos-x, $pos-y-value, $pos-x-value)
22 | position: $position
23 | #{$pos-y}: $pos-y-value
24 | #{$pos-x}: $pos-x-value
25 |
26 |
27 | //- Reset
28 |
29 | *
30 | box-sizing: border-box
31 | padding: 0
32 | margin: 0
33 | border: 0
34 | outline: 0
35 | -webkit-font-smoothing: antialiased
36 |
37 | html
38 | font-family: sans-serif
39 | -ms-text-size-adjust: 100%
40 | -webkit-text-size-adjust: 100%
41 |
42 | @media(min-width: 1200px)
43 | font-size: 16px
44 |
45 | @media(max-width: 1199px)
46 | font-size: 12px
47 |
48 | body
49 | @include size(100%)
50 | background: $color-back
51 | color: $color-front
52 | font: normal normal 1rem/#{1.5} $font-primary
53 | padding: 0
54 | margin: 0
55 | overflow: auto
56 | min-height: 100vh
57 |
58 | a
59 | color: inherit
60 | text-decoration: none
61 |
62 | ul, ol
63 | list-style: none
64 |
--------------------------------------------------------------------------------
/frontend/assets/css/_displacy-theme.sass:
--------------------------------------------------------------------------------
1 | //- ----------------------------------
2 | //- 💥 DISPLACY THEME
3 | //- ----------------------------------
4 |
5 | $displacy: ( text: #ffffff, arrow: #87908a, arrowhead: #ffffff, label: #87908a, tag: #a6e22d, tag-ent: #8e7dff, tag-verb: #e00084, tag-noun: #fd9720, tag-punct: #87908a)
6 |
7 | .displacy-word
8 | font-size: 1.25rem
9 | color: map-get($displacy, text)
10 |
11 | .displacy-tag
12 | font: 1rem $font-secondary
13 | color: map-get($displacy, tag)
14 |
15 | .displacy-label
16 | font: 0.85rem $font-secondary
17 | color: map-get($displacy, label)
18 |
19 | .displacy-arc
20 | color: map-get($displacy, arrow)
21 |
22 | .displacy-arrowhead
23 | color: map-get($displacy, arrowhead)
24 |
25 | @each $format, $tags in (spacy: (NN: tag-noun, VB: tag-verb, ".": tag-punct, ",": tag-punct), google: (NOUN: tag-noun, VERB: tag-verb, "PUNCT": tag-punct))
26 | @each $tag, $color in $tags
27 | [data-format="#{$format}"] .displacy-tag[data-tag^="#{$tag}"]
28 | color: map-get($displacy, $color)
29 |
--------------------------------------------------------------------------------
/frontend/assets/css/_ui.sass:
--------------------------------------------------------------------------------
1 | //- ----------------------------------
2 | //- 💥 UI
3 | //- ----------------------------------
4 |
5 | $theme-bg: $color-back !default
6 | $theme-bg2: $color-back !default
7 | $theme-color: $color-front !default
8 |
9 | $field-border-radius: 0.5rem !default
10 | $color-error: #f44d61 !default
11 |
12 |
13 | //- Utilities
14 |
15 | .u-text-small.u-text-small
16 | font-size: 0.85rem
17 |
18 | .u-text-medium.u-text-medium
19 | font-size: 1.25rem
20 |
21 | .u-text-large.u-text-large
22 | font-size: 1.75rem
23 |
24 | .u-label
25 | font: normal bold 1em $font-primary
26 |
27 | .u-code.u-code
28 | font-family: $font-secondary
29 |
30 | .u-heading-1
31 | font: normal bold 2.75rem/#{1.375} $font-primary
32 | margin-bottom: 1rem
33 |
34 | .u-heading-2
35 | font: normal bold 2rem/#{1.375} $font-primary
36 | margin-bottom: 0.5rem
37 |
38 |
39 | //- Base
40 |
41 | .o-main
42 | width: 100%
43 |
44 | .o-container
45 | flex: 1
46 | position: relative
47 | background: inherit
48 | width: 100%
49 |
50 | .o-banner
51 | flex: 0 0 1
52 | background: $theme-bg
53 | color: $theme-color
54 | padding: 2rem 3rem
55 | width: 100%
56 |
57 | .o-form
58 | display: flex
59 | flex-flow: row wrap
60 |
61 | .o-block
62 | margin-bottom: 2.5rem
63 |
64 | a
65 | border-bottom: 1px solid
66 |
67 | .o-button
68 | display: inline-block
69 | background: $theme-color
70 | box-shadow: 2px 2px 0 $theme-bg2
71 | color: $theme-bg2
72 | padding: 0.75em 1em
73 | border-radius: $field-border-radius
74 |
75 |
76 | //- Components: Input
77 |
78 | .c-input
79 | display: flex
80 | align-items: center
81 | flex-flow: row wrap
82 | position: relative
83 | background: $theme-bg2
84 | padding: 0.5rem 1rem
85 | margin: 0 0.75rem 0.75rem 0
86 | border-color: $theme-bg2
87 | border-radius: $field-border-radius
88 | width: auto
89 |
90 | &.c-input--full
91 | flex: 1
92 |
93 | .c-input__label
94 | display: flex
95 | align-items: center
96 | justify-content: space-between
97 | cursor: pointer
98 | width: 100%
99 | user-select: none
100 |
101 | .c-input__field
102 | flex: 1
103 | font: inherit
104 | background: transparent
105 | color: inherit
106 | line-height: 1
107 | width: 100%
108 |
109 | &.c-input__field--area
110 | padding: 1rem 0.5rem
111 | line-height: 1.375
112 | min-height: 10rem
113 | resize: vertical
114 |
115 | &::placeholder
116 | color: rgba($theme-color, 0.5)
117 |
118 | ::selection
119 | background: $theme-color
120 | color: $theme-bg2
121 |
122 | .c-input__icon
123 | margin-left: 0.75em
124 |
125 | .c-input__button
126 | font-size: inherit
127 | background: transparent
128 | color: inherit
129 | cursor: pointer
130 |
131 | &.c-input__button--large
132 | font-size: 1.5em
133 |
134 | .c-input__button__icon
135 | @include size(1.25em)
136 |
137 | .loading &
138 | display: none
139 |
140 | .c-input__button__spinner
141 | @include size(1.15em)
142 | display: none
143 | animation: spinner 0.5s linear infinite
144 |
145 | .loading &
146 | display: inline-block
147 |
148 | @keyframes spinner
149 | to
150 | transform: rotate(360deg)
151 |
152 |
153 | //- Components: Dropdown
154 |
155 | .c-dropdown
156 | @include position(absolute, top, left, calc(100% - #{$field-border-radius}), 0)
157 | background: $theme-bg2
158 | display: none
159 | width: 100%
160 | z-index: 100
161 | user-select: none
162 | list-style: none
163 | border-bottom-left-radius: $field-border-radius
164 | border-bottom-right-radius: $field-border-radius
165 | max-height: 50vh
166 | overflow-y: scroll
167 |
168 | .c-dropdown__trigger:checked + &
169 | display: block
170 | z-index: 10
171 |
172 | .c-dropdown__option
173 | display: flex
174 | align-items: center
175 | justify-content: space-between
176 | color: $theme-color
177 | cursor: pointer
178 | padding: 0.75rem
179 | border-top: 1px solid darken($theme-bg2, 10)
180 | line-height: 1.25
181 | width: 100%
182 |
183 | .c-dropdown__trigger:checked + &
184 | font-weight: bold
185 |
186 | .c-dropdown__option__icon
187 | opacity: 1
188 |
189 | .c-dropdown__option__icon
190 | flex: 1.25rem 0 0
191 | opacity: 0
192 |
193 | .c-dropdown__trigger
194 | display: none
195 |
196 |
197 | //- Components: Tooltips
198 |
199 | [data-tooltip]
200 | position: relative
201 | cursor: default
202 |
203 | &:after
204 | @include position(absolute, top, left, 125%, 50%)
205 | display: inline-block
206 | background: $theme-bg
207 | border-radius: 2px
208 | color: $theme-color
209 | content: attr(data-tooltip)
210 | font: normal normal 0.85rem $font-primary
211 | opacity: 0
212 | padding: 0.25em 0.5em
213 | transform: translateX(-50%) translateY(-2px)
214 | transition: opacity 0.1s ease-out, transform 0.1s ease-out
215 | visibility: hidden
216 | white-space: nowrap
217 | z-index: 200
218 |
219 | &:hover:after
220 | opacity: 1
221 | transform: translateX(-50%) translateY(0)
222 | visibility: visible
223 |
224 |
225 | //- Components: Alerts
226 |
227 | .c-alert
228 | @include position(fixed, bottom, right, 1rem, 1rem)
229 | padding: 1rem
230 | z-index: 100
231 | color: $color-back
232 | background: $color-front
233 | font-weight: bold
234 | cursor: pointer
235 | line-height: 1
236 | display: none
237 |
238 | &:before
239 | content: '\00d7'
240 | display: inline-block
241 | font-size: 1.5em
242 | margin-right: 0.5em
243 | transition: transform 0.05s ease
244 | vertical-align: middle
245 |
246 | &:hover:before
247 | transform: scale(1.25) translateY(0.025em)
248 |
249 | &.c-alert--is-closing
250 | transform: translateX(150%)
251 | transition: transform 0.2s ease
252 |
253 | &.c-alert--error
254 | background: $color-error
255 |
256 |
257 |
258 | //- Selection
259 |
260 | ::selection
261 | background: $theme-bg
262 | color: $theme-color
263 |
--------------------------------------------------------------------------------
/frontend/assets/css/style.sass:
--------------------------------------------------------------------------------
1 | //- ----------------------------------
2 | //- 💥 DEMOS > DISPLACY DEP
3 | //- ----------------------------------
4 |
5 | //- Variables
6 |
7 | $theme-bg: #272822
8 | $theme-bg2: desaturate(darken($theme-bg, 5), 10)
9 | $theme-color: #eee
10 |
11 |
12 | //- Imports
13 |
14 | @import base
15 | @import ui
16 |
17 | @import displacy-theme
18 |
19 |
20 | //- Container
21 |
22 | body
23 | background: $theme-bg
24 |
25 | .o-main
26 | display: flex
27 | flex-flow: column nowrap
28 | background: $theme-bg
29 | overflow-x: hidden
30 |
31 | .o-banner
32 | padding: 1rem 100px 0 1rem
33 |
34 | .o-container
35 | overflow-x: auto
36 |
--------------------------------------------------------------------------------
/frontend/assets/img/icons.svg:
--------------------------------------------------------------------------------
1 |
37 |
--------------------------------------------------------------------------------
/frontend/assets/img/preview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jgontrum/spacy-api-docker/249f96a2387741bc1037eab8318966a284cceeec/frontend/assets/img/preview.jpg
--------------------------------------------------------------------------------
/frontend/assets/js/displacy.js:
--------------------------------------------------------------------------------
1 | //- ----------------------------------
2 | //- 💥 DISPLACY
3 | //- ----------------------------------
4 |
5 | 'use strict';
6 |
7 | class displaCy {
8 | constructor (api, options) {
9 | this.api = api;
10 | this.container = typeof(options.container) == 'string' ? document.querySelector(options.container || '#displacy') : options.container;
11 |
12 | this.format = options.format || 'spacy';
13 | this.defaultText = options.defaultText || 'Hello World.';
14 | this.defaultModel = options.defaultModel || 'en';
15 | this.collapsePunct = (options.collapsePunct != undefined) ? options.collapsePunct : true;
16 | this.collapsePhrase = (options.collapsePhrase != undefined) ? options.collapsePhrase : true;
17 |
18 | this.onStart = options.onStart || false;
19 | this.onSuccess = options.onSuccess || false;
20 | this.onError = options.onError || false;
21 |
22 | this.distance = options.distance || 200;
23 | this.offsetX = options.offsetX || 50;
24 | this.arrowSpacing = options.arrowSpacing || 20;
25 | this.arrowWidth = options.arrowWidth || 10;
26 | this.arrowStroke = options.arrowStroke || 2;
27 | this.wordSpacing = options.wordSpacing || 75;
28 | this.font = options.font || 'inherit';
29 | this.color = options.color || '#000000';
30 | this.bg = options.bg || '#ffffff';
31 | }
32 |
33 | parse(text = this.defaultText, model = this.defaultModel, settings = {}) {
34 | if(typeof this.onStart === 'function') this.onStart();
35 |
36 | let xhr = new XMLHttpRequest();
37 | xhr.open('POST', this.api, true);
38 | xhr.setRequestHeader('Content-type', 'text/plain');
39 | xhr.onreadystatechange = () => {
40 | if(xhr.readyState === 4 && xhr.status === 200) {
41 | if(typeof this.onSuccess === 'function') this.onSuccess();
42 | this.render(JSON.parse(xhr.responseText), settings, text);
43 | }
44 |
45 | else if(xhr.status !== 200) {
46 | if(typeof this.onError === 'function') this.onError(xhr.statusText);
47 | }
48 | }
49 |
50 | xhr.onerror = () => {
51 | xhr.abort();
52 | if(typeof this.onError === 'function') this.onError();
53 | }
54 |
55 | xhr.send(JSON.stringify({ text, model,
56 | collapse_punctuation: (settings.collapsePunct != undefined) ? settings.collapsePunct : this.collapsePunct,
57 | collapse_phrases: (settings.collapsePhrase != undefined) ? settings.collapsePhrase : this.collapsePhrase
58 | }));
59 | }
60 |
61 | render(parse, settings = {}, text) {
62 | parse = this.handleConversion(parse);
63 |
64 | if(text) console.log(`%c💥 JSON for "${text}"\n%c${JSON.stringify(parse)}`, 'font: bold 16px/2 arial, sans-serif', 'font: 13px/1.5 Consolas, "Andale Mono", Menlo, Monaco, Courier, monospace');
65 |
66 | this.levels = [...new Set(parse.arcs.map(({ end, start }) => end - start).sort((a, b) => a - b))];
67 | this.highestLevel = this.levels.indexOf(this.levels.slice(-1)[0]) + 1;
68 | this.offsetY = this.distance / 2 * this.highestLevel;
69 |
70 | const width = this.offsetX + parse.words.length * this.distance;
71 | const height = this.offsetY + 3 * this.wordSpacing;
72 |
73 | this.container.innerHTML = '';
74 | this.container.appendChild(this._el('svg', {
75 | id: 'displacy-svg',
76 | classnames: [ 'displacy' ],
77 | attributes: [
78 | [ 'width', width ],
79 | [ 'height', height ],
80 | [ 'viewBox', `0 0 ${width} ${height}`],
81 | [ 'preserveAspectRatio', 'xMinYMax meet' ],
82 | [ 'data-format', this.format ]
83 | ],
84 | style: [
85 | [ 'color', settings.color || this.color ],
86 | [ 'background', settings.bg || this.bg ],
87 | [ 'fontFamily', settings.font || this.font ]
88 | ],
89 | children: [
90 | ...this.renderWords(parse.words),
91 | ...this.renderArrows(parse.arcs)
92 | ]
93 | }));
94 | }
95 |
96 | renderWords(words) {
97 | return (words.map(( { text, tag, data = [] }, i) => this._el('text', {
98 | classnames: [ 'displacy-token' ],
99 | attributes: [
100 | ['fill', 'currentColor'],
101 | ['data-tag', tag],
102 | ['text-anchor', 'middle'],
103 | ['y', this.offsetY + this.wordSpacing],
104 | ...data.map(([attr, value]) => (['data-' + attr.replace(' ', '-'), value]))
105 | ],
106 | children: [
107 | this._el('tspan', {
108 | classnames: [ 'displacy-word' ],
109 | attributes: [
110 | ['x', this.offsetX + i * this.distance],
111 | ['fill', 'currentColor'],
112 | ['data-tag', tag]
113 | ],
114 | text: text
115 | }),
116 | this._el('tspan', {
117 | classnames: [ 'displacy-tag' ],
118 | attributes: [
119 | ['x', this.offsetX + i * this.distance],
120 | ['dy', '2em'],
121 | ['fill', 'currentColor'],
122 | ['data-tag', tag]
123 | ],
124 | text: tag
125 | })
126 | ]
127 | })));
128 | }
129 |
130 | renderArrows(arcs) {
131 | return arcs.map(({ label, end, start, dir, data = [] }, i) => {
132 | const rand = Math.random().toString(36).substr(2, 8);
133 | const level = this.levels.indexOf(end - start) + 1;
134 | const startX = this.offsetX + start * this.distance + this.arrowSpacing * (this.highestLevel - level) / 4;
135 | const startY = this.offsetY;
136 | const endpoint = this.offsetX + (end - start) * this.distance + start * this.distance - this.arrowSpacing * (this.highestLevel - level) / 4;
137 |
138 | let curve = this.offsetY - level * this.distance / 2;
139 | if(curve == 0 && this.levels.length > 5) curve = -this.distance;
140 |
141 | return this._el('g', {
142 | classnames: [ 'displacy-arrow' ],
143 | attributes: [
144 | [ 'data-dir', dir ],
145 | [ 'data-label', label ],
146 | ...data.map(([attr, value]) => (['data-' + attr.replace(' ', '-'), value]))
147 | ],
148 | children: [
149 | this._el('path', {
150 | id: 'arrow-' + rand,
151 | classnames: [ 'displacy-arc' ],
152 | attributes: [
153 | [ 'd', `M${startX},${startY} C${startX},${curve} ${endpoint},${curve} ${endpoint},${startY}`],
154 | [ 'stroke-width', this.arrowStroke + 'px' ],
155 | [ 'fill', 'none' ],
156 | [ 'stroke', 'currentColor' ],
157 | [ 'data-dir', dir ],
158 | [ 'data-label', label ]
159 | ]
160 | }),
161 |
162 | this._el('text', {
163 | attributes: [
164 | [ 'dy', '1em' ]
165 | ],
166 | children: [
167 | this._el('textPath', {
168 | xlink: '#arrow-' + rand,
169 | classnames: [ 'displacy-label' ],
170 | attributes: [
171 | [ 'startOffset', '50%' ],
172 | [ 'fill', 'currentColor' ],
173 | [ 'text-anchor', 'middle' ],
174 | [ 'data-label', label ],
175 | [ 'data-dir', dir ]
176 | ],
177 | text: label
178 | })
179 | ]
180 | }),
181 |
182 | this._el('path', {
183 | classnames: [ 'displacy-arrowhead' ],
184 | attributes: [
185 | [ 'd', `M${(dir == 'left') ? startX : endpoint},${startY + 2} L${(dir == 'left') ? startX - this.arrowWidth + 2 : endpoint + this.arrowWidth - 2},${startY - this.arrowWidth} ${(dir == 'left') ? startX + this.arrowWidth - 2 : endpoint - this.arrowWidth + 2},${startY - this.arrowWidth}` ],
186 | [ 'fill', 'currentColor' ],
187 | [ 'data-label', label ],
188 | [ 'data-dir', dir ]
189 | ]
190 | })
191 | ]
192 | });
193 | });
194 | }
195 |
196 | handleConversion(parse) {
197 | switch(this.format) {
198 | case 'spacy': return parse; break;
199 | case 'google': return({
200 | words: parse.map(({ text: { content: text }, partOfSpeech: { tag }} ) => ({ text, tag })),
201 | arcs: parse.map(({ dependencyEdge: { label, headTokenIndex: j }}, i) => (i != j) ? ({ label, start: Math.min(i, j), end: Math.max(i, j), dir: (j > i) ? 'left' : 'right' }) : null).filter(word => word != null)
202 | }); break;
203 | default: return parse;
204 | }
205 | }
206 |
207 | _el(tag, options) {
208 | const { classnames = [], attributes = [], style = [], children = [], text, id, xlink } = options;
209 | const ns = 'http://www.w3.org/2000/svg';
210 | const nsx = 'http://www.w3.org/1999/xlink';
211 | const el = document.createElementNS(ns, tag);
212 |
213 | classnames.forEach(name => el.classList.add(name));
214 | attributes.forEach(([attr, value]) => el.setAttribute(attr, value));
215 | style.forEach(([ prop, value ]) => el.style[prop] = value);
216 | if(xlink) el.setAttributeNS(nsx, 'xlink:href', xlink);
217 | if(text) el.appendChild(document.createTextNode(text));
218 | if(id) el.id = id;
219 | children.forEach(child => el.appendChild(child));
220 | return el;
221 | }
222 | }
223 |
--------------------------------------------------------------------------------
/frontend/assets/js/main.js:
--------------------------------------------------------------------------------
1 | //- ----------------------------------
2 | //- 💥 DISPLACY DEMO
3 | //- ----------------------------------
4 |
5 | 'use strict';
6 |
7 | {
8 | const defaultText = 'displaCy uses JavaScript, SVG and CSS to show you how computers understand language';
9 | const defaultModel = 'en';
10 | const loading = () => document.body.classList.toggle('loading');
11 | const onError = (err) => $('#error').style.display = 'block';
12 |
13 | const displacy = new displaCy(api, {
14 | container: '#displacy',
15 | engine: 'spacy',
16 | defaultText: defaultText,
17 | defaultModel: defaultModel,
18 | collapsePunct: true,
19 | collapsePhrase: true,
20 | distance: 200,
21 | offsetX: 150,
22 | arrowSpacing: 10,
23 | arrowWidth: 8,
24 | arrowStroke: 2,
25 | wordSpacing: 40,
26 | font: 'inherit',
27 | color: '#f5f4f0',
28 | bg: '#272822',
29 | onStart: loading,
30 | onSuccess: loading
31 | });
32 |
33 |
34 | // UI
35 |
36 | const $ = document.querySelector.bind(document);
37 |
38 |
39 | // First Run
40 |
41 | document.addEventListener('DOMContentLoaded', () => {
42 | const text = getQueryVar('text') || getQueryVar('full') || getQueryVar('manual') || getQueryVar('steps') || defaultText;
43 | const model = getQueryVar('model') || defaultModel;
44 | const collapsePunct = (getQueryVar('cpu')) ? (getQueryVar('cpu') == 0 ? 0 : 1) : 1;
45 | const collapsePhrase = (getQueryVar('cph')) ? (getQueryVar('cph') == 0 ? 0 : 1) : 1;
46 |
47 | const args = [text, model, { collapsePhrase, collapsePunct }];
48 |
49 | if (getQueryVar('text')) updateView(...args);
50 | if (getQueryVar('full') || getQueryVar('manual') || getQueryVar('steps')) updateURL(...args);
51 | });
52 |
53 |
54 | // Run Demo
55 |
56 | const run = (
57 | text = $('#input').value || defaultText,
58 | model = $('[name="model"]:checked').value || defaultModel,
59 | settings = {
60 | collapsePunct: $('#settings-punctuation').checked,
61 | collapsePhrase: $('#settings-phrases').checked
62 | }) => {
63 | displacy.parse(text, model, settings);
64 | updateView(text, model, settings);
65 | updateURL(text, model, settings);
66 | }
67 |
68 |
69 | // UI Event Listeners
70 |
71 | $('#submit').addEventListener('click', ev => run());
72 | $('#input').addEventListener('keydown', ev => (event.keyCode == 13) && run());
73 | $('#download').addEventListener('click', ev => $('#download').setAttribute('href', downloadSVG()).click());
74 |
75 |
76 | // Update View
77 |
78 | const updateView = (text, model, settings) => {
79 | $('#input').value = text;
80 | $(`[value="${model}"]`).checked = true;
81 | $('#settings-punctuation').checked = settings.collapsePunct;
82 | $('#settings-phrases').checked = settings.collapsePhrase;
83 | }
84 |
85 |
86 | // Update URL
87 |
88 | const updateURL = (text, model, settings) => {
89 | const url = [
90 | 'text=' + encodeURIComponent(text),
91 | 'model=' + encodeURIComponent(model),
92 | 'cpu=' + (settings.collapsePunct ? 1 : 0),
93 | 'cph=' + (settings.collapsePhrase ? 1 : 0)
94 | ];
95 |
96 | history.pushState({ text, model, settings }, null, '?' + url.join('&'));
97 | }
98 |
99 | // Get URL Query Variables
100 |
101 | const getQueryVar = (key) => {
102 | const query = window.location.search.substring(1);
103 | const params = query.split('&').map(param => param.split('='));
104 |
105 | for (let param of params)
106 | if (param[0] == key) return decodeURIComponent(param[1]);
107 | return false;
108 | }
109 |
110 |
111 | // Download SVG
112 |
113 | const downloadSVG = () => {
114 | const serializer = new XMLSerializer();
115 | return ($('#displacy-svg')) ? 'data:image/svg+xml;charset=utf-8,' + encodeURIComponent('\r\n' + serializer.serializeToString($('#displacy-svg'))) : false;
116 | }
117 | }
--------------------------------------------------------------------------------
/frontend/index.jade:
--------------------------------------------------------------------------------
1 | //- ----------------------------------
2 | //- 💥 DEMOS > DISPLACY
3 | //- ----------------------------------
4 | //- Page body of the demo: a form banner, the container the parse is drawn into, and an error notice.
5 | //- The ids #input, #submit, #download and #displacy are the ones assets/js/main.js looks up.
6 |
7 | //- Every +name(...) call below is a mixin; none is defined in this file.
8 | include _mixins
9 |
10 | //- Form banner: text field, submit button, a "model" radio dropdown, a "settings" checkbox dropdown
11 | //- (punctuation / phrases, both listed as defaults) and a download link saved as displacy.svg.
12 | //- NOTE(review): `languages` and `default_language` are not defined here - presumably page data; confirm.
13 | //- NOTE(review): main.js reads #settings-punctuation and #settings-phrases - presumably generated by +dropdown; confirm.
14 | header.o-banner.o-form
15 | +input("Your sentence here...")#input
16 | +search#submit
17 |
18 | +dropdown("radio", "model", "Model", languages, default_language)
19 |
20 | +dropdown("checkbox", "settings", "Settings", { punctuation: "Collapse Punctuation", phrases: "Collapse Phrases" }, [ "punctuation", "phrases" ])
21 |
22 | +button("download", "Download")#download(download="displacy.svg")
23 |
24 | //- Element that main.js passes to displaCy as its render container.
25 | .o-container#displacy
26 |
27 | //- NOTE(review): presumably renders the #error element that main.js reveals on failure; confirm in _mixins.
28 | +error("Oops, something went wrong. Please try again!")
29 |
--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "displacy-demo",
3 | "version": "1.0.2",
4 | "description": "An open-source NLP visualiser for the modern web",
5 | "main": "index.jade",
6 | "scripts": {
7 | "test": "echo \"Error: no test specified\" && exit 1"
8 | },
9 | "repository": {
10 | "type": "git",
11 | "url": "git+https://github.com/explosion/displacy.git"
12 | },
13 | "keywords": [
14 | "nlp",
15 | "visualizer",
16 | "spacy"
17 | ],
18 | "author": "Ines Montani",
19 | "license": "MIT",
20 | "bugs": {
21 | "url": "https://github.com/explosion/displacy/issues"
22 | },
23 | "homepage": "https://github.com/explosion/displacy#readme",
24 | "devDependencies": {
25 | "harp": "^0.29.0"
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | spacy==2.2.*
2 | falcon==2.0.0
3 | pytest
4 | requests==2.21.0
5 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | setup(
4 | name='displacy_service',
5 | version='0.1',
6 | description='REST microservice for Explosion AI\'s interactive demos ' +
7 | 'and visualisers.',
8 | author='explosion.ai & Johannes Gontrum',
9 | author_email='gontrum@me.com',
10 | include_package_data=True,
11 | license='MIT',
12 | entry_points={
13 | 'console_scripts': [
14 | 'download_models = displacy_service.scripts.download:download_models',
15 | 'run_server = displacy_service.scripts.app:run'
16 | ]
17 | }
18 | )
19 |
--------------------------------------------------------------------------------
/start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Entry point: substitutes $PORT for the PORT placeholder in the nginx site
3 | # config, then runs supervisord in the foreground.
4 | set -e
5 |
6 | # Stop here if PORT is unset or empty; substituting an empty value would
7 | # silently leave the nginx config with the placeholder replaced by nothing.
8 | : "${PORT:?PORT must be set}"
9 | sed -i "s/PORT/$PORT/g" /etc/nginx/sites-enabled/default
10 |
11 | # exec replaces this shell, so supervisord receives signals (e.g. on container stop) directly.
12 | exec supervisord -n
13 |
--------------------------------------------------------------------------------