├── .babelrc
├── .gitignore
├── LICENSE.txt
├── README.md
├── app
├── assets
│ ├── fonts
│ │ ├── glyphicons-halflings-regular.eot
│ │ ├── glyphicons-halflings-regular.svg
│ │ ├── glyphicons-halflings-regular.ttf
│ │ ├── glyphicons-halflings-regular.woff
│ │ └── glyphicons-halflings-regular.woff2
│ └── index.html
├── components
│ └── Annotator.jsx
├── constants.js
├── main.jsx
├── reducers
│ ├── editor.js
│ └── index.js
├── sagas
│ ├── api.js
│ ├── entities.js
│ ├── index.js
│ └── train.js
├── store.js
├── styles
│ └── application.css
└── util
│ ├── parcyToSlate.js
│ └── slateToParcy.js
├── brunch-config.js
├── jest
├── .gitignore
└── setup-jasmine-env.js
├── package.json
├── test
├── sagas
│ └── train.spec.js
└── util
│ ├── parcyToSlate.spec.js
│ └── slateToParcy.spec.js
└── yarn.lock
/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 | "presets": ["es2015", "react", "stage-3"]
3 | }
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Numerous always-ignore extensions
2 | *.diff
3 | *.err
4 | *.orig
5 | *.log
6 | *.rej
7 | *.swo
8 | *.swp
9 | *.vi
10 | *~
11 | *.sass-cache
12 |
13 | # OS or Editor folders
14 | .DS_Store
15 | .cache
16 | .project
17 | .settings
18 | .tmproj
19 | nbproject
20 | Thumbs.db
21 |
22 | # NPM packages folder.
23 | node_modules/
24 |
25 | # Brunch output folder.
26 | public/
27 |
28 | # IDE-specific files
29 | .idea/
30 |
31 | # Coverage
32 | coverage/
33 |
34 | # Version file
35 | version
36 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Tomas Crossland
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # spaCy Annotator
2 |
3 | A web application for annotating text corpora with NLP annotations.
4 |
5 | ## Getting started
6 |
7 | * Install (if you don't have them):
8 | * [Node.js](http://nodejs.org): `brew install node` on OS X
9 | * [Brunch](http://brunch.io): `npm install -g brunch`
10 | * Brunch plugins and app dependencies: `npm install`
11 | * Run:
12 | * `brunch watch --server` — watches the project with continuous rebuild. This will also launch an HTTP server with [pushState](https://developer.mozilla.org/en-US/docs/Web/Guide/API/DOM/Manipulating_the_browser_history).
13 | * `brunch build --production` — builds minified project for production
14 | * Test:
15 | * `npm test` — executes test suites
16 | * `npm test -- --watch` — executes test suites continuously (`package.json` defines no `test:watch` script, so the watch flag is passed through to Jest)
17 | * Learn:
18 | * `public/` dir is fully auto-generated and served by HTTP server. Write your code in `app/` dir.
19 | * Place static files in `app/assets/`; they are copied as-is to `public/`.
20 | * [Brunch site](http://brunch.io), [Getting started guide](https://github.com/brunch/brunch-guide#readme)
21 |
--------------------------------------------------------------------------------
/app/assets/fonts/glyphicons-halflings-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcrossland/spacy-annotator/56fca1ac0721f20f439b8686aecab541479e8fc9/app/assets/fonts/glyphicons-halflings-regular.eot
--------------------------------------------------------------------------------
/app/assets/fonts/glyphicons-halflings-regular.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/app/assets/fonts/glyphicons-halflings-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcrossland/spacy-annotator/56fca1ac0721f20f439b8686aecab541479e8fc9/app/assets/fonts/glyphicons-halflings-regular.ttf
--------------------------------------------------------------------------------
/app/assets/fonts/glyphicons-halflings-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcrossland/spacy-annotator/56fca1ac0721f20f439b8686aecab541479e8fc9/app/assets/fonts/glyphicons-halflings-regular.woff
--------------------------------------------------------------------------------
/app/assets/fonts/glyphicons-halflings-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcrossland/spacy-annotator/56fca1ac0721f20f439b8686aecab541479e8fc9/app/assets/fonts/glyphicons-halflings-regular.woff2
--------------------------------------------------------------------------------
/app/assets/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
9 | spaCy Annotator
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/app/components/Annotator.jsx:
--------------------------------------------------------------------------------
1 | import React from 'react'
2 | import { connect } from 'react-redux'
3 | import { Editor, Raw } from 'slate'
4 | import { slateToParcy } from '../util/slateToParcy'
5 | import { REQUEST_TRAIN, REQUEST_ENTITIES } from '../constants'
6 |
7 | const renderMark = (mark) => (props) => ({props.children})
8 |
/**
 * Serializes a Slate editor state with `Raw.serialize` and hands the
 * resulting raw document to `slateToParcy`.
 *
 * @param {object} editor - Slate editor state.
 * @returns {*} whatever `slateToParcy` returns for the serialized document.
 */
const toParcy = (editor) => {
  const { document: rawDocument } = Raw.serialize(editor)
  return slateToParcy(rawDocument)
}
12 |
// Entity mark types offered by the annotator; the key order of the schema's
// `marks` object follows this list.
const MARK_TYPES = ['org', 'person', 'norp', 'gpe', 'loc', 'date']

// Cached schema. It is built lazily on first use and then reused, so the
// `schema` prop and every mark renderer keep a stable identity across store
// updates instead of being recreated on each mapStateToProps call.
let annotatorSchema = null

const getAnnotatorSchema = () => {
  if (annotatorSchema === null) {
    const marks = {}
    MARK_TYPES.forEach((type) => {
      marks[type] = renderMark(type)
    })
    annotatorSchema = { marks }
  }
  return annotatorSchema
}

/**
 * Maps the redux store to the annotator props.
 *
 * @param {{editor: object}} state - store state; only `editor` is read.
 * @returns {{editorState: object, raw: *, schema: {marks: object}}}
 *   the editor state, its `toParcy` conversion, and the shared mark schema.
 */
const mapStateToProps = ({editor}) => (
  {
    editorState: editor,
    raw: toParcy(editor),
    schema: getAnnotatorSchema()
  }
)
29 |
/**
 * Builds the annotator callbacks around the store's `dispatch`.
 *
 * - `onChange(state)` dispatches EDITOR_STATE and returns dispatch's result.
 * - `onMark(type)` returns an event handler that dispatches MARK.
 * - `onTrain(raw)` / `onDetect(raw)` return event handlers that dispatch the
 *   train / entity-detection request for the 'es' model.
 */
const mapDispatchToProps = (dispatch) => {
  const onChange = (state) => dispatch({ type: 'EDITOR_STATE', state: state })

  const onMark = (type) => (e) => {
    dispatch({ type: 'MARK', mark: type })
  }

  // Shared shape of the two API request actions; only the action type differs.
  const requestFor = (actionType) => (raw) => (e) => {
    dispatch({ type: actionType, model: 'es', paragraphs: raw })
  }

  return {
    onChange,
    onMark,
    onTrain: requestFor(REQUEST_TRAIN),
    onDetect: requestFor(REQUEST_ENTITIES)
  }
}
47 |
48 | const TagButton = (k, v) => (
49 |
50 | )
51 |
52 | const AnnotatorPane = ({editorState, schema, onChange, onMark, onTrain, onDetect, raw}) => (
53 |
54 |
Annotator
55 |
56 | {Object.keys(schema.marks).map(k => TagButton(k, onMark(k)))}
57 |
58 |
63 |
Detect
64 |
Train
65 |
66 | )
67 |
68 | export const Annotator = connect(mapStateToProps, mapDispatchToProps)(AnnotatorPane)
69 |
--------------------------------------------------------------------------------
/app/constants.js:
--------------------------------------------------------------------------------
1 |
2 | export const RECEIVE_TRAIN = 'RECEIVE_TRAIN'
3 | export const REQUEST_TRAIN = 'REQUEST_TRAIN'
4 | export const REQUEST_TRAIN_FAILED = 'REQUEST_TRAIN_FAILED'
5 |
6 | export const RECEIVE_ENTITIES = 'RECEIVE_ENTITIES'
7 | export const REQUEST_ENTITIES = 'REQUEST_ENTITIES'
8 | export const REQUEST_ENTITIES_FAILED = 'REQUEST_ENTITIES_FAILED'
9 |
--------------------------------------------------------------------------------
/app/main.jsx:
--------------------------------------------------------------------------------
1 | import React from 'react'
2 | import ReactDOM from 'react-dom'
3 | import { store } from './store'
4 | import { Provider } from 'react-redux'
5 | import { browserHistory, Router, Route, IndexRoute } from 'react-router'
6 | import { routerActions, syncHistoryWithStore } from 'react-router-redux'
7 | import { Annotator } from './components/Annotator'
8 |
9 | const history = syncHistoryWithStore(browserHistory, store)
10 |
11 | const render = () => {
12 | ReactDOM.render(
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | , document.getElementById('app')
21 | )
22 | }
23 |
24 | if (document.readyState !== 'complete') {
25 | document.addEventListener('DOMContentLoaded', render)
26 | } else {
27 | render()
28 | }
29 |
--------------------------------------------------------------------------------
/app/reducers/editor.js:
--------------------------------------------------------------------------------
1 | import { Raw, Plain, Text, Document, State, Selection } from 'slate'
2 | import { parcyToSlate } from '../util/parcyToSlate'
3 |
4 | const initialState = Raw.deserialize({
5 | "nodes": [
6 | {
7 | "data": {},
8 | "kind": "block",
9 | "isVoid": false,
10 | "type": "paragraph",
11 | "nodes": [
12 | {
13 | "kind": "text",
14 | "ranges": [
15 | {
16 | "kind": "range",
17 | "text": "Banco Bilbao Vizcaya Argentaria",
18 | "marks": [
19 | {
20 | "data": {},
21 | "kind": "mark",
22 | "type": "org"
23 | }
24 | ]
25 | },
26 | {
27 | "kind": "range",
28 | "text": " (",
29 | "marks": []
30 | },
31 | {
32 | "kind": "range",
33 | "text": "BBVA",
34 | "marks": [
35 | {
36 | "data": {},
37 | "kind": "mark",
38 | "type": "org"
39 | }
40 | ]
41 | },
42 | {
43 | "kind": "range",
44 | "text": ") es una entidad bancaria ",
45 | "marks": []
46 | },
47 | {
48 | "kind": "range",
49 | "text": "española",
50 | "marks": [
51 | {
52 | "data": {},
53 | "kind": "mark",
54 | "type": "norp"
55 | }
56 | ]
57 | },
58 | {
59 | "kind": "range",
60 | "text": ", presidida por ",
61 | "marks": []
62 | },
63 | {
64 | "kind": "range",
65 | "text": "Francisco González Rodríguez",
66 | "marks": [
67 | {
68 | "data": {},
69 | "kind": "mark",
70 | "type": "person"
71 | }
72 | ]
73 | },
74 | {
75 | "kind": "range",
76 | "text": ". Es uno de los mayores bancos de ",
77 | "marks": []
78 | },
79 | {
80 | "kind": "range",
81 | "text": "España",
82 | "marks": [
83 | {
84 | "data": {},
85 | "kind": "mark",
86 | "type": "gpe"
87 | }
88 | ]
89 | },
90 | {
91 | "kind": "range",
92 | "text": ", siendo la primera entidad financiera de ",
93 | "marks": []
94 | },
95 | {
96 | "kind": "range",
97 | "text": "México",
98 | "marks": [
99 | {
100 | "data": {},
101 | "kind": "mark",
102 | "type": "gpe"
103 | }
104 | ]
105 | },
106 | {
107 | "kind": "range",
108 | "text": ", segunda en ",
109 | "marks": []
110 | },
111 | {
112 | "kind": "range",
113 | "text": "España",
114 | "marks": [
115 | {
116 | "data": {},
117 | "kind": "mark",
118 | "type": "gpe"
119 | }
120 | ]
121 | },
122 | {
123 | "kind": "range",
124 | "text": ", ",
125 | "marks": []
126 | },
127 | {
128 | "kind": "range",
129 | "text": "Turquía",
130 | "marks": [
131 | {
132 | "data": {},
133 | "kind": "mark",
134 | "type": "gpe"
135 | }
136 | ]
137 | },
138 | {
139 | "kind": "range",
140 | "text": " y ",
141 | "marks": []
142 | },
143 | {
144 | "kind": "range",
145 | "text": "Perú",
146 | "marks": [
147 | {
148 | "data": {},
149 | "kind": "mark",
150 | "type": "gpe"
151 | }
152 | ]
153 | },
154 | {
155 | "kind": "range",
156 | "text": ", tercera en ",
157 | "marks": []
158 | },
159 | {
160 | "kind": "range",
161 | "text": "Venezuela",
162 | "marks": [
163 | {
164 | "data": {},
165 | "kind": "mark",
166 | "type": "gpe"
167 | }
168 | ]
169 | },
170 | {
171 | "kind": "range",
172 | "text": " y cuarta en ",
173 | "marks": []
174 | },
175 | {
176 | "kind": "range",
177 | "text": "Colombia",
178 | "marks": [
179 | {
180 | "data": {},
181 | "kind": "mark",
182 | "type": "gpe"
183 | }
184 | ]
185 | },
186 | {
187 | "kind": "range",
188 | "text": ".",
189 | "marks": []
190 | }
191 | ]
192 | }
193 | ]
194 | },
195 | {
196 | "data": {},
197 | "kind": "block",
198 | "isVoid": false,
199 | "type": "paragraph",
200 | "nodes": [
201 | {
202 | "kind": "text",
203 | "ranges": [
204 | {
205 | "kind": "range",
206 | "text": "El banco tiene su sede social y fiscal desde ",
207 | "marks": []
208 | },
209 | {
210 | "kind": "range",
211 | "text": "1868",
212 | "marks": [
213 | {
214 | "data": {},
215 | "kind": "mark",
216 | "type": "date"
217 | }
218 | ]
219 | },
220 | {
221 | "kind": "range",
222 | "text": " en la ",
223 | "marks": []
224 | },
225 | {
226 | "kind": "range",
227 | "text": "Plaza de San Nicolás",
228 | "marks": [
229 | {
230 | "data": {},
231 | "kind": "mark",
232 | "type": "loc"
233 | }
234 | ]
235 | },
236 | {
237 | "kind": "range",
238 | "text": " nº 4, en el ",
239 | "marks": []
240 | },
241 | {
242 | "kind": "range",
243 | "text": "Casco Viejo",
244 | "marks": [
245 | {
246 | "data": {},
247 | "kind": "mark",
248 | "type": "loc"
249 | }
250 | ]
251 | },
252 | {
253 | "kind": "range",
254 | "text": " de la ciudad de ",
255 | "marks": []
256 | },
257 | {
258 | "kind": "range",
259 | "text": "Bilbao",
260 | "marks": [
261 | {
262 | "data": {},
263 | "kind": "mark",
264 | "type": "gpe"
265 | }
266 | ]
267 | },
268 | {
269 | "kind": "range",
270 | "text": " (",
271 | "marks": []
272 | },
273 | {
274 | "kind": "range",
275 | "text": "Vizcaya",
276 | "marks": [
277 | {
278 | "data": {},
279 | "kind": "mark",
280 | "type": "gpe"
281 | }
282 | ]
283 | },
284 | {
285 | "kind": "range",
286 | "text": ", ",
287 | "marks": []
288 | },
289 | {
290 | "kind": "range",
291 | "text": "País Vasco",
292 | "marks": [
293 | {
294 | "data": {},
295 | "kind": "mark",
296 | "type": "gpe"
297 | }
298 | ]
299 | },
300 | {
301 | "kind": "range",
302 | "text": "), donde fue fundado como ",
303 | "marks": []
304 | },
305 | {
306 | "kind": "range",
307 | "text": "Banco de Bilbao",
308 | "marks": [
309 | {
310 | "data": {},
311 | "kind": "mark",
312 | "type": "org"
313 | }
314 | ]
315 | },
316 | {
317 | "kind": "range",
318 | "text": " en ",
319 | "marks": []
320 | },
321 | {
322 | "kind": "range",
323 | "text": "1857",
324 | "marks": [
325 | {
326 | "data": {},
327 | "kind": "mark",
328 | "type": "date"
329 | }
330 | ]
331 | },
332 | {
333 | "kind": "range",
334 | "text": ". La mayor parte de los servicios centrales de la entidad y su sede operativa se concentran principalmente en sus oficinas centrales de ",
335 | "marks": []
336 | },
337 | {
338 | "kind": "range",
339 | "text": "Madrid",
340 | "marks": [
341 | {
342 | "data": {},
343 | "kind": "mark",
344 | "type": "gpe"
345 | }
346 | ]
347 | },
348 | {
349 | "kind": "range",
350 | "text": ", situadas en el complejo ",
351 | "marks": []
352 | },
353 | {
354 | "kind": "range",
355 | "text": "Ciudad BBVA",
356 | "marks": [
357 | {
358 | "data": {},
359 | "kind": "mark",
360 | "type": "loc"
361 | }
362 | ]
363 | },
364 | {
365 | "kind": "range",
366 | "text": " de la zona de ",
367 | "marks": []
368 | },
369 | {
370 | "kind": "range",
371 | "text": "Las Tablas",
372 | "marks": [
373 | {
374 | "data": {},
375 | "kind": "mark",
376 | "type": "gpe"
377 | }
378 | ]
379 | },
380 | {
381 | "kind": "range",
382 | "text": ". Su sede de servicios técnicos se encuentra en la bilbaina ",
383 | "marks": []
384 | },
385 | {
386 | "kind": "range",
387 | "text": "Torre BBVA",
388 | "marks": [
389 | {
390 | "data": {},
391 | "kind": "mark",
392 | "type": "loc"
393 | }
394 | ]
395 | },
396 | {
397 | "kind": "range",
398 | "text": " de ",
399 | "marks": []
400 | },
401 | {
402 | "kind": "range",
403 | "text": "Gran Vía",
404 | "marks": [
405 | {
406 | "data": {},
407 | "kind": "mark",
408 | "type": "loc"
409 | }
410 | ]
411 | },
412 | {
413 | "kind": "range",
414 | "text": ", nº 1. Su principal edificio fuera de ",
415 | "marks": []
416 | },
417 | {
418 | "kind": "range",
419 | "text": "España",
420 | "marks": [
421 | {
422 | "data": {},
423 | "kind": "mark",
424 | "type": "gpe"
425 | }
426 | ]
427 | },
428 | {
429 | "kind": "range",
430 | "text": " es la ",
431 | "marks": []
432 | },
433 | {
434 | "kind": "range",
435 | "text": "Torre BBVA Bancomer",
436 | "marks": [
437 | {
438 | "data": {},
439 | "kind": "mark",
440 | "type": "loc"
441 | }
442 | ]
443 | },
444 | {
445 | "kind": "range",
446 | "text": ", en el nº 506 del ",
447 | "marks": []
448 | },
449 | {
450 | "kind": "range",
451 | "text": "Paseo de la Reforma",
452 | "marks": [
453 | {
454 | "data": {},
455 | "kind": "mark",
456 | "type": "loc"
457 | }
458 | ]
459 | },
460 | {
461 | "kind": "range",
462 | "text": " de la ",
463 | "marks": []
464 | },
465 | {
466 | "kind": "range",
467 | "text": "Ciudad de México",
468 | "marks": [
469 | {
470 | "data": {},
471 | "kind": "mark",
472 | "type": "loc"
473 | }
474 | ]
475 | },
476 | {
477 | "kind": "range",
478 | "text": ".",
479 | "marks": []
480 | }
481 | ]
482 | }
483 | ]
484 | },
485 | {
486 | "data": {},
487 | "kind": "block",
488 | "isVoid": false,
489 | "type": "paragraph",
490 | "nodes": [
491 | {
492 | "kind": "text",
493 | "ranges": [
494 | {
495 | "kind": "range",
496 | "text": "A ",
497 | "marks": []
498 | },
499 | {
500 | "kind": "range",
501 | "text": "31 de diciembre de 2015",
502 | "marks": [
503 | {
504 | "data": {},
505 | "kind": "mark",
506 | "type": "date"
507 | }
508 | ]
509 | },
510 | {
511 | "kind": "range",
512 | "text": ", los activos de ",
513 | "marks": []
514 | },
515 | {
516 | "kind": "range",
517 | "text": "BBVA",
518 | "marks": [
519 | {
520 | "data": {},
521 | "kind": "mark",
522 | "type": "org"
523 | }
524 | ]
525 | },
526 | {
527 | "kind": "range",
528 | "text": " eran de 750.078 millones de euros, siendo la segunda entidad financiera ",
529 | "marks": []
530 | },
531 | {
532 | "kind": "range",
533 | "text": "española",
534 | "marks": [
535 | {
536 | "data": {},
537 | "kind": "mark",
538 | "type": "norp"
539 | }
540 | ]
541 | },
542 | {
543 | "kind": "range",
544 | "text": " por volumen de activos. Esa misma fecha, contaba con 9.145 oficinas, 137.968 empleados y 66 millones de clientes, estando presente en 35 países. A ",
545 | "marks": []
546 | },
547 | {
548 | "kind": "range",
549 | "text": "30 de junio de 2015",
550 | "marks": [
551 | {
552 | "data": {},
553 | "kind": "mark",
554 | "type": "date"
555 | }
556 | ]
557 | },
558 | {
559 | "kind": "range",
560 | "text": ", era el 37º banco del mundo por volumen de activos. Cotiza en la Bolsa de Madrid (BBVA) y forma parte del IBEX 35 así como del Dow Jones EURO STOXX 50.",
561 | "marks": []
562 | }
563 | ]
564 | }
565 | ]
566 | }
567 | ]
568 | }, { terse: true })
569 |
/**
 * Applies one tag to a text node as a mark of the tag's type.
 *
 * The result of `addMark` is returned rather than discarded.
 * NOTE(review): Slate models are documented as Immutable.js records, so
 * `addMark` presumably returns a new node instead of mutating `node` —
 * confirm against the installed Slate version.
 *
 * @param {object} node - object exposing `addMark(start, length, mark)`.
 * @param {{start: number, len: number, type: string}} tag - span to mark.
 * @param {number} [idx] - unused; kept so the helper can be used as an
 *   iteration callback.
 * @returns {*} whatever `node.addMark` returns.
 */
const applyTag = (node, tag, idx) => {
  return node.addMark(tag.start, tag.len, {type: tag.type})
}
574 |
/**
 * Builds a text node from a sentence and applies each tag as a mark.
 *
 * The node is threaded through `reduce` so that the value returned by each
 * `addMark` call is the one the next mark is applied to.
 * NOTE(review): this relies on `addMark` returning the updated node (Slate
 * models are documented as immutable) — confirm against the installed Slate
 * version.
 *
 * @param {{text: string, tags: Array<{start: number, len: number, type: string}>}} sentence
 * @returns {*} the text node after all tags have been applied.
 */
const sentenceToNode = ({ text, tags }) => {
  return tags.reduce(
    (node, t) => node.addMark(t.start, t.len, {type: t.type}),
    Text.createFromString(text)
  )
}
580 |
/**
 * Reducer for the Slate editor state.
 *
 * - EDITOR_STATE: replaces the state with `action.state`.
 * - MARK: toggles `action.mark` through a Slate transform.
 * - RECEIVE_ENTITIES: rebuilds the state from `action.paragraphs`.
 * - anything else: returns the current state unchanged.
 */
export const editor = (state = initialState, action) => {
  const { type } = action

  if (type === 'EDITOR_STATE') {
    return action.state
  }

  if (type === 'MARK') {
    return state.transform().toggleMark(action.mark).apply()
  }

  if (type === 'RECEIVE_ENTITIES') {
    const rawDocument = parcyToSlate(action.paragraphs)
    return Raw.deserialize(rawDocument, { terse: true })
  }

  return state
}
594 |
--------------------------------------------------------------------------------
/app/reducers/index.js:
--------------------------------------------------------------------------------
1 | import { editor } from './editor'
2 |
3 | export { editor }
4 |
--------------------------------------------------------------------------------
/app/sagas/api.js:
--------------------------------------------------------------------------------
1 | import 'isomorphic-fetch'
2 |
/**
 * Turns an unsuccessful fetch response into a thrown Error so that it can be
 * handled in a promise chain's `.catch`.
 *
 * `statusText` can be an empty string, which would yield an Error with an
 * empty message; in that case the numeric status is used instead so the
 * message is never blank.
 *
 * @param {Response} response - fetch response (reads `ok`, `status`, `statusText`).
 * @returns {Response} the same response when `response.ok` is truthy.
 * @throws {Error} when `response.ok` is falsy.
 */
function handleErrors(response) {
  if (!response.ok) {
    throw new Error(response.statusText || `HTTP ${response.status}`)
  }
  return response
}
7 |
/**
 * Fetches `url` and resolves to a result object instead of rejecting:
 * `{ json }` with the parsed body on success, or `{ error }` carrying the
 * failure's message when the request, the status check or JSON parsing fails.
 *
 * @param {string} url
 * @param {object} [opts] - passed straight to `fetch`.
 * @returns {Promise<{json: *}|{error: string}>}
 */
export function apiJson(url, opts) {
  const parseBody = (resp) => resp.json()
  const asSuccess = (json) => ({ json })
  const asFailure = (ex) => ({ error: ex.message })

  return fetch(url, opts)
    .then(handleErrors)
    .then(parseBody)
    .then(asSuccess)
    .catch(asFailure)
}
15 |
/**
 * Fetches `url` and resolves to a result object instead of rejecting:
 * `{ response }` with the raw response on success, or `{ error }` carrying
 * the failure's message when the request or the status check fails.
 *
 * @param {string} url
 * @param {object} [opts] - passed straight to `fetch`.
 * @returns {Promise<{response: Response}|{error: string}>}
 */
export function apiHttp(url, opts) {
  const asSuccess = (response) => ({ response })
  const asFailure = (ex) => ({ error: ex.message })

  return fetch(url, opts)
    .then(handleErrors)
    .then(asSuccess)
    .catch(asFailure)
}
22 |
--------------------------------------------------------------------------------
/app/sagas/entities.js:
--------------------------------------------------------------------------------
1 | import 'babel-polyfill'
2 | import { call, put, takeLatest } from 'redux-saga/effects'
3 | import { apiJson } from './api'
4 | import { RECEIVE_ENTITIES, REQUEST_ENTITIES, REQUEST_ENTITIES_FAILED } from '../constants'
5 |
/**
 * Worker saga: POSTs `{ model, paragraphs }` as JSON to the `/ent` endpoint
 * and reports the outcome as an action.
 *
 * Puts RECEIVE_ENTITIES with the response JSON when one is returned,
 * otherwise REQUEST_ENTITIES_FAILED with the error message (or
 * 'Request failed' when there is none).
 */
export function* postDetectEntities({ model, paragraphs }) {
  const request = {
    method: 'POST',
    body: JSON.stringify({ model, paragraphs })
  }
  const result = yield call(apiJson, 'http://127.0.0.1:8000/ent', request)

  if (!result.json) {
    yield put({ type: REQUEST_ENTITIES_FAILED, message: result.error || 'Request failed' })
    return
  }

  yield put({ type: RECEIVE_ENTITIES, paragraphs: result.json })
}
17 |
/**
 * Watcher saga: yields a `takeLatest` effect that runs `postDetectEntities`
 * for REQUEST_ENTITIES actions.
 */
export function* detectEntitiesSaga() {
  const watchDetectRequests = takeLatest(REQUEST_ENTITIES, postDetectEntities)
  yield watchDetectRequests
}
21 |
--------------------------------------------------------------------------------
/app/sagas/index.js:
--------------------------------------------------------------------------------
1 | import { detectEntitiesSaga } from './entities'
2 | import { trainModelSaga } from './train'
3 |
/**
 * Root saga: starts the train and entity-detection watcher sagas by yielding
 * both of them together in a single array.
 */
export default function* sagas() {
  const watchers = [
    trainModelSaga(),
    detectEntitiesSaga()
  ]
  yield watchers
}
10 |
--------------------------------------------------------------------------------
/app/sagas/train.js:
--------------------------------------------------------------------------------
1 | import 'babel-polyfill'
2 | import { call, put, takeLatest } from 'redux-saga/effects'
3 | import { apiJson } from './api'
4 | import { RECEIVE_TRAIN, REQUEST_TRAIN, REQUEST_TRAIN_FAILED } from '../constants'
5 |
/**
 * Worker saga: POSTs `{ model, paragraphs }` as JSON to the `/train` endpoint
 * and reports the outcome as an action.
 *
 * Puts RECEIVE_TRAIN with the response JSON when one is returned, otherwise
 * REQUEST_TRAIN_FAILED with the error message (or 'Request failed' when
 * there is none).
 */
export function* postTrainModel({ model, paragraphs }) {
  const request = {
    method: 'POST',
    body: JSON.stringify({ model, paragraphs })
  }
  const result = yield call(apiJson, 'http://127.0.0.1:8000/train', request)

  if (!result.json) {
    yield put({ type: REQUEST_TRAIN_FAILED, message: result.error || 'Request failed' })
    return
  }

  yield put({ type: RECEIVE_TRAIN, paragraphs: result.json })
}
17 |
/**
 * Watcher saga: yields a `takeLatest` effect that runs `postTrainModel` for
 * REQUEST_TRAIN actions.
 */
export function* trainModelSaga() {
  const watchTrainRequests = takeLatest(REQUEST_TRAIN, postTrainModel)
  yield watchTrainRequests
}
21 |
--------------------------------------------------------------------------------
/app/store.js:
--------------------------------------------------------------------------------
1 | import 'babel-polyfill'
2 | import { createStore, combineReducers, applyMiddleware, compose } from 'redux'
3 | import { browserHistory } from 'react-router'
4 | import { routerReducer, routerMiddleware } from 'react-router-redux'
5 | import createSagaMiddleware from 'redux-saga'
6 | import * as reducers from './reducers'
7 | import sagas from './sagas'
8 | import { trainModelSaga } from './sagas/train'
9 |
// Root reducer: every reducer exported from ./reducers plus the router
// reducer mounted under `routing`.
const rootReducer = combineReducers(
  Object.assign({}, reducers, { routing: routerReducer })
)

const sagaMiddleware = createSagaMiddleware()
const middlewareEnhancer = applyMiddleware(
  routerMiddleware(browserHistory),
  sagaMiddleware
)

// Use the Redux DevTools compose function when the page exposes one,
// otherwise fall back to redux's own compose.
const composeEnhancers = window.__REDUX_DEVTOOLS_EXTENSION_COMPOSE__ || compose

export const store = createStore(rootReducer, composeEnhancers(middlewareEnhancer))

// Start the root saga once the store (and with it the saga middleware) exists.
sagaMiddleware.run(sagas)
29 |
--------------------------------------------------------------------------------
/app/styles/application.css:
--------------------------------------------------------------------------------
1 | .entities {
2 | line-height: 2;
3 | }
4 |
5 | [data-entity] {
6 | padding: 0.25em 0.35em;
7 | margin: 0px 0.25em;
8 | line-height: 1;
9 | display: inline-block;
10 | border-radius: 0.25em;
11 | border: 1px solid;
12 | }
13 |
14 | [data-entity]::after {
15 | box-sizing: border-box;
16 | content: attr(data-entity);
17 | font-size: 0.6em;
18 | line-height: 1;
19 | padding: 0.35em;
20 | border-radius: 0.35em;
21 | text-transform: uppercase;
22 | display: inline-block;
23 | vertical-align: middle;
24 | margin: 0px 0px 0.1rem 0.5rem;
25 | }
26 |
27 | [data-entity][data-entity="person"] {
28 | background: rgba(166, 226, 45, 0.2) none repeat scroll 0% 0%;
29 | border-color: rgb(166, 226, 45);
30 | }
31 |
32 | [data-entity][data-entity="person"]::after {
33 | background: rgb(166, 226, 45) none repeat scroll 0% 0%;
34 | }
35 |
36 | [data-entity][data-entity="norp"] {
37 | background: rgba(224, 0, 132, 0.2) none repeat scroll 0% 0%;
38 | border-color: rgb(224, 0, 132);
39 | }
40 |
41 | [data-entity][data-entity="norp"]::after {
42 | background: rgb(224, 0, 132) none repeat scroll 0% 0%;
43 | }
44 |
45 | [data-entity][data-entity="facility"] {
46 | background: rgba(67, 198, 252, 0.2) none repeat scroll 0% 0%;
47 | border-color: rgb(67, 198, 252);
48 | }
49 |
50 | [data-entity][data-entity="facility"]::after {
51 | background: rgb(67, 198, 252) none repeat scroll 0% 0%;
52 | }
53 |
54 | [data-entity][data-entity="org"] {
55 | background: rgba(67, 198, 252, 0.2) none repeat scroll 0% 0%;
56 | border-color: rgb(67, 198, 252);
57 | }
58 |
59 | [data-entity][data-entity="org"]::after {
60 | background: rgb(67, 198, 252) none repeat scroll 0% 0%;
61 | }
62 |
63 | [data-entity][data-entity="gpe"] {
64 | background: rgba(253, 151, 32, 0.2) none repeat scroll 0% 0%;
65 | border-color: rgb(253, 151, 32);
66 | }
67 |
68 | [data-entity][data-entity="gpe"]::after {
69 | background: rgb(253, 151, 32) none repeat scroll 0% 0%;
70 | }
71 |
72 | [data-entity][data-entity="loc"] {
73 | background: rgba(253, 151, 32, 0.2) none repeat scroll 0% 0%;
74 | border-color: rgb(253, 151, 32);
75 | }
76 |
77 | [data-entity][data-entity="loc"]::after {
78 | background: rgb(253, 151, 32) none repeat scroll 0% 0%;
79 | }
80 |
81 | [data-entity][data-entity="product"] {
82 | background: rgba(142, 125, 255, 0.2) none repeat scroll 0% 0%;
83 | border-color: rgb(142, 125, 255);
84 | }
85 |
86 | [data-entity][data-entity="product"]::after {
87 | background: rgb(142, 125, 255) none repeat scroll 0% 0%;
88 | }
89 |
90 | [data-entity][data-entity="event"] {
91 | background: rgba(255, 204, 0, 0.2) none repeat scroll 0% 0%;
92 | border-color: rgb(255, 204, 0);
93 | }
94 |
95 | [data-entity][data-entity="event"]::after {
96 | background: rgb(255, 204, 0) none repeat scroll 0% 0%;
97 | }
98 |
99 | [data-entity][data-entity="work_of_art"] {
100 | background: rgba(255, 204, 0, 0.2) none repeat scroll 0% 0%;
101 | border-color: rgb(255, 204, 0);
102 | }
103 |
104 | [data-entity][data-entity="work_of_art"]::after {
105 | background: rgb(255, 204, 0) none repeat scroll 0% 0%;
106 | }
107 |
108 | [data-entity][data-entity="language"] {
109 | background: rgba(255, 204, 0, 0.2) none repeat scroll 0% 0%;
110 | border-color: rgb(255, 204, 0);
111 | }
112 |
113 | [data-entity][data-entity="language"]::after {
114 | background: rgb(255, 204, 0) none repeat scroll 0% 0%;
115 | }
116 |
117 | [data-entity][data-entity="date"] {
118 | background: rgba(47, 187, 171, 0.2) none repeat scroll 0% 0%;
119 | border-color: rgb(47, 187, 171);
120 | }
121 |
122 | [data-entity][data-entity="date"]::after {
123 | background: rgb(47, 187, 171) none repeat scroll 0% 0%;
124 | }
125 |
126 | [data-entity][data-entity="time"] {
127 | background: rgba(47, 187, 171, 0.2) none repeat scroll 0% 0%;
128 | border-color: rgb(47, 187, 171);
129 | }
130 |
131 | [data-entity][data-entity="time"]::after {
132 | background: rgb(47, 187, 171) none repeat scroll 0% 0%;
133 | }
134 |
135 | [data-entity][data-entity="percent"] {
136 | background: rgba(153, 153, 153, 0.2) none repeat scroll 0% 0%;
137 | border-color: rgb(153, 153, 153);
138 | }
139 |
140 | [data-entity][data-entity="percent"]::after {
141 | background: rgb(153, 153, 153) none repeat scroll 0% 0%;
142 | }
143 |
144 | [data-entity][data-entity="money"] {
145 | background: rgba(153, 153, 153, 0.2) none repeat scroll 0% 0%;
146 | border-color: rgb(153, 153, 153);
147 | }
148 |
149 | [data-entity][data-entity="money"]::after {
150 | background: rgb(153, 153, 153) none repeat scroll 0% 0%;
151 | }
152 |
153 | [data-entity][data-entity="quantity"] {
154 | background: rgba(153, 153, 153, 0.2) none repeat scroll 0% 0%;
155 | border-color: rgb(153, 153, 153);
156 | }
157 |
158 | [data-entity][data-entity="quantity"]::after {
159 | background: rgb(153, 153, 153) none repeat scroll 0% 0%;
160 | }
161 |
162 | [data-entity][data-entity="ordinal"] {
163 | background: rgba(153, 153, 153, 0.2) none repeat scroll 0% 0%;
164 | border-color: rgb(153, 153, 153);
165 | }
166 |
167 | [data-entity][data-entity="ordinal"]::after {
168 | background: rgb(153, 153, 153) none repeat scroll 0% 0%;
169 | }
170 |
171 | [data-entity][data-entity="cardinal"] {
172 | background: rgba(153, 153, 153, 0.2) none repeat scroll 0% 0%;
173 | border-color: rgb(153, 153, 153);
174 | }
175 |
176 | [data-entity][data-entity="cardinal"]::after {
177 | background: rgb(153, 153, 153) none repeat scroll 0% 0%;
178 | }
179 |
--------------------------------------------------------------------------------
/app/util/parcyToSlate.js:
--------------------------------------------------------------------------------
1 | import _ from 'lodash'
2 |
/**
 * Builds a raw Slate mark for a tag.
 *
 * The type is lower-cased with `_.toLower`, which keeps separators intact
 * (`WORK_OF_ART` becomes `work_of_art`). `_.lowerCase` would instead split
 * the value into space-separated words, producing a type that no longer
 * matches the underscore-separated entity names used by the stylesheet.
 *
 * @param {{type: string}} tag - only `type` is read.
 * @returns {{data: object, kind: string, type: string}}
 */
const createMark = ({type}) => ({
  data: {},
  kind: "mark",
  type: _.toLower(type)
})
8 |
/**
 * Returns a function that turns a `[from, to]` offset pair into a raw Slate
 * range over the paragraph's text.
 *
 * A range receives a mark for every tag that covers it
 * (`tag.start <= from && tag.end >= to`). Matching only tags whose bounds
 * equal the pair exactly would silently drop the marks of nested or
 * overlapping tags, because such tags are split across several pairs.
 * For non-overlapping tags the result is the same as an exact match.
 *
 * @param {{text: string, tags: Array<{start: number, end: number, type: string}>}} paragraph
 * @returns {function(Array<number>): {kind: string, text: string, marks: Array}}
 */
const createRange = ({text, tags}) => t => {
  const from = t[0]
  const to = t[1]
  const covering = _.filter(tags, tag => tag.start <= from && tag.end >= to)
  return {
    kind: 'range',
    text: text.substring(from, to),
    marks: _.map(covering, tag => createMark(tag))
  }
}
14 |
/**
 * Converts one paragraph (`{ text, tags }`) into a raw Slate paragraph block
 * holding a single text node.
 *
 * The text is cut at every tag boundary: the offsets 0, `text.length` and
 * each tag's `start`/`end` are de-duplicated and sorted, consecutive offsets
 * are paired up, and each pair is converted to a range by `createRange`.
 */
const createBlock = o => {
  const offsets = _.reduce(
    o.tags,
    (collected, tag) => _.concat(collected, tag.start, tag.end),
    [0, o.text.length]
  )
  const breaks = _.sortBy(_.uniq(offsets), _.identity)
  const pairs = _.zip(_.initial(breaks), _.tail(breaks))
  const textNode = {
    kind: "text",
    ranges: _.map(pairs, createRange(o))
  }

  return {
    data: {},
    kind: "block",
    isVoid: false,
    type: "paragraph",
    nodes: [textNode]
  }
}
32 |
/**
 * Converts a list of `{ text, tags }` paragraphs into a raw Slate document
 * object (`{ nodes }`) with one block per paragraph.
 */
export const parcyToSlate = paragraphs => {
  const nodes = _.map(paragraphs, createBlock)
  return { nodes }
}
34 |
--------------------------------------------------------------------------------
/app/util/slateToParcy.js:
--------------------------------------------------------------------------------
1 | import _ from 'lodash'
2 |
/**
 * Returns a mapper from a Slate mark to a tag for a range of `text` that
 * follows `prefix`: the tag starts at `prefix.length`, spans `text.length`
 * characters and carries the mark's type in upper case.
 */
const mapTags = (prefix, text) => (mark) => ({
  start: prefix.length,
  len: text.length,
  type: mark.type.toUpperCase()
})
6 |
/**
 * Reducer step that folds one Slate range into the paragraph built so far:
 * the range's text is appended, and each of its marks becomes a tag located
 * at the offset where that text begins.
 */
const reduceRange = (accumulator, range) => {
  const toTag = mapTags(accumulator.text, range.text)
  const rangeTags = range.marks.map(toTag)

  return {
    text: accumulator.text + range.text,
    tags: _.concat(accumulator.tags, rangeTags)
  }
}
13 |
/**
 * Folds the ranges of a raw Slate text node into a single `{ text, tags }`
 * paragraph, starting from an empty text and an empty tag list.
 */
const mapSubNode = (node) => {
  const emptyParagraph = {text: "", tags: []}
  return node.ranges.reduce(reduceRange, emptyParagraph)
}
17 |
/**
 * Converts each child node of a raw Slate block with `mapSubNode` and
 * returns the resulting list.
 */
const mapTopNode = (node) => {
  const children = node.nodes
  return _.map(children, mapSubNode)
}
21 |
/**
 * Convert a raw Slate document into a flat list of { text, tags } objects:
 * each top-level node yields one entry per child node, and the per-node lists
 * are flattened one level.
 */
export const slateToParcy = (rawDoc) => {
  const perTopNode = rawDoc.nodes.map(mapTopNode)
  return _.flatten(perTopNode)
}
23 |
--------------------------------------------------------------------------------
/brunch-config.js:
--------------------------------------------------------------------------------
// Brunch build configuration, assembled from one object per top-level section.

// Output bundle names for scripts and stylesheets.
const files = {
  javascripts: { joinTo: 'app.js' },
  stylesheets: { joinTo: 'app.css' }
};

// Babel presets passed to the babel plugin.
const plugins = {
  babel: { presets: ['es2015', 'react'] }
};

// npm integration: Bootstrap's stylesheet, a `process` global, and an alias
// that points 'redux-saga/effects' at 'redux-saga/lib/effects'.
const npm = {
  enabled: true,
  styles: {
    bootstrap: ['dist/css/bootstrap.css']
  },
  globals: {
    process: 'process'
  },
  aliases: {
    'redux-saga/effects': 'redux-saga/lib/effects'
  }
};

exports.config = { files, plugins, npm };
24 |
--------------------------------------------------------------------------------
/jest/.gitignore:
--------------------------------------------------------------------------------
1 | report/
2 |
--------------------------------------------------------------------------------
/jest/setup-jasmine-env.js:
--------------------------------------------------------------------------------
1 | import reporters from 'jasmine-reporters'
2 |
// Options for the JUnit XML reporter; results are written under ./report/
// next to this file.
const junitOptions = {
  // Jest runs many instances of Jasmine in parallel. Force distinct file output
  // per test to avoid collisions.
  consolidateAll: false,
  filePrefix: 'jest-junit-result-',
  savePath: __dirname + '/report/'
}

const reporter = new reporters.JUnitXmlReporter(junitOptions)

// Register the reporter with the current Jasmine environment.
jasmine.getEnv().addReporter(reporter)
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "spacy-annotator",
3 | "version": "1.0.0",
4 | "description": "A web application for annotating text corpora with NLP annotations",
5 | "scripts": {
6 | "start": "brunch watch --server",
7 | "prod": "brunch build --production",
8 | "test": "jest --coverage --verbose"
9 | },
10 | "author": "Tom Crossland",
11 | "license": "MIT",
12 | "dependencies": {
13 | "babel-polyfill": "^6.22.0",
14 | "bootstrap": "^3.3.7",
15 | "es6-promise": "^4.0.5",
16 | "isomorphic-fetch": "^2.2.1",
17 | "lodash": "^4.17.4",
18 | "react": "^15.4.2",
19 | "react-dom": "^15.4.2",
20 | "react-redux": "^5.0.2",
21 | "react-router": "^3.0.1",
22 | "react-router-redux": "^4.0.7",
23 | "redux": "^3.6.0",
24 | "redux-saga": "^0.14.8",
25 | "slate": "^0.19.22"
26 | },
27 | "devDependencies": {
28 | "babel-brunch": "^6.0.6",
29 | "babel-jest": "^18.0.0",
30 | "babel-preset-es2015": "^6.22.0",
31 | "babel-preset-react": "^6.22.0",
32 | "babel-preset-stage-3": "^6.22.0",
33 | "brunch": "^2.10.5",
34 | "clean-css-brunch": "^2.0.0",
35 | "css-brunch": "^2.6.1",
36 | "jasmine-reporters": "^2.2.0",
37 | "javascript-brunch": "^2.0.0",
38 | "jest": "^18.1.0",
39 | "redux-saga-test-plan": "^2.2.0",
40 | "uglify-js-brunch": "^2.1.1"
41 | },
42 | "jest": {
43 | "automock": false,
44 | "setupTestFrameworkScriptFile": "<rootDir>/jest/setup-jasmine-env.js",
45 | "collectCoverageFrom": [
46 | "app/**/*.js",
47 | "app/**/*.jsx"
48 | ],
49 | "coverageThreshold": {
50 | "global": {
51 | "branches": 0,
52 | "functions": 0,
53 | "lines": 0,
54 | "statements": 0
55 | }
56 | }
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/test/sagas/train.spec.js:
--------------------------------------------------------------------------------
1 | import testSaga from 'redux-saga-test-plan'
2 | import { call, put } from 'redux-saga/effects'
3 | import { apiJson } from '../../app/sagas/api'
4 | import { postTrainModel, trainModelSaga } from '../../app/sagas/train'
5 | import {
6 | RECEIVE_TRAIN,
7 | REQUEST_TRAIN,
8 | REQUEST_TRAIN_FAILED
9 | } from '../../app/constants'
10 |
// Exercises postTrainModel step by step with redux-saga-test-plan: first the
// API call, then the action dispatched for the response it is fed.
describe('training saga', () => {
  const trainUrl = 'http://127.0.0.1:8000/train'
  const model = 'es'
  // Minimal paragraph fixture: plain text with no entity tags.
  const paragraphs = [
    { text: "Hello", tags: [] }
  ]
  const req = { model, paragraphs }
  // Options the saga is expected to pass to apiJson: the request as a JSON body.
  const opts = {
    method: "POST",
    body: JSON.stringify(req)
  }

  it('should call training endpoint and create a put effect for a successful response', () => {
    const json = paragraphs
    expect(() => {
      testSaga(postTrainModel, req)
        .next()
        .call(apiJson, trainUrl, opts)
        // Feed a successful response back into the saga.
        .next({ json })
        .put({ type: RECEIVE_TRAIN, paragraphs: json })
        .next()
        .isDone()
    }).not.toThrow()
  })

  it('should call training endpoint and create a put effect for a failure response', () => {
    const error = "Something failed"
    expect(() => {
      testSaga(postTrainModel, req)
        .next()
        .call(apiJson, trainUrl, opts)
        // Feed an error response back into the saga.
        .next({ error })
        .put({ type: REQUEST_TRAIN_FAILED, message: error })
        .next()
        .isDone()
    }).not.toThrow()
  })
})
48 |
--------------------------------------------------------------------------------
/test/util/parcyToSlate.spec.js:
--------------------------------------------------------------------------------
1 | import { parcyToSlate } from '../../app/util/parcyToSlate'
2 |
// Checks the shape of parcyToSlate's output for one tagged paragraph.
describe('the parcy to slate transformation', () => {
  it('should extract the editor state from the parcy response', () => {
    const text = "Banco Bilbao Vizcaya Argentaria (BBVA) es una entidad bancaria española, presidida por Francisco González Rodríguez. Es uno de los mayores bancos de España, siendo la primera entidad financiera de México, segunda en España, Turquía y Perú, tercera en Venezuela y cuarta en Colombia."
    const tags = [
      { start: 0, end: 20, type: "ORG" },
      { start: 33, end: 37, type: "ORG" },
      { start: 63, end: 71, type: "NORP" },
      { start: 87, end: 115, type: "PERSON" },
      { start: 149, end: 155, type: "GPE" },
      { start: 197, end: 203, type: "GPE" },
      { start: 216, end: 222, type: "GPE" },
      { start: 224, end: 231, type: "GPE" },
      { start: 234, end: 238, type: "GPE" },
      { start: 251, end: 260, type: "GPE" },
      { start: 273, end: 281, type: "GPE" },
    ]
    const response = { paragraphs: [{ tags, text }] }

    const state = parcyToSlate(response.paragraphs)
    expect(state).toBeTruthy()
    expect(state.nodes).toBeTruthy()
    expect(state.nodes.length).toEqual(1)

    const block = state.nodes[0]
    expect(block).toBeTruthy()
    expect(block.nodes).toBeTruthy()
    expect(block.nodes.length).toEqual(1)

    const ranges = block.nodes[0].ranges
    expect(ranges).toBeTruthy()
    expect(ranges.length).toEqual(22)
    // The first tag starts at offset 0, so tagged and untagged ranges
    // alternate from index 0 and index 4 is the third tag.
    expect(ranges[4]).toEqual({kind: "range", marks: [{data:{}, kind: "mark", type: "norp"}], text: "española"})
  })
})
35 |
--------------------------------------------------------------------------------
/test/util/slateToParcy.spec.js:
--------------------------------------------------------------------------------
1 | import { editor } from '../../app/reducers'
2 | import { slateToParcy } from '../../app/util/slateToParcy'
3 | import { Raw } from 'slate'
4 |
// State returned by the editor reducer for an undefined previous state and an
// empty action.
const initialState = editor(undefined, {})

// Sanity check that the initial state serialises to a document with nodes.
describe('the editor\'s initial state', () => {
  it('should contain a document with nodes', () => {
    expect(initialState).toBeTruthy()
    const serialized = Raw.serialize(initialState)
    expect(serialized).toBeTruthy()
    const doc = serialized.document
    expect(doc).toBeTruthy()
    expect(doc.nodes).toBeTruthy()
  })
})
16 |
// Converts the serialised initial editor state and checks each paragraph's
// text and tag count.
describe('the slate to parcy transformation', () => {
  it('should extract text and tags from the raw slate document', () => {
    // Expected output, in document order.
    const expected = [
      {
        text: "Banco Bilbao Vizcaya Argentaria (BBVA) es una entidad bancaria española, presidida por Francisco González Rodríguez. Es uno de los mayores bancos de España, siendo la primera entidad financiera de México, segunda en España, Turquía y Perú, tercera en Venezuela y cuarta en Colombia.",
        tagCount: 11
      },
      {
        text: "El banco tiene su sede social y fiscal desde 1868 en la Plaza de San Nicolás nº 4, en el Casco Viejo de la ciudad de Bilbao (Vizcaya, País Vasco), donde fue fundado como Banco de Bilbao en 1857. La mayor parte de los servicios centrales de la entidad y su sede operativa se concentran principalmente en sus oficinas centrales de Madrid, situadas en el complejo Ciudad BBVA de la zona de Las Tablas. Su sede de servicios técnicos se encuentra en la bilbaina Torre BBVA de Gran Vía, nº 1. Su principal edificio fuera de España es la Torre BBVA Bancomer, en el nº 506 del Paseo de la Reforma de la Ciudad de México.",
        tagCount: 17
      },
      {
        text: "A 31 de diciembre de 2015, los activos de BBVA eran de 750.078 millones de euros, siendo la segunda entidad financiera española por volumen de activos. Esa misma fecha, contaba con 9.145 oficinas, 137.968 empleados y 66 millones de clientes, estando presente en 35 países. A 30 de junio de 2015, era el 37º banco del mundo por volumen de activos. Cotiza en la Bolsa de Madrid (BBVA) y forma parte del IBEX 35 así como del Dow Jones EURO STOXX 50.",
        tagCount: 4
      }
    ]

    const raw = Raw.serialize(initialState)
    expect(raw.document).toBeTruthy()
    const sents = slateToParcy(raw.document)
    expect(sents.length).toEqual(3)
    expected.forEach((paragraph, i) => {
      expect(sents[i].text).toEqual(paragraph.text)
      expect(sents[i].tags.length).toEqual(paragraph.tagCount)
    })
  })
})
31 |
--------------------------------------------------------------------------------