├── .babelrc ├── .gitignore ├── LICENSE.txt ├── README.md ├── app ├── assets │ ├── fonts │ │ ├── glyphicons-halflings-regular.eot │ │ ├── glyphicons-halflings-regular.svg │ │ ├── glyphicons-halflings-regular.ttf │ │ ├── glyphicons-halflings-regular.woff │ │ └── glyphicons-halflings-regular.woff2 │ └── index.html ├── components │ └── Annotator.jsx ├── constants.js ├── main.jsx ├── reducers │ ├── editor.js │ └── index.js ├── sagas │ ├── api.js │ ├── entities.js │ ├── index.js │ └── train.js ├── store.js ├── styles │ └── application.css └── util │ ├── parcyToSlate.js │ └── slateToParcy.js ├── brunch-config.js ├── jest ├── .gitignore └── setup-jasmine-env.js ├── package.json ├── test ├── sagas │ └── train.spec.js └── util │ ├── parcyToSlate.spec.js │ └── slateToParcy.spec.js └── yarn.lock /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": ["es2015", "react", "stage-3"] 3 | } 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Numerous always-ignore extensions 2 | *.diff 3 | *.err 4 | *.orig 5 | *.log 6 | *.rej 7 | *.swo 8 | *.swp 9 | *.vi 10 | *~ 11 | *.sass-cache 12 | 13 | # OS or Editor folders 14 | .DS_Store 15 | .cache 16 | .project 17 | .settings 18 | .tmproj 19 | nbproject 20 | Thumbs.db 21 | 22 | # NPM packages folder. 23 | node_modules/ 24 | 25 | # Brunch output folder. 
26 | public/ 27 | 28 | # IDE-specific files 29 | .idea/ 30 | 31 | # Coverage 32 | coverage/ 33 | 34 | # Version file 35 | version 36 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Tomas Crossland 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # spaCy Annotator 2 | 3 | A web application for annotating text corpora with NLP annotations. 
4 | 5 | ## Getting started 6 | 7 | * Install (if you don't have them): 8 | * [Node.js](http://nodejs.org): `brew install node` on OS X 9 | * [Brunch](http://brunch.io): `npm install -g brunch` 10 | * Brunch plugins and app dependencies: `npm install` 11 | * Run: 12 | * `brunch watch --server` — watches the project and rebuilds continuously. This also launches an HTTP server with [pushState](https://developer.mozilla.org/en-US/docs/Web/Guide/API/DOM/Manipulating_the_browser_history) support. 13 | * `brunch build --production` — builds a minified project for production 14 | * Test: 15 | * `npm test` — executes the test suites 16 | * `npm run test:watch` — executes the test suites continuously 17 | * Learn: 18 | * The `public/` dir is fully auto-generated and served by the HTTP server. Write your code in the `app/` dir. 19 | * Place static files that should be copied to `public/` in `app/assets/`. 20 | * [Brunch site](http://brunch.io), [Getting started guide](https://github.com/brunch/brunch-guide#readme) 21 | 
88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | -------------------------------------------------------------------------------- /app/assets/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcrossland/spacy-annotator/56fca1ac0721f20f439b8686aecab541479e8fc9/app/assets/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /app/assets/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcrossland/spacy-annotator/56fca1ac0721f20f439b8686aecab541479e8fc9/app/assets/fonts/glyphicons-halflings-regular.woff 
-------------------------------------------------------------------------------- /app/assets/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcrossland/spacy-annotator/56fca1ac0721f20f439b8686aecab541479e8fc9/app/assets/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /app/assets/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | spaCy Annotator 10 | 11 | 12 | 13 | 14 | 15 |
16 | 17 | 18 | -------------------------------------------------------------------------------- /app/components/Annotator.jsx: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | import { connect } from 'react-redux' 3 | import { Editor, Raw } from 'slate' 4 | import { slateToParcy } from '../util/slateToParcy' 5 | import { REQUEST_TRAIN, REQUEST_ENTITIES } from '../constants' 6 | 7 | const renderMark = (mark) => (props) => ({props.children}) 8 | 9 | const toParcy = (editor) => { 10 | return slateToParcy(Raw.serialize(editor).document) 11 | } 12 | 13 | const mapStateToProps = ({editor}) => ( 14 | { 15 | editorState: editor, 16 | raw: toParcy(editor), 17 | schema: { 18 | marks: { 19 | org: renderMark('org'), 20 | person: renderMark('person'), 21 | norp: renderMark('norp'), 22 | gpe: renderMark('gpe'), 23 | loc: renderMark('loc'), 24 | date: renderMark('date') 25 | } 26 | } 27 | } 28 | ) 29 | 30 | const mapDispatchToProps = (dispatch) => ( 31 | { 32 | onChange: (state) => dispatch( 33 | { type: 'EDITOR_STATE', state: state } 34 | ), 35 | onMark: (type) => (e) => { 36 | // e.preventDefault() 37 | dispatch({type: 'MARK', mark: type}) 38 | }, 39 | onTrain: (raw) => (e) => { 40 | dispatch({ type: REQUEST_TRAIN, model: 'es', paragraphs: raw }) 41 | }, 42 | onDetect: (raw) => (e) => { 43 | dispatch({ type: REQUEST_ENTITIES, model: 'es', paragraphs: raw }) 44 | } 45 | } 46 | ) 47 | 48 | const TagButton = (k, v) => ( 49 | 50 | ) 51 | 52 | const AnnotatorPane = ({editorState, schema, onChange, onMark, onTrain, onDetect, raw}) => ( 53 |
54 |

Annotator

55 |
56 | {Object.keys(schema.marks).map(k => TagButton(k, onMark(k)))} 57 |
58 |
59 |
60 | 61 |
62 |
63 | Detect 64 | Train 65 |
66 | ) 67 | 68 | export const Annotator = connect(mapStateToProps, mapDispatchToProps)(AnnotatorPane) 69 | -------------------------------------------------------------------------------- /app/constants.js: -------------------------------------------------------------------------------- 1 | 2 | export const RECEIVE_TRAIN = 'RECEIVE_TRAIN' 3 | export const REQUEST_TRAIN = 'REQUEST_TRAIN' 4 | export const REQUEST_TRAIN_FAILED = 'REQUEST_TRAIN_FAILED' 5 | 6 | export const RECEIVE_ENTITIES = 'RECEIVE_ENTITIES' 7 | export const REQUEST_ENTITIES = 'REQUEST_ENTITIES' 8 | export const REQUEST_ENTITIES_FAILED = 'REQUEST_ENTITIES_FAILED' 9 | -------------------------------------------------------------------------------- /app/main.jsx: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | import ReactDOM from 'react-dom' 3 | import { store } from './store' 4 | import { Provider } from 'react-redux' 5 | import { browserHistory, Router, Route, IndexRoute } from 'react-router' 6 | import { routerActions, syncHistoryWithStore } from 'react-router-redux' 7 | import { Annotator } from './components/Annotator' 8 | 9 | const history = syncHistoryWithStore(browserHistory, store) 10 | 11 | const render = () => { 12 | ReactDOM.render( 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | , document.getElementById('app') 21 | ) 22 | } 23 | 24 | if (document.readyState !== 'complete') { 25 | document.addEventListener('DOMContentLoaded', render) 26 | } else { 27 | render() 28 | } 29 | -------------------------------------------------------------------------------- /app/reducers/editor.js: -------------------------------------------------------------------------------- 1 | import { Raw, Plain, Text, Document, State, Selection } from 'slate' 2 | import { parcyToSlate } from '../util/parcyToSlate' 3 | 4 | const initialState = Raw.deserialize({ 5 | "nodes": [ 6 | { 7 | "data": {}, 8 | "kind": "block", 9 | "isVoid": false, 10 | "type": 
"paragraph", 11 | "nodes": [ 12 | { 13 | "kind": "text", 14 | "ranges": [ 15 | { 16 | "kind": "range", 17 | "text": "Banco Bilbao Vizcaya Argentaria", 18 | "marks": [ 19 | { 20 | "data": {}, 21 | "kind": "mark", 22 | "type": "org" 23 | } 24 | ] 25 | }, 26 | { 27 | "kind": "range", 28 | "text": " (", 29 | "marks": [] 30 | }, 31 | { 32 | "kind": "range", 33 | "text": "BBVA", 34 | "marks": [ 35 | { 36 | "data": {}, 37 | "kind": "mark", 38 | "type": "org" 39 | } 40 | ] 41 | }, 42 | { 43 | "kind": "range", 44 | "text": ") es una entidad bancaria ", 45 | "marks": [] 46 | }, 47 | { 48 | "kind": "range", 49 | "text": "española", 50 | "marks": [ 51 | { 52 | "data": {}, 53 | "kind": "mark", 54 | "type": "norp" 55 | } 56 | ] 57 | }, 58 | { 59 | "kind": "range", 60 | "text": ", presidida por ", 61 | "marks": [] 62 | }, 63 | { 64 | "kind": "range", 65 | "text": "Francisco González Rodríguez", 66 | "marks": [ 67 | { 68 | "data": {}, 69 | "kind": "mark", 70 | "type": "person" 71 | } 72 | ] 73 | }, 74 | { 75 | "kind": "range", 76 | "text": ". 
Es uno de los mayores bancos de ", 77 | "marks": [] 78 | }, 79 | { 80 | "kind": "range", 81 | "text": "España", 82 | "marks": [ 83 | { 84 | "data": {}, 85 | "kind": "mark", 86 | "type": "gpe" 87 | } 88 | ] 89 | }, 90 | { 91 | "kind": "range", 92 | "text": ", siendo la primera entidad financiera de ", 93 | "marks": [] 94 | }, 95 | { 96 | "kind": "range", 97 | "text": "México", 98 | "marks": [ 99 | { 100 | "data": {}, 101 | "kind": "mark", 102 | "type": "gpe" 103 | } 104 | ] 105 | }, 106 | { 107 | "kind": "range", 108 | "text": ", segunda en ", 109 | "marks": [] 110 | }, 111 | { 112 | "kind": "range", 113 | "text": "España", 114 | "marks": [ 115 | { 116 | "data": {}, 117 | "kind": "mark", 118 | "type": "gpe" 119 | } 120 | ] 121 | }, 122 | { 123 | "kind": "range", 124 | "text": ", ", 125 | "marks": [] 126 | }, 127 | { 128 | "kind": "range", 129 | "text": "Turquía", 130 | "marks": [ 131 | { 132 | "data": {}, 133 | "kind": "mark", 134 | "type": "gpe" 135 | } 136 | ] 137 | }, 138 | { 139 | "kind": "range", 140 | "text": " y ", 141 | "marks": [] 142 | }, 143 | { 144 | "kind": "range", 145 | "text": "Perú", 146 | "marks": [ 147 | { 148 | "data": {}, 149 | "kind": "mark", 150 | "type": "gpe" 151 | } 152 | ] 153 | }, 154 | { 155 | "kind": "range", 156 | "text": ", tercera en ", 157 | "marks": [] 158 | }, 159 | { 160 | "kind": "range", 161 | "text": "Venezuela", 162 | "marks": [ 163 | { 164 | "data": {}, 165 | "kind": "mark", 166 | "type": "gpe" 167 | } 168 | ] 169 | }, 170 | { 171 | "kind": "range", 172 | "text": " y cuarta en ", 173 | "marks": [] 174 | }, 175 | { 176 | "kind": "range", 177 | "text": "Colombia", 178 | "marks": [ 179 | { 180 | "data": {}, 181 | "kind": "mark", 182 | "type": "gpe" 183 | } 184 | ] 185 | }, 186 | { 187 | "kind": "range", 188 | "text": ".", 189 | "marks": [] 190 | } 191 | ] 192 | } 193 | ] 194 | }, 195 | { 196 | "data": {}, 197 | "kind": "block", 198 | "isVoid": false, 199 | "type": "paragraph", 200 | "nodes": [ 201 | { 202 | "kind": "text", 203 
| "ranges": [ 204 | { 205 | "kind": "range", 206 | "text": "El banco tiene su sede social y fiscal desde ", 207 | "marks": [] 208 | }, 209 | { 210 | "kind": "range", 211 | "text": "1868", 212 | "marks": [ 213 | { 214 | "data": {}, 215 | "kind": "mark", 216 | "type": "date" 217 | } 218 | ] 219 | }, 220 | { 221 | "kind": "range", 222 | "text": " en la ", 223 | "marks": [] 224 | }, 225 | { 226 | "kind": "range", 227 | "text": "Plaza de San Nicolás", 228 | "marks": [ 229 | { 230 | "data": {}, 231 | "kind": "mark", 232 | "type": "loc" 233 | } 234 | ] 235 | }, 236 | { 237 | "kind": "range", 238 | "text": " nº 4, en el ", 239 | "marks": [] 240 | }, 241 | { 242 | "kind": "range", 243 | "text": "Casco Viejo", 244 | "marks": [ 245 | { 246 | "data": {}, 247 | "kind": "mark", 248 | "type": "loc" 249 | } 250 | ] 251 | }, 252 | { 253 | "kind": "range", 254 | "text": " de la ciudad de ", 255 | "marks": [] 256 | }, 257 | { 258 | "kind": "range", 259 | "text": "Bilbao", 260 | "marks": [ 261 | { 262 | "data": {}, 263 | "kind": "mark", 264 | "type": "gpe" 265 | } 266 | ] 267 | }, 268 | { 269 | "kind": "range", 270 | "text": " (", 271 | "marks": [] 272 | }, 273 | { 274 | "kind": "range", 275 | "text": "Vizcaya", 276 | "marks": [ 277 | { 278 | "data": {}, 279 | "kind": "mark", 280 | "type": "gpe" 281 | } 282 | ] 283 | }, 284 | { 285 | "kind": "range", 286 | "text": ", ", 287 | "marks": [] 288 | }, 289 | { 290 | "kind": "range", 291 | "text": "País Vasco", 292 | "marks": [ 293 | { 294 | "data": {}, 295 | "kind": "mark", 296 | "type": "gpe" 297 | } 298 | ] 299 | }, 300 | { 301 | "kind": "range", 302 | "text": "), donde fue fundado como ", 303 | "marks": [] 304 | }, 305 | { 306 | "kind": "range", 307 | "text": "Banco de Bilbao", 308 | "marks": [ 309 | { 310 | "data": {}, 311 | "kind": "mark", 312 | "type": "org" 313 | } 314 | ] 315 | }, 316 | { 317 | "kind": "range", 318 | "text": " en ", 319 | "marks": [] 320 | }, 321 | { 322 | "kind": "range", 323 | "text": "1857", 324 | "marks": [ 325 
| { 326 | "data": {}, 327 | "kind": "mark", 328 | "type": "date" 329 | } 330 | ] 331 | }, 332 | { 333 | "kind": "range", 334 | "text": ". La mayor parte de los servicios centrales de la entidad y su sede operativa se concentran principalmente en sus oficinas centrales de ", 335 | "marks": [] 336 | }, 337 | { 338 | "kind": "range", 339 | "text": "Madrid", 340 | "marks": [ 341 | { 342 | "data": {}, 343 | "kind": "mark", 344 | "type": "gpe" 345 | } 346 | ] 347 | }, 348 | { 349 | "kind": "range", 350 | "text": ", situadas en el complejo ", 351 | "marks": [] 352 | }, 353 | { 354 | "kind": "range", 355 | "text": "Ciudad BBVA", 356 | "marks": [ 357 | { 358 | "data": {}, 359 | "kind": "mark", 360 | "type": "loc" 361 | } 362 | ] 363 | }, 364 | { 365 | "kind": "range", 366 | "text": " de la zona de ", 367 | "marks": [] 368 | }, 369 | { 370 | "kind": "range", 371 | "text": "Las Tablas", 372 | "marks": [ 373 | { 374 | "data": {}, 375 | "kind": "mark", 376 | "type": "gpe" 377 | } 378 | ] 379 | }, 380 | { 381 | "kind": "range", 382 | "text": ". Su sede de servicios técnicos se encuentra en la bilbaina ", 383 | "marks": [] 384 | }, 385 | { 386 | "kind": "range", 387 | "text": "Torre BBVA", 388 | "marks": [ 389 | { 390 | "data": {}, 391 | "kind": "mark", 392 | "type": "loc" 393 | } 394 | ] 395 | }, 396 | { 397 | "kind": "range", 398 | "text": " de ", 399 | "marks": [] 400 | }, 401 | { 402 | "kind": "range", 403 | "text": "Gran Vía", 404 | "marks": [ 405 | { 406 | "data": {}, 407 | "kind": "mark", 408 | "type": "loc" 409 | } 410 | ] 411 | }, 412 | { 413 | "kind": "range", 414 | "text": ", nº 1. 
Su principal edificio fuera de ", 415 | "marks": [] 416 | }, 417 | { 418 | "kind": "range", 419 | "text": "España", 420 | "marks": [ 421 | { 422 | "data": {}, 423 | "kind": "mark", 424 | "type": "gpe" 425 | } 426 | ] 427 | }, 428 | { 429 | "kind": "range", 430 | "text": " es la ", 431 | "marks": [] 432 | }, 433 | { 434 | "kind": "range", 435 | "text": "Torre BBVA Bancomer", 436 | "marks": [ 437 | { 438 | "data": {}, 439 | "kind": "mark", 440 | "type": "loc" 441 | } 442 | ] 443 | }, 444 | { 445 | "kind": "range", 446 | "text": ", en el nº 506 del ", 447 | "marks": [] 448 | }, 449 | { 450 | "kind": "range", 451 | "text": "Paseo de la Reforma", 452 | "marks": [ 453 | { 454 | "data": {}, 455 | "kind": "mark", 456 | "type": "loc" 457 | } 458 | ] 459 | }, 460 | { 461 | "kind": "range", 462 | "text": " de la ", 463 | "marks": [] 464 | }, 465 | { 466 | "kind": "range", 467 | "text": "Ciudad de México", 468 | "marks": [ 469 | { 470 | "data": {}, 471 | "kind": "mark", 472 | "type": "loc" 473 | } 474 | ] 475 | }, 476 | { 477 | "kind": "range", 478 | "text": ".", 479 | "marks": [] 480 | } 481 | ] 482 | } 483 | ] 484 | }, 485 | { 486 | "data": {}, 487 | "kind": "block", 488 | "isVoid": false, 489 | "type": "paragraph", 490 | "nodes": [ 491 | { 492 | "kind": "text", 493 | "ranges": [ 494 | { 495 | "kind": "range", 496 | "text": "A ", 497 | "marks": [] 498 | }, 499 | { 500 | "kind": "range", 501 | "text": "31 de diciembre de 2015", 502 | "marks": [ 503 | { 504 | "data": {}, 505 | "kind": "mark", 506 | "type": "date" 507 | } 508 | ] 509 | }, 510 | { 511 | "kind": "range", 512 | "text": ", los activos de ", 513 | "marks": [] 514 | }, 515 | { 516 | "kind": "range", 517 | "text": "BBVA", 518 | "marks": [ 519 | { 520 | "data": {}, 521 | "kind": "mark", 522 | "type": "org" 523 | } 524 | ] 525 | }, 526 | { 527 | "kind": "range", 528 | "text": " eran de 750.078 millones de euros, siendo la segunda entidad financiera ", 529 | "marks": [] 530 | }, 531 | { 532 | "kind": "range", 533 | 
"text": "española", 534 | "marks": [ 535 | { 536 | "data": {}, 537 | "kind": "mark", 538 | "type": "norp" 539 | } 540 | ] 541 | }, 542 | { 543 | "kind": "range", 544 | "text": " por volumen de activos. Esa misma fecha, contaba con 9.145 oficinas, 137.968 empleados y 66 millones de clientes, estando presente en 35 países. A ", 545 | "marks": [] 546 | }, 547 | { 548 | "kind": "range", 549 | "text": "30 de junio de 2015", 550 | "marks": [ 551 | { 552 | "data": {}, 553 | "kind": "mark", 554 | "type": "date" 555 | } 556 | ] 557 | }, 558 | { 559 | "kind": "range", 560 | "text": ", era el 37º banco del mundo por volumen de activos. Cotiza en la Bolsa de Madrid (BBVA) y forma parte del IBEX 35 así como del Dow Jones EURO STOXX 50.", 561 | "marks": [] 562 | } 563 | ] 564 | } 565 | ] 566 | } 567 | ] 568 | }, { terse: true }) 569 | 570 | const applyTag = (node, tag, idx) => { 571 | node 572 | .addMark(tag.start, tag.len, {type: tag.type}) 573 | } 574 | 575 | const sentenceToNode = ({ text, tags }) => { 576 | const node = Text.createFromString(text) 577 | tags.forEach(t => node.addMark(t.start, t.len, {type: t.type})) 578 | return node 579 | } 580 | 581 | export const editor = (state = initialState, action) => { 582 | switch (action.type) { 583 | case 'EDITOR_STATE': 584 | return action.state 585 | case 'MARK': 586 | return state.transform().toggleMark(action.mark).apply() 587 | case 'RECEIVE_ENTITIES': 588 | const nextState = parcyToSlate(action.paragraphs) 589 | return Raw.deserialize(nextState, { terse: true }) 590 | default: 591 | return state 592 | } 593 | } 594 | -------------------------------------------------------------------------------- /app/reducers/index.js: -------------------------------------------------------------------------------- 1 | import { editor } from './editor' 2 | 3 | export { editor } 4 | -------------------------------------------------------------------------------- /app/sagas/api.js: 
-------------------------------------------------------------------------------- 1 | import 'isomorphic-fetch' 2 | 3 | /* Returns the fetch Response unchanged when response.ok is truthy; otherwise throws an Error whose message is response.statusText. */ function handleErrors(response) { 4 | if (!response.ok) throw Error(response.statusText) 5 | return response 6 | } 7 | 8 | /* Fetches url with opts and resolves to { json } holding the parsed JSON body. Anything thrown earlier in the chain (rejected fetch, non-ok response, JSON parse failure) is caught and turned into { error: ex.message } instead. */ export function apiJson(url, opts) { 9 | return fetch(url, opts) 10 | .then(handleErrors) 11 | .then(resp => resp.json()) 12 | .then(json => ({ json })) 13 | .catch(ex => ({ error: ex.message })) 14 | } 15 | 16 | /* Same contract as apiJson but without body parsing: resolves to { response } holding the raw Response, or to { error: ex.message } when the fetch rejects or the response is not ok. */ export function apiHttp(url, opts) { 17 | return fetch(url, opts) 18 | .then(handleErrors) 19 | .then(response => ({ response })) 20 | .catch(ex => ({ error: ex.message })) 21 | } 22 | -------------------------------------------------------------------------------- /app/sagas/entities.js: -------------------------------------------------------------------------------- 1 | import 'babel-polyfill' 2 | import { call, put, takeLatest } from 'redux-saga/effects' 3 | import { apiJson } from './api' 4 | import { RECEIVE_ENTITIES, REQUEST_ENTITIES, REQUEST_ENTITIES_FAILED } from '../constants' 5 | 6 | /* Saga worker: POSTs { model, paragraphs } as a JSON string to the hard-coded /ent endpoint through apiJson. When the reply json is truthy it puts RECEIVE_ENTITIES with paragraphs set to that json; otherwise it puts REQUEST_ENTITIES_FAILED with the error text, falling back to 'Request failed'. NOTE(review): no Content-Type header is set on the request; confirm the server does not require application/json. */ export function* postDetectEntities({ model, paragraphs }) { 7 | const opts = { 8 | method: 'POST', 9 | body: JSON.stringify({ model, paragraphs }) 10 | } 11 | const { json, error } = yield call(apiJson, `http://127.0.0.1:8000/ent`, opts) 12 | if (json) 13 | yield put({ type: RECEIVE_ENTITIES, paragraphs: json }) 14 | else 15 | yield put({ type: REQUEST_ENTITIES_FAILED, message: error || 'Request failed' }) 16 | } 17 | 18 | /* Watcher saga: hands REQUEST_ENTITIES actions to postDetectEntities through takeLatest. */ export function* detectEntitiesSaga() { 19 | yield takeLatest(REQUEST_ENTITIES, postDetectEntities) 20 | } 21 | -------------------------------------------------------------------------------- /app/sagas/index.js: -------------------------------------------------------------------------------- 1 | import { detectEntitiesSaga } from './entities' 2 | import { trainModelSaga } from './train' 3 | 4 | /* Root saga: yields an array holding the training and entity-detection watcher generators. NOTE(review): presumably relies on the installed redux-saga version treating a yielded array as parallel effects; confirm against package.json. */ export default function* sagas() { 5 | yield [ 6 | trainModelSaga(), 7 | detectEntitiesSaga() 8 | ] 9 | } 10 | 
-------------------------------------------------------------------------------- /app/sagas/train.js: -------------------------------------------------------------------------------- 1 | import 'babel-polyfill' 2 | import { call, put, takeLatest } from 'redux-saga/effects' 3 | import { apiJson } from './api' 4 | import { RECEIVE_TRAIN, REQUEST_TRAIN, REQUEST_TRAIN_FAILED } from '../constants' 5 | 6 | /* Saga worker: POSTs { model, paragraphs } as a JSON string to the hard-coded /train endpoint through apiJson. When the reply json is truthy it puts RECEIVE_TRAIN with paragraphs set to that json; otherwise it puts REQUEST_TRAIN_FAILED with the error text, falling back to 'Request failed'. NOTE(review): no Content-Type header is set on the request; confirm the server does not require application/json. */ export function* postTrainModel({ model, paragraphs }) { 7 | const opts = { 8 | method: 'POST', 9 | body: JSON.stringify({ model, paragraphs }) 10 | } 11 | const { json, error } = yield call(apiJson, `http://127.0.0.1:8000/train`, opts) 12 | if (json) 13 | yield put({ type: RECEIVE_TRAIN, paragraphs: json }) 14 | else 15 | yield put({ type: REQUEST_TRAIN_FAILED, message: error || 'Request failed' }) 16 | } 17 | 18 | /* Watcher saga: hands REQUEST_TRAIN actions to postTrainModel through takeLatest. */ export function* trainModelSaga() { 19 | yield takeLatest(REQUEST_TRAIN, postTrainModel) 20 | } 21 | -------------------------------------------------------------------------------- /app/store.js: -------------------------------------------------------------------------------- 1 | import 'babel-polyfill' 2 | import { createStore, combineReducers, applyMiddleware, compose } from 'redux' 3 | import { browserHistory } from 'react-router' 4 | import { routerReducer, routerMiddleware } from 'react-router-redux' 5 | import createSagaMiddleware from 'redux-saga' 6 | import * as reducers from './reducers' 7 | import sagas from './sagas' 8 | /* NOTE(review): trainModelSaga is imported here but not used in this module. */ import { trainModelSaga } from './sagas/train' 9 | 10 | const routingMiddleware = routerMiddleware(browserHistory) 11 | /* Root reducer: every export of ./reducers plus routerReducer under the routing key. */ const annotationApp = combineReducers( 12 | Object.assign( 13 | {}, 14 | reducers, 15 | { routing: routerReducer } 16 | ) 17 | ) 18 | 19 | /* Uses the Redux DevTools compose function when the browser exposes it on window, otherwise Redux's own compose. */ const composeEnhancers = window.__REDUX_DEVTOOLS_EXTENSION_COMPOSE__ || compose 20 | const sagaMiddleware = createSagaMiddleware() 21 | export const store = createStore( 22 | annotationApp, composeEnhancers(applyMiddleware( 23 | routingMiddleware, 24 | sagaMiddleware 25 | ))
26 | ) 27 | 28 | sagaMiddleware.run(sagas) 29 | -------------------------------------------------------------------------------- /app/styles/application.css: -------------------------------------------------------------------------------- 1 | .entities { 2 | line-height: 2; 3 | } 4 | 5 | [data-entity] { 6 | padding: 0.25em 0.35em; 7 | margin: 0px 0.25em; 8 | line-height: 1; 9 | display: inline-block; 10 | border-radius: 0.25em; 11 | border: 1px solid; 12 | } 13 | 14 | [data-entity]::after { 15 | box-sizing: border-box; 16 | content: attr(data-entity); 17 | font-size: 0.6em; 18 | line-height: 1; 19 | padding: 0.35em; 20 | border-radius: 0.35em; 21 | text-transform: uppercase; 22 | display: inline-block; 23 | vertical-align: middle; 24 | margin: 0px 0px 0.1rem 0.5rem; 25 | } 26 | 27 | [data-entity][data-entity="person"] { 28 | background: rgba(166, 226, 45, 0.2) none repeat scroll 0% 0%; 29 | border-color: rgb(166, 226, 45); 30 | } 31 | 32 | [data-entity][data-entity="person"]::after { 33 | background: rgb(166, 226, 45) none repeat scroll 0% 0%; 34 | } 35 | 36 | [data-entity][data-entity="norp"] { 37 | background: rgba(224, 0, 132, 0.2) none repeat scroll 0% 0%; 38 | border-color: rgb(224, 0, 132); 39 | } 40 | 41 | [data-entity][data-entity="norp"]::after { 42 | background: rgb(224, 0, 132) none repeat scroll 0% 0%; 43 | } 44 | 45 | [data-entity][data-entity="facility"] { 46 | background: rgba(67, 198, 252, 0.2) none repeat scroll 0% 0%; 47 | border-color: rgb(67, 198, 252); 48 | } 49 | 50 | [data-entity][data-entity="facility"]::after { 51 | background: rgb(67, 198, 252) none repeat scroll 0% 0%; 52 | } 53 | 54 | [data-entity][data-entity="org"] { 55 | background: rgba(67, 198, 252, 0.2) none repeat scroll 0% 0%; 56 | border-color: rgb(67, 198, 252); 57 | } 58 | 59 | [data-entity][data-entity="org"]::after { 60 | background: rgb(67, 198, 252) none repeat scroll 0% 0%; 61 | } 62 | 63 | [data-entity][data-entity="gpe"] { 64 | background: rgba(253, 151, 32, 0.2) none 
repeat scroll 0% 0%; 65 | border-color: rgb(253, 151, 32); 66 | } 67 | 68 | [data-entity][data-entity="gpe"]::after { 69 | background: rgb(253, 151, 32) none repeat scroll 0% 0%; 70 | } 71 | 72 | [data-entity][data-entity="loc"] { 73 | background: rgba(253, 151, 32, 0.2) none repeat scroll 0% 0%; 74 | border-color: rgb(253, 151, 32); 75 | } 76 | 77 | [data-entity][data-entity="loc"]::after { 78 | background: rgb(253, 151, 32) none repeat scroll 0% 0%; 79 | } 80 | 81 | [data-entity][data-entity="product"] { 82 | background: rgba(142, 125, 255, 0.2) none repeat scroll 0% 0%; 83 | border-color: rgb(142, 125, 255); 84 | } 85 | 86 | [data-entity][data-entity="product"]::after { 87 | background: rgb(142, 125, 255) none repeat scroll 0% 0%; 88 | } 89 | 90 | [data-entity][data-entity="event"] { 91 | background: rgba(255, 204, 0, 0.2) none repeat scroll 0% 0%; 92 | border-color: rgb(255, 204, 0); 93 | } 94 | 95 | [data-entity][data-entity="event"]::after { 96 | background: rgb(255, 204, 0) none repeat scroll 0% 0%; 97 | } 98 | 99 | [data-entity][data-entity="work_of_art"] { 100 | background: rgba(255, 204, 0, 0.2) none repeat scroll 0% 0%; 101 | border-color: rgb(255, 204, 0); 102 | } 103 | 104 | [data-entity][data-entity="work_of_art"]::after { 105 | background: rgb(255, 204, 0) none repeat scroll 0% 0%; 106 | } 107 | 108 | [data-entity][data-entity="language"] { 109 | background: rgba(255, 204, 0, 0.2) none repeat scroll 0% 0%; 110 | border-color: rgb(255, 204, 0); 111 | } 112 | 113 | [data-entity][data-entity="language"]::after { 114 | background: rgb(255, 204, 0) none repeat scroll 0% 0%; 115 | } 116 | 117 | [data-entity][data-entity="date"] { 118 | background: rgba(47, 187, 171, 0.2) none repeat scroll 0% 0%; 119 | border-color: rgb(47, 187, 171); 120 | } 121 | 122 | [data-entity][data-entity="date"]::after { 123 | background: rgb(47, 187, 171) none repeat scroll 0% 0%; 124 | } 125 | 126 | [data-entity][data-entity="time"] { 127 | background: rgba(47, 187, 171, 0.2) 
/**
 * Builds the raw Slate mark for one parcy tag.
 *
 * The type is lower-cased character by character, so underscores survive
 * ("WORK_OF_ART" -> "work_of_art"). lodash's `lowerCase` re-words its input
 * ("work of art"), which slateToParcy's `toUpperCase()` cannot turn back into
 * the original label.
 * NOTE(review): presumably the mark type is also what the
 * `[data-entity="work_of_art"]` style rules key on - confirm in Annotator.jsx.
 *
 * @param {{type: ?string}} tag - parcy tag; only `type` is read.
 * @returns {{data: Object, kind: string, type: string}} raw Slate mark; a
 *   missing type yields an empty string.
 */
const createMark = ({ type }) => ({
  data: {},
  kind: 'mark',
  type: type == null ? '' : String(type).toLowerCase()
})

/**
 * Returns a function that turns a `[start, end]` offset pair into a raw Slate
 * range of the paragraph's text.
 *
 * A range carries one mark for every tag that fully contains it. For tags that
 * do not overlap this equals an exact start/end match; for nested or
 * overlapping tags the enclosing tag is kept on each piece instead of being
 * dropped.
 *
 * @param {{text: string, tags: ?Array<{start: number, end: number, type: string}>}} paragraph
 * @returns {function(Array<number>): {kind: string, text: string, marks: Array<Object>}}
 */
const createRange = ({ text, tags }) => {
  const tagList = tags || []
  return ([start, end]) => ({
    kind: 'range',
    text: text.substring(start, end),
    marks: tagList
      .filter(tag => tag.start <= start && end <= tag.end)
      .map(tag => createMark(tag))
  })
}

/**
 * Converts one parcy paragraph into a raw Slate paragraph block holding a
 * single text node.
 *
 * The text is cut at offset 0, at the text length and at every tag boundary;
 * each pair of consecutive cut points becomes one range.
 *
 * @param {{text: string, tags: ?Array<{start: number, end: number, type: string}>}} paragraph
 * @returns {Object} raw Slate block of type "paragraph".
 */
const createBlock = paragraph => {
  const tagList = paragraph.tags || []
  const offsets = new Set([0, paragraph.text.length])
  tagList.forEach(tag => {
    offsets.add(tag.start)
    offsets.add(tag.end)
  })
  // Numeric comparator: the default sort is lexicographic ("10" < "9").
  const breaks = Array.from(offsets).sort((a, b) => a - b)
  const toRange = createRange(paragraph)
  const ranges = breaks
    .slice(0, -1)
    .map((start, i) => toRange([start, breaks[i + 1]]))
  return {
    data: {},
    kind: 'block',
    isVoid: false,
    type: 'paragraph',
    nodes: [
      {
        kind: 'text',
        ranges
      }
    ]
  }
}

/**
 * Converts parcy paragraphs (`{text, tags}`) into an object with one raw Slate
 * paragraph block per input paragraph.
 *
 * @param {?Array<{text: string, tags: ?Array<Object>}>} paragraphs - a
 *   null/undefined value is treated as an empty list.
 * @returns {{nodes: Array<Object>}}
 */
export const parcyToSlate = paragraphs => ({
  nodes: (paragraphs || []).map(paragraph => createBlock(paragraph))
})
9 | }, 10 | 11 | npm: { 12 | enabled: true, 13 | styles: { 14 | bootstrap: ['dist/css/bootstrap.css'] 15 | }, 16 | globals: { 17 | process: 'process' 18 | }, 19 | aliases: { 20 | 'redux-saga/effects': 'redux-saga/lib/effects' 21 | } 22 | } 23 | }; 24 | -------------------------------------------------------------------------------- /jest/.gitignore: -------------------------------------------------------------------------------- 1 | report/ 2 | -------------------------------------------------------------------------------- /jest/setup-jasmine-env.js: -------------------------------------------------------------------------------- 1 | import reporters from 'jasmine-reporters' 2 | 3 | const reporter = new reporters.JUnitXmlReporter({ 4 | // Jest runs many instances of Jasmine in parallel. Force distinct file output 5 | // per test to avoid collisions. 6 | consolidateAll: false, 7 | filePrefix: 'jest-junit-result-', 8 | savePath: __dirname + '/report/' 9 | }) 10 | jasmine.getEnv().addReporter(reporter) -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "spacy-annotator", 3 | "version": "1.0.0", 4 | "description": "A web application for annotating text corpora with NLP annotations", 5 | "scripts": { 6 | "start": "brunch watch --server", 7 | "prod": "brunch build --production", 8 | "test": "jest --coverage --verbose" 9 | }, 10 | "author": "Tom Crossland", 11 | "license": "MIT", 12 | "dependencies": { 13 | "babel-polyfill": "^6.22.0", 14 | "bootstrap": "^3.3.7", 15 | "es6-promise": "^4.0.5", 16 | "isomorphic-fetch": "^2.2.1", 17 | "lodash": "^4.17.4", 18 | "react": "^15.4.2", 19 | "react-dom": "^15.4.2", 20 | "react-redux": "^5.0.2", 21 | "react-router": "^3.0.1", 22 | "react-router-redux": "^4.0.7", 23 | "redux": "^3.6.0", 24 | "redux-saga": "^0.14.8", 25 | "slate": "^0.19.22" 26 | }, 27 | "devDependencies": { 28 | 
"babel-brunch": "^6.0.6", 29 | "babel-jest": "^18.0.0", 30 | "babel-preset-es2015": "^6.22.0", 31 | "babel-preset-react": "^6.22.0", 32 | "babel-preset-stage-3": "^6.22.0", 33 | "brunch": "^2.10.5", 34 | "clean-css-brunch": "^2.0.0", 35 | "css-brunch": "^2.6.1", 36 | "jasmine-reporters": "^2.2.0", 37 | "javascript-brunch": "^2.0.0", 38 | "jest": "^18.1.0", 39 | "redux-saga-test-plan": "^2.2.0", 40 | "uglify-js-brunch": "^2.1.1" 41 | }, 42 | "jest": { 43 | "automock": false, 44 | "setupTestFrameworkScriptFile": "<rootDir>/jest/setup-jasmine-env.js", 45 | "collectCoverageFrom": [ 46 | "app/**/*.js", 47 | "app/**/*.jsx" 48 | ], 49 | "coverageThreshold": { 50 | "global": { 51 | "branches": 0, 52 | "functions": 0, 53 | "lines": 0, 54 | "statements": 0 55 | } 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /test/sagas/train.spec.js: -------------------------------------------------------------------------------- 1 | import testSaga from 'redux-saga-test-plan' 2 | import { call, put } from 'redux-saga/effects' 3 | import { apiJson } from '../../app/sagas/api' 4 | import { postTrainModel, trainModelSaga } from '../../app/sagas/train' 5 | import { 6 | RECEIVE_TRAIN, 7 | REQUEST_TRAIN, 8 | REQUEST_TRAIN_FAILED 9 | } from '../../app/constants' 10 | 11 | describe('training saga', () => { 12 | const model = 'es' 13 | const paragraphs = [ 14 | { test: "Hello", tags: [] } 15 | ] 16 | const req = { model, paragraphs } 17 | const opts = { 18 | method: "POST", 19 | body: JSON.stringify(req) 20 | } 21 | 22 | it('should call training endpoint and create a put effect for a successful response', () => { 23 | const json = paragraphs 24 | expect(() => { 25 | testSaga(postTrainModel, req) 26 | .next() 27 | .call(apiJson, `http://127.0.0.1:8000/train`, opts) 28 | .next({ json }) 29 | .put({ type: RECEIVE_TRAIN, paragraphs: json }) 30 | .next() 31 | .isDone() 32 | }).not.toThrow() 33 | }) 34 | 35 | it('should call training endpoint and create 
import { parcyToSlate } from '../../app/util/parcyToSlate'

// Fixture: one parcy paragraph with eleven entity tags, written as
// [start, end, type] triples and expanded into tag objects below.
const TEXT = "Banco Bilbao Vizcaya Argentaria (BBVA) es una entidad bancaria española, presidida por Francisco González Rodríguez. Es uno de los mayores bancos de España, siendo la primera entidad financiera de México, segunda en España, Turquía y Perú, tercera en Venezuela y cuarta en Colombia."

const TAG_TRIPLES = [
  [0, 20, "ORG"],
  [33, 37, "ORG"],
  [63, 71, "NORP"],
  [87, 115, "PERSON"],
  [149, 155, "GPE"],
  [197, 203, "GPE"],
  [216, 222, "GPE"],
  [224, 231, "GPE"],
  [234, 238, "GPE"],
  [251, 260, "GPE"],
  [273, 281, "GPE"]
]

describe('the parcy to slate transformation', () => {
  it('should extract the editor state from the parcy response', () => {
    const tags = TAG_TRIPLES.map(([start, end, type]) => ({ start, end, type }))
    const paragraphs = [{ tags, text: TEXT }]

    const state = parcyToSlate(paragraphs)
    expect(state).toBeTruthy()

    // One block per input paragraph.
    const blocks = state.nodes
    expect(blocks).toBeTruthy()
    expect(blocks.length).toEqual(1)

    // Each block wraps exactly one text node.
    const block = blocks[0]
    expect(block).toBeTruthy()
    expect(block.nodes).toBeTruthy()
    expect(block.nodes.length).toEqual(1)

    // 11 tagged spans plus the 11 untagged stretches around them.
    const ranges = block.nodes[0].ranges
    expect(ranges).toBeTruthy()
    expect(ranges.length).toEqual(22)
    expect(ranges[4]).toEqual({
      kind: "range",
      marks: [{ data: {}, kind: "mark", type: "norp" }],
      text: "española"
    })
  })
})
Es uno de los mayores bancos de España, siendo la primera entidad financiera de México, segunda en España, Turquía y Perú, tercera en Venezuela y cuarta en Colombia.") 24 | expect(sents[0].tags.length).toEqual(11) 25 | expect(sents[1].text).toEqual("El banco tiene su sede social y fiscal desde 1868 en la Plaza de San Nicolás nº 4, en el Casco Viejo de la ciudad de Bilbao (Vizcaya, País Vasco), donde fue fundado como Banco de Bilbao en 1857. La mayor parte de los servicios centrales de la entidad y su sede operativa se concentran principalmente en sus oficinas centrales de Madrid, situadas en el complejo Ciudad BBVA de la zona de Las Tablas. Su sede de servicios técnicos se encuentra en la bilbaina Torre BBVA de Gran Vía, nº 1. Su principal edificio fuera de España es la Torre BBVA Bancomer, en el nº 506 del Paseo de la Reforma de la Ciudad de México.") 26 | expect(sents[1].tags.length).toEqual(17) 27 | expect(sents[2].text).toEqual("A 31 de diciembre de 2015, los activos de BBVA eran de 750.078 millones de euros, siendo la segunda entidad financiera española por volumen de activos. Esa misma fecha, contaba con 9.145 oficinas, 137.968 empleados y 66 millones de clientes, estando presente en 35 países. A 30 de junio de 2015, era el 37º banco del mundo por volumen de activos. Cotiza en la Bolsa de Madrid (BBVA) y forma parte del IBEX 35 así como del Dow Jones EURO STOXX 50.") 28 | expect(sents[2].tags.length).toEqual(4) 29 | }) 30 | }) 31 | --------------------------------------------------------------------------------