├── NER using Bidirectional LSTM - CRF .ipynb ├── NER using CRF.ipynb ├── README.md ├── model.h5 ├── tag_to_index.pickle └── word_to_index.pickle /NER using Bidirectional LSTM - CRF .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Named Entity Recognition\n", 8 | "In Natural Language Processing (NLP), Named Entity Recognition (NER) is one of the most common problems. An entity is the part of the text that is of interest. In NLP, NER is a method of extracting the relevant information from a large corpus and classifying those entities into predefined categories such as location, organization, name and so on. \n", 9 | "Information about labels: \n", 10 | "* geo = Geographical Entity\n", 11 | "* org = Organization\n", 12 | "* per = Person\n", 13 | "* gpe = Geopolitical Entity\n", 14 | "* tim = Time indicator\n", 15 | "* art = Artifact\n", 16 | "* eve = Event\n", 17 | "* nat = Natural Phenomenon\n", 18 | "\n", 19 | " 1. Total Words Count = 1354149 \n", 20 | " 2. 
Target Data Column: Tag" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "#### Importing Libraries" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": { 34 | "colab": {}, 35 | "colab_type": "code", 36 | "id": "wt4u0Lf1YJPH" 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "import pandas as pd\n", 41 | "import numpy as np\n", 42 | "\n", 43 | "from keras.preprocessing.sequence import pad_sequences\n", 44 | "from keras.utils import to_categorical\n", 45 | "from keras.layers import LSTM, Dense, TimeDistributed, Embedding, Bidirectional\n", 46 | "from keras.models import Model, Input\n", 47 | "from keras_contrib.layers import CRF\n", 48 | "from keras.callbacks import ModelCheckpoint\n", 49 | "\n", 50 | "import warnings\n", 51 | "warnings.filterwarnings(\"ignore\")\n", 52 | "\n", 53 | "from sklearn.model_selection import train_test_split\n", 54 | "import matplotlib.pyplot as plt\n", 55 | "%matplotlib inline\n", 56 | "\n", 57 | "from sklearn_crfsuite.metrics import flat_classification_report\n", 58 | "from sklearn.metrics import f1_score\n", 59 | "from seqeval.metrics import precision_score, recall_score, f1_score, classification_report\n", 60 | "from keras.preprocessing.text import text_to_word_sequence\n", 61 | "import pickle" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 12, 67 | "metadata": { 68 | "colab": {}, 69 | "colab_type": "code", 70 | "id": "PmFVrk0JY-Mv" 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "#Reading the csv file\n", 75 | "df = pd.read_csv('ner_dataset.csv', encoding = \"ISO-8859-1\")" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 24, 81 | "metadata": { 82 | "colab": { 83 | "base_uri": "https://localhost:8080/", 84 | "height": 359 85 | }, 86 | "colab_type": "code", 87 | "executionInfo": { 88 | "elapsed": 668, 89 | "status": "ok", 90 | "timestamp": 1560703170937, 91 | "user": { 92 | "displayName": "CHAVAN 
AKSHAY", 93 | "photoUrl": "", 94 | "userId": "10674464813829582221" 95 | }, 96 | "user_tz": -330 97 | }, 98 | "id": "LYlRwss8ZPZr", 99 | "outputId": "c3366c42-0a14-4925-c108-af64ba6d1921" 100 | }, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/html": [ 105 | "
\n", 106 | "\n", 119 | "\n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | "
Sentence #WordPOSTag
0Sentence: 1ThousandsNNSO
1NaNofINO
2NaNdemonstratorsNNSO
3NaNhaveVBPO
4NaNmarchedVBNO
5NaNthroughINO
6NaNLondonNNPB-geo
7NaNtoTOO
8NaNprotestVBO
9NaNtheDTO
\n", 202 | "
" 203 | ], 204 | "text/plain": [ 205 | " Sentence # Word POS Tag\n", 206 | "0 Sentence: 1 Thousands NNS O\n", 207 | "1 NaN of IN O\n", 208 | "2 NaN demonstrators NNS O\n", 209 | "3 NaN have VBP O\n", 210 | "4 NaN marched VBN O\n", 211 | "5 NaN through IN O\n", 212 | "6 NaN London NNP B-geo\n", 213 | "7 NaN to TO O\n", 214 | "8 NaN protest VB O\n", 215 | "9 NaN the DT O" 216 | ] 217 | }, 218 | "execution_count": 24, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "#Display first 10 rows\n", 225 | "df.head(10)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 5, 231 | "metadata": {}, 232 | "outputs": [ 233 | { 234 | "data": { 235 | "text/html": [ 236 | "
\n", 237 | "\n", 250 | "\n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | "
Sentence #WordPOSTag
count47959104857510485751048575
unique47959351784217
topSentence: 36965theNNO
freq152573145807887908
\n", 291 | "
" 292 | ], 293 | "text/plain": [ 294 | " Sentence # Word POS Tag\n", 295 | "count 47959 1048575 1048575 1048575\n", 296 | "unique 47959 35178 42 17\n", 297 | "top Sentence: 36965 the NN O\n", 298 | "freq 1 52573 145807 887908" 299 | ] 300 | }, 301 | "execution_count": 5, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "df.describe()" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "metadata": {}, 313 | "source": [ 314 | "#### Observations : \n", 315 | "* There are total 47959 sentences in the dataset.\n", 316 | "* Number unique words in the dataset are 35178.\n", 317 | "* Total 17 lables (Tags)." 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 6, 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "data": { 327 | "text/plain": [ 328 | "array(['O', 'B-geo', 'B-gpe', 'B-per', 'I-geo', 'B-org', 'I-org', 'B-tim',\n", 329 | " 'B-art', 'I-art', 'I-per', 'I-gpe', 'I-tim', 'B-nat', 'B-eve',\n", 330 | " 'I-eve', 'I-nat'], dtype=object)" 331 | ] 332 | }, 333 | "execution_count": 6, 334 | "metadata": {}, 335 | "output_type": "execute_result" 336 | } 337 | ], 338 | "source": [ 339 | "#Displaying the unique Tags\n", 340 | "df['Tag'].unique()" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 7, 346 | "metadata": {}, 347 | "outputs": [ 348 | { 349 | "data": { 350 | "text/plain": [ 351 | "Sentence # 1000616\n", 352 | "Word 0\n", 353 | "POS 0\n", 354 | "Tag 0\n", 355 | "dtype: int64" 356 | ] 357 | }, 358 | "execution_count": 7, 359 | "metadata": {}, 360 | "output_type": "execute_result" 361 | } 362 | ], 363 | "source": [ 364 | "#Checking null values, if any.\n", 365 | "df.isnull().sum()" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "There are lots of missing values in 'Sentence #' attribute. 
So we will use pandas' fillna with the 'ffill' method, which propagates the last valid observation forward to fill the gaps." 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 13, 378 | "metadata": { 379 | "colab": {}, 380 | "colab_type": "code", 381 | "id": "9PTsjCdBZ9Xy" 382 | }, 383 | "outputs": [], 384 | "source": [ 385 | "df = df.fillna(method = 'ffill')" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 14, 391 | "metadata": { 392 | "colab": {}, 393 | "colab_type": "code", 394 | "id": "b9DYzRvMagm5" 395 | }, 396 | "outputs": [], 397 | "source": [ 398 | "# This is a class to get sentences. Each sentence will be a list of (word, POS, tag) tuples.\n", 399 | "class sentence(object):\n", 400 | " def __init__(self, df):\n", 401 | " self.n_sent = 1\n", 402 | " self.df = df\n", 403 | " self.empty = False\n", 404 | " agg = lambda s : [(w, p, t) for w, p, t in zip(s['Word'].values.tolist(),\n", 405 | " s['POS'].values.tolist(),\n", 406 | " s['Tag'].values.tolist())]\n", 407 | " self.grouped = self.df.groupby(\"Sentence #\").apply(agg)\n", 408 | " self.sentences = [s for s in self.grouped]\n", 409 | " \n", 410 | " def get_text(self):\n", 411 | " try:\n", 412 | " s = self.grouped['Sentence: {}'.format(self.n_sent)]\n", 413 | " self.n_sent +=1\n", 414 | " return s\n", 415 | " except:\n", 416 | " return None" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 15, 422 | "metadata": {}, 423 | "outputs": [ 424 | { 425 | "data": { 426 | "text/plain": [ 427 | "'Thousands of demonstrators have marched through London to protest the war in Iraq and demand the withdrawal of British troops from that country .'" 428 | ] 429 | }, 430 | "execution_count": 15, 431 | "metadata": {}, 432 | "output_type": "execute_result" 433 | } 434 | ], 435 | "source": [ 436 | "#Displaying one full sentence\n", 437 | "getter = sentence(df)\n", 438 | "sentences = [\" \".join([s[0] for s in sent]) for sent in 
getter.sentences]\n", 439 | "sentences[0]" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 16, 445 | "metadata": {}, 446 | "outputs": [ 447 | { 448 | "name": "stdout", 449 | "output_type": "stream", 450 | "text": [ 451 | "[('Thousands', 'NNS', 'O'), ('of', 'IN', 'O'), ('demonstrators', 'NNS', 'O'), ('have', 'VBP', 'O'), ('marched', 'VBN', 'O'), ('through', 'IN', 'O'), ('London', 'NNP', 'B-geo'), ('to', 'TO', 'O'), ('protest', 'VB', 'O'), ('the', 'DT', 'O'), ('war', 'NN', 'O'), ('in', 'IN', 'O'), ('Iraq', 'NNP', 'B-geo'), ('and', 'CC', 'O'), ('demand', 'VB', 'O'), ('the', 'DT', 'O'), ('withdrawal', 'NN', 'O'), ('of', 'IN', 'O'), ('British', 'JJ', 'B-gpe'), ('troops', 'NNS', 'O'), ('from', 'IN', 'O'), ('that', 'DT', 'O'), ('country', 'NN', 'O'), ('.', '.', 'O')]\n" 452 | ] 453 | } 454 | ], 455 | "source": [ 456 | "#sentence with its pos and tag.\n", 457 | "sent = getter.get_text()\n", 458 | "print(sent)" 459 | ] 460 | }, 461 | { 462 | "cell_type": "markdown", 463 | "metadata": {}, 464 | "source": [ 465 | "Getting all the sentences in the dataset." 
466 | ] 467 | }, 468 | { 469 | "cell_type": "code", 470 | "execution_count": 17, 471 | "metadata": { 472 | "colab": {}, 473 | "colab_type": "code", 474 | "id": "3F0_tiOmaiVi" 475 | }, 476 | "outputs": [], 477 | "source": [ 478 | "sentences = getter.sentences" 479 | ] 480 | }, 481 | { 482 | "cell_type": "markdown", 483 | "metadata": {}, 484 | "source": [ 485 | "#### Defining the parameters for LSTM network" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": 18, 491 | "metadata": { 492 | "colab": {}, 493 | "colab_type": "code", 494 | "id": "eRQJJSoyamU4" 495 | }, 496 | "outputs": [], 497 | "source": [ 498 | "# Number of data points passed in each iteration\n", 499 | "batch_size = 64 \n", 500 | "# Number of passes through the entire training set\n", 501 | "epochs = 8\n", 502 | "# Maximum sentence length in tokens (sequences are padded/truncated to this length)\n", 503 | "max_len = 75 \n", 504 | "# Dimension of embedding vector\n", 505 | "embedding = 40 " 506 | ] 507 | }, 508 | { 509 | "cell_type": "markdown", 510 | "metadata": {}, 511 | "source": [ 512 | "#### Preprocessing Data\n", 513 | "We will process our text data before feeding to the network.\n", 514 | "* Here the word_to_index dictionary is used to convert each word into an index value, and tag_to_index does the same for the labels. So overall we represent each word and each label as an integer." 
515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": 26, 520 | "metadata": { 521 | "colab": {}, 522 | "colab_type": "code", 523 | "id": "32qpbWMVau_5" 524 | }, 525 | "outputs": [], 526 | "source": [ 527 | "#Getting unique words and labels from data\n", 528 | "words = list(df['Word'].unique())\n", 529 | "tags = list(df['Tag'].unique())\n", 530 | "# Dictionary word:index pair\n", 531 | "# word is key and its value is corresponding index\n", 532 | "word_to_index = {w : i + 2 for i, w in enumerate(words)}\n", 533 | "word_to_index[\"UNK\"] = 1\n", 534 | "word_to_index[\"PAD\"] = 0\n", 535 | "\n", 536 | "# Dictionary label:index pair\n", 537 | "# label is key and value is index.\n", 538 | "tag_to_index = {t : i + 1 for i, t in enumerate(tags)}\n", 539 | "tag_to_index[\"PAD\"] = 0\n", 540 | "\n", 541 | "idx2word = {i: w for w, i in word_to_index.items()}\n", 542 | "idx2tag = {i: w for w, i in tag_to_index.items()}" 543 | ] 544 | }, 545 | { 546 | "cell_type": "code", 547 | "execution_count": 17, 548 | "metadata": {}, 549 | "outputs": [ 550 | { 551 | "name": "stdout", 552 | "output_type": "stream", 553 | "text": [ 554 | "The word India is identified by the index: 2570\n", 555 | "The label B-org for the organization is identified by the index: 6\n" 556 | ] 557 | } 558 | ], 559 | "source": [ 560 | "print(\"The word India is identified by the index: {}\".format(word_to_index[\"India\"]))\n", 561 | "print(\"The label B-org for the organization is identified by the index: {}\".format(tag_to_index[\"B-org\"]))" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": 31, 567 | "metadata": { 568 | "colab": {}, 569 | "colab_type": "code", 570 | "id": "tcC_UuUbav7y" 571 | }, 572 | "outputs": [], 573 | "source": [ 574 | "# Converting each sentence from a list of tokens into a list of indices\n", 575 | "X = [[word_to_index[w[0]] for w in s] for s in sentences]\n", 576 | "\n", 577 | "# Padding each sequence so that all sequences have the same length\n", 578 
| "X = pad_sequences(maxlen = max_len, sequences = X, padding = \"post\", value = word_to_index[\"PAD\"])" 579 | ] 580 | }, 581 | { 582 | "cell_type": "code", 583 | "execution_count": 32, 584 | "metadata": { 585 | "colab": {}, 586 | "colab_type": "code", 587 | "id": "N-C7iFNjaytc" 588 | }, 589 | "outputs": [], 590 | "source": [ 591 | "# Convert label to index\n", 592 | "y = [[tag_to_index[w[2]] for w in s] for s in sentences]\n", 593 | "\n", 594 | "# padding\n", 595 | "y = pad_sequences(maxlen = max_len, sequences = y, padding = \"post\", value = tag_to_index[\"PAD\"])" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "execution_count": 33, 601 | "metadata": { 602 | "colab": {}, 603 | "colab_type": "code", 604 | "id": "SbnAi9kwa0gL" 605 | }, 606 | "outputs": [], 607 | "source": [ 608 | "num_tag = df['Tag'].nunique()\n", 609 | "# One hot encoded labels\n", 610 | "y = [to_categorical(i, num_classes = num_tag + 1) for i in y]" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": 34, 616 | "metadata": { 617 | "colab": {}, 618 | "colab_type": "code", 619 | "id": "bmj_9AzCa23d" 620 | }, 621 | "outputs": [], 622 | "source": [ 623 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15)" 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "execution_count": 22, 629 | "metadata": {}, 630 | "outputs": [ 631 | { 632 | "name": "stdout", 633 | "output_type": "stream", 634 | "text": [ 635 | "Size of training input data : (40765, 75)\n", 636 | "Size of training output data : (40765, 75, 18)\n", 637 | "Size of testing input data : (7194, 75)\n", 638 | "Size of testing output data : (7194, 75, 18)\n" 639 | ] 640 | } 641 | ], 642 | "source": [ 643 | "print(\"Size of training input data : \", X_train.shape)\n", 644 | "print(\"Size of training output data : \", np.array(y_train).shape)\n", 645 | "print(\"Size of testing input data : \", X_test.shape)\n", 646 | "print(\"Size of testing output data : \", 
np.array(y_test).shape)" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": 23, 652 | "metadata": {}, 653 | "outputs": [ 654 | { 655 | "name": "stdout", 656 | "output_type": "stream", 657 | "text": [ 658 | "*****Before Processing first sentence : *****\n", 659 | " Thousands of demonstrators have marched through London to protest the war in Iraq and demand the withdrawal of British troops from that country .\n", 660 | "*****After Processing first sentence : *****\n", 661 | " [ 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 11 17 3 18 19 20 21 22 23\n", 662 | " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", 663 | " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", 664 | " 0 0 0]\n" 665 | ] 666 | } 667 | ], 668 | "source": [ 669 | "# Let's check the first sentence before and after processing.\n", 670 | "print('*****Before Processing first sentence : *****\\n', ' '.join([w[0] for w in sentences[0]]))\n", 671 | "print('*****After Processing first sentence : *****\\n ', X[0])" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": 24, 677 | "metadata": {}, 678 | "outputs": [ 679 | { 680 | "name": "stdout", 681 | "output_type": "stream", 682 | "text": [ 683 | "*****Before Processing first sentence : *****\n", 684 | " O O O O O O B-geo O O O O O B-geo O O O O O B-gpe O O O O O\n", 685 | "*****After Processing first sentence : *****\n", 686 | " [[0. 1. 0. ... 0. 0. 0.]\n", 687 | " [0. 1. 0. ... 0. 0. 0.]\n", 688 | " [0. 1. 0. ... 0. 0. 0.]\n", 689 | " ...\n", 690 | " [1. 0. 0. ... 0. 0. 0.]\n", 691 | " [1. 0. 0. ... 0. 0. 0.]\n", 692 | " [1. 0. 0. ... 0. 0. 
0.]]\n" 693 | ] 694 | } 695 | ], 696 | "source": [ 697 | "# First label before and after processing.\n", 698 | "print('*****Before Processing first sentence : *****\\n', ' '.join([w[2] for w in sentences[0]]))\n", 699 | "print('*****After Processing first sentence : *****\\n ', y[0])" 700 | ] 701 | }, 702 | { 703 | "cell_type": "markdown", 704 | "metadata": {}, 705 | "source": [ 706 | "#### Bidirectional LSTM-CRF Network" 707 | ] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "execution_count": 96, 712 | "metadata": { 713 | "colab": { 714 | "base_uri": "https://localhost:8080/", 715 | "height": 306 716 | }, 717 | "colab_type": "code", 718 | "executionInfo": { 719 | "elapsed": 2791, 720 | "status": "ok", 721 | "timestamp": 1560703209499, 722 | "user": { 723 | "displayName": "CHAVAN AKSHAY", 724 | "photoUrl": "", 725 | "userId": "10674464813829582221" 726 | }, 727 | "user_tz": -330 728 | }, 729 | "id": "6WRJfQ5ca4vD", 730 | "outputId": "a908468a-3b1f-4680-afe7-6cc22a8a9394" 731 | }, 732 | "outputs": [ 733 | { 734 | "name": "stdout", 735 | "output_type": "stream", 736 | "text": [ 737 | "_________________________________________________________________\n", 738 | "Layer (type) Output Shape Param # \n", 739 | "=================================================================\n", 740 | "input_8 (InputLayer) (None, 75) 0 \n", 741 | "_________________________________________________________________\n", 742 | "embedding_8 (Embedding) (None, 75, 40) 1407200 \n", 743 | "_________________________________________________________________\n", 744 | "bidirectional_8 (Bidirection (None, 75, 100) 36400 \n", 745 | "_________________________________________________________________\n", 746 | "time_distributed_8 (TimeDist (None, 75, 50) 5050 \n", 747 | "_________________________________________________________________\n", 748 | "crf_8 (CRF) (None, 75, 18) 1278 \n", 749 | "=================================================================\n", 750 | "Total params: 1,449,928\n", 751 | 
"Trainable params: 1,449,928\n", 752 | "Non-trainable params: 0\n", 753 | "_________________________________________________________________\n" 754 | ] 755 | } 756 | ], 757 | "source": [ 758 | "num_tags = df['Tag'].nunique()\n", 759 | "# Model architecture\n", 760 | "input = Input(shape = (max_len,))\n", 761 | "model = Embedding(input_dim = len(words) + 2, output_dim = embedding, input_length = max_len, mask_zero = True)(input)\n", 762 | "model = Bidirectional(LSTM(units = 50, return_sequences=True, recurrent_dropout=0.1))(model)\n", 763 | "model = TimeDistributed(Dense(50, activation=\"relu\"))(model)\n", 764 | "crf = CRF(num_tags+1) # CRF layer\n", 765 | "out = crf(model) # output\n", 766 | "\n", 767 | "model = Model(input, out)\n", 768 | "model.compile(optimizer=\"rmsprop\", loss=crf.loss_function, metrics=[crf.accuracy])\n", 769 | "\n", 770 | "model.summary()" 771 | ] 772 | }, 773 | { 774 | "cell_type": "markdown", 775 | "metadata": {}, 776 | "source": [ 777 | "We checkpoint after every epoch and save the model only when the validation loss improves, so the best-performing model is kept even if later epochs start to overfit." 
778 | ] 779 | }, 780 | { 781 | "cell_type": "code", 782 | "execution_count": 25, 783 | "metadata": { 784 | "colab": {}, 785 | "colab_type": "code", 786 | "id": "CJcJLVXWa7r1" 787 | }, 788 | "outputs": [], 789 | "source": [ 790 | "checkpointer = ModelCheckpoint(filepath = 'model.h5',\n", 791 | " verbose = 0,\n", 792 | " mode = 'auto',\n", 793 | " save_best_only = True,\n", 794 | " monitor='val_loss')" 795 | ] 796 | }, 797 | { 798 | "cell_type": "code", 799 | "execution_count": 119, 800 | "metadata": { 801 | "colab": { 802 | "base_uri": "https://localhost:8080/", 803 | "height": 326 804 | }, 805 | "colab_type": "code", 806 | "executionInfo": { 807 | "elapsed": 2003225, 808 | "status": "ok", 809 | "timestamp": 1560708147077, 810 | "user": { 811 | "displayName": "CHAVAN AKSHAY", 812 | "photoUrl": "", 813 | "userId": "10674464813829582221" 814 | }, 815 | "user_tz": -330 816 | }, 817 | "id": "SjKhhXHMG-jJ", 818 | "outputId": "bd461b08-3920-4920-c3a6-10eb8f3cf432" 819 | }, 820 | "outputs": [ 821 | { 822 | "name": "stdout", 823 | "output_type": "stream", 824 | "text": [ 825 | "Train on 36688 samples, validate on 4077 samples\n", 826 | "Epoch 1/8\n", 827 | "36688/36688 [==============================] - 251s 7ms/step - loss: 8.8293 - crf_viterbi_accuracy: 0.9762 - val_loss: 8.7456 - val_crf_viterbi_accuracy: 0.9693\n", 828 | "Epoch 2/8\n", 829 | "36688/36688 [==============================] - 249s 7ms/step - loss: 8.8255 - crf_viterbi_accuracy: 0.9776 - val_loss: 8.7474 - val_crf_viterbi_accuracy: 0.9692\n", 830 | "Epoch 3/8\n", 831 | "36688/36688 [==============================] - 250s 7ms/step - loss: 8.8225 - crf_viterbi_accuracy: 0.9787 - val_loss: 8.7480 - val_crf_viterbi_accuracy: 0.9669\n", 832 | "Epoch 4/8\n", 833 | "36688/36688 [==============================] - 250s 7ms/step - loss: 8.8199 - crf_viterbi_accuracy: 0.9796 - val_loss: 8.7469 - val_crf_viterbi_accuracy: 0.9695\n", 834 | "Epoch 5/8\n", 835 | "36688/36688 [==============================] - 250s 7ms/step 
- loss: 8.8174 - crf_viterbi_accuracy: 0.9810 - val_loss: 8.7537 - val_crf_viterbi_accuracy: 0.9668\n", 836 | "Epoch 6/8\n", 837 | "36688/36688 [==============================] - 251s 7ms/step - loss: 8.8150 - crf_viterbi_accuracy: 0.9820 - val_loss: 8.7508 - val_crf_viterbi_accuracy: 0.9669\n", 838 | "Epoch 7/8\n", 839 | "36688/36688 [==============================] - 250s 7ms/step - loss: 8.8128 - crf_viterbi_accuracy: 0.9830 - val_loss: 8.7558 - val_crf_viterbi_accuracy: 0.9650\n", 840 | "Epoch 8/8\n", 841 | "36688/36688 [==============================] - 250s 7ms/step - loss: 8.8106 - crf_viterbi_accuracy: 0.9839 - val_loss: 8.7559 - val_crf_viterbi_accuracy: 0.9662\n" 842 | ] 843 | } 844 | ], 845 | "source": [ 846 | "history = model.fit(X_train, np.array(y_train), batch_size=batch_size, epochs=epochs,\n", 847 | " validation_split=0.1, callbacks=[checkpointer])" 848 | ] 849 | }, 850 | { 851 | "cell_type": "code", 852 | "execution_count": 99, 853 | "metadata": { 854 | "colab": { 855 | "base_uri": "https://localhost:8080/", 856 | "height": 34 857 | }, 858 | "colab_type": "code", 859 | "executionInfo": { 860 | "elapsed": 1054, 861 | "status": "ok", 862 | "timestamp": 1560705868376, 863 | "user": { 864 | "displayName": "CHAVAN AKSHAY", 865 | "photoUrl": "", 866 | "userId": "10674464813829582221" 867 | }, 868 | "user_tz": -330 869 | }, 870 | "id": "vEsREje5ubq-", 871 | "outputId": "083b774d-ae35-4720-e515-7930617364d4" 872 | }, 873 | "outputs": [ 874 | { 875 | "data": { 876 | "text/plain": [ 877 | "dict_keys(['val_loss', 'val_crf_viterbi_accuracy', 'loss', 'crf_viterbi_accuracy'])" 878 | ] 879 | }, 880 | "execution_count": 99, 881 | "metadata": { 882 | "tags": [] 883 | }, 884 | "output_type": "execute_result" 885 | } 886 | ], 887 | "source": [ 888 | "history.history.keys()" 889 | ] 890 | }, 891 | { 892 | "cell_type": "markdown", 893 | "metadata": {}, 894 | "source": [ 895 | "Visualizing the performance of model." 
896 | ] 897 | }, 898 | { 899 | "cell_type": "code", 900 | "execution_count": 120, 901 | "metadata": { 902 | "colab": { 903 | "base_uri": "https://localhost:8080/", 904 | "height": 516 905 | }, 906 | "colab_type": "code", 907 | "executionInfo": { 908 | "elapsed": 1106, 909 | "status": "ok", 910 | "timestamp": 1560709905938, 911 | "user": { 912 | "displayName": "CHAVAN AKSHAY", 913 | "photoUrl": "", 914 | "userId": "10674464813829582221" 915 | }, 916 | "user_tz": -330 917 | }, 918 | "id": "QElZwYqqbSFV", 919 | "outputId": "60eea78e-8ea6-45be-9151-281264768c72" 920 | }, 921 | "outputs": [ 922 | { 923 | "data": { 924 | "text/plain": [ 925 | "" 926 | ] 927 | }, 928 | "execution_count": 120, 929 | "metadata": { 930 | "tags": [] 931 | }, 932 | "output_type": "execute_result" 933 | }, 934 | { 935 | "data": { 936 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfoAAAHiCAYAAAAAkA6/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3Xl4VdX59vHvw2wYRAGVgiRUrcxD\njIBVZHC2CkopglGLVmNtsVarrYoDxVKtU52ob1OLFX8RRK2KA3VAFK0TAQEFRFABGaqIDGJEBp/3\nj7WDhxjICZzkJDv357py5Zy1p7VPuLjP2nvttczdERERkXiqle4KiIiISMVR0IuIiMSYgl5ERCTG\nFPQiIiIxpqAXERGJMQW9iIhIjCnoRQAzq21mG82sTSrXTSczO9jMUv78rJkda2ZLEt4vNLPeyay7\nG8e6z8yu3t3tRQTqpLsCIrvDzDYmvM0AvgG2Re8vdPeC8uzP3bcBjVK9bk3g7oemYj9mdj5wlrv3\nTdj3+anYt0hNpqCXasndtwdt1GI8391f3Nn6ZlbH3bdWRt1EyqJ/j1KZdOleYsnM/mRmD5vZBDP7\nEjjLzI4wszfNbJ2ZrTKzu8ysbrR+HTNzM8uK3v9ftHyKmX1pZm+YWdvyrhstP8nMPjCz9WZ2t5n9\n18yG76TeydTxQjNbbGZrzeyuhG1rm9lfzWyNmX0EnLiLz2ekmU0sUTbWzG6PXp9vZgui8/kwam3v\nbF/Lzaxv9DrDzB6M6jYPOKzEuteY2UfRfueZ2YCovDNwD9A7ui3yecJnOyph+19G577GzJ4ws5bJ\nfDbl+ZyL62NmL5rZF2b2PzP7fcJxro0+kw1mVmhmPyjtNomZvVb8d44+z+nRcb4ArjGzQ8xsWnSM\nz6PPbe+E7TOjc1wdLb/TzBpEdW6fsF5LMysys2Y7O1+p2RT0EmenAw8BewMPA1uBS4DmwJGEILxw\nF9ufCVwL7AssA24o77pmth8wCbgiOu7HQI9d7CeZOp5MCNDuhC8wx0blFwHHA12Bw4EhuzjOBOAU\nM2sY1bMO8DPC5wXwKfAToAlwAXC3mXXZxf6KjQYOBH4Y1fPnJZZ/EJ3X3sAY4CEz29/d3wVGAK+6
\neyN3b15yx2Z2fLT/wUArYCVQ8hbNzj6bknb6OUdh+yLwFNAS+BHwcrTdFdHxTwSaAucDm3b1gST4\nMbAAaAH8BTDgT8ABQAfCZ3ZtVIc6wDPAYiCL8JlOcvdNhH9PZyXs90zgOXdfk2Q9pIZR0Eucvebu\nT7n7t+7+tbvPcPe33H2ru38E5AN9drH9o+5e6O5bCIHSbTfWPQWY7e5PRsv+Cny+s50kWccb3X29\nuy8hBFDxsYYAf3X35dF/+jft4jgfAe8BA6Oi44C17l4YLX/K3T/y4CVgKlBqh7sShgB/cve17r6U\n0EpPPO4kd18V/U0eApYAOUnsFyAXuM/dZ0eBdyXQx8xaJ6yzs89mB2V8zgOAZe5+p7t/4+4b3P3t\naNn5wNXuvig6h9nu/kWS9V/m7ve6+7bo3+MH7j7V3Te7+2eEfxvFdTiC8CXkD+7+VbT+f6NlDwBn\nmplF788GHkyyDlIDKeglzj5JfGNm7czsmehS7AZC6/B7LccE/0t4XcSuO+DtbN0fJNbDwyxSy3e2\nkyTrmNSxgKW7qC+E1vuw6PWZfNeax8xOMbO3osvK6whXCnb1WRVruas6mNlwM5sTXX5eB7RLcr8Q\nzm/7/tx9A7CW0LovltTfrIzP+UDgw53UYVfLylLy3+MBZjbJzFZEdfhXiTosiTp+7iAK/K3AUWbW\nCWhDaP2LlEpBL3FW8tGyvxNasQe7exPgOsLl04q0Ctje4oxaYa12vvoe1XEVISCKlfX43yTgWDNr\nRWjZPxTVcS/gUeBGYH93bwo8n2Q9/rezOpjZD4F7CbcYmkX7fT9hv2U9CrgSyEzYX2NgH2BFEvUq\naVef8yfAQTvZbmfLvorqlJFQdkCJdUqe318IT4t0juowvEQdMs2s9k7qMZ5w+f5swiX9b3aynoiC\nXmqUxsB64KuoM9Ou7s+nytNAtpmdGt13vYRwj7Yi6jgJ+K2ZtYo6Zv1hVyu7+/+A1wgtyYXuviha\nVB+oB6wGtpnZKcAx5ajD1WbW1MI4AyMSljUihN1qwneeCwgt+mKfAq0TO8WVMAH4hZl1MbP6hC8i\nr7r7Tq+Q7MKuPufJQBszG2Fm9c2siZkV96u4D/iTmR1kQTcz25fwBed/hH4Btc0sj4QvJbuow1fA\nejM7ELg8YdkbwBrgzxY6OO5lZkcmLH+Q0FfgTELoi+yUgl5qkt8ROod9SWjRPVzRB3T3T4EzgNsJ\n/3EfBLxDaMmluo73Eu6lvwvMILTKy/IQcCwJl+3dfR1wKfA48AUhUJ5Osg7XE64sLAGmkBBC7j4X\nuBt4O1rnUOCthG1fABYBn5pZ4iX44u3/Q7jE/ni0fRvCffvdsdPP2d3XE/os/JTw5eMDvrt3fgvw\nBOFz3kC4t98guiVzAXA1oQ/GwSXOrTTXEzpmrid8uXgsoQ5bCf072hNa98sIf4fi5UsIf+dv3P31\ncp671DAW/n2KSGWILsWuBAa7+6vpro9UX2Y2HvjI3Ueluy5StWnAHJEKZmYnAm8CXwNXAVsIrVqR\n3RL1dxgIdE53XaTq06V7kYp3FPAR4d70CcDp6jwlu8vMbgTmAH9292Xpro9Ufbp0LyIiEmNq0YuI\niMSYgl5ERCTGYtEZr3nz5p6VlZXuaoiIiFSamTNnfu7uuxqXA4hJ0GdlZVFYWJjuaoiIiFQaMytr\nmGtAl+5FRERiTUEvIiISYwp6ERGRGIvFPfrSbNmyheXLl7Np06Z0V0V2oUGDBrRu3Zq6dXc2j4mI\niOyJ2Ab98uXLady4MVlZWYSZQaWqcXfWrFnD8uXLadu2bbqrIyISS7G9dL9p0yaaNWumkK/CzIxm\nzZrpqouISAWKbdADCvlqQH8jEZGKFeugT6c1a9bQrVs3unXrxgEHHECrVq22v9+8eXNS+zj33HNZ\nuHDhLtcZO3YsBQUFqaiyiIjEUGzv0ZdXQQGMHAnLlkGbNjBm
DOTm7v7+mjVrxuzZswEYNWoUjRo1\n4vLLL99hHXfH3alVq/TvW/fff3+Zx/n1r3+9+5UUEZHYU4ueEPJ5ebB0KbiH33l5oTzVFi9eTIcO\nHcjNzaVjx46sWrWKvLw8cnJy6NixI6NHj96+7lFHHcXs2bPZunUrTZs25corr6Rr164cccQRfPbZ\nZwBcc8013HHHHdvXv/LKK+nRoweHHnoor7/+OgBfffUVP/3pT+nQoQODBw8mJydn+5eQRNdffz2H\nH344nTp14pe//CXFMxt+8MEH9O/fn65du5Kdnc2SJUsA+POf/0znzp3p2rUrI0eOTP2HJSIie0xB\nT2jJFxXtWFZUFMorwvvvv8+ll17K/PnzadWqFTfddBOFhYXMmTOHF154gfnz539vm/Xr19OnTx/m\nzJnDEUccwbhx40rdt7vz9ttvc8stt2z/0nD33XdzwAEHMH/+fK699lreeeedUre95JJLmDFjBu++\n+y7r16/nP//5DwDDhg3j0ksvZc6cObz++uvst99+PPXUU0yZMoW3336bOXPm8Lvf/S5Fn46IiKSS\ngp5wub485XvqoIMOIicnZ/v7CRMmkJ2dTXZ2NgsWLCg16Pfaay9OOukkAA477LDtreqSBg0a9L11\nXnvtNYYOHQpA165d6dixY6nbTp06lR49etC1a1deeeUV5s2bx9q1a/n888859dRTgfDce0ZGBi++\n+CLnnXcee+21FwD77rtv+T8IERGpcLpHT7gnv7SUqQHatKmY4zVs2HD760WLFnHnnXfy9ttv07Rp\nU84666xSHzerV6/e9te1a9dm69atpe67fv36Za5TmqKiIkaMGMGsWbNo1aoV11xzjR57ExGJAbXo\nCR3vMjJ2LMvICOUVbcOGDTRu3JgmTZqwatUqnnvuuZQf48gjj2TSpEkAvPvuu6VeMfj666+pVasW\nzZs358svv+Sxxx4DYJ999qFFixY89dRTQBifoKioiOOOO45x48bx9ddfA/DFF1+kvN4iIrLn1KLn\nu971qex1n6zs7Gw6dOhAu3btyMzM5Mgjj0z5MS6++GLOOeccOnTosP1n77333mGdZs2a8fOf/5wO\nHTrQsmVLevbsuX1ZQUEBF154ISNHjqRevXo89thjnHLKKcyZM4ecnBzq1q3Lqaeeyg033JDyuouI\nyJ6x4p7V1VlOTo6XnI9+wYIFtG/fPk01qlq2bt3K1q1badCgAYsWLeL4449n0aJF1KlTNb7n6W8l\nInGV6ke3E5nZTHfPKWu9qvE/vVSojRs3cswxx7B161bcnb///e9VJuRFROKq+NHt4qe6ih/dhsq5\nYlxM/9vXAE2bNmXmzJnproaISI2yq0e3KzPo1RlPRESkAlT2o9s7o6AXERGpADt7RLuiHt3eGQW9\niIhIBUjno9uJFPQiIiIVIDcX8vMhMxPMwu/8/Mq9Pw8K+grTr1+/7w1+c8cdd3DRRRftcrtGjRoB\nsHLlSgYPHlzqOn379qXk44Ql3XHHHRQl9AI5+eSTWbduXTJVFxGRFMnNhSVL4Ntvw+/KDnlQ0FeY\nYcOGMXHixB3KJk6cyLBhw5La/gc/+AGPPvrobh+/ZNA/++yzNG3adLf3JyIi1ZOCvoIMHjyYZ555\nhs2bNwOwZMkSVq5cSe/evbc/156dnU3nzp158sknv7f9kiVL6NSpExCGpx06dCjt27fn9NNP3z7s\nLMBFF120fYrb66+/HoC77rqLlStX0q9fP/r16wdAVlYWn3/+OQC33347nTp1olOnTtunuF2yZAnt\n27fnggsuoGPHjhx//PE7HKfYU089Rc+ePenevTvHHnssn376KRCe1T/33HPp3LkzXbp02T6E7n/+\n8x+ys7Pp2rUrxxxzTEo+WxERSV6NeI7+t7+FUqZf3yPdukGUkaXad9996dGjB1OmTGHgwIFMnDiR\nIUOGYGY0aNCAxx9/nCZN
mvD555/Tq1cvBgwYgJmVuq97772XjIwMFixYwNy5c8nOzt6+bMyYMey7\n775s27aNY445hrlz5/Kb3/yG22+/nWnTptG8efMd9jVz5kzuv/9+3nrrLdydnj170qdPH/bZZx8W\nLVrEhAkT+Mc//sGQIUN47LHHOOuss3bY/qijjuLNN9/EzLjvvvu4+eabue2227jhhhvYe++9effd\ndwFYu3Ytq1ev5oILLmD69Om0bdtW4+GLiKSBWvQVKPHyfeJle3fn6quvpkuXLhx77LGsWLFie8u4\nNNOnT98euF26dKFLly7bl02aNIns7Gy6d+/OvHnzSp2wJtFrr73G6aefTsOGDWnUqBGDBg3i1Vdf\nBaBt27Z069YN2PlUuMuXL+eEE06gc+fO3HLLLcybNw+AF198kV//+tfb19tnn3148803Ofroo2nb\nti2gqWxFRNKhRrTod9XyrkgDBw7k0ksvZdasWRQVFXHYYYcBYZKY1atXM3PmTOrWrUtWVtZuTQn7\n8ccfc+uttzJjxgz22Wcfhg8fvkdTyxZPcQthmtvSLt1ffPHFXHbZZQwYMICXX36ZUaNG7fbxRESk\n4qlFX4EaNWpEv379OO+883bohLd+/Xr2228/6taty7Rp01i6dOku93P00Ufz0EMPAfDee+8xd+5c\nIExx27BhQ/bee28+/fRTpkyZsn2bxo0b8+WXX35vX7179+aJJ56gqKiIr776iscff5zevXsnfU7r\n16+nVatWADzwwAPby4877jjGjh27/f3atWvp1asX06dP5+OPPwY0la2ISDoo6CvYsGHDmDNnzg5B\nn5ubS2FhIZ07d2b8+PG0a9dul/u46KKL2LhxI+3bt+e6667bfmWga9eudO/enXbt2nHmmWfuMMVt\nXl4eJ5544vbOeMWys7MZPnw4PXr0oGfPnpx//vl079496fMZNWoUP/vZzzjssMN2uP9/zTXXsHbt\nWjp16kTXrl2ZNm0aLVq0ID8/n0GDBtG1a1fOOOOMpI8jIiKpoWlqJe30txIRKb9kp6lVi15ERCTG\nFPQiIiIxpqAXERGJsVgHfRz6H8Sd/kYiNVNBAWRlQa1a4XdBQbprFF+xDfoGDRqwZs0aBUkV5u6s\nWbOGBg0apLsqIlKJCgogLw+WLgX38DsvT2FfUWLb637Lli0sX758jwaQkYrXoEEDWrduTd26ddNd\nFRGpJFlZIdxLyswMM7xJcpLtdR/bkfHq1q27fehVERGpOpYtK1+57JnYXroXEZGqqU2b8pXLnlHQ\ni4hIpRozBjIydizLyAjlknoKehERqVS5uZCfH+7Jm4Xf+fmhXFIvtvfoRUSk6srNVbBXFrXoRURE\nYiypoDezE81soZktNrMrS1meaWZTzWyumb1sZq0Tlt1sZvPMbIGZ3WVBhpk9Y2bvR8tuSlh/uJmt\nNrPZ0c/5qTlVERGRmqfMoDez2sBY4CSgAzDMzDqUWO1WYLy7dwFGAzdG2/4YOBLoAnQCDgf6FG/j\n7u2A7sCRZnZSwv4edvdu0c99u312IiIiNVwyLfoewGJ3/8jdNwMTgYEl1ukAvBS9npaw3IEGQD2g\nPlAX+NTdi9x9GkC0z1lAa0RERCSlkgn6VsAnCe+XR2WJ5gCDotenA43NrJm7v0EI/lXRz3PuviBx\nQzNrCpwKTE0o/ml0G+BRMzsw6bMRERGRHaSqM97lQB8ze4dwaX4FsM3MDgbaE1rrrYD+Zta7eCMz\nqwNMAO5y94+i4qeArOg2wAvAA6Ud0MzyzKzQzApXr16dotMQERGJl2SCfgWQ2KpuHZVt5+4r3X2Q\nu3cHRkZl6wit+zfdfaO7bwSmAEckbJoPLHL3OxL2tcbdv4ne3gccVlql3D3f3XPcPadFixZJnIaI\niEjNk0zQzwAOMbO2ZlYPGApMTlzBzJqbWfG+rgLGRa+XEVr6dcysLqG1vyDa5k/A3sBvS+
yrZcLb\nAcXri4iISPmVGfTuvhUYATxHCN1J7j7PzEab2YBotb7AQjP7ANgfKB7I8FHgQ+Bdwn38Oe7+VPT4\n3UhCJ75ZJR6j+030yN0c4DfA8BScp4iISI0U22lqRURE4izZaWo1Mp6IiEiMKehFRERiTEEvIlKF\nFBRAVhbUqhV+FxSku0ZS3Wn2OhGRKqKgAPLyoKgovF+6NLwHzfQmu08tehGRKmLkyO9CvlhRUSgX\n2V0KehGRKmLZsvKViyRDQS8iUkW0aVO+cpFkKOhFRKqIMWMgI2PHsoyMUC6yuxT0IiJVRG4u5OdD\nZiaYhd/5+eqIJ3tGve5FRKqQ3FwFu6SWWvQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMK\nehERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMKehERkRhT\n0ItItVBQAFlZUKtW+F1QkO4aiVQPmo9eRKq8ggLIy4OiovB+6dLwHjR3u0hZ1KIXkSpv5MjvQr5Y\nUVEoF5FdU9CLSJW3bFn5ykXkOwp6Eany2rQpX7mIfEdBLyJV3pgxkJGxY1lGRigXkV1T0ItIlZeb\nC/n5kJkJZuF3fr464okkQ73uRaRayM1VsIvsDrXoRUREYkxBLyIiEmMKehERkRhT0IuIiMSYgl5E\nRCTGFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMKehERkRhT0IuIiMRYUkFvZiea2UIz\nW2xmV5ayPNPMpprZXDN72cxaJyy72czmmdkCM7vLzCwqP8zM3o32mVi+r5m9YGaLot/7pOpkRURE\napoyg97MagNjgZOADsAwM+tQYrVbgfHu3gUYDdwYbftj4EigC9AJOBzoE21zL3ABcEj0c2JUfiUw\n1d0PAaZG70VERGQ3JNOi7wEsdveP3H0zMBEYWGKdDsBL0etpCcsdaADUA+oDdYFPzawl0MTd33R3\nB8YDp0XbDAQeiF4/kFAuIiIi5ZRM0LcCPkl4vzwqSzQHGBS9Ph1obGbN3P0NQvCvin6ec/cF0fbL\nd7LP/d19VfT6f8D+pVXKzPLMrNDMClevXp3EaYiIiNQ8qeqMdznQx8zeIVyaXwFsM7ODgfZAa0KQ\n9zez3snuNGrt+06W5bt7jrvntGjRYo9PQEREJI6SCfoVwIEJ71tHZdu5+0p3H+Tu3YGRUdk6Quv+\nTXff6O4bgSnAEdH2rXeyz+JL+0S/Pyv3WYmIiAiQXNDPAA4xs7ZmVg8YCkxOXMHMmptZ8b6uAsZF\nr5cRWvp1zKwuobW/ILo0v8HMekW97c8Bnoy2mQz8PHr984RyERERKacyg97dtwIjgOeABcAkd59n\nZqPNbEC0Wl9goZl9QLinPiYqfxT4EHiXcB9/jrs/FS37FXAfsDhaZ0pUfhNwnJktAo6N3ouIiMhu\nsHAbvHrLycnxwsLCdFdDJC0KCmDkSFi2DNq0gTFjIDc33bUSkYpmZjPdPaes9epURmVEpGIUFEBe\nHhQVhfdLl4b3oLAXkUBD4IpUYyNHfhfyxYqKQrmICCjoRaq1ZcvKVy4iNY+CXqQaa9OmfOUiUvMo\n6EWqsTFjICNjx7KMjFAuIgIKepFqLTcX8vMhMxPMwu/8fHXEE5HvqNe9SDWXm6tgF5GdU4teREQk\nxhT0IiIiMaagFxERiTEFvYiISIwp6EVERGJMQS8iIhJjCnoREZEYU9CLiIjEmIJeREQkxhT0IiIi\nMaagFxERiTEFvYiISIwp6CW2CgogKwtq1Qq/CwrSXSMRkcqn2esklgoKIC8PiorC+6VLw3vQTG8i\nUrOoRS+xNHLkdyFfrKgolIuI1CQKeomlZcvKVy4iElcKeomlNm3KVy4iElcKeomlMWMgI2PHsoyM\nUC4iUpMo6CWWcnMhPx8yM8Es/M7PV0c8Eal51OteYi
s3V8EuIqIWvYiISIwp6EVERGJMQS8iIhJj\nCnoREZEYU9CLiIjEmIJeREQkxhT0IiIiMaagFxERiTEFvYiISIwp6EVERGJMQS8iIhJjCnoREZEY\nU9CLiIjEmIJeREQkxhT0IiIiMaagr4EKCiArC2rVCr8LCtJdIxERqShJBb2ZnWhmC81ssZldWcry\nTDObamZzzexlM2sdlfczs9kJP5vM7LRo2asJ5SvN7ImovK+ZrU9Ydl0qT7imKyiAvDxYuhTcw++8\nPIW9iEhcmbvvegWz2sAHwHHAcmAGMMzd5yes8wjwtLs/YGb9gXPd/ewS+9kXWAy0dveiEsseA550\n9/Fm1he43N1PSfYkcnJyvLCwMNnVa7SsrBDuJWVmwpIllV0bERHZXWY2091zylovmRZ9D2Cxu3/k\n7puBicDAEut0AF6KXk8rZTnAYGBKKSHfBOgPPJFEXWQPLVtWvnIREanekgn6VsAnCe+XR2WJ5gCD\notenA43NrFmJdYYCE0rZ/2nAVHffkFB2hJnNMbMpZtYxiTpKktq0KV+5iIhUb6nqjHc50MfM3gH6\nACuAbcULzawl0Bl4rpRth7HjF4BZQKa7dwXuZictfTPLM7NCMytcvXp1as6iBhgzBjIydizLyAjl\nIiISP8kE/QrgwIT3raOy7dx9pbsPcvfuwMiobF3CKkOAx919S+J2ZtaccGvgmYR9bXD3jdHrZ4G6\n0Xo7cPd8d89x95wWLVokcRoCkJsL+fnhnrxZ+J2fH8pFRCR+6iSxzgzgEDNrSwj4ocCZiStEQfyF\nu38LXAWMK7GPYVF5SYMJnfg2JezrAOBTd3cz60H4MrImyfORJOTmKthFRGqKMlv07r4VGEG47L4A\nmOTu88xstJkNiFbrCyw0sw+A/YHtF4LNLItwReCVUnZf2n37wcB7ZjYHuAsY6mU9GiAiIiKlKvPx\nuupAj9eJiEhNk8rH60RERKSaUtCLiIjEmIJeREQkxhT0IiIiMaagFxERiTEFvYiISIwp6EVERGJM\nQS8iIhJjCnoREZEYU9CLiIjEmIJeREQkxhT0IiIiMaagFxERiTEFvYiISIwp6EVERGJMQS8iIhJj\nCnoREZEYU9CLiIjEmIJeREQkxhT0CQoKICsLatUKvwsK0l0jERGRPVMn3RWoKgoKIC8PiorC+6VL\nw3uA3Nz01UtERGRPqEUfGTnyu5AvVlQUykVERKorBX1k2bLylYuIiFQHCvpImzblKxcREakOFPSR\nMWMgI2PHsoyMUC4iIlJdKegjubmQnw+ZmWAWfufnqyOeiIhUb+p1nyA3V8EuIiLxoha9iIhIjCno\nRUREYkxBLyIiEmMKehERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxB\nLyIiEmMKehERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJsaSC3sxONLOFZrbYzK4sZXmmmU01\ns7lm9rKZtY7K+5nZ7ISfTWZ2WrTsX2b2ccKyblG5mdld0bHmmll2Kk9YRESkJqlT1gpmVhsYCxwH\nLAdmmNlkd5+fsNqtwHh3f8DM+gM3Ame7+zSgOMD3BRYDzydsd4W7P1rikCcBh0Q/PYF7o98iIiJS\nTsm06HsAi939I3ffDEwEBpZYpwPwUvR6WinLAQYDU9y9qIzjDSR8aXB3fxNoamYtk6iniIiIlJBM\n0LcCPkl4vzwqSzQHGBS9Ph1obGbNSqwzFJhQomxMdHn+r2ZWvxzHExERkSSkqjPe5UAfM3sH6AOs\nALYVL4xa5J2B5xK2uQpoBxwO7Av8oTwHNLM8Mys0s8LVq1fvYfVFRETiKZmgXwEcmPC+dVS2nbuv\ndPdB7t4dGBmVrUtYZQjwuLtvSdhmVXR5/hvgfsItgqSOF22f7+457p7TokWLJE5DRESk5kkm6GcA\nh5hZWzOrR7gEPz
lxBTNrbmbF+7oKGFdiH8Mocdm++L67mRlwGvBetGgycE7U+74XsN7dV5XjnERE\nRCRSZq97d99qZiMIl91rA+PcfZ6ZjQYK3X0y0Be40cwcmA78unh7M8sitNBfKbHrAjNrARgwG/hl\nVP4scDKhh34RcO7unpyIiEhNZ+6e7jrssZycHC8sLEx3NURERCqNmc1095yy1tPIeCIiIjGmoBcR\nEYkxBb2IiEiMKehFRERiTEHbRt+eAAAgAElEQVQvIiISYwp6ERGRGFPQi4iIxJiCXkREJMYU9CIi\nIjGmoBcREYkxBb2IiEiMKehFRERiTEEvIiISYwp6ERGRGFPQi4iIxJiCXkREJMYU9CIiIjGmoBcR\nEYkxBb2IiEiMKehFRERiTEEvIiISYwp6ERGRGFPQi4iIxJiCXkREJMYU9CIiIjGmoBcREYkxBb2I\niEiMKehFRERiTEEvIiISYwp6ERGRGFPQi4iIxJiCXkREJMYU9CIiIjGmoBcREYkxBb2IiEiMKehF\nRERiTEEvIiISYwp6ERGRGFPQi4iIxJiCXkREJMYU9CIiIjGmoBcREYkxBb2IiEiMKehFRERiLKmg\nN7MTzWyhmS02sytLWZ5pZlPNbK6ZvWxmraPyfmY2O+Fnk5mdFi0riPb5npmNM7O6UXlfM1ufsM11\nqTxhERGRmqTMoDez2sBY4CSgAzDMzDqUWO1WYLy7dwFGAzcCuPs0d+/m7t2A/kAR8Hy0TQHQDugM\n7AWcn7C/V4u3c/fRu312IiIiNVwyLfoewGJ3/8jdNwMTgYEl1ukAvBS9nlbKcoDBwBR3LwJw92c9\nArwNtN6dExAREZGdSyboWwGfJLxfHpUlmgMMil6fDjQ2s2Yl1hkKTCi58+iS/dnAfxKKjzCzOWY2\nxcw6llYpM8szs0IzK1y9enUSpyEiIlLzpKoz3uVAHzN7B+gDrAC2FS80s5aES/TPlbLt34Dp7v5q\n9H4WkOnuXYG7gSdKO6C757t7jrvntGjRIkWnISIiEi/JBP0K4MCE962jsu3cfaW7D3L37sDIqGxd\nwipDgMfdfUvidmZ2PdACuCxhXxvcfWP0+lmgrpk1T/6UREREpFgyQT8DOMTM2ppZPcIl+MmJK5hZ\nczMr3tdVwLgS+xhGicv2ZnY+cAIwzN2/TSg/wMwset0jquOa5E9JREREipUZ9O6+FRhBuOy+AJjk\n7vPMbLSZDYhW6wssNLMPgP2BMcXbm1kW4YrAKyV2/f+idd8o8RjdYOA9M5sD3AUMjTrsiYiISDlZ\nHDI0JyfHCwsL010NERGRSmNmM909p6z1NDKeiIhIjCnoRUREYkxBLyIiEmMKehERkRhT0IuIiMSY\ngl5ERCTGFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMKehERkRhT0IuIiMSYgl5ERCTG\nFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMKehERkRhT0IuIiMSYgl5ERCTGFPQiIiIx\npqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMKehERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJ\nMQW9iIhIjCnoRUREYkxBLyIiEmMKehERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJMQW9iIhI\njCnoRUREYkxBLyIiEmNJBb2ZnWhmC81ssZldWcryTDObamZzzexlM2sdlfczs9kJP5vM7LRoWVsz\neyva58NmVi8qrx+9Xxwtz0rd6YqIiNQsZQa9mdUGxgInAR2AYWbWocRqtwLj3b0LMBq4EcDdp7l7\nN3fvBvQHioDno23+AvzV3Q8G1gK/iMp/AayNyv8arSciIiK7IZkWfQ9gsbt/5O6bgYnAwBLrdABe\nil5PK2U5wGBgirsXmZkRgv/RaNkDwGnR64HRe6Llx0Tri4iISDklE/StgE8S3i+P
yhLNAQZFr08H\nGptZsxLrDAUmRK+bAevcfWsp+9x+vGj5+mh9ERERKadUdca7HOhjZu8AfYAVwLbihWbWEugMPJei\n42FmeWZWaGaFq1evTtVuRUREYiWZoF8BHJjwvnVUtp27r3T3Qe7eHRgZla1LWGUI8Li7b4nerwGa\nmlmdUva5/XjR8r2j9Xfg7vnunuPuOS1atEjiNERERGqeZIJ+BnBI1Eu+HuES/OTEFcysuZkV7+sq\nYFyJfQzju8v2uLsT7uUPjop+DjwZvZ4cvSda/lK0voiIiJRTmUEf3ScfQbjsvgCY5O7zzGy0mQ2I\nVusLLDSzD4D9gTHF20ePxx0IvFJi138ALjOzxYR78P+Myv8JNIvKLwO+9zifiIiIJMfi0FjOycnx\nwsLCdFdDRESk0pjZTHfPKWs9jYwnIiISYwp6ERGRGFPQi4iIxJiCXkREJMYU9CIxsGULzJoFMehb\nKyIppqAXqeaKimDgQDjsMPjDHxT2IrIjBb1INbZ2LRx3HPznP3DssXDLLXDJJQp7EflOnbJXqVnW\nr4f334eMDGjYMPwu/qmjT0uqkFWr4IQTYOFCmDQJfvpTuOIKuO02+OYbuPdeqKWv8iI1nqKrhMLC\n0DIqTb163/8CkMrXe+2l/5glOR9+GFryn30Gzzzz3b/ZW26BBg1gzBjYtAn++U99QRWp6fRfQAld\nu4b/OIuKws9XXyX3evXq75dv3lz+4zdoUHFfJDIywv7NUv+5SeWZOze05Ddvhpdegh49vltmBn/6\nU/g7X3ttaNk/+CDUrZu++opIeinoS2jeHE4+OTX72rIFvv66fF8YdvZ6/fpwqbZk+bZtZdcjkdl3\n4b/ffnDXXdC/f2rOVyref/8LP/kJNGoEr74KHTqUvt4114Swv+KKEPYTJ0L9+pVbVxGpGhT0Fahu\n3fDTpEnF7N89fJnY3S8P06bB8cfD3XfDRRdVTB0ldZ59FgYPhgMPhOefh8zMXa9/+eUh7C++GE4/\nHR57LNweEpGaRUFfjZmFfgP16kHTpuXffsMGGDYMfvUreO89uOMOXeKtqgoKYPhw6NIFpkwJV2OS\nMWJECPu8PDj1VHjyyXAbR0RqDnX9qsGaNIHJk0PL729/g5NOgi++SHetpKR77oGzzoIjjwxXYZIN\n+WLnnw8PPBC2Pemk8AVPRGoOBX0NV7t26Kl9//0wfTr07BkeL5T0c4dRo8Kl94EDw7Pyu3sb6Oyz\nYcIEeOONcLtm3bqUVlVEqjAFvQDhsvC0aaHTX69e8Nxz6a5Rzfbtt/Cb38Af/xj+No8+Gi7B74kh\nQ8J+Zs2CY46BNWtSUlURqeIU9LLdkUfCjBmhk9fJJ8Odd2qEtXTYsiVcqr/nHvjd71L7LPzAgeE+\n/fz50K8ffPppavYrIlWXgl52kJkZHuEaMAB++9vQiWt3xgOQ3VM8bv2ECXDjjeG2SqoHUTrppDBW\nxIcfQt++sGJFavcvIlWLgl6+p1Gj8CjW1VfDffeFEdg+/zzdtYq/xHHr//53uPLKihvcqH//cHtm\nxQro0weWLq2Y44hI+inopVS1aoVhVAsK4K23wuhr772X7lrF16pVIXALC8O49Xl5FX/Mo46CF14I\n9+qPPjq08EUkfhT0sktnngmvvBJG+DviCHj66XTXKH4+/DCE7kcfhUvqgwdX3rF79oSpU8MASkcf\nHSbIEZF4UdBLmXr2DJ30fvSjcO/+llvUSS9V5s4NIb9uXRi3fmcTKlWk7Gx4+WXYujVcVdCVG5F4\nUdBLUlq3DmOrDx4Mv/89nHtuGENddt9//xta0bVrh882cXKaytapU7hyU7t26KD3zjvpq4uIpJaC\nXpKWkREmRxk1Koy0psezdt+zz4aOd/vvHwJ/Z5PTVKZ27cKgSQ0bhs56b7+d7hqJSCoo6KVcatWC\n668PHcZmzw6t0Dlz0l2r6uWhh8IjdO3bh5Z8
WZPTVKaDDgph36xZuI3w2mvprpGI7CkFveyWn/0s\nhNS2bfDjH8Pjj6e7RtXDPfdAbu7uj1tfGTIzw2X8H/wgzHv/0kvprpGI7AkFvey2ww4LnfQ6dYJB\ng+DPf1YnvZ1xD8PZpmLc+srQqlUI+x/+EH7yk1BfEameFPSyR1q2DD22c3Nh5Mjw++uv012rqqV4\n3PpRo1I3bn1l2H//cNWhffvvhs4VkepHQS97bK+94MEHQ4t+woTwiNbKlemuVdVQkePWV4bmzcNz\n9t27hycuHnkk3TUSkfJS0EtKmMFVV4V79fPnh056M2emu1bpVRnj1leGffaB558PsxoOHQr/93/p\nrpGIlEc1/G9HqrLTTguPi9WuDb17h975NdHatWHe98oYt74yNGkSzqVvXzjnnDAHgohUDwp6Sbmu\nXUMnvexsOOOMcG/622/TXavKUzxu/YwZlTdufWVo2DAMgXziiXDBBTB2bLprJCLJUNBLhdhvv3Bv\nd/jw0Nv8jDPCeOpx99FH6Ru3vjLstVe4PTNwIIwYAbfdlu4aiUhZFPRSYerXh3Hj4NZbw7S3vXvD\n8uXprlXFmTs3PB+fznHrK0P9+qFT3pAhcPnlYZZDEam6FPRSocxCb/OnnoLFi+Hww8O0t3FTlcat\nrwx164YpjM8+G665Bq69VmMoiFRVCnqpFD/5CbzxRrj026dPvHpuV8Vx6ytDnTrwr3/B+efDn/4E\nV1yhsBepihT0Umk6dgwTpfTqFVqCV11V/TvpVeVx6ytDrVrhqYLi+/UXX1z9/6YicVONhu6QOGje\nPDyTPWIE3HQTLFgQBttp3DjdNSu/e+4JwdanD0yeXLWHtK1ItWrBXXeF0f5uvTVMX/z//l+4jSEi\n6aegl0pXr15oBXbqBJdeGjqwPfVU9WkNu8Po0eGxwYEDw9S91WFI24pkBjffHG7N3HBDCPtx46rX\nKIAicaVL95IWZmH89ylTYNmy0EmvOkyJWl3Hra8MZuEL0Jgx4SrNmWeGIYBFJL0U9JJWxx8feuE3\nbQr9+8P996e7Rju3ZUvoW1Bdx62vLFdfHe7XP/JImM74m2/SXSORmk1BL2l36KEh7Pv0gfPOCyG6\nbVu6a7Wj4nHrH3qoeo9bX1kuuyyMnPfkk2FYZM1oKJI++q9KqoR99gmPqY0YAbffDgMGwPr16a5V\nsG5dvMatryy/+lUYE/+55+CUU2rGyIgiVVFSQW9mJ5rZQjNbbGZXlrI808ymmtlcM3vZzFonLGtj\nZs+b2QIzm29mWVH5q2Y2O/pZaWZPROV9zWx9wrLrUnOqUtXVrQt33w333hvC4Ygj4MMP01unuI5b\nX1l+8QsYPx5efjmMkb9hQ7prJFLzlBn0ZlYbGAucBHQAhplZySFBbgXGu3sXYDRwY8Ky8cAt7t4e\n6AF8BuDuvd29m7t3A94A/p2wzavFy9x99G6em1RTv/xleATvf/8LI8y9/HJ66lE8bv2HH8Zz3PrK\nctZZ8PDD8OabYWChtWvTXSORmiWZFn0PYLG7f+Tum4GJwMAS63QAXopeTyteHn0hqOPuLwC4+0Z3\nL0rc0MyaAP2BJ3b7LCR2+vcPg+vsv38Ih/z8yj1+TRm3vrIMHgz//jfMnh3+tp9/nu4aidQcyQR9\nK+CThPfLo7JEc4BB0evTgcZm1gz4EbDOzP5tZu+Y2S3RFYJEpwFT3T3xot4RZjbHzKaYWcekz0Zi\n5eCDw7C5xx0HF14YHmvburXij/vf/4bL9TVl3PrKcuqpYWCh998P89p/+mm6ayRSM6SqM97lQB8z\newfoA6wAthEG5OkdLT8c+CEwvMS2w4AJCe9nAZnu3hW4m5209M0sz8wKzaxw9erVKToNqWr23jsM\npnPZZeH+/cknV+yl3+Jx6/fbr2aNW19ZTjghfMYffxy+TK1Yke4aicRfMkG/Ajgw4X3rqGw7d1/p\n7oPcvTsw
MipbR2j9z44u+28lhHZ28XZm1pxwa+CZhH1tcPeN0etngbrRejtw93x3z3H3nBYtWiR3\ntlIt1a4dnsv+5z/D/fpeveCDD1J/nJo+bn1l6dcv9MFYuTLM+Ld0abprJBJvyQT9DOAQM2trZvWA\nocDkxBXMrLmZFe/rKmBcwrZNzaw4ifsD8xM2HQw87e6bEvZ1gFl4eMnMekR1XFO+05I4Ou88mDoV\nvvgCevaEF15I3b7vuSd0GjvySJg2LbTopeIceSS8+GL4Wx59dJjCWEQqRplBH7XERwDPAQuASe4+\nz8xGm9mAaLW+wEIz+wDYHxgTbbuNcNl+qpm9Cxjwj4TdD2XHy/YQwv89M5sD3AUMddfklxL07h0e\ndWvdGk46KQT0nvzrcIc//jFMTjNgQHhWvqZOTlPZevQIX6q++ipcxn///XTXSCSeLA4ZmpOT44WF\nhemuhlSiL7+E3Nxw//7CC8P9+7p1y7ePb7+FSy4JXxaGD4d//END2qbDvHlwzDHhS9eLL0Lnzumu\nkUj1YGYz3T2nrPU0Mp5US40bw+OPh1Hq/v73MHLdmnLc4Ekct/6yyzRufTp17AjTp4cvan37wqxZ\n6a6RSLwo6KXaql07jDs/fjy8/nq4FDx/ftnblRy3/tZbNW59uv3oRyHsGzcOz9m/9Va6ayQSH/rv\nTaq9s8+GV14J93p79QqPb+2Mxq2vun74wxD2LVqEAYpefTXdNRKJBwW9xEKvXmEkvYMOCgOz3H77\n9zvpFY9b//bbGre+qmrTJnxpa906jI0/dWq6ayRS/SnoJTbatIHXXgvTov7ud2FCleK50DVuffXx\ngx+EsD/oIPjJT2DKlHTXSKR6U/cjiZWGDeGRR2DUKLjhBli0KDw+l5sLmzeHces1pG3Vt99+4dG7\n448P/SkmTQpf4OLCPXwJ3bAhTMec+Lv4dYMGcM45UL9+umsr1Z0er5PYevjh8Njcpk3QqlUYjU1D\n2lYv69aF8RJmzICCAjjjjHTXKDyxkRjIpYV0Msu2bCn7WF27hvPuqBk/pBTJPl6nFr3E1hlnhMu/\n99wTWvUa0rb6ado0fEE75RQ488zQCj7nnN3b17ZtsHFj8kG8s7Kvvy77WHXqhHkamjT57nerVuGL\nZmJZ8e/Syl5/Hc4/Hw47DG6+GUaM0NMhsnvUoheRKq/4kcipU8PjkJ06lb8FvXFj2ccxSz6ISysr\n/t2gQWqe5vj00xD2Tz8dbmPcf3/owyACybfoFfQiUi1s2gQ//enOH59s1GjPQrpJk7CPqva4pTvk\n58Oll8Jee4URHAcNKns7iT8FvYjEzpYtYfrgunV3DOnGjcMASnG2cGGYeKmwEM49F+68M5y31Fy6\nRy8isVM8TG5NdOih4b79DTfAmDHhEcQHH4Qf/zjdNZOqTl07RESqibp1YfToMGqge5jN8brrkuvB\nLzWXgl5EpJr58Y9h9mz4+c9DC//II+GDD9JdK6mqFPQiItVQkyYwblwYIOrDD6F799BpLwbdriTF\nFPQiItXY4MEwd25o1V94YXgM8bPP0l0rqUoU9CIi1VyrVmFGxjvuCAMMde4c5nQQAQW9iEgs1KoF\nl1wCM2dCy5ZhNMGLLgrTN0vNpqAXEYmRjh3hrbfgiivg73+H7Ozw7L3UXAp6EZGYqV8/jI8/dWoY\nPviII8Kz99u2pbtmkg4KehGRmOrXL3TUGzwYrrkG+vSBjz9Od61qnlWr4J//TN/xFfQiIjG2zz4w\nYUKY7vbdd8PUtw88oMfwKkPxWAeZmZCXB0uXpqceCnoRkRrgzDND6z47G4YPhyFDYM2adNcqfr79\nFp56KlxN6d4dHnsMfvnLMFdBuqbKVtCLiNQQmZnhvv1f/gJPPgldusALL6S7VvHw1Vcwdiy0awcD\nBoRBjG6+GZYvh7vugoMPTl/dFPQiIjVI7drw+9+Hnvl77x3mub/00jANsJ
Tf8uXwhz9A69YwYgTs\nuy9MnBiC/ooroGnTdNdQQS8iUiN17x6eub/44jDQTk4OzJmT7lpVHzNmhNshbdvCrbfCsceG2QXf\nfBPOOCNMQFRVKOhFRGqovfYKl5WnTAn363v0gNtuC/eZ5fu2bQv33I86KnxWzzwDv/lNaL0/8kh4\njLEqUtCLiNRwJ54YeuSffDJcfnlonX7ySbprVXVs2BCuehx8cHhUceXK8P6TT8IXo6ysdNdw1xT0\nIiJC8+bw73+H571nzAgd9R5+ON21Sq8lS+Cyy8L990svDb8fewwWLQrDDTdpku4aJkdBLyIiAJjB\neeeF57/bt4ehQ+Hss2H9+nTXrPK4h3vtP/sZHHQQ3H03nHpq+PLz6qswaFDo0FidKOhFRGQHBx0E\n06fD6NFhsJ0uXeCVV9Jdq4q1ZUvoLd+rV5jy98UXQ6/5jz8Ogw3l5KS7hrtPQS8iIt9Tpw5cey38\n979Qr14YAObKK2Hz5nTXLLXWrYNbbglfboYNg7Vrw/Pwy5fDTTeFy/XVnYJeRER2qmdPeOcdOP/8\nMNBOr16wYEG6a7XnFi8Ojxa2bh3GFTj4YJg8Gd5/H371K2jYMN01TB0FvYiI7FKjRpCfH0bT++ST\nMIzu2LHVb7x893AL4rTT4Ec/CtP4Dh4cvsi89FK4F18rhqkYw1MSEZGKMGBAeAyvf/8wCtzJJ4eZ\n2aq6zZvhwQfhsMOgb1947TUYOTJMMvOvf0G3bumuYcVS0IuISNIOOACefhr+9rfQOu7cGR5/PN21\nKt2aNfDnP4fn3M85Jwzzm58frkrccAO0bJnuGlYOBb2IiJSLGVx0EcyaFSbKGTQo3MPfuDHdNQve\nfz/MGHfggaHl3rlzGP1v3jy44IIwImBNoqAXEZHd0q4dvPEGXH01jBsXLoG/+WZ66uIeZuI7+eQw\nBsC//gW5ufDee/Dcc2H0P7P01C3dFPQiIrLb6tWDMWPCZfytW8M48KNGhdeVYdOm8CWja9cwE9/M\nmfDHP8KyZfCPf0DHjpVTj6pMQS8iInusd+8w+11ubgjao44Kj7BVlM8+C8fJzIRf/CK01u+/PwT8\nddfBfvtV3LGrGwW9iIikxN57wwMPhDHyP/ggXMq/777UPob33nsh2Nu0CVcODj88jGI3ezYMHw71\n66fuWHGhoBcRkZQaMgTmzg2D61xwAZx+Oqxevfv7+/bb0JnuuONCx7oJE8KY/O+/H54AOOaYmnv/\nPRkKehERSbnWreH55+H220NIF/d8L4+iojCoTceOoZPd/PnhcblPPgmP9x16aMXUPW6SCnozO9HM\nFprZYjO7spTlmWY21czmmtnLZtY6YVkbM3vezBaY2Xwzy4rK/2VmH5vZ7OinW1RuZnZXdKy5Zpad\nmlMVEZHKVKtWmN61sDDcMz/55DDQTlHRrrdbuTI8FtemTXhMrmFD+L//CxPMXHUVNGtWOfWPizKD\n3sxqA2OBk4AOwDAz61BitVuB8e7eBRgN3JiwbDxwi7u3B3oAnyUsu8Ldu0U/s6Oyk4BDop884N7y\nn5aIiFQVnTvD22+Hud3Hjg0j1M2a9f313nknDGyTlQU33hg6+E2fHqaIzc0NPfyl/JJp0fcAFrv7\nR+6+GZgIDCyxTgfgpej1tOLl0ReCOu7+AoC7b3T3Mr7LMZDwpcHd/U2gqZnVkPGLRETiqUEDuO22\n0HHuyy/DZDk33RSmh33yyTA0bXZ2GGXvootg0aLwundv3X/fU8kEfSvgk4T3y6OyRHOAQdHr04HG\nZtYM+BGwzsz+bWbvmNkt0RWCYmOiy/N/NbPivpLJHE9ERKqhY44JHfUGDfruMvxpp4XL8rfeGu6/\n33lnmDZWUiNVnfEuB/qY2TtAH2AFsA2oA/SOlh8O/BAYHm1zFdAuKt8X+EN5DmhmeWZWaGaFq/ek\nO6eIiFSqffeFiRPDRDMnnBAex/vwQ/
jd76Bp03TXLn6SCfoVwIEJ71tHZdu5+0p3H+Tu3YGRUdk6\nQmt8dnTZfyvwBJAdLV8VXZ7/BrifcIsgqeNF2+e7e46757Ro0SKJ0xARkarCDM46Cx55JDyOV6dO\numsUX8kE/QzgEDNra2b1gKHA5MQVzKy5mRXv6ypgXMK2Tc2sOIn7A/OjbVpGvw04DXgvWmcycE7U\n+74XsN7dq8FEiCIiIlVPmd+h3H2rmY0AngNqA+PcfZ6ZjQYK3X0y0Be40cwcmA78Otp2m5ldDkyN\nAn0m8I9o1wXRFwADZgO/jMqfBU4GFgNFwLkpOVMREZEayDyVYxOmSU5OjhcWFqa7GiIiIpXGzGa6\ne05Z62lkPBERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMK\nehERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYiwW89Gb2WpgaQp32Rz4\nPIX7q6p0nvGi84wXnWe8VMR5Zrp7i7JWikXQp5qZFbp7TrrrUdF0nvGi84wXnWe8pPM8deleREQk\nxhT0IiIiMaagL11+uitQSXSe8aLzjBedZ7yk7Tx1j15ERCTG1KIXERGJMQV9AjMbZ2afmdl76a5L\nRTKzA81smpnNN7N5ZnZJuutUEcysgZm9bWZzovP8Y7rrVJHMrLaZvWNmT6e7LhXFzJaY2btmNtvM\nCtNdn4piZk3N7FEze9/MFpjZEemuU6qZ2aHR37H4Z4OZ/Tbd9aoIZnZp9H/Qe2Y2wcwaVOrxden+\nO2Z2NLARGO/undJdn4piZi2Blu4+y8waAzOB09x9fpqrllJmZkBDd99oZnWB14BL3P3NNFetQpjZ\nZUAO0MTdT0l3fSqCmS0Bctw91s9dm9kDwKvufp+Z1QMy3H1duutVUcysNrAC6OnuqRwTJe3MrBXh\n/54O7v61mU0CnnX3f1VWHdSiT+Du04Ev0l2Piubuq9x9VvT6S2AB0Cq9tUo9DzZGb+tGP7H8Zmtm\nrYGfAPeluy6yZ8xsb+Bo4J8A7r45ziEfOQb4MG4hn6AOsJeZ1QEygJWVeXAFfQ1nZllAd+Ct9Nak\nYkSXs2cDnwEvuHsszxO4A/g98G26K1LBHHjezGaaWV66K1NB2gKrgfujWzH3mVnDdFeqgg3l/7dz\nh65VRnEYx78PWHQGDSLKgm11LgxxJufEicwsaDKKYNR/wCRWk80xwW0mQQyuCILiQA3ahHlFnUnB\nJPIY3iMMs+974Nznky43PeHlPu85v3MurNQO0Qfbn4BbwBbwGfhu+8mQGVL0Y0zSXmANuGb7R+08\nfbD92/Y0MAnMSmpuJCPpHLBt+1XtLAM4YXsGWASulHFba3YBM8Ad20eBn8D1upH6U0YTS8CD2ln6\nIGk/cJ7uBe4wMCHp4s+obtsAAAFOSURBVJAZUvRjqsys14Bl2+u18/StbH1uAGdqZ+nBHLBU5tf3\ngZOS7tWN1I+yOsL2NvAQmK2bqBcjYLRj92mVrvhbtQhs2v5aO0hPTgEfbH+z/QtYB44PGSBFP4bK\nIbW7wDvbt2vn6YukA5L2lc+7gQXgfd1U/5/tG7YnbR+h2wJ9anvQFcMQJE2Uw6OUrezTQHM3ZGx/\nAT5KmipfzQNNHZT9xwUa3bYvtoBjkvaU3955unNRg0nR7yBpBXgOTEkaSbpcO1NP5oBLdCu/v1db\nztYO1YNDwIakN8BLuhl9s1fPxsBB4Jmk18AL4JHtx5Uz9eUqsFye3WngZuU8vSgvbAt0q9wmlZ2Z\nVWATeEvXu4P+S16u10VERDQsK/qIiIiGpegjIiIalqKPiIhoWIo+IiKiYSn6iIiIhqXoIyIiGpai\nj4iIaFiKPiIiomF/ABFx9NHo/BaYAAAAAElFTkSuQmCC\n", 937 | "text/plain": [ 938 | "
" 939 | ] 940 | }, 941 | "metadata": { 942 | "tags": [] 943 | }, 944 | "output_type": "display_data" 945 | } 946 | ], 947 | "source": [ 948 | "acc = history.history['crf_viterbi_accuracy']  # series plotted as 'Training acc'\n", 949 | "val_acc = history.history['val_crf_viterbi_accuracy']  # series plotted as 'Validation acc'\n", 950 | "loss = history.history['loss']  # reused by the loss plot in the next cell\n", 951 | "val_loss = history.history['val_loss']  # reused by the loss plot in the next cell\n", 952 | "plt.figure(figsize = (8, 8))\n", 953 | "epochs = range(1, len(acc) + 1)  # 1-based epoch numbers for the x-axis\n", 954 | "plt.plot(epochs, acc, 'bo', label='Training acc')\n", 955 | "plt.plot(epochs, val_acc, 'b', label='Validation acc')\n", 956 | "plt.title('Training and validation accuracy')\n", 957 | "plt.legend();" 958 | ] 959 | }, 960 | { 961 | "cell_type": "code", 962 | "execution_count": 121, 963 | "metadata": { 964 | "colab": { 965 | "base_uri": "https://localhost:8080/", 966 | "height": 499 967 | }, 968 | "colab_type": "code", 969 | "executionInfo": { 970 | "elapsed": 1215, 971 | "status": "ok", 972 | "timestamp": 1560709916909, 973 | "user": { 974 | "displayName": "CHAVAN AKSHAY", 975 | "photoUrl": "", 976 | "userId": "10674464813829582221" 977 | }, 978 | "user_tz": -330 979 | }, 980 | "id": "sASoBK0_bYgo", 981 | "outputId": "bfe6b0a3-a22b-4a0a-db9c-793a71184516" 982 | }, 983 | "outputs": [ 984 | { 985 | "data": { 986 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAe0AAAHiCAYAAADF4pQuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3X28VWWd9/HPT0CeBQTMhBSKSXlQ\ngU74gISomVrqYOqgaNmDqHejZdnEqJVjtzPqOGV6O91So6YhxGiWPVozcqdOhQIqqPgsIoIIKAiC\nyYHr/mPtAwc8nHPgPGyufT7v12u/ztlrX2vt39qHF999XWuta0VKCUmStOvbrdwFSJKkxjG0JUnK\nhKEtSVImDG1JkjJhaEuSlAlDW5KkTBjaUi0R0S4i1kbEvs3ZtpwiYlBENPu1nRFxTEQsrPX8mYgY\n05i2O/FeP4qIS3d2/Xq2+78j4rbm3q7UUtqXuwCpKSJiba2nXYC/AhtLz89LKU3dke2llDYC3Zq7\nbVuQUtq/ObYTEV8EzkopHVlr219sjm1LuTO0lbWU0ubQLPXkvphS+q/ttY+I9iml6taoTZKam8Pj\nqmil4c+fRsS0iFgDnBURh0XEXyJiVUQsjYgbIqJDqX37iEgRMaD0/Cel138bEWsi4s8RMXBH25Ze\nPz4ino2I1RFxY0T8T0Scs526G1PjeRHxfES8GRE31Fq3XUR8LyJWRsSLwHH1fD6XRcT0bZbdFBHf\nLf3+xYhYUNqfF0q94O1ta3FEHFn6vUtE3FGq7UngI9u0vTwiXixt98mIOKm0/EDg/wBjSoceVtT6\nbK+otf75pX1fGRE/j4j3N+azaUhEjC/Vsyoi7o+I/Wu9dmlELImItyLi6Vr7emhEzC0tXxYR/9rY\n95N2lKGttmA8cCfQA/gpUA18GegDjKYItfPqWf9M4JvAnsAi4Ds72jYi9gJmAF8vve9LwKh6ttOY\nGk+gCMMRFF9GjiktvwA4FjgY+Chwej3vMw34VER0LdXZHjiN4vMCWAZ8EtgDOBe4MSIOqmd7Na4E\nPgB8sFTnZ7d5/dnSfvUArgLujIj3pZTmA38PPJhS6pZS6rPthiPi2NL2TwX6AUuAbQ+DbO+z2a6I\nGAzcAVwI9AX+C7g3IjpExFCKz39kSmkP4HiKvy/AjcC/lpYPAu5q6L2knWVoqy14KKX0y5TSppTS\n+pTSIymlWSml6pTSi8AUYGw969+VUpqdUtpAEQ7Dd6Ltp4DHUkq/KL32PWDF9jbSyBr/JaW0OqW0\nEPh/td7rdOB7KaXFKaWVwNX1vM+LwBPAyaVFHwfeTCnNLr3+y5TSi6lwP/DfQJ0nm23jdOB/p5Te\nTCm9TNF7rv2+M1JKS0t/kzuBhUBVI7YLMBH4UUrpsZTSO8BkYGxE9K/VZnufTX0mAPemlO4v/Y2u\npvhScQjFl6hOwNDSIZaXSp8dwAbgbyKid0ppTUppViP3Q9phhrbagldqP4mIAyLi1xHxWkS8RdFr\ne0+PrpbXav2+jvpPPtte231q15GKO/Us3t5GGlljo94LeLmeeqHoVZ9R+v1MtvSyiYhPRcSsiHgj\nIlZR9ODr+6xqvL++GiLinIh4vDQMvQo4oJHbhWL/Nm8vpfQW8CZFr7vGjvzNtrfdTRR/o34ppWeA\nr1H8HV6P4nDL3qWmnwOGAM9ExMMRcUIj90PaYYa22oJtL3e6maJ3Oag0pPktIFq4hqXA5p5gRARb\nh8y2mlLjUoqh6RoNXZI2AzgmIvpR9LjvLNXYmWKo91+A96WUegK/b2Qdr22vhoj4IPADimH83qXt\nPl1ruw1dnrYE2K/W9roDvYBXG1HXjmx3N4q/2asAKaWfpJRGAwOBdhSfCymlZ1JKE4C9gH8D7o6I\nTk2sRaqToa22qDuwGni7dByzvuPZzeVXwMiIOLF03PjLFMdNW
6LGGcBXIqJfRPQGvlFf45TSa8BD\nwG3AMyml50ovdQR2B5YDGyPiU8DRO1DDpRHRM4rr2P++1mvdKIJ5OcX3l3Mpeto1lgH9a068q8M0\n4AsRcVBEdKQIzwdTStsdudiBmk+KiCNL7/11YA0wKyIGR8S40vutLz02UezA2RHRp9QzX13at01N\nrEWqk6GttuhrFCdGraHo0f60pd8wpbQM+Dvgu8BK4EPAoxTXlTd3jT+gOPY8H3iExp0YdSdwDLWG\nxlNKq4CLgXuANyhO/PpVI2v4NkWPfyHwW+D2WtudR3Hy1sOlNvsDtY8D/wF4DlgWEbWHuWvW/x3F\nMPU9pfX3pTjO3SQppScpPvMfUHyhOA44qXR8uyNwLcV5CK9R9OwvK616ArAgiqsTrgP+LqX0blPr\nkeoSxaE1Sa0pItpRDMeemlJ6sNz1SMqDPW2plUTEcaXh4o4Ul4VtoOhtSlKjGNpS6zkCeJFi6PUT\nwPiU0vaGxyXpPRwelyQpE/a0JUnKhKEtSVImdrm7fPXp0ycNGDCg3GVIktRq5syZsyKlVN/cDcAu\nGNoDBgxg9uzZ5S5DkqRWExENTTcMODwuSVI2DG1JkjJhaEuSlIld7pi2JKnxNmzYwOLFi3nnnXfK\nXYoaoVOnTvTv358OHbZ3P5z6GdqSlLHFixfTvXt3BgwYQHHHV+2qUkqsXLmSxYsXM3DgwJ3ahsPj\nkpSxd955h969exvYGYgIevfu3aRREUNbkjJnYOejqX8rQ1uStNNWrlzJ8OHDGT58OHvvvTf9+vXb\n/Pzddxt3W/HPfe5zPPPMM/W2uemmm5g6dWpzlMwRRxzBY4891izbam0e05akNmTqVLjsMli0CPbd\nF666CiZO3Pnt9e7de3MAXnHFFXTr1o1LLrlkqzYpJVJK7LZb3f3EW2+9tcH3+dKXvrTzRVYQe9qS\n1EZMnQqTJsHLL0NKxc9Jk4rlze35559nyJAhTJw4kaFDh7J06VImTZpEVVUVQ4cO5corr9zctqbn\nW11dTc+ePZk8eTIHH3wwhx12GK+//joAl19+Oddff/3m9pMnT2bUqFHsv//+/OlPfwLg7bff5tOf\n/jRDhgzh1FNPpaqqqsEe9U9+8hMOPPBAhg0bxqWXXgpAdXU1Z5999ublN9xwAwDf+973GDJkCAcd\ndBBnnXVWs39mjWFPW5LaiMsug3Xrtl62bl2xvCm97e15+umnuf3226mqqgLg6quvZs8996S6uppx\n48Zx6qmnMmTIkK3WWb16NWPHjuXqq6/mq1/9KrfccguTJ09+z7ZTSjz88MPce++9XHnllfzud7/j\nxhtvZO+99+buu+/m8ccfZ+TIkfXWt3jxYi6//HJmz55Njx49OOaYY/jVr35F3759WbFiBfPnzwdg\n1apVAFx77bW8/PLL7L777puXtTZ72pLURixatGPLm+pDH/rQ5sAGmDZtGiNHjmTkyJEsWLCAp556\n6j3rdO7cmeOPPx6Aj3zkIyxcuLDObZ9yyinvafPQQw8xYcIEAA4++GCGDh1ab32zZs3iqKOOok+f\nPnTo0IEzzzyTBx54gEGDBvHMM89w0UUXcd9999GjRw8Ahg4dyllnncXUqVN3+jrrpjK0JamN2Hff\nHVveVF27dt38+3PPPcf3v/997r//fubNm8dxxx1X56VPu+++++bf27VrR3V1dZ3b7tixY4Ntdlbv\n3r2ZN28eY8aM4aabbuK8884D4L777uP888/nkUceYdSoUWzcuLFZ37cxDG1JaiOuugq6dNl6WZcu\nxfKW9tZbb9G9e3f22GMPli5dyn333dfs7zF69GhmzJgBwPz58+vsydd2yCGHMHPmTFauXEl1dTXT\np09n7NixLF++nJQSp512GldeeSVz585l48aNLF68mKOOOoprr72WFStWsG7bYw2twGPaktRG1By3\nbs6zxxtr5MiRDBkyhAMOO
ID99tuP0aNHN/t7XHjhhXzmM59hyJAhmx81Q9t16d+/P9/5znc48sgj\nSSlx4okn8slPfpK5c+fyhS98gZQSEcE111xDdXU1Z555JmvWrGHTpk1ccskldO/evdn3oSGRUmr1\nN61PVVVVaq77aTf3pQ2StKtZsGABgwcPLncZu4Tq6mqqq6vp1KkTzz33HMceeyzPPfcc7dvvWv3T\nuv5mETEnpVS1nVU227X2pBnVXNpQM3pRc2kDGNySVInWrl3L0UcfTXV1NSklbr755l0usJuqsvam\nlta+tEGSVF49e/Zkzpw55S6jRVXsiWitfWmDJEktrWJDu7UvbZAkqaVVbGiX89IGSZJaQsWG9sSJ\nMGUK7LcfRBQ/p0zxeLYkKV8VG9pQBPTChbBpU/HTwJak5jVu3Lj3TJRy/fXXc8EFF9S7Xrdu3QBY\nsmQJp556ap1tjjzySBq6BPj666/fapKTE044oVnmBb/iiiu47rrrmryd5lbRoS1JallnnHEG06dP\n32rZ9OnTOeOMMxq1/j777MNdd9210++/bWj/5je/oWfPnju9vV2doS1J2mmnnnoqv/71r3n33XcB\nWLhwIUuWLGHMmDGbr5seOXIkBx54IL/4xS/es/7ChQsZNmwYAOvXr2fChAkMHjyY8ePHs379+s3t\nLrjggs239fz2t78NwA033MCSJUsYN24c48aNA2DAgAGsWLECgO9+97sMGzaMYcOGbb6t58KFCxk8\neDDnnnsuQ4cO5dhjj93qfery2GOPceihh3LQQQcxfvx43nzzzc3vX3Orzpoblfzxj39k+PDhDB8+\nnBEjRrBmzZqd/mzrUrHXaUtSW/OVr0ADt4/eYcOHQynv6rTnnnsyatQofvvb33LyySczffp0Tj/9\ndCKCTp06cc8997DHHnuwYsUKDj30UE466SQios5t/eAHP6BLly4sWLCAefPmbXVrzauuuoo999yT\njRs3cvTRRzNv3jwuuugivvvd7zJz5kz69Omz1bbmzJnDrbfeyqxZs0gpccghhzB27Fh69erFc889\nx7Rp0/jhD3/I6aefzt13313v/bE/85nPcOONNzJ27Fi+9a1v8U//9E9cf/31XH311bz00kt07Nhx\n85D8ddddx0033cTo0aNZu3YtnTp12oFPu2H2tCVJTVJ7iLz20HhKiUsvvZSDDjqIY445hldffZVl\ny5ZtdzsPPPDA5vA86KCDOOiggza/NmPGDEaOHMmIESN48sknG7wZyEMPPcT48ePp2rUr3bp145RT\nTuHBBx8EYODAgQwfPhyo//afUNzfe9WqVYwdOxaAz372szzwwAOba5w4cSI/+clPNs+8Nnr0aL76\n1a9yww03sGrVqmafkc2etiRViPp6xC3p5JNP5uKLL2bu3LmsW7eOj3zkIwBMnTqV5cuXM2fOHDp0\n6MCAAQPqvB1nQ1566SWuu+46HnnkEXr16sU555yzU9upUXNbTyhu7dnQ8Pj2/PrXv+aBBx7gl7/8\nJVdddRXz589n8uTJfPKTn+Q3v/kNo0eP5r777uOAAw7Y6Vq3ZU9bktQk3bp1Y9y4cXz+85/f6gS0\n1atXs9dee9GhQwdmzpzJyy+/XO92Pvaxj3HnnXcC8MQTTzBv3jyguK1n165d6dGjB8uWLeO3v/3t\n5nW6d+9e53HjMWPG8POf/5x169bx9ttvc8899zBmzJgd3rcePXrQq1evzb30O+64g7Fjx7Jp0yZe\neeUVxo0bxzXXXMPq1atZu3YtL7zwAgceeCDf+MY3+OhHP8rTTz+9w+9ZH3vakqQmO+OMMxg/fvxW\nZ5JPnDiRE088kQMPPJCqqqoGe5wXXHABn/vc5xg8eDCDBw/e3GM/+OCDGTFiBAcccAAf+MAHtrqt\n56RJkzjuuOPYZ599mDlz5ublI0eO5JxzzmHUqFEAfPGLX2TEiBH1DoVvz49//GPOP/981q1
bxwc/\n+EFuvfVWNm7cyFlnncXq1atJKXHRRRfRs2dPvvnNbzJz5kx22203hg4dyvHHH7/D71efir41pyRV\nOm/NmZ+m3JrT4XFJkjJhaEuSlAlDuwJMnQoDBsBuuxU/p04td0WSpJbgiWiZmzoVJk2Cmln8Xn65\neA7OtS61FSml7U5Yol1LU88js6educsu2xLYNdatK5ZLqnydOnVi5cqVTQ4DtbyUEitXrmzSLGn2\ntDO3aNGOLZdUWfr378/ixYtZvnx5uUtRI3Tq1In+/fvv9PqGdub23bcYEq9ruaTK16FDBwYOHFju\nMtRKHB7P3FVXQZcuWy/r0qVYLkmqLIZ25iZOhClTYL/9IKL4OWWKJ6FJUiVyeLwCTJxoSEtSW2BP\nW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlSJgxtZcO7mUlq67xOW1nwbmaSZE9bmfBuZpJk\naCsT3s1MkgxtZWJ7dy3zbmaS2hJDW1nwbmaSZGgrE97NTJI8e1wZ8W5mkto6e9qSJGXC0JYkKROG\ntiRJmWhUaEfExRHxZEQ8ERHTIqLTNq/vGxEzI+LRiJgXESeUln88IuZExPzSz6NaYickSWoLGgzt\niOgHXARUpZSGAe2ACds0uxyYkVIaUXrt30vLVwAnppQOBD4L3NFchUuVyjnWJW1PY88ebw90jogN\nQBdgyTavJ2CP0u89al5PKT1aq82TpW10TCn9dedLliqXc6xLqk+DPe2U0qvAdcAiYCmwOqX0+22a\nXQGcFRGLgd8AF9axqU8Dcw1safucY11SfRozPN4LOBkYCOwDdI2Is7ZpdgZwW0qpP3ACcEdE7FZr\nG0OBa4DztvMekyJidkTMXr58+c7tiVQBnGNdUn0acyLaMcBLKaXlKaUNwM+Aw7dp8wVgBkBK6c9A\nJ6APQET0B+4BPpNSeqGuN0gpTUkpVaWUqvr27btzeyJVAOdYl1SfxoT2IuDQiOgSEQEcDSyoo83R\nABExmCK0l0dET+DXwOSU0v80X9lSZXKOdUn1acwx7VnAXcBcYH5pnSkRcWVEnFRq9jXg3Ih4HJgG\nnJNSSsDfA4OAb0XEY6XHXi2xI1IlcI51SfWJIlt3HVVVVWn27NnlLkOSpFYTEXNSSlUNtXNGNEmS\nMmFoS5KUCUNbkqRMGNqSJGXC0JZUFs6xLu24xs49LknNxjnWpZ1jT1tSq3OOdWnnGNqSWp1zrEs7\nx9CW1OqcY13aOYa2pFbnHOvSzjG0JbU651iXdo5nj0sqi4kTDWlpR9nTliQpE4a2JEmZMLQlScqE\noS1JUiYMbUlqQc6xrubk2eOS1EKcY13NzZ62JLUQ51hXczO0JamFOMe6mpuhLUktxDnW1dwMbUlq\nIc6xruZmaEtSC3GOdTU3zx6XpBbkHOtqTva0JUnKhKEtSVImDG1JUpM581vr8Ji2JKlJnPmt9djT\nliQ1iTO/tR5DW5LUJM781noMbUlSkzjzW+sxtCVJTeLMb63H0JYkNYkzv7Uezx6XJDWZM7+1Dnva\nkiRlwtCWJCkThrYkSY1U7pnfPKYtSVIj7Aozv9nTliSpEXaFmd8MbUmSGmFXmPnN0JYkqRF2hZnf\nDG1JkhphV5j5zdCWJKkRdoWZ3zx7XJKkRir3zG/2tCVJyoShLUlSJgxtSZIyYWhLkpQJQ1uSpEwY\n2pIkZcLQliQpE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKROGtiRJ\nmTC0JUnKhKEtSVImDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlSJgxt\nSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNbkqRM\nGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEtSVImGhXaEXF
xRDwZEU9ExLSI6LTN6/tGxMyIeDQi5kXE\nCbVe+8eIeD4inomITzT3DkiS1FY0GNoR0Q+4CKhKKQ0D2gETtml2OTAjpTSi9Nq/l9YdUno+FDgO\n+PeIaNd85UuS1HY0dni8PdA5ItoDXYAl27yegD1Kv/eo9frJwPSU0l9TSi8BzwOjmlayJEltU4Oh\nnVJ6FbgOWAQsBVanlH6/TbMrgLMiYjHwG+DC0vJ+wCu12i0uLdtKREyKiNkRMXv58uU7vBOSJLUF\njRke70XRYx4I7AN0jYiztml2BnBbSqk/cAJwR0Q0+iS3lNKUlFJVSqmqb9++ja9ekqQ2pDHBegzw\nUkppeUppA/Az4PBt2nwBmAGQUvoz0AnoA7wKfKBWu/6lZZIkaQc1JrQXAYdGRJeICOBoYEEdbY4G\niIjBFKG9HLgXmBARHSNiIPA3wMPNVbwkSW1J+4YapJRmRcRdwFygGngUmBIRVwKzU0r3Al8DfhgR\nF1OclHZOSikBT0bEDOCp0rpfSiltbKF9kSSpokWRrbuOqqqqNHv27HKXIUlSq4mIOSmlqobaOSOa\nJEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEtSVIm\nDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlSJgxtSZIyYWhLkpQJQ1uS\npEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKROG\ntiRJmTC0JUnKhKEtSVImDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlS\nJgxtSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNb\nkqRMGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEtSVImDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkT\nhrYkSZkwtCVJyoShLUlSJgxtSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1J\nUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEtSVImDG1JkjJhaEuSlIlG\nhXZEXBwRT0bEExExLSI6bfP69yLisdLj2YhYVeu1a0vrLoiIGyIimnsnJElqCxoM7YjoB1wEVKWU\nhgHtgAm126SULk4pDU8pDQduBH5WWvdwYDRwEDAM+Cgwtln3QJKkNqKxw+Ptgc4R0R7oAiypp+0Z\nwLTS7wnoBOwOdAQ6AMt2rlRJktq2BkM7pfQqcB2wCFgKrE4p/b6uthGxHzAQuL+07p+BmaX1lgL3\npZQWNE/pkiS1LY0ZHu8FnEwRxvsAXSPirO00nwDclVLaWFp3EDAY6A/0A46KiDF1vMekiJgdEbOX\nL1++c3siSVKFa8zw+DHASyml5SmlDRTHqw/fTtsJbBkaBxgP/CWltDaltBb4LXDYtiullKaklKpS\nSlV9+/bdsT2QJKmNaExoLwIOjYgupTO/jwbeM8QdEQcAvYA/b7Pu2IhoHxEdKE5Cc3hckqSd0Jhj\n2rOAu4C5wPzSOlMi4sqIOKlW0wnA9JRSqrXsLuCF0nqPA4+nlH7ZXMVLktSWxNYZW35VVVVp9uzZ\n5S5DkqRWExFzUkpVDbVzRjRJkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlS\nJgxtSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNb\nkqRMGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEtSVImDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkT\nhrYkSZkwtCVJyoS
hLUlSJgxtSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1J\nUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEtSVImDG1JkjJhaEuSlAlD\nW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlSJgxtSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQp\nE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEt\nSVImDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlSJgxtSZIyYWhLkpQJ\nQ1uSpEwY2pIkZcLQliQpE40K7Yi4OCKejIgnImJaRHTa5vXvRcRjpcezEbGq1mv7RsTvI2JBRDwV\nEQOadxckSWob2jfUICL6ARcBQ1JK6yNiBjABuK2mTUrp4lrtLwRG1NrE7cBVKaU/REQ3YFMz1S5J\nUpvS2OHx9kDniGgPdAGW1NP2DGAaQEQMAdqnlP4AkFJam1Ja14R6JUlqsxoM7ZTSq8B1wCJgKbA6\npfT7utpGxH7AQOD+0qIPA6si4mcR8WhE/GtEtGue0iVJalsaDO2I6AWcTBHG+wBdI+Ks7TSfANyV\nUtpYet4eGANcAnwU+CBwTh3vMSkiZkfE7OXLl+/wTkiS1BY0Znj8GOCllNLylNIG4GfA4dtpO4HS\n0HjJYuCxlNKLKaVq4OfAyG1XSilNSSlVpZSq+vbtu2N7IElSG9GY0F4EHBoRXSIigKOBBds2iogD\ngF7An2stfgToGRE1SXwU8FTTSpYkqW1qzDHtWcBdwFxgfmmdKRFxZUScVKvpBGB6SinVWncjxdD4\nf0fEfCCAHzZj/ZIktRlRK2N3CVVVVWn27NnlLkOSpFYTEXNSSlUNtXNGNEmSMmFoS5KUCUNbkqRM\nGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEtSVImDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkT7ctd\ngCRJzS0l2LixeGzatOX37S3b0ecHHgh9+rT+fhnakqQmSQn++EeYMQPWrNmx8GuOAK3reUvfdfrn\nP4eTT27Z96iLoS1J2ikrV8KPfww33wzPPgvdukHfvtCu3ZbHbrvV/7xDh/pfb8w2GnreEusMG1ae\nz9zQliQ1Wkrw4INFUN91F7z7Lhx2GNx2G5x2GnTpUu4KK5uhLUlq0BtvwO23w5QpsGAB7LEHnHsu\nnHdecXxXrcPQliTVKSX405+KXvV//ie88w4ccgj8x3/A3/0ddO1a7grbHkNbkrSVVavgjjuKXvUT\nT0D37nDOOUWvevjwclfXthn7Nk7xAAAN4ElEQVTakiRSglmzil71T38K69dDVRX88IcwYUJxkpnK\nz9CWpDZs9WqYOrUI63nzinA+++yiVz1yZLmr07YMbUlqY1KC2bOLoJ42DdatgxEj4P/+XzjzzGI4\nXLsmQ1tS2aQEEeWuou1YswbuvLMI60cfLS7POuOMolddVeXfIgfOPS6p1a1ZA9/8ZnHZ0Ic/DF/4\nAtx6Kzz/fMvPZNUWzZkDkybB+98P559fzBp2002wZAn86Efw0Y8a2Lmwpy2p1WzcCLfcUgT2smUw\nfjxUV8M99xTLAfbeG444AsaMKX4efHAxA5V2zNq1xdD3lCnFUHjnzsVlWuedV1y2ZUjnydCW1Cru\nuw8uuaS4hGj0aPjFL4rwgGKu6AUL4KGHitm2HnywmG0LiuOrhx++JchHjSoCSHV77LFi+Hvq1GJE\nY+hQuOGG4uSynj3LXZ2aKtIuNhZVVVWVZs+eXe4yJDWT+fPh618vQvtDH4JrroFTTmm4p7doURHi\nNUH+xBPF8g4diuOvNT3x0aNhzz1bfj92ZW+/XVymdfPN8PDD0KkTnH56MSR++OH2q
nMQEXNSSlUN\ntjO0JbWE114rhsFvuQV69Ch+/1//Czp23LntvfFGMTvXgw8WQf7II7BhQ/Ha0KFFiNcE+b77Nt9+\n7Mrmzy+C+o474K23YPDgYvj77LP9IpMbQ1tSWaxbB//2b0WP+t134UtfKgK7uUNk/foiuGtC/H/+\npxgOhiK0a4bTx4wpwmy3Cjntdv364haYN98Mf/5z8SXo1FOLsD7iCHvVuTK0JbWqTZuKHt9ll8Gr\nr8KnPw1XXw2DBrXO+2/cWEwOUvu4+GuvFa/tuWcxjF4T5B/5COy+e+vU1VyeeqoI6ttvL6YZ/fCH\ni6D+7Gehd+9yV6emMrQltZr774evfa04CWrUqKKnfcQR5a0pJXjxxS098QcfLO75DMUx30MO2TKc\nfthhxeVnu5p33ilOyLv55mIfOnQovgyddx6MHWuvupIY2pJa3NNPFyeZ/epXsN9+8C//UlxWtKsO\nRb/++tYntz36aNFD32234kYYtS8123vv8tX59NPFpVo//nFxLH/QoOKksnPOgb59y1eXWo6hLanF\nLF8OV1xR9AC7doVLL4Uvf7noweZk7Vr4y1+29Mb/8pfimDwUQVn7uPigQS3bs/3rX+Huu4vP9IEH\noH374jr2886DceN23S9Cah6GtqRm98478P3vw1VXFeF2/vnw7W9XTu9vw4ai911zTPyhh2DlyuK1\n972vCPGaID/44CJYm+rZZ4te9W23Fe/1wQ/CuefC5z5XvKfaBkNbUrPZtAmmT4d//Mfi+ukTT4Rr\nr4UDDih3ZS0rpWKouvbJbQsXFq9161YcC68ZTj/kkGIu78Z4991iFribb4aZM4vwP/nkold99NH2\nqtsiQ1tSs3joIfjqV4vLq0aMKE4yGzeu3FWVz+LFWx8Xnz+/CPcOHYqz0mt64qNHv/es7hdeKHrV\nt95aHGIYMGBLr/r97y/L7mgXYWhLapLnn4dvfAN+9jPo168YEj/7bHuB21q1autJXx5+uOhJAwwZ\nUgT4sGHFtK3/9V/FPOonnlj0qj/+cedVV8HQlrRT3ngDvvOd4i5Qu+8OkycXPe3GDv22de+8U9yg\no/akL6tXwwc+UPSqP//54kuQVFtjQ9sbhkgCirOXb7qpCOy33ipul3nlleW99ClHnTptOWENikvK\nXn65uCTOXrWaytCW2riUikuNvvGNYjKST3wCrruuGNJV07VrV5wRLjUHj05JbdisWcUx19NOK4a/\nf/e74mFgS7smQ1tqgxYuhAkT4NBDizOaf/jDYgrST3yi3JVJqo/D41IbsmoV/PM/FxOktGtX3H3r\nH/6huOZY0q7P0JbagA0biok8rriiODv8s58tTjjr37/clUnaEQ6PSxUsJbj3XjjwQLjwwmLqzTlz\nisk9DGwpP4a2VKHmzIGjjiqmx4yAX/6ymNxjxIhyVyZpZxnaUoV55RX4zGegqgqeeKK49nrePPjU\np7z/spQ7j2lLFWLNGrjmmmJu8JSKmcwmT4YePcpdmaTmYmhLmauuhltugW99C5YtgzPPLM4Q32+/\nclcmqbkZ2lKmUiomQvn61+HJJ4tpM++9F0aNKndlklqKoa0WlVJxx6O334a1a4ufNY/6nm/vtfXr\nYY894H3vq/ux117Fz969K3ue53nz4JJL4A9/gEGDimlIx4/3mLVU6QxtAcUQ644Ga2ODduPGxtcR\nAV27FpN9dO265dG9e3Hjis6dizsmLVtW3Md42bLiGuRt7bYb9O1bf7DXPPr2Le6FnIOlS4sJUW69\ntThWff31cMEFxd24JFU+QztzGzbASy8V9z5+882dD9qa+/82VufOWwK1dsD26rX1823Dt6HXOnXa\nsd5iSsUsX8uWweuvFz/rejz3XPFz/fq6t9O7d8PhXvPo2HHHPqvm8PbbxQlm115b/K2+8hW4/PLi\n85bUdhjaGUgJXn0Vnn32vY8XX9x+T7ZDh7qDc
a+9di5Qa37v0mXXGXqOKIKrVy844ID626ZUfEmp\nL9yXLSvuhbxsWXE2dl169Gg42GseXbs2bf82boTbby8CeskSOPVUuPpq+NCHmrZdSXkytHchb7xR\ndzA/9xysW7elXefO8Dd/U8xuddpp8OEPF8c1+/TZOmBzGfJtLRHFMHv37o0LvfXrG+7BP/kk3H9/\nMcpRl65dGw72mscee2w9yvDf/w1f+xo8/jgccgjMmAGjRzfPZyEpT4Z2K1u3rhjKriucV67c0q5d\nOxg4sAjkceOKnzWPfv2KY7ZqWZ07w4ABxaMh775bhHt9Af/CC/CnP8GKFUWvf1sdO24J+Hbtittm\nDhgA06fD6ad7kpkkQ7tFVFcXtz6sK5hfeWXrtv36FUH86U9vHcwDB3pyUU52372Yy7sx83lXVxdf\n0Oobon/zzeL49YUXFsf5JQkM7Z2WUnEmb13B/MILxX/MNXr2hP33hyOP3DqYBw3ylohtUfv2W4bE\nJWlHGNoNePPN4phyXeH89ttb2nXqVBxnHjYMTjll63Du3duhTUlS0xnaFCcc1T7OXDukly/f0q5d\nu+IY44c/DB/72NbB3L+/x5klSS2rzYR2dTW8/PL2jzPXPjFon32KIB4/3uPMkqRdR0WH9ttvFzdP\nqDnOXHvmrB49iuPM2/aYBw0qLgmSJGlXU9Gh3aULvPYaDBkCf/u3W4dznz4eZ5Yk5aWiQzuiuNZV\nkqRK4KlTkiRlwtCWJCkThrYkSZkwtCVJyoShLUlSJgxtSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQp\nE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKRORUip3DVuJiOXAy828\n2T7Aimbe5q7I/aws7mdlcT8rS3Pv534ppb4NNdrlQrslRMTslFJVuetoae5nZXE/K4v7WVnKtZ8O\nj0uSlAlDW5KkTLSV0J5S7gJaiftZWdzPyuJ+Vpay7GebOKYtSVIlaCs9bUmSslfRoR0Rt0TE6xHx\nRLlraSkR8YGImBkRT0XEkxHx5XLX1BIiolNEPBwRj5f285/KXVNLioh2EfFoRPyq3LW0lIhYGBHz\nI+KxiJhd7npaSkT0jIi7IuLpiFgQEYeVu6bmFhH7l/6ONY+3IuIr5a6rJUTExaX/g56IiGkR0alV\n37+Sh8cj4mPAWuD2lNKwctfTEiLi/cD7U0pzI6I7MAf425TSU2UurVlFRABdU0prI6ID8BDw5ZTS\nX8pcWouIiK8CVcAeKaVPlbuelhARC4GqlFJFX9MbET8GHkwp/Sgidge6pJRWlbuulhIR7YBXgUNS\nSs0950ZZRUQ/iv97hqSU1kfEDOA3KaXbWquGiu5pp5QeAN4odx0tKaW0NKU0t/T7GmAB0K+8VTW/\nVFhbetqh9KjIb5wR0R/4JPCjcteipomIHsDHgP8ASCm9W8mBXXI08EKlBXYt7YHOEdEe6AIsac03\nr+jQbmsiYgAwAphV3kpaRmnI+DHgdeAPKaWK3E/geuAfgE3lLqSFJeD3ETEnIiaVu5gWMhBYDtxa\nOtzxo4joWu6iWtgEYFq5i2gJKaVXgeuARcBSYHVK6fetWYOhXSEiohtwN/CVlNJb5a6nJaSUNqaU\nhgP9gVERUXGHPCLiU8DrKaU55a6lFRyRUhoJHA98qXQ4q9K0B0YCP0gpjQDeBiaXt6SWUxr+Pwn4\nz3LX0hIiohdwMsWXsX2ArhFxVmvWYGhXgNIx3ruBqSmln5W7npZWGl6cCRxX7lpawGjgpNLx3unA\nURHxk/KW1DJKvRZSSq8D9wCjyltRi1gMLK41KnQXRYhXquOBuSmlZeUupIUcA7yUUlqeUtoA/Aw4\nvDULMLQzVzpB6z+ABSml75a7npYSEX0jomfp987Ax4Gny1tV80sp/WNKqX9KaQDFMOP9KaVW/Sbf\nGiKia+nES
UrDxccCFXeVR0rpNeCViNi/tOhooKJOEt3GGVTo0HjJIuDQiOhS+r/3aIrziFpNRYd2\nREwD/gzsHxGLI+IL5a6pBYwGzqbokdVcbnFCuYtqAe8HZkbEPOARimPaFXs5VBvwPuChiHgceBj4\ndUrpd2WuqaVcCEwt/dsdDvxzmetpEaUvXx+n6H1WpNKIyV3AXGA+RYa26sxoFX3JlyRJlaSie9qS\nJFUSQ1uSpEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1JUiYMbUmSMvH/AZq1GDIPjALkAAAAAElF\nTkSuQmCC\n", 987 | "text/plain": [ 988 | "
" 989 | ] 990 | }, 991 | "metadata": { 992 | "tags": [] 993 | }, 994 | "output_type": "display_data" 995 | } 996 | ], 997 | "source": [ 998 | "plt.figure(figsize = (8, 8))\n", 999 | "plt.plot(epochs, loss, 'bo', label='Training loss')\n", 1000 | "plt.plot(epochs, val_loss, 'b', label='Validation loss')\n", 1001 | "plt.title('Training and validation loss')\n", 1002 | "plt.legend()\n", 1003 | "plt.show()" 1004 | ] 1005 | }, 1006 | { 1007 | "cell_type": "markdown", 1008 | "metadata": {}, 1009 | "source": [ 1010 | "#### Evaluating the model on test set" 1011 | ] 1012 | }, 1013 | { 1014 | "cell_type": "code", 1015 | "execution_count": 59, 1016 | "metadata": { 1017 | "colab": {}, 1018 | "colab_type": "code", 1019 | "id": "tNtfnN3kdiDr" 1020 | }, 1021 | "outputs": [], 1022 | "source": [ 1023 | "# Evaluation\n", 1024 | "y_pred = model.predict(X_test)\n", 1025 | "y_pred = np.argmax(y_pred, axis=-1)\n", 1026 | "y_test_true = np.argmax(y_test, -1)" 1027 | ] 1028 | }, 1029 | { 1030 | "cell_type": "code", 1031 | "execution_count": 60, 1032 | "metadata": { 1033 | "colab": {}, 1034 | "colab_type": "code", 1035 | "id": "hlAMoN7Lfx4k" 1036 | }, 1037 | "outputs": [], 1038 | "source": [ 1039 | "# Convert the index to tag\n", 1040 | "y_pred = [[idx2tag[i] for i in row] for row in y_pred]\n", 1041 | "y_test_true = [[idx2tag[i] for i in row] for row in y_test_true] " 1042 | ] 1043 | }, 1044 | { 1045 | "cell_type": "code", 1046 | "execution_count": 61, 1047 | "metadata": { 1048 | "colab": { 1049 | "base_uri": "https://localhost:8080/", 1050 | "height": 34 1051 | }, 1052 | "colab_type": "code", 1053 | "executionInfo": { 1054 | "elapsed": 2597, 1055 | "status": "ok", 1056 | "timestamp": 1560709997195, 1057 | "user": { 1058 | "displayName": "CHAVAN AKSHAY", 1059 | "photoUrl": "", 1060 | "userId": "10674464813829582221" 1061 | }, 1062 | "user_tz": -330 1063 | }, 1064 | "id": "IXWG7vqDf7X4", 1065 | "outputId": "8fe95519-f489-4a35-aabb-322dda91d765" 1066 | }, 1067 | "outputs": [ 1068 | { 1069 
| "name": "stdout", 1070 | "output_type": "stream", 1071 | "text": [ 1072 | "F1-score is : 90.4%\n" 1073 | ] 1074 | } 1075 | ], 1076 | "source": [ 1077 | "print(\"F1-score is : {:.1%}\".format(f1_score(y_test_true, y_pred)))" 1078 | ] 1079 | }, 1080 | { 1081 | "cell_type": "code", 1082 | "execution_count": 62, 1083 | "metadata": { 1084 | "colab": { 1085 | "base_uri": "https://localhost:8080/", 1086 | "height": 442 1087 | }, 1088 | "colab_type": "code", 1089 | "executionInfo": { 1090 | "elapsed": 5255, 1091 | "status": "ok", 1092 | "timestamp": 1560710007669, 1093 | "user": { 1094 | "displayName": "CHAVAN AKSHAY", 1095 | "photoUrl": "", 1096 | "userId": "10674464813829582221" 1097 | }, 1098 | "user_tz": -330 1099 | }, 1100 | "id": "8E2X4JzEgJjK", 1101 | "outputId": "e963aa51-efe0-4523-952a-d2978ca48290" 1102 | }, 1103 | "outputs": [ 1104 | { 1105 | "name": "stdout", 1106 | "output_type": "stream", 1107 | "text": [ 1108 | " precision recall f1-score support\n", 1109 | "\n", 1110 | " B-art 0.00 0.00 0.00 47\n", 1111 | " B-eve 0.56 0.19 0.29 47\n", 1112 | " B-geo 0.86 0.93 0.89 5632\n", 1113 | " B-gpe 0.97 0.94 0.96 2418\n", 1114 | " B-nat 0.00 0.00 0.00 30\n", 1115 | " B-org 0.84 0.75 0.79 3001\n", 1116 | " B-per 0.90 0.85 0.87 2562\n", 1117 | " B-tim 0.93 0.90 0.91 3031\n", 1118 | " I-art 0.00 0.00 0.00 27\n", 1119 | " I-eve 0.00 0.00 0.00 40\n", 1120 | " I-geo 0.80 0.86 0.83 1086\n", 1121 | " I-gpe 1.00 0.52 0.68 25\n", 1122 | " I-nat 0.00 0.00 0.00 6\n", 1123 | " I-org 0.80 0.85 0.82 2436\n", 1124 | " I-per 0.90 0.90 0.90 2626\n", 1125 | " I-tim 0.86 0.74 0.80 941\n", 1126 | " O 0.99 0.99 0.99 132279\n", 1127 | " PAD 1.00 1.00 1.00 383316\n", 1128 | "\n", 1129 | "avg / total 0.99 0.99 0.99 539550\n", 1130 | "\n" 1131 | ] 1132 | } 1133 | ], 1134 | "source": [ 1135 | "report = flat_classification_report(y_pred=y_pred, y_true=y_test_true)\n", 1136 | "print(report)" 1137 | ] 1138 | }, 1139 | { 1140 | "cell_type": "code", 1141 | "execution_count": 147, 1142 | 
"metadata": { 1143 | "colab": { 1144 | "base_uri": "https://localhost:8080/", 1145 | "height": 595 1146 | }, 1147 | "colab_type": "code", 1148 | "executionInfo": { 1149 | "elapsed": 914, 1150 | "status": "ok", 1151 | "timestamp": 1560710040586, 1152 | "user": { 1153 | "displayName": "CHAVAN AKSHAY", 1154 | "photoUrl": "", 1155 | "userId": "10674464813829582221" 1156 | }, 1157 | "user_tz": -330 1158 | }, 1159 | "id": "hdmpuybYxPJ3", 1160 | "outputId": "57b72e90-9032-4573-f7b5-928a517c6ee0" 1161 | }, 1162 | "outputs": [ 1163 | { 1164 | "name": "stdout", 1165 | "output_type": "stream", 1166 | "text": [ 1167 | "Sample number 3435 of 7194 (Test Set)\n", 1168 | "Word ||True ||Pred\n", 1169 | "==============================\n", 1170 | "It : O O\n", 1171 | "is : O O\n", 1172 | "the : O O\n", 1173 | "second : O O\n", 1174 | "major : O O\n", 1175 | "quarterly : O O\n", 1176 | "loss : O O\n", 1177 | "for : O O\n", 1178 | "Citigroup : B-org B-org\n", 1179 | ", : O O\n", 1180 | "and : O O\n", 1181 | "it : O O\n", 1182 | "is : O O\n", 1183 | "the : O O\n", 1184 | "latest : O O\n", 1185 | "in : O O\n", 1186 | "a : O O\n", 1187 | "wave : O O\n", 1188 | "of : O O\n", 1189 | "dismal : O O\n", 1190 | "bank : O O\n", 1191 | "earning : O O\n", 1192 | "reports : O O\n", 1193 | "over : O O\n", 1194 | "the : O O\n", 1195 | "past : B-tim B-tim\n", 1196 | "week : O O\n", 1197 | ". 
: O O\n" 1198 | ] 1199 | } 1200 | ], 1201 | "source": [ 1202 | "# Each execution picks a random sample from the test set.\n", 1203 | "i = np.random.randint(0,X_test.shape[0]) # choose a random index in [0, len(X_test))\n", 1204 | "p = model.predict(np.array([X_test[i]]))\n", 1205 | "p = np.argmax(p, axis=-1)\n", 1206 | "true = np.argmax(y_test[i], -1)\n", 1207 | "\n", 1208 | "print(\"Sample number {} of {} (Test Set)\".format(i, X_test.shape[0]))\n", 1209 | "# Visualization\n", 1210 | "print(\"{:15}||{:5}||{}\".format(\"Word\", \"True\", \"Pred\"))\n", 1211 | "print(30 * \"=\")\n", 1212 | "for w, t, pred in zip(X_test[i], true, p[0]):\n", 1213 | " if w != 0:\n", 1214 | " print(\"{:15}: {:5} {}\".format(words[w-2], idx2tag[t], idx2tag[pred]))" 1215 | ] 1216 | }, 1217 | { 1218 | "cell_type": "markdown", 1219 | "metadata": {}, 1220 | "source": [ 1221 | "The results look quite interesting." 1222 | ] 1223 | }, 1224 | { 1225 | "cell_type": "markdown", 1226 | "metadata": {}, 1227 | "source": [ 1228 | "#### Save the result" 1229 | ] 1230 | }, 1231 | { 1232 | "cell_type": "code", 1233 | "execution_count": 119, 1234 | "metadata": {}, 1235 | "outputs": [], 1236 | "source": [ 1237 | "with open('word_to_index.pickle', 'wb') as f:\n", 1238 | " pickle.dump(word_to_index, f)\n", 1239 | "\n", 1240 | "with open('tag_to_index.pickle', 'wb') as f:\n", 1241 | " pickle.dump(tag_to_index, f)" 1242 | ] 1243 | }, 1244 | { 1245 | "cell_type": "code", 1246 | "execution_count": null, 1247 | "metadata": {}, 1248 | "outputs": [], 1249 | "source": [] 1250 | } 1251 | ], 1252 | "metadata": { 1253 | "accelerator": "GPU", 1254 | "colab": { 1255 | "collapsed_sections": [], 1256 | "name": "Untitled1.ipynb", 1257 | "provenance": [], 1258 | "version": "0.3.2" 1259 | }, 1260 | "kernelspec": { 1261 | "display_name": "Python 3", 1262 | "language": "python", 1263 | "name": "python3" 1264 | }, 1265 | "language_info": { 1266 | "codemirror_mode": { 1267 | "name": "ipython", 1268 | 
"version": 3 1269 | }, 1270 | "file_extension": ".py", 1271 | "mimetype": "text/x-python", 1272 | "name": "python", 1273 | "nbconvert_exporter": "python", 1274 | "pygments_lexer": "ipython3", 1275 | "version": "3.6.5" 1276 | } 1277 | }, 1278 | "nbformat": 4, 1279 | "nbformat_minor": 1 1280 | } 1281 | -------------------------------------------------------------------------------- /NER using CRF.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Named Entity Recognition using CRF model\n", 8 | "In Natural Language Processing (NLP), Named Entity Recognition (NER) is one of the most common problems. An entity is the part of the text that we are interested in. In NLP, NER is a method of extracting the relevant information from a large corpus and classifying those entities into predefined categories such as location, organization, name and so on. \n", 9 | "Information about labels: \n", 10 | "* geo = Geographical Entity\n", 11 | "* org = Organization\n", 12 | "* per = Person\n", 13 | "* gpe = Geopolitical Entity\n", 14 | "* tim = Time indicator\n", 15 | "* art = Artifact\n", 16 | "* eve = Event\n", 17 | "* nat = Natural Phenomenon\n", 18 | "\n", 19 | " 1. Total Words Count = 1354149 \n", 20 | " 2. 
Target Data Column: Tag" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "#### Importing Libraries" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import pandas as pd\n", 37 | "\n", 38 | "from sklearn.model_selection import train_test_split\n", 39 | "from sklearn_crfsuite import CRF\n", 40 | "from sklearn_crfsuite.metrics import flat_f1_score\n", 41 | "from sklearn_crfsuite.metrics import flat_classification_report" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "#Reading the csv file\n", 51 | "df = pd.read_csv('ner_dataset.csv', encoding = \"ISO-8859-1\")" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/html": [ 62 | "
\n", 63 | "\n", 76 | "\n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | "
Sentence #WordPOSTag
0Sentence: 1ThousandsNNSO
1NaNofINO
2NaNdemonstratorsNNSO
3NaNhaveVBPO
4NaNmarchedVBNO
5NaNthroughINO
6NaNLondonNNPB-geo
7NaNtoTOO
8NaNprotestVBO
9NaNtheDTO
\n", 159 | "
" 160 | ], 161 | "text/plain": [ 162 | " Sentence # Word POS Tag\n", 163 | "0 Sentence: 1 Thousands NNS O\n", 164 | "1 NaN of IN O\n", 165 | "2 NaN demonstrators NNS O\n", 166 | "3 NaN have VBP O\n", 167 | "4 NaN marched VBN O\n", 168 | "5 NaN through IN O\n", 169 | "6 NaN London NNP B-geo\n", 170 | "7 NaN to TO O\n", 171 | "8 NaN protest VB O\n", 172 | "9 NaN the DT O" 173 | ] 174 | }, 175 | "execution_count": 3, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "#Display first 10 rows\n", 182 | "df.head(10)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 4, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/html": [ 193 | "
\n", 194 | "\n", 207 | "\n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | "
Sentence #WordPOSTag
count47959104857510485751048575
unique47959351784217
topSentence: 9309theNNO
freq152573145807887908
\n", 248 | "
" 249 | ], 250 | "text/plain": [ 251 | " Sentence # Word POS Tag\n", 252 | "count 47959 1048575 1048575 1048575\n", 253 | "unique 47959 35178 42 17\n", 254 | "top Sentence: 9309 the NN O\n", 255 | "freq 1 52573 145807 887908" 256 | ] 257 | }, 258 | "execution_count": 4, 259 | "metadata": {}, 260 | "output_type": "execute_result" 261 | } 262 | ], 263 | "source": [ 264 | "df.describe()" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "#### Observations : \n", 272 | "* There are a total of 47959 sentences in the dataset.\n", 273 | "* The number of unique words in the dataset is 35178.\n", 274 | "* There are 17 labels (Tags) in total." 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 5, 280 | "metadata": {}, 281 | "outputs": [ 282 | { 283 | "data": { 284 | "text/plain": [ 285 | "array(['O', 'B-geo', 'B-gpe', 'B-per', 'I-geo', 'B-org', 'I-org', 'B-tim',\n", 286 | " 'B-art', 'I-art', 'I-per', 'I-gpe', 'I-tim', 'B-nat', 'B-eve',\n", 287 | " 'I-eve', 'I-nat'], dtype=object)" 288 | ] 289 | }, 290 | "execution_count": 5, 291 | "metadata": {}, 292 | "output_type": "execute_result" 293 | } 294 | ], 295 | "source": [ 296 | "#Displaying the unique Tags\n", 297 | "df['Tag'].unique()" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 6, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "data": { 307 | "text/plain": [ 308 | "Sentence # 1000616\n", 309 | "Word 0\n", 310 | "POS 0\n", 311 | "Tag 0\n", 312 | "dtype: int64" 313 | ] 314 | }, 315 | "execution_count": 6, 316 | "metadata": {}, 317 | "output_type": "execute_result" 318 | } 319 | ], 320 | "source": [ 321 | "#Checking null values, if any.\n", 322 | "df.isnull().sum()" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "There are lots of missing values in the 'Sentence #' attribute. 
So we will use the pandas fillna technique with the 'ffill' method, which propagates the last valid observation forward over the missing values that follow it." 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 7, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "df = df.fillna(method = 'ffill')" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 8, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "# This class retrieves sentences. Each sentence is a list of (word, POS, tag) tuples.\n", 348 | "class sentence(object):\n", 349 | " def __init__(self, df):\n", 350 | " self.n_sent = 1\n", 351 | " self.df = df\n", 352 | " self.empty = False\n", 353 | " agg = lambda s : [(w, p, t) for w, p, t in zip(s['Word'].values.tolist(),\n", 354 | " s['POS'].values.tolist(),\n", 355 | " s['Tag'].values.tolist())]\n", 356 | " self.grouped = self.df.groupby(\"Sentence #\").apply(agg)\n", 357 | " self.sentences = [s for s in self.grouped]\n", 358 | " \n", 359 | " def get_text(self):\n", 360 | " try:\n", 361 | " s = self.grouped['Sentence: {}'.format(self.n_sent)]\n", 362 | " self.n_sent +=1\n", 363 | " return s\n", 364 | " except:\n", 365 | " return None" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 9, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "text/plain": [ 376 | "'Thousands of demonstrators have marched through London to protest the war in Iraq and demand the withdrawal of British troops from that country .'" 377 | ] 378 | }, 379 | "execution_count": 9, 380 | "metadata": {}, 381 | "output_type": "execute_result" 382 | } 383 | ], 384 | "source": [ 385 | "#Displaying one full sentence\n", 386 | "getter = sentence(df)\n", 387 | "sentences = [\" \".join([s[0] for s in sent]) for sent in getter.sentences]\n", 388 | "sentences[0]" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 10, 394 | "metadata": {}, 395 | 
"outputs": [ 396 | { 397 | 
"name": "stdout", 398 | "output_type": "stream", 399 | "text": [ 400 | "[('Thousands', 'NNS', 'O'), ('of', 'IN', 'O'), ('demonstrators', 'NNS', 'O'), ('have', 'VBP', 'O'), ('marched', 'VBN', 'O'), ('through', 'IN', 'O'), ('London', 'NNP', 'B-geo'), ('to', 'TO', 'O'), ('protest', 'VB', 'O'), ('the', 'DT', 'O'), ('war', 'NN', 'O'), ('in', 'IN', 'O'), ('Iraq', 'NNP', 'B-geo'), ('and', 'CC', 'O'), ('demand', 'VB', 'O'), ('the', 'DT', 'O'), ('withdrawal', 'NN', 'O'), ('of', 'IN', 'O'), ('British', 'JJ', 'B-gpe'), ('troops', 'NNS', 'O'), ('from', 'IN', 'O'), ('that', 'DT', 'O'), ('country', 'NN', 'O'), ('.', '.', 'O')]\n" 401 | ] 402 | } 403 | ], 404 | "source": [ 405 | "#sentence with its pos and tag.\n", 406 | "sent = getter.get_text()\n", 407 | "print(sent)" 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "Getting all the sentences in the dataset." 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 11, 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [ 423 | "sentences = getter.sentences" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "#### Feature Preparation\n", 431 | "These are the default features used by the NER in nltk. We can also modify it for our customization." 
432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 12, 437 | "metadata": {}, 438 | "outputs": [], 439 | "source": [ 440 | "def word2features(sent, i):\n", 441 | " word = sent[i][0]\n", 442 | " postag = sent[i][1]\n", 443 | "\n", 444 | " features = {\n", 445 | " 'bias': 1.0,\n", 446 | " 'word.lower()': word.lower(),\n", 447 | " 'word[-3:]': word[-3:],\n", 448 | " 'word[-2:]': word[-2:],\n", 449 | " 'word.isupper()': word.isupper(),\n", 450 | " 'word.istitle()': word.istitle(),\n", 451 | " 'word.isdigit()': word.isdigit(),\n", 452 | " 'postag': postag,\n", 453 | " 'postag[:2]': postag[:2],\n", 454 | " }\n", 455 | " if i > 0:\n", 456 | " word1 = sent[i-1][0]\n", 457 | " postag1 = sent[i-1][1]\n", 458 | " features.update({\n", 459 | " '-1:word.lower()': word1.lower(),\n", 460 | " '-1:word.istitle()': word1.istitle(),\n", 461 | " '-1:word.isupper()': word1.isupper(),\n", 462 | " '-1:postag': postag1,\n", 463 | " '-1:postag[:2]': postag1[:2],\n", 464 | " })\n", 465 | " else:\n", 466 | " features['BOS'] = True\n", 467 | "\n", 468 | " if i < len(sent)-1:\n", 469 | " word1 = sent[i+1][0]\n", 470 | " postag1 = sent[i+1][1]\n", 471 | " features.update({\n", 472 | " '+1:word.lower()': word1.lower(),\n", 473 | " '+1:word.istitle()': word1.istitle(),\n", 474 | " '+1:word.isupper()': word1.isupper(),\n", 475 | " '+1:postag': postag1,\n", 476 | " '+1:postag[:2]': postag1[:2],\n", 477 | " })\n", 478 | " else:\n", 479 | " features['EOS'] = True\n", 480 | "\n", 481 | " return features\n", 482 | "\n", 483 | "\n", 484 | "def sent2features(sent):\n", 485 | " return [word2features(sent, i) for i in range(len(sent))]\n", 486 | "\n", 487 | "def sent2labels(sent):\n", 488 | " return [label for token, postag, label in sent]\n", 489 | "\n", 490 | "def sent2tokens(sent):\n", 491 | " return [token for token, postag, label in sent]" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": 13, 497 | "metadata": {}, 498 | "outputs": [], 499 | "source": 
[ 500 | "X = [sent2features(s) for s in sentences]\n", 501 | "y = [sent2labels(s) for s in sentences]" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": 14, 507 | "metadata": {}, 508 | "outputs": [], 509 | "source": [ 510 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 15, 516 | "metadata": {}, 517 | "outputs": [ 518 | { 519 | "data": { 520 | "text/plain": [ 521 | "CRF(algorithm='lbfgs', all_possible_states=None,\n", 522 | " all_possible_transitions=False, averaging=None, c=None, c1=0.1, c2=0.1,\n", 523 | " calibration_candidates=None, calibration_eta=None,\n", 524 | " calibration_max_trials=None, calibration_rate=None,\n", 525 | " calibration_samples=None, delta=None, epsilon=None, error_sensitive=None,\n", 526 | " gamma=None, keep_tempfiles=None, linesearch=None, max_iterations=100,\n", 527 | " max_linesearch=None, min_freq=None, model_filename=None,\n", 528 | " num_memories=None, pa_type=None, period=None, trainer_cls=None,\n", 529 | " variance=None, verbose=False)" 530 | ] 531 | }, 532 | "execution_count": 15, 533 | "metadata": {}, 534 | "output_type": "execute_result" 535 | } 536 | ], 537 | "source": [ 538 | "crf = CRF(algorithm = 'lbfgs',\n", 539 | " c1 = 0.1,\n", 540 | " c2 = 0.1,\n", 541 | " max_iterations = 100,\n", 542 | " all_possible_transitions = False)\n", 543 | "crf.fit(X_train, y_train)" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": 16, 549 | "metadata": {}, 550 | "outputs": [], 551 | "source": [ 552 | "#Predicting on the test set.\n", 553 | "y_pred = crf.predict(X_test)" 554 | ] 555 | }, 556 | { 557 | "cell_type": "markdown", 558 | "metadata": {}, 559 | "source": [ 560 | "#### Evaluating the model performance.\n", 561 | "We will use precision, recall and f1-score metrics to evaluate the performance of the model since the accuracy is not a good metric for this dataset because we have an 
unequal number of data points in each class." 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": 19, 567 | "metadata": {}, 568 | "outputs": [ 569 | { 570 | "name": "stdout", 571 | "output_type": "stream", 572 | "text": [ 573 | "0.9719578426137272\n" 574 | ] 575 | } 576 | ], 577 | "source": [ 578 | "f1_score = flat_f1_score(y_test, y_pred, average = 'weighted')\n", 579 | "print(f1_score)" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 20, 585 | "metadata": {}, 586 | "outputs": [ 587 | { 588 | "name": "stdout", 589 | "output_type": "stream", 590 | "text": [ 591 | " precision recall f1-score support\n", 592 | "\n", 593 | " B-art 0.43 0.17 0.24 78\n", 594 | " B-eve 0.68 0.41 0.51 61\n", 595 | " B-geo 0.86 0.91 0.88 7481\n", 596 | " B-gpe 0.97 0.94 0.95 3185\n", 597 | " B-nat 0.85 0.36 0.51 47\n", 598 | " B-org 0.81 0.74 0.77 4187\n", 599 | " B-per 0.86 0.83 0.84 3421\n", 600 | " B-tim 0.93 0.88 0.90 4030\n", 601 | " I-art 0.25 0.08 0.12 64\n", 602 | " I-eve 0.52 0.30 0.38 44\n", 603 | " I-geo 0.81 0.80 0.80 1461\n", 604 | " I-gpe 0.81 0.46 0.59 37\n", 605 | " I-nat 0.50 0.17 0.25 12\n", 606 | " I-org 0.83 0.81 0.82 3441\n", 607 | " I-per 0.86 0.90 0.88 3488\n", 608 | " I-tim 0.84 0.74 0.79 1245\n", 609 | " O 0.99 0.99 0.99 177951\n", 610 | "\n", 611 | "avg / total 0.97 0.97 0.97 210233\n", 612 | "\n" 613 | ] 614 | } 615 | ], 616 | "source": [ 617 | "report = flat_classification_report(y_test, y_pred)\n", 618 | "print(report)" 619 | ] 620 | }, 621 | { 622 | "cell_type": "markdown", 623 | "metadata": {}, 624 | "source": [ 625 | "This looks quite nice." 
626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": null, 631 | "metadata": {}, 632 | "outputs": [], 633 | "source": [] 634 | } 635 | ], 636 | "metadata": { 637 | "kernelspec": { 638 | "display_name": "Python 3", 639 | "language": "python", 640 | "name": "python3" 641 | }, 642 | "language_info": { 643 | "codemirror_mode": { 644 | "name": "ipython", 645 | "version": 3 646 | }, 647 | "file_extension": ".py", 648 | "mimetype": "text/x-python", 649 | "name": "python", 650 | "nbconvert_exporter": "python", 651 | "pygments_lexer": "ipython3", 652 | "version": "3.6.5" 653 | } 654 | }, 655 | "nbformat": 4, 656 | "nbformat_minor": 2 657 | } 658 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Named Entity Recognition 2 | 3 | In Natural Language Processing (NLP), Named Entity Recognition (NER) is one of the most common problems. An entity is the part of the text that we are interested in. In NLP, NER is a method of extracting the relevant information from a large corpus and classifying those entities into predefined categories such as location, organization, name and so on. 4 | Information about labels: 5 | * geo = Geographical Entity 6 | * org = Organization 7 | * per = Person 8 | * gpe = Geopolitical Entity 9 | * tim = Time indicator 10 | * art = Artifact 11 | * eve = Event 12 | * nat = Natural Phenomenon 13 | 14 | 1. Total Words Count = 1354149 15 | 2. 
Target Data Column: Tag 16 | -------------------------------------------------------------------------------- /model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Akshayc1/named-entity-recognition/8a64faf36529b42eec3244cc3bd774050abbc651/model.h5 -------------------------------------------------------------------------------- /tag_to_index.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Akshayc1/named-entity-recognition/8a64faf36529b42eec3244cc3bd774050abbc651/tag_to_index.pickle -------------------------------------------------------------------------------- /word_to_index.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Akshayc1/named-entity-recognition/8a64faf36529b42eec3244cc3bd774050abbc651/word_to_index.pickle --------------------------------------------------------------------------------