├── NER using Bidirectional LSTM - CRF .ipynb
├── NER using CRF.ipynb
├── README.md
├── model.h5
├── tag_to_index.pickle
└── word_to_index.pickle
/NER using Bidirectional LSTM - CRF .ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Named Entity Recognition\n",
8 | "In Natural Language Processing (NLP), Named Entity Recognition (NER) is one of the common problems. An entity is a part of the text that is of interest. NER is a method of extracting the relevant information from a large corpus and classifying those entities into predefined categories such as location, organization, name and so on. \n",
9 | "Information about labels: \n",
10 | "* geo = Geographical Entity\n",
11 | "* org = Organization\n",
12 | "* per = Person\n",
13 | "* gpe = Geopolitical Entity\n",
14 | "* tim = Time indicator\n",
15 | "* art = Artifact\n",
16 | "* eve = Event\n",
17 | "* nat = Natural Phenomenon\n",
18 | "\n",
19 | " 1. Total Words Count = 1354149 \n",
20 | " 2. Target Data Column: Tag"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "#### Importing Libraries"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 2,
33 | "metadata": {
34 | "colab": {},
35 | "colab_type": "code",
36 | "id": "wt4u0Lf1YJPH"
37 | },
38 | "outputs": [],
39 | "source": [
40 | "import pandas as pd\n",
41 | "import numpy as np\n",
42 | "\n",
43 | "from keras.preprocessing.sequence import pad_sequences\n",
44 | "from keras.utils import to_categorical\n",
45 | "from keras.layers import LSTM, Dense, TimeDistributed, Embedding, Bidirectional\n",
46 | "from keras.models import Model, Input\n",
47 | "from keras_contrib.layers import CRF\n",
48 | "from keras.callbacks import ModelCheckpoint\n",
49 | "\n",
50 | "import warnings\n",
51 | "warnings.filterwarnings(\"ignore\")\n",
52 | "\n",
53 | "from sklearn.model_selection import train_test_split\n",
54 | "import matplotlib.pyplot as plt\n",
55 | "%matplotlib inline\n",
56 | "\n",
57 | "from sklearn_crfsuite.metrics import flat_classification_report\n",
58 | "from sklearn.metrics import f1_score\n",
59 | "from seqeval.metrics import precision_score, recall_score, f1_score, classification_report\n",
60 | "from keras.preprocessing.text import text_to_word_sequence\n",
61 | "import pickle"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 12,
67 | "metadata": {
68 | "colab": {},
69 | "colab_type": "code",
70 | "id": "PmFVrk0JY-Mv"
71 | },
72 | "outputs": [],
73 | "source": [
74 | "#Reading the csv file\n",
75 | "df = pd.read_csv('ner_dataset.csv', encoding = \"ISO-8859-1\")"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 24,
81 | "metadata": {
82 | "colab": {
83 | "base_uri": "https://localhost:8080/",
84 | "height": 359
85 | },
86 | "colab_type": "code",
87 | "executionInfo": {
88 | "elapsed": 668,
89 | "status": "ok",
90 | "timestamp": 1560703170937,
91 | "user": {
92 | "displayName": "CHAVAN AKSHAY",
93 | "photoUrl": "",
94 | "userId": "10674464813829582221"
95 | },
96 | "user_tz": -330
97 | },
98 | "id": "LYlRwss8ZPZr",
99 | "outputId": "c3366c42-0a14-4925-c108-af64ba6d1921"
100 | },
101 | "outputs": [
102 | {
103 | "data": {
104 | "text/html": [
105 | "
\n",
106 | "\n",
119 | "
\n",
120 | " \n",
121 | " \n",
122 | " | \n",
123 | " Sentence # | \n",
124 | " Word | \n",
125 | " POS | \n",
126 | " Tag | \n",
127 | "
\n",
128 | " \n",
129 | " \n",
130 | " \n",
131 | " 0 | \n",
132 | " Sentence: 1 | \n",
133 | " Thousands | \n",
134 | " NNS | \n",
135 | " O | \n",
136 | "
\n",
137 | " \n",
138 | " 1 | \n",
139 | " NaN | \n",
140 | " of | \n",
141 | " IN | \n",
142 | " O | \n",
143 | "
\n",
144 | " \n",
145 | " 2 | \n",
146 | " NaN | \n",
147 | " demonstrators | \n",
148 | " NNS | \n",
149 | " O | \n",
150 | "
\n",
151 | " \n",
152 | " 3 | \n",
153 | " NaN | \n",
154 | " have | \n",
155 | " VBP | \n",
156 | " O | \n",
157 | "
\n",
158 | " \n",
159 | " 4 | \n",
160 | " NaN | \n",
161 | " marched | \n",
162 | " VBN | \n",
163 | " O | \n",
164 | "
\n",
165 | " \n",
166 | " 5 | \n",
167 | " NaN | \n",
168 | " through | \n",
169 | " IN | \n",
170 | " O | \n",
171 | "
\n",
172 | " \n",
173 | " 6 | \n",
174 | " NaN | \n",
175 | " London | \n",
176 | " NNP | \n",
177 | " B-geo | \n",
178 | "
\n",
179 | " \n",
180 | " 7 | \n",
181 | " NaN | \n",
182 | " to | \n",
183 | " TO | \n",
184 | " O | \n",
185 | "
\n",
186 | " \n",
187 | " 8 | \n",
188 | " NaN | \n",
189 | " protest | \n",
190 | " VB | \n",
191 | " O | \n",
192 | "
\n",
193 | " \n",
194 | " 9 | \n",
195 | " NaN | \n",
196 | " the | \n",
197 | " DT | \n",
198 | " O | \n",
199 | "
\n",
200 | " \n",
201 | "
\n",
202 | "
"
203 | ],
204 | "text/plain": [
205 | " Sentence # Word POS Tag\n",
206 | "0 Sentence: 1 Thousands NNS O\n",
207 | "1 NaN of IN O\n",
208 | "2 NaN demonstrators NNS O\n",
209 | "3 NaN have VBP O\n",
210 | "4 NaN marched VBN O\n",
211 | "5 NaN through IN O\n",
212 | "6 NaN London NNP B-geo\n",
213 | "7 NaN to TO O\n",
214 | "8 NaN protest VB O\n",
215 | "9 NaN the DT O"
216 | ]
217 | },
218 | "execution_count": 24,
219 | "metadata": {},
220 | "output_type": "execute_result"
221 | }
222 | ],
223 | "source": [
224 | "#Display first 10 rows\n",
225 | "df.head(10)"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 5,
231 | "metadata": {},
232 | "outputs": [
233 | {
234 | "data": {
235 | "text/html": [
236 | "\n",
237 | "\n",
250 | "
\n",
251 | " \n",
252 | " \n",
253 | " | \n",
254 | " Sentence # | \n",
255 | " Word | \n",
256 | " POS | \n",
257 | " Tag | \n",
258 | "
\n",
259 | " \n",
260 | " \n",
261 | " \n",
262 | " count | \n",
263 | " 47959 | \n",
264 | " 1048575 | \n",
265 | " 1048575 | \n",
266 | " 1048575 | \n",
267 | "
\n",
268 | " \n",
269 | " unique | \n",
270 | " 47959 | \n",
271 | " 35178 | \n",
272 | " 42 | \n",
273 | " 17 | \n",
274 | "
\n",
275 | " \n",
276 | " top | \n",
277 | " Sentence: 36965 | \n",
278 | " the | \n",
279 | " NN | \n",
280 | " O | \n",
281 | "
\n",
282 | " \n",
283 | " freq | \n",
284 | " 1 | \n",
285 | " 52573 | \n",
286 | " 145807 | \n",
287 | " 887908 | \n",
288 | "
\n",
289 | " \n",
290 | "
\n",
291 | "
"
292 | ],
293 | "text/plain": [
294 | " Sentence # Word POS Tag\n",
295 | "count 47959 1048575 1048575 1048575\n",
296 | "unique 47959 35178 42 17\n",
297 | "top Sentence: 36965 the NN O\n",
298 | "freq 1 52573 145807 887908"
299 | ]
300 | },
301 | "execution_count": 5,
302 | "metadata": {},
303 | "output_type": "execute_result"
304 | }
305 | ],
306 | "source": [
307 | "df.describe()"
308 | ]
309 | },
310 | {
311 | "cell_type": "markdown",
312 | "metadata": {},
313 | "source": [
314 | "#### Observations : \n",
315 | "* There are a total of 47959 sentences in the dataset.\n",
316 | "* The number of unique words in the dataset is 35178.\n",
317 | "* There are 17 labels (tags) in total."
318 | ]
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": 6,
323 | "metadata": {},
324 | "outputs": [
325 | {
326 | "data": {
327 | "text/plain": [
328 | "array(['O', 'B-geo', 'B-gpe', 'B-per', 'I-geo', 'B-org', 'I-org', 'B-tim',\n",
329 | " 'B-art', 'I-art', 'I-per', 'I-gpe', 'I-tim', 'B-nat', 'B-eve',\n",
330 | " 'I-eve', 'I-nat'], dtype=object)"
331 | ]
332 | },
333 | "execution_count": 6,
334 | "metadata": {},
335 | "output_type": "execute_result"
336 | }
337 | ],
338 | "source": [
339 | "#Displaying the unique Tags\n",
340 | "df['Tag'].unique()"
341 | ]
342 | },
343 | {
344 | "cell_type": "code",
345 | "execution_count": 7,
346 | "metadata": {},
347 | "outputs": [
348 | {
349 | "data": {
350 | "text/plain": [
351 | "Sentence # 1000616\n",
352 | "Word 0\n",
353 | "POS 0\n",
354 | "Tag 0\n",
355 | "dtype: int64"
356 | ]
357 | },
358 | "execution_count": 7,
359 | "metadata": {},
360 | "output_type": "execute_result"
361 | }
362 | ],
363 | "source": [
364 | "#Checking null values, if any.\n",
365 | "df.isnull().sum()"
366 | ]
367 | },
368 | {
369 | "cell_type": "markdown",
370 | "metadata": {},
371 | "source": [
372 | "There are many missing values in the 'Sentence #' column. We fill them using pandas `fillna` with the 'ffill' method, which propagates the last valid observation forward to the following rows."
373 | ]
374 | },
375 | {
376 | "cell_type": "code",
377 | "execution_count": 13,
378 | "metadata": {
379 | "colab": {},
380 | "colab_type": "code",
381 | "id": "9PTsjCdBZ9Xy"
382 | },
383 | "outputs": [],
384 | "source": [
385 | "df = df.fillna(method = 'ffill')"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": 14,
391 | "metadata": {
392 | "colab": {},
393 | "colab_type": "code",
394 | "id": "b9DYzRvMagm5"
395 | },
396 | "outputs": [],
397 | "source": [
398 | "# Class to retrieve sentences. Each sentence is a list of (word, POS, tag) tuples.\n",
399 | "class sentence(object):\n",
400 | " def __init__(self, df):\n",
401 | " self.n_sent = 1\n",
402 | " self.df = df\n",
403 | " self.empty = False\n",
404 | " agg = lambda s : [(w, p, t) for w, p, t in zip(s['Word'].values.tolist(),\n",
405 | " s['POS'].values.tolist(),\n",
406 | " s['Tag'].values.tolist())]\n",
407 | " self.grouped = self.df.groupby(\"Sentence #\").apply(agg)\n",
408 | " self.sentences = [s for s in self.grouped]\n",
409 | " \n",
410 | " def get_text(self):\n",
411 | " try:\n",
412 | " s = self.grouped['Sentence: {}'.format(self.n_sent)]\n",
413 | " self.n_sent +=1\n",
414 | " return s\n",
415 | " except:\n",
416 | " return None"
417 | ]
418 | },
419 | {
420 | "cell_type": "code",
421 | "execution_count": 15,
422 | "metadata": {},
423 | "outputs": [
424 | {
425 | "data": {
426 | "text/plain": [
427 | "'Thousands of demonstrators have marched through London to protest the war in Iraq and demand the withdrawal of British troops from that country .'"
428 | ]
429 | },
430 | "execution_count": 15,
431 | "metadata": {},
432 | "output_type": "execute_result"
433 | }
434 | ],
435 | "source": [
436 | "#Displaying one full sentence\n",
437 | "getter = sentence(df)\n",
438 | "sentences = [\" \".join([s[0] for s in sent]) for sent in getter.sentences]\n",
439 | "sentences[0]"
440 | ]
441 | },
442 | {
443 | "cell_type": "code",
444 | "execution_count": 16,
445 | "metadata": {},
446 | "outputs": [
447 | {
448 | "name": "stdout",
449 | "output_type": "stream",
450 | "text": [
451 | "[('Thousands', 'NNS', 'O'), ('of', 'IN', 'O'), ('demonstrators', 'NNS', 'O'), ('have', 'VBP', 'O'), ('marched', 'VBN', 'O'), ('through', 'IN', 'O'), ('London', 'NNP', 'B-geo'), ('to', 'TO', 'O'), ('protest', 'VB', 'O'), ('the', 'DT', 'O'), ('war', 'NN', 'O'), ('in', 'IN', 'O'), ('Iraq', 'NNP', 'B-geo'), ('and', 'CC', 'O'), ('demand', 'VB', 'O'), ('the', 'DT', 'O'), ('withdrawal', 'NN', 'O'), ('of', 'IN', 'O'), ('British', 'JJ', 'B-gpe'), ('troops', 'NNS', 'O'), ('from', 'IN', 'O'), ('that', 'DT', 'O'), ('country', 'NN', 'O'), ('.', '.', 'O')]\n"
452 | ]
453 | }
454 | ],
455 | "source": [
456 | "#sentence with its pos and tag.\n",
457 | "sent = getter.get_text()\n",
458 | "print(sent)"
459 | ]
460 | },
461 | {
462 | "cell_type": "markdown",
463 | "metadata": {},
464 | "source": [
465 | "Getting all the sentences in the dataset."
466 | ]
467 | },
468 | {
469 | "cell_type": "code",
470 | "execution_count": 17,
471 | "metadata": {
472 | "colab": {},
473 | "colab_type": "code",
474 | "id": "3F0_tiOmaiVi"
475 | },
476 | "outputs": [],
477 | "source": [
478 | "sentences = getter.sentences"
479 | ]
480 | },
481 | {
482 | "cell_type": "markdown",
483 | "metadata": {},
484 | "source": [
485 | "#### Defining the parameters for LSTM network"
486 | ]
487 | },
488 | {
489 | "cell_type": "code",
490 | "execution_count": 18,
491 | "metadata": {
492 | "colab": {},
493 | "colab_type": "code",
494 | "id": "eRQJJSoyamU4"
495 | },
496 | "outputs": [],
497 | "source": [
498 | "# Number of data points passed in each iteration\n",
499 | "batch_size = 64 \n",
500 | "# Number of passes through the entire dataset\n",
501 | "epochs = 8\n",
502 | "# Maximum length of a sentence (in tokens)\n",
503 | "max_len = 75 \n",
504 | "# Dimension of embedding vector\n",
505 | "embedding = 40 "
506 | ]
507 | },
508 | {
509 | "cell_type": "markdown",
510 | "metadata": {},
511 | "source": [
512 | "#### Preprocessing Data\n",
513 | "We will process our text data before feeding to the network.\n",
514 | "* Here the word_to_index dictionary is used to convert each word into an index value, and tag_to_index does the same for the labels. So overall we represent each word and each label as an integer."
515 | ]
516 | },
517 | {
518 | "cell_type": "code",
519 | "execution_count": 26,
520 | "metadata": {
521 | "colab": {},
522 | "colab_type": "code",
523 | "id": "32qpbWMVau_5"
524 | },
525 | "outputs": [],
526 | "source": [
527 | "#Getting unique words and labels from data\n",
528 | "words = list(df['Word'].unique())\n",
529 | "tags = list(df['Tag'].unique())\n",
530 | "# Dictionary word:index pair\n",
531 | "# word is key and its value is corresponding index\n",
532 | "word_to_index = {w : i + 2 for i, w in enumerate(words)}\n",
533 | "word_to_index[\"UNK\"] = 1\n",
534 | "word_to_index[\"PAD\"] = 0\n",
535 | "\n",
536 | "# Dictionary label:index pair\n",
537 | "# label is key and value is index.\n",
538 | "tag_to_index = {t : i + 1 for i, t in enumerate(tags)}\n",
539 | "tag_to_index[\"PAD\"] = 0\n",
540 | "\n",
541 | "idx2word = {i: w for w, i in word_to_index.items()}\n",
542 | "idx2tag = {i: w for w, i in tag_to_index.items()}"
543 | ]
544 | },
545 | {
546 | "cell_type": "code",
547 | "execution_count": 17,
548 | "metadata": {},
549 | "outputs": [
550 | {
551 | "name": "stdout",
552 | "output_type": "stream",
553 | "text": [
554 | "The word India is identified by the index: 2570\n",
555 | "The label B-org for the organization is identified by the index: 6\n"
556 | ]
557 | }
558 | ],
559 | "source": [
560 | "print(\"The word India is identified by the index: {}\".format(word_to_index[\"India\"]))\n",
561 | "print(\"The label B-org for the organization is identified by the index: {}\".format(tag_to_index[\"B-org\"]))"
562 | ]
563 | },
564 | {
565 | "cell_type": "code",
566 | "execution_count": 31,
567 | "metadata": {
568 | "colab": {},
569 | "colab_type": "code",
570 | "id": "tcC_UuUbav7y"
571 | },
572 | "outputs": [],
573 | "source": [
574 | "# Converting each sentence into list of index from list of tokens\n",
575 | "X = [[word_to_index[w[0]] for w in s] for s in sentences]\n",
576 | "\n",
577 | "# Padding each sequence so that all sequences have the same length (max_len)\n",
578 | "X = pad_sequences(maxlen = max_len, sequences = X, padding = \"post\", value = word_to_index[\"PAD\"])"
579 | ]
580 | },
581 | {
582 | "cell_type": "code",
583 | "execution_count": 32,
584 | "metadata": {
585 | "colab": {},
586 | "colab_type": "code",
587 | "id": "N-C7iFNjaytc"
588 | },
589 | "outputs": [],
590 | "source": [
591 | "# Convert label to index\n",
592 | "y = [[tag_to_index[w[2]] for w in s] for s in sentences]\n",
593 | "\n",
594 | "# padding\n",
595 | "y = pad_sequences(maxlen = max_len, sequences = y, padding = \"post\", value = tag_to_index[\"PAD\"])"
596 | ]
597 | },
598 | {
599 | "cell_type": "code",
600 | "execution_count": 33,
601 | "metadata": {
602 | "colab": {},
603 | "colab_type": "code",
604 | "id": "SbnAi9kwa0gL"
605 | },
606 | "outputs": [],
607 | "source": [
608 | "num_tag = df['Tag'].nunique()\n",
609 | "# One hot encoded labels\n",
610 | "y = [to_categorical(i, num_classes = num_tag + 1) for i in y]"
611 | ]
612 | },
613 | {
614 | "cell_type": "code",
615 | "execution_count": 34,
616 | "metadata": {
617 | "colab": {},
618 | "colab_type": "code",
619 | "id": "bmj_9AzCa23d"
620 | },
621 | "outputs": [],
622 | "source": [
623 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15)"
624 | ]
625 | },
626 | {
627 | "cell_type": "code",
628 | "execution_count": 22,
629 | "metadata": {},
630 | "outputs": [
631 | {
632 | "name": "stdout",
633 | "output_type": "stream",
634 | "text": [
635 | "Size of training input data : (40765, 75)\n",
636 | "Size of training output data : (40765, 75, 18)\n",
637 | "Size of testing input data : (7194, 75)\n",
638 | "Size of testing output data : (7194, 75, 18)\n"
639 | ]
640 | }
641 | ],
642 | "source": [
643 | "print(\"Size of training input data : \", X_train.shape)\n",
644 | "print(\"Size of training output data : \", np.array(y_train).shape)\n",
645 | "print(\"Size of testing input data : \", X_test.shape)\n",
646 | "print(\"Size of testing output data : \", np.array(y_test).shape)"
647 | ]
648 | },
649 | {
650 | "cell_type": "code",
651 | "execution_count": 23,
652 | "metadata": {},
653 | "outputs": [
654 | {
655 | "name": "stdout",
656 | "output_type": "stream",
657 | "text": [
658 | "*****Before Processing first sentence : *****\n",
659 | " Thousands of demonstrators have marched through London to protest the war in Iraq and demand the withdrawal of British troops from that country .\n",
660 | "*****After Processing first sentence : *****\n",
661 | " [ 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 11 17 3 18 19 20 21 22 23\n",
662 | " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
663 | " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
664 | " 0 0 0]\n"
665 | ]
666 | }
667 | ],
668 | "source": [
669 | "# Let's check the first sentence before and after processing.\n",
670 | "print('*****Before Processing first sentence : *****\\n', ' '.join([w[0] for w in sentences[0]]))\n",
671 | "print('*****After Processing first sentence : *****\\n ', X[0])"
672 | ]
673 | },
674 | {
675 | "cell_type": "code",
676 | "execution_count": 24,
677 | "metadata": {},
678 | "outputs": [
679 | {
680 | "name": "stdout",
681 | "output_type": "stream",
682 | "text": [
683 | "*****Before Processing first sentence : *****\n",
684 | " O O O O O O B-geo O O O O O B-geo O O O O O B-gpe O O O O O\n",
685 | "*****After Processing first sentence : *****\n",
686 | " [[0. 1. 0. ... 0. 0. 0.]\n",
687 | " [0. 1. 0. ... 0. 0. 0.]\n",
688 | " [0. 1. 0. ... 0. 0. 0.]\n",
689 | " ...\n",
690 | " [1. 0. 0. ... 0. 0. 0.]\n",
691 | " [1. 0. 0. ... 0. 0. 0.]\n",
692 | " [1. 0. 0. ... 0. 0. 0.]]\n"
693 | ]
694 | }
695 | ],
696 | "source": [
697 | "# First label before and after processing.\n",
698 | "print('*****Before Processing first sentence : *****\\n', ' '.join([w[2] for w in sentences[0]]))\n",
699 | "print('*****After Processing first sentence : *****\\n ', y[0])"
700 | ]
701 | },
702 | {
703 | "cell_type": "markdown",
704 | "metadata": {},
705 | "source": [
706 | "#### Bidirectional LSTM-CRF Network"
707 | ]
708 | },
709 | {
710 | "cell_type": "code",
711 | "execution_count": 96,
712 | "metadata": {
713 | "colab": {
714 | "base_uri": "https://localhost:8080/",
715 | "height": 306
716 | },
717 | "colab_type": "code",
718 | "executionInfo": {
719 | "elapsed": 2791,
720 | "status": "ok",
721 | "timestamp": 1560703209499,
722 | "user": {
723 | "displayName": "CHAVAN AKSHAY",
724 | "photoUrl": "",
725 | "userId": "10674464813829582221"
726 | },
727 | "user_tz": -330
728 | },
729 | "id": "6WRJfQ5ca4vD",
730 | "outputId": "a908468a-3b1f-4680-afe7-6cc22a8a9394"
731 | },
732 | "outputs": [
733 | {
734 | "name": "stdout",
735 | "output_type": "stream",
736 | "text": [
737 | "_________________________________________________________________\n",
738 | "Layer (type) Output Shape Param # \n",
739 | "=================================================================\n",
740 | "input_8 (InputLayer) (None, 75) 0 \n",
741 | "_________________________________________________________________\n",
742 | "embedding_8 (Embedding) (None, 75, 40) 1407200 \n",
743 | "_________________________________________________________________\n",
744 | "bidirectional_8 (Bidirection (None, 75, 100) 36400 \n",
745 | "_________________________________________________________________\n",
746 | "time_distributed_8 (TimeDist (None, 75, 50) 5050 \n",
747 | "_________________________________________________________________\n",
748 | "crf_8 (CRF) (None, 75, 18) 1278 \n",
749 | "=================================================================\n",
750 | "Total params: 1,449,928\n",
751 | "Trainable params: 1,449,928\n",
752 | "Non-trainable params: 0\n",
753 | "_________________________________________________________________\n"
754 | ]
755 | }
756 | ],
757 | "source": [
758 | "num_tags = df['Tag'].nunique()\n",
759 | "# Model architecture\n",
760 | "input = Input(shape = (max_len,))\n",
761 | "model = Embedding(input_dim = len(words) + 2, output_dim = embedding, input_length = max_len, mask_zero = True)(input)\n",
762 | "model = Bidirectional(LSTM(units = 50, return_sequences=True, recurrent_dropout=0.1))(model)\n",
763 | "model = TimeDistributed(Dense(50, activation=\"relu\"))(model)\n",
764 | "crf = CRF(num_tags+1) # CRF layer\n",
765 | "out = crf(model) # output\n",
766 | "\n",
767 | "model = Model(input, out)\n",
768 | "model.compile(optimizer=\"rmsprop\", loss=crf.loss_function, metrics=[crf.accuracy])\n",
769 | "\n",
770 | "model.summary()"
771 | ]
772 | },
773 | {
774 | "cell_type": "markdown",
775 | "metadata": {},
776 | "source": [
777 | "Checkpointing after each epoch so that only the model with the best (lowest) validation loss is saved, rather than a later model whose validation loss has worsened due to overfitting."
778 | ]
779 | },
780 | {
781 | "cell_type": "code",
782 | "execution_count": 25,
783 | "metadata": {
784 | "colab": {},
785 | "colab_type": "code",
786 | "id": "CJcJLVXWa7r1"
787 | },
788 | "outputs": [],
789 | "source": [
790 | "checkpointer = ModelCheckpoint(filepath = 'model.h5',\n",
791 | " verbose = 0,\n",
792 | " mode = 'auto',\n",
793 | " save_best_only = True,\n",
794 | " monitor='val_loss')"
795 | ]
796 | },
797 | {
798 | "cell_type": "code",
799 | "execution_count": 119,
800 | "metadata": {
801 | "colab": {
802 | "base_uri": "https://localhost:8080/",
803 | "height": 326
804 | },
805 | "colab_type": "code",
806 | "executionInfo": {
807 | "elapsed": 2003225,
808 | "status": "ok",
809 | "timestamp": 1560708147077,
810 | "user": {
811 | "displayName": "CHAVAN AKSHAY",
812 | "photoUrl": "",
813 | "userId": "10674464813829582221"
814 | },
815 | "user_tz": -330
816 | },
817 | "id": "SjKhhXHMG-jJ",
818 | "outputId": "bd461b08-3920-4920-c3a6-10eb8f3cf432"
819 | },
820 | "outputs": [
821 | {
822 | "name": "stdout",
823 | "output_type": "stream",
824 | "text": [
825 | "Train on 36688 samples, validate on 4077 samples\n",
826 | "Epoch 1/8\n",
827 | "36688/36688 [==============================] - 251s 7ms/step - loss: 8.8293 - crf_viterbi_accuracy: 0.9762 - val_loss: 8.7456 - val_crf_viterbi_accuracy: 0.9693\n",
828 | "Epoch 2/8\n",
829 | "36688/36688 [==============================] - 249s 7ms/step - loss: 8.8255 - crf_viterbi_accuracy: 0.9776 - val_loss: 8.7474 - val_crf_viterbi_accuracy: 0.9692\n",
830 | "Epoch 3/8\n",
831 | "36688/36688 [==============================] - 250s 7ms/step - loss: 8.8225 - crf_viterbi_accuracy: 0.9787 - val_loss: 8.7480 - val_crf_viterbi_accuracy: 0.9669\n",
832 | "Epoch 4/8\n",
833 | "36688/36688 [==============================] - 250s 7ms/step - loss: 8.8199 - crf_viterbi_accuracy: 0.9796 - val_loss: 8.7469 - val_crf_viterbi_accuracy: 0.9695\n",
834 | "Epoch 5/8\n",
835 | "36688/36688 [==============================] - 250s 7ms/step - loss: 8.8174 - crf_viterbi_accuracy: 0.9810 - val_loss: 8.7537 - val_crf_viterbi_accuracy: 0.9668\n",
836 | "Epoch 6/8\n",
837 | "36688/36688 [==============================] - 251s 7ms/step - loss: 8.8150 - crf_viterbi_accuracy: 0.9820 - val_loss: 8.7508 - val_crf_viterbi_accuracy: 0.9669\n",
838 | "Epoch 7/8\n",
839 | "36688/36688 [==============================] - 250s 7ms/step - loss: 8.8128 - crf_viterbi_accuracy: 0.9830 - val_loss: 8.7558 - val_crf_viterbi_accuracy: 0.9650\n",
840 | "Epoch 8/8\n",
841 | "36688/36688 [==============================] - 250s 7ms/step - loss: 8.8106 - crf_viterbi_accuracy: 0.9839 - val_loss: 8.7559 - val_crf_viterbi_accuracy: 0.9662\n"
842 | ]
843 | }
844 | ],
845 | "source": [
846 | "history = model.fit(X_train, np.array(y_train), batch_size=batch_size, epochs=epochs,\n",
847 | " validation_split=0.1, callbacks=[checkpointer])"
848 | ]
849 | },
850 | {
851 | "cell_type": "code",
852 | "execution_count": 99,
853 | "metadata": {
854 | "colab": {
855 | "base_uri": "https://localhost:8080/",
856 | "height": 34
857 | },
858 | "colab_type": "code",
859 | "executionInfo": {
860 | "elapsed": 1054,
861 | "status": "ok",
862 | "timestamp": 1560705868376,
863 | "user": {
864 | "displayName": "CHAVAN AKSHAY",
865 | "photoUrl": "",
866 | "userId": "10674464813829582221"
867 | },
868 | "user_tz": -330
869 | },
870 | "id": "vEsREje5ubq-",
871 | "outputId": "083b774d-ae35-4720-e515-7930617364d4"
872 | },
873 | "outputs": [
874 | {
875 | "data": {
876 | "text/plain": [
877 | "dict_keys(['val_loss', 'val_crf_viterbi_accuracy', 'loss', 'crf_viterbi_accuracy'])"
878 | ]
879 | },
880 | "execution_count": 99,
881 | "metadata": {
882 | "tags": []
883 | },
884 | "output_type": "execute_result"
885 | }
886 | ],
887 | "source": [
888 | "history.history.keys()"
889 | ]
890 | },
891 | {
892 | "cell_type": "markdown",
893 | "metadata": {},
894 | "source": [
895 | "Visualizing the performance of the model."
896 | ]
897 | },
898 | {
899 | "cell_type": "code",
900 | "execution_count": 120,
901 | "metadata": {
902 | "colab": {
903 | "base_uri": "https://localhost:8080/",
904 | "height": 516
905 | },
906 | "colab_type": "code",
907 | "executionInfo": {
908 | "elapsed": 1106,
909 | "status": "ok",
910 | "timestamp": 1560709905938,
911 | "user": {
912 | "displayName": "CHAVAN AKSHAY",
913 | "photoUrl": "",
914 | "userId": "10674464813829582221"
915 | },
916 | "user_tz": -330
917 | },
918 | "id": "QElZwYqqbSFV",
919 | "outputId": "60eea78e-8ea6-45be-9151-281264768c72"
920 | },
921 | "outputs": [
922 | {
923 | "data": {
924 | "text/plain": [
925 | ""
926 | ]
927 | },
928 | "execution_count": 120,
929 | "metadata": {
930 | "tags": []
931 | },
932 | "output_type": "execute_result"
933 | },
934 | {
935 | "data": {
936 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfoAAAHiCAYAAAAAkA6/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3Xl4VdX59vHvw2wYRAGVgiRUrcxD\njIBVZHC2CkopglGLVmNtsVarrYoDxVKtU52ob1OLFX8RRK2KA3VAFK0TAQEFRFABGaqIDGJEBp/3\nj7WDhxjICZzkJDv357py5Zy1p7VPuLjP2nvttczdERERkXiqle4KiIiISMVR0IuIiMSYgl5ERCTG\nFPQiIiIxpqAXERGJMQW9iIhIjCnoRQAzq21mG82sTSrXTSczO9jMUv78rJkda2ZLEt4vNLPeyay7\nG8e6z8yu3t3tRQTqpLsCIrvDzDYmvM0AvgG2Re8vdPeC8uzP3bcBjVK9bk3g7oemYj9mdj5wlrv3\nTdj3+anYt0hNpqCXasndtwdt1GI8391f3Nn6ZlbH3bdWRt1EyqJ/j1KZdOleYsnM/mRmD5vZBDP7\nEjjLzI4wszfNbJ2ZrTKzu8ysbrR+HTNzM8uK3v9ftHyKmX1pZm+YWdvyrhstP8nMPjCz9WZ2t5n9\n18yG76TeydTxQjNbbGZrzeyuhG1rm9lfzWyNmX0EnLiLz2ekmU0sUTbWzG6PXp9vZgui8/kwam3v\nbF/Lzaxv9DrDzB6M6jYPOKzEuteY2UfRfueZ2YCovDNwD9A7ui3yecJnOyph+19G577GzJ4ws5bJ\nfDbl+ZyL62NmL5rZF2b2PzP7fcJxro0+kw1mVmhmPyjtNomZvVb8d44+z+nRcb4ArjGzQ8xsWnSM\nz6PPbe+E7TOjc1wdLb/TzBpEdW6fsF5LMysys2Y7O1+p2RT0EmenAw8BewMPA1uBS4DmwJGEILxw\nF9ufCVwL7AssA24o77pmth8wCbgiOu7HQI9d7CeZOp5MCNDuhC8wx0blFwHHA12Bw4EhuzjOBOAU\nM2sY1bMO8DPC5wXwKfAToAlwAXC3mXXZxf6KjQYOBH4Y1fPnJZZ/EJ3X3sAY4CEz29/d3wVGAK+6\neyN3b15yx2Z2fLT/wUArYCVQ8hbNzj6bknb6OUdh+yLwFNAS+BHwcrTdFdHxTwSaAucDm3b1gST4\nMbAAaAH8BTDgT8ABQAfCZ3ZtVIc6wDPAYiCL8JlOcvdNhH9PZyXs90zgOXdfk2Q9pIZR0Eucvebu\nT7n7t+7+tbvPcPe33H2ru38E5AN9drH9o+5e6O5bCIHSbTfWPQWY7e5PRsv+Cny+s50kWccb3X29\nuy8hBFDxsYYAf3X35dF/+jft4jgfAe8BA6Oi44C17l4YLX/K3T/y4CVgKlBqh7sShgB/cve17r6U\n0EpPPO4kd18V/U0eApYAOUnsFyAXuM/dZ0eBdyXQx8xaJ6yzs89mB2V8zgOAZe5+p7t/4+4b3P3t\naNn5wNXuvig6h9nu/kWS9V/m7ve6+7bo3+MH7j7V3Te7+2eEfxvFdTiC8CXkD+7+VbT+f6NlDwBn\nmplF788GHkyyDlIDKeglzj5JfGNm7czsmehS7AZC6/B7LccE/0t4XcSuO+DtbN0fJNbDwyxSy3e2\nkyTrmNSxgKW7qC+E1vuw6PWZfNeax8xOMbO3osvK6whXCnb1WRVruas6mNlwM5sTXX5eB7RLcr8Q\nzm/7/tx9A7CW0LovltTfrIzP+UDgw53UYVfLylLy3+MBZjbJzFZEdfhXiTosiTp+7iAK/K3AUWbW\nCWhDaP2LlEpBL3FW8tGyvxNasQe7exPgOsLl04q0Ctje4oxaYa12vvoe1XEVISCKlfX43yTgWDNr\nRWjZPxTVcS/gUeBGYH93bwo8n2Q9/r
ezOpjZD4F7CbcYmkX7fT9hv2U9CrgSyEzYX2NgH2BFEvUq\naVef8yfAQTvZbmfLvorqlJFQdkCJdUqe318IT4t0juowvEQdMs2s9k7qMZ5w+f5swiX9b3aynoiC\nXmqUxsB64KuoM9Ou7s+nytNAtpmdGt13vYRwj7Yi6jgJ+K2ZtYo6Zv1hVyu7+/+A1wgtyYXuviha\nVB+oB6wGtpnZKcAx5ajD1WbW1MI4AyMSljUihN1qwneeCwgt+mKfAq0TO8WVMAH4hZl1MbP6hC8i\nr7r7Tq+Q7MKuPufJQBszG2Fm9c2siZkV96u4D/iTmR1kQTcz25fwBed/hH4Btc0sj4QvJbuow1fA\nejM7ELg8YdkbwBrgzxY6OO5lZkcmLH+Q0FfgTELoi+yUgl5qkt8ROod9SWjRPVzRB3T3T4EzgNsJ\n/3EfBLxDaMmluo73Eu6lvwvMILTKy/IQcCwJl+3dfR1wKfA48AUhUJ5Osg7XE64sLAGmkBBC7j4X\nuBt4O1rnUOCthG1fABYBn5pZ4iX44u3/Q7jE/ni0fRvCffvdsdPP2d3XE/os/JTw5eMDvrt3fgvw\nBOFz3kC4t98guiVzAXA1oQ/GwSXOrTTXEzpmrid8uXgsoQ5bCf072hNa98sIf4fi5UsIf+dv3P31\ncp671DAW/n2KSGWILsWuBAa7+6vpro9UX2Y2HvjI3Ueluy5StWnAHJEKZmYnAm8CXwNXAVsIrVqR\n3RL1dxgIdE53XaTq06V7kYp3FPAR4d70CcDp6jwlu8vMbgTmAH9292Xpro9Ufbp0LyIiEmNq0YuI\niMSYgl5ERCTGYtEZr3nz5p6VlZXuaoiIiFSamTNnfu7uuxqXA4hJ0GdlZVFYWJjuaoiIiFQaMytr\nmGtAl+5FRERiTUEvIiISYwp6ERGRGIvFPfrSbNmyheXLl7Np06Z0V0V2oUGDBrRu3Zq6dXc2j4mI\niOyJ2Ab98uXLady4MVlZWYSZQaWqcXfWrFnD8uXLadu2bbqrIyISS7G9dL9p0yaaNWumkK/CzIxm\nzZrpqouISAWKbdADCvlqQH8jEZGKFeugT6c1a9bQrVs3unXrxgEHHECrVq22v9+8eXNS+zj33HNZ\nuHDhLtcZO3YsBQUFqaiyiIjEUGzv0ZdXQQGMHAnLlkGbNjBmDOTm7v7+mjVrxuzZswEYNWoUjRo1\n4vLLL99hHXfH3alVq/TvW/fff3+Zx/n1r3+9+5UUEZHYU4ueEPJ5ebB0KbiH33l5oTzVFi9eTIcO\nHcjNzaVjx46sWrWKvLw8cnJy6NixI6NHj96+7lFHHcXs2bPZunUrTZs25corr6Rr164cccQRfPbZ\nZwBcc8013HHHHdvXv/LKK+nRoweHHnoor7/+OgBfffUVP/3pT+nQoQODBw8mJydn+5eQRNdffz2H\nH344nTp14pe//CXFMxt+8MEH9O/fn65du5Kdnc2SJUsA+POf/0znzp3p2rUrI0eOTP2HJSIie0xB\nT2jJFxXtWFZUFMorwvvvv8+ll17K/PnzadWqFTfddBOFhYXMmTOHF154gfnz539vm/Xr19OnTx/m\nzJnDEUccwbhx40rdt7vz9ttvc8stt2z/0nD33XdzwAEHMH/+fK699lreeeedUre95JJLmDFjBu++\n+y7r16/nP//5DwDDhg3j0ksvZc6cObz++uvst99+PPXUU0yZMoW3336bOXPm8Lvf/S5Fn46IiKSS\ngp5wub485XvqoIMOIicnZ/v7CRMmkJ2dTXZ2NgsWLCg16Pfaay9OOukkAA477LDtreqSBg0a9L11\nXnvtNYYOHQpA165d6dixY6nbTp06lR49etC1a1deeeUV5s2bx9q1a/n888859dRTgfDce0ZGBi++\n+CLnnXcee+21FwD77rtv+T8IERGpcLpHT7gnv7SUqQHatKmY4zVs2HD760WLFnHnnXfy9ttv07Rp\nU8
4666xSHzerV6/e9te1a9dm69atpe67fv36Za5TmqKiIkaMGMGsWbNo1aoV11xzjR57ExGJAbXo\nCR3vMjJ2LMvICOUVbcOGDTRu3JgmTZqwatUqnnvuuZQf48gjj2TSpEkAvPvuu6VeMfj666+pVasW\nzZs358svv+Sxxx4DYJ999qFFixY89dRTQBifoKioiOOOO45x48bx9ddfA/DFF1+kvN4iIrLn1KLn\nu971qex1n6zs7Gw6dOhAu3btyMzM5Mgjj0z5MS6++GLOOeccOnTosP1n77333mGdZs2a8fOf/5wO\nHTrQsmVLevbsuX1ZQUEBF154ISNHjqRevXo89thjnHLKKcyZM4ecnBzq1q3Lqaeeyg033JDyuouI\nyJ6x4p7V1VlOTo6XnI9+wYIFtG/fPk01qlq2bt3K1q1badCgAYsWLeL4449n0aJF1KlTNb7n6W8l\nInGV6ke3E5nZTHfPKWu9qvE/vVSojRs3cswxx7B161bcnb///e9VJuRFROKq+NHt4qe6ih/dhsq5\nYlxM/9vXAE2bNmXmzJnproaISI2yq0e3KzPo1RlPRESkAlT2o9s7o6AXERGpADt7RLuiHt3eGQW9\niIhIBUjno9uJFPQiIiIVIDcX8vMhMxPMwu/8/Mq9Pw8K+grTr1+/7w1+c8cdd3DRRRftcrtGjRoB\nsHLlSgYPHlzqOn379qXk44Ql3XHHHRQl9AI5+eSTWbduXTJVFxGRFMnNhSVL4Ntvw+/KDnlQ0FeY\nYcOGMXHixB3KJk6cyLBhw5La/gc/+AGPPvrobh+/ZNA/++yzNG3adLf3JyIi1ZOCvoIMHjyYZ555\nhs2bNwOwZMkSVq5cSe/evbc/156dnU3nzp158sknv7f9kiVL6NSpExCGpx06dCjt27fn9NNP3z7s\nLMBFF120fYrb66+/HoC77rqLlStX0q9fP/r16wdAVlYWn3/+OQC33347nTp1olOnTtunuF2yZAnt\n27fnggsuoGPHjhx//PE7HKfYU089Rc+ePenevTvHHnssn376KRCe1T/33HPp3LkzXbp02T6E7n/+\n8x+ys7Pp2rUrxxxzTEo+WxERSV6NeI7+t7+FUqZf3yPdukGUkaXad9996dGjB1OmTGHgwIFMnDiR\nIUOGYGY0aNCAxx9/nCZNmvD555/Tq1cvBgwYgJmVuq97772XjIwMFixYwNy5c8nOzt6+bMyYMey7\n775s27aNY445hrlz5/Kb3/yG22+/nWnTptG8efMd9jVz5kzuv/9+3nrrLdydnj170qdPH/bZZx8W\nLVrEhAkT+Mc//sGQIUN47LHHOOuss3bY/qijjuLNN9/EzLjvvvu4+eabue2227jhhhvYe++9effd\ndwFYu3Ytq1ev5oILLmD69Om0bdtW4+GLiKSBWvQVKPHyfeJle3fn6quvpkuXLhx77LGsWLFie8u4\nNNOnT98euF26dKFLly7bl02aNIns7Gy6d+/OvHnzSp2wJtFrr73G6aefTsOGDWnUqBGDBg3i1Vdf\nBaBt27Z069YN2PlUuMuXL+eEE06gc+fO3HLLLcybNw+AF198kV//+tfb19tnn3148803Ofroo2nb\nti2gqWxFRNKhRrTod9XyrkgDBw7k0ksvZdasWRQVFXHYYYcBYZKY1atXM3PmTOrWrUtWVtZuTQn7\n8ccfc+uttzJjxgz22Wcfhg8fvkdTyxZPcQthmtvSLt1ffPHFXHbZZQwYMICXX36ZUaNG7fbxRESk\n4qlFX4EaNWpEv379OO+883bohLd+/Xr2228/6taty7Rp01i6dOku93P00Ufz0EMPAfDee+8xd+5c\nIExx27BhQ/bee28+/fRTpkyZsn2bxo0b8+WXX35vX7179+aJJ56gqKiIr776iscff5zevXsnfU7r\n16+nVatWADzwwAPby4877jjGjh27/f3atWvp1asX06dP5+OPPwY0
la2ISDoo6CvYsGHDmDNnzg5B\nn5ubS2FhIZ07d2b8+PG0a9dul/u46KKL2LhxI+3bt+e6667bfmWga9eudO/enXbt2nHmmWfuMMVt\nXl4eJ5544vbOeMWys7MZPnw4PXr0oGfPnpx//vl079496fMZNWoUP/vZzzjssMN2uP9/zTXXsHbt\nWjp16kTXrl2ZNm0aLVq0ID8/n0GDBtG1a1fOOOOMpI8jIiKpoWlqJe30txIRKb9kp6lVi15ERCTG\nFPQiIiIxpqAXERGJsVgHfRz6H8Sd/kYiNVNBAWRlQa1a4XdBQbprFF+xDfoGDRqwZs0aBUkV5u6s\nWbOGBg0apLsqIlKJCgogLw+WLgX38DsvT2FfUWLb637Lli0sX758jwaQkYrXoEEDWrduTd26ddNd\nFRGpJFlZIdxLyswMM7xJcpLtdR/bkfHq1q27fehVERGpOpYtK1+57JnYXroXEZGqqU2b8pXLnlHQ\ni4hIpRozBjIydizLyAjlknoKehERqVS5uZCfH+7Jm4Xf+fmhXFIvtvfoRUSk6srNVbBXFrXoRURE\nYiypoDezE81soZktNrMrS1meaWZTzWyumb1sZq0Tlt1sZvPMbIGZ3WVBhpk9Y2bvR8tuSlh/uJmt\nNrPZ0c/5qTlVERGRmqfMoDez2sBY4CSgAzDMzDqUWO1WYLy7dwFGAzdG2/4YOBLoAnQCDgf6FG/j\n7u2A7sCRZnZSwv4edvdu0c99u312IiIiNVwyLfoewGJ3/8jdNwMTgYEl1ukAvBS9npaw3IEGQD2g\nPlAX+NTdi9x9GkC0z1lAa0RERCSlkgn6VsAnCe+XR2WJ5gCDotenA43NrJm7v0EI/lXRz3PuviBx\nQzNrCpwKTE0o/ml0G+BRMzsw6bMRERGRHaSqM97lQB8ze4dwaX4FsM3MDgbaE1rrrYD+Zta7eCMz\nqwNMAO5y94+i4qeArOg2wAvAA6Ud0MzyzKzQzApXr16dotMQERGJl2SCfgWQ2KpuHZVt5+4r3X2Q\nu3cHRkZl6wit+zfdfaO7bwSmAEckbJoPLHL3OxL2tcbdv4ne3gccVlql3D3f3XPcPadFixZJnIaI\niEjNk0zQzwAOMbO2ZlYPGApMTlzBzJqbWfG+rgLGRa+XEVr6dcysLqG1vyDa5k/A3sBvS+yrZcLb\nAcXri4iISPmVGfTuvhUYATxHCN1J7j7PzEab2YBotb7AQjP7ANgfKB7I8FHgQ+Bdwn38Oe7+VPT4\n3UhCJ75ZJR6j+030yN0c4DfA8BScp4iISI0U22lqRURE4izZaWo1Mp6IiEiMKehFRERiTEEvIlKF\nFBRAVhbUqhV+FxSku0ZS3Wn2OhGRKqKgAPLyoKgovF+6NLwHzfQmu08tehGRKmLkyO9CvlhRUSgX\n2V0KehGRKmLZsvKViyRDQS8iUkW0aVO+cpFkKOhFRKqIMWMgI2PHsoyMUC6yuxT0IiJVRG4u5OdD\nZiaYhd/5+eqIJ3tGve5FRKqQ3FwFu6SWWvQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMK\nehERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMKehERkRhT\n0ItItVBQAFlZUKtW+F1QkO4aiVQPmo9eRKq8ggLIy4OiovB+6dLwHjR3u0hZ1KIXkSpv5MjvQr5Y\nUVEoF5FdU9CLSJW3bFn5ykXkOwp6Eany2rQpX7mIfEdBLyJV3pgxkJGxY1lGRigXkV1T0ItIlZeb\nC/n5kJkJZuF3fr464okkQ73uRaRayM1VsIvsDrXoRUREYkxBLyIiEmMKehERkRhT0IuIiMSYgl5E\nRCTGFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMKehERkRhT0IuIiMRYUkFvZiea2UIz\nW2xmV5ayPNPMpprZXDN72cxa
Jyy72czmmdkCM7vLzCwqP8zM3o32mVi+r5m9YGaLot/7pOpkRURE\napoyg97MagNjgZOADsAwM+tQYrVbgfHu3gUYDdwYbftj4EigC9AJOBzoE21zL3ABcEj0c2JUfiUw\n1d0PAaZG70VERGQ3JNOi7wEsdveP3H0zMBEYWGKdDsBL0etpCcsdaADUA+oDdYFPzawl0MTd33R3\nB8YDp0XbDAQeiF4/kFAuIiIi5ZRM0LcCPkl4vzwqSzQHGBS9Ph1obGbN3P0NQvCvin6ec/cF0fbL\nd7LP/d19VfT6f8D+pVXKzPLMrNDMClevXp3EaYiIiNQ8qeqMdznQx8zeIVyaXwFsM7ODgfZAa0KQ\n9zez3snuNGrt+06W5bt7jrvntGjRYo9PQEREJI6SCfoVwIEJ71tHZdu5+0p3H+Tu3YGRUdk6Quv+\nTXff6O4bgSnAEdH2rXeyz+JL+0S/Pyv3WYmIiAiQXNDPAA4xs7ZmVg8YCkxOXMHMmptZ8b6uAsZF\nr5cRWvp1zKwuobW/ILo0v8HMekW97c8Bnoy2mQz8PHr984RyERERKacyg97dtwIjgOeABcAkd59n\nZqPNbEC0Wl9goZl9QLinPiYqfxT4EHiXcB9/jrs/FS37FXAfsDhaZ0pUfhNwnJktAo6N3ouIiMhu\nsHAbvHrLycnxwsLCdFdDJC0KCmDkSFi2DNq0gTFjIDc33bUSkYpmZjPdPaes9epURmVEpGIUFEBe\nHhQVhfdLl4b3oLAXkUBD4IpUYyNHfhfyxYqKQrmICCjoRaq1ZcvKVy4iNY+CXqQaa9OmfOUiUvMo\n6EWqsTFjICNjx7KMjFAuIgIKepFqLTcX8vMhMxPMwu/8fHXEE5HvqNe9SDWXm6tgF5GdU4teREQk\nxhT0IiIiMaagFxERiTEFvYiISIwp6EVERGJMQS8iIhJjCnoREZEYU9CLiIjEmIJeREQkxhT0IiIi\nMaagFxERiTEFvYiISIwp6CW2CgogKwtq1Qq/CwrSXSMRkcqn2esklgoKIC8PiorC+6VLw3vQTG8i\nUrOoRS+xNHLkdyFfrKgolIuI1CQKeomlZcvKVy4iElcKeomlNm3KVy4iElcKeomlMWMgI2PHsoyM\nUC4iUpMo6CWWcnMhPx8yM8Es/M7PV0c8Eal51OteYis3V8EuIqIWvYiISIwp6EVERGJMQS8iIhJj\nCnoREZEYU9CLiIjEmIJeREQkxhT0IiIiMaagFxERiTEFvYiISIwp6EVERGJMQS8iIhJjCnoREZEY\nU9CLiIjEmIJeREQkxhT0IiIiMaagr4EKCiArC2rVCr8LCtJdIxERqShJBb2ZnWhmC81ssZldWcry\nTDObamZzzexlM2sdlfczs9kJP5vM7LRo2asJ5SvN7ImovK+ZrU9Ydl0qT7imKyiAvDxYuhTcw++8\nPIW9iEhcmbvvegWz2sAHwHHAcmAGMMzd5yes8wjwtLs/YGb9gXPd/ewS+9kXWAy0dveiEsseA550\n9/Fm1he43N1PSfYkcnJyvLCwMNnVa7SsrBDuJWVmwpIllV0bERHZXWY2091zylovmRZ9D2Cxu3/k\n7puBicDAEut0AF6KXk8rZTnAYGBKKSHfBOgPPJFEXWQPLVtWvnIREanekgn6VsAnCe+XR2WJ5gCD\notenA43NrFmJdYYCE0rZ/2nAVHffkFB2hJnNMbMpZtYxiTpKktq0KV+5iIhUb6nqjHc50MfM3gH6\nACuAbcULzawl0Bl4rpRth7HjF4BZQKa7dwXuZictfTPLM7NCMytcvXp1as6iBhgzBjIydizLyAjl\nIiISP8kE/QrgwIT3raOy7dx9pbsPcvfuwMiobF3CKkOAx919S+J2ZtaccGvgmYR9bXD3jdHrZ4G6\n0Xo7cPd8d89x95wWLVokcRoCkJsL+fnhnrxZ+J2fH8pFRCR+6iSxzgzgEDNrSwj4ocCZiStEQf
yF\nu38LXAWMK7GPYVF5SYMJnfg2JezrAOBTd3cz60H4MrImyfORJOTmKthFRGqKMlv07r4VGEG47L4A\nmOTu88xstJkNiFbrCyw0sw+A/YHtF4LNLItwReCVUnZf2n37wcB7ZjYHuAsY6mU9GiAiIiKlKvPx\nuupAj9eJiEhNk8rH60RERKSaUtCLiIjEmIJeREQkxhT0IiIiMaagFxERiTEFvYiISIwp6EVERGJM\nQS8iIhJjCnoREZEYU9CLiIjEmIJeREQkxhT0IiIiMaagFxERiTEFvYiISIwp6EVERGJMQS8iIhJj\nCnoREZEYU9CLiIjEmIJeREQkxhT0CQoKICsLatUKvwsK0l0jERGRPVMn3RWoKgoKIC8PiorC+6VL\nw3uA3Nz01UtERGRPqEUfGTnyu5AvVlQUykVERKorBX1k2bLylYuIiFQHCvpImzblKxcREakOFPSR\nMWMgI2PHsoyMUC4iIlJdKegjubmQnw+ZmWAWfufnqyOeiIhUb+p1nyA3V8EuIiLxoha9iIhIjCno\nRUREYkxBLyIiEmMKehERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxB\nLyIiEmMKehERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJsaSC3sxONLOFZrbYzK4sZXmmmU01\ns7lm9rKZtY7K+5nZ7ISfTWZ2WrTsX2b2ccKyblG5mdld0bHmmll2Kk9YRESkJqlT1gpmVhsYCxwH\nLAdmmNlkd5+fsNqtwHh3f8DM+gM3Ame7+zSgOMD3BRYDzydsd4W7P1rikCcBh0Q/PYF7o98iIiJS\nTsm06HsAi939I3ffDEwEBpZYpwPwUvR6WinLAQYDU9y9qIzjDSR8aXB3fxNoamYtk6iniIiIlJBM\n0LcCPkl4vzwqSzQHGBS9Ph1obGbNSqwzFJhQomxMdHn+r2ZWvxzHExERkSSkqjPe5UAfM3sH6AOs\nALYVL4xa5J2B5xK2uQpoBxwO7Av8oTwHNLM8Mys0s8LVq1fvYfVFRETiKZmgXwEcmPC+dVS2nbuv\ndPdB7t4dGBmVrUtYZQjwuLtvSdhmVXR5/hvgfsItgqSOF22f7+457p7TokWLJE5DRESk5kkm6GcA\nh5hZWzOrR7gEPzlxBTNrbmbF+7oKGFdiH8Mocdm++L67mRlwGvBetGgycE7U+74XsN7dV5XjnERE\nRCRSZq97d99qZiMIl91rA+PcfZ6ZjQYK3X0y0Be40cwcmA78unh7M8sitNBfKbHrAjNrARgwG/hl\nVP4scDKhh34RcO7unpyIiEhNZ+6e7jrssZycHC8sLEx3NURERCqNmc1095yy1tPIeCIiIjGmoBcR\nEYkxBb2IiEiMKehFRERiTEHbRt+eAAAgAElEQVQvIiISYwp6ERGRGFPQi4iIxJiCXkREJMYU9CIi\nIjGmoBcREYkxBb2IiEiMKehFRERiTEEvIiISYwp6ERGRGFPQi4iIxJiCXkREJMYU9CIiIjGmoBcR\nEYkxBb2IiEiMKehFRERiTEEvIiISYwp6ERGRGFPQi4iIxJiCXkREJMYU9CIiIjGmoBcREYkxBb2I\niEiMKehFRERiTEEvIiISYwp6ERGRGFPQi4iIxJiCXkREJMYU9CIiIjGmoBcREYkxBb2IiEiMKehF\nRERiTEEvIiISYwp6ERGRGFPQi4iIxJiCXkREJMYU9CIiIjGmoBcREYkxBb2IiEiMKehFRERiLKmg\nN7MTzWyhmS02sytLWZ5pZlPNbK6ZvWxmraPyfmY2O+Fnk5mdFi0riPb5npmNM7O6UXlfM1ufsM11\nqTxhERGRmqTMoDez2sBY4CSgAzDMzDqUWO1WYLy7dwFGAzcCuPs0d+/m7t2A/kAR8Hy0TQHQDugM\n7AWcn7C/V4u3c/fRu312IiIiNVwyLfoewGJ3/8jdNwMTgY
El1ukAvBS9nlbKcoDBwBR3LwJw92c9\nArwNtN6dExAREZGdSyboWwGfJLxfHpUlmgMMil6fDjQ2s2Yl1hkKTCi58+iS/dnAfxKKjzCzOWY2\nxcw6llYpM8szs0IzK1y9enUSpyEiIlLzpKoz3uVAHzN7B+gDrAC2FS80s5aES/TPlbLt34Dp7v5q\n9H4WkOnuXYG7gSdKO6C757t7jrvntGjRIkWnISIiEi/JBP0K4MCE962jsu3cfaW7D3L37sDIqGxd\nwipDgMfdfUvidmZ2PdACuCxhXxvcfWP0+lmgrpk1T/6UREREpFgyQT8DOMTM2ppZPcIl+MmJK5hZ\nczMr3tdVwLgS+xhGicv2ZnY+cAIwzN2/TSg/wMwset0jquOa5E9JREREipUZ9O6+FRhBuOy+AJjk\n7vPMbLSZDYhW6wssNLMPgP2BMcXbm1kW4YrAKyV2/f+idd8o8RjdYOA9M5sD3AUMjTrsiYiISDlZ\nHDI0JyfHCwsL010NERGRSmNmM909p6z1NDKeiIhIjCnoRUREYkxBLyIiEmMKehERkRhT0IuIiMSY\ngl5ERCTGFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMKehERkRhT0IuIiMSYgl5ERCTG\nFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMKehERkRhT0IuIiMSYgl5ERCTGFPQiIiIx\npqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMKehERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJ\nMQW9iIhIjCnoRUREYkxBLyIiEmMKehERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJMQW9iIhI\njCnoRUREYkxBLyIiEmNJBb2ZnWhmC81ssZldWcryTDObamZzzexlM2sdlfczs9kJP5vM7LRoWVsz\neyva58NmVi8qrx+9Xxwtz0rd6YqIiNQsZQa9mdUGxgInAR2AYWbWocRqtwLj3b0LMBq4EcDdp7l7\nN3fvBvQHioDno23+AvzV3Q8G1gK/iMp/AayNyv8arSciIiK7IZkWfQ9gsbt/5O6bgYnAwBLrdABe\nil5PK2U5wGBgirsXmZkRgv/RaNkDwGnR64HRe6Llx0Tri4iISDklE/StgE8S3i+PyhLNAQZFr08H\nGptZsxLrDAUmRK+bAevcfWsp+9x+vGj5+mh9ERERKadUdca7HOhjZu8AfYAVwLbihWbWEugMPJei\n42FmeWZWaGaFq1evTtVuRUREYiWZoF8BHJjwvnVUtp27r3T3Qe7eHRgZla1LWGUI8Li7b4nerwGa\nmlmdUva5/XjR8r2j9Xfg7vnunuPuOS1atEjiNERERGqeZIJ+BnBI1Eu+HuES/OTEFcysuZkV7+sq\nYFyJfQzju8v2uLsT7uUPjop+DjwZvZ4cvSda/lK0voiIiJRTmUEf3ScfQbjsvgCY5O7zzGy0mQ2I\nVusLLDSzD4D9gTHF20ePxx0IvFJi138ALjOzxYR78P+Myv8JNIvKLwO+9zifiIiIJMfi0FjOycnx\nwsLCdFdDRESk0pjZTHfPKWs9jYwnIiISYwp6ERGRGFPQi4iIxJiCXkREJMYU9CIxsGULzJoFMehb\nKyIppqAXqeaKimDgQDjsMPjDHxT2IrIjBb1INbZ2LRx3HPznP3DssXDLLXDJJQp7EflOnbJXqVnW\nr4f334eMDGjYMPwu/qmjT0uqkFWr4IQTYOFCmDQJfvpTuOIKuO02+OYbuPdeqKWv8iI1nqKrhMLC\n0DIqTb163/8CkMrXe+2l/5glOR9+GFryn30Gzzzz3b/ZW26BBg1gzBjYtAn++U99QRWp6fRfQAld\nu4b/OIuKws9XXyX3evXq75dv3lz+4zdoUHFfJDIywv7NUv+5SeWZOze05Ddvhpdegh49vltmBn/6\nU/g7X3ttaNk/+CDUrZ
u++opIeinoS2jeHE4+OTX72rIFvv66fF8YdvZ6/fpwqbZk+bZtZdcjkdl3\n4b/ffnDXXdC/f2rOVyref/8LP/kJNGoEr74KHTqUvt4114Swv+KKEPYTJ0L9+pVbVxGpGhT0Fahu\n3fDTpEnF7N89fJnY3S8P06bB8cfD3XfDRRdVTB0ldZ59FgYPhgMPhOefh8zMXa9/+eUh7C++GE4/\nHR57LNweEpGaRUFfjZmFfgP16kHTpuXffsMGGDYMfvUreO89uOMOXeKtqgoKYPhw6NIFpkwJV2OS\nMWJECPu8PDj1VHjyyXAbR0RqDnX9qsGaNIHJk0PL729/g5NOgi++SHetpKR77oGzzoIjjwxXYZIN\n+WLnnw8PPBC2Pemk8AVPRGoOBX0NV7t26Kl9//0wfTr07BkeL5T0c4dRo8Kl94EDw7Pyu3sb6Oyz\nYcIEeOONcLtm3bqUVlVEqjAFvQDhsvC0aaHTX69e8Nxz6a5Rzfbtt/Cb38Af/xj+No8+Gi7B74kh\nQ8J+Zs2CY46BNWtSUlURqeIU9LLdkUfCjBmhk9fJJ8Odd2qEtXTYsiVcqr/nHvjd71L7LPzAgeE+\n/fz50K8ffPppavYrIlWXgl52kJkZHuEaMAB++9vQiWt3xgOQ3VM8bv2ECXDjjeG2SqoHUTrppDBW\nxIcfQt++sGJFavcvIlWLgl6+p1Gj8CjW1VfDffeFEdg+/zzdtYq/xHHr//53uPLKihvcqH//cHtm\nxQro0weWLq2Y44hI+inopVS1aoVhVAsK4K23wuhr772X7lrF16pVIXALC8O49Xl5FX/Mo46CF14I\n9+qPPjq08EUkfhT0sktnngmvvBJG+DviCHj66XTXKH4+/DCE7kcfhUvqgwdX3rF79oSpU8MASkcf\nHSbIEZF4UdBLmXr2DJ30fvSjcO/+llvUSS9V5s4NIb9uXRi3fmcTKlWk7Gx4+WXYujVcVdCVG5F4\nUdBLUlq3DmOrDx4Mv/89nHtuGENddt9//xta0bVrh882cXKaytapU7hyU7t26KD3zjvpq4uIpJaC\nXpKWkREmRxk1Koy0psezdt+zz4aOd/vvHwJ/Z5PTVKZ27cKgSQ0bhs56b7+d7hqJSCoo6KVcatWC\n668PHcZmzw6t0Dlz0l2r6uWhh8IjdO3bh5Z8WZPTVKaDDgph36xZuI3w2mvprpGI7CkFveyWn/0s\nhNS2bfDjH8Pjj6e7RtXDPfdAbu7uj1tfGTIzw2X8H/wgzHv/0kvprpGI7AkFvey2ww4LnfQ6dYJB\ng+DPf1YnvZ1xD8PZpmLc+srQqlUI+x/+EH7yk1BfEameFPSyR1q2DD22c3Nh5Mjw++uv012rqqV4\n3PpRo1I3bn1l2H//cNWhffvvhs4VkepHQS97bK+94MEHQ4t+woTwiNbKlemuVdVQkePWV4bmzcNz\n9t27hycuHnkk3TUSkfJS0EtKmMFVV4V79fPnh056M2emu1bpVRnj1leGffaB558PsxoOHQr/93/p\nrpGIlEc1/G9HqrLTTguPi9WuDb17h975NdHatWHe98oYt74yNGkSzqVvXzjnnDAHgohUDwp6Sbmu\nXUMnvexsOOOMcG/622/TXavKUzxu/YwZlTdufWVo2DAMgXziiXDBBTB2bLprJCLJUNBLhdhvv3Bv\nd/jw0Nv8jDPCeOpx99FH6Ru3vjLstVe4PTNwIIwYAbfdlu4aiUhZFPRSYerXh3Hj4NZbw7S3vXvD\n8uXprlXFmTs3PB+fznHrK0P9+qFT3pAhcPnlYZZDEam6FPRSocxCb/OnnoLFi+Hww8O0t3FTlcat\nrwx164YpjM8+G665Bq69VmMoiFRVCnqpFD/5CbzxRrj026dPvHpuV8Vx6ytDnTrwr3/B+efDn/4E\nV1yhsBepihT0Umk6dgwTpfTqFVqCV11V/TvpVeVx6ytDrVrhqYLi+/UXX1z9/6YicVON
hu6QOGje\nPDyTPWIE3HQTLFgQBttp3DjdNSu/e+4JwdanD0yeXLWHtK1ItWrBXXeF0f5uvTVMX/z//l+4jSEi\n6aegl0pXr15oBXbqBJdeGjqwPfVU9WkNu8Po0eGxwYEDw9S91WFI24pkBjffHG7N3HBDCPtx46rX\nKIAicaVL95IWZmH89ylTYNmy0EmvOkyJWl3Hra8MZuEL0Jgx4SrNmWeGIYBFJL0U9JJWxx8feuE3\nbQr9+8P996e7Rju3ZUvoW1Bdx62vLFdfHe7XP/JImM74m2/SXSORmk1BL2l36KEh7Pv0gfPOCyG6\nbVu6a7Wj4nHrH3qoeo9bX1kuuyyMnPfkk2FYZM1oKJI++q9KqoR99gmPqY0YAbffDgMGwPr16a5V\nsG5dvMatryy/+lUYE/+55+CUU2rGyIgiVVFSQW9mJ5rZQjNbbGZXlrI808ymmtlcM3vZzFonLGtj\nZs+b2QIzm29mWVH5q2Y2O/pZaWZPROV9zWx9wrLrUnOqUtXVrQt33w333hvC4Ygj4MMP01unuI5b\nX1l+8QsYPx5efjmMkb9hQ7prJFLzlBn0ZlYbGAucBHQAhplZySFBbgXGu3sXYDRwY8Ky8cAt7t4e\n6AF8BuDuvd29m7t3A94A/p2wzavFy9x99G6em1RTv/xleATvf/8LI8y9/HJ66lE8bv2HH8Zz3PrK\nctZZ8PDD8OabYWChtWvTXSORmiWZFn0PYLG7f+Tum4GJwMAS63QAXopeTyteHn0hqOPuLwC4+0Z3\nL0rc0MyaAP2BJ3b7LCR2+vcPg+vsv38Ih/z8yj1+TRm3vrIMHgz//jfMnh3+tp9/nu4aidQcyQR9\nK+CThPfLo7JEc4BB0evTgcZm1gz4EbDOzP5tZu+Y2S3RFYJEpwFT3T3xot4RZjbHzKaYWcekz0Zi\n5eCDw7C5xx0HF14YHmvburXij/vf/4bL9TVl3PrKcuqpYWCh998P89p/+mm6ayRSM6SqM97lQB8z\newfoA6wAthEG5OkdLT8c+CEwvMS2w4AJCe9nAZnu3hW4m5209M0sz8wKzaxw9erVKToNqWr23jsM\npnPZZeH+/cknV+yl3+Jx6/fbr2aNW19ZTjghfMYffxy+TK1Yke4aicRfMkG/Ajgw4X3rqGw7d1/p\n7oPcvTswMipbR2j9z44u+28lhHZ28XZm1pxwa+CZhH1tcPeN0etngbrRejtw93x3z3H3nBYtWiR3\ntlIt1a4dnsv+5z/D/fpeveCDD1J/nJo+bn1l6dcv9MFYuTLM+Ld0abprJBJvyQT9DOAQM2trZvWA\nocDkxBXMrLmZFe/rKmBcwrZNzaw4ifsD8xM2HQw87e6bEvZ1gFl4eMnMekR1XFO+05I4Ou88mDoV\nvvgCevaEF15I3b7vuSd0GjvySJg2LbTopeIceSS8+GL4Wx59dJjCWEQqRplBH7XERwDPAQuASe4+\nz8xGm9mAaLW+wEIz+wDYHxgTbbuNcNl+qpm9Cxjwj4TdD2XHy/YQwv89M5sD3AUMddfklxL07h0e\ndWvdGk46KQT0nvzrcIc//jFMTjNgQHhWvqZOTlPZevQIX6q++ipcxn///XTXSCSeLA4ZmpOT44WF\nhemuhlSiL7+E3Nxw//7CC8P9+7p1y7ePb7+FSy4JXxaGD4d//END2qbDvHlwzDHhS9eLL0Lnzumu\nkUj1YGYz3T2nrPU0Mp5US40bw+OPh1Hq/v73MHLdmnLc4Ekct/6yyzRufTp17AjTp4cvan37wqxZ\n6a6RSLwo6KXaql07jDs/fjy8/nq4FDx/ftnblRy3/tZbNW59uv3oRyHsGzcOz9m/9Va6ayQSH/rv\nTaq9s8+GV14J93p79QqPb+2Mxq2vun74wxD2LVqEAYpefTXdNRKJBwW9xEKvXmEkvYMOCgOz3H77\n9zvpFY9b//bbGre+qmrTJnxpa906jI0/dWq6ayRS
/SnoJTbatIHXXgvTov7ud2FCleK50DVuffXx\ngx+EsD/oIPjJT2DKlHTXSKR6U/cjiZWGDeGRR2DUKLjhBli0KDw+l5sLmzeHces1pG3Vt99+4dG7\n448P/SkmTQpf4OLCPXwJ3bAhTMec+Lv4dYMGcM45UL9+umsr1Z0er5PYevjh8Njcpk3QqlUYjU1D\n2lYv69aF8RJmzICCAjjjjHTXKDyxkRjIpYV0Msu2bCn7WF27hvPuqBk/pBTJPl6nFr3E1hlnhMu/\n99wTWvUa0rb6ado0fEE75RQ488zQCj7nnN3b17ZtsHFj8kG8s7Kvvy77WHXqhHkamjT57nerVuGL\nZmJZ8e/Syl5/Hc4/Hw47DG6+GUaM0NMhsnvUoheRKq/4kcipU8PjkJ06lb8FvXFj2ccxSz6ISysr\n/t2gQWqe5vj00xD2Tz8dbmPcf3/owyACybfoFfQiUi1s2gQ//enOH59s1GjPQrpJk7CPqva4pTvk\n58Oll8Jee4URHAcNKns7iT8FvYjEzpYtYfrgunV3DOnGjcMASnG2cGGYeKmwEM49F+68M5y31Fy6\nRy8isVM8TG5NdOih4b79DTfAmDHhEcQHH4Qf/zjdNZOqTl07RESqibp1YfToMGqge5jN8brrkuvB\nLzWXgl5EpJr58Y9h9mz4+c9DC//II+GDD9JdK6mqFPQiItVQkyYwblwYIOrDD6F799BpLwbdriTF\nFPQiItXY4MEwd25o1V94YXgM8bPP0l0rqUoU9CIi1VyrVmFGxjvuCAMMde4c5nQQAQW9iEgs1KoF\nl1wCM2dCy5ZhNMGLLgrTN0vNpqAXEYmRjh3hrbfgiivg73+H7Ozw7L3UXAp6EZGYqV8/jI8/dWoY\nPviII8Kz99u2pbtmkg4KehGRmOrXL3TUGzwYrrkG+vSBjz9Od61qnlWr4J//TN/xFfQiIjG2zz4w\nYUKY7vbdd8PUtw88oMfwKkPxWAeZmZCXB0uXpqceCnoRkRrgzDND6z47G4YPhyFDYM2adNcqfr79\nFp56KlxN6d4dHnsMfvnLMFdBuqbKVtCLiNQQmZnhvv1f/gJPPgldusALL6S7VvHw1Vcwdiy0awcD\nBoRBjG6+GZYvh7vugoMPTl/dFPQiIjVI7drw+9+Hnvl77x3mub/00jANsJTf8uXwhz9A69YwYgTs\nuy9MnBiC/ooroGnTdNdQQS8iUiN17x6eub/44jDQTk4OzJmT7lpVHzNmhNshbdvCrbfCsceG2QXf\nfBPOOCNMQFRVKOhFRGqovfYKl5WnTAn363v0gNtuC/eZ5fu2bQv33I86KnxWzzwDv/lNaL0/8kh4\njLEqUtCLiNRwJ54YeuSffDJcfnlonX7ySbprVXVs2BCuehx8cHhUceXK8P6TT8IXo6ysdNdw1xT0\nIiJC8+bw73+H571nzAgd9R5+ON21Sq8lS+Cyy8L990svDb8fewwWLQrDDTdpku4aJkdBLyIiAJjB\neeeF57/bt4ehQ+Hss2H9+nTXrPK4h3vtP/sZHHQQ3H03nHpq+PLz6qswaFDo0FidKOhFRGQHBx0E\n06fD6NFhsJ0uXeCVV9Jdq4q1ZUvoLd+rV5jy98UXQ6/5jz8Ogw3l5KS7hrtPQS8iIt9Tpw5cey38\n979Qr14YAObKK2Hz5nTXLLXWrYNbbglfboYNg7Vrw/Pwy5fDTTeFy/XVnYJeRER2qmdPeOcdOP/8\nMNBOr16wYEG6a7XnFi8Ojxa2bh3GFTj4YJg8Gd5/H371K2jYMN01TB0FvYiI7FKjRpCfH0bT++ST\nMIzu2LHVb7x893AL4rTT4Ec/CtP4Dh4cvsi89FK4F18rhqkYw1MSEZGKMGBAeAyvf/8wCtzJJ4eZ\n2aq6zZvhwQfhsMOgb1947TUYOTJMMvOvf0G3bumuYcVS0IuISNIOOACefhr+9rfQOu7cGR5/PN21\nKt2aNfDnP4fn
3M85Jwzzm58frkrccAO0bJnuGlYOBb2IiJSLGVx0EcyaFSbKGTQo3MPfuDHdNQve\nfz/MGHfggaHl3rlzGP1v3jy44IIwImBNoqAXEZHd0q4dvPEGXH01jBsXLoG/+WZ66uIeZuI7+eQw\nBsC//gW5ufDee/Dcc2H0P7P01C3dFPQiIrLb6tWDMWPCZfytW8M48KNGhdeVYdOm8CWja9cwE9/M\nmfDHP8KyZfCPf0DHjpVTj6pMQS8iInusd+8w+11ubgjao44Kj7BVlM8+C8fJzIRf/CK01u+/PwT8\nddfBfvtV3LGrGwW9iIikxN57wwMPhDHyP/ggXMq/777UPob33nsh2Nu0CVcODj88jGI3ezYMHw71\n66fuWHGhoBcRkZQaMgTmzg2D61xwAZx+Oqxevfv7+/bb0JnuuONCx7oJE8KY/O+/H54AOOaYmnv/\nPRkKehERSbnWreH55+H220NIF/d8L4+iojCoTceOoZPd/PnhcblPPgmP9x16aMXUPW6SCnozO9HM\nFprZYjO7spTlmWY21czmmtnLZtY6YVkbM3vezBaY2Xwzy4rK/2VmH5vZ7OinW1RuZnZXdKy5Zpad\nmlMVEZHKVKtWmN61sDDcMz/55DDQTlHRrrdbuTI8FtemTXhMrmFD+L//CxPMXHUVNGtWOfWPizKD\n3sxqA2OBk4AOwDAz61BitVuB8e7eBRgN3JiwbDxwi7u3B3oAnyUsu8Ldu0U/s6Oyk4BDop884N7y\nn5aIiFQVnTvD22+Hud3Hjg0j1M2a9f313nknDGyTlQU33hg6+E2fHqaIzc0NPfyl/JJp0fcAFrv7\nR+6+GZgIDCyxTgfgpej1tOLl0ReCOu7+AoC7b3T3Mr7LMZDwpcHd/U2gqZnVkPGLRETiqUEDuO22\n0HHuyy/DZDk33RSmh33yyTA0bXZ2GGXvootg0aLwundv3X/fU8kEfSvgk4T3y6OyRHOAQdHr04HG\nZtYM+BGwzsz+bWbvmNkt0RWCYmOiy/N/NbPivpLJHE9ERKqhY44JHfUGDfruMvxpp4XL8rfeGu6/\n33lnmDZWUiNVnfEuB/qY2TtAH2AFsA2oA/SOlh8O/BAYHm1zFdAuKt8X+EN5DmhmeWZWaGaFq/ek\nO6eIiFSqffeFiRPDRDMnnBAex/vwQ/jd76Bp03TXLn6SCfoVwIEJ71tHZdu5+0p3H+Tu3YGRUdk6\nQmt8dnTZfyvwBJAdLV8VXZ7/BrifcIsgqeNF2+e7e46757Ro0SKJ0xARkarCDM46Cx55JDyOV6dO\numsUX8kE/QzgEDNra2b1gKHA5MQVzKy5mRXv6ypgXMK2Tc2sOIn7A/OjbVpGvw04DXgvWmcycE7U\n+74XsN7dq8FEiCIiIlVPmd+h3H2rmY0AngNqA+PcfZ6ZjQYK3X0y0Be40cwcmA78Otp2m5ldDkyN\nAn0m8I9o1wXRFwADZgO/jMqfBU4GFgNFwLkpOVMREZEayDyVYxOmSU5OjhcWFqa7GiIiIpXGzGa6\ne05Z62lkPBERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYkxBLyIiEmMK\nehERkRhT0IuIiMSYgl5ERCTGFPQiIiIxpqAXERGJMQW9iIhIjCnoRUREYiwW89Gb2WpgaQp32Rz4\nPIX7q6p0nvGi84wXnWe8VMR5Zrp7i7JWikXQp5qZFbp7TrrrUdF0nvGi84wXnWe8pPM8deleREQk\nxhT0IiIiMaagL11+uitQSXSe8aLzjBedZ7yk7Tx1j15ERCTG1KIXERGJMQV9AjMbZ2afmdl76a5L\nRTKzA81smpnNN7N5ZnZJuutUEcysgZm9bWZzovP8Y7rrVJHMrLaZvWNmT6e7LhXFzJaY2btmNtvM\nCtNdn4piZk3N7FEze9/MFpjZEemuU6qZ2aHR37H4Z4OZ/Tbd9aoIZnZp9H/Qe2
Y2wcwaVOrxden+\nO2Z2NLARGO/undJdn4piZi2Blu4+y8waAzOB09x9fpqrllJmZkBDd99oZnWB14BL3P3NNFetQpjZ\nZUAO0MTdT0l3fSqCmS0Bctw91s9dm9kDwKvufp+Z1QMy3H1duutVUcysNrAC6OnuqRwTJe3MrBXh\n/54O7v61mU0CnnX3f1VWHdSiT+Du04Ev0l2Piubuq9x9VvT6S2AB0Cq9tUo9DzZGb+tGP7H8Zmtm\nrYGfAPeluy6yZ8xsb+Bo4J8A7r45ziEfOQb4MG4hn6AOsJeZ1QEygJWVeXAFfQ1nZllAd+Ct9Nak\nYkSXs2cDnwEvuHsszxO4A/g98G26K1LBHHjezGaaWV66K1NB2gKrgfujWzH3mVnDdFeqgg3l/7dz\nh65VRnEYx78PWHQGDSLKgm11LgxxJufEicwsaDKKYNR/wCRWk80xwW0mQQyuCILiQA3ahHlFnUnB\nJPIY3iMMs+974Nznky43PeHlPu85v3MurNQO0Qfbn4BbwBbwGfhu+8mQGVL0Y0zSXmANuGb7R+08\nfbD92/Y0MAnMSmpuJCPpHLBt+1XtLAM4YXsGWASulHFba3YBM8Ad20eBn8D1upH6U0YTS8CD2ln6\nIGk/cJ7uBe4wMCHp4s+obtsAAAFOSURBVJAZUvRjqsys14Bl2+u18/StbH1uAGdqZ+nBHLBU5tf3\ngZOS7tWN1I+yOsL2NvAQmK2bqBcjYLRj92mVrvhbtQhs2v5aO0hPTgEfbH+z/QtYB44PGSBFP4bK\nIbW7wDvbt2vn6YukA5L2lc+7gQXgfd1U/5/tG7YnbR+h2wJ9anvQFcMQJE2Uw6OUrezTQHM3ZGx/\nAT5KmipfzQNNHZT9xwUa3bYvtoBjkvaU3955unNRg0nR7yBpBXgOTEkaSbpcO1NP5oBLdCu/v1db\nztYO1YNDwIakN8BLuhl9s1fPxsBB4Jmk18AL4JHtx5Uz9eUqsFye3WngZuU8vSgvbAt0q9wmlZ2Z\nVWATeEvXu4P+S16u10VERDQsK/qIiIiGpegjIiIalqKPiIhoWIo+IiKiYSn6iIiIhqXoIyIiGpai\nj4iIaFiKPiIiomF/ABFx9NHo/BaYAAAAAElFTkSuQmCC\n",
937 | "text/plain": [
938 | ""
939 | ]
940 | },
941 | "metadata": {
942 | "tags": []
943 | },
944 | "output_type": "display_data"
945 | }
946 | ],
947 | "source": [
948 | "# Plot training vs. validation accuracy per epoch, read from `history.history`.\n",
949 | "acc, val_acc = history.history['crf_viterbi_accuracy'], history.history['val_crf_viterbi_accuracy']\n",
950 | "loss, val_loss = history.history['loss'], history.history['val_loss']  # not plotted here; presumably used by a later cell\n",
951 | "epochs = range(1, len(acc) + 1)  # 1-based epoch numbers for the x-axis\n",
952 | "plt.figure(figsize = (8, 8))\n",
953 | "plt.plot(epochs, acc, 'bo', label='Training acc')\n",
954 | "plt.plot(epochs, val_acc, 'b', label='Validation acc')\n",
955 | "plt.title('Training and validation accuracy')\n",
956 | "plt.gca().set(xlabel='Epoch', ylabel='Accuracy')  # label both axes so the figure stands alone\n",
957 | "plt.legend();  # trailing ';' keeps the Legend repr out of the cell output"
958 | ]
959 | },
960 | {
961 | "cell_type": "code",
962 | "execution_count": 121,
963 | "metadata": {
964 | "colab": {
965 | "base_uri": "https://localhost:8080/",
966 | "height": 499
967 | },
968 | "colab_type": "code",
969 | "executionInfo": {
970 | "elapsed": 1215,
971 | "status": "ok",
972 | "timestamp": 1560709916909,
973 | "user": {
974 | "displayName": "CHAVAN AKSHAY",
975 | "photoUrl": "",
976 | "userId": "10674464813829582221"
977 | },
978 | "user_tz": -330
979 | },
980 | "id": "sASoBK0_bYgo",
981 | "outputId": "bfe6b0a3-a22b-4a0a-db9c-793a71184516"
982 | },
983 | "outputs": [
984 | {
985 | "data": {
986 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAe0AAAHiCAYAAADF4pQuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3X28VWWd9/HPT0CeBQTMhBSKSXlQ\ngU74gISomVrqYOqgaNmDqHejZdnEqJVjtzPqOGV6O91So6YhxGiWPVozcqdOhQIqqPgsIoIIKAiC\nyYHr/mPtAwc8nHPgPGyufT7v12u/ztlrX2vt39qHF999XWuta0VKCUmStOvbrdwFSJKkxjG0JUnK\nhKEtSVImDG1JkjJhaEuSlAlDW5KkTBjaUi0R0S4i1kbEvs3ZtpwiYlBENPu1nRFxTEQsrPX8mYgY\n05i2O/FeP4qIS3d2/Xq2+78j4rbm3q7UUtqXuwCpKSJiba2nXYC/AhtLz89LKU3dke2llDYC3Zq7\nbVuQUtq/ObYTEV8EzkopHVlr219sjm1LuTO0lbWU0ubQLPXkvphS+q/ttY+I9iml6taoTZKam8Pj\nqmil4c+fRsS0iFgDnBURh0XEXyJiVUQsjYgbIqJDqX37iEgRMaD0/Cel138bEWsi4s8RMXBH25Ze\nPz4ino2I1RFxY0T8T0Scs526G1PjeRHxfES8GRE31Fq3XUR8LyJWRsSLwHH1fD6XRcT0bZbdFBHf\nLf3+xYhYUNqfF0q94O1ta3FEHFn6vUtE3FGq7UngI9u0vTwiXixt98mIOKm0/EDg/wBjSoceVtT6\nbK+otf75pX1fGRE/j4j3N+azaUhEjC/Vsyoi7o+I/Wu9dmlELImItyLi6Vr7emhEzC0tXxYR/9rY\n95N2lKGttmA8cCfQA/gpUA18GegDjKYItfPqWf9M4JvAnsAi4Ds72jYi9gJmAF8vve9LwKh6ttOY\nGk+gCMMRFF9GjiktvwA4FjgY+Chwej3vMw34VER0LdXZHjiN4vMCWAZ8EtgDOBe4MSIOqmd7Na4E\nPgB8sFTnZ7d5/dnSfvUArgLujIj3pZTmA38PPJhS6pZS6rPthiPi2NL2TwX6AUuAbQ+DbO+z2a6I\nGAzcAVwI9AX+C7g3IjpExFCKz39kSmkP4HiKvy/AjcC/lpYPAu5q6L2knWVoqy14KKX0y5TSppTS\n+pTSIymlWSml6pTSi8AUYGw969+VUpqdUtpAEQ7Dd6Ltp4DHUkq/KL32PWDF9jbSyBr/JaW0OqW0\nEPh/td7rdOB7KaXFKaWVwNX1vM+LwBPAyaVFHwfeTCnNLr3+y5TSi6lwP/DfQJ0nm23jdOB/p5Te\nTCm9TNF7rv2+M1JKS0t/kzuBhUBVI7YLMBH4UUrpsZTSO8BkYGxE9K/VZnufTX0mAPemlO4v/Y2u\npvhScQjFl6hOwNDSIZaXSp8dwAbgbyKid0ppTUppViP3Q9phhrbagldqP4mIAyLi1xHxWkS8RdFr\ne0+PrpbXav2+jvpPPtte231q15GKO/Us3t5GGlljo94LeLmeeqHoVZ9R+v1MtvSyiYhPRcSsiHgj\nIlZR9ODr+6xqvL++GiLinIh4vDQMvQo4oJHbhWL/Nm8vpfQW8CZFr7vGjvzNtrfdTRR/o34ppWeA\nr1H8HV6P4nDL3qWmnwOGAM9ExMMRcUIj90PaYYa22oJtL3e6maJ3Oag0pPktIFq4hqXA5p5gRARb\nh8y2mlLjUoqh6RoNXZI2AzgmIvpR9LjvLNXYmWKo91+A96WUegK/b2Qdr22vhoj4IPADimH83qXt\nPl1ruw1dnrYE2K/W9roDvYBXG1HXjmx3N4q/2asAKaWfpJRGAwOBdhSfCymlZ1JKE4C9gH8D7o6I\nTk2sRaqToa22qDuwGni7dByzvuPZze
VXwMiIOLF03PjLFMdNW6LGGcBXIqJfRPQGvlFf45TSa8BD\nwG3AMyml50ovdQR2B5YDGyPiU8DRO1DDpRHRM4rr2P++1mvdKIJ5OcX3l3Mpeto1lgH9a068q8M0\n4AsRcVBEdKQIzwdTStsdudiBmk+KiCNL7/11YA0wKyIGR8S40vutLz02UezA2RHRp9QzX13at01N\nrEWqk6GttuhrFCdGraHo0f60pd8wpbQM+Dvgu8BK4EPAoxTXlTd3jT+gOPY8H3iExp0YdSdwDLWG\nxlNKq4CLgXuANyhO/PpVI2v4NkWPfyHwW+D2WtudR3Hy1sOlNvsDtY8D/wF4DlgWEbWHuWvW/x3F\nMPU9pfX3pTjO3SQppScpPvMfUHyhOA44qXR8uyNwLcV5CK9R9OwvK616ArAgiqsTrgP+LqX0blPr\nkeoSxaE1Sa0pItpRDMeemlJ6sNz1SMqDPW2plUTEcaXh4o4Ul4VtoOhtSlKjGNpS6zkCeJFi6PUT\nwPiU0vaGxyXpPRwelyQpE/a0JUnKhKEtSVImdrm7fPXp0ycNGDCg3GVIktRq5syZsyKlVN/cDcAu\nGNoDBgxg9uzZ5S5DkqRWExENTTcMODwuSVI2DG1JkjJhaEuSlIld7pi2JKnxNmzYwOLFi3nnnXfK\nXYoaoVOnTvTv358OHbZ3P5z6GdqSlLHFixfTvXt3BgwYQHHHV+2qUkqsXLmSxYsXM3DgwJ3ahsPj\nkpSxd955h969exvYGYgIevfu3aRREUNbkjJnYOejqX8rQ1uStNNWrlzJ8OHDGT58OHvvvTf9+vXb\n/Pzddxt3W/HPfe5zPPPMM/W2uemmm5g6dWpzlMwRRxzBY4891izbam0e05akNmTqVLjsMli0CPbd\nF666CiZO3Pnt9e7de3MAXnHFFXTr1o1LLrlkqzYpJVJK7LZb3f3EW2+9tcH3+dKXvrTzRVYQe9qS\n1EZMnQqTJsHLL0NKxc9Jk4rlze35559nyJAhTJw4kaFDh7J06VImTZpEVVUVQ4cO5corr9zctqbn\nW11dTc+ePZk8eTIHH3wwhx12GK+//joAl19+Oddff/3m9pMnT2bUqFHsv//+/OlPfwLg7bff5tOf\n/jRDhgzh1FNPpaqqqsEe9U9+8hMOPPBAhg0bxqWXXgpAdXU1Z5999ublN9xwAwDf+973GDJkCAcd\ndBBnnXVWs39mjWFPW5LaiMsug3Xrtl62bl2xvCm97e15+umnuf3226mqqgLg6quvZs8996S6uppx\n48Zx6qmnMmTIkK3WWb16NWPHjuXqq6/mq1/9KrfccguTJ09+z7ZTSjz88MPce++9XHnllfzud7/j\nxhtvZO+99+buu+/m8ccfZ+TIkfXWt3jxYi6//HJmz55Njx49OOaYY/jVr35F3759WbFiBfPnzwdg\n1apVAFx77bW8/PLL7L777puXtTZ72pLURixatGPLm+pDH/rQ5sAGmDZtGiNHjmTkyJEsWLCAp556\n6j3rdO7cmeOPPx6Aj3zkIyxcuLDObZ9yyinvafPQQw8xYcIEAA4++GCGDh1ab32zZs3iqKOOok+f\nPnTo0IEzzzyTBx54gEGDBvHMM89w0UUXcd9999GjRw8Ahg4dyllnncXUqVN3+jrrpjK0JamN2Hff\nHVveVF27dt38+3PPPcf3v/997r//fubNm8dxxx1X56VPu+++++bf27VrR3V1dZ3b7tixY4Ntdlbv\n3r2ZN28eY8aM4aabbuK8884D4L777uP888/nkUceYdSoUWzcuLFZ37cxDG1JaiOuugq6dNl6WZcu\nxfKW9tZbb9G9e3f22GMPli5dyn333dfs7zF69GhmzJgBwPz58+vsydd2yCGHMHPmTFauXEl1dTXT\np09n7NixLF++nJQSp512GldeeSVz585l48aNLF68mKOOOoprr72WFStWsG7bYw2twGPaktRG1By3\nbs
6zxxtr5MiRDBkyhAMOOID99tuP0aNHN/t7XHjhhXzmM59hyJAhmx81Q9t16d+/P9/5znc48sgj\nSSlx4okn8slPfpK5c+fyhS98gZQSEcE111xDdXU1Z555JmvWrGHTpk1ccskldO/evdn3oSGRUmr1\nN61PVVVVaq77aTf3pQ2StKtZsGABgwcPLncZu4Tq6mqqq6vp1KkTzz33HMceeyzPPfcc7dvvWv3T\nuv5mETEnpVS1nVU227X2pBnVXNpQM3pRc2kDGNySVInWrl3L0UcfTXV1NSklbr755l0usJuqsvam\nlta+tEGSVF49e/Zkzpw55S6jRVXsiWitfWmDJEktrWJDu7UvbZAkqaVVbGiX89IGSZJaQsWG9sSJ\nMGUK7LcfRBQ/p0zxeLYkKV8VG9pQBPTChbBpU/HTwJak5jVu3Lj3TJRy/fXXc8EFF9S7Xrdu3QBY\nsmQJp556ap1tjjzySBq6BPj666/fapKTE044oVnmBb/iiiu47rrrmryd5lbRoS1JallnnHEG06dP\n32rZ9OnTOeOMMxq1/j777MNdd9210++/bWj/5je/oWfPnju9vV2doS1J2mmnnnoqv/71r3n33XcB\nWLhwIUuWLGHMmDGbr5seOXIkBx54IL/4xS/es/7ChQsZNmwYAOvXr2fChAkMHjyY8ePHs379+s3t\nLrjggs239fz2t78NwA033MCSJUsYN24c48aNA2DAgAGsWLECgO9+97sMGzaMYcOGbb6t58KFCxk8\neDDnnnsuQ4cO5dhjj93qfery2GOPceihh3LQQQcxfvx43nzzzc3vX3Orzpoblfzxj39k+PDhDB8+\nnBEjRrBmzZqd/mzrUrHXaUtSW/OVr0ADt4/eYcOHQynv6rTnnnsyatQofvvb33LyySczffp0Tj/9\ndCKCTp06cc8997DHHnuwYsUKDj30UE466SQios5t/eAHP6BLly4sWLCAefPmbXVrzauuuoo999yT\njRs3cvTRRzNv3jwuuugivvvd7zJz5kz69Omz1bbmzJnDrbfeyqxZs0gpccghhzB27Fh69erFc889\nx7Rp0/jhD3/I6aefzt13313v/bE/85nPcOONNzJ27Fi+9a1v8U//9E9cf/31XH311bz00kt07Nhx\n85D8ddddx0033cTo0aNZu3YtnTp12oFPu2H2tCVJTVJ7iLz20HhKiUsvvZSDDjqIY445hldffZVl\ny5ZtdzsPPPDA5vA86KCDOOiggza/NmPGDEaOHMmIESN48sknG7wZyEMPPcT48ePp2rUr3bp145RT\nTuHBBx8EYODAgQwfPhyo//afUNzfe9WqVYwdOxaAz372szzwwAOba5w4cSI/+clPNs+8Nnr0aL76\n1a9yww03sGrVqmafkc2etiRViPp6xC3p5JNP5uKLL2bu3LmsW7eOj3zkIwBMnTqV5cuXM2fOHDp0\n6MCAAQPqvB1nQ1566SWuu+46HnnkEXr16sU555yzU9upUXNbTyhu7dnQ8Pj2/PrXv+aBBx7gl7/8\nJVdddRXz589n8uTJfPKTn+Q3v/kNo0eP5r777uOAAw7Y6Vq3ZU9bktQk3bp1Y9y4cXz+85/f6gS0\n1atXs9dee9GhQwdmzpzJyy+/XO92Pvaxj3HnnXcC8MQTTzBv3jyguK1n165d6dGjB8uWLeO3v/3t\n5nW6d+9e53HjMWPG8POf/5x169bx9ttvc8899zBmzJgd3rcePXrQq1evzb30O+64g7Fjx7Jp0yZe\neeUVxo0bxzXXXMPq1atZu3YtL7zwAgceeCDf+MY3+OhHP8rTTz+9w+9ZH3vakqQmO+OMMxg/fvxW\nZ5JPnDiRE088kQMPPJCqqqoGe5wXXHABn/vc5xg8eDCDBw/e3GM/+OCDGTFiBAcccAAf+MAHtrqt\n56RJkzjuuOPYZ599mDlz5ublI0eO5JxzzmHUqFEAfPGLX2TEiBH1
DoVvz49//GPOP/981q1bxwc/\n+EFuvfVWNm7cyFlnncXq1atJKXHRRRfRs2dPvvnNbzJz5kx22203hg4dyvHHH7/D71efir41pyRV\nOm/NmZ+m3JrT4XFJkjJhaEuSlAlDuwJMnQoDBsBuuxU/p04td0WSpJbgiWiZmzoVJk2Cmln8Xn65\neA7OtS61FSml7U5Yol1LU88js6educsu2xLYNdatK5ZLqnydOnVi5cqVTQ4DtbyUEitXrmzSLGn2\ntDO3aNGOLZdUWfr378/ixYtZvnx5uUtRI3Tq1In+/fvv9PqGdub23bcYEq9ruaTK16FDBwYOHFju\nMtRKHB7P3FVXQZcuWy/r0qVYLkmqLIZ25iZOhClTYL/9IKL4OWWKJ6FJUiVyeLwCTJxoSEtSW2BP\nW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlSJgxtZcO7mUlq67xOW1nwbmaSZE9bmfBuZpJk\naCsT3s1MkgxtZWJ7dy3zbmaS2hJDW1nwbmaSZGgrE97NTJI8e1wZ8W5mkto6e9qSJGXC0JYkKROG\ntiRJmWhUaEfExRHxZEQ8ERHTIqLTNq/vGxEzI+LRiJgXESeUln88IuZExPzSz6NaYickSWoLGgzt\niOgHXARUpZSGAe2ACds0uxyYkVIaUXrt30vLVwAnppQOBD4L3NFchUuVyjnWJW1PY88ebw90jogN\nQBdgyTavJ2CP0u89al5PKT1aq82TpW10TCn9dedLliqXc6xLqk+DPe2U0qvAdcAiYCmwOqX0+22a\nXQGcFRGLgd8AF9axqU8Dcw1safucY11SfRozPN4LOBkYCOwDdI2Is7ZpdgZwW0qpP3ACcEdE7FZr\nG0OBa4DztvMekyJidkTMXr58+c7tiVQBnGNdUn0acyLaMcBLKaXlKaUNwM+Aw7dp8wVgBkBK6c9A\nJ6APQET0B+4BPpNSeqGuN0gpTUkpVaWUqvr27btzeyJVAOdYl1SfxoT2IuDQiOgSEQEcDSyoo83R\nABExmCK0l0dET+DXwOSU0v80X9lSZXKOdUn1acwx7VnAXcBcYH5pnSkRcWVEnFRq9jXg3Ih4HJgG\nnJNSSsDfA4OAb0XEY6XHXi2xI1IlcI51SfWJIlt3HVVVVWn27NnlLkOSpFYTEXNSSlUNtXNGNEmS\nMmFoS5KUCUNbkqRMGNqSJGXC0JZUFs6xLu24xs49LknNxjnWpZ1jT1tSq3OOdWnnGNqSWp1zrEs7\nx9CW1OqcY13aOYa2pFbnHOvSzjG0JbU651iXdo5nj0sqi4kTDWlpR9nTliQpE4a2JEmZMLQlScqE\noS1JUiYMbUlqQc6xrubk2eOS1EKcY13NzZ62JLUQ51hXczO0JamFOMe6mpuhLUktxDnW1dwMbUlq\nIc6xruZmaEtSC3GOdTU3zx6XpBbkHOtqTva0JUnKhKEtSVImDG1JUpM581vr8Ji2JKlJnPmt9djT\nliQ1iTO/tR5DW5LUJM781noMbUlSkzjzW+sxtCVJTeLMb63H0JYkNYkzv7Uezx6XJDWZM7+1Dnva\nkiRlwtCWJCkThrYkSY1U7pnfPKYtSVIj7Aozv9nTliSpEXaFmd8MbUmSGmFXmPnN0JYkqRF2hZnf\nDG1JkhphV5j5zdCWJKkRdoWZ3zx7XJKkRir3zG/2tCVJyoShLUlSJgxtSZIyYWhLkpQJQ1uSpEwY\n2pIkZcLQliQpE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKROGtiRJ\nmTC0JUnKhKEtSVImDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlSJgxt\nSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNbkqRM\nGNqSJGXC0JYkKROGtiRJmTC0
JUnKhKEtSVImGhXaEXFxRDwZEU9ExLSI6LTN6/tGxMyIeDQi5kXE\nCbVe+8eIeD4inomITzT3DkiS1FY0GNoR0Q+4CKhKKQ0D2gETtml2OTAjpTSi9Nq/l9YdUno+FDgO\n+PeIaNd85UuS1HY0dni8PdA5ItoDXYAl27yegD1Kv/eo9frJwPSU0l9TSi8BzwOjmlayJEltU4Oh\nnVJ6FbgOWAQsBVanlH6/TbMrgLMiYjHwG+DC0vJ+wCu12i0uLdtKREyKiNkRMXv58uU7vBOSJLUF\njRke70XRYx4I7AN0jYiztml2BnBbSqk/cAJwR0Q0+iS3lNKUlFJVSqmqb9++ja9ekqQ2pDHBegzw\nUkppeUppA/Az4PBt2nwBmAGQUvoz0AnoA7wKfKBWu/6lZZIkaQc1JrQXAYdGRJeICOBoYEEdbY4G\niIjBFKG9HLgXmBARHSNiIPA3wMPNVbwkSW1J+4YapJRmRcRdwFygGngUmBIRVwKzU0r3Al8DfhgR\nF1OclHZOSikBT0bEDOCp0rpfSiltbKF9kSSpokWRrbuOqqqqNHv27HKXIUlSq4mIOSmlqobaOSOa\nJEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEtSVIm\nDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlSJgxtSZIyYWhLkpQJQ1uS\npEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKROG\ntiRJmTC0JUnKhKEtSVImDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlS\nJgxtSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNb\nkqRMGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEtSVImDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkT\nhrYkSZkwtCVJyoShLUlSJgxtSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1J\nUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEtSVImDG1JkjJhaEuSlIlG\nhXZEXBwRT0bEExExLSI6bfP69yLisdLj2YhYVeu1a0vrLoiIGyIimnsnJElqCxoM7YjoB1wEVKWU\nhgHtgAm126SULk4pDU8pDQduBH5WWvdwYDRwEDAM+Cgwtln3QJKkNqKxw+Ptgc4R0R7oAiypp+0Z\nwLTS7wnoBOwOdAQ6AMt2rlRJktq2BkM7pfQqcB2wCFgKrE4p/b6uthGxHzAQuL+07p+BmaX1lgL3\npZQWNE/pkiS1LY0ZHu8FnEwRxvsAXSPirO00nwDclVLaWFp3EDAY6A/0A46KiDF1vMekiJgdEbOX\nL1++c3siSVKFa8zw+DHASyml5SmlDRTHqw/fTtsJbBkaBxgP/CWltDaltBb4LXDYtiullKaklKpS\nSlV9+/bdsT2QJKmNaExoLwIOjYgupTO/jwbeM8QdEQcAvYA/b7Pu2IhoHxEdKE5Cc3hckqSd0Jhj\n2rOAu4C5wPzSOlMi4sqIOKlW0wnA9JRSqrXsLuCF0nqPA4+nlH7ZXMVLktSWxNYZW35VVVVp9uzZ\n5S5DkqRWExFzUkpVDbVzRjRJkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlS\nJgxtSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNb\nkqRMGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEtSVImDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJC
kT\nhrYkSZkwtCVJyoShLUlSJgxtSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1J\nUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEtSVImDG1JkjJhaEuSlAlD\nW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlSJgxtSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQp\nE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEt\nSVImDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkThrYkSZkwtCVJyoShLUlSJgxtSZIyYWhLkpQJ\nQ1uSpEwY2pIkZcLQliQpE40K7Yi4OCKejIgnImJaRHTa5vXvRcRjpcezEbGq1mv7RsTvI2JBRDwV\nEQOadxckSWob2jfUICL6ARcBQ1JK6yNiBjABuK2mTUrp4lrtLwRG1NrE7cBVKaU/REQ3YFMz1S5J\nUpvS2OHx9kDniGgPdAGW1NP2DGAaQEQMAdqnlP4AkFJam1Ja14R6JUlqsxoM7ZTSq8B1wCJgKbA6\npfT7utpGxH7AQOD+0qIPA6si4mcR8WhE/GtEtGue0iVJalsaDO2I6AWcTBHG+wBdI+Ks7TSfANyV\nUtpYet4eGANcAnwU+CBwTh3vMSkiZkfE7OXLl+/wTkiS1BY0Znj8GOCllNLylNIG4GfA4dtpO4HS\n0HjJYuCxlNKLKaVq4OfAyG1XSilNSSlVpZSq+vbtu2N7IElSG9GY0F4EHBoRXSIigKOBBds2iogD\ngF7An2stfgToGRE1SXwU8FTTSpYkqW1qzDHtWcBdwFxgfmmdKRFxZUScVKvpBGB6SinVWncjxdD4\nf0fEfCCAHzZj/ZIktRlRK2N3CVVVVWn27NnlLkOSpFYTEXNSSlUNtXNGNEmSMmFoS5KUCUNbkqRM\nGNqSJGXC0JYkKROGtiRJmTC0JUnKhKEtSVImDG1JkjJhaEuSlAlDW5KkTBjakiRlwtCWJCkT7ctd\ngCRJzS0l2LixeGzatOX37S3b0ecHHgh9+rT+fhnakqQmSQn++EeYMQPWrNmx8GuOAK3reUvfdfrn\nP4eTT27Z96iLoS1J2ikrV8KPfww33wzPPgvdukHfvtCu3ZbHbrvV/7xDh/pfb8w2GnreEusMG1ae\nz9zQliQ1Wkrw4INFUN91F7z7Lhx2GNx2G5x2GnTpUu4KK5uhLUlq0BtvwO23w5QpsGAB7LEHnHsu\nnHdecXxXrcPQliTVKSX405+KXvV//ie88w4ccgj8x3/A3/0ddO1a7grbHkNbkrSVVavgjjuKXvUT\nT0D37nDOOUWvevjwclfXthn7Nk7xAAAN4ElEQVTakiRSglmzil71T38K69dDVRX88IcwYUJxkpnK\nz9CWpDZs9WqYOrUI63nzinA+++yiVz1yZLmr07YMbUlqY1KC2bOLoJ42DdatgxEj4P/+XzjzzGI4\nXLsmQ1tS2aQEEeWuou1YswbuvLMI60cfLS7POuOMolddVeXfIgfOPS6p1a1ZA9/8ZnHZ0Ic/DF/4\nAtx6Kzz/fMvPZNUWzZkDkybB+98P559fzBp2002wZAn86Efw0Y8a2Lmwpy2p1WzcCLfcUgT2smUw\nfjxUV8M99xTLAfbeG444AsaMKX4efHAxA5V2zNq1xdD3lCnFUHjnzsVlWuedV1y2ZUjnydCW1Cru\nuw8uuaS4hGj0aPjFL4rwgGKu6AUL4KGHitm2HnywmG0LiuOrhx++JchHjSoCSHV77LFi+Hvq1GJE\nY+hQuOGG4uSynj3LXZ2aKtIuNhZVVVWVZs+eXe4yJDWT+fPh618vQvtDH4JrroFTTmm4p7doURHi\nNUH+xBPF8g4diuOvNT3x0aNhzz1bfj92ZW+/XVymdfPN8P
DD0KkTnH56MSR++OH2qnMQEXNSSlUN\ntjO0JbWE114rhsFvuQV69Ch+/1//Czp23LntvfFGMTvXgw8WQf7II7BhQ/Ha0KFFiNcE+b77Nt9+\n7Mrmzy+C+o474K23YPDgYvj77LP9IpMbQ1tSWaxbB//2b0WP+t134UtfKgK7uUNk/foiuGtC/H/+\npxgOhiK0a4bTx4wpwmy3Cjntdv364haYN98Mf/5z8SXo1FOLsD7iCHvVuTK0JbWqTZuKHt9ll8Gr\nr8KnPw1XXw2DBrXO+2/cWEwOUvu4+GuvFa/tuWcxjF4T5B/5COy+e+vU1VyeeqoI6ttvL6YZ/fCH\ni6D+7Gehd+9yV6emMrQltZr774evfa04CWrUqKKnfcQR5a0pJXjxxS098QcfLO75DMUx30MO2TKc\nfthhxeVnu5p33ilOyLv55mIfOnQovgyddx6MHWuvupIY2pJa3NNPFyeZ/epXsN9+8C//UlxWtKsO\nRb/++tYntz36aNFD32234kYYtS8123vv8tX59NPFpVo//nFxLH/QoOKksnPOgb59y1eXWo6hLanF\nLF8OV1xR9AC7doVLL4Uvf7noweZk7Vr4y1+29Mb/8pfimDwUQVn7uPigQS3bs/3rX+Huu4vP9IEH\noH374jr2886DceN23S9Cah6GtqRm98478P3vw1VXFeF2/vnw7W9XTu9vw4ai911zTPyhh2DlyuK1\n972vCPGaID/44CJYm+rZZ4te9W23Fe/1wQ/CuefC5z5XvKfaBkNbUrPZtAmmT4d//Mfi+ukTT4Rr\nr4UDDih3ZS0rpWKouvbJbQsXFq9161YcC68ZTj/kkGIu78Z4991iFribb4aZM4vwP/nkold99NH2\nqtsiQ1tSs3joIfjqV4vLq0aMKE4yGzeu3FWVz+LFWx8Xnz+/CPcOHYqz0mt64qNHv/es7hdeKHrV\nt95aHGIYMGBLr/r97y/L7mgXYWhLapLnn4dvfAN+9jPo168YEj/7bHuB21q1autJXx5+uOhJAwwZ\nUgT4sGHFtK3/9V/FPOonnlj0qj/+cedVV8HQlrRT3ngDvvOd4i5Qu+8OkycXPe3GDv22de+8U9yg\no/akL6tXwwc+UPSqP//54kuQVFtjQ9sbhkgCirOXb7qpCOy33ipul3nlleW99ClHnTptOWENikvK\nXn65uCTOXrWaytCW2riUikuNvvGNYjKST3wCrruuGNJV07VrV5wRLjUHj05JbdisWcUx19NOK4a/\nf/e74mFgS7smQ1tqgxYuhAkT4NBDizOaf/jDYgrST3yi3JVJqo/D41IbsmoV/PM/FxOktGtX3H3r\nH/6huOZY0q7P0JbagA0biok8rriiODv8s58tTjjr37/clUnaEQ6PSxUsJbj3XjjwQLjwwmLqzTlz\nisk9DGwpP4a2VKHmzIGjjiqmx4yAX/6ymNxjxIhyVyZpZxnaUoV55RX4zGegqgqeeKK49nrePPjU\np7z/spQ7j2lLFWLNGrjmmmJu8JSKmcwmT4YePcpdmaTmYmhLmauuhltugW99C5YtgzPPLM4Q32+/\nclcmqbkZ2lKmUiomQvn61+HJJ4tpM++9F0aNKndlklqKoa0WlVJxx6O334a1a4ufNY/6nm/vtfXr\nYY894H3vq/ux117Fz969K3ue53nz4JJL4A9/gEGDimlIx4/3mLVU6QxtAcUQ644Ga2ODduPGxtcR\nAV27FpN9dO265dG9e3Hjis6dizsmLVtW3Md42bLiGuRt7bYb9O1bf7DXPPr2Le6FnIOlS4sJUW69\ntThWff31cMEFxd24JFU+QztzGzbASy8V9z5+882dD9qa+/82VufOWwK1dsD26rX1823Dt6HXOnXa\nsd5iSsUsX8uWweuvFz/rejz3XPFz/fq6t9O7d8PhXvPo2HHHPqvm8PbbxQlm115b/K2+8hW4/PLi\n85bUdhjaGUgJXn0Vnn
32vY8XX9x+T7ZDh7qDca+9di5Qa37v0mXXGXqOKIKrVy844ID626ZUfEmp\nL9yXLSvuhbxsWXE2dl169Gg42GseXbs2bf82boTbby8CeskSOPVUuPpq+NCHmrZdSXkytHchb7xR\ndzA/9xysW7elXefO8Dd/U8xuddpp8OEPF8c1+/TZOmBzGfJtLRHFMHv37o0LvfXrG+7BP/kk3H9/\nMcpRl65dGw72mscee2w9yvDf/w1f+xo8/jgccgjMmAGjRzfPZyEpT4Z2K1u3rhjKriucV67c0q5d\nOxg4sAjkceOKnzWPfv2KY7ZqWZ07w4ABxaMh775bhHt9Af/CC/CnP8GKFUWvf1sdO24J+Hbtittm\nDhgA06fD6ad7kpkkQ7tFVFcXtz6sK5hfeWXrtv36FUH86U9vHcwDB3pyUU52372Yy7sx83lXVxdf\n0Oobon/zzeL49YUXFsf5JQkM7Z2WUnEmb13B/MILxX/MNXr2hP33hyOP3DqYBw3ylohtUfv2W4bE\nJWlHGNoNePPN4phyXeH89ttb2nXqVBxnHjYMTjll63Du3duhTUlS0xnaFCcc1T7OXDukly/f0q5d\nu+IY44c/DB/72NbB3L+/x5klSS2rzYR2dTW8/PL2jzPXPjFon32KIB4/3uPMkqRdR0WH9ttvFzdP\nqDnOXHvmrB49iuPM2/aYBw0qLgmSJGlXU9Gh3aULvPYaDBkCf/u3W4dznz4eZ5Yk5aWiQzuiuNZV\nkqRK4KlTkiRlwtCWJCkThrYkSZkwtCVJyoShLUlSJgxtSZIyYWhLkpQJQ1uSpEwY2pIkZcLQliQp\nE4a2JEmZMLQlScqEoS1JUiYMbUmSMmFoS5KUCUNbkqRMGNqSJGXC0JYkKRORUip3DVuJiOXAy828\n2T7Aimbe5q7I/aws7mdlcT8rS3Pv534ppb4NNdrlQrslRMTslFJVuetoae5nZXE/K4v7WVnKtZ8O\nj0uSlAlDW5KkTLSV0J5S7gJaiftZWdzPyuJ+Vpay7GebOKYtSVIlaCs9bUmSslfRoR0Rt0TE6xHx\nRLlraSkR8YGImBkRT0XEkxHx5XLX1BIiolNEPBwRj5f285/KXVNLioh2EfFoRPyq3LW0lIhYGBHz\nI+KxiJhd7npaSkT0jIi7IuLpiFgQEYeVu6bmFhH7l/6ONY+3IuIr5a6rJUTExaX/g56IiGkR0alV\n37+Sh8cj4mPAWuD2lNKwctfTEiLi/cD7U0pzI6I7MAf425TSU2UurVlFRABdU0prI6ID8BDw5ZTS\nX8pcWouIiK8CVcAeKaVPlbuelhARC4GqlFJFX9MbET8GHkwp/Sgidge6pJRWlbuulhIR7YBXgUNS\nSs0950ZZRUQ/iv97hqSU1kfEDOA3KaXbWquGiu5pp5QeAN4odx0tKaW0NKU0t/T7GmAB0K+8VTW/\nVFhbetqh9KjIb5wR0R/4JPCjcteipomIHsDHgP8ASCm9W8mBXXI08EKlBXYt7YHOEdEe6AIsac03\nr+jQbmsiYgAwAphV3kpaRmnI+DHgdeAPKaWK3E/geuAfgE3lLqSFJeD3ETEnIiaVu5gWMhBYDtxa\nOtzxo4joWu6iWtgEYFq5i2gJKaVXgeuARcBSYHVK6fetWYOhXSEiohtwN/CVlNJb5a6nJaSUNqaU\nhgP9gVERUXGHPCLiU8DrKaU55a6lFRyRUhoJHA98qXQ4q9K0B0YCP0gpjQDeBiaXt6SWUxr+Pwn4\nz3LX0hIiohdwMsWXsX2ArhFxVmvWYGhXgNIx3ruBqSmln5W7npZWGl6cCRxX7lpawGjgpNLx3unA\nURHxk/KW1DJKvRZSSq8D9wCjyltRi1gMLK41KnQXRYhXquOBuSmlZeUupIUcA7yUUlqeUtoA/Aw4\nvDULMLQzVzpB6z+ABSml75a7npYSEX0jomfp987Ax4Gny1tV80sp/WNKqX9KaQDFMOP9
KaVW/Sbf\nGiKia+nESUrDxccCFXeVR0rpNeCViNi/tOhooKJOEt3GGVTo0HjJIuDQiOhS+r/3aIrziFpNRYd2\nREwD/gzsHxGLI+IL5a6pBYwGzqbokdVcbnFCuYtqAe8HZkbEPOARimPaFXs5VBvwPuChiHgceBj4\ndUrpd2WuqaVcCEwt/dsdDvxzmetpEaUvXx+n6H1WpNKIyV3AXGA+RYa26sxoFX3JlyRJlaSie9qS\nJFUSQ1uSpEwY2pIkZcLQliQpE4a2JEmZMLQlScqEoS1JUiYMbUmSMvH/AZq1GDIPjALkAAAAAElF\nTkSuQmCC\n",
987 | "text/plain": [
988 | ""
989 | ]
990 | },
991 | "metadata": {
992 | "tags": []
993 | },
994 | "output_type": "display_data"
995 | }
996 | ],
997 | "source": [
998 | "plt.figure(figsize = (8, 8))\n",
999 | "plt.plot(epochs, loss, 'bo', label='Training loss')\n",
1000 | "plt.plot(epochs, val_loss, 'b', label='Validation loss')\n",
1001 | "plt.title('Training and validation loss')\n",
1002 | "plt.legend()\n",
1003 | "plt.show()"
1004 | ]
1005 | },
1006 | {
1007 | "cell_type": "markdown",
1008 | "metadata": {},
1009 | "source": [
1010 | "#### Evaluating the model on test set"
1011 | ]
1012 | },
1013 | {
1014 | "cell_type": "code",
1015 | "execution_count": 59,
1016 | "metadata": {
1017 | "colab": {},
1018 | "colab_type": "code",
1019 | "id": "tNtfnN3kdiDr"
1020 | },
1021 | "outputs": [],
1022 | "source": [
1023 | "# Evaluation\n",
1024 | "y_pred = model.predict(X_test)\n",
1025 | "y_pred = np.argmax(y_pred, axis=-1)\n",
1026 | "y_test_true = np.argmax(y_test, -1)"
1027 | ]
1028 | },
1029 | {
1030 | "cell_type": "code",
1031 | "execution_count": 60,
1032 | "metadata": {
1033 | "colab": {},
1034 | "colab_type": "code",
1035 | "id": "hlAMoN7Lfx4k"
1036 | },
1037 | "outputs": [],
1038 | "source": [
1039 | "# Convert the index to tag\n",
1040 | "y_pred = [[idx2tag[i] for i in row] for row in y_pred]\n",
1041 | "y_test_true = [[idx2tag[i] for i in row] for row in y_test_true] "
1042 | ]
1043 | },
1044 | {
1045 | "cell_type": "code",
1046 | "execution_count": 61,
1047 | "metadata": {
1048 | "colab": {
1049 | "base_uri": "https://localhost:8080/",
1050 | "height": 34
1051 | },
1052 | "colab_type": "code",
1053 | "executionInfo": {
1054 | "elapsed": 2597,
1055 | "status": "ok",
1056 | "timestamp": 1560709997195,
1057 | "user": {
1058 | "displayName": "CHAVAN AKSHAY",
1059 | "photoUrl": "",
1060 | "userId": "10674464813829582221"
1061 | },
1062 | "user_tz": -330
1063 | },
1064 | "id": "IXWG7vqDf7X4",
1065 | "outputId": "8fe95519-f489-4a35-aabb-322dda91d765"
1066 | },
1067 | "outputs": [
1068 | {
1069 | "name": "stdout",
1070 | "output_type": "stream",
1071 | "text": [
1072 | "F1-score is : 90.4%\n"
1073 | ]
1074 | }
1075 | ],
1076 | "source": [
1077 | "print(\"F1-score is : {:.1%}\".format(f1_score(y_test_true, y_pred)))"
1078 | ]
1079 | },
1080 | {
1081 | "cell_type": "code",
1082 | "execution_count": 62,
1083 | "metadata": {
1084 | "colab": {
1085 | "base_uri": "https://localhost:8080/",
1086 | "height": 442
1087 | },
1088 | "colab_type": "code",
1089 | "executionInfo": {
1090 | "elapsed": 5255,
1091 | "status": "ok",
1092 | "timestamp": 1560710007669,
1093 | "user": {
1094 | "displayName": "CHAVAN AKSHAY",
1095 | "photoUrl": "",
1096 | "userId": "10674464813829582221"
1097 | },
1098 | "user_tz": -330
1099 | },
1100 | "id": "8E2X4JzEgJjK",
1101 | "outputId": "e963aa51-efe0-4523-952a-d2978ca48290"
1102 | },
1103 | "outputs": [
1104 | {
1105 | "name": "stdout",
1106 | "output_type": "stream",
1107 | "text": [
1108 | " precision recall f1-score support\n",
1109 | "\n",
1110 | " B-art 0.00 0.00 0.00 47\n",
1111 | " B-eve 0.56 0.19 0.29 47\n",
1112 | " B-geo 0.86 0.93 0.89 5632\n",
1113 | " B-gpe 0.97 0.94 0.96 2418\n",
1114 | " B-nat 0.00 0.00 0.00 30\n",
1115 | " B-org 0.84 0.75 0.79 3001\n",
1116 | " B-per 0.90 0.85 0.87 2562\n",
1117 | " B-tim 0.93 0.90 0.91 3031\n",
1118 | " I-art 0.00 0.00 0.00 27\n",
1119 | " I-eve 0.00 0.00 0.00 40\n",
1120 | " I-geo 0.80 0.86 0.83 1086\n",
1121 | " I-gpe 1.00 0.52 0.68 25\n",
1122 | " I-nat 0.00 0.00 0.00 6\n",
1123 | " I-org 0.80 0.85 0.82 2436\n",
1124 | " I-per 0.90 0.90 0.90 2626\n",
1125 | " I-tim 0.86 0.74 0.80 941\n",
1126 | " O 0.99 0.99 0.99 132279\n",
1127 | " PAD 1.00 1.00 1.00 383316\n",
1128 | "\n",
1129 | "avg / total 0.99 0.99 0.99 539550\n",
1130 | "\n"
1131 | ]
1132 | }
1133 | ],
1134 | "source": [
1135 | "report = flat_classification_report(y_pred=y_pred, y_true=y_test_true)\n",
1136 | "print(report)"
1137 | ]
1138 | },
1139 | {
1140 | "cell_type": "code",
1141 | "execution_count": 147,
1142 | "metadata": {
1143 | "colab": {
1144 | "base_uri": "https://localhost:8080/",
1145 | "height": 595
1146 | },
1147 | "colab_type": "code",
1148 | "executionInfo": {
1149 | "elapsed": 914,
1150 | "status": "ok",
1151 | "timestamp": 1560710040586,
1152 | "user": {
1153 | "displayName": "CHAVAN AKSHAY",
1154 | "photoUrl": "",
1155 | "userId": "10674464813829582221"
1156 | },
1157 | "user_tz": -330
1158 | },
1159 | "id": "hdmpuybYxPJ3",
1160 | "outputId": "57b72e90-9032-4573-f7b5-928a517c6ee0"
1161 | },
1162 | "outputs": [
1163 | {
1164 | "name": "stdout",
1165 | "output_type": "stream",
1166 | "text": [
1167 | "Sample number 3435 of 7194 (Test Set)\n",
1168 | "Word ||True ||Pred\n",
1169 | "==============================\n",
1170 | "It : O O\n",
1171 | "is : O O\n",
1172 | "the : O O\n",
1173 | "second : O O\n",
1174 | "major : O O\n",
1175 | "quarterly : O O\n",
1176 | "loss : O O\n",
1177 | "for : O O\n",
1178 | "Citigroup : B-org B-org\n",
1179 | ", : O O\n",
1180 | "and : O O\n",
1181 | "it : O O\n",
1182 | "is : O O\n",
1183 | "the : O O\n",
1184 | "latest : O O\n",
1185 | "in : O O\n",
1186 | "a : O O\n",
1187 | "wave : O O\n",
1188 | "of : O O\n",
1189 | "dismal : O O\n",
1190 | "bank : O O\n",
1191 | "earning : O O\n",
1192 | "reports : O O\n",
1193 | "over : O O\n",
1194 | "the : O O\n",
1195 | "past : B-tim B-tim\n",
1196 | "week : O O\n",
1197 | ". : O O\n"
1198 | ]
1199 | }
1200 | ],
1201 | "source": [
1202 | "# On every execution, pick a random sample from the test set.\n",
1203 | "i = np.random.randint(0,X_test.shape[0]) # choose a random index in [0, X_test.shape[0])\n",
1204 | "p = model.predict(np.array([X_test[i]]))\n",
1205 | "p = np.argmax(p, axis=-1)\n",
1206 | "true = np.argmax(y_test[i], -1)\n",
1207 | "\n",
1208 | "print(\"Sample number {} of {} (Test Set)\".format(i, X_test.shape[0]))\n",
1209 | "# Visualization\n",
1210 | "print(\"{:15}||{:5}||{}\".format(\"Word\", \"True\", \"Pred\"))\n",
1211 | "print(30 * \"=\")\n",
1212 | "for w, t, pred in zip(X_test[i], true, p[0]):\n",
1213 | " if w != 0:\n",
1214 | " print(\"{:15}: {:5} {}\".format(words[w-2], idx2tag[t], idx2tag[pred]))"
1215 | ]
1216 | },
1217 | {
1218 | "cell_type": "markdown",
1219 | "metadata": {},
1220 | "source": [
1221 | "The results look quite interesting."
1222 | ]
1223 | },
1224 | {
1225 | "cell_type": "markdown",
1226 | "metadata": {},
1227 | "source": [
1228 | "#### Save the result"
1229 | ]
1230 | },
1231 | {
1232 | "cell_type": "code",
1233 | "execution_count": 119,
1234 | "metadata": {},
1235 | "outputs": [],
1236 | "source": [
1237 | "with open('word_to_index.pickle', 'wb') as f:\n",
1238 | " pickle.dump(word_to_index, f)\n",
1239 | "\n",
1240 | "with open('tag_to_index.pickle', 'wb') as f:\n",
1241 | " pickle.dump(tag_to_index, f)"
1242 | ]
1243 | },
1244 | {
1245 | "cell_type": "code",
1246 | "execution_count": null,
1247 | "metadata": {},
1248 | "outputs": [],
1249 | "source": []
1250 | }
1251 | ],
1252 | "metadata": {
1253 | "accelerator": "GPU",
1254 | "colab": {
1255 | "collapsed_sections": [],
1256 | "name": "Untitled1.ipynb",
1257 | "provenance": [],
1258 | "version": "0.3.2"
1259 | },
1260 | "kernelspec": {
1261 | "display_name": "Python 3",
1262 | "language": "python",
1263 | "name": "python3"
1264 | },
1265 | "language_info": {
1266 | "codemirror_mode": {
1267 | "name": "ipython",
1268 | "version": 3
1269 | },
1270 | "file_extension": ".py",
1271 | "mimetype": "text/x-python",
1272 | "name": "python",
1273 | "nbconvert_exporter": "python",
1274 | "pygments_lexer": "ipython3",
1275 | "version": "3.6.5"
1276 | }
1277 | },
1278 | "nbformat": 4,
1279 | "nbformat_minor": 1
1280 | }
1281 |
--------------------------------------------------------------------------------
/NER using CRF.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Named Entity Recognition using CRF model\n",
8 | "In Natural Language Processing (NLP), Named Entity Recognition (NER) is a common problem. An entity is a part of the text that is of interest. NER is a method of extracting the relevant information from a large corpus and classifying those entities into predefined categories such as location, organization, name and so on. \n",
9 | "Information about labels: \n",
10 | "* geo = Geographical Entity\n",
11 | "* org = Organization\n",
12 | "* per = Person\n",
13 | "* gpe = Geopolitical Entity\n",
14 | "* tim = Time indicator\n",
15 | "* art = Artifact\n",
16 | "* eve = Event\n",
17 | "* nat = Natural Phenomenon\n",
18 | "\n",
19 | " 1. Total Words Count = 1354149 \n",
20 | " 2. Target Data Column: Tag"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "#### Importing Libraries"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 1,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "import pandas as pd\n",
37 | "\n",
38 | "from sklearn.model_selection import train_test_split\n",
39 | "from sklearn_crfsuite import CRF\n",
40 | "from sklearn_crfsuite.metrics import flat_f1_score\n",
41 | "from sklearn_crfsuite.metrics import flat_classification_report"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 2,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "#Reading the csv file\n",
51 | "df = pd.read_csv('ner_dataset.csv', encoding = \"ISO-8859-1\")"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 3,
57 | "metadata": {},
58 | "outputs": [
59 | {
60 | "data": {
61 | "text/html": [
62 | "\n",
63 | "\n",
76 | "
\n",
77 | " \n",
78 | " \n",
79 | " | \n",
80 | " Sentence # | \n",
81 | " Word | \n",
82 | " POS | \n",
83 | " Tag | \n",
84 | "
\n",
85 | " \n",
86 | " \n",
87 | " \n",
88 | " 0 | \n",
89 | " Sentence: 1 | \n",
90 | " Thousands | \n",
91 | " NNS | \n",
92 | " O | \n",
93 | "
\n",
94 | " \n",
95 | " 1 | \n",
96 | " NaN | \n",
97 | " of | \n",
98 | " IN | \n",
99 | " O | \n",
100 | "
\n",
101 | " \n",
102 | " 2 | \n",
103 | " NaN | \n",
104 | " demonstrators | \n",
105 | " NNS | \n",
106 | " O | \n",
107 | "
\n",
108 | " \n",
109 | " 3 | \n",
110 | " NaN | \n",
111 | " have | \n",
112 | " VBP | \n",
113 | " O | \n",
114 | "
\n",
115 | " \n",
116 | " 4 | \n",
117 | " NaN | \n",
118 | " marched | \n",
119 | " VBN | \n",
120 | " O | \n",
121 | "
\n",
122 | " \n",
123 | " 5 | \n",
124 | " NaN | \n",
125 | " through | \n",
126 | " IN | \n",
127 | " O | \n",
128 | "
\n",
129 | " \n",
130 | " 6 | \n",
131 | " NaN | \n",
132 | " London | \n",
133 | " NNP | \n",
134 | " B-geo | \n",
135 | "
\n",
136 | " \n",
137 | " 7 | \n",
138 | " NaN | \n",
139 | " to | \n",
140 | " TO | \n",
141 | " O | \n",
142 | "
\n",
143 | " \n",
144 | " 8 | \n",
145 | " NaN | \n",
146 | " protest | \n",
147 | " VB | \n",
148 | " O | \n",
149 | "
\n",
150 | " \n",
151 | " 9 | \n",
152 | " NaN | \n",
153 | " the | \n",
154 | " DT | \n",
155 | " O | \n",
156 | "
\n",
157 | " \n",
158 | "
\n",
159 | "
"
160 | ],
161 | "text/plain": [
162 | " Sentence # Word POS Tag\n",
163 | "0 Sentence: 1 Thousands NNS O\n",
164 | "1 NaN of IN O\n",
165 | "2 NaN demonstrators NNS O\n",
166 | "3 NaN have VBP O\n",
167 | "4 NaN marched VBN O\n",
168 | "5 NaN through IN O\n",
169 | "6 NaN London NNP B-geo\n",
170 | "7 NaN to TO O\n",
171 | "8 NaN protest VB O\n",
172 | "9 NaN the DT O"
173 | ]
174 | },
175 | "execution_count": 3,
176 | "metadata": {},
177 | "output_type": "execute_result"
178 | }
179 | ],
180 | "source": [
181 | "#Display first 10 rows\n",
182 | "df.head(10)"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 4,
188 | "metadata": {},
189 | "outputs": [
190 | {
191 | "data": {
192 | "text/html": [
193 | "\n",
194 | "\n",
207 | "
\n",
208 | " \n",
209 | " \n",
210 | " | \n",
211 | " Sentence # | \n",
212 | " Word | \n",
213 | " POS | \n",
214 | " Tag | \n",
215 | "
\n",
216 | " \n",
217 | " \n",
218 | " \n",
219 | " count | \n",
220 | " 47959 | \n",
221 | " 1048575 | \n",
222 | " 1048575 | \n",
223 | " 1048575 | \n",
224 | "
\n",
225 | " \n",
226 | " unique | \n",
227 | " 47959 | \n",
228 | " 35178 | \n",
229 | " 42 | \n",
230 | " 17 | \n",
231 | "
\n",
232 | " \n",
233 | " top | \n",
234 | " Sentence: 9309 | \n",
235 | " the | \n",
236 | " NN | \n",
237 | " O | \n",
238 | "
\n",
239 | " \n",
240 | " freq | \n",
241 | " 1 | \n",
242 | " 52573 | \n",
243 | " 145807 | \n",
244 | " 887908 | \n",
245 | "
\n",
246 | " \n",
247 | "
\n",
248 | "
"
249 | ],
250 | "text/plain": [
251 | " Sentence # Word POS Tag\n",
252 | "count 47959 1048575 1048575 1048575\n",
253 | "unique 47959 35178 42 17\n",
254 | "top Sentence: 9309 the NN O\n",
255 | "freq 1 52573 145807 887908"
256 | ]
257 | },
258 | "execution_count": 4,
259 | "metadata": {},
260 | "output_type": "execute_result"
261 | }
262 | ],
263 | "source": [
264 | "df.describe()"
265 | ]
266 | },
267 | {
268 | "cell_type": "markdown",
269 | "metadata": {},
270 | "source": [
271 | "#### Observations : \n",
272 | "* There are a total of 47959 sentences in the dataset.\n",
273 | "* The number of unique words in the dataset is 35178.\n",
274 | "* There are 17 labels (tags) in total."
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": 5,
280 | "metadata": {},
281 | "outputs": [
282 | {
283 | "data": {
284 | "text/plain": [
285 | "array(['O', 'B-geo', 'B-gpe', 'B-per', 'I-geo', 'B-org', 'I-org', 'B-tim',\n",
286 | " 'B-art', 'I-art', 'I-per', 'I-gpe', 'I-tim', 'B-nat', 'B-eve',\n",
287 | " 'I-eve', 'I-nat'], dtype=object)"
288 | ]
289 | },
290 | "execution_count": 5,
291 | "metadata": {},
292 | "output_type": "execute_result"
293 | }
294 | ],
295 | "source": [
296 | "#Displaying the unique Tags\n",
297 | "df['Tag'].unique()"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "execution_count": 6,
303 | "metadata": {},
304 | "outputs": [
305 | {
306 | "data": {
307 | "text/plain": [
308 | "Sentence # 1000616\n",
309 | "Word 0\n",
310 | "POS 0\n",
311 | "Tag 0\n",
312 | "dtype: int64"
313 | ]
314 | },
315 | "execution_count": 6,
316 | "metadata": {},
317 | "output_type": "execute_result"
318 | }
319 | ],
320 | "source": [
321 | "#Checking null values, if any.\n",
322 | "df.isnull().sum()"
323 | ]
324 | },
325 | {
326 | "cell_type": "markdown",
327 | "metadata": {},
328 | "source": [
329 | "There are lots of missing values in the 'Sentence #' attribute, so we will use the pandas fillna technique with the 'ffill' method, which propagates the last valid observation forward to the next valid one."
330 | ]
331 | },
332 | {
333 | "cell_type": "code",
334 | "execution_count": 7,
335 | "metadata": {},
336 | "outputs": [],
337 | "source": [
338 | "df = df.fillna(method = 'ffill')"
339 | ]
340 | },
341 | {
342 | "cell_type": "code",
343 | "execution_count": 8,
344 | "metadata": {},
345 | "outputs": [],
346 | "source": [
347 | "# This is a class to get sentences. Each sentence will be a list of (word, POS, tag) tuples.\n",
348 | "class sentence(object):\n",
349 | " def __init__(self, df):\n",
350 | " self.n_sent = 1\n",
351 | " self.df = df\n",
352 | " self.empty = False\n",
353 | " agg = lambda s : [(w, p, t) for w, p, t in zip(s['Word'].values.tolist(),\n",
354 | " s['POS'].values.tolist(),\n",
355 | " s['Tag'].values.tolist())]\n",
356 | " self.grouped = self.df.groupby(\"Sentence #\").apply(agg)\n",
357 | " self.sentences = [s for s in self.grouped]\n",
358 | " \n",
359 | " def get_text(self):\n",
360 | " try:\n",
361 | " s = self.grouped['Sentence: {}'.format(self.n_sent)]\n",
362 | " self.n_sent +=1\n",
363 | " return s\n",
364 | " except:\n",
365 | " return None"
366 | ]
367 | },
368 | {
369 | "cell_type": "code",
370 | "execution_count": 9,
371 | "metadata": {},
372 | "outputs": [
373 | {
374 | "data": {
375 | "text/plain": [
376 | "'Thousands of demonstrators have marched through London to protest the war in Iraq and demand the withdrawal of British troops from that country .'"
377 | ]
378 | },
379 | "execution_count": 9,
380 | "metadata": {},
381 | "output_type": "execute_result"
382 | }
383 | ],
384 | "source": [
385 | "#Displaying one full sentence\n",
386 | "getter = sentence(df)\n",
387 | "sentences = [\" \".join([s[0] for s in sent]) for sent in getter.sentences]\n",
388 | "sentences[0]"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": 10,
394 | "metadata": {},
395 | "outputs": [
396 | {
397 | "name": "stdout",
398 | "output_type": "stream",
399 | "text": [
400 | "[('Thousands', 'NNS', 'O'), ('of', 'IN', 'O'), ('demonstrators', 'NNS', 'O'), ('have', 'VBP', 'O'), ('marched', 'VBN', 'O'), ('through', 'IN', 'O'), ('London', 'NNP', 'B-geo'), ('to', 'TO', 'O'), ('protest', 'VB', 'O'), ('the', 'DT', 'O'), ('war', 'NN', 'O'), ('in', 'IN', 'O'), ('Iraq', 'NNP', 'B-geo'), ('and', 'CC', 'O'), ('demand', 'VB', 'O'), ('the', 'DT', 'O'), ('withdrawal', 'NN', 'O'), ('of', 'IN', 'O'), ('British', 'JJ', 'B-gpe'), ('troops', 'NNS', 'O'), ('from', 'IN', 'O'), ('that', 'DT', 'O'), ('country', 'NN', 'O'), ('.', '.', 'O')]\n"
401 | ]
402 | }
403 | ],
404 | "source": [
405 | "#sentence with its pos and tag.\n",
406 | "sent = getter.get_text()\n",
407 | "print(sent)"
408 | ]
409 | },
410 | {
411 | "cell_type": "markdown",
412 | "metadata": {},
413 | "source": [
414 | "Getting all the sentences in the dataset."
415 | ]
416 | },
417 | {
418 | "cell_type": "code",
419 | "execution_count": 11,
420 | "metadata": {},
421 | "outputs": [],
422 | "source": [
423 | "sentences = getter.sentences"
424 | ]
425 | },
426 | {
427 | "cell_type": "markdown",
428 | "metadata": {},
429 | "source": [
430 | "#### Feature Preparation\n",
431 | "These are the default features used by the NER in nltk. We can also modify them to customize the model."
432 | ]
433 | },
434 | {
435 | "cell_type": "code",
436 | "execution_count": 12,
437 | "metadata": {},
438 | "outputs": [],
439 | "source": [
440 | "def word2features(sent, i):\n",
441 | " word = sent[i][0]\n",
442 | " postag = sent[i][1]\n",
443 | "\n",
444 | " features = {\n",
445 | " 'bias': 1.0,\n",
446 | " 'word.lower()': word.lower(),\n",
447 | " 'word[-3:]': word[-3:],\n",
448 | " 'word[-2:]': word[-2:],\n",
449 | " 'word.isupper()': word.isupper(),\n",
450 | " 'word.istitle()': word.istitle(),\n",
451 | " 'word.isdigit()': word.isdigit(),\n",
452 | " 'postag': postag,\n",
453 | " 'postag[:2]': postag[:2],\n",
454 | " }\n",
455 | " if i > 0:\n",
456 | " word1 = sent[i-1][0]\n",
457 | " postag1 = sent[i-1][1]\n",
458 | " features.update({\n",
459 | " '-1:word.lower()': word1.lower(),\n",
460 | " '-1:word.istitle()': word1.istitle(),\n",
461 | " '-1:word.isupper()': word1.isupper(),\n",
462 | " '-1:postag': postag1,\n",
463 | " '-1:postag[:2]': postag1[:2],\n",
464 | " })\n",
465 | " else:\n",
466 | " features['BOS'] = True\n",
467 | "\n",
468 | " if i < len(sent)-1:\n",
469 | " word1 = sent[i+1][0]\n",
470 | " postag1 = sent[i+1][1]\n",
471 | " features.update({\n",
472 | " '+1:word.lower()': word1.lower(),\n",
473 | " '+1:word.istitle()': word1.istitle(),\n",
474 | " '+1:word.isupper()': word1.isupper(),\n",
475 | " '+1:postag': postag1,\n",
476 | " '+1:postag[:2]': postag1[:2],\n",
477 | " })\n",
478 | " else:\n",
479 | " features['EOS'] = True\n",
480 | "\n",
481 | " return features\n",
482 | "\n",
483 | "\n",
484 | "def sent2features(sent):\n",
485 | " return [word2features(sent, i) for i in range(len(sent))]\n",
486 | "\n",
487 | "def sent2labels(sent):\n",
488 | " return [label for token, postag, label in sent]\n",
489 | "\n",
490 | "def sent2tokens(sent):\n",
491 | " return [token for token, postag, label in sent]"
492 | ]
493 | },
494 | {
495 | "cell_type": "code",
496 | "execution_count": 13,
497 | "metadata": {},
498 | "outputs": [],
499 | "source": [
500 | "X = [sent2features(s) for s in sentences]\n",
501 | "y = [sent2labels(s) for s in sentences]"
502 | ]
503 | },
504 | {
505 | "cell_type": "code",
506 | "execution_count": 14,
507 | "metadata": {},
508 | "outputs": [],
509 | "source": [
510 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)"
511 | ]
512 | },
513 | {
514 | "cell_type": "code",
515 | "execution_count": 15,
516 | "metadata": {},
517 | "outputs": [
518 | {
519 | "data": {
520 | "text/plain": [
521 | "CRF(algorithm='lbfgs', all_possible_states=None,\n",
522 | " all_possible_transitions=False, averaging=None, c=None, c1=0.1, c2=0.1,\n",
523 | " calibration_candidates=None, calibration_eta=None,\n",
524 | " calibration_max_trials=None, calibration_rate=None,\n",
525 | " calibration_samples=None, delta=None, epsilon=None, error_sensitive=None,\n",
526 | " gamma=None, keep_tempfiles=None, linesearch=None, max_iterations=100,\n",
527 | " max_linesearch=None, min_freq=None, model_filename=None,\n",
528 | " num_memories=None, pa_type=None, period=None, trainer_cls=None,\n",
529 | " variance=None, verbose=False)"
530 | ]
531 | },
532 | "execution_count": 15,
533 | "metadata": {},
534 | "output_type": "execute_result"
535 | }
536 | ],
537 | "source": [
538 | "crf = CRF(algorithm = 'lbfgs',\n",
539 | " c1 = 0.1,\n",
540 | " c2 = 0.1,\n",
541 | " max_iterations = 100,\n",
542 | " all_possible_transitions = False)\n",
543 | "crf.fit(X_train, y_train)"
544 | ]
545 | },
546 | {
547 | "cell_type": "code",
548 | "execution_count": 16,
549 | "metadata": {},
550 | "outputs": [],
551 | "source": [
552 | "#Predicting on the test set.\n",
553 | "y_pred = crf.predict(X_test)"
554 | ]
555 | },
556 | {
557 | "cell_type": "markdown",
558 | "metadata": {},
559 | "source": [
560 | "#### Evaluating the model performance.\n",
561 | "We will use precision, recall and f1-score metrics to evaluate the performance of the model since the accuracy is not a good metric for this dataset because we have an unequal number of data points in each class."
562 | ]
563 | },
564 | {
565 | "cell_type": "code",
566 | "execution_count": 19,
567 | "metadata": {},
568 | "outputs": [
569 | {
570 | "name": "stdout",
571 | "output_type": "stream",
572 | "text": [
573 | "0.9719578426137272\n"
574 | ]
575 | }
576 | ],
577 | "source": [
578 | "f1_score = flat_f1_score(y_test, y_pred, average = 'weighted')\n",
579 | "print(f1_score)"
580 | ]
581 | },
582 | {
583 | "cell_type": "code",
584 | "execution_count": 20,
585 | "metadata": {},
586 | "outputs": [
587 | {
588 | "name": "stdout",
589 | "output_type": "stream",
590 | "text": [
591 | " precision recall f1-score support\n",
592 | "\n",
593 | " B-art 0.43 0.17 0.24 78\n",
594 | " B-eve 0.68 0.41 0.51 61\n",
595 | " B-geo 0.86 0.91 0.88 7481\n",
596 | " B-gpe 0.97 0.94 0.95 3185\n",
597 | " B-nat 0.85 0.36 0.51 47\n",
598 | " B-org 0.81 0.74 0.77 4187\n",
599 | " B-per 0.86 0.83 0.84 3421\n",
600 | " B-tim 0.93 0.88 0.90 4030\n",
601 | " I-art 0.25 0.08 0.12 64\n",
602 | " I-eve 0.52 0.30 0.38 44\n",
603 | " I-geo 0.81 0.80 0.80 1461\n",
604 | " I-gpe 0.81 0.46 0.59 37\n",
605 | " I-nat 0.50 0.17 0.25 12\n",
606 | " I-org 0.83 0.81 0.82 3441\n",
607 | " I-per 0.86 0.90 0.88 3488\n",
608 | " I-tim 0.84 0.74 0.79 1245\n",
609 | " O 0.99 0.99 0.99 177951\n",
610 | "\n",
611 | "avg / total 0.97 0.97 0.97 210233\n",
612 | "\n"
613 | ]
614 | }
615 | ],
616 | "source": [
617 | "report = flat_classification_report(y_test, y_pred)\n",
618 | "print(report)"
619 | ]
620 | },
621 | {
622 | "cell_type": "markdown",
623 | "metadata": {},
624 | "source": [
625 | "This looks quite nice."
626 | ]
627 | },
628 | {
629 | "cell_type": "code",
630 | "execution_count": null,
631 | "metadata": {},
632 | "outputs": [],
633 | "source": []
634 | }
635 | ],
636 | "metadata": {
637 | "kernelspec": {
638 | "display_name": "Python 3",
639 | "language": "python",
640 | "name": "python3"
641 | },
642 | "language_info": {
643 | "codemirror_mode": {
644 | "name": "ipython",
645 | "version": 3
646 | },
647 | "file_extension": ".py",
648 | "mimetype": "text/x-python",
649 | "name": "python",
650 | "nbconvert_exporter": "python",
651 | "pygments_lexer": "ipython3",
652 | "version": "3.6.5"
653 | }
654 | },
655 | "nbformat": 4,
656 | "nbformat_minor": 2
657 | }
658 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Named Entity Recognition
2 |
3 | In Natural Language Processing (NLP), Named Entity Recognition (NER) is a common problem. An entity is a part of the text that is of interest. NER is a method of extracting the relevant information from a large corpus and classifying those entities into predefined categories such as location, organization, name and so on. 
4 | Information about labels: 
5 | * geo = Geographical Entity
6 | * org = Organization
7 | * per = Person
8 | * gpe = Geopolitical Entity
9 | * tim = Time indicator
10 | * art = Artifact
11 | * eve = Event
12 | * nat = Natural Phenomenon
13 |
14 | 1. Total Words Count = 1354149
15 | 2. Target Data Column: Tag
16 |
--------------------------------------------------------------------------------
/model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Akshayc1/named-entity-recognition/8a64faf36529b42eec3244cc3bd774050abbc651/model.h5
--------------------------------------------------------------------------------
/tag_to_index.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Akshayc1/named-entity-recognition/8a64faf36529b42eec3244cc3bd774050abbc651/tag_to_index.pickle
--------------------------------------------------------------------------------
/word_to_index.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Akshayc1/named-entity-recognition/8a64faf36529b42eec3244cc3bd774050abbc651/word_to_index.pickle
--------------------------------------------------------------------------------