├── Knowledge Graphs
    ├── init
    ├── wordnet.png
    ├── wordnet-graph.ipynb
    ├── tree.py
    ├── intro-to-wordnet.ipynb
    └── lesk.ipynb
├── topic modelling
    ├── init
    └── nmf-imdb-movie-reviews.ipynb
├── neural networks for NLP
    ├── init
    ├── images
    │   ├── init
    │   ├── or.png
    │   ├── ann.png
    │   ├── nand.png
    │   ├── xor.png
    │   ├── ANDand.png
    │   ├── combo.png
    │   ├── deepnet.png
    │   └── matmul.png
    ├── imdb-reviews-classification.ipynb
    ├── keras.ipynb
    └── forward-pass.ipynb
├── _config.yml
└── distributional semantics
    ├── init
    ├── images
        ├── init
        ├── Emb1.png
        ├── Emb2.png
        ├── Emb3.png
        ├── Emb4.png
        ├── man-king.png
        ├── king-queen.png
        ├── man-woman.png
        └── woman-queen.png
    ├── utils.py
    └── w2v-text-classification.ipynb


/Knowledge Graphs/init:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/topic modelling/init:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/neural networks for NLP/init:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman


--------------------------------------------------------------------------------
/distributional semantics/init:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/distributional semantics/images/init:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/neural networks for NLP/images/init:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/Knowledge Graphs/wordnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/Knowledge Graphs/wordnet.png


--------------------------------------------------------------------------------
/neural networks for NLP/images/or.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/neural networks for NLP/images/or.png


--------------------------------------------------------------------------------
/neural networks for NLP/images/ann.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/neural networks for NLP/images/ann.png


--------------------------------------------------------------------------------
/neural networks for NLP/images/nand.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/neural networks for NLP/images/nand.png


--------------------------------------------------------------------------------
/neural networks for NLP/images/xor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/neural networks for NLP/images/xor.png


--------------------------------------------------------------------------------
/distributional semantics/images/Emb1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/distributional semantics/images/Emb1.png


--------------------------------------------------------------------------------
/distributional semantics/images/Emb2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/distributional semantics/images/Emb2.png


--------------------------------------------------------------------------------
/distributional semantics/images/Emb3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/distributional semantics/images/Emb3.png


--------------------------------------------------------------------------------
/distributional semantics/images/Emb4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/distributional semantics/images/Emb4.png


--------------------------------------------------------------------------------
/neural networks for NLP/images/ANDand.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/neural networks for NLP/images/ANDand.png


--------------------------------------------------------------------------------
/neural networks for NLP/images/combo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/neural networks for NLP/images/combo.png


--------------------------------------------------------------------------------
/neural networks for NLP/images/deepnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/neural networks for NLP/images/deepnet.png


--------------------------------------------------------------------------------
/neural networks for NLP/images/matmul.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/neural networks for NLP/images/matmul.png


--------------------------------------------------------------------------------
/distributional semantics/images/man-king.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/distributional semantics/images/man-king.png


--------------------------------------------------------------------------------
/distributional semantics/images/king-queen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/distributional semantics/images/king-queen.png


--------------------------------------------------------------------------------
/distributional semantics/images/man-woman.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/distributional semantics/images/man-woman.png


--------------------------------------------------------------------------------
/distributional semantics/images/woman-queen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Semantic-Processing/main/distributional semantics/images/woman-queen.png


--------------------------------------------------------------------------------
/Knowledge Graphs/wordnet-graph.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "advance-confidence",
 6 |    "metadata": {},
 7 |    "source": []
 8 |   },
 9 |   {
10 |    "cell_type": "markdown",
11 |    "id": "prostate-franchise",
12 |    "metadata": {},
13 |    "source": [
14 |     "<img src=\"wordnet.png\">"
15 |    ]
16 |   }
17 |  ],
18 |  "metadata": {
19 |   "kernelspec": {
20 |    "display_name": "Python 3",
21 |    "language": "python",
22 |    "name": "python3"
23 |   },
24 |   "language_info": {
25 |    "codemirror_mode": {
26 |     "name": "ipython",
27 |     "version": 3
28 |    },
29 |    "file_extension": ".py",
30 |    "mimetype": "text/x-python",
31 |    "name": "python",
32 |    "nbconvert_exporter": "python",
33 |    "pygments_lexer": "ipython3",
34 |    "version": "3.7.7"
35 |   }
36 |  },
37 |  "nbformat": 4,
38 |  "nbformat_minor": 5
39 | }
40 | 


--------------------------------------------------------------------------------
/Knowledge Graphs/tree.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | from spacy_wordnet.wordnet_annotator import WordnetAnnotator
 3 | from spacy import load
 4 | from spacy_wordnet.wordnet_annotator import WordnetAnnotator
 5 | import pandas as pd
 6 | nlp = load('en_core_web_sm')
 7 | nlp.add_pipe(WordnetAnnotator(nlp.lang))
 8 | token = nlp('Calculator.')[0]
 9 | token._.wordnet.synsets()
10 | meaning1, meaning2 = token = nlp('Calculator.')[0]
11 | meaning1, meaning2 = token._.wordnet.synsets()
12 | meaning1
13 | meaning1.name()
14 | meaning1.lemmas()
15 | meaning2.lemmas()
16 | token._.wordnet.wordnet_domains()
17 | nlp('mathematics')[0]._.wordnet.wordnet_domains()
18 | 'science' in _
19 | nlp('pure_science')[0]._.wordnet.wordnet_domains()
20 | nlp('science')[0]._.wordnet.wordnet_domains()
21 | wnet = nlp('science')[0]._.wordnet
22 | wnet.wordnet_synsets_for_domain()
23 | wnet.lemmas()
24 | token = nlp('human')[0]
25 | token._.wordnet.lemmas()
26 | token._.wordnet.synsets()
27 | [c.lemmas() for c in token._.wordnet.synsets()]
28 | syn =token._.wordnet.synsets()
29 | x = syn[0]
30 | x
31 | x.common_hypernyms()
32 | man = nlp('man')[0]
33 | woman = nlp('woman')[0]
34 | man._.wordnet.synsets()
35 | man_syn = man._.wordnet.synsets()[0]
36 | woman_syn = woman._.wordnet.synsets()[0]
37 | man_syn
38 | woman_syn
39 | man_syn.common_hypernyms(woman_syn)
40 | man_syn.entailments()
41 | get_ipython().run_line_magic('pinfo', 'man_syn.entailments')
42 | man_syn.examples()
43 | man_syn.hypernyms()
44 | woman_syn.hypernyms()
45 | man_syn.hyponyms()
46 | woman_syn.hyponyms()
47 | get_ipython().run_line_magic('pinfo', 'man_syn.jcn_similarity')
48 | get_ipython().run_line_magic('pinfo', 'man_syn.lemmas')
49 | man_syn.lemmas()
50 | get_ipython().run_line_magic('pinfo', 'man_syn.similar_tos')
51 | man_syn.similar_tos()
52 | while True:
53 |     x = man_syn.hypernyms()[0]
54 |     print(x)
55 |     x = x.hypernyms()[0]
56 |     
57 | x = man_syn
58 | while True:
59 |     print(x)
60 |     x = x.hypernyms()[0]
61 |     
62 | x
63 | x.hypernyms()
64 | x = man_syn
65 | while True:
66 |     print(x)
67 |     x = x.hyponyms()[0]
68 |     
69 | while True:
70 |     print(x)
71 |     i = 0
72 |     try:
73 |         x = x.hyponyms()[i]
74 |     except IndexError:
75 |         i += 1
76 |         x = x.hyponyms()[i]
77 |         
78 | while True:
79 |     print(x)
80 |     try:
81 |         i = 0
82 |         x = x.hyponyms()[i]
83 |     except IndexError:
84 |         i += 1
85 |         x = x.hyponyms()[i]
86 |         
87 |         
88 | 


--------------------------------------------------------------------------------
/distributional semantics/utils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | 
 4 | # In[1]:
 5 | 
 6 | 
 7 | from sklearn.decomposition import PCA
 8 | from sklearn.manifold import TSNE
 9 | import numpy as np
10 | import matplotlib.pyplot as plt
11 | from gensim.models.callbacks import CallbackAny2Vec
12 | from gensim.models import Word2Vec, KeyedVectors
13 | from tensorflow.keras.layers import Embedding
14 | 
15 | 
16 | class MetricCallback(CallbackAny2Vec):
17 |     """
18 |     Callback to print loss after each epoch
19 |     """
20 |     def __init__(self, every=10):
21 |         self.myloss = []
22 |         self.epoch = 0
23 |         self.every = every
24 | 
25 |     def on_epoch_end(self, model):
26 |         loss = model.get_latest_training_loss()
27 |         if self.epoch == 0:
28 |             self.myloss.append(loss)
29 |         else:
30 |             self.myloss.append(loss - self.loss_previous_step)
31 |         if self.epoch % self.every == 0:
32 |             print(f'Loss after epoch {self.epoch}: {self.myloss[-1]}')  # NOQA: T001
33 |         self.epoch += 1
34 |         self.loss_previous_step = loss
35 | 
36 | 
37 | def plot_arrows(starts, ends, wv, estimator=PCA, **kwargs):
38 |     if len(starts) != len(ends):
39 |         raise ValueError('starts and ends must be the same length.')
40 |     fig, ax = plt.subplots(figsize=kwargs.pop('figsize', (8, 8)))
41 |     X = wv[starts + ends]  # NOQA: N806
42 |     x_red = estimator(n_components=2).fit_transform(X)
43 |     plt.scatter(*x_red.T)
44 |     for i, word in enumerate(starts + ends):
45 |         plt.annotate(word, x_red[i])
46 |     xstart = x_red[:len(starts)]
47 |     xend = x_red[len(starts):]
48 |     for i, (start, end) in enumerate(zip(starts, ends)):
49 |         x1, y1 = xstart[i]
50 |         x2, y2 = xend[i]
51 |         plt.arrow(x1, y1, x2 - x1, y2 - y1)
52 | 
53 | 
54 | def plot_vectors(words, model, estimator=TSNE, **kwargs):
55 |     names = []
56 |     vectors = []
57 |     for word in words:
58 |         if word in model.wv:
59 |             names.append(word)
60 |             vectors.append(model.wv[word])
61 | 
62 |     X = np.r_[vectors]  # NOQA: N806
63 |     x_red = estimator(n_components=2).fit_transform(X)
64 |     fig, ax = plt.subplots(figsize=kwargs.pop('figsize', (16, 16)))  # NOQA: E912
65 |     ax.scatter(*x_red.T)
66 | 
67 |     for i, word in enumerate(names):
68 |         plt.annotate(word, x_red[i])
69 | 
70 | 
71 | def make_embedding_layer(model, tokenizer, MAX_SEQUENCE_LENGTH):  # NOQA: N803
72 |     word_index = tokenizer.word_index
73 |     if isinstance(model, Word2Vec):
74 |         wv = model.wv
75 |     elif isinstance(model, KeyedVectors):
76 |         wv = model
77 |     embedding_matrix = np.zeros((len(word_index) + 1, wv.vector_size))
78 |     for word, i in word_index.items():
79 |         try:
80 |             vector = wv.get_vector(word, False)
81 |             embedding_matrix[i] = vector
82 |         except KeyError:
83 |             continue
84 |     el = Embedding(
85 |         len(word_index) + 1, wv.vector_size, weights=[embedding_matrix],
86 |         input_length=MAX_SEQUENCE_LENGTH, trainable=False
87 |     )
88 |     return el
89 | 


--------------------------------------------------------------------------------
/neural networks for NLP/imdb-reviews-classification.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "respective-webster",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# Text Classification"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "raw",
 13 |    "id": "adverse-evolution",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "----------------------------------------------------------------------\n",
 17 |     "Filename : imdb-reviews-classification.ipynb\n",
 18 |     "Author   : Jaidev Deshpande\n",
 19 |     "Purpose  : Understanding text classification using keras\n",
 20 |     "Libraries: tensorflow.keras, numpy, sklearn and pandas\n",
 21 |     "----------------------------------------------------------------------"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": null,
 27 |    "id": "seasonal-balloon",
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "# Imports\n",
 32 |     "\n",
 33 |     "from tensorflow.keras.preprocessing import text_dataset_from_directory\n",
 34 |     "from tensorflow.keras import Sequential\n",
 35 |     "from tensorflow.keras.layers import Dense\n",
 36 |     "from tensorflow.keras.optimizers import SGD\n",
 37 |     "\n",
 38 |     "import pandas as pd\n",
 39 |     "import numpy as np\n",
 40 |     "from sklearn.feature_extraction.text import TfidfVectorizer\n",
 41 |     "from sklearn.model_selection import train_test_split\n",
 42 |     "import matplotlib.pyplot as plt\n",
 43 |     "%matplotlib inline"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "id": "corrected-railway",
 49 |    "metadata": {},
 50 |    "source": [
 51 |     "## The Problem: Large Movie Dataset Review\n",
 52 |     "### Classify movie reviews from IMDB into positive or negative sentiment.\n",
 53 |     "### Download the dataset [here](https://drive.google.com/drive/u/0/folders/1hYw0TQbwcM2YWEiKN-2c_kHCPtHO_TMC)"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": null,
 59 |    "id": "fossil-vertical",
 60 |    "metadata": {},
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "# Importing & preprocessing the dataset\n",
 64 |     "\n",
 65 |     "train_ds = text_dataset_from_directory('aclImdb/train')\n",
 66 |     "test_ds = text_dataset_from_directory('aclImdb/test')\n",
 67 |     "\n",
 68 |     "dfTrain = pd.DataFrame(train_ds.unbatch().as_numpy_iterator(), columns=['text', 'label'])\n",
 69 |     "dfTest = pd.DataFrame(test_ds.unbatch().as_numpy_iterator(), columns=['text', 'label'])\n",
 70 |     "_, xts = train_test_split(dfTest, stratify=dfTest['label'], test_size=0.25)"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "id": "vertical-northern",
 77 |    "metadata": {},
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "# Look at a sample movie review\n",
 81 |     "\n",
 82 |     "print(dfTrain.loc[0, 'text'])"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": null,
 88 |    "id": "virgin-illustration",
 89 |    "metadata": {},
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "# Look at the label of the review\n",
 93 |     "\n",
 94 |     "dfTrain.loc[0, 'label']"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": null,
100 |    "id": "sustained-camcorder",
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": [
104 |     "pd.options.display.max_colwidth = 100\n",
105 |     "dfTrain.head()"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": null,
111 |    "id": "improved-negotiation",
112 |    "metadata": {},
113 |    "outputs": [],
114 |    "source": [
115 |     "dfTest.head()"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": null,
121 |    "id": "enormous-comment",
122 |    "metadata": {},
123 |    "outputs": [],
124 |    "source": [
125 |     "# Feature Extraction - Text to TFIDF\n",
126 |     "\n",
127 |     "vect = TfidfVectorizer(stop_words='english')\n",
128 |     "XTrain = vect.fit_transform(dfTrain['text']).toarray()\n",
129 |     "XTest = vect.transform(xts['text']).toarray()"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "id": "included-healing",
136 |    "metadata": {},
137 |    "outputs": [],
138 |    "source": [
139 |     "XTrain.shape"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": null,
145 |    "id": "according-russia",
146 |    "metadata": {},
147 |    "outputs": [],
148 |    "source": [
149 |     "# Assemble and compile the neural network\n",
150 |     "\n",
151 |     "model = Sequential([\n",
152 |     "    Dense(128, input_shape=(XTrain.shape[1],), activation='relu'),\n",
153 |     "    Dense(128, activation='relu'),\n",
154 |     "    Dense(64, activation='relu'),\n",
155 |     "    Dense(1, activation='sigmoid')\n",
156 |     "])\n",
157 |     "model.compile(loss='binary_crossentropy', optimizer=SGD(lr=1e-3), metrics=['accuracy'])"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": null,
163 |    "id": "protective-parking",
164 |    "metadata": {
165 |     "scrolled": true
166 |    },
167 |    "outputs": [],
168 |    "source": [
169 |     "# Train the neural network\n",
170 |     "\n",
171 |     "history = model.fit(XTrain, dfTrain['label'], batch_size=64, validation_data=(XTest, xts['label']), epochs=50)"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": null,
177 |    "id": "completed-poultry",
178 |    "metadata": {},
179 |    "outputs": [],
180 |    "source": [
181 |     "# Visualize the learning curve\n",
182 |     "\n",
183 |     "plt.plot(history.history['accuracy'], label='Train Accuracy')\n",
184 |     "plt.plot(history.history['val_accuracy'], label='Test Accuracy')\n",
185 |     "plt.legend()"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "code",
190 |    "execution_count": null,
191 |    "id": "metallic-office",
192 |    "metadata": {},
193 |    "outputs": [],
194 |    "source": []
195 |   }
196 |  ],
197 |  "metadata": {
198 |   "kernelspec": {
199 |    "display_name": "Python 3",
200 |    "language": "python",
201 |    "name": "python3"
202 |   },
203 |   "language_info": {
204 |    "codemirror_mode": {
205 |     "name": "ipython",
206 |     "version": 3
207 |    },
208 |    "file_extension": ".py",
209 |    "mimetype": "text/x-python",
210 |    "name": "python",
211 |    "nbconvert_exporter": "python",
212 |    "pygments_lexer": "ipython3",
213 |    "version": "3.7.7"
214 |   }
215 |  },
216 |  "nbformat": 4,
217 |  "nbformat_minor": 5
218 | }
219 | 


--------------------------------------------------------------------------------
/neural networks for NLP/keras.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "strange-seafood",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## [Installing Tensorflow / Keras](https://www.tensorflow.org/install)"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": null,
 14 |    "id": "aquatic-learning",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "# Imports\n",
 19 |     "\n",
 20 |     "from tensorflow.keras.layers import Dense\n",
 21 |     "from tensorflow.keras import Sequential\n",
 22 |     "from tensorflow.keras.optimizers import SGD\n",
 23 |     "from tensorflow.keras.utils import to_categorical, plot_model\n",
 24 |     "import pandas as pd\n",
 25 |     "import numpy as np\n",
 26 |     "import matplotlib.pyplot as plt\n",
 27 |     "%matplotlib inline"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "id": "maritime-flavor",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "## Problem: **Credit Card Fraud Detection**\n",
 36 |     "### Given a list of 28 anonimyzed features and the amount of money involved in a credt card transaction, predict whether the transaction is likely to be fraudulent.\n",
 37 |     "### Source: https://www.kaggle.com/mlg-ulb/creditcardfraud/"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": null,
 43 |    "id": "occupied-eagle",
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "# Preview dataset\n",
 48 |     "\n",
 49 |     "df = pd.read_csv('creditcard.csv')\n",
 50 |     "df.drop(['Time'], axis=1, inplace=True)\n",
 51 |     "df.head()"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": null,
 57 |    "id": "stock-imperial",
 58 |    "metadata": {},
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "print(len(df))"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": null,
 67 |    "id": "charged-parade",
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "# Check the proportion of the classes\n",
 72 |     "\n",
 73 |     "df['Class'].value_counts(normalize=True)"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": null,
 79 |    "id": "level-telephone",
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "df['Class'].value_counts(normalize=True).plot(kind='pie')"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": null,
 89 |    "id": "attached-potter",
 90 |    "metadata": {},
 91 |    "outputs": [],
 92 |    "source": [
 93 |     "# Define the input and the output\n",
 94 |     "\n",
 95 |     "y = df.pop('Class').values\n",
 96 |     "X = df.values"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": null,
102 |    "id": "aging-cream",
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "# Assemble the network\n",
107 |     "\n",
108 |     "model = Sequential([\n",
109 |     "    Dense(16, input_shape=(29,), activation='relu'),\n",
110 |     "    Dense(8, activation='relu'),\n",
111 |     "    Dense(2, activation='softmax')\n",
112 |     "])\n",
113 |     "model.compile(loss='sparse_categorical_crossentropy', optimizer=SGD(lr=1e-3), metrics=['categorical_accuracy'])"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": null,
119 |    "id": "experienced-terminal",
120 |    "metadata": {},
121 |    "outputs": [],
122 |    "source": [
123 |     "# View the network - layer by layer\n",
124 |     "\n",
125 |     "plot_model(model, show_shapes=True, rankdir='LR')"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": null,
131 |    "id": "revolutionary-gothic",
132 |    "metadata": {},
133 |    "outputs": [],
134 |    "source": [
135 |     "model.layers"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": null,
141 |    "id": "authorized-holiday",
142 |    "metadata": {},
143 |    "outputs": [],
144 |    "source": [
145 |     "layer1, layer2, layer3 = model.layers"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": null,
151 |    "id": "quarterly-participation",
152 |    "metadata": {},
153 |    "outputs": [],
154 |    "source": [
155 |     "# View the shapes of weights and biases for each layer\n",
156 |     "\n",
157 |     "l1_weight, l1_bias = layer1.weights\n",
158 |     "print(l1_weight.shape)\n",
159 |     "print(l1_bias.shape)"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": null,
165 |    "id": "impaired-emphasis",
166 |    "metadata": {},
167 |    "outputs": [],
168 |    "source": [
169 |     "l2_weight, l2_bias = layer2.weights\n",
170 |     "print(l2_weight.shape)\n",
171 |     "print(l2_bias.shape)"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": null,
177 |    "id": "contained-leave",
178 |    "metadata": {},
179 |    "outputs": [],
180 |    "source": [
181 |     "l3_weight, l3_bias = layer3.weights\n",
182 |     "print(l3_weight.shape)\n",
183 |     "print(l3_bias.shape)"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": null,
189 |    "id": "ultimate-times",
190 |    "metadata": {
191 |     "scrolled": true
192 |    },
193 |    "outputs": [],
194 |    "source": [
195 |     "# Train the model\n",
196 |     "\n",
197 |     "history = model.fit(X, y, batch_size=128, epochs=30)"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": null,
203 |    "id": "signal-fireplace",
204 |    "metadata": {},
205 |    "outputs": [],
206 |    "source": [
207 |     "# Plot the loss across epochs\n",
208 |     "\n",
209 |     "plt.plot(history.history['loss'], label='Loss')"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": null,
215 |    "id": "advised-border",
216 |    "metadata": {},
217 |    "outputs": [],
218 |    "source": [
219 |     "# Plot the accuracy across epochs\n",
220 |     "\n",
221 |     "plt.plot(history.history['categorical_accuracy'], label='Accuracy')"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "code",
226 |    "execution_count": null,
227 |    "id": "attended-mileage",
228 |    "metadata": {},
229 |    "outputs": [],
230 |    "source": []
231 |   }
232 |  ],
233 |  "metadata": {
234 |   "kernelspec": {
235 |    "display_name": "Python 3",
236 |    "language": "python",
237 |    "name": "python3"
238 |   },
239 |   "language_info": {
240 |    "codemirror_mode": {
241 |     "name": "ipython",
242 |     "version": 3
243 |    },
244 |    "file_extension": ".py",
245 |    "mimetype": "text/x-python",
246 |    "name": "python",
247 |    "nbconvert_exporter": "python",
248 |    "pygments_lexer": "ipython3",
249 |    "version": "3.7.7"
250 |   }
251 |  },
252 |  "nbformat": 4,
253 |  "nbformat_minor": 5
254 | }
255 | 


--------------------------------------------------------------------------------
/neural networks for NLP/forward-pass.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "looking-election",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# Forward Pass"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "raw",
 13 |    "id": "continuous-royalty",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "----------------------------------------------------------------------\n",
 17 |     "Filename : forward-pass.ipynb\n",
 18 |     "Author   : Jaidev Deshpande\n",
 19 |     "Content  : Understanding feed forward propagation\n",
 20 |     "Libraries: numpy, sklearn and pandas\n",
 21 |     "----------------------------------------------------------------------"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": null,
 27 |    "id": "understood-island",
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "# Importing libraries\n",
 32 |     "\n",
 33 |     "from sklearn.datasets import load_iris\n",
 34 |     "import pandas as pd\n",
 35 |     "import numpy as np"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": null,
 41 |    "id": "spoken-clarity",
 42 |    "metadata": {},
 43 |    "outputs": [],
 44 |    "source": [
 45 |     "# Load the Iris dataset\n",
 46 |     "\n",
 47 |     "iris = load_iris()\n",
 48 |     "X = iris.data\n",
 49 |     "y = iris.target\n",
 50 |     "df = pd.DataFrame(X, columns=iris.feature_names)\n",
 51 |     "df['species'] = iris.target_names[y]\n",
 52 |     "df.sample(n=5)"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "id": "western-consensus",
 58 |    "metadata": {},
 59 |    "source": [
 60 |     "<img src=\"images/ann.png\" height=\"50%\" width=\"50%\">"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": null,
 66 |    "id": "detailed-nomination",
 67 |    "metadata": {},
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "# Activation functions\n",
 71 |     "\n",
 72 |     "def sigmoid(x):\n",
 73 |     "    return 1 / (1 + np.exp(-x))\n",
 74 |     "\n",
 75 |     "\n",
 76 |     "def softmax(x):\n",
 77 |     "    return np.exp(x) / np.exp(x).sum()"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "id": "unable-typing",
 83 |    "metadata": {},
 84 |    "source": [
 85 |     "## Matrix Multiplication\n",
 86 |     "<img src=\"images/matmul.png\" height=\"50%\" width=\"50%\"> "
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "markdown",
 91 |    "id": "played-brighton",
 92 |    "metadata": {},
 93 |    "source": [
 94 |     "## Layer 1 Weights, Biases and Activation"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "markdown",
 99 |    "id": "mounted-penetration",
100 |    "metadata": {},
101 |    "source": [
102 |     "<img src=\"images/ann.png\" height=\"25%\" width=\"25%\">"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": null,
108 |    "id": "loving-metropolitan",
109 |    "metadata": {},
110 |    "outputs": [],
111 |    "source": [
112 |     "df.head()"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": null,
118 |    "id": "split-tension",
119 |    "metadata": {},
120 |    "outputs": [],
121 |    "source": [
122 |     "x = X[:5]\n",
123 |     "x"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": null,
129 |    "id": "cross-finder",
130 |    "metadata": {},
131 |    "outputs": [],
132 |    "source": [
133 |     "# Initialize weights and biases\n",
134 |     "\n",
135 |     "w1 = np.random.rand(4, 5)\n",
136 |     "b1 = np.random.rand(5)\n",
137 |     "print(w1.shape)\n",
138 |     "print(b1.shape)"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": null,
144 |    "id": "recorded-prison",
145 |    "metadata": {},
146 |    "outputs": [],
147 |    "source": [
148 |     "# Take a data sample\n",
149 |     "\n",
150 |     "sample = x[0]\n",
151 |     "sample"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": null,
157 |    "id": "american-access",
158 |    "metadata": {},
159 |    "outputs": [],
160 |    "source": [
161 |     "print(w1)"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": null,
167 |    "id": "governing-reader",
168 |    "metadata": {},
169 |    "outputs": [],
170 |    "source": [
171 |     "# Multiply the sample with the first layer weights <𝐱,𝐰1>\n",
172 |     "\n",
173 |     "a1 = np.dot(sample, w1)\n",
174 |     "a1"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": null,
180 |    "id": "concrete-differential",
181 |    "metadata": {},
182 |    "outputs": [],
183 |    "source": [
184 |     "b1"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": null,
190 |    "id": "institutional-talent",
191 |    "metadata": {},
192 |    "outputs": [],
193 |    "source": [
194 |     "# Add the bias to the product <𝐱,𝐰1>+ b\n",
195 |     "\n",
196 |     "a1 + b1"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": null,
202 |    "id": "balanced-storm",
203 |    "metadata": {},
204 |    "outputs": [],
205 |    "source": [
206 |     "# Apply the activation to get the output of the first layer f(<𝐱,𝐰1>+ b)\n",
207 |     "\n",
208 |     "f1 = sigmoid(a1 + b1)\n",
209 |     "f1"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "markdown",
214 |    "id": "promising-exhibit",
215 |    "metadata": {},
216 |    "source": [
217 |     "## Layer 2: Weights, Biases and Activations"
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "markdown",
222 |    "id": "acknowledged-cruise",
223 |    "metadata": {},
224 |    "source": [
225 |     "<img src=\"images/ann.png\" height=\"25%\" width=\"25%\">"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": null,
231 |    "id": "color-source",
232 |    "metadata": {},
233 |    "outputs": [],
234 |    "source": [
235 |     "w2 = np.random.rand(5, 7)\n",
236 |     "b2 = np.random.rand(7)"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": null,
242 |    "id": "visible-fifth",
243 |    "metadata": {},
244 |    "outputs": [],
245 |    "source": [
246 |     "a2 = np.dot(f1, w2) + b2\n",
247 |     "a2"
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "code",
252 |    "execution_count": null,
253 |    "id": "radical-serum",
254 |    "metadata": {},
255 |    "outputs": [],
256 |    "source": [
257 |     "f2 = sigmoid(a2)\n",
258 |     "f2"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "markdown",
263 |    "id": "ahead-outreach",
264 |    "metadata": {},
265 |    "source": [
266 |     "## Layer 3: Weights, Biases and Activations"
267 |    ]
268 |   },
269 |   {
270 |    "cell_type": "markdown",
271 |    "id": "fuzzy-homeless",
272 |    "metadata": {},
273 |    "source": [
274 |     "<img src=\"images/ann.png\" height=\"25%\" width=\"25%\">"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "code",
279 |    "execution_count": null,
280 |    "id": "conventional-white",
281 |    "metadata": {},
282 |    "outputs": [],
283 |    "source": [
284 |     "w3 = np.random.rand(7, 3)\n",
285 |     "b3 = np.random.rand(3)"
286 |    ]
287 |   },
288 |   {
289 |    "cell_type": "code",
290 |    "execution_count": null,
291 |    "id": "beautiful-springfield",
292 |    "metadata": {},
293 |    "outputs": [],
294 |    "source": [
295 |     "a3 = np.dot(f2, w3) + b3\n",
296 |     "a3"
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "code",
301 |    "execution_count": null,
302 |    "id": "solid-inquiry",
303 |    "metadata": {},
304 |    "outputs": [],
305 |    "source": [
306 |     "f3 = softmax(a3)\n",
307 |     "f3"
308 |    ]
309 |   },
310 |   {
311 |    "cell_type": "code",
312 |    "execution_count": null,
313 |    "id": "valuable-science",
314 |    "metadata": {},
315 |    "outputs": [],
316 |    "source": []
317 |   }
318 |  ],
319 |  "metadata": {
320 |   "kernelspec": {
321 |    "display_name": "Python 3",
322 |    "language": "python",
323 |    "name": "python3"
324 |   },
325 |   "language_info": {
326 |    "codemirror_mode": {
327 |     "name": "ipython",
328 |     "version": 3
329 |    },
330 |    "file_extension": ".py",
331 |    "mimetype": "text/x-python",
332 |    "name": "python",
333 |    "nbconvert_exporter": "python",
334 |    "pygments_lexer": "ipython3",
335 |    "version": "3.7.7"
336 |   }
337 |  },
338 |  "nbformat": 4,
339 |  "nbformat_minor": 5
340 | }
341 | 


--------------------------------------------------------------------------------
/Knowledge Graphs/intro-to-wordnet.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "raw",
  5 |    "id": "freelance-naples",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "----------------------------------------------------------------------\n",
  9 |     "Filename : intro-to-wordnet.ipynb\n",
 10 |     "Author   : Jaidev Deshpande\n",
 11 |     "Purpose  : Understanding Wordnet functionalities\n",
 12 |     "Libraries: nltk\n",
 13 |     "----------------------------------------------------------------------"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "id": "anonymous-canadian",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "## [WordNet®](https://wordnet.princeton.edu/) Tutorial\n",
 22 |     "\n",
 23 |     "### Navigating Wornet Relationships"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 1,
 29 |    "id": "widespread-traveler",
 30 |    "metadata": {},
 31 |    "outputs": [
 32 |     {
 33 |      "name": "stdout",
 34 |      "output_type": "stream",
 35 |      "text": [
 36 |       "Requirement already satisfied: nltk in /home/jaidevd/anaconda3/lib/python3.7/site-packages (3.3)\n",
 37 |       "Requirement already satisfied: six in /home/jaidevd/anaconda3/lib/python3.7/site-packages (from nltk) (1.15.0)\n",
 38 |       "\u001b[33mWARNING: You are using pip version 21.0.1; however, version 21.1 is available.\n",
 39 |       "You should consider upgrading via the '/home/jaidevd/anaconda3/bin/python -m pip install --upgrade pip' command.\u001b[0m\n"
 40 |      ]
 41 |     }
 42 |    ],
 43 |    "source": [
 44 |     "!pip install nltk"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 2,
 50 |    "id": "vocational-clearance",
 51 |    "metadata": {},
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "from nltk import download"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 3,
 60 |    "id": "sensitive-albany",
 61 |    "metadata": {},
 62 |    "outputs": [
 63 |     {
 64 |      "name": "stdout",
 65 |      "output_type": "stream",
 66 |      "text": [
 67 |       "[nltk_data] Downloading package wordnet to /home/jaidevd/nltk_data...\n",
 68 |       "[nltk_data]   Package wordnet is already up-to-date!\n"
 69 |      ]
 70 |     },
 71 |     {
 72 |      "data": {
 73 |       "text/plain": [
 74 |        "True"
 75 |       ]
 76 |      },
 77 |      "execution_count": 3,
 78 |      "metadata": {},
 79 |      "output_type": "execute_result"
 80 |     }
 81 |    ],
 82 |    "source": [
 83 |     "download('wordnet')"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 4,
 89 |    "id": "weird-memory",
 90 |    "metadata": {},
 91 |    "outputs": [],
 92 |    "source": [
 93 |     "from nltk.corpus import wordnet"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 5,
 99 |    "id": "developing-failure",
100 |    "metadata": {},
101 |    "outputs": [
102 |     {
103 |      "data": {
104 |       "text/plain": [
105 |        "[Synset('tractor.n.01'), Synset('tractor.n.02')]"
106 |       ]
107 |      },
108 |      "execution_count": 5,
109 |      "metadata": {},
110 |      "output_type": "execute_result"
111 |     }
112 |    ],
113 |    "source": [
114 |     "# Synsets\n",
115 |     "\n",
116 |     "tractor = wordnet.synsets('tractor')\n",
117 |     "tractor"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": 6,
123 |    "id": "seasonal-hungary",
124 |    "metadata": {},
125 |    "outputs": [
126 |     {
127 |      "data": {
128 |       "text/plain": [
129 |        "['a wheeled vehicle with large wheels; used in farming and other applications',\n",
130 |        " 'a truck that has a cab but no body; used for pulling large trailers or vans']"
131 |       ]
132 |      },
133 |      "execution_count": 6,
134 |      "metadata": {},
135 |      "output_type": "execute_result"
136 |     }
137 |    ],
138 |    "source": [
139 |     "# Definitions of senses\n",
140 |     "\n",
141 |     "[syn.definition() for syn in tractor]"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 7,
147 |    "id": "active-norwegian",
148 |    "metadata": {},
149 |    "outputs": [
150 |     {
151 |      "data": {
152 |       "text/plain": [
153 |        "[Synset('self-propelled_vehicle.n.01')]"
154 |       ]
155 |      },
156 |      "execution_count": 7,
157 |      "metadata": {},
158 |      "output_type": "execute_result"
159 |     }
160 |    ],
161 |    "source": [
162 |     "# Hypernyms: Relation between a concept and its superordinate\n",
163 |     "\n",
164 |     "tractor = wordnet.synset('tractor.n.01')\n",
165 |     "tractor.hypernyms()"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": 8,
171 |    "id": "arctic-customer",
172 |    "metadata": {},
173 |    "outputs": [
174 |     {
175 |      "data": {
176 |       "text/plain": [
177 |        "[Synset('wheeled_vehicle.n.01')]"
178 |       ]
179 |      },
180 |      "execution_count": 8,
181 |      "metadata": {},
182 |      "output_type": "execute_result"
183 |     }
184 |    ],
185 |    "source": [
186 |     "self_propelled_vehicle = wordnet.synset('self-propelled_vehicle.n.01')\n",
187 |     "self_propelled_vehicle.hypernyms()"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": 9,
193 |    "id": "invalid-lease",
194 |    "metadata": {},
195 |    "outputs": [
196 |     {
197 |      "data": {
198 |       "text/plain": [
199 |        "[Synset('axle.n.01'),\n",
200 |        " Synset('brake.n.01'),\n",
201 |        " Synset('splasher.n.01'),\n",
202 |        " Synset('wheel.n.01')]"
203 |       ]
204 |      },
205 |      "execution_count": 9,
206 |      "metadata": {},
207 |      "output_type": "execute_result"
208 |     }
209 |    ],
210 |    "source": [
211 |     "# Meronyms: Relation between a part and its whole\n",
212 |     "\n",
213 |     "wheeled_vehicle = wordnet.synset('wheeled_vehicle.n.01')\n",
214 |     "wheeled_vehicle.part_meronyms()"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 10,
220 |    "id": "brown-weekend",
221 |    "metadata": {},
222 |    "outputs": [
223 |     {
224 |      "data": {
225 |       "text/plain": [
226 |        "[Synset('baby_buggy.n.01'),\n",
227 |        " Synset('bicycle.n.01'),\n",
228 |        " Synset('boneshaker.n.01'),\n",
229 |        " Synset('car.n.02'),\n",
230 |        " Synset('handcart.n.01'),\n",
231 |        " Synset('horse-drawn_vehicle.n.01'),\n",
232 |        " Synset('motor_scooter.n.01'),\n",
233 |        " Synset('rolling_stock.n.01'),\n",
234 |        " Synset('scooter.n.02'),\n",
235 |        " Synset('self-propelled_vehicle.n.01'),\n",
236 |        " Synset('skateboard.n.01'),\n",
237 |        " Synset('trailer.n.04'),\n",
238 |        " Synset('tricycle.n.01'),\n",
239 |        " Synset('unicycle.n.01'),\n",
240 |        " Synset('wagon.n.01'),\n",
241 |        " Synset('wagon.n.04'),\n",
242 |        " Synset('welcome_wagon.n.01')]"
243 |       ]
244 |      },
245 |      "execution_count": 10,
246 |      "metadata": {},
247 |      "output_type": "execute_result"
248 |     }
249 |    ],
250 |    "source": [
251 |     "# Hyponyms: Relation between a concept and its subordinate\n",
252 |     "\n",
253 |     "wheeled_vehicle.hyponyms()"
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "code",
258 |    "execution_count": 11,
259 |    "id": "theoretical-bargain",
260 |    "metadata": {},
261 |    "outputs": [
262 |     {
263 |      "data": {
264 |       "text/plain": [
265 |        "[Synset('wheeled_vehicle.n.01')]"
266 |       ]
267 |      },
268 |      "execution_count": 11,
269 |      "metadata": {},
270 |      "output_type": "execute_result"
271 |     }
272 |    ],
273 |    "source": [
274 |     "# Holonyms: Relation between whole and its parts\n",
275 |     "\n",
276 |     "axle = wordnet.synset('axle.n.01')\n",
277 |     "axle.part_holonyms()"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "code",
282 |    "execution_count": 12,
283 |    "id": "identical-shoulder",
284 |    "metadata": {},
285 |    "outputs": [
286 |     {
287 |      "data": {
288 |       "text/plain": [
289 |        "[Synset('armored_vehicle.n.01'),\n",
290 |        " Synset('carrier.n.02'),\n",
291 |        " Synset('forklift.n.01'),\n",
292 |        " Synset('locomotive.n.01'),\n",
293 |        " Synset('motor_vehicle.n.01'),\n",
294 |        " Synset('personnel_carrier.n.01'),\n",
295 |        " Synset('reconnaissance_vehicle.n.01'),\n",
296 |        " Synset('recreational_vehicle.n.01'),\n",
297 |        " Synset('streetcar.n.01'),\n",
298 |        " Synset('tracked_vehicle.n.01'),\n",
299 |        " Synset('tractor.n.01'),\n",
300 |        " Synset('weapons_carrier.n.01')]"
301 |       ]
302 |      },
303 |      "execution_count": 12,
304 |      "metadata": {},
305 |      "output_type": "execute_result"
306 |     }
307 |    ],
308 |    "source": [
309 |     "self_propelled_vehicle.hyponyms()"
310 |    ]
311 |   },
312 |   {
313 |    "cell_type": "code",
314 |    "execution_count": 13,
315 |    "id": "quick-strengthening",
316 |    "metadata": {},
317 |    "outputs": [
318 |     {
319 |      "data": {
320 |       "text/plain": [
321 |        "[Synset('amphibian.n.01'),\n",
322 |        " Synset('bloodmobile.n.01'),\n",
323 |        " Synset('car.n.01'),\n",
324 |        " Synset('doodlebug.n.01'),\n",
325 |        " Synset('four-wheel_drive.n.01'),\n",
326 |        " Synset('go-kart.n.01'),\n",
327 |        " Synset('golfcart.n.01'),\n",
328 |        " Synset('hearse.n.01'),\n",
329 |        " Synset('motorcycle.n.01'),\n",
330 |        " Synset('snowplow.n.01'),\n",
331 |        " Synset('truck.n.01')]"
332 |       ]
333 |      },
334 |      "execution_count": 13,
335 |      "metadata": {},
336 |      "output_type": "execute_result"
337 |     }
338 |    ],
339 |    "source": [
340 |     "motor_vehicle = wordnet.synset('motor_vehicle.n.01')\n",
341 |     "motor_vehicle.hyponyms()"
342 |    ]
343 |   },
344 |   {
345 |    "cell_type": "code",
346 |    "execution_count": 14,
347 |    "id": "egyptian-appliance",
348 |    "metadata": {},
349 |    "outputs": [
350 |     {
351 |      "data": {
352 |       "text/plain": [
353 |        "[Synset('accelerator.n.01'),\n",
354 |        " Synset('air_bag.n.01'),\n",
355 |        " Synset('auto_accessory.n.01'),\n",
356 |        " Synset('automobile_engine.n.01'),\n",
357 |        " Synset('automobile_horn.n.01'),\n",
358 |        " Synset('buffer.n.06'),\n",
359 |        " Synset('bumper.n.02'),\n",
360 |        " Synset('car_door.n.01'),\n",
361 |        " Synset('car_mirror.n.01'),\n",
362 |        " Synset('car_seat.n.01'),\n",
363 |        " Synset('car_window.n.01'),\n",
364 |        " Synset('fender.n.01'),\n",
365 |        " Synset('first_gear.n.01'),\n",
366 |        " Synset('floorboard.n.02'),\n",
367 |        " Synset('gasoline_engine.n.01'),\n",
368 |        " Synset('glove_compartment.n.01'),\n",
369 |        " Synset('grille.n.02'),\n",
370 |        " Synset('high_gear.n.01'),\n",
371 |        " Synset('hood.n.09'),\n",
372 |        " Synset('luggage_compartment.n.01'),\n",
373 |        " Synset('rear_window.n.01'),\n",
374 |        " Synset('reverse.n.02'),\n",
375 |        " Synset('roof.n.02'),\n",
376 |        " Synset('running_board.n.01'),\n",
377 |        " Synset('stabilizer_bar.n.01'),\n",
378 |        " Synset('sunroof.n.01'),\n",
379 |        " Synset('tail_fin.n.02'),\n",
380 |        " Synset('third_gear.n.01'),\n",
381 |        " Synset('window.n.02')]"
382 |       ]
383 |      },
384 |      "execution_count": 14,
385 |      "metadata": {},
386 |      "output_type": "execute_result"
387 |     }
388 |    ],
389 |    "source": [
390 |     "car = wordnet.synset('car.n.01')\n",
391 |     "car.part_meronyms()"
392 |    ]
393 |   },
394 |   {
395 |    "cell_type": "code",
396 |    "execution_count": null,
397 |    "id": "civic-worst",
398 |    "metadata": {},
399 |    "outputs": [],
400 |    "source": []
401 |   }
402 |  ],
403 |  "metadata": {
404 |   "kernelspec": {
405 |    "display_name": "Python 3",
406 |    "language": "python",
407 |    "name": "python3"
408 |   },
409 |   "language_info": {
410 |    "codemirror_mode": {
411 |     "name": "ipython",
412 |     "version": 3
413 |    },
414 |    "file_extension": ".py",
415 |    "mimetype": "text/x-python",
416 |    "name": "python",
417 |    "nbconvert_exporter": "python",
418 |    "pygments_lexer": "ipython3",
419 |    "version": "3.7.7"
420 |   }
421 |  },
422 |  "nbformat": 4,
423 |  "nbformat_minor": 5
424 | }
425 | 


--------------------------------------------------------------------------------
/Knowledge Graphs/lesk.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "shaped-norway",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## Word-Sense Disambiguation"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 1,
 14 |    "id": "genetic-terror",
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "from nltk.corpus import wordnet as wn\n",
 19 |     "from nltk import wsd"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 2,
 25 |    "id": "adult-bangladesh",
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "X = 'The die is cast.'\n",
 30 |     "Y = 'Roll the die to get a 6.'\n",
 31 |     "Z = 'What is dead may never die.'"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 3,
 37 |    "id": "antique-burlington",
 38 |    "metadata": {},
 39 |    "outputs": [
 40 |     {
 41 |      "data": {
 42 |       "text/plain": [
 43 |        "[Synset('die.n.01'),\n",
 44 |        " Synset('die.n.02'),\n",
 45 |        " Synset('die.n.03'),\n",
 46 |        " Synset('die.v.01'),\n",
 47 |        " Synset('die.v.02'),\n",
 48 |        " Synset('die.v.03'),\n",
 49 |        " Synset('fail.v.04'),\n",
 50 |        " Synset('die.v.05'),\n",
 51 |        " Synset('die.v.06'),\n",
 52 |        " Synset('die.v.07'),\n",
 53 |        " Synset('die.v.08'),\n",
 54 |        " Synset('die.v.09'),\n",
 55 |        " Synset('die.v.10'),\n",
 56 |        " Synset('die.v.11')]"
 57 |       ]
 58 |      },
 59 |      "execution_count": 3,
 60 |      "metadata": {},
 61 |      "output_type": "execute_result"
 62 |     }
 63 |    ],
 64 |    "source": [
 65 |     "wn.synsets('die')"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": 4,
 71 |    "id": "governing-montana",
 72 |    "metadata": {},
 73 |    "outputs": [
 74 |     {
 75 |      "data": {
 76 |       "text/plain": [
 77 |        "[Synset('die.n.01'), Synset('die.n.02'), Synset('die.n.03')]"
 78 |       ]
 79 |      },
 80 |      "execution_count": 4,
 81 |      "metadata": {},
 82 |      "output_type": "execute_result"
 83 |     }
 84 |    ],
 85 |    "source": [
 86 |     "wn.synsets('die', pos=wn.NOUN)"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 5,
 92 |    "id": "located-bookmark",
 93 |    "metadata": {},
 94 |    "outputs": [
 95 |     {
 96 |      "name": "stdout",
 97 |      "output_type": "stream",
 98 |      "text": [
 99 |       "a small cube with 1 to 6 spots on the six faces; used in gambling to generate random numbers\n",
100 |       "a device used for shaping metal\n",
101 |       "a cutting tool that is fitted into a diestock and used for cutting male (external) screw threads on screws or bolts or pipes or rods\n"
102 |      ]
103 |     }
104 |    ],
105 |    "source": [
106 |     "for syn in wn.synsets('die', pos=wn.NOUN):\n",
107 |     "    print(syn.definition())"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 6,
113 |    "id": "after-party",
114 |    "metadata": {},
115 |    "outputs": [
116 |     {
117 |      "name": "stdout",
118 |      "output_type": "stream",
119 |      "text": [
120 |       "pass from physical life and lose all bodily attributes and functions necessary to sustain life\n",
121 |       "suffer or face the pain of death\n",
122 |       "be brought to or as if to the point of death by an intense emotion such as embarrassment, amusement, or shame\n",
123 |       "stop operating or functioning\n",
124 |       "feel indifferent towards\n",
125 |       "languish as with love or desire\n",
126 |       "cut or shape with a die\n",
127 |       "to be on base at the end of an inning, of a player\n",
128 |       "lose sparkle or bouquet\n",
129 |       "disappear or come to an end\n",
130 |       "suffer spiritual death; be damned (in the religious sense)\n"
131 |      ]
132 |     }
133 |    ],
134 |    "source": [
135 |     "for syn in wn.synsets('die', pos=wn.VERB):\n",
136 |     "    print(syn.definition())"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "markdown",
141 |    "id": "bacterial-effect",
142 |    "metadata": {},
143 |    "source": [
144 |     "## Word-Sense Disambiguation with Lesk Algorithm"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 7,
150 |    "id": "threaded-tourism",
151 |    "metadata": {},
152 |    "outputs": [
153 |     {
154 |      "name": "stdout",
155 |      "output_type": "stream",
156 |      "text": [
157 |       "The die is cast.\n"
158 |      ]
159 |     },
160 |     {
161 |      "data": {
162 |       "text/plain": [
163 |        "Synset('die.v.07')"
164 |       ]
165 |      },
166 |      "execution_count": 7,
167 |      "metadata": {},
168 |      "output_type": "execute_result"
169 |     }
170 |    ],
171 |    "source": [
172 |     "print(X)\n",
173 |     "wsd.lesk(X.split(), 'die')"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": 8,
179 |    "id": "fluid-cargo",
180 |    "metadata": {},
181 |    "outputs": [
182 |     {
183 |      "data": {
184 |       "text/plain": [
185 |        "'cut or shape with a die'"
186 |       ]
187 |      },
188 |      "execution_count": 8,
189 |      "metadata": {},
190 |      "output_type": "execute_result"
191 |     }
192 |    ],
193 |    "source": [
194 |     "_.definition()"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 9,
200 |    "id": "independent-melissa",
201 |    "metadata": {},
202 |    "outputs": [
203 |     {
204 |      "data": {
205 |       "text/plain": [
206 |        "'a cutting tool that is fitted into a diestock and used for cutting male (external) screw threads on screws or bolts or pipes or rods'"
207 |       ]
208 |      },
209 |      "execution_count": 9,
210 |      "metadata": {},
211 |      "output_type": "execute_result"
212 |     }
213 |    ],
214 |    "source": [
215 |     "wsd.lesk(X.split(), 'die', pos=wn.NOUN).definition()"
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "code",
220 |    "execution_count": 10,
221 |    "id": "progressive-origin",
222 |    "metadata": {},
223 |    "outputs": [
224 |     {
225 |      "name": "stdout",
226 |      "output_type": "stream",
227 |      "text": [
228 |       "Roll the die to get a 6.\n"
229 |      ]
230 |     },
231 |     {
232 |      "data": {
233 |       "text/plain": [
234 |        "'to be on base at the end of an inning, of a player'"
235 |       ]
236 |      },
237 |      "execution_count": 10,
238 |      "metadata": {},
239 |      "output_type": "execute_result"
240 |     }
241 |    ],
242 |    "source": [
243 |     "print(Y)\n",
244 |     "wsd.lesk(Y.split(), 'die').definition()"
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "code",
249 |    "execution_count": 11,
250 |    "id": "proof-while",
251 |    "metadata": {},
252 |    "outputs": [
253 |     {
254 |      "data": {
255 |       "text/plain": [
256 |        "'a small cube with 1 to 6 spots on the six faces; used in gambling to generate random numbers'"
257 |       ]
258 |      },
259 |      "execution_count": 11,
260 |      "metadata": {},
261 |      "output_type": "execute_result"
262 |     }
263 |    ],
264 |    "source": [
265 |     "wsd.lesk(Y.split(), 'die', pos=wn.NOUN).definition()"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "code",
270 |    "execution_count": 12,
271 |    "id": "middle-object",
272 |    "metadata": {},
273 |    "outputs": [
274 |     {
275 |      "name": "stdout",
276 |      "output_type": "stream",
277 |      "text": [
278 |       "What is dead may never die.\n"
279 |      ]
280 |     },
281 |     {
282 |      "data": {
283 |       "text/plain": [
284 |        "'a cutting tool that is fitted into a diestock and used for cutting male (external) screw threads on screws or bolts or pipes or rods'"
285 |       ]
286 |      },
287 |      "execution_count": 12,
288 |      "metadata": {},
289 |      "output_type": "execute_result"
290 |     }
291 |    ],
292 |    "source": [
293 |     "print(Z)\n",
294 |     "wsd.lesk(Z.split(), 'die').definition()"
295 |    ]
296 |   },
297 |   {
298 |    "cell_type": "code",
299 |    "execution_count": 13,
300 |    "id": "organizational-joint",
301 |    "metadata": {},
302 |    "outputs": [
303 |     {
304 |      "data": {
305 |       "text/plain": [
306 |        "'stop operating or functioning'"
307 |       ]
308 |      },
309 |      "execution_count": 13,
310 |      "metadata": {},
311 |      "output_type": "execute_result"
312 |     }
313 |    ],
314 |    "source": [
315 |     "wsd.lesk(Z.split(), 'die', pos=wn.VERB).definition()"
316 |    ]
317 |   },
318 |   {
319 |    "cell_type": "markdown",
320 |    "id": "valuable-harbor",
321 |    "metadata": {},
322 |    "source": [
323 |     "## Automatic POS Tagging + Lesk with spaCy"
324 |    ]
325 |   },
326 |   {
327 |    "cell_type": "code",
328 |    "execution_count": null,
329 |    "id": "defensive-trailer",
330 |    "metadata": {},
331 |    "outputs": [],
332 |    "source": [
333 |     "!pip install spacy"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "code",
338 |    "execution_count": 34,
339 |    "id": "cellular-reflection",
340 |    "metadata": {},
341 |    "outputs": [],
342 |    "source": [
343 |     "from spacy.cli import download\n",
344 |     "from spacy import load\n",
345 |     "# download('en_core_web_sm')\n",
346 |     "nlp = load('en_core_web_sm')"
347 |    ]
348 |   },
349 |   {
350 |    "cell_type": "code",
351 |    "execution_count": 35,
352 |    "id": "radical-melbourne",
353 |    "metadata": {},
354 |    "outputs": [],
355 |    "source": [
356 |     "import warnings\n",
357 |     "\n",
358 |     "POS_MAP = {\n",
359 |     "    'VERB': wn.VERB,\n",
360 |     "    'NOUN': wn.NOUN,\n",
361 |     "    'PROPN': wn.NOUN\n",
362 |     "}\n",
363 |     "\n",
364 |     "\n",
365 |     "def lesk(doc, word):\n",
366 |     "    found = False\n",
367 |     "    for token in doc:\n",
368 |     "        if token.text == word:\n",
369 |     "            word = token\n",
370 |     "            found = True\n",
371 |     "            break\n",
372 |     "    if not found:\n",
373 |     "        raise ValueError(f'Word \\\"{word}\\\" does not appear in the document: {doc.text}.')\n",
374 |     "    pos = POS_MAP.get(word.pos_, False)\n",
375 |     "    if not pos:\n",
376 |     "        warnings.warn(f'POS tag for {word.text} not found in wordnet. Falling back to default Lesk behaviour.')\n",
377 |     "    args = [c.text for c in doc], word.text\n",
378 |     "    kwargs = dict(pos=pos)\n",
379 |     "    return wsd.lesk(*args, **kwargs)"
380 |    ]
381 |   },
382 |   {
383 |    "cell_type": "code",
384 |    "execution_count": 36,
385 |    "id": "monetary-disaster",
386 |    "metadata": {},
387 |    "outputs": [],
388 |    "source": [
389 |     "doc = nlp('Roll the die to get a 6.')"
390 |    ]
391 |   },
392 |   {
393 |    "cell_type": "code",
394 |    "execution_count": 37,
395 |    "id": "surgical-chrome",
396 |    "metadata": {},
397 |    "outputs": [
398 |     {
399 |      "data": {
400 |       "text/plain": [
401 |        "Synset('die.n.01')"
402 |       ]
403 |      },
404 |      "execution_count": 37,
405 |      "metadata": {},
406 |      "output_type": "execute_result"
407 |     }
408 |    ],
409 |    "source": [
410 |     "lesk(doc, 'die')"
411 |    ]
412 |   },
413 |   {
414 |    "cell_type": "code",
415 |    "execution_count": 38,
416 |    "id": "cordless-bankruptcy",
417 |    "metadata": {},
418 |    "outputs": [
419 |     {
420 |      "data": {
421 |       "text/plain": [
422 |        "'a small cube with 1 to 6 spots on the six faces; used in gambling to generate random numbers'"
423 |       ]
424 |      },
425 |      "execution_count": 38,
426 |      "metadata": {},
427 |      "output_type": "execute_result"
428 |     }
429 |    ],
430 |    "source": [
431 |     "lesk(doc, 'die').definition()"
432 |    ]
433 |   },
434 |   {
435 |    "cell_type": "code",
436 |    "execution_count": 39,
437 |    "id": "excess-consultancy",
438 |    "metadata": {},
439 |    "outputs": [
440 |     {
441 |      "data": {
442 |       "text/plain": [
443 |        "\"a widely used search engine that uses text-matching techniques to find web pages that are important and relevant to a user's search\""
444 |       ]
445 |      },
446 |      "execution_count": 39,
447 |      "metadata": {},
448 |      "output_type": "execute_result"
449 |     }
450 |    ],
451 |    "source": [
452 |     "lesk(nlp('I work at google.'), 'google').definition()"
453 |    ]
454 |   },
455 |   {
456 |    "cell_type": "code",
457 |    "execution_count": 40,
458 |    "id": "infectious-binary",
459 |    "metadata": {},
460 |    "outputs": [
461 |     {
462 |      "data": {
463 |       "text/plain": [
464 |        "'search the internet (for information) using the Google search engine'"
465 |       ]
466 |      },
467 |      "execution_count": 40,
468 |      "metadata": {},
469 |      "output_type": "execute_result"
470 |     }
471 |    ],
472 |    "source": [
473 |     "lesk(nlp('I will google it.'), 'google').definition()"
474 |    ]
475 |   },
476 |   {
477 |    "cell_type": "code",
478 |    "execution_count": null,
479 |    "id": "laughing-carolina",
480 |    "metadata": {},
481 |    "outputs": [],
482 |    "source": []
483 |   }
484 |  ],
485 |  "metadata": {
486 |   "kernelspec": {
487 |    "display_name": "Python 3",
488 |    "language": "python",
489 |    "name": "python3"
490 |   },
491 |   "language_info": {
492 |    "codemirror_mode": {
493 |     "name": "ipython",
494 |     "version": 3
495 |    },
496 |    "file_extension": ".py",
497 |    "mimetype": "text/x-python",
498 |    "name": "python",
499 |    "nbconvert_exporter": "python",
500 |    "pygments_lexer": "ipython3",
501 |    "version": "3.7.7"
502 |   }
503 |  },
504 |  "nbformat": 4,
505 |  "nbformat_minor": 5
506 | }
507 | 


--------------------------------------------------------------------------------
/topic modelling/nmf-imdb-movie-reviews.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "id": "interior-basics",
   6 |    "metadata": {},
   7 |    "source": [
   8 |     "# Inferring Topics from IMDB Reviews"
   9 |    ]
  10 |   },
  11 |   {
  12 |    "cell_type": "code",
  13 |    "execution_count": 1,
  14 |    "id": "established-malta",
  15 |    "metadata": {},
  16 |    "outputs": [],
  17 |    "source": [
  18 |     "import numpy as np\n",
  19 |     "import os\n",
  20 |     "from sklearn.feature_extraction.text import TfidfVectorizer\n",
  21 |     "from sklearn.decomposition import NMF\n",
  22 |     "import pandas as pd\n",
  23 |     "import matplotlib.pyplot as plt"
  24 |    ]
  25 |   },
  26 |   {
  27 |    "cell_type": "markdown",
  28 |    "id": "political-ability",
  29 |    "metadata": {},
  30 |    "source": [
  31 |     "## Exploring the Dataset: [Large Movie Review Dataset](https://drive.google.com/drive/u/0/folders/1umS1MgUXyra3KVF-6FsN8krHQ31lXhlX)"
  32 |    ]
  33 |   },
  34 |   {
  35 |    "cell_type": "code",
  36 |    "execution_count": 2,
  37 |    "id": "physical-speaker",
  38 |    "metadata": {},
  39 |    "outputs": [],
  40 |    "source": [
  41 |     "ROOT = '../neuralnets/aclImdb/train/pos/'"
  42 |    ]
  43 |   },
  44 |   {
  45 |    "cell_type": "code",
  46 |    "execution_count": 3,
  47 |    "id": "solar-universe",
  48 |    "metadata": {},
  49 |    "outputs": [],
  50 |    "source": [
  51 |     "reviews = []\n",
  52 |     "for file in os.listdir(ROOT):\n",
  53 |     "    path = os.path.join(ROOT, file)\n",
  54 |     "    if os.path.isfile(path):\n",
  55 |     "        with open(path, 'r') as fin:\n",
  56 |     "            reviews.append(fin.read())"
  57 |    ]
  58 |   },
  59 |   {
  60 |    "cell_type": "code",
  61 |    "execution_count": 4,
  62 |    "id": "permanent-pride",
  63 |    "metadata": {},
  64 |    "outputs": [
  65 |     {
  66 |      "data": {
  67 |       "text/plain": [
  68 |        "12500"
  69 |       ]
  70 |      },
  71 |      "execution_count": 4,
  72 |      "metadata": {},
  73 |      "output_type": "execute_result"
  74 |     }
  75 |    ],
  76 |    "source": [
  77 |     "len(reviews)"
  78 |    ]
  79 |   },
  80 |   {
  81 |    "cell_type": "code",
  82 |    "execution_count": 5,
  83 |    "id": "similar-commander",
  84 |    "metadata": {},
  85 |    "outputs": [
  86 |     {
  87 |      "name": "stdout",
  88 |      "output_type": "stream",
  89 |      "text": [
  90 |       "Not wishing to give *anything* away here, I would just say this technically excellent, flawlessly acted and uplifting little flic will reward the viewer with an excellent hour and a half's entertainment: It will amuse, surprise, possibly embarrass occasionally and almost certainly tug at the heartstrings from time to time, as it approaches the inevitable, but not obvious, ending without becoming clichéd or predictable in any way. Most definitely recommended.<br /><br />A previous User's Comment gives 8 out of 10 for the film and 10 out of 10 for both Branagh and Bonham-Carter's outstanding performances - I agree entirely....\n",
  91 |       "======================================================================================================================================================\n",
  92 |       "Wrestlemania 14 is not often looked as one of the great Wrestlemania's but I would personally put it, in my top 5, if not the top 3. It has so many great things, and it truly signified the birth of The Attitude Era, which was WWE's best era, in my opinion. HBK has the heart of a lion, and him putting over Austin like he did, on his way out, was pure class on his part. It has one of the hottest crowds you will ever see, and it has J.R and The King at their announcing best!. <br /><br />Matches.<br /><br />15  team battle royal LOUD pop for L.O.D's return. I'm not a fan of battle royal's, and this is yet another average one. Very predictable, even when you 1st see it, it's obvious L.O.D would win. Looking at Sunny for 8 or so minutes though, definitely helps. <br /><br />2/5<br /><br />WWF Light Heavyweight Championship<br /><br />Taka Michinoku|C| Vs Aguila.<br /><br />Taka gets a surprising pop, with his entrance. Fast, high-flying, and very exciting. If these two had more time, they would have surely tore the roof off, with their stuff. Taka wins with the Michinoku driver.<br /><br />3 1/2 /5<br /><br />WWF European Championship.<br /><br />Triple H|C| Vs Owen Hart Stipulation here, is Chyna is handcuffed to Slaughter. Nice pop for Owen, mixed reaction for Trips. A really, really underrated match, that ranks among one of my favorites for Wrestlemania, actually. The two mixed together very well, and Owen can go with anybody. Trips wins, with Chyna interference.<br /><br />4/5<br /><br />Mixed Tag match. Marc Mero&Sable Vs Goldust&Luna. Defining pop for Sable, unheard of that time, for woman. Sable actually looks hot, and the crowd is just eating her up!. Constant Sable chants, and them erupting almost every time she gets in the ring. Not bad for a Mixed tag match, it had entertaining antics, and passed the time well. Sable's team wins, when Sable hits the TKO.<br /><br />2 1/2 /5<br /><br />WWF Intercontinental Championship. Ken Shamrock Vs The Rock|C|. Before I review the match, I'd like to note The Rock showed off his immense potential, with his interview with Jennifer Flowers, before his match. Nice pop for Shamrock, big time heat for The Rock. Too disappointingly short, and I thought the ending was kinda stupid, though Shamrock's snapping antics were awesome to see, and the crowd went nuts for it. Rock keeps the title, when The Ref reverses the decision.<br /><br />2/5<br /><br />Dumpster match, for The WWF Tag Team Championship<br /><br />Catcus Jack&Terry Funk Vs The New Age Outlaws. The Outlaws are not as over, as they were gonna be at this time. Crowd is actually somewhat dead for this, but I thought it had some great Hardcore bits, with some sick looking bumps. Cactus and Terry win the titles in the end.<br /><br />3/5<br /><br />The Undertaker vs Kane. Big time ovation, for The Undertaker. Much better than there outing at Wrestlemania 20, and for a big man vs big man match, this was really good. It was a great all out brawl, with The Undertaker taking a sick looking bump, through the table. WWE was smart, by making Kane looking strong, even through defeat. After 2 tombstone kick out's, Taker finally puts him away, with a 3rd one.<br /><br />3 1/2 /5<br /><br />WWF Championship. <br /><br />Special Guest Enforcer \"Mike Tyson\"<br /><br />HBK|C| Vs Steve Austin. Big heat for Tyson. Crowd goes ape sh*t for Austin, definitely one of the biggest pops I have heard. Mixed reaction, for HBK. This is truly a special match up, one of the greatest wrestlemania main events in history, you can tell when J.R is even out of breath. HBK gives it his all, in what was supposed to be his last match, and Austin has rarely been better. The animosity and electricity from the crowd is amazing, and it's as exciting as it gets. Austin wins with the stunner, with Tyson joining 3:16 by knocking out Michaels. Austin's celebratory victory, is a wonder to behold, with one of the nosiest crowd's you will ever see, King said it right, they were going nuts.<br /><br />5/5<br /><br />Bottom line. Wrestlemania 14 is one of the greatest for real. It has everything you want in a Wrestlemania, and truly kick started the Attitude Era. This is very special to me, because it was the 1st Wrestlemania I ever saw, back in 98. \"The Austin Era, has begun!\"<br /><br />9 1/2 /10\n",
  93 |       "======================================================================================================================================================\n",
  94 |       "It could have been a better film. It does drag at points, and the central story shifts from Boyer completing his mission to Boyer avenging Wanda Hendrix's death, but Graham Greene is an author who is really hard to spoil. His stories are all morality tales, due to his own considerations of Catholicism, guilt and innocence (very relative terms in his world view), and the human condition.<br /><br />Boyer is Luis Denard, a well-known concert pianist, who has sided with the Republicans in the Spanish Civil War. He has been sent to England to try to carry through an arms purchase deal that is desperately needed. Unfortunately for Denard he is literally on his own - everyone of his contacts turns out to be a willing turncoat for the Falagists of Spain. In particular Katina Paxinou (Mrs. Melendez) a grim boarding house keeper, and Peter Lorre (Mr. Contreras) a teacher of an \"esperanto\" type international language. Wanda Hendrix is the drudge of a girl (Else) who works for Mrs. Melendez. The local diplomat, Licata (Victor Francken) is already a willing associate of the Falangists.<br /><br />The Brits (Holmes Herbert, Miles Mander, and best - if not worst - of the lot, George Coulouris) don't give much hope to Boyer's cause (which he soon grasps may be Britain's before long). Herbert and Mander just retreat behind the official policy of neutrality ordered by the Ramsay MacDonald's and Stanley Baldwin's governments during the Civil War. Coulouris here is a typical Col. Blimp type - always impeccable in his native English diction, he is sharp in showing his dislike for foreigners in general.<br /><br />The one ray of hope is Lauren Bacall (Rose Cullen), here trying to play her role as well as she can - but she can't really. She's an aristocrat - the daughter of a Press lord. It was Bacall's second film, and (sad to say) almost sank her long career. She does act well, but the spark she showed in her first film was due to the dual effect of starring with Humphrey Bogart and being directed by Howard Hawks. Boyer is a fine actor, but he's not Bogie, and Herman Shumlin is not Hawks. Her next film returned her to Bogie and Hawks again, and her star resumed it's ascendancy.<br /><br />It's a bleak film (as was the novel). Boyer's mission never succeeds, as he has too many hidden foes all over the place. But the villains are likewise also losers - frequently with their lives.<br /><br />With Dan Seymour as a suspicious foreign tenant of Katina Paxinou (and the man who destroys her). It is well worth watching to catch the Warner's lot of character actors doing their best given the weakness in direction.\n",
  95 |       "======================================================================================================================================================\n"
  96 |      ]
  97 |     }
  98 |    ],
  99 |    "source": [
 100 |     "for i in range(3):\n",
 101 |     "    print(reviews[i])\n",
 102 |     "    print('=' * 150)"
 103 |    ]
 104 |   },
 105 |   {
 106 |    "cell_type": "markdown",
 107 |    "id": "blind-relative",
 108 |    "metadata": {},
 109 |    "source": [
 110 |     "## Feature Extraction"
 111 |    ]
 112 |   },
 113 |   {
 114 |    "cell_type": "code",
 115 |    "execution_count": 8,
 116 |    "id": "fuzzy-legislation",
 117 |    "metadata": {},
 118 |    "outputs": [
 119 |     {
 120 |      "data": {
 121 |       "text/html": [
 122 |        "<div>\n",
 123 |        "<style scoped>\n",
 124 |        "    .dataframe tbody tr th:only-of-type {\n",
 125 |        "        vertical-align: middle;\n",
 126 |        "    }\n",
 127 |        "\n",
 128 |        "    .dataframe tbody tr th {\n",
 129 |        "        vertical-align: top;\n",
 130 |        "    }\n",
 131 |        "\n",
 132 |        "    .dataframe thead th {\n",
 133 |        "        text-align: right;\n",
 134 |        "    }\n",
 135 |        "</style>\n",
 136 |        "<table border=\"1\" class=\"dataframe\">\n",
 137 |        "  <thead>\n",
 138 |        "    <tr style=\"text-align: right;\">\n",
 139 |        "      <th></th>\n",
 140 |        "      <th>00</th>\n",
 141 |        "      <th>000</th>\n",
 142 |        "      <th>000s</th>\n",
 143 |        "      <th>003830</th>\n",
 144 |        "      <th>006</th>\n",
 145 |        "      <th>007</th>\n",
 146 |        "      <th>0079</th>\n",
 147 |        "      <th>0080</th>\n",
 148 |        "      <th>0083</th>\n",
 149 |        "      <th>0093638</th>\n",
 150 |        "      <th>...</th>\n",
 151 |        "      <th>élan</th>\n",
 152 |        "      <th>émigré</th>\n",
 153 |        "      <th>émigrés</th>\n",
 154 |        "      <th>était</th>\n",
 155 |        "      <th>état</th>\n",
 156 |        "      <th>étc</th>\n",
 157 |        "      <th>êxtase</th>\n",
 158 |        "      <th>ís</th>\n",
 159 |        "      <th>østbye</th>\n",
 160 |        "      <th>über</th>\n",
 161 |        "    </tr>\n",
 162 |        "  </thead>\n",
 163 |        "  <tbody>\n",
 164 |        "    <tr>\n",
 165 |        "      <th>0</th>\n",
 166 |        "      <td>0.0</td>\n",
 167 |        "      <td>0.0</td>\n",
 168 |        "      <td>0.0</td>\n",
 169 |        "      <td>0.0</td>\n",
 170 |        "      <td>0.0</td>\n",
 171 |        "      <td>0.0</td>\n",
 172 |        "      <td>0.0</td>\n",
 173 |        "      <td>0.0</td>\n",
 174 |        "      <td>0.0</td>\n",
 175 |        "      <td>0.0</td>\n",
 176 |        "      <td>...</td>\n",
 177 |        "      <td>0.0</td>\n",
 178 |        "      <td>0.0</td>\n",
 179 |        "      <td>0.0</td>\n",
 180 |        "      <td>0.0</td>\n",
 181 |        "      <td>0.0</td>\n",
 182 |        "      <td>0.0</td>\n",
 183 |        "      <td>0.0</td>\n",
 184 |        "      <td>0.0</td>\n",
 185 |        "      <td>0.0</td>\n",
 186 |        "      <td>0.0</td>\n",
 187 |        "    </tr>\n",
 188 |        "    <tr>\n",
 189 |        "      <th>1</th>\n",
 190 |        "      <td>0.0</td>\n",
 191 |        "      <td>0.0</td>\n",
 192 |        "      <td>0.0</td>\n",
 193 |        "      <td>0.0</td>\n",
 194 |        "      <td>0.0</td>\n",
 195 |        "      <td>0.0</td>\n",
 196 |        "      <td>0.0</td>\n",
 197 |        "      <td>0.0</td>\n",
 198 |        "      <td>0.0</td>\n",
 199 |        "      <td>0.0</td>\n",
 200 |        "      <td>...</td>\n",
 201 |        "      <td>0.0</td>\n",
 202 |        "      <td>0.0</td>\n",
 203 |        "      <td>0.0</td>\n",
 204 |        "      <td>0.0</td>\n",
 205 |        "      <td>0.0</td>\n",
 206 |        "      <td>0.0</td>\n",
 207 |        "      <td>0.0</td>\n",
 208 |        "      <td>0.0</td>\n",
 209 |        "      <td>0.0</td>\n",
 210 |        "      <td>0.0</td>\n",
 211 |        "    </tr>\n",
 212 |        "    <tr>\n",
 213 |        "      <th>2</th>\n",
 214 |        "      <td>0.0</td>\n",
 215 |        "      <td>0.0</td>\n",
 216 |        "      <td>0.0</td>\n",
 217 |        "      <td>0.0</td>\n",
 218 |        "      <td>0.0</td>\n",
 219 |        "      <td>0.0</td>\n",
 220 |        "      <td>0.0</td>\n",
 221 |        "      <td>0.0</td>\n",
 222 |        "      <td>0.0</td>\n",
 223 |        "      <td>0.0</td>\n",
 224 |        "      <td>...</td>\n",
 225 |        "      <td>0.0</td>\n",
 226 |        "      <td>0.0</td>\n",
 227 |        "      <td>0.0</td>\n",
 228 |        "      <td>0.0</td>\n",
 229 |        "      <td>0.0</td>\n",
 230 |        "      <td>0.0</td>\n",
 231 |        "      <td>0.0</td>\n",
 232 |        "      <td>0.0</td>\n",
 233 |        "      <td>0.0</td>\n",
 234 |        "      <td>0.0</td>\n",
 235 |        "    </tr>\n",
 236 |        "    <tr>\n",
 237 |        "      <th>3</th>\n",
 238 |        "      <td>0.0</td>\n",
 239 |        "      <td>0.0</td>\n",
 240 |        "      <td>0.0</td>\n",
 241 |        "      <td>0.0</td>\n",
 242 |        "      <td>0.0</td>\n",
 243 |        "      <td>0.0</td>\n",
 244 |        "      <td>0.0</td>\n",
 245 |        "      <td>0.0</td>\n",
 246 |        "      <td>0.0</td>\n",
 247 |        "      <td>0.0</td>\n",
 248 |        "      <td>...</td>\n",
 249 |        "      <td>0.0</td>\n",
 250 |        "      <td>0.0</td>\n",
 251 |        "      <td>0.0</td>\n",
 252 |        "      <td>0.0</td>\n",
 253 |        "      <td>0.0</td>\n",
 254 |        "      <td>0.0</td>\n",
 255 |        "      <td>0.0</td>\n",
 256 |        "      <td>0.0</td>\n",
 257 |        "      <td>0.0</td>\n",
 258 |        "      <td>0.0</td>\n",
 259 |        "    </tr>\n",
 260 |        "    <tr>\n",
 261 |        "      <th>4</th>\n",
 262 |        "      <td>0.0</td>\n",
 263 |        "      <td>0.0</td>\n",
 264 |        "      <td>0.0</td>\n",
 265 |        "      <td>0.0</td>\n",
 266 |        "      <td>0.0</td>\n",
 267 |        "      <td>0.0</td>\n",
 268 |        "      <td>0.0</td>\n",
 269 |        "      <td>0.0</td>\n",
 270 |        "      <td>0.0</td>\n",
 271 |        "      <td>0.0</td>\n",
 272 |        "      <td>...</td>\n",
 273 |        "      <td>0.0</td>\n",
 274 |        "      <td>0.0</td>\n",
 275 |        "      <td>0.0</td>\n",
 276 |        "      <td>0.0</td>\n",
 277 |        "      <td>0.0</td>\n",
 278 |        "      <td>0.0</td>\n",
 279 |        "      <td>0.0</td>\n",
 280 |        "      <td>0.0</td>\n",
 281 |        "      <td>0.0</td>\n",
 282 |        "      <td>0.0</td>\n",
 283 |        "    </tr>\n",
 284 |        "    <tr>\n",
 285 |        "      <th>...</th>\n",
 286 |        "      <td>...</td>\n",
 287 |        "      <td>...</td>\n",
 288 |        "      <td>...</td>\n",
 289 |        "      <td>...</td>\n",
 290 |        "      <td>...</td>\n",
 291 |        "      <td>...</td>\n",
 292 |        "      <td>...</td>\n",
 293 |        "      <td>...</td>\n",
 294 |        "      <td>...</td>\n",
 295 |        "      <td>...</td>\n",
 296 |        "      <td>...</td>\n",
 297 |        "      <td>...</td>\n",
 298 |        "      <td>...</td>\n",
 299 |        "      <td>...</td>\n",
 300 |        "      <td>...</td>\n",
 301 |        "      <td>...</td>\n",
 302 |        "      <td>...</td>\n",
 303 |        "      <td>...</td>\n",
 304 |        "      <td>...</td>\n",
 305 |        "      <td>...</td>\n",
 306 |        "      <td>...</td>\n",
 307 |        "    </tr>\n",
 308 |        "    <tr>\n",
 309 |        "      <th>12495</th>\n",
 310 |        "      <td>0.0</td>\n",
 311 |        "      <td>0.0</td>\n",
 312 |        "      <td>0.0</td>\n",
 313 |        "      <td>0.0</td>\n",
 314 |        "      <td>0.0</td>\n",
 315 |        "      <td>0.0</td>\n",
 316 |        "      <td>0.0</td>\n",
 317 |        "      <td>0.0</td>\n",
 318 |        "      <td>0.0</td>\n",
 319 |        "      <td>0.0</td>\n",
 320 |        "      <td>...</td>\n",
 321 |        "      <td>0.0</td>\n",
 322 |        "      <td>0.0</td>\n",
 323 |        "      <td>0.0</td>\n",
 324 |        "      <td>0.0</td>\n",
 325 |        "      <td>0.0</td>\n",
 326 |        "      <td>0.0</td>\n",
 327 |        "      <td>0.0</td>\n",
 328 |        "      <td>0.0</td>\n",
 329 |        "      <td>0.0</td>\n",
 330 |        "      <td>0.0</td>\n",
 331 |        "    </tr>\n",
 332 |        "    <tr>\n",
 333 |        "      <th>12496</th>\n",
 334 |        "      <td>0.0</td>\n",
 335 |        "      <td>0.0</td>\n",
 336 |        "      <td>0.0</td>\n",
 337 |        "      <td>0.0</td>\n",
 338 |        "      <td>0.0</td>\n",
 339 |        "      <td>0.0</td>\n",
 340 |        "      <td>0.0</td>\n",
 341 |        "      <td>0.0</td>\n",
 342 |        "      <td>0.0</td>\n",
 343 |        "      <td>0.0</td>\n",
 344 |        "      <td>...</td>\n",
 345 |        "      <td>0.0</td>\n",
 346 |        "      <td>0.0</td>\n",
 347 |        "      <td>0.0</td>\n",
 348 |        "      <td>0.0</td>\n",
 349 |        "      <td>0.0</td>\n",
 350 |        "      <td>0.0</td>\n",
 351 |        "      <td>0.0</td>\n",
 352 |        "      <td>0.0</td>\n",
 353 |        "      <td>0.0</td>\n",
 354 |        "      <td>0.0</td>\n",
 355 |        "    </tr>\n",
 356 |        "    <tr>\n",
 357 |        "      <th>12497</th>\n",
 358 |        "      <td>0.0</td>\n",
 359 |        "      <td>0.0</td>\n",
 360 |        "      <td>0.0</td>\n",
 361 |        "      <td>0.0</td>\n",
 362 |        "      <td>0.0</td>\n",
 363 |        "      <td>0.0</td>\n",
 364 |        "      <td>0.0</td>\n",
 365 |        "      <td>0.0</td>\n",
 366 |        "      <td>0.0</td>\n",
 367 |        "      <td>0.0</td>\n",
 368 |        "      <td>...</td>\n",
 369 |        "      <td>0.0</td>\n",
 370 |        "      <td>0.0</td>\n",
 371 |        "      <td>0.0</td>\n",
 372 |        "      <td>0.0</td>\n",
 373 |        "      <td>0.0</td>\n",
 374 |        "      <td>0.0</td>\n",
 375 |        "      <td>0.0</td>\n",
 376 |        "      <td>0.0</td>\n",
 377 |        "      <td>0.0</td>\n",
 378 |        "      <td>0.0</td>\n",
 379 |        "    </tr>\n",
 380 |        "    <tr>\n",
 381 |        "      <th>12498</th>\n",
 382 |        "      <td>0.0</td>\n",
 383 |        "      <td>0.0</td>\n",
 384 |        "      <td>0.0</td>\n",
 385 |        "      <td>0.0</td>\n",
 386 |        "      <td>0.0</td>\n",
 387 |        "      <td>0.0</td>\n",
 388 |        "      <td>0.0</td>\n",
 389 |        "      <td>0.0</td>\n",
 390 |        "      <td>0.0</td>\n",
 391 |        "      <td>0.0</td>\n",
 392 |        "      <td>...</td>\n",
 393 |        "      <td>0.0</td>\n",
 394 |        "      <td>0.0</td>\n",
 395 |        "      <td>0.0</td>\n",
 396 |        "      <td>0.0</td>\n",
 397 |        "      <td>0.0</td>\n",
 398 |        "      <td>0.0</td>\n",
 399 |        "      <td>0.0</td>\n",
 400 |        "      <td>0.0</td>\n",
 401 |        "      <td>0.0</td>\n",
 402 |        "      <td>0.0</td>\n",
 403 |        "    </tr>\n",
 404 |        "    <tr>\n",
 405 |        "      <th>12499</th>\n",
 406 |        "      <td>0.0</td>\n",
 407 |        "      <td>0.0</td>\n",
 408 |        "      <td>0.0</td>\n",
 409 |        "      <td>0.0</td>\n",
 410 |        "      <td>0.0</td>\n",
 411 |        "      <td>0.0</td>\n",
 412 |        "      <td>0.0</td>\n",
 413 |        "      <td>0.0</td>\n",
 414 |        "      <td>0.0</td>\n",
 415 |        "      <td>0.0</td>\n",
 416 |        "      <td>...</td>\n",
 417 |        "      <td>0.0</td>\n",
 418 |        "      <td>0.0</td>\n",
 419 |        "      <td>0.0</td>\n",
 420 |        "      <td>0.0</td>\n",
 421 |        "      <td>0.0</td>\n",
 422 |        "      <td>0.0</td>\n",
 423 |        "      <td>0.0</td>\n",
 424 |        "      <td>0.0</td>\n",
 425 |        "      <td>0.0</td>\n",
 426 |        "      <td>0.0</td>\n",
 427 |        "    </tr>\n",
 428 |        "  </tbody>\n",
 429 |        "</table>\n",
 430 |        "<p>12500 rows × 55428 columns</p>\n",
 431 |        "</div>"
 432 |       ],
 433 |       "text/plain": [
 434 |        "        00  000  000s  003830  006  007  0079  0080  0083  0093638  ...  élan  \\\n",
 435 |        "0      0.0  0.0   0.0     0.0  0.0  0.0   0.0   0.0   0.0      0.0  ...   0.0   \n",
 436 |        "1      0.0  0.0   0.0     0.0  0.0  0.0   0.0   0.0   0.0      0.0  ...   0.0   \n",
 437 |        "2      0.0  0.0   0.0     0.0  0.0  0.0   0.0   0.0   0.0      0.0  ...   0.0   \n",
 438 |        "3      0.0  0.0   0.0     0.0  0.0  0.0   0.0   0.0   0.0      0.0  ...   0.0   \n",
 439 |        "4      0.0  0.0   0.0     0.0  0.0  0.0   0.0   0.0   0.0      0.0  ...   0.0   \n",
 440 |        "...    ...  ...   ...     ...  ...  ...   ...   ...   ...      ...  ...   ...   \n",
 441 |        "12495  0.0  0.0   0.0     0.0  0.0  0.0   0.0   0.0   0.0      0.0  ...   0.0   \n",
 442 |        "12496  0.0  0.0   0.0     0.0  0.0  0.0   0.0   0.0   0.0      0.0  ...   0.0   \n",
 443 |        "12497  0.0  0.0   0.0     0.0  0.0  0.0   0.0   0.0   0.0      0.0  ...   0.0   \n",
 444 |        "12498  0.0  0.0   0.0     0.0  0.0  0.0   0.0   0.0   0.0      0.0  ...   0.0   \n",
 445 |        "12499  0.0  0.0   0.0     0.0  0.0  0.0   0.0   0.0   0.0      0.0  ...   0.0   \n",
 446 |        "\n",
 447 |        "       émigré  émigrés  était  état  étc  êxtase   ís  østbye  über  \n",
 448 |        "0         0.0      0.0    0.0   0.0  0.0     0.0  0.0     0.0   0.0  \n",
 449 |        "1         0.0      0.0    0.0   0.0  0.0     0.0  0.0     0.0   0.0  \n",
 450 |        "2         0.0      0.0    0.0   0.0  0.0     0.0  0.0     0.0   0.0  \n",
 451 |        "3         0.0      0.0    0.0   0.0  0.0     0.0  0.0     0.0   0.0  \n",
 452 |        "4         0.0      0.0    0.0   0.0  0.0     0.0  0.0     0.0   0.0  \n",
 453 |        "...       ...      ...    ...   ...  ...     ...  ...     ...   ...  \n",
 454 |        "12495     0.0      0.0    0.0   0.0  0.0     0.0  0.0     0.0   0.0  \n",
 455 |        "12496     0.0      0.0    0.0   0.0  0.0     0.0  0.0     0.0   0.0  \n",
 456 |        "12497     0.0      0.0    0.0   0.0  0.0     0.0  0.0     0.0   0.0  \n",
 457 |        "12498     0.0      0.0    0.0   0.0  0.0     0.0  0.0     0.0   0.0  \n",
 458 |        "12499     0.0      0.0    0.0   0.0  0.0     0.0  0.0     0.0   0.0  \n",
 459 |        "\n",
 460 |        "[12500 rows x 55428 columns]"
 461 |       ]
 462 |      },
 463 |      "execution_count": 8,
 464 |      "metadata": {},
 465 |      "output_type": "execute_result"
 466 |     }
 467 |    ],
 468 |    "source": [
 469 |     "vect = TfidfVectorizer(stop_words='english')\n",
 470 |     "X = vect.fit_transform(reviews)\n",
 471 |     "\n",
 472 |     "pd.DataFrame(X.toarray(), columns=vect.get_feature_names())"
 473 |    ]
 474 |   },
 475 |   {
 476 |    "cell_type": "markdown",
 477 |    "id": "close-advantage",
 478 |    "metadata": {},
 479 |    "source": [
 480 |     "## NMF Decomposition"
 481 |    ]
 482 |   },
 483 |   {
 484 |    "cell_type": "code",
 485 |    "execution_count": 14,
 486 |    "id": "surprising-lindsay",
 487 |    "metadata": {},
 488 |    "outputs": [
 489 |     {
 490 |      "name": "stderr",
 491 |      "output_type": "stream",
 492 |      "text": [
 493 |       "/home/jaidevd/anaconda3/lib/python3.7/site-packages/sklearn/decomposition/_nmf.py:315: FutureWarning: The 'init' value, when 'init=None' and n_components is less than n_samples and n_features, will be changed from 'nndsvd' to 'nndsvda' in 1.1 (renaming of 0.26).\n",
 494 |       "  \"'nndsvda' in 1.1 (renaming of 0.26).\"), FutureWarning)\n",
 495 |       "/home/jaidevd/anaconda3/lib/python3.7/site-packages/sklearn/decomposition/_nmf.py:1091: ConvergenceWarning: Maximum number of iterations 200 reached. Increase it to improve convergence.\n",
 496 |       "  \" improve convergence.\" % max_iter, ConvergenceWarning)\n"
 497 |      ]
 498 |     }
 499 |    ],
 500 |    "source": [
 501 |     "N_TOPICS = 15\n",
 502 |     "nmf = NMF(n_components=N_TOPICS)\n",
 503 |     "W = nmf.fit_transform(X)  # Document-topic matrix\n",
 504 |     "H = nmf.components_       # Topic-term matrix"
 505 |    ]
 506 |   },
 507 |   {
 508 |    "cell_type": "code",
 509 |    "execution_count": 15,
 510 |    "id": "african-corps",
 511 |    "metadata": {},
 512 |    "outputs": [
 513 |     {
 514 |      "data": {
 515 |       "text/html": [
 516 |        "<div>\n",
 517 |        "<style scoped>\n",
 518 |        "    .dataframe tbody tr th:only-of-type {\n",
 519 |        "        vertical-align: middle;\n",
 520 |        "    }\n",
 521 |        "\n",
 522 |        "    .dataframe tbody tr th {\n",
 523 |        "        vertical-align: top;\n",
 524 |        "    }\n",
 525 |        "\n",
 526 |        "    .dataframe thead th {\n",
 527 |        "        text-align: right;\n",
 528 |        "    }\n",
 529 |        "</style>\n",
 530 |        "<table border=\"1\" class=\"dataframe\">\n",
 531 |        "  <thead>\n",
 532 |        "    <tr style=\"text-align: right;\">\n",
 533 |        "      <th></th>\n",
 534 |        "      <th>Word 1</th>\n",
 535 |        "      <th>Word 2</th>\n",
 536 |        "      <th>Word 3</th>\n",
 537 |        "      <th>Word 4</th>\n",
 538 |        "      <th>Word 5</th>\n",
 539 |        "      <th>Word 6</th>\n",
 540 |        "      <th>Word 7</th>\n",
 541 |        "      <th>Word 8</th>\n",
 542 |        "      <th>Word 9</th>\n",
 543 |        "      <th>Word 10</th>\n",
 544 |        "    </tr>\n",
 545 |        "  </thead>\n",
 546 |        "  <tbody>\n",
 547 |        "    <tr>\n",
 548 |        "      <th>Topic 1</th>\n",
 549 |        "      <td>br</td>\n",
 550 |        "      <td>10</td>\n",
 551 |        "      <td>ll</td>\n",
 552 |        "      <td>spoilers</td>\n",
 553 |        "      <td>end</td>\n",
 554 |        "      <td>simply</td>\n",
 555 |        "      <td>yes</td>\n",
 556 |        "      <td>spoiler</td>\n",
 557 |        "      <td>quite</td>\n",
 558 |        "      <td>just</td>\n",
 559 |        "    </tr>\n",
 560 |        "    <tr>\n",
 561 |        "      <th>Topic 2</th>\n",
 562 |        "      <td>movie</td>\n",
 563 |        "      <td>movies</td>\n",
 564 |        "      <td>watch</td>\n",
 565 |        "      <td>recommend</td>\n",
 566 |        "      <td>10</td>\n",
 567 |        "      <td>seen</td>\n",
 568 |        "      <td>saw</td>\n",
 569 |        "      <td>best</td>\n",
 570 |        "      <td>actors</td>\n",
 571 |        "      <td>definitely</td>\n",
 572 |        "    </tr>\n",
 573 |        "    <tr>\n",
 574 |        "      <th>Topic 3</th>\n",
 575 |        "      <td>film</td>\n",
 576 |        "      <td>films</td>\n",
 577 |        "      <td>director</td>\n",
 578 |        "      <td>characters</td>\n",
 579 |        "      <td>seen</td>\n",
 580 |        "      <td>cinema</td>\n",
 581 |        "      <td>festival</td>\n",
 582 |        "      <td>work</td>\n",
 583 |        "      <td>scenes</td>\n",
 584 |        "      <td>art</td>\n",
 585 |        "    </tr>\n",
 586 |        "    <tr>\n",
 587 |        "      <th>Topic 4</th>\n",
 588 |        "      <td>series</td>\n",
 589 |        "      <td>episode</td>\n",
 590 |        "      <td>episodes</td>\n",
 591 |        "      <td>season</td>\n",
 592 |        "      <td>tv</td>\n",
 593 |        "      <td>characters</td>\n",
 594 |        "      <td>trek</td>\n",
 595 |        "      <td>seasons</td>\n",
 596 |        "      <td>shows</td>\n",
 597 |        "      <td>television</td>\n",
 598 |        "    </tr>\n",
 599 |        "    <tr>\n",
 600 |        "      <th>Topic 5</th>\n",
 601 |        "      <td>man</td>\n",
 602 |        "      <td>role</td>\n",
 603 |        "      <td>character</td>\n",
 604 |        "      <td>performance</td>\n",
 605 |        "      <td>best</td>\n",
 606 |        "      <td>plays</td>\n",
 607 |        "      <td>john</td>\n",
 608 |        "      <td>played</td>\n",
 609 |        "      <td>does</td>\n",
 610 |        "      <td>actor</td>\n",
 611 |        "    </tr>\n",
 612 |        "    <tr>\n",
 613 |        "      <th>Topic 6</th>\n",
 614 |        "      <td>good</td>\n",
 615 |        "      <td>pretty</td>\n",
 616 |        "      <td>story</td>\n",
 617 |        "      <td>bad</td>\n",
 618 |        "      <td>acting</td>\n",
 619 |        "      <td>really</td>\n",
 620 |        "      <td>job</td>\n",
 621 |        "      <td>liked</td>\n",
 622 |        "      <td>nice</td>\n",
 623 |        "      <td>little</td>\n",
 624 |        "    </tr>\n",
 625 |        "    <tr>\n",
 626 |        "      <th>Topic 7</th>\n",
 627 |        "      <td>war</td>\n",
 628 |        "      <td>world</td>\n",
 629 |        "      <td>documentary</td>\n",
 630 |        "      <td>people</td>\n",
 631 |        "      <td>american</td>\n",
 632 |        "      <td>history</td>\n",
 633 |        "      <td>soldiers</td>\n",
 634 |        "      <td>men</td>\n",
 635 |        "      <td>women</td>\n",
 636 |        "      <td>hitler</td>\n",
 637 |        "    </tr>\n",
 638 |        "    <tr>\n",
 639 |        "      <th>Topic 8</th>\n",
 640 |        "      <td>funny</td>\n",
 641 |        "      <td>comedy</td>\n",
 642 |        "      <td>laugh</td>\n",
 643 |        "      <td>hilarious</td>\n",
 644 |        "      <td>eddie</td>\n",
 645 |        "      <td>fun</td>\n",
 646 |        "      <td>jokes</td>\n",
 647 |        "      <td>humor</td>\n",
 648 |        "      <td>funniest</td>\n",
 649 |        "      <td>murphy</td>\n",
 650 |        "    </tr>\n",
 651 |        "    <tr>\n",
 652 |        "      <th>Topic 9</th>\n",
 653 |        "      <td>like</td>\n",
 654 |        "      <td>think</td>\n",
 655 |        "      <td>really</td>\n",
 656 |        "      <td>just</td>\n",
 657 |        "      <td>don</td>\n",
 658 |        "      <td>people</td>\n",
 659 |        "      <td>know</td>\n",
 660 |        "      <td>say</td>\n",
 661 |        "      <td>didn</td>\n",
 662 |        "      <td>lot</td>\n",
 663 |        "    </tr>\n",
 664 |        "    <tr>\n",
 665 |        "      <th>Topic 10</th>\n",
 666 |        "      <td>time</td>\n",
 667 |        "      <td>years</td>\n",
 668 |        "      <td>saw</td>\n",
 669 |        "      <td>seen</td>\n",
 670 |        "      <td>dvd</td>\n",
 671 |        "      <td>old</td>\n",
 672 |        "      <td>remember</td>\n",
 673 |        "      <td>ve</td>\n",
 674 |        "      <td>music</td>\n",
 675 |        "      <td>disney</td>\n",
 676 |        "    </tr>\n",
 677 |        "    <tr>\n",
 678 |        "      <th>Topic 11</th>\n",
 679 |        "      <td>life</td>\n",
 680 |        "      <td>story</td>\n",
 681 |        "      <td>love</td>\n",
 682 |        "      <td>family</td>\n",
 683 |        "      <td>real</td>\n",
 684 |        "      <td>characters</td>\n",
 685 |        "      <td>people</td>\n",
 686 |        "      <td>young</td>\n",
 687 |        "      <td>beautiful</td>\n",
 688 |        "      <td>true</td>\n",
 689 |        "    </tr>\n",
 690 |        "    <tr>\n",
 691 |        "      <th>Topic 12</th>\n",
 692 |        "      <td>book</td>\n",
 693 |        "      <td>jane</td>\n",
 694 |        "      <td>version</td>\n",
 695 |        "      <td>read</td>\n",
 696 |        "      <td>eyre</td>\n",
 697 |        "      <td>novel</td>\n",
 698 |        "      <td>rochester</td>\n",
 699 |        "      <td>dalton</td>\n",
 700 |        "      <td>tarzan</td>\n",
 701 |        "      <td>emma</td>\n",
 702 |        "    </tr>\n",
 703 |        "    <tr>\n",
 704 |        "      <th>Topic 13</th>\n",
 705 |        "      <td>horror</td>\n",
 706 |        "      <td>house</td>\n",
 707 |        "      <td>creepy</td>\n",
 708 |        "      <td>scary</td>\n",
 709 |        "      <td>gore</td>\n",
 710 |        "      <td>films</td>\n",
 711 |        "      <td>halloween</td>\n",
 712 |        "      <td>budget</td>\n",
 713 |        "      <td>fans</td>\n",
 714 |        "      <td>effects</td>\n",
 715 |        "    </tr>\n",
 716 |        "    <tr>\n",
 717 |        "      <th>Topic 14</th>\n",
 718 |        "      <td>great</td>\n",
 719 |        "      <td>acting</td>\n",
 720 |        "      <td>really</td>\n",
 721 |        "      <td>actors</td>\n",
 722 |        "      <td>cast</td>\n",
 723 |        "      <td>job</td>\n",
 724 |        "      <td>best</td>\n",
 725 |        "      <td>music</td>\n",
 726 |        "      <td>wonderful</td>\n",
 727 |        "      <td>just</td>\n",
 728 |        "    </tr>\n",
 729 |        "    <tr>\n",
 730 |        "      <th>Topic 15</th>\n",
 731 |        "      <td>action</td>\n",
 732 |        "      <td>jackie</td>\n",
 733 |        "      <td>chan</td>\n",
 734 |        "      <td>scenes</td>\n",
 735 |        "      <td>fu</td>\n",
 736 |        "      <td>kung</td>\n",
 737 |        "      <td>fight</td>\n",
 738 |        "      <td>martial</td>\n",
 739 |        "      <td>bourne</td>\n",
 740 |        "      <td>story</td>\n",
 741 |        "    </tr>\n",
 742 |        "  </tbody>\n",
 743 |        "</table>\n",
 744 |        "</div>"
 745 |       ],
 746 |       "text/plain": [
 747 |        "          Word 1   Word 2       Word 3       Word 4    Word 5      Word 6  \\\n",
 748 |        "Topic 1       br       10           ll     spoilers       end      simply   \n",
 749 |        "Topic 2    movie   movies        watch    recommend        10        seen   \n",
 750 |        "Topic 3     film    films     director   characters      seen      cinema   \n",
 751 |        "Topic 4   series  episode     episodes       season        tv  characters   \n",
 752 |        "Topic 5      man     role    character  performance      best       plays   \n",
 753 |        "Topic 6     good   pretty        story          bad    acting      really   \n",
 754 |        "Topic 7      war    world  documentary       people  american     history   \n",
 755 |        "Topic 8    funny   comedy        laugh    hilarious     eddie         fun   \n",
 756 |        "Topic 9     like    think       really         just       don      people   \n",
 757 |        "Topic 10    time    years          saw         seen       dvd         old   \n",
 758 |        "Topic 11    life    story         love       family      real  characters   \n",
 759 |        "Topic 12    book     jane      version         read      eyre       novel   \n",
 760 |        "Topic 13  horror    house       creepy        scary      gore       films   \n",
 761 |        "Topic 14   great   acting       really       actors      cast         job   \n",
 762 |        "Topic 15  action   jackie         chan       scenes        fu        kung   \n",
 763 |        "\n",
 764 |        "             Word 7   Word 8     Word 9     Word 10  \n",
 765 |        "Topic 1         yes  spoiler      quite        just  \n",
 766 |        "Topic 2         saw     best     actors  definitely  \n",
 767 |        "Topic 3    festival     work     scenes         art  \n",
 768 |        "Topic 4        trek  seasons      shows  television  \n",
 769 |        "Topic 5        john   played       does       actor  \n",
 770 |        "Topic 6         job    liked       nice      little  \n",
 771 |        "Topic 7    soldiers      men      women      hitler  \n",
 772 |        "Topic 8       jokes    humor   funniest      murphy  \n",
 773 |        "Topic 9        know      say       didn         lot  \n",
 774 |        "Topic 10   remember       ve      music      disney  \n",
 775 |        "Topic 11     people    young  beautiful        true  \n",
 776 |        "Topic 12  rochester   dalton     tarzan        emma  \n",
 777 |        "Topic 13  halloween   budget       fans     effects  \n",
 778 |        "Topic 14       best    music  wonderful        just  \n",
 779 |        "Topic 15      fight  martial     bourne       story  "
 780 |       ]
 781 |      },
 782 |      "execution_count": 15,
 783 |      "metadata": {},
 784 |      "output_type": "execute_result"
 785 |     }
 786 |    ],
 787 |    "source": [
 788 |     "# Top 10 words per topic\n",
 789 |     "\n",
 790 |     "words = np.array(vect.get_feature_names())\n",
 791 |     "topic_words = pd.DataFrame(np.zeros((N_TOPICS, 10)), index=[f'Topic {i + 1}' for i in range(N_TOPICS)],\n",
 792 |     "                           columns=[f'Word {i + 1}' for i in range(10)]).astype(str)\n",
 793 |     "for i in range(N_TOPICS):\n",
 794 |     "    ix = H[i].argsort()[::-1][:10]\n",
 795 |     "    topic_words.iloc[i] = words[ix]\n",
 796 |     "\n",
 797 |     "topic_words"
 798 |    ]
 799 |   },
 800 |   {
 801 |    "cell_type": "code",
 802 |    "execution_count": 16,
 803 |    "id": "thousand-clearance",
 804 |    "metadata": {},
 805 |    "outputs": [],
 806 |    "source": [
 807 |     "# Create a topic mapping\n",
 808 |     "\n",
 809 |     "topic_mapping = {\n",
 810 |     "    'Topic 4': 'TV',\n",
 811 |     "    'Topic 7': 'War',\n",
 812 |     "    'Topic 8': 'Comedy',\n",
 813 |     "    'Topic 12': 'Book Adaptation',\n",
 814 |     "    'Topic 13': 'Horror',\n",
 815 |     "    'Topic 15': 'Martial Arts / Action'\n",
 816 |     "}"
 817 |    ]
 818 |   },
 819 |   {
 820 |    "cell_type": "code",
 821 |    "execution_count": 17,
 822 |    "id": "intellectual-somerset",
 823 |    "metadata": {},
 824 |    "outputs": [
 825 |     {
 826 |      "data": {
 827 |       "text/html": [
 828 |        "<div>\n",
 829 |        "<style scoped>\n",
 830 |        "    .dataframe tbody tr th:only-of-type {\n",
 831 |        "        vertical-align: middle;\n",
 832 |        "    }\n",
 833 |        "\n",
 834 |        "    .dataframe tbody tr th {\n",
 835 |        "        vertical-align: top;\n",
 836 |        "    }\n",
 837 |        "\n",
 838 |        "    .dataframe thead th {\n",
 839 |        "        text-align: right;\n",
 840 |        "    }\n",
 841 |        "</style>\n",
 842 |        "<table border=\"1\" class=\"dataframe\">\n",
 843 |        "  <thead>\n",
 844 |        "    <tr style=\"text-align: right;\">\n",
 845 |        "      <th></th>\n",
 846 |        "      <th>Topic 1</th>\n",
 847 |        "      <th>Topic 2</th>\n",
 848 |        "      <th>Topic 3</th>\n",
 849 |        "      <th>Topic 4</th>\n",
 850 |        "      <th>Topic 5</th>\n",
 851 |        "      <th>Topic 6</th>\n",
 852 |        "      <th>Topic 7</th>\n",
 853 |        "      <th>Topic 8</th>\n",
 854 |        "      <th>Topic 9</th>\n",
 855 |        "      <th>Topic 10</th>\n",
 856 |        "      <th>Topic 11</th>\n",
 857 |        "      <th>Topic 12</th>\n",
 858 |        "      <th>Topic 13</th>\n",
 859 |        "      <th>Topic 14</th>\n",
 860 |        "      <th>Topic 15</th>\n",
 861 |        "      <th>max_topic</th>\n",
 862 |        "    </tr>\n",
 863 |        "  </thead>\n",
 864 |        "  <tbody>\n",
 865 |        "    <tr>\n",
 866 |        "      <th>2</th>\n",
 867 |        "      <td>0.028314</td>\n",
 868 |        "      <td>0.000000</td>\n",
 869 |        "      <td>0.022122</td>\n",
 870 |        "      <td>0.001480</td>\n",
 871 |        "      <td>0.023043</td>\n",
 872 |        "      <td>0.002044</td>\n",
 873 |        "      <td>0.030939</td>\n",
 874 |        "      <td>0.000000</td>\n",
 875 |        "      <td>0.006389</td>\n",
 876 |        "      <td>0.000000</td>\n",
 877 |        "      <td>0.000774</td>\n",
 878 |        "      <td>0.007251</td>\n",
 879 |        "      <td>0.000000</td>\n",
 880 |        "      <td>0.003574</td>\n",
 881 |        "      <td>0.000000</td>\n",
 882 |        "      <td>War</td>\n",
 883 |        "    </tr>\n",
 884 |        "    <tr>\n",
 885 |        "      <th>16</th>\n",
 886 |        "      <td>0.000251</td>\n",
 887 |        "      <td>0.000000</td>\n",
 888 |        "      <td>0.001575</td>\n",
 889 |        "      <td>0.000000</td>\n",
 890 |        "      <td>0.029132</td>\n",
 891 |        "      <td>0.002257</td>\n",
 892 |        "      <td>0.000000</td>\n",
 893 |        "      <td>0.033108</td>\n",
 894 |        "      <td>0.016283</td>\n",
 895 |        "      <td>0.000000</td>\n",
 896 |        "      <td>0.012337</td>\n",
 897 |        "      <td>0.000000</td>\n",
 898 |        "      <td>0.003595</td>\n",
 899 |        "      <td>0.011944</td>\n",
 900 |        "      <td>0.010159</td>\n",
 901 |        "      <td>Comedy</td>\n",
 902 |        "    </tr>\n",
 903 |        "    <tr>\n",
 904 |        "      <th>18</th>\n",
 905 |        "      <td>0.029574</td>\n",
 906 |        "      <td>0.000000</td>\n",
 907 |        "      <td>0.019010</td>\n",
 908 |        "      <td>0.001797</td>\n",
 909 |        "      <td>0.016906</td>\n",
 910 |        "      <td>0.008574</td>\n",
 911 |        "      <td>0.000129</td>\n",
 912 |        "      <td>0.038010</td>\n",
 913 |        "      <td>0.005558</td>\n",
 914 |        "      <td>0.006250</td>\n",
 915 |        "      <td>0.036652</td>\n",
 916 |        "      <td>0.000000</td>\n",
 917 |        "      <td>0.000000</td>\n",
 918 |        "      <td>0.000000</td>\n",
 919 |        "      <td>0.000000</td>\n",
 920 |        "      <td>Comedy</td>\n",
 921 |        "    </tr>\n",
 922 |        "    <tr>\n",
 923 |        "      <th>26</th>\n",
 924 |        "      <td>0.015179</td>\n",
 925 |        "      <td>0.000349</td>\n",
 926 |        "      <td>0.000000</td>\n",
 927 |        "      <td>0.000000</td>\n",
 928 |        "      <td>0.015907</td>\n",
 929 |        "      <td>0.012349</td>\n",
 930 |        "      <td>0.000000</td>\n",
 931 |        "      <td>0.034328</td>\n",
 932 |        "      <td>0.015722</td>\n",
 933 |        "      <td>0.008809</td>\n",
 934 |        "      <td>0.004318</td>\n",
 935 |        "      <td>0.000000</td>\n",
 936 |        "      <td>0.000000</td>\n",
 937 |        "      <td>0.001958</td>\n",
 938 |        "      <td>0.000922</td>\n",
 939 |        "      <td>Comedy</td>\n",
 940 |        "    </tr>\n",
 941 |        "    <tr>\n",
 942 |        "      <th>27</th>\n",
 943 |        "      <td>0.031523</td>\n",
 944 |        "      <td>0.008099</td>\n",
 945 |        "      <td>0.000171</td>\n",
 946 |        "      <td>0.003151</td>\n",
 947 |        "      <td>0.009975</td>\n",
 948 |        "      <td>0.001411</td>\n",
 949 |        "      <td>0.035158</td>\n",
 950 |        "      <td>0.042588</td>\n",
 951 |        "      <td>0.000000</td>\n",
 952 |        "      <td>0.000000</td>\n",
 953 |        "      <td>0.001425</td>\n",
 954 |        "      <td>0.002624</td>\n",
 955 |        "      <td>0.000000</td>\n",
 956 |        "      <td>0.003865</td>\n",
 957 |        "      <td>0.002781</td>\n",
 958 |        "      <td>Comedy</td>\n",
 959 |        "    </tr>\n",
 960 |        "    <tr>\n",
 961 |        "      <th>29</th>\n",
 962 |        "      <td>0.000000</td>\n",
 963 |        "      <td>0.000614</td>\n",
 964 |        "      <td>0.000000</td>\n",
 965 |        "      <td>0.000000</td>\n",
 966 |        "      <td>0.000000</td>\n",
 967 |        "      <td>0.014862</td>\n",
 968 |        "      <td>0.000000</td>\n",
 969 |        "      <td>0.014987</td>\n",
 970 |        "      <td>0.010941</td>\n",
 971 |        "      <td>0.000000</td>\n",
 972 |        "      <td>0.000000</td>\n",
 973 |        "      <td>0.001534</td>\n",
 974 |        "      <td>0.066263</td>\n",
 975 |        "      <td>0.000000</td>\n",
 976 |        "      <td>0.036239</td>\n",
 977 |        "      <td>Horror</td>\n",
 978 |        "    </tr>\n",
 979 |        "    <tr>\n",
 980 |        "      <th>30</th>\n",
 981 |        "      <td>0.023404</td>\n",
 982 |        "      <td>0.012107</td>\n",
 983 |        "      <td>0.016814</td>\n",
 984 |        "      <td>0.000000</td>\n",
 985 |        "      <td>0.008135</td>\n",
 986 |        "      <td>0.009620</td>\n",
 987 |        "      <td>0.001377</td>\n",
 988 |        "      <td>0.040382</td>\n",
 989 |        "      <td>0.000809</td>\n",
 990 |        "      <td>0.004582</td>\n",
 991 |        "      <td>0.004803</td>\n",
 992 |        "      <td>0.001186</td>\n",
 993 |        "      <td>0.014194</td>\n",
 994 |        "      <td>0.000000</td>\n",
 995 |        "      <td>0.000000</td>\n",
 996 |        "      <td>Comedy</td>\n",
 997 |        "    </tr>\n",
 998 |        "    <tr>\n",
 999 |        "      <th>31</th>\n",
1000 |        "      <td>0.012324</td>\n",
1001 |        "      <td>0.003554</td>\n",
1002 |        "      <td>0.028753</td>\n",
1003 |        "      <td>0.000000</td>\n",
1004 |        "      <td>0.017125</td>\n",
1005 |        "      <td>0.003483</td>\n",
1006 |        "      <td>0.006804</td>\n",
1007 |        "      <td>0.000000</td>\n",
1008 |        "      <td>0.003702</td>\n",
1009 |        "      <td>0.000000</td>\n",
1010 |        "      <td>0.006449</td>\n",
1011 |        "      <td>0.000833</td>\n",
1012 |        "      <td>0.034161</td>\n",
1013 |        "      <td>0.005682</td>\n",
1014 |        "      <td>0.000000</td>\n",
1015 |        "      <td>Horror</td>\n",
1016 |        "    </tr>\n",
1017 |        "    <tr>\n",
1018 |        "      <th>34</th>\n",
1019 |        "      <td>0.000000</td>\n",
1020 |        "      <td>0.016503</td>\n",
1021 |        "      <td>0.000000</td>\n",
1022 |        "      <td>0.000000</td>\n",
1023 |        "      <td>0.013825</td>\n",
1024 |        "      <td>0.000000</td>\n",
1025 |        "      <td>0.000000</td>\n",
1026 |        "      <td>0.038567</td>\n",
1027 |        "      <td>0.004479</td>\n",
1028 |        "      <td>0.021462</td>\n",
1029 |        "      <td>0.000000</td>\n",
1030 |        "      <td>0.000000</td>\n",
1031 |        "      <td>0.000000</td>\n",
1032 |        "      <td>0.010132</td>\n",
1033 |        "      <td>0.000000</td>\n",
1034 |        "      <td>Comedy</td>\n",
1035 |        "    </tr>\n",
1036 |        "    <tr>\n",
1037 |        "      <th>58</th>\n",
1038 |        "      <td>0.000228</td>\n",
1039 |        "      <td>0.046686</td>\n",
1040 |        "      <td>0.000000</td>\n",
1041 |        "      <td>0.000000</td>\n",
1042 |        "      <td>0.000100</td>\n",
1043 |        "      <td>0.000000</td>\n",
1044 |        "      <td>0.004866</td>\n",
1045 |        "      <td>0.000000</td>\n",
1046 |        "      <td>0.001639</td>\n",
1047 |        "      <td>0.013741</td>\n",
1048 |        "      <td>0.037063</td>\n",
1049 |        "      <td>0.069237</td>\n",
1050 |        "      <td>0.000000</td>\n",
1051 |        "      <td>0.012097</td>\n",
1052 |        "      <td>0.000000</td>\n",
1053 |        "      <td>Book Adaptation</td>\n",
1054 |        "    </tr>\n",
1055 |        "  </tbody>\n",
1056 |        "</table>\n",
1057 |        "</div>"
1058 |       ],
1059 |       "text/plain": [
1060 |        "     Topic 1   Topic 2   Topic 3   Topic 4   Topic 5   Topic 6   Topic 7  \\\n",
1061 |        "2   0.028314  0.000000  0.022122  0.001480  0.023043  0.002044  0.030939   \n",
1062 |        "16  0.000251  0.000000  0.001575  0.000000  0.029132  0.002257  0.000000   \n",
1063 |        "18  0.029574  0.000000  0.019010  0.001797  0.016906  0.008574  0.000129   \n",
1064 |        "26  0.015179  0.000349  0.000000  0.000000  0.015907  0.012349  0.000000   \n",
1065 |        "27  0.031523  0.008099  0.000171  0.003151  0.009975  0.001411  0.035158   \n",
1066 |        "29  0.000000  0.000614  0.000000  0.000000  0.000000  0.014862  0.000000   \n",
1067 |        "30  0.023404  0.012107  0.016814  0.000000  0.008135  0.009620  0.001377   \n",
1068 |        "31  0.012324  0.003554  0.028753  0.000000  0.017125  0.003483  0.006804   \n",
1069 |        "34  0.000000  0.016503  0.000000  0.000000  0.013825  0.000000  0.000000   \n",
1070 |        "58  0.000228  0.046686  0.000000  0.000000  0.000100  0.000000  0.004866   \n",
1071 |        "\n",
1072 |        "     Topic 8   Topic 9  Topic 10  Topic 11  Topic 12  Topic 13  Topic 14  \\\n",
1073 |        "2   0.000000  0.006389  0.000000  0.000774  0.007251  0.000000  0.003574   \n",
1074 |        "16  0.033108  0.016283  0.000000  0.012337  0.000000  0.003595  0.011944   \n",
1075 |        "18  0.038010  0.005558  0.006250  0.036652  0.000000  0.000000  0.000000   \n",
1076 |        "26  0.034328  0.015722  0.008809  0.004318  0.000000  0.000000  0.001958   \n",
1077 |        "27  0.042588  0.000000  0.000000  0.001425  0.002624  0.000000  0.003865   \n",
1078 |        "29  0.014987  0.010941  0.000000  0.000000  0.001534  0.066263  0.000000   \n",
1079 |        "30  0.040382  0.000809  0.004582  0.004803  0.001186  0.014194  0.000000   \n",
1080 |        "31  0.000000  0.003702  0.000000  0.006449  0.000833  0.034161  0.005682   \n",
1081 |        "34  0.038567  0.004479  0.021462  0.000000  0.000000  0.000000  0.010132   \n",
1082 |        "58  0.000000  0.001639  0.013741  0.037063  0.069237  0.000000  0.012097   \n",
1083 |        "\n",
1084 |        "    Topic 15        max_topic  \n",
1085 |        "2   0.000000              War  \n",
1086 |        "16  0.010159           Comedy  \n",
1087 |        "18  0.000000           Comedy  \n",
1088 |        "26  0.000922           Comedy  \n",
1089 |        "27  0.002781           Comedy  \n",
1090 |        "29  0.036239           Horror  \n",
1091 |        "30  0.000000           Comedy  \n",
1092 |        "31  0.000000           Horror  \n",
1093 |        "34  0.000000           Comedy  \n",
1094 |        "58  0.000000  Book Adaptation  "
1095 |       ]
1096 |      },
1097 |      "execution_count": 17,
1098 |      "metadata": {},
1099 |      "output_type": "execute_result"
1100 |     }
1101 |    ],
1102 |    "source": [
1103 |     "# Recall the document-topic matrix, W\n",
1104 |     "\n",
1105 |     "W = pd.DataFrame(W, columns=[f'Topic {i + 1}' for i in range(N_TOPICS)])\n",
1106 |     "W['max_topic'] = W.apply(lambda x: topic_mapping.get(x.idxmax()), axis=1)\n",
1107 |     "W[pd.notnull(W['max_topic'])].head(10)"
1108 |    ]
1109 |   },
1110 |   {
1111 |    "cell_type": "code",
1112 |    "execution_count": 21,
1113 |    "id": "continent-intellectual",
1114 |    "metadata": {},
1115 |    "outputs": [
1116 |     {
1117 |      "data": {
1118 |       "text/plain": [
1119 |        "'In my humble opinion, this movie did not receive the recognition it deserved. Robert Redford lives near me here in Provo, Utah, at Sundance. I enjoy most of his work, and this was my favorite. I\\'m sorry that more people didn\\'t appreciate it. My grandmother was an avid reader and read the book years before it came out on the big screen. She gave it to me to read after we had seen the movie together. The movie and book hit an emotional spot within my heart, and I was weepy for several days after seeing the movie. Sometimes love isn\\'t enough to keep our loved ones from hurting themselves. We see this in our own family relationships, yet our love and our families and their stories endure throughout generations of time. The cinematography was perfect and breathtaking -- I was awed by its beauty and how well it brought to life the words of the author of the book, Norman Maclean, \"But when I am alone in the half light of the canyon, all existence seems to fade to a being with my soul, and memories. And the sounds of the Big Black Foot River, and a four count rhythm, and the hope that a fish will rise. Eventually, all things merge into one, and a river runs through it. The river was cut by the world\\'s great flood and runs over rocks from the basement of time. On some of the rocks are timeless raindrops. Under the rocks are the words, and some of the words are theirs. I am haunted by waters.\" These words, taken from the book and spoken at the end of the movie (by Robert Redford who is narrating as Norman Maclean), are basically scripture, in my opinion. Any possible flaws the movie may have are overshadowed by the beauty and grace of the story and the cinematography. It was beautiful!'"
1120 |       ]
1121 |      },
1122 |      "execution_count": 21,
1123 |      "metadata": {},
1124 |      "output_type": "execute_result"
1125 |     }
1126 |    ],
1127 |    "source": [
1128 |     "reviews[58]"
1129 |    ]
1130 |   },
1131 |   {
1132 |    "cell_type": "code",
1133 |    "execution_count": null,
1134 |    "id": "regulation-comparison",
1135 |    "metadata": {},
1136 |    "outputs": [],
1137 |    "source": []
1138 |   }
1139 |  ],
1140 |  "metadata": {
1141 |   "kernelspec": {
1142 |    "display_name": "Python 3",
1143 |    "language": "python",
1144 |    "name": "python3"
1145 |   },
1146 |   "language_info": {
1147 |    "codemirror_mode": {
1148 |     "name": "ipython",
1149 |     "version": 3
1150 |    },
1151 |    "file_extension": ".py",
1152 |    "mimetype": "text/x-python",
1153 |    "name": "python",
1154 |    "nbconvert_exporter": "python",
1155 |    "pygments_lexer": "ipython3",
1156 |    "version": "3.7.7"
1157 |   }
1158 |  },
1159 |  "nbformat": 4,
1160 |  "nbformat_minor": 5
1161 | }
1162 | 


--------------------------------------------------------------------------------
/distributional semantics/w2v-text-classification.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "fitting-soccer",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## The Problem: Large Movie Dataset Review\n",
  9 |     "### Classify movie reviews from IMDB into positive or negative sentiment.\n",
 10 |     "### Download the dataset [here](https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz)"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 1,
 16 |    "id": "coordinated-amendment",
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "# imports\n",
 21 |     "\n",
 22 |     "from gensim.models import KeyedVectors\n",
 23 |     "import numpy as np\n",
 24 |     "import pandas as pd\n",
 25 |     "import matplotlib.pyplot as plt\n",
 26 |     "from sklearn.model_selection import train_test_split\n",
 27 |     "from tensorflow.keras.preprocessing import text_dataset_from_directory\n",
 28 |     "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
 29 |     "from tensorflow.keras.preprocessing.text import Tokenizer\n",
 30 |     "from tensorflow.keras.layers import Embedding, Dense, Input, GlobalAveragePooling1D\n",
 31 |     "from tensorflow.keras.models import Sequential\n",
 32 |     "from tensorflow.keras.optimizers import Adam\n",
 33 |     "\n",
 34 |     "import utils"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "id": "interior-washer",
 40 |    "metadata": {},
 41 |    "source": [
 42 |     "## Exploring the data"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 2,
 48 |    "id": "welsh-barcelona",
 49 |    "metadata": {},
 50 |    "outputs": [
 51 |     {
 52 |      "name": "stdout",
 53 |      "output_type": "stream",
 54 |      "text": [
 55 |       "Found 25001 files belonging to 2 classes.\n",
 56 |       "Found 25000 files belonging to 2 classes.\n"
 57 |      ]
 58 |     },
 59 |     {
 60 |      "name": "stderr",
 61 |      "output_type": "stream",
 62 |      "text": [
 63 |       "/home/jaidevd/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:11: SettingWithCopyWarning: \n",
 64 |       "A value is trying to be set on a copy of a slice from a DataFrame.\n",
 65 |       "Try using .loc[row_indexer,col_indexer] = value instead\n",
 66 |       "\n",
 67 |       "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 68 |       "  # This is added back by InteractiveShellApp.init_path()\n"
 69 |      ]
 70 |     }
 71 |    ],
 72 |    "source": [
 73 |     "# Importing & preprocessing the dataset\n",
 74 |     "\n",
 75 |     "train_ds = text_dataset_from_directory('../neuralnets/aclImdb/train')\n",
 76 |     "test_ds = text_dataset_from_directory('../neuralnets/aclImdb/test')\n",
 77 |     "\n",
 78 |     "dfTrain = pd.DataFrame(train_ds.unbatch().as_numpy_iterator(), columns=['text', 'label'])\n",
 79 |     "dfTest = pd.DataFrame(test_ds.unbatch().as_numpy_iterator(), columns=['text', 'label'])\n",
 80 |     "_, xts = train_test_split(dfTest, stratify=dfTest['label'], test_size=0.25)\n",
 81 |     "\n",
 82 |     "dfTrain['text'] = dfTrain['text'].map(lambda x: x.decode())\n",
 83 |     "xts['text'] = xts['text'].map(lambda x: x.decode())"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 4,
 89 |    "id": "right-visiting",
 90 |    "metadata": {},
 91 |    "outputs": [
 92 |     {
 93 |      "data": {
 94 |       "text/html": [
 95 |        "<div>\n",
 96 |        "<style scoped>\n",
 97 |        "    .dataframe tbody tr th:only-of-type {\n",
 98 |        "        vertical-align: middle;\n",
 99 |        "    }\n",
100 |        "\n",
101 |        "    .dataframe tbody tr th {\n",
102 |        "        vertical-align: top;\n",
103 |        "    }\n",
104 |        "\n",
105 |        "    .dataframe thead th {\n",
106 |        "        text-align: right;\n",
107 |        "    }\n",
108 |        "</style>\n",
109 |        "<table border=\"1\" class=\"dataframe\">\n",
110 |        "  <thead>\n",
111 |        "    <tr style=\"text-align: right;\">\n",
112 |        "      <th></th>\n",
113 |        "      <th>text</th>\n",
114 |        "      <th>label</th>\n",
115 |        "    </tr>\n",
116 |        "  </thead>\n",
117 |        "  <tbody>\n",
118 |        "    <tr>\n",
119 |        "      <th>4966</th>\n",
120 |        "      <td>Humm, an Italian movie starred by David hasselhoff and Linda Blair, I wasn´t expecting very much, to be honest and in fact, I took even less than I was expecting. It doesn´t mean this movie is the...</td>\n",
121 |        "      <td>0</td>\n",
122 |        "    </tr>\n",
123 |        "    <tr>\n",
124 |        "      <th>24885</th>\n",
125 |        "      <td>Not only was this movie better than all the final season of H:LOTS. But it was better than any movie made for TV I have ever seen!&lt;br /&gt;&lt;br /&gt;Looking at the \"Top 250\" I see that only one small scr...</td>\n",
126 |        "      <td>1</td>\n",
127 |        "    </tr>\n",
128 |        "    <tr>\n",
129 |        "      <th>2310</th>\n",
130 |        "      <td>This is a well-worn story about a man who marries to escape the hangman's noose, then sets about \"taming\" his reluctant bride. It manages to be sexist and racist at exactly the same time. We never...</td>\n",
131 |        "      <td>0</td>\n",
132 |        "    </tr>\n",
133 |        "    <tr>\n",
134 |        "      <th>13648</th>\n",
135 |        "      <td>Being from a small town in Illinois myself, I can instantly relate to this movie. Considering the era it was made in, the townsfolk look uncomfortably like a lot of people I grew up with. Yes the ...</td>\n",
136 |        "      <td>1</td>\n",
137 |        "    </tr>\n",
138 |        "    <tr>\n",
139 |        "      <th>4021</th>\n",
140 |        "      <td>(David H. Steinberg)'s script seemed initially having some real smart points that could've made good romantic comedy, BUT BUT BUT, oh dear ! What did ever happen in the way ???!!!! &lt;br /&gt;&lt;br /&gt;I'l...</td>\n",
141 |        "      <td>0</td>\n",
142 |        "    </tr>\n",
143 |        "  </tbody>\n",
144 |        "</table>\n",
145 |        "</div>"
146 |       ],
147 |       "text/plain": [
148 |        "                                                                                                                                                                                                          text  \\\n",
149 |        "4966   Humm, an Italian movie starred by David hasselhoff and Linda Blair, I wasn´t expecting very much, to be honest and in fact, I took even less than I was expecting. It doesn´t mean this movie is the...   \n",
150 |        "24885  Not only was this movie better than all the final season of H:LOTS. But it was better than any movie made for TV I have ever seen!<br /><br />Looking at the \"Top 250\" I see that only one small scr...   \n",
151 |        "2310   This is a well-worn story about a man who marries to escape the hangman's noose, then sets about \"taming\" his reluctant bride. It manages to be sexist and racist at exactly the same time. We never...   \n",
152 |        "13648  Being from a small town in Illinois myself, I can instantly relate to this movie. Considering the era it was made in, the townsfolk look uncomfortably like a lot of people I grew up with. Yes the ...   \n",
153 |        "4021   (David H. Steinberg)'s script seemed initially having some real smart points that could've made good romantic comedy, BUT BUT BUT, oh dear ! What did ever happen in the way ???!!!! <br /><br />I'l...   \n",
154 |        "\n",
155 |        "       label  \n",
156 |        "4966       0  \n",
157 |        "24885      1  \n",
158 |        "2310       0  \n",
159 |        "13648      1  \n",
160 |        "4021       0  "
161 |       ]
162 |      },
163 |      "execution_count": 4,
164 |      "metadata": {},
165 |      "output_type": "execute_result"
166 |     }
167 |    ],
168 |    "source": [
169 |     "pd.options.display.max_colwidth = 200\n",
170 |     "dfTrain.sample(n=5)"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": 5,
176 |    "id": "japanese-brooklyn",
177 |    "metadata": {},
178 |    "outputs": [
179 |     {
180 |      "name": "stdout",
181 |      "output_type": "stream",
182 |      "text": [
183 |       "I'm sure this was one of those \"WOAH!\" attractions in 1982 when Epcot opened, but now it's just silly. The film's message is cliché. The Circle-Vision is disorienting. And that awful song at the end is grating. And I really wish they'd install seats. After so much walking, all you want to do is sit down for a few minutes. And when you hear there's a film to see it sounds pretty glamorous! You get entertained while sitting down, right? WRONG! You're standing there for 18+ minutes leaning against a short little railing. Disney should make a newer Maelstrom like attraction to liven things up and replace this dull, lackluster film. NOT FUN. Skip it. In fact, skip Canada altogether unless you're eating there. Move directly to the United Kingdom.\n"
184 |      ]
185 |     }
186 |    ],
187 |    "source": [
188 |     "print(dfTrain.loc[0, 'text'])"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "markdown",
193 |    "id": "rational-vault",
194 |    "metadata": {},
195 |    "source": [
196 |     "## Tokenize the text"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": 6,
202 |    "id": "future-salad",
203 |    "metadata": {},
204 |    "outputs": [
205 |     {
206 |      "name": "stdout",
207 |      "output_type": "stream",
208 |      "text": [
209 |       "Found 88582 unique tokens.\n"
210 |      ]
211 |     }
212 |    ],
213 |    "source": [
214 |     "tokenizer = Tokenizer()\n",
215 |     "tokenizer.fit_on_texts(dfTrain['text'].tolist())\n",
216 |     "train_sequences = tokenizer.texts_to_sequences(dfTrain['text'].tolist())\n",
217 |     "test_sequences = tokenizer.texts_to_sequences(xts['text'].tolist())\n",
218 |     "\n",
219 |     "\n",
220 |     "word_index = tokenizer.word_index\n",
221 |     "print('Found %s unique tokens.' % len(word_index))"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "code",
226 |    "execution_count": 7,
227 |    "id": "headed-saver",
228 |    "metadata": {},
229 |    "outputs": [
230 |     {
231 |      "name": "stdout",
232 |      "output_type": "stream",
233 |      "text": [
234 |       "[143, 249, 11, 13, 28, 4, 145, 21906, 14896, 8, 6436, 51, 52009, 3050, 18, 147, 42, 40, 705, 1, 593, 746, 6, 1588, 1, 4201, 1768, 6, 21907, 2, 12, 370, 610, 30, 1, 127, 6, 8169, 2, 10, 63, 654, 3396, 23379, 7180, 100, 35, 73, 1282, 29, 22, 178, 5, 78, 6, 866, 177, 15, 3, 168, 231, 2, 51, 22, 839, 222, 3, 19, 5, 64, 9, 931, 181, 5794, 22, 76, 2162, 134, 1260, 177, 205, 352, 332, 2086, 47, 15, 3051, 231, 12258, 426, 3, 343, 114, 20600, 906, 141, 94, 3, 5926, 27631, 37, 3210, 5, 13420, 180, 53, 2, 5171, 11, 750, 5120, 19, 21, 250, 1769, 9, 8, 189, 1769, 3462, 3899, 891, 332, 1883, 47, 844, 2547, 5, 1, 2345, 4517]\n"
235 |      ]
236 |     }
237 |    ],
238 |    "source": [
239 |     "print(train_sequences[0])"
240 |    ]
241 |   },
242 |   {
243 |    "cell_type": "code",
244 |    "execution_count": 8,
245 |    "id": "oriental-copper",
246 |    "metadata": {},
247 |    "outputs": [
248 |     {
249 |      "name": "stdout",
250 |      "output_type": "stream",
251 |      "text": [
252 |       "[\"i'm\", 'sure', 'this', 'was', 'one', 'of', 'those', 'woah', 'attractions', 'in', '1982', 'when', 'epcot', 'opened', 'but', 'now', \"it's\", 'just', 'silly', 'the', \"film's\", 'message', 'is', 'cliché', 'the', 'circle', 'vision', 'is', 'disorienting', 'and', 'that', 'awful', 'song', 'at', 'the', 'end', 'is', 'grating', 'and', 'i', 'really', 'wish', \"they'd\", 'install', 'seats', 'after', 'so', 'much', 'walking', 'all', 'you', 'want', 'to', 'do', 'is', 'sit', 'down', 'for', 'a', 'few', 'minutes', 'and', 'when', 'you', 'hear', \"there's\", 'a', 'film', 'to', 'see', 'it', 'sounds', 'pretty', 'glamorous', 'you', 'get', 'entertained', 'while', 'sitting', 'down', 'right', 'wrong', \"you're\", 'standing', 'there', 'for', '18', 'minutes', 'leaning', 'against', 'a', 'short', 'little', 'railing', 'disney', 'should', 'make', 'a', 'newer', 'maelstrom', 'like', 'attraction', 'to', 'liven', 'things', 'up', 'and', 'replace', 'this', 'dull', 'lackluster', 'film', 'not', 'fun', 'skip', 'it', 'in', 'fact', 'skip', 'canada', 'altogether', 'unless', \"you're\", 'eating', 'there', 'move', 'directly', 'to', 'the', 'united', 'kingdom']\n"
253 |      ]
254 |     }
255 |    ],
256 |    "source": [
257 |     "print([tokenizer.index_word[k] for k in train_sequences[0]])"
258 |    ]
259 |   },
260 |   {
261 |    "cell_type": "code",
262 |    "execution_count": 9,
263 |    "id": "subjective-mailman",
264 |    "metadata": {},
265 |    "outputs": [],
266 |    "source": [
267 |     "MAX_SEQUENCE_LENGTH = max([max(map(len, train_sequences)), max(map(len, test_sequences))])"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "code",
272 |    "execution_count": 10,
273 |    "id": "promising-rochester",
274 |    "metadata": {},
275 |    "outputs": [
276 |     {
277 |      "data": {
278 |       "text/plain": [
279 |        "2493"
280 |       ]
281 |      },
282 |      "execution_count": 10,
283 |      "metadata": {},
284 |      "output_type": "execute_result"
285 |     }
286 |    ],
287 |    "source": [
288 |     "MAX_SEQUENCE_LENGTH"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "code",
293 |    "execution_count": 11,
294 |    "id": "surgical-specific",
295 |    "metadata": {},
296 |    "outputs": [],
297 |    "source": [
298 |     "train_data = pad_sequences(train_sequences, maxlen=MAX_SEQUENCE_LENGTH)\n",
299 |     "test_data = pad_sequences(test_sequences, maxlen=MAX_SEQUENCE_LENGTH)"
300 |    ]
301 |   },
302 |   {
303 |    "cell_type": "code",
304 |    "execution_count": 12,
305 |    "id": "sexual-convenience",
306 |    "metadata": {},
307 |    "outputs": [
308 |     {
309 |      "name": "stdout",
310 |      "output_type": "stream",
311 |      "text": [
312 |       "['<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', \"i'm\", 'sure', 'this', 'was', 'one', 'of', 'those', 'woah', 'attractions', 'in', '1982', 'when', 'epcot', 'opened', 'but', 'now', \"it's\", 'just', 'silly', 'the', \"film's\", 'message', 'is', 'cliché', 'the', 'circle', 'vision', 'is', 'disorienting', 'and', 'that', 'awful', 'song', 'at', 'the', 'end', 'is', 'grating', 'and', 'i', 'really', 'wish', \"they'd\", 'install', 'seats', 'after', 'so', 'much', 'walking', 'all', 'you', 'want', 'to', 'do', 'is', 'sit', 'down', 'for', 'a', 'few', 'minutes', 'and', 'when', 'you', 'hear', \"there's\", 'a', 'film', 'to', 'see', 'it', 'sounds', 'pretty', 'glamorous', 'you', 'get', 'entertained', 'while', 'sitting', 'down', 'right', 'wrong', \"you're\", 'standing', 'there', 'for', '18', 'minutes', 'leaning', 'against', 'a', 'short', 'little', 'railing', 'disney', 'should', 'make', 'a', 'newer', 'maelstrom', 'like', 'attraction', 'to', 'liven', 'things', 'up', 'and', 'replace', 'this', 'dull', 'lackluster', 'film', 'not', 'fun', 'skip', 'it', 'in', 'fact', 'skip', 'canada', 'altogether', 'unless', \"you're\", 'eating', 'there', 'move', 'directly', 'to', 'the', 'united', 'kingdom']\n"
313 |      ]
314 |     }
315 |    ],
316 |    "source": [
317 |     "print([tokenizer.index_word.get(k, '<PAD>') for k in train_data[0]])"
318 |    ]
319 |   },
320 |   {
321 |    "cell_type": "markdown",
322 |    "id": "agricultural-radius",
323 |    "metadata": {},
324 |    "source": [
325 |     "# Train a classifier with Word Embeddings"
326 |    ]
327 |   },
328 |   {
329 |    "cell_type": "code",
330 |    "execution_count": 14,
331 |    "id": "human-laser",
332 |    "metadata": {},
333 |    "outputs": [],
334 |    "source": [
335 |     "countries_wiki = KeyedVectors.load('wiki-countries.w2v')"
336 |    ]
337 |   },
338 |   {
339 |    "cell_type": "code",
340 |    "execution_count": 15,
341 |    "id": "honey-occasions",
342 |    "metadata": {},
343 |    "outputs": [],
344 |    "source": [
345 |     "embedding_layer = utils.make_embedding_layer(countries_wiki, tokenizer, MAX_SEQUENCE_LENGTH)\n",
346 |     "countries_wiki_model = Sequential([\n",
347 |     "    Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32'),\n",
348 |     "    embedding_layer,\n",
349 |     "    GlobalAveragePooling1D(),\n",
350 |     "    Dense(128, activation='relu'),\n",
351 |     "    Dense(64, activation='relu'),\n",
352 |     "    Dense(1, activation='sigmoid')\n",
353 |     "])\n",
354 |     "countries_wiki_model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])"
355 |    ]
356 |   },
357 |   {
358 |    "cell_type": "code",
359 |    "execution_count": 24,
360 |    "id": "married-slovakia",
361 |    "metadata": {},
362 |    "outputs": [
363 |     {
364 |      "name": "stdout",
365 |      "output_type": "stream",
366 |      "text": [
367 |       "Epoch 1/30\n",
368 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.5434 - accuracy: 0.7303 - val_loss: 0.5326 - val_accuracy: 0.7371\n",
369 |       "Epoch 2/30\n",
370 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.5192 - accuracy: 0.7445 - val_loss: 0.5388 - val_accuracy: 0.7354\n",
371 |       "Epoch 3/30\n",
372 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.5096 - accuracy: 0.7485 - val_loss: 0.5097 - val_accuracy: 0.7507\n",
373 |       "Epoch 4/30\n",
374 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.5011 - accuracy: 0.7566 - val_loss: 0.5057 - val_accuracy: 0.7552\n",
375 |       "Epoch 5/30\n",
376 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4975 - accuracy: 0.7540 - val_loss: 0.5047 - val_accuracy: 0.7536\n",
377 |       "Epoch 6/30\n",
378 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4956 - accuracy: 0.7577 - val_loss: 0.5032 - val_accuracy: 0.7525\n",
379 |       "Epoch 7/30\n",
380 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4910 - accuracy: 0.7595 - val_loss: 0.5073 - val_accuracy: 0.7576\n",
381 |       "Epoch 8/30\n",
382 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4901 - accuracy: 0.7594 - val_loss: 0.5141 - val_accuracy: 0.7430\n",
383 |       "Epoch 9/30\n",
384 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4863 - accuracy: 0.7626 - val_loss: 0.5018 - val_accuracy: 0.7557\n",
385 |       "Epoch 10/30\n",
386 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4852 - accuracy: 0.7648 - val_loss: 0.4958 - val_accuracy: 0.7568\n",
387 |       "Epoch 11/30\n",
388 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4816 - accuracy: 0.7655 - val_loss: 0.5016 - val_accuracy: 0.7544\n",
389 |       "Epoch 12/30\n",
390 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4801 - accuracy: 0.7678 - val_loss: 0.5028 - val_accuracy: 0.7539\n",
391 |       "Epoch 13/30\n",
392 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4745 - accuracy: 0.7721 - val_loss: 0.4971 - val_accuracy: 0.7549\n",
393 |       "Epoch 14/30\n",
394 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4731 - accuracy: 0.7699 - val_loss: 0.5108 - val_accuracy: 0.7467\n",
395 |       "Epoch 15/30\n",
396 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4714 - accuracy: 0.7724 - val_loss: 0.5074 - val_accuracy: 0.7525\n",
397 |       "Epoch 16/30\n",
398 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4719 - accuracy: 0.7724 - val_loss: 0.4974 - val_accuracy: 0.7533\n",
399 |       "Epoch 17/30\n",
400 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4659 - accuracy: 0.7767 - val_loss: 0.4967 - val_accuracy: 0.7557\n",
401 |       "Epoch 18/30\n",
402 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4633 - accuracy: 0.7765 - val_loss: 0.5012 - val_accuracy: 0.7538\n",
403 |       "Epoch 19/30\n",
404 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4617 - accuracy: 0.7774 - val_loss: 0.4941 - val_accuracy: 0.7557\n",
405 |       "Epoch 20/30\n",
406 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4583 - accuracy: 0.7817 - val_loss: 0.4967 - val_accuracy: 0.7602\n",
407 |       "Epoch 21/30\n",
408 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4553 - accuracy: 0.7828 - val_loss: 0.4958 - val_accuracy: 0.7541\n",
409 |       "Epoch 22/30\n",
410 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4540 - accuracy: 0.7820 - val_loss: 0.5020 - val_accuracy: 0.7533\n",
411 |       "Epoch 23/30\n",
412 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4495 - accuracy: 0.7871 - val_loss: 0.5070 - val_accuracy: 0.7557\n",
413 |       "Epoch 24/30\n",
414 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4487 - accuracy: 0.7877 - val_loss: 0.5036 - val_accuracy: 0.7510\n",
415 |       "Epoch 25/30\n",
416 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4454 - accuracy: 0.7887 - val_loss: 0.5051 - val_accuracy: 0.7539\n",
417 |       "Epoch 26/30\n",
418 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4421 - accuracy: 0.7930 - val_loss: 0.4993 - val_accuracy: 0.7550\n",
419 |       "Epoch 27/30\n",
420 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4402 - accuracy: 0.7907 - val_loss: 0.5056 - val_accuracy: 0.7470\n",
421 |       "Epoch 28/30\n",
422 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4353 - accuracy: 0.7955 - val_loss: 0.5026 - val_accuracy: 0.7530\n",
423 |       "Epoch 29/30\n",
424 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4349 - accuracy: 0.7960 - val_loss: 0.4966 - val_accuracy: 0.7552\n",
425 |       "Epoch 30/30\n",
426 |       "391/391 [==============================] - 8s 21ms/step - loss: 0.4323 - accuracy: 0.7975 - val_loss: 0.5137 - val_accuracy: 0.7515\n"
427 |      ]
428 |     }
429 |    ],
430 |    "source": [
431 |     "countries_wiki_history = countries_wiki_model.fit(\n",
432 |     "    train_data, dfTrain['label'].values,\n",
433 |     "    validation_data=(test_data, xts['label'].values),\n",
434 |     "    batch_size=64, epochs=30\n",
435 |     ")"
436 |    ]
437 |   },
438 |   {
439 |    "cell_type": "markdown",
440 |    "id": "mighty-jersey",
441 |    "metadata": {},
442 |    "source": [
443 |     "# Train with a different set of word embeddings\n",
444 |     "\n",
445 |     "## GloVe: Global Vectors for Word Representation\n",
446 |     "### Download [here](http://nlp.stanford.edu/data/glove.6B.zip)"
447 |    ]
448 |   },
449 |   {
450 |    "cell_type": "code",
451 |    "execution_count": 29,
452 |    "id": "voluntary-enemy",
453 |    "metadata": {},
454 |    "outputs": [],
455 |    "source": [
456 |     "glove_wiki = KeyedVectors.load_word2vec_format('data/glove.6B.300d.txt', binary=False, no_header=True)"
457 |    ]
458 |   },
459 |   {
460 |    "cell_type": "code",
461 |    "execution_count": 34,
462 |    "id": "beginning-concert",
463 |    "metadata": {},
464 |    "outputs": [],
465 |    "source": [
466 |     "embedding_layer = utils.make_embedding_layer(glove_wiki, tokenizer, MAX_SEQUENCE_LENGTH)\n",
467 |     "\n",
468 |     "glove_model = Sequential([\n",
469 |     "    Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32'),\n",
470 |     "    embedding_layer,\n",
471 |     "    GlobalAveragePooling1D(),\n",
472 |     "    Dense(128, activation='relu'),\n",
473 |     "    Dense(64, activation='relu'),\n",
474 |     "    Dense(1, activation='sigmoid')\n",
475 |     "])\n",
476 |     "glove_model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])"
477 |    ]
478 |   },
479 |   {
480 |    "cell_type": "code",
481 |    "execution_count": 35,
482 |    "id": "identical-breath",
483 |    "metadata": {},
484 |    "outputs": [
485 |     {
486 |      "name": "stdout",
487 |      "output_type": "stream",
488 |      "text": [
489 |       "Epoch 1/30\n",
490 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.5488 - accuracy: 0.7243 - val_loss: 0.4494 - val_accuracy: 0.8083\n",
491 |       "Epoch 2/30\n",
492 |       "782/782 [==============================] - 20s 25ms/step - loss: 0.4324 - accuracy: 0.8130 - val_loss: 0.4147 - val_accuracy: 0.8235\n",
493 |       "Epoch 3/30\n",
494 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.4065 - accuracy: 0.8247 - val_loss: 0.4126 - val_accuracy: 0.8154\n",
495 |       "Epoch 4/30\n",
496 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3940 - accuracy: 0.8269 - val_loss: 0.3854 - val_accuracy: 0.8341\n",
497 |       "Epoch 5/30\n",
498 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3845 - accuracy: 0.8326 - val_loss: 0.3799 - val_accuracy: 0.8360\n",
499 |       "Epoch 6/30\n",
500 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3821 - accuracy: 0.8306 - val_loss: 0.3760 - val_accuracy: 0.8363\n",
501 |       "Epoch 7/30\n",
502 |       "782/782 [==============================] - 19s 24ms/step - loss: 0.3777 - accuracy: 0.8344 - val_loss: 0.3989 - val_accuracy: 0.8214\n",
503 |       "Epoch 8/30\n",
504 |       "782/782 [==============================] - 19s 24ms/step - loss: 0.3785 - accuracy: 0.8319 - val_loss: 0.3735 - val_accuracy: 0.8376\n",
505 |       "Epoch 9/30\n",
506 |       "782/782 [==============================] - 19s 24ms/step - loss: 0.3761 - accuracy: 0.8353 - val_loss: 0.3708 - val_accuracy: 0.8402\n",
507 |       "Epoch 10/30\n",
508 |       "782/782 [==============================] - 19s 24ms/step - loss: 0.3714 - accuracy: 0.8377 - val_loss: 0.4004 - val_accuracy: 0.8147\n",
509 |       "Epoch 11/30\n",
510 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3704 - accuracy: 0.8382 - val_loss: 0.3725 - val_accuracy: 0.8374\n",
511 |       "Epoch 12/30\n",
512 |       "782/782 [==============================] - 19s 24ms/step - loss: 0.3695 - accuracy: 0.8364 - val_loss: 0.3775 - val_accuracy: 0.8320\n",
513 |       "Epoch 13/30\n",
514 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3665 - accuracy: 0.8402 - val_loss: 0.3651 - val_accuracy: 0.8424\n",
515 |       "Epoch 14/30\n",
516 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3688 - accuracy: 0.8368 - val_loss: 0.3694 - val_accuracy: 0.8408\n",
517 |       "Epoch 15/30\n",
518 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3626 - accuracy: 0.8409 - val_loss: 0.3692 - val_accuracy: 0.8384\n",
519 |       "Epoch 16/30\n",
520 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3640 - accuracy: 0.8392 - val_loss: 0.3878 - val_accuracy: 0.8230\n",
521 |       "Epoch 17/30\n",
522 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3620 - accuracy: 0.8408 - val_loss: 0.3732 - val_accuracy: 0.8358\n",
523 |       "Epoch 18/30\n",
524 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3603 - accuracy: 0.8424 - val_loss: 0.3633 - val_accuracy: 0.8469\n",
525 |       "Epoch 19/30\n",
526 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3599 - accuracy: 0.8413 - val_loss: 0.3621 - val_accuracy: 0.8453\n",
527 |       "Epoch 20/30\n",
528 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3581 - accuracy: 0.8428 - val_loss: 0.4027 - val_accuracy: 0.8208\n",
529 |       "Epoch 21/30\n",
530 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3585 - accuracy: 0.8422 - val_loss: 0.3714 - val_accuracy: 0.8342\n",
531 |       "Epoch 22/30\n",
532 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3551 - accuracy: 0.8427 - val_loss: 0.3665 - val_accuracy: 0.8408\n",
533 |       "Epoch 23/30\n",
534 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3553 - accuracy: 0.8443 - val_loss: 0.3655 - val_accuracy: 0.8421\n",
535 |       "Epoch 24/30\n",
536 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3544 - accuracy: 0.8456 - val_loss: 0.3650 - val_accuracy: 0.8419\n",
537 |       "Epoch 25/30\n",
538 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3526 - accuracy: 0.8445 - val_loss: 0.3620 - val_accuracy: 0.8440\n",
539 |       "Epoch 26/30\n",
540 |       "782/782 [==============================] - 20s 25ms/step - loss: 0.3525 - accuracy: 0.8446 - val_loss: 0.3837 - val_accuracy: 0.8312\n",
541 |       "Epoch 27/30\n",
542 |       "782/782 [==============================] - 20s 25ms/step - loss: 0.3503 - accuracy: 0.8464 - val_loss: 0.3614 - val_accuracy: 0.8438\n",
543 |       "Epoch 28/30\n",
544 |       "782/782 [==============================] - 20s 25ms/step - loss: 0.3494 - accuracy: 0.8452 - val_loss: 0.3645 - val_accuracy: 0.8443\n",
545 |       "Epoch 29/30\n",
546 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3492 - accuracy: 0.8448 - val_loss: 0.3624 - val_accuracy: 0.8414\n",
547 |       "Epoch 30/30\n",
548 |       "782/782 [==============================] - 19s 25ms/step - loss: 0.3447 - accuracy: 0.8491 - val_loss: 0.3631 - val_accuracy: 0.8414\n"
549 |      ]
550 |     }
551 |    ],
552 |    "source": [
553 |     "glove_history = glove_model.fit(\n",
554 |     "    train_data, dfTrain['label'].values,\n",
555 |     "    validation_data=(test_data, xts['label'].values),\n",
556 |     "    batch_size=32, epochs=30\n",
557 |     ")"
558 |    ]
559 |   },
560 |   {
561 |    "cell_type": "code",
562 |    "execution_count": 36,
563 |    "id": "interior-season",
564 |    "metadata": {},
565 |    "outputs": [
566 |     {
567 |      "data": {
568 |       "text/plain": [
569 |        "<matplotlib.legend.Legend at 0x7fefe0727510>"
570 |       ]
571 |      },
572 |      "execution_count": 36,
573 |      "metadata": {},
574 |      "output_type": "execute_result"
575 |     },
576 |     {
577 |      "data": {
578 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD4CAYAAADvsV2wAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABAf0lEQVR4nO3dd3hUVfrA8e8hhZCEAEkogRR6CS1AgJAg1YIFEAuiuIrr6rLqirpr3aKru66/XQtrWVnsBQFBUVBsqIAQBEJIKAkltDRKCqRB+vn9cZI4QMokmWSSmffzPHmSuXPv3HMzyTvnvqcprTVCCCEcXxt7F0AIIUTzkIAvhBBOQgK+EEI4CQn4QgjhJCTgCyGEk3C1dwGq4+/vr3v27GnvYgghRKuxY8eOTK1159r2aZEBv2fPnsTExNi7GEII0WoopY7VtY+kdIQQwklIwBdCCCchAV8IIZyEBHwhhHASEvCFEMJJSMAXQggnIQFfCCGchAR8IVqarEPw8yI4vgvKy+1dGuFAWuTAKyGcltbw6d2QVjHw0NMfek+CPpOh92To0MOuxROtmwR8IVqSfV+YYH/p38C7Cxz6EQ6vhz0rzfP+A34J/j2joG17uxZXtC6qJa54FR4ermVqBeF0ykrh9XHm599tAZeK+pjWcHIvHP7RfAAci4bSc9DGFYLGwoxXwK+P/cotWgSl1A6tdXht+0gNX4iWIv4jyDwAsz/4JdgDKAXdhpivyN9DSSGkbDUfAD8vgp//C1e/YL9yi1ZDAr4QLUHJOfjxn9AjHAZNr31fNw/oPdF8Hd8Fx7Y0TxlbknNn4MgGk9ry8LHh656GM8lQkAlns8xXQSacrXhckPXLzx4dYNy9MOJX4NrWdmVoQhLwhWgJtv4P8tLh+jdMjd5aIZHwwzNwNhs8fZuufC1ByTk48DXsXgkHv4WyYhhzN1z1b9u8/pkUWBQFhTnnb1dtwNOv4ssfugwyPx/fBV/+ATY+D1ELYOTt4O5pm7I0EQn4QtjbudOw6UXoexn0HF+/Y0OizPfkn2HgVbYvm72VlcKR9SbIJ34BxXng3Q1G3wWnj0DsBzDxMfDya/y5trwKxQVw/Vvg0wO8/E1g9+gIbarpwa61aVDf+G/4+jH46QUYdx+MvrPFNqZLwBetm9awbTG4epjeKx2D7V2i+tv0EhTmwqVP1v/YHiPBpS0c2+w4AV9rSN0Ou1fAnk9NCqVtBxh8LQy90XwotnGBjP2wf615/yc/3rhzFmTBjvdg2E0w9AbrjlHK/M31mQxHN5vAv+5J2LwQIu4xdx/tOjauXDYmAV+0bvvXwleP/PLYr6/J6/aZDD0vsW1+tynkpJl0zrDZ0G1o/Y93bQuB4abnTkuhNeSmQWqM6WKaGgPH400KxqUtuLpf8L0tuLj9su30UZNHd/WA/tNMkO932cV58s4DYMBVJuBH3Q/uXg0v87b/mZ5PUQsadnzPKPOVGmMC/4//gOhXTNCPuMc2dyA2YFXAV0pNA/4DuABvaq2fu+D5DsCHQHDFaz6vtX7H4nkXIAZI01pfY6OyC2dXWgzf/tn0Tb/hbTiy0fRciVsC298A5QKBo3/pt95j1Pm9X1qCDc9BeRlMfqLhrxESCT+9CEV59kklFOXD8ThTK0+NgbQdkHfcPOfSFgKGmYZNdy8T9EuLoKzIvH+W38uKzc9dQmHyn0wwr+sDO+oB2H857PwQxv624eXf+j8YeI35EGmMwHC4Zbn5gNv4PPz0PGx5Ddp3te54Tz+464fGlaEWdf71VwTr14DLgFRgu1JqtdY6wWK3e4EErfV0pVRnYL9SaonWurji+QVAItDCq1uiVdm2GLIPw9xPfum2OO4eE1BStv3Sb339c7D+n9DWxwT/kbdB7ynV52WbU8YBE6jG3A2dejb8dUIiTa0yZRv0nWqz4tUpYTVs+Bec2gu6YgoI397mziow3Hx1HWpq7U0leCwERUD0qxB+Z8M+0GPfg8Iz5sPDVgKGw00fwKlEiHn74obgmrRt2hBpzW9nDJCktT4MoJRaBswELAO+BtorpRTgDWQDpRX7BwJXA/8AHrJd0UWTyj5icqiDZ4F/P3uX5mIFmSbY9L0M+l16/nOubaHXJeZr6l9ND5YjG0zw3/cFJHwOHUNg1DwYcasZ0WoP3/8N3LxgwsONe53AMeZu5lh08wX8pHWw8g5zdzXhYdOdtMco+6QuohbAspsh4TPr8++VSotNDbznJRA02vZl6zLIdr2IbMCagN8DSLF4nAqMvWCfV4HVQDrQHrhJ68qPfBYCj1Rsr5FS6m7gboDg4FbY8OYItDaBcev/YP9XgIacVJjxsr1LdrEfn4XifLjiH3Xv6+lrPrgGzzL/fIlrIOYdE3B/fNb0ew//tWkMrE+XyMZI2W4+fCY9YXqDNEZbb+ge1nx5/LQdsPw26DwI7vjS9Ee3p/7TzAfPpoUw5Pr6vYe7V5j2hpb4N94ErLmnre63d+F8DFcAcUB3IAx4VSnlo5S6Bjiltd5R10m01ou11uFa6/DOnTtbUSxhM8VnYce78HokvD/TjOK85A+my1/yz/Yu3cVOJsCOd2D0b+qfc3Vta2qBd3wJ926DMXfBoe/hvWvg1dGmtnc2u2nKXUlr05vDq7MZuGMLIZGmgbSk0DavV5OsQ7DkRlOTv3Wl/YM9mNRc1P1wcjccqkf+u7zc9KjpNhT6NGMqzI6sCfipQJDF40BMTd7SHcCn2kgCjgADgShghlLqKLAMmKKU+rDRpRa2kZMK3z0JL4XCmgUmLTDzNXgwAab+BfpeCpn7mz4A1ofW8M0TJtc56bHGvVbnATDtn/CH/XDt66YL3TdPwIuDYNV8yDtpkyJf5OB3phvlxEdN7dwWQqJMo2danXWrhss7CR/MMj/fugrad2u6c9XX0NnQPsAEcGvtX2umsoh6oPnu7OzMmoC/HeinlOqllHIH5mDSN5aSgakASqmuwADgsNb6ca11oNa6Z8VxP2itb7VZ6UX9aW2G4n98OywcBtEvm1TGvC9h/k8mp+3mYfYNjjDfU7bar7wXOvitaYyd9JjtRpa6tYOwW+A362D+JgibC3s+ge+fts3rWyovN6mkTj3NyExbCY4AFCQ3UVqnMBeWXA8FGXDLCvDv2zTnaShXd9P98chGSIute3+tzWC3Tj0h9NqmLl2LUWfA11qXAvcB32B62nystd6rlJqvlJpfsdszQKRSajfwPfCo1jqzqQotGmH7m/DONBM0x90D98fBTR9Wn7/uPhJc3CHZRnO1aA1rHzGDaRqirMTUwP36mXROU+g2FK55EYbPgb2rTFdHW9q9Ak7ugSl/sW3vlXadoOvgpsnjlxbB8rmmx8nsDyBwlO3PYQuj5pkBWpv/U/e+RzeZu6HI+1teV90mZNWVaq3XAmsv2LbI4ud04PI6XmM9sL7eJRS2UzkqtccouH1N3QNV3DwgIAySbVTDz9hnBrhs+x/kn4KI+XUfY2n7m5CVBLd8bAbqNKURv4LY903QH3mbbV6ztAh+/Dt0GwaDr7PNa1oKiYSdS8x0BLYKYuVlZkGWIxth1uKLe0S1JB4+MPrXJuBnHap9yuhNL4FXF3M350Sc56OtpdDaBK3cNFN7rPrKveBxvvnu3dnM7dHGpfHnTt9pcpbXLLR+VGJwBGxdZBoDK1M9DXV4g/neayJ8/agZMj/5T9blT89mm/70vSdDv1rrFrYRONr0/Ij9wHYBP+ZtM4L01oVNMwYgJNJ8oJ+INx/qjaW1mSMm4TO47BkYflPjX7OpjZ1vGt63vGbu1KpzPN401E99svF/062MBPzmoLUJtolrIHG1CfjVcWlrRkpWfikFezfB8Fugvw2CXPwyc47B11p/TPA4k+dP3wkh4xp3/iMbTM70V6vgiwfMYKGCTDOXe10faOufMx+KVzzbPA1sSsHIX5mRvBn7Gz8Cs7jAXG+vCdBnim3KeKHgSPP9WLRtAv5PL5gPkHH3mV4wrUH7bjD8ZjPaetLjpsJ0oU0LTaP/6DubvXj2JgG/qZSXQ+o2MxoxcQ3kJJteML0ugYjfmeHj7t4Vwd3H9Na4cK6QshJ4MdR0mWxswC8tNsvkDbjS5HutFVQx5CLl58YF/LJSkzcdPMsE9+kvm2Hkm14ys0Vet7jmOcUz9pt0zqg7oGtow8tQX8PmwLqnYOcHcPnfG/dasR+YOdStvaNpiPZdwbePCfiRv2/ca8V+YKZdHjrb1O5bk8j7TTpu2/9gyp/Pfy7rkLljiby/ZXQpbWYS8G2prNR0t0tcbaZyzT9hGj17T4ZJj5q5QerTs8TFDUbMhc0vQ+5x8AloeNmS1pmAM/zm+h3n5Qf+/RvfH/94vKmh955oHisFlz5l5hf/9k9maPtNH1Y/F8w3fzIfjo2Zb6YhvDubQT3xy8ztf0PbDcpKzNS7QRG/9HxqKiGRpoJRXt7wtNH+r0033T5TTDdde09BUV/+fWHQNbDtDdPl0rLra/Qr0MbNVLqcUCt7J1uwo5vhhf7w/gzTcBY0Gq57Ex5Ogrkfm+6ODelGOPI20GUQ18jhC/FLTXBtyND7oLGma2Z5ed371uTIevO954Tzt0feB9cugiM/wXszzDS1lg6ug6TvYOLDjR+R2hAjbzNdEQ983fDX2LsKclJg/AM2K1aNQqLMh2dGYsOOzz9lpkzoNhRmv9+08+A0pagHzO8h9v1ftuWdhLiPIOzmljWGoBlJwLeVLa+alM3s9+GRQ6a2OuzGxt82+vY2jZw73m94wD132gSsoTc0rJYaPM68RuaBhp0fTINtl8HV51TDboY5S+BUgukyeqZiJo+yUtMN07c3jGngTIiN1WeqWXBjZwM/cLU2vUY6D4R+V9i2bNUJscjjN8T2t6DkLFz/ZotdxMMqgeEQMt403paVmG1bX4fyEpPOcVIS8G2huMAM6R48C0JnNm5e7uqMmmfaAA43cNrUvavMKMzhcxp2fGUaoqH98SsX3a5M51RnwJWmMTfvBLx9hcnb73jHjPS9/O/2q2m6uJpBWQe/NWm1+kr63vS7j7y/eVIjHYPBJ9CkFuurpNC0lfS7omVOmFdfUQsgN9UMoivMMR9moTNr767p4CTg20LSOigtNHnDpjDwGpOO2fFuw46PX2ZqmAFhDTvet7eZ96WhI25Tt5nfT69aAj6Y2ukda02N7O1p8MPfzSyGA+y8ktOIW830v/Ef1f/YzQuhfXeziEdzUMr8Ho9Fm7uL+tiz0nSVHXdP05StufW7zHSO2PwfE+yLcm07BXIrJAHfFhK/gHa+v3SLszVXd1PL3P+VqQHXR9YhE6iHz2l47xClTC2/oTX8wxtMuivEit9Pt6Fw5zdmEE1hjpnrxt7znPj1MbnxnR/WL4im7YCjP5kA2px3KCGRkH/SrBVgLa1hy3+h65C6P5hbC6VMLf9UgunW22eKmVXUiUnAb6zSYjjwjUlJNOUQ7ZG3Q3mp6V9cH7uWA8p0r2uMoAiz9Fx9P3DAjNLsMdL65QZ9e8NdP5qVfxqy7F9TGPErE0Drkxvf/B8z1N+Wc+ZYo3Jh8/qU9cgGs5BJxO/s/wFrS0OuNymusiIY/6C9S2N3EvAb6+hPUJRj0i5Nyb+vSW/seM/6xlutTTqn90To0KNx5w+u6INf3+6ZhbmmplvfWqOnr/mQaClCZ5rxEjs/sG7/rENmDMboO5t/XV3/fiYFWJ+Av+W/Jm03pJ4LiLR0Lm4w7Vkz91LPS+xdGruTgN9Y+74wqxb1mdz05xo1D84cM7UxayT/bPYf1sDGWksBw8C1Xf0D/rFo0620tgbb1sDd09QW935m3XJ10a+YYDO2nvMF2UJVHt/KhtvMg3DwG7NEoCNONRA604zmdqQ7lwaSgN8Y5eWwb63p2+7WrunPN2i6aSuwtvE2fim4eZrjGsvFzXR1S6lnwD+yEVw9zDJ8rd2IX0Hpubpn+8w/Zfp7D7/Z+sWrbS0kynzY56TWve/WRWaAoBNONeBsJOA3RlqMGU1ri4BqDde2pvF23xcmqNSm5JypjQ6aYbtFNoLGwvFdZmI3ax3ZYI5zhJpjj5Gm10ddaZ2ti0w3WHv2967qj19HQ/vZbPPhNHS2/db2Fc1GAn5jJK6BNq7NM3tjparG2zq6CO7/yrQtNLTvfXWCx5n0jLWrKuVnmD7orT2dU0kpU8tP22GWWaxOUZ7pyz7oGvsuEtJ1sGlzqCutE/ueGWhV36mqRaskAb+htDY17V4TzNJ4zaVzf3O7HltH4+2u5ab/d68JNe9TX0GjMasqWZnWObrRfHeUbn4Aw24yc7HUNPI29n2T47d3f+82LqYrbW0Nt2UlsHWx+RtpKb2hRJOSgN9QpxJNN72m7p1TnVHzzLmP/lT98/kZZt3UYbNtM49+JY8OpuZobX/8IxtNLbOhA75aIi8/GHgV7FpmuuRaKi02Q/lDxpv2DnsLiTQjlfMzqn8+4XPIS4cIGy2kLlo8CfgNte8L833g1c1/7kEzwKNjzY23e1aa1Ist0zmVgiMgdbuZ56YuhzeYpRMdbQm5EbeZmUf3rz1/+55PzMI2zTFJmjUq++NX9wGttflw8uvbvClJYVcS8BsqcY1ZFckes+65eZgeIIlrzAIiF4pfCgHDocsg2587KAKK880gndqcSYbTR2ybUmop+kwGnx7np3XKy81Aqy6DoW8LWQYwIMx0pa0urZOyDdJjTbfR1jb9sWgweacb4vQxOLHLPumcSqNuNzP/xS89f/upRDP3fH3nvbdW1URqdcyrc8QB8/eV2riY3lKHvoecNLMt6TszJXHUgpbT39vV3bS7JFcT8H9+zaTomurvRLRIEvAbYt+X5ntzdcesTpdBpra9493z53eJX2bmrWmqEZMdg0zttq48/uENZpHoprjLaAnC5poJ1Sp7S21aaIbwD2mCxckbIyQKTuw+f7DY6WPm7nDUPNt12RWtggT8htj3BXQeZP9pVkfNM+vjVna9Ky+DXR+bWQKrm3feVoIjTE+dmiYS09r0v+81oeXUdm3Nt5cZqh/3obnbSY6Gcfc2fFWsphISaT6YUrb9sm3bYkDBmLvtVixhH84b8CsXRaivgkxTu22qqZDrY/C15ra8svH2yEbT62LYTU173uBx5jw5KdU/n7HfzNboiPl7SyNvMxPKrfqtaUQfeZu9S3SxHuGmG2llpaAoz3QdHXwtdAi0a9FE83POgB/zDrwwwMwhUl/715oakz3z95Xc2pl5chI+NyMm45eZ2RkHXNm0561c2Lym/viV+XtHGXBVk0HTze/79BEYc1fLTI+4e5oRwpUNtzuXmHnhIxxkzntRL84X8LU2yxGezYKVv4bSovodn/gFdAg2vWBaglG3m2H82980i6cPvrbp5/XpOhjc29cS8DdAxxDo1LNpy2Fvbu3M8oxunvZbgtEaIZGQFmumxNj6upnXqCWMExDNzvkCfvIWk/cecoPpabPub9YfW5QHh380fe9bSm6662DTPXTD/5kh8s3R66KNS0Xvj2oCfnmZGRDm6OmcSpc+Bfdua9o2k8YKiTI9un54xqSgHGVFK1FvzhfwY983oz9nvGwarX5+zYxKtcbB70xtuiXk7y2Nmmfm1+kY8ku3yaYWPM6sJHTuzPnbj8ebHiG9JzVPOezNrZ3pudSSBY0BlJnUrUMwDLRj7zJhV1YFfKXUNKXUfqVUklLqsWqe76CUWqOUildK7VVK3VGxPUgp9aNSKrFi+wJbX0C9FOaYGSSHXG8WGr/sGTNQZtV8yDtZ9/H7vgBPv18WA2kpBs8C725metvmuvMIGgtoM+rWUuVc/c5Sw28NPDr8MlfO2Lsdb+SzsFqdAV8p5QK8BlwJhAI3K6VCL9jtXiBBaz0cmAS8oJRyB0qBP2itBwERwL3VHNt8dq8085lX9qZw84Ab3obiAtPTorbJyEqL4MC3pkHUlvPT2IK7Fzy4t3mn4w0MN/39L+yPf3iDmUJYptptWfpMMQ3MI35l75IIO7Kmhj8GSNJaH9ZaFwPLgJkX7KOB9kopBXgD2UCp1vq41joWQGudByQCjVxrrxFi34euQ6H7iF+2dRkIVz5ncvNbXqn52CMboTiv5d4Ou7g2b7uCu5dZBctyxG1pkcnrS+2+5Zn0ONy3vXlndhUtjjUBvwdg2eE6lYuD9qvAICAd2A0s0FqfV11WSvUERgDVjslXSt2tlIpRSsVkZNQwu19jHI+H43Gmdn9hYBx5u5mQ7Puna57rPXENuHs7T27aGsHjzCIwlbNGpm43d1COOJ1Ca+fmYb/Vt0SLYU3Ar67aeOEQyyuAOKA7EAa8qpSqWrlZKeUNfAI8oLXOre4kWuvFWutwrXV4585N0OMh9gNwaQvDbrz4OaVMI653N1h5p+mNY6m8zPS/73upY6zcZCvBEVBaaD5MwaRzVBvoGWXfcgkhqmVNwE8FLLshBGJq8pbuAD7VRhJwBBgIoJRywwT7JVrrOhYDbSIl52D3xxA6A9p1qn6fdp3g+jfNOqBf/vH851K2QUGGfefOaYmCKnoEVa5ze2SDSZd5dLBfmYQQNbIm4G8H+imlelU0xM4BVl+wTzIwFUAp1RUYAByuyOm/BSRqrV+0XbHrKXGN6aFT19D3kHEw8VGzuEX8sl+27/vCDE/vd1nTlrO1ad8VOvUyefuiPJMOk3SOEC1WnQFfa10K3Ad8g2l0/VhrvVcpNV8pVbkQ5jNApFJqN/A98KjWOhOIAn4FTFFKxVV8XdUkV1Kb2PdNYAoZX/e+l/wRgiPhyz9A1iEzMjdxjZkmQGquF6ucSO1YtBkL4OjTKQjRilnVIVdrvRZYe8G2RRY/pwMXLZujtd5E9W0AzSfrkBn5OeUv1i304OIK1y2GRePhkzvh6hdNmmf8g01f1tYoOMLMyR/ztmkjqZxnRwjR4jj+SNudH5qGxLC51h/TMQhmvALpO2H5rYCyz1KGrUFlHv/A12ZEZ1PP4yOEaDDHDvhlpRC3BPpdAT4B9Ts2dAaMusOsURo0VgYS1cS//y8N4ZLOEaJFc+yAf/BbMy97Q+cpv+JZ6D8NIubXva+zatPml1p+r0n2LIkQog6OPalG7Pvg3RX6XdS8YB13T7hluW3L5IgGX2sWQ7EcwSyEaHEcN+DnHjc1/Kj7ZbKopjZ8jvkSQrRojpvSif8IdJlMFiWEEBUcM+CXl5upFHpeYv+FxoUQooVwzIB/bJNZZ1Rq90IIUcUxA37sB2bu79AZ9i6JEEK0GI4X8M+dhoTPYdhsGQQkhBAWHC/g71oBZUUwUtI5QghhybECvtam733AcPMlhBCiimMF/ONxcHJ3w0fWCiGEA3OsgB/7Pri2gyE32LskQgjR4jhOwC8ugN0rIXSmLNQshBDVcJw5B1w94IZ36j8rphBCOAnHCfhtXKDfpfYuhRBCtFiOk9IRQghRKwn4QgjhJCTgCyGEk5CAL4QQTkICvhBCOAkJ+EII4SQk4AshhJOQgC+EEE5CAr4QQjgJqwK+UmqaUmq/UipJKfVYNc93UEqtUUrFK6X2KqXusPZYIYQQzaPOgK+UcgFeA64EQoGblVKhF+x2L5CgtR4OTAJeUEq5W3msEEKIZmBNDX8MkKS1Pqy1LgaWATMv2EcD7ZVSCvAGsoFSK48VQgjRDKyZPK0HkGLxOBUYe8E+rwKrgXSgPXCT1rpcKWXNsQAope4G7gYIDg62qvBCCNsoKSkhNTWVwsJCexdF1MHDw4PAwEDc3Nzqfaw1AV9Vs01f8PgKIA6YAvQBvlNK/WTlsWaj1ouBxQDh4eHV7iOEaBqpqam0b9+enj17Ym7URUuktSYrK4vU1FR69epV7+OtSemkAkEWjwMxNXlLdwCfaiMJOAIMtPJYIYSdFRYW4ufnJ8G+hVNK4efn1+A7MWsC/nagn1Kql1LKHZiDSd9YSgamVhSoKzAAOGzlsUKIFkCCfevQmPepzoCvtS4F7gO+ARKBj7XWe5VS85VS8yt2ewaIVErtBr4HHtVaZ9Z0bINLK4RwWCdOnGDOnDn06dOH0NBQrrrqKg4cOGDTc6xfv57o6Ogan1+9ejXPPfdco87x4IMPsnDhwqrHV1xxBb/5zW+qHv/hD3/gxRdfPO9c8+bNY+XKlRe91m9+8xsSEhIaVR5LVq14pbVeC6y9YNsii5/TgcutPVYIISxprZk1axa33347y5YtAyAuLo6TJ0/Sv39/m51n/fr1eHt7ExkZedFzpaWlzJgxgxkzZjTqHJGRkaxYsYIHHniA8vJyMjMzyc3NrXo+OjqahQsXMnbs2DrP9eabbzaqLBeSkbZCCLv78ccfcXNzY/78+VXbwsLCuOSSS9Ba8/DDDzNkyBCGDh3K8uXLARO8r7nmmqr977vvPt59910AevbsyZNPPsnIkSMZOnQo+/bt4+jRoyxatIiXXnqJsLAwfvrpJ+bNm8dDDz3E5MmTefTRR3n33Xe57777AMjIyOD6669n9OjRjB49ms2bNwOwYcMGwsLCCAsLY8SIEeTl5Z13LVFRUVV3EXv37mXIkCG0b9+e06dPU1RURGJiIiNGjDjvXJb+8pe/MG/ePMrLy5k0aRIxMTE2+z07zpq2Qgib+NuavSSk59a9Yz2EdvfhyemDa3x+z549jBo1qtrnPv30U+Li4oiPjyczM5PRo0czYcKEOs/p7+9PbGws//3vf3n++ed58803mT9/Pt7e3vzxj38E4K233uLAgQOsW7cOFxeXqg8MgAULFvDggw8yfvx4kpOTueKKK0hMTOT555/ntddeIyoqivz8fDw8PM47b/fu3XF1dSU5OZno6GjGjRtHWloaW7ZsoUOHDgwbNgx3d/dqy/zII4+Qk5PDO++80yRtKhLwhRAt2qZNm7j55ptxcXGha9euTJw4ke3bt+Pj41Prcddddx0Ao0aN4tNPP61xvxtvvBEXF5eLtq9bt+68/Hlubi55eXlERUXx0EMPMXfuXK677joCAwMvOraylh8dHc1DDz1EWloa0dHRdOjQodp0EsAzzzzD2LFjWbx4ca3X1RgS8IUQ56mtJt5UBg8eXG2jJZj8fnVcXV0pLy+venxhV8W2bdsC4OLiQmlpaY3n9vLyqnZ7eXk5W7ZsoV27dudtf+yxx7j66qtZu3YtERERrFu3joEDB563T2RkJNHR0ezevZshQ4YQFBTECy+8gI+PD7/+9a+rPd/o0aPZsWMH2dnZ+Pr61ljexpAcvhDC7qZMmUJRURFvvPFG1bbt27ezYcMGJkyYwPLlyykrKyMjI4ONGzcyZswYQkJCSEhIoKioiJycHL7//vs6z9O+ffuLcu41ufzyy3n11VerHsfFxQFw6NAhhg4dyqOPPkp4eDj79u276NioqCi++OILfH19cXFxwdfXlzNnzrBlyxbGjRtX7fmmTZtW9WFibRnrSwK+EMLulFKsWrWK7777jj59+jB48GCeeuopunfvzqxZsxg2bBjDhw9nypQp/Otf/6Jbt24EBQUxe/Zshg0bxty5cxkxYkSd55k+fTqrVq2qarStzcsvv0xMTAzDhg0jNDSURYtMx8SFCxcyZMgQhg8fTrt27bjyyisvOnbo0KFkZmYSERFx3rYOHTrg7+9f4zlvvPFG7rrrLmbMmMG5c+fqvJ76UjXdLtlTeHi4tmXLtBCidomJiQwaNMjexRBWqu79Ukrt0FqH13ac1PCFEMJJSMAXQggnIQFfCCGchAR8IYRwEhLwhRDCSUjAF0IIJyEBXwjRYqxatQql1HmDmY4ePcqQIUOAiydMqzRixIiqgVGlpaV4eXnx4YcfVj0/atQoYmNj+etf/8q6desAM8FaZmbmRa9V09QHjkACvhCixVi6dCnjx4+vmiLZWpVTGQDEx8czYMCAqscFBQUcPnyY4cOH8/TTT3PppZfW+lq1zZff2knAF0K0CPn5+WzevJm33nqr3gHfckri6Oho5s+fX1Xj37ZtGyNHjsTFxaXahUbOnTvHtGnTqqZ18Pb2bvzFtFAyeZoQ4nxfPQYndtv2NbsNhStrX0nqs88+Y9q0afTv3x9fX19iY2MZOXKkVS8fGRnJn//8Z8AE/CeffJKlS5eSl5dHdHQ0UVFR1R6Xn5/PnDlzuO2227jtttvqd02tkNTwhRAtwtKlS5kzZw4Ac+bMYenSpVYf27NnT4qLizlx4gT79u1jwIABjB49mq1btxIdHV1jXn7mzJnccccdThHsQWr4QogL1VETbwpZWVn88MMP7NmzB6UUZWVlKKX417/+ZfVrjBs3jpUrVxIQEIBSioiICDZv3sy2bdvOm8TMUlRUFF999RW33HKLUyziLjV8IYTdrVy5kttuu41jx45x9OhRUlJS6NWrF5s2bbL6NaKionjppZeqph8eN24c77//Pt26daNjx47VHvP000/j5+fHPffcY4vLaPEk4Ash7G7p0qXMmjXrvG3XX389H330kdWvERUVxeHDh6sCfkBAAGVlZXV2s1y4cCGFhYU88sgj9S94KyPTIwshZHrkVkamRxZCCFErCfhCCOEkJOALIYSTkIAvhACgJbbniYs15n2SgC+EwMPDg6ysLAn6LZzWmqysLDw8PBp0vFUDr5RS04D/AC7Am1rr5y54/mFgrsVrDgI6a62zlVIPAr8BNLAbuENrXdig0gohmkRgYCCpqalkZGTYuyiiDh4eHgQGBjbo2Dq7ZSqlXIADwGVAKrAduFlrnVDD/tOBB7XWU5RSPYBNQKjW+pxS6mNgrdb63drOKd0yhRCifmzVLXMMkKS1Pqy1LgaWATNr2f9mwHISDFegnVLKFfAE0q04pxBCCBuzJuD3AFIsHqdWbLuIUsoTmAZ8AqC1TgOeB5KB40CO1vrbGo69WykVo5SKkdtKIYSwPWsCfnUzCtWUB5oObNZaZwMopTph7gZ6Ad0BL6XUrdUdqLVerLUO11qHd+7c2YpiCSGEqA9rAn4qEGTxOJCa0zJzOD+dcylwRGudobUuAT4FHHf9MCGEaMGsCfjbgX5KqV5KKXdMUF994U5KqQ7AROBzi83JQIRSylOZuUenAomNL7YQQoj6qrNbpta6VCl1H/ANplvm21rrvUqp+RXPL6rYdRbwrda6wOLYrUqplUAsUArsBBbb+BqEEEJYQWbLFEIIByCzZQohhKgiAV8IIZyEBHwhhHASEvCFEMJJSMAXQggnIQFfCCGchAR8IYRwEhLwhRDCSUjAF0IIJyEBXwghnIQEfCGEcBIS8IUQwklIwBdCCCchAV8IIZyEBHwhhHASEvCFEMJJSMAXQggnIQFfCCGchAR8IYRwEhLwhRDCSUjAF0IIJyEBXwghnIQEfCGEcBIS8IUQwklIwBdCCCchAV8IIZyEVQFfKTVNKbVfKZWklHqsmucfVkrFVXztUUqVKaV8K57rqJRaqZTap5RKVEqNs/VFCCGEqFudAV8p5QK8BlwJhAI3K6VCLffRWv9bax2mtQ4DHgc2aK2zK57+D/C11nogMBxItGH5hRBCWMmaGv4YIElrfVhrXQwsA2bWsv/NwFIApZQPMAF4C0BrXay1PtOoEgsh6kVrzVubjvDg8jgKikrtXRxhR65W7NMDSLF4nAqMrW5HpZQnMA24r2JTbyADeEcpNRzYASzQWhdUc+zdwN0AwcHB1pZfCFGLM2eL+eOKeNYlngLgeM453pk3hnbuLnYumbAHa2r4qpptuoZ9pwObLdI5rsBI4HWt9QigALioDQBAa71Yax2utQ7v3LmzFcUSQtQmLuUMV7+8iQ0HMnhyeigv3TScrUeyufuDGApLyuxdPGEH1tTwU4Egi8eBQHoN+86hIp1jcWyq1nprxeOV1BDwhRC2obXm3eijPLs2kS7tPVgxP5KwoI4AlJRpHlm5i3uWxLLo1lG4u0pHPWdizbu9HeinlOqllHLHBPXVF+6klOoATAQ+r9ymtT4BpCilBlRsmgokNLrU4iJpZ85RXFpu72IIO8stLOGeJbH8bU0CE/t35sv7x1cFe4DZ4UH8Y9YQfth3ivs+iqWkTP5mnEmdAV9rXYrJyX+D6WHzsdZ6r1JqvlJqvsWus4Bvq8nP/x5YopTaBYQBz9qk5KLKhz8fY8K/fuSfX0kHKGe2Jy2H6a9s4tuEkzxx1UDeuC2cjp7uF+03d2wIT00P5duEkzywPI5SCfpOQ2ldUzrefsLDw3VMTIy9i9HilZdr/u/rffxv42E83V1ooxRbn5iKV1trMnWNO++x7LMkpOeyNz2HhOO5nC0u47oRPZgR1h1P96Y9vzif1polW5N5+osEfD3defWWEYT39K3zuDc2HuYfaxO5Nqw7L8wOw6VNdc11orVQSu3QWofXto/8Z7ZShSVlPLg8jq/2nOC2cSFcNTSAOYt/Zk18OnPG2K6XU2FJGQdP5pNwPIe96bkkpOeSeDyXgmLT6OfaRtG3izel5ZrHPt3NP75MZNbIHtwyNpiB3XxsVg5RvfyiUp74dDer49OZ0L8zL80ejp93W6uOvWtCb4rLyvn3N/txc2nD/10/jDYS9B2aBPxWKDO/iLvejyEu5Qx/vnoQd47vBcCAru1ZsjXZJgG/tKycuz/YwcYDGZSWm7tAL3cXQrv7cMOoQAZ370Bodx/6dvHGw80FrTU7jp3mo63JLNuewvtbjjEqpBNzxwZz1dAAPNyctxtgbmEJq2LTWBOfTmCndkwbEsDE/p0b3DVSa82+E3l8tecEn8amkn7mHH+8vD/3TOpb74B97+S+FJWW8/L3B3F3bcPfrx2CUrYL+jlnS/gkNpXvEk5y1dBu3BoRYtPXF/UjKR0bKCwp4+kvEvh6zwnCgjoyvq8/E/r706ezt83/uJNO5XPHu9vIyCti4U0jmDakW9Vz7285yl8/38vq+6IYFtixUef5bGcaDyyP49aIYCL7+BMa4EOwr6dVAeV0QTGfxKby0dZkDmcW0NHTjetHBnLL2GD6dPZuVLksZeUXcfBUPseyCpg8oAtdfDxs9tq2sCv1DEt+TmZ1fDrnSsoY2K09J3ILOXO2hHZuLkwe2JlpQwKYMrAL3nWk4bTW7ErN4as9J/h6z3GOZp1FKRjd05cHL+3PuD5+DS6n1pp/fbOf19cfYl5kT56cHtqov1utNXEpZ1iyNZk18ekUlZbTzceDE7mFzBrRg2dnDW3ycQAFRaXsO5FbkXbMpb2HK49fOajZ7mC01mw/epoVMSn8uD+DKQM78/iVg+jkdXGbiq1Yk9KRgN9IhzPyufejnSQez+Wy0K4cPJnH0ayzAHTz8WB8P38u6efP+L7+Vt9q1+Tnw1n89oMduLko3rx99Hm9L8DUJMf+43tmDO/O/90wrMHn0VozbeFPaDRfL5jQ4H8SrTVbDmexZGsy3+w5QWm5JqK3LyOCO+Hn5Y6vlzudvNyrfvb1cr8o/6+15nhOIUmn8kk6lc/BU/kcOpXPwVN5nD5bUrVfWFBHVswfh5uLfbsZFhSVsjo+nSVbj7EnLZd2bi7MDOvOLWODGRbYkZKycrYezuarPcf5Zu9JMvOLcHdtw4R+/kwbEsBlg7rSwdMNMG0lO5JP89XuE3yz9wRpZ87h0kYR2cePaUO6cXloNzq3b9zfVCWtNX//MpG3Nh3h7gm9efzKgfUO+vlFpXy2M42PtiaTcDwXT3cXrh3Rg1vGBBMa4MOrPybx0roDDOjanv/9ahQhfl42KfupvMKqwJ5wPJfE9FyOZBVQGdrae7iSV1jKE1cN5O4JfWxyzpqknznHJztSWRmbyrGss3i5uzC2tx8bD2Tg086NJ64axPUjezTJXY4E/Cb2xa50HvtkN64uipduCmPygC4ApGSf5aeDmWxKymBzUhY550xgCg3wMcG/nz8jgjvVWauztGpnKo+s3EWwryfv3jGGIF/Pavd77JNdfB6Xzs9PTKVDO7cGXdcP+07y63djeHH2cK4bGdig17jQqbxCVsSk8smOVJKzz1aliS7k4dYGP6+2dPJyo41SHM4oIN9iOoCOnm706+JN3y7t6dvFm35dvEk/c47HPt3NPZP68Mi0gTYpb30lHs/lo63JrNqZRn5RKQO7tWfu2GBmjuiBj0f170NZuUmDfbXnON/sOUF6TiGubRTj+vgR2MmTdYknycgrwt2lDZf082fakG5cFtq12p43tqC15snVe3l/yzF8vdwJ9vUkxM+TYN9fvkL8vOjSvu15lYA9aTl8tC2Zz3emUVBcxqAAH3PtYd1pf8G1r99/igXL4ijXmoU3hTF1UNcGlfXnw1m8vekIO1POkJFXVLU9yLcdoQE+JuUY4MPgHj508/HgniWxfJdwko/nj2NkcKeG/YJqUFhSxjd7T7ByRyqbkjLRGiJ6+3LjqCCuHNoNT3dX9p3I5U+r9rDj2Gkievvy92uH0reL7e52QQJ+kykqLePZLxN5b8sxRgZ35NVbRtK9Y7tq9y0r1+xJy2FTUiY/Hcxgx7HTlJRplIKefl6EBvgQ2t18DQ7wuSgtobXmlR+SePG7A0T09uV/t4ZX1QCrszs1h+mvbuJvMwZze2TPBl3f7EVbSDtzjvUPT2qSGrPWmtzCUk4XFJNVUEx2QTHZBUVkF5SQXVBUta2sXNOnszd9KgJ73y7e+Hm5V1s7euyTXSyPSWHJnWOJ7Otv8zJXKi/XnMorIjn7LMeyCkjOPsvmpExik8/g7tqGa4YFMHdsMCODO9WrFqe1Jj41h6/2HOfrPSc4mVvI5AFdmDakG1MGdrkocDaV8nLN8pgUdqXmkJxtri/t9DksP5/burYhyNeTEF9PMguKiU85Q1vXNkwf3p25Y4MJC+pY67WnZJ/ltx/sIOF4LvdP7ccDU/tZdRepteang5m8+kMS245m4+/tzoT+nRncvQODu/swKMCnxkpOzrkSrn75J7SGtfdfUuv/kDUq368VMSmsjk8nr7CUHh3bcf2oQG4YGUiw38UVssrf7T/XJnKupIzfTezDPZP72qx9SwJ+E0jJPsu9H8WyKzWHuy7pxSPTBtYrKBYUlbLtaDZ7UnOqbkGTs89WPe/v3dZ8AAT4MLi7DxsOZLByRyrXjejBc9cPs2pk5MxXN3GupIxvHphQ71vHmKPZ3LBoC09ND2VeVK96HWtPZ4tLmf7KJvIKS/n6gQn4NjJXeiKnkITjORzLOkty9lmSs85yLPssKdlnKbIY4NZGQb8u7bkxPJDrRwbaJEertUZrWkyPmZKyctLPnOOYxe/AfNido42CG0YFct2IwHoF0cKSMv60ag+fxKYyaUBnFt4UVuOdi9aa7xNP8cqPScSnnKGbjwfzJ/ZmzpjgegXLncmnuXHRFi4d1JXXbx3Z4LRKaVk5D34cz5r4dDzc2nDlkABuHBVIRG8/q96zjLwi/vFlAp/FpdPTz5O/XzuU8f0aX0mRgG9j3+49wR9WxKOA528czuWDu9V5jDVyC0tItMhBJqTncvBUHiVl5r154NJ+LJjaz+o/0I+3p/DIJ7tYMX8co63oj23pN+9tJzb5DJsfndLqJthKSM/l2tc2c0k/f968PbzB/9Df7j3BvR/FVv3+Pd1dLFIangT7eZmffT3p0amd3dsNWqvK8QN/W7OXbh08eH3uKIb06FD1fHm55uu9J3jlhyQSj+cS2Kkd90zqy/WjetDWtWF/m4s3HuLZtft4ZuZgfjWuZ72PLy0rZ8GyOL7cfZwFU/tx5yW9akzZ1WXTwUz+/NlujmadZWZYd/58dWij2mQk4NtISVk5//fVPt7cdIRhgR147ZaRNebQbaWotIykU/lozXn/BNY4W1zK2Ge/Z+rALiycM8Lq4/afyOOKhRt56LL+3D+1X32L3CK8s/kIf1uT0OA7lC93HWfBsp0M7tGBv14ziBA/rxrTSMI2YpNPc8+HsZw+W8yzs4YyM6w7X+w6zqs/JpF0Kp/e/l7cM7kvM8O6N/rDtbxc8+v3thOdlMWqeyMZ3N36/y3LYP+nqwZx14TejSoLmDud/64/xKL1h/Bwa8OjVw7k5tHBDbq7k4BvA+lnznHfR7HEJp9hXmRPHr9qYINrF83pqdV7+WhrMj8/MdXq9MZDy+P4eu8Joh+b0mQNg01Na82d78Ww6WAmn90bRWh36wd/fbYzjYc+jmNkcCfeuWN0s+XNhRlbct9Hsfx8OBt/77Zk5hcxoGt77p3Sl6uHBth0FHBWfhFXvfwTXu6urP79eKs6TzRFsLeUdCqfP3+2m/QzhXz74IQG5fWtCfhyL1qLotIyrvtvNAdO5vPaLSN5asbgVhHsAW4ZG0xxWTkrYlLq3hlIPX2Wz+PTuWVMcKsN9gBKKf59wzA6errx+6WxnC22bsGPj2NSePDjOMb08uW9X4+RYN/M/L3b8uGdY/ndpD708vdk0a2j+GrBJcwY3t3mUz74ebflP3NGcDSrgL98toe6Kr1NHewB+nbxZuldEayYP65JBylKwK/FdwknOZFbyCs3j+DqYQH2Lk699O/anjE9ffloWzLlNXSBtPTmT0doo+DOS1pPQ21N/Lzb8tJNYRzOLOCZL+qenHXJ1mM8snIX4/v68868MU0+F5GonqtLGx6dNpAV8yOZNqRbkzZaR/T24/6p/Vi1M42VO1Jr3K85gn0lpRRdm3jwoAT8WizfnkL3Dh5M6N86F2SZGxHMsayzbD6UWet+WflFLNuezKwRPQjoUH330tYmqq8/8yf2Yem2FL7cdbzG/d7dfIQ/rdrDlIFdeOO28FbXUC0a7vdT+hHR25e/fr6XpFN5Fz3fnMG+uUjAr0HamXNsSsrkhvCgVjuL4LQh3fD1cmfJz8m17vde9FGKSsubfBRic3vosv4MD+rIY5/uIvX02YueX7zxEE+tSeDy0K4sunWUU8/344xc2ij+M2cEnu4u3Ltk53mrgDlisAcJ+DVaGZOK1nDjKNuMNLWHtq4u3DgqkO8ST3Iyt7DaffKLSnlvyzEuD+1q85F/9ubm0oaX54ShNSxYdv6876/+cJBn1+7j6mEBvDZ3pKz85KS6+njwwuzh7D+Zx9/WmPSfowZ7kIBfrfJyzYodKUT19Wvy7pdN7ZaxwZSVa5Zvr77xdtm2ZHLOlTB/omPV7iuF+Hnxj1lD2HHsNC//kITWmhe/O8Dz3x5g1oge/OemMOlH7+QmDejCbyf2Zum2ZD7bmeawwR5keuRqbTmcRerpczx8xYC6d27hQvy8uKSfP0u3JXPPpD64WgS3otIy3vjpMON6+zHCxvOLtCQzw3qw4UAGr/5wkEMZ+Xy56zizwwP553XDWm26TtjWHy8fwLYj2TywPA7AIYM9SA2/Wsu3p+Dj4coVNhpJa29zx4ZwPKeQ9fszztv++c50TuYW8btJjlm7t/T0zCEE+3ry5a7j3BoRzHMS7IUFk/4bQf+u3vzlmlCHDPYgNfyL5Jwt4eu9J5gzOshhGvGmDupCV5+2LNl6jEtDzeyEZeWaRRsPMbi7mcHT0Xm3deXdO8aw7Wg2N44KlJGz4iJBvp58++BEexejSUkN/wKfx6dRXFrO7PAgexfFZtxc2nDT6GDWH8ggpWKitu8STnA4o4DfTerjNMGvp78Xs8ODnOZ6hbiQBPwLLN+ewuDuPvWev6almzM6CAUs256M1prX1x8ixM+TK4e0rgFlQoiGk4BvYU+ambLYkWr3lbp3bMeUgV1Zvj2VjQcziU/N4bcT+kgeWwgnIgHfwoqYFNxd23BtWA97F6VJzI0IJjO/iAeXx9G5fVuuG+mY1ymEqJ4E/AqFJWV8FpfOtMHdGr0aTks1oV9nAju1I7ugmDvH93KYRmkhhHUk4Ff4Zu8Jcs6VcNNox0vnVHJpo7jrkt509WnLLWOD7V0cIUQzk26ZFVbEpBLYqR3jevvZuyhN6vbIntw2LkR6qgjhhKSGj1mndlNSJjeOCmox64g2JQn2QjgnqwK+UmqaUmq/UipJKfVYNc8/rJSKq/jao5QqU0r5WjzvopTaqZT6wpaFt5UVO1JRCm4Ib70TpQkhRF3qDPhKKRfgNeBKIBS4WSkVarmP1vrfWuswrXUY8DiwQWudbbHLAiDRZqW2obJyzcqYFMb39adHR8eYC14IIapjTQ1/DJCktT6stS4GlgEza9n/ZmBp5QOlVCBwNfBmYwraVDYnZZKeU+jQjbVCCAHWBfwegOXcuqkV2y6ilPIEpgGfWGxeCDwClFd3jMWxdyulYpRSMRkZGbXtalPLY1Lo6OnGZRVzzAghhKOyJuBX18JX0yKp04HNlekcpdQ1wCmt9Y66TqK1Xqy1Dtdah3fu3DxLCp4uKOa7vSe5NqxHq1mcXAghGsqagJ8KWOY7AoH0Gvadg0U6B4gCZiiljmJSQVOUUh82oJxN4rO4NIrLyiWdI4RwCtYE/O1AP6VUL6WUOyaor75wJ6VUB2Ai8HnlNq3141rrQK11z4rjftBa32qTklfDck3KumhtVoEaFtiBQQE+TVUkIYRoMeoceKW1LlVK3Qd8A7gAb2ut9yql5lc8v6hi11nAt1rrgiYrbS3KyzWT/r2egI4eTB3YhSkDuzIooH2Nfc73pOWy70Qez1w7pJlLKoQQ9qG0rikdbz/h4eE6JiamXscUlpSxeONhvk88SXxqDgABHTyYMrALUwd1IbKP/3lzx/z5s92siEll258upUM7x5w7RwjhPJRSO7TW4bXt4zBTK3i4uXD/1H7cP7Ufp/IKWb8vg+/3nWTVzjSWbE3Gw60N4/v6M2VgVyL7+PF5XDpXDQ2QYC+EcBoOE/AtdWnvwezRQcweHURRaRlbD2fzw75TrEs8ybrEU1X73Sgja4UQTsQhA76ltq4uTOjfmQn9O/Pk9FAOnsrn+8RT5BaWENHLsSdKE0IISw4f8C0ppejftT39u7a3d1GEEKLZyWyZQgjhJCTgCyGEk5CAL4QQTkICvhBCOAkJ+EII4SQk4AshhJOQgC+EEE5CAr4QQjiJFjl5mlIqAzjWwMP9gUwbFsfeHO16wPGuydGuBxzvmhzteuDiawrRWte6elSLDPiNoZSKqWvGuNbE0a4HHO+aHO16wPGuydGuBxp2TZLSEUIIJyEBXwghnIQjBvzF9i6AjTna9YDjXZOjXQ843jU52vVAA67J4XL4QgghqueINXwhhBDVkIAvhBBOwmECvlJqmlJqv1IqSSn1mL3LYwtKqaNKqd1KqTilVP1WdW8BlFJvK6VOKaX2WGzzVUp9p5Q6WPG9kz3LWF81XNNTSqm0ivcpTil1lT3LWB9KqSCl1I9KqUSl1F6l1IKK7a32farlmlrl+6SU8lBKbVNKxVdcz98qttf7PXKIHL5SygU4AFwGpALbgZu11gl2LVgjKaWOAuFa61Y5YEQpNQHIB97XWg+p2PYvIFtr/VzFB3MnrfWj9ixnfdRwTU8B+Vrr5+1ZtoZQSgUAAVrrWKVUe2AHcC0wj1b6PtVyTbNphe+TUkoBXlrrfKWUG7AJWABcRz3fI0ep4Y8BkrTWh7XWxcAyYKady+T0tNYbgewLNs8E3qv4+T3MP2KrUcM1tVpa6+Na69iKn/OARKAHrfh9quWaWiVt5Fc8dKv40jTgPXKUgN8DSLF4nEorfoMtaOBbpdQOpdTd9i6MjXTVWh8H848JdLFzeWzlPqXUroqUT6tJf1hSSvUERgBbcZD36YJrglb6PimlXJRSccAp4DutdYPeI0cJ+Kqaba0/VwVRWuuRwJXAvRXpBNHyvA70AcKA48ALdi1NAyilvIFPgAe01rn2Lo8tVHNNrfZ90lqXaa3DgEBgjFJqSENex1ECfioQZPE4EEi3U1lsRmudXvH9FLAKk7pq7U5W5Fgrc62n7FyeRtNan6z4hywH3qCVvU8VeeFPgCVa608rNrfq96m6a2rt7xOA1voMsB6YRgPeI0cJ+NuBfkqpXkopd2AOsNrOZWoUpZRXRYMTSikv4HJgT+1HtQqrgdsrfr4d+NyOZbGJyn+6CrNoRe9TRYPgW0Ci1vpFi6da7ftU0zW11vdJKdVZKdWx4ud2wKXAPhrwHjlELx2Aii5WCwEX4G2t9T/sW6LGUUr1xtTqAVyBj1rbNSmllgKTMNO4ngSeBD4DPgaCgWTgRq11q2kEreGaJmHSBBo4Cvy2Mrfa0imlxgM/AbuB8orNT2By3q3yfarlmm6mFb5PSqlhmEZZF0wl/WOt9dNKKT/q+R45TMAXQghRO0dJ6QghhKiDBHwhhHASEvCFEMJJSMAXQggnIQFfCCGchAR8IYRwEhLwhRDCSfw/R9DbM824OPcAAAAASUVORK5CYII=\n",
579 |       "text/plain": [
580 |        "<Figure size 432x288 with 1 Axes>"
581 |       ]
582 |      },
583 |      "metadata": {
584 |       "needs_background": "light"
585 |      },
586 |      "output_type": "display_data"
587 |     }
588 |    ],
589 |    "source": [
590 |     "plt.plot(countries_wiki_history.history['val_accuracy'], label='Countries Wiki')\n",
591 |     "plt.plot(glove_history.history['val_accuracy'], label='All Wiki')\n",
592 |     "plt.legend()"
593 |    ]
594 |   },
595 |   {
596 |    "cell_type": "code",
597 |    "execution_count": null,
598 |    "id": "visible-universe",
599 |    "metadata": {},
600 |    "outputs": [],
601 |    "source": []
602 |   }
603 |  ],
604 |  "metadata": {
605 |   "kernelspec": {
606 |    "display_name": "Python 3",
607 |    "language": "python",
608 |    "name": "python3"
609 |   },
610 |   "language_info": {
611 |    "codemirror_mode": {
612 |     "name": "ipython",
613 |     "version": 3
614 |    },
615 |    "file_extension": ".py",
616 |    "mimetype": "text/x-python",
617 |    "name": "python",
618 |    "nbconvert_exporter": "python",
619 |    "pygments_lexer": "ipython3",
620 |    "version": "3.7.7"
621 |   }
622 |  },
623 |  "nbformat": 4,
624 |  "nbformat_minor": 5
625 | }
626 | 


--------------------------------------------------------------------------------