├── 01_installation.md
├── 02_tensor_basics.py
├── 03_nn.py
├── 04_regression.ipynb
├── 05_cnn.py
├── 06_save_load.py
├── 07_Functional_API_Project.ipynb
├── 07_functional_API.py
├── 08_09_Star_Wars_Project.ipynb
├── 10_rnn.py
├── 11_NLP.ipynb
├── README.md
└── slides
    ├── CNN.pdf
    ├── NeuralNet_TF.pdf
    └── RNN.pdf

/01_installation.md:
--------------------------------------------------------------------------------
1 | ## Installation
2 | 
3 | https://www.tensorflow.org/install
4 | 
5 | ### 0. Optional: GPU Support
6 | 
7 | https://www.tensorflow.org/install/gpu
8 | 
9 | 1. Install Visual Studio 2017 Free Version
10 | and C++ Redistributable:
11 | 
12 | - [https://www.techspot.com/downloads/6278-visual-studio.html](https://www.techspot.com/downloads/6278-visual-studio.html)
13 | - [https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads)
14 | 
15 | 2. Install CUDA Toolkit 10.1
16 | [https://developer.nvidia.com/cuda-toolkit-archive](https://developer.nvidia.com/cuda-toolkit-archive)
17 | 
18 | 3. Install NVIDIA cuDNN version 7
19 | [https://developer.nvidia.com/cudnn](https://developer.nvidia.com/cudnn)
20 | 
21 | Download (you need an account) and copy the dll, include, and lib files into the corresponding directories of the CUDA Toolkit installation directory
22 | 
23 | 4. On Windows: Modify environment variables (paths must match your installation directory):
24 | - Add variable
25 | ```
26 | CUDA_PATH = C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\bin
27 | ```
28 | 
29 | Add 2 entries to PATH variable:
30 | ```
31 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\bin
32 | C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\libnvvp
33 | ```
34 | 
35 | ### 1. Installation
36 | 1. Create a virtual environment and activate it (e.g. with conda or virtualenv)
37 | 
38 | ```console
39 | conda create -n tf python=3.8
40 | conda activate tf
41 | ```
42 | 
43 | 2. Install with
44 | ```console
45 | pip install tensorflow
46 | ```
47 | 
48 | ### 2. Verification:
49 | ```python
50 | import tensorflow as tf
51 | print(tf.__version__)
52 | 
53 | # test gpu
54 | physical_devices = tf.config.list_physical_devices("GPU")
55 | print(len(physical_devices))
56 | ```
--------------------------------------------------------------------------------
/02_tensor_basics.py:
--------------------------------------------------------------------------------
1 | import os
2 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
3 | 
4 | import tensorflow as tf
5 | 
6 | """
7 | Everything in TensorFlow is based on Tensor operations.
8 | Tensors are (kind of) like np.arrays.
9 | All tensors are immutable: you can never update the contents of a
10 | tensor, only create a new one.
11 | 
12 | - nd-arrays (1d, 2d, or even 3d and higher)
13 | - GPU support
14 | - Computational graph / Track gradients / Backpropagation
15 | - Immutable!
16 | """
17 | # 1. create tensors
18 | # scalar, rank-0 tensor
19 | x = tf.constant(4)
20 | print(x)
21 | 
22 | x = tf.constant(4, shape=(1,1), dtype=tf.float32)
23 | print(x)
24 | 
25 | # vector, rank-1
26 | x = tf.constant([1,2,3])
27 | print(x)
28 | 
29 | # matrix, rank-2
30 | x = tf.constant([[1,2,3], [4,5,6]])
31 | print(x)
32 | 
33 | x = tf.ones((3,3))
34 | print(x)
35 | 
36 | x = tf.zeros((3,3))
37 | print(x)
38 | 
39 | x = tf.eye(3)
40 | print(x)
41 | 
42 | x = tf.random.normal((3,3), mean=0, stddev=1)
43 | print(x)
44 | 
45 | x = tf.random.uniform((3,3), minval=0, maxval=1)
46 | print(x)
47 | 
48 | x = tf.range(10)
49 | print(x)
50 | 
51 | # 2. cast:
52 | x = tf.cast(x, dtype=tf.float32)
53 | print(x)
54 | 
55 | # 3. operations, elementwise
56 | x = tf.constant([1,2,3])
57 | y = tf.constant([4,5,6])
58 | 
59 | z = tf.add(x,y)
60 | z = x + y
61 | print(z)
62 | 
63 | z = tf.subtract(x,y)
64 | z = x - y
65 | print(z)
66 | 
67 | z = tf.divide(x,y)
68 | z = x / y
69 | print(z)
70 | 
71 | z = tf.multiply(x,y)
72 | z = x * y
73 | print(z)
74 | 
75 | # dot product
76 | z = tf.tensordot(x,y, axes=1)
77 | print(z)
78 | 
79 | # elementwise exponentiate
80 | z = x ** 3
81 | print(z)
82 | 
83 | # matrix multiplication (shapes must match: number of columns A = number of rows B)
84 | x = tf.random.normal((2,3)) # 2,3
85 | y = tf.random.normal((3,4)) # 3,4
86 | 
87 | z = tf.matmul(x,y)
88 | z = x @ y
89 | print(z)
90 | 
91 | # 4. indexing, slicing
92 | x = tf.constant([[1,2,3,4],[5,6,7,8]])
93 | print(x[0])
94 | print(x[:, 0]) # all rows, column 0
95 | print(x[1, :]) # row 1, all columns
96 | print(x[1,1]) # element at 1, 1
97 | 
98 | # 5. reshape
99 | x = tf.random.normal((2,3))
100 | print(x)
101 | x = tf.reshape(x, (3,2))
102 | print(x)
103 | 
104 | x = tf.reshape(x, (-1,2))
105 | print(x)
106 | 
107 | x = tf.reshape(x, (6,))
108 | print(x)
109 | 
110 | # 6. numpy
111 | x = x.numpy()
112 | print(type(x))
113 | 
114 | x = tf.convert_to_tensor(x)
115 | print(type(x))
116 | # -> eager tensor = evaluates operations immediately
117 | # without building graphs
118 | 
119 | # string tensor
120 | x = tf.constant("Patrick")
121 | print(x)
122 | 
123 | x = tf.constant(["Patrick", "Max", "Mary"])
124 | print(x)
125 | 
126 | # Variable
127 | # A tf.Variable represents a tensor whose value can be
128 | # changed by running ops on it
129 | # Used to represent shared, persistent state your program manipulates
130 | # Higher level libraries like tf.keras use tf.Variable to store model parameters.
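# Illustration (added note, not part of the original script): unlike the
# immutable tensors above, a tf.Variable can be updated in place:
v = tf.Variable([1.0, 2.0])
v.assign([3.0, 4.0])        # overwrite the contents
v.assign_add([1.0, 1.0])    # elementwise in-place add -> [4.0, 5.0]
print(v)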
131 | b = tf.Variable([[1.0, 2.0, 3.0]])
132 | print(b)
133 | print(type(b))
134 | 
--------------------------------------------------------------------------------
/03_nn.py:
--------------------------------------------------------------------------------
1 | # First Neural Net
2 | # Train, evaluate, and predict with the model
3 | import os
4 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
5 | 
6 | import tensorflow as tf
7 | from tensorflow import keras
8 | import numpy as np
9 | import matplotlib.pyplot as plt
10 | 
11 | mnist = keras.datasets.mnist
12 | 
13 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
14 | print(x_train.shape, y_train.shape)
15 | 
16 | # normalize: 0,255 -> 0,1
17 | x_train, x_test = x_train / 255.0, x_test / 255.0
18 | 
19 | # model
20 | model = keras.models.Sequential([
21 |     keras.layers.Flatten(input_shape=(28,28)),
22 |     keras.layers.Dense(128, activation='relu'),
23 |     keras.layers.Dense(10),
24 | ])
25 | 
26 | print(model.summary())
27 | 
28 | # another way to build the Sequential model:
29 | #model = keras.models.Sequential()
30 | #model.add(keras.layers.Flatten(input_shape=(28,28)))
31 | #model.add(keras.layers.Dense(128, activation='relu'))
32 | #model.add(keras.layers.Dense(10))
33 | 
34 | # loss and optimizer
35 | loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
36 | optim = keras.optimizers.Adam(lr=0.001)
37 | metrics = ["accuracy"]
38 | 
39 | model.compile(loss=loss, optimizer=optim, metrics=metrics)
40 | 
41 | # training
42 | batch_size = 64
43 | epochs = 5
44 | 
45 | model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, shuffle=True, verbose=2)
46 | 
47 | # evaluate
48 | model.evaluate(x_test, y_test, batch_size=batch_size, verbose=2)
49 | 
50 | # predictions
51 | 
52 | # 1. option: build new model with Softmax layer
53 | probability_model = keras.models.Sequential([
54 |     model,
55 |     keras.layers.Softmax()
56 | ])
57 | 
58 | predictions = probability_model(x_test)
59 | pred0 = predictions[0]
60 | print(pred0)
61 | 
62 | # use np.argmax to get label with highest probability
63 | label0 = np.argmax(pred0)
64 | print(label0)
65 | 
66 | # 2. option: original model + nn.softmax, call model(x)
67 | predictions = model(x_test)
68 | predictions = tf.nn.softmax(predictions)
69 | pred0 = predictions[0]
70 | print(pred0)
71 | label0 = np.argmax(pred0)
72 | print(label0)
73 | 
74 | # 3. option: original model + nn.softmax, call model.predict(x)
75 | predictions = model.predict(x_test, batch_size=batch_size)
76 | predictions = tf.nn.softmax(predictions)
77 | pred0 = predictions[0]
78 | print(pred0)
79 | label0 = np.argmax(pred0)
80 | print(label0)
81 | 
82 | # call argmax for multiple labels
83 | pred05s = predictions[0:5]
84 | print(pred05s.shape)
85 | label05s = np.argmax(pred05s, axis=1)
86 | print(label05s)
87 | 
88 | 
89 | 
--------------------------------------------------------------------------------
/05_cnn.py:
--------------------------------------------------------------------------------
1 | import os
2 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
3 | 
4 | import tensorflow as tf
5 | from tensorflow import keras
6 | from tensorflow.keras import layers
7 | 
8 | import matplotlib.pyplot as plt
9 | 
10 | cifar10 = keras.datasets.cifar10
11 | 
12 | (train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
13 | 
14 | print(train_images.shape) # 50000, 32, 32, 3
15 | 
16 | # Normalize: 0,255 -> 0,1
17 | train_images, test_images = train_images / 255.0, test_images / 255.0
18 | 
19 | class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
20 |                'dog', 'frog', 'horse', 'ship', 'truck']
21 | 
22 | def show():
23 |     plt.figure(figsize=(10,10))
24 |     for i in range(16):
25 |         plt.subplot(4,4,i+1)
26 |         plt.xticks([])
27 |         plt.yticks([])
28 |         plt.grid(False)
29 |         plt.imshow(train_images[i], cmap=plt.cm.binary)
30 |         # The CIFAR labels happen to be arrays,
31 |         # which is why you need the extra index
32 |         plt.xlabel(class_names[train_labels[i][0]])
33 |     plt.show()
34 | 
35 | show()
36 | 
37 | # model...
38 | model = keras.models.Sequential()
39 | model.add(layers.Conv2D(32, (3,3), strides=(1,1), padding="valid", activation='relu', input_shape=(32,32,3)))
40 | model.add(layers.MaxPool2D((2,2)))
41 | model.add(layers.Conv2D(32, 3, activation='relu'))
42 | model.add(layers.MaxPool2D((2,2)))
43 | model.add(layers.Flatten())
44 | model.add(layers.Dense(64, activation='relu'))
45 | model.add(layers.Dense(10))
46 | print(model.summary())
47 | #import sys; sys.exit()
48 | 
49 | # loss and optimizer
50 | loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
51 | optim = keras.optimizers.Adam(lr=0.001)
52 | metrics = ["accuracy"]
53 | 
54 | model.compile(optimizer=optim, loss=loss, metrics=metrics)
55 | 
56 | # training
57 | batch_size = 64
58 | epochs = 5
59 | 
60 | model.fit(train_images, train_labels, epochs=epochs,
61 |           batch_size=batch_size, verbose=2)
62 | 
63 | # evaluate
64 | model.evaluate(test_images, test_labels, batch_size=batch_size, verbose=2)
65 | 
--------------------------------------------------------------------------------
/06_save_load.py:
--------------------------------------------------------------------------------
1 | import os
2 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
3 | 
4 | import tensorflow as tf
5 | from tensorflow import keras
6 | import numpy as np
7 | 
8 | mnist = keras.datasets.mnist
9 | 
10 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
11 | 
12 | # normalize
13 | x_train, x_test = x_train / 255.0, x_test / 255.0
14 | 
15 | 
16 | # Feed forward neural network
17 | model = keras.models.Sequential([
18 |     keras.layers.Flatten(input_shape=(28,28)),
19 |     keras.layers.Dense(128, activation='relu'),
20 |     keras.layers.Dense(10),
21 | ])
22 | 
23 | # config
24 | loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
25 | optim = keras.optimizers.Adam(lr=0.001) # "adam"
26 | metrics = [keras.metrics.SparseCategoricalAccuracy()] # "accuracy"
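# Added note (not in the original script): the "adam" / "accuracy" comments
# above refer to the equivalent Keras string shortcuts, e.g.
# model.compile(loss=loss, optimizer="adam", metrics=["accuracy"])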
27 | 
28 | # compile
29 | model.compile(loss=loss, optimizer=optim, metrics=metrics)
30 | 
31 | # fit/training
32 | model.fit(x_train, y_train, batch_size=64, epochs=5, shuffle=True, verbose=2)
33 | 
34 | print("Evaluate:")
35 | model.evaluate(x_test, y_test, verbose=2)
36 | 
37 | # 1) Save whole model
38 | # two formats: SavedModel or HDF5
39 | model.save("nn")  # no file ending = SavedModel
40 | model.save("nn.h5")  # .h5 = HDF5
41 | 
42 | new_model = keras.models.load_model("nn.h5")
43 | 
44 | # 2) save only weights
45 | model.save_weights("nn_weights.h5")
46 | 
47 | # initialize model first:
48 | # model = keras.Sequential([...])
49 | model.load_weights("nn_weights.h5")
50 | 
51 | # 3) save only architecture, to_json
52 | json_string = model.to_json()
53 | 
54 | with open("nn_model.json", "w") as f:
55 |     f.write(json_string)
56 | 
57 | with open("nn_model.json", "r") as f:
58 |     loaded_json_string = f.read()
59 | 
60 | new_model = keras.models.model_from_json(loaded_json_string)
61 | print(new_model.summary())
62 | 
63 | 
--------------------------------------------------------------------------------
/07_Functional_API_Project.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 12,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "import os\n",
10 |     "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'\n",
11 |     "\n",
12 |     "import tensorflow as tf\n",
13 |     "from tensorflow import keras\n",
14 |     "import numpy as np"
15 |    ]
16 |   },
17 |   {
18 |    "cell_type": "code",
19 |    "execution_count": 13,
20 |    "metadata": {},
21 |    "outputs": [],
22 |    "source": [
23 |     "# Define Functional Model\n",
24 |     "inputs = keras.Input(shape=(28,28))\n",
25 |     "flatten = keras.layers.Flatten()\n",
26 |     "dense1 = keras.layers.Dense(128, activation='relu')\n",
27 |     "\n",
28 |     "dense2 = keras.layers.Dense(10, activation='softmax', name=\"category_output\")\n",
29 |     "dense3 = keras.layers.Dense(1, activation='sigmoid', name=\"leftright_output\")"
30 |    ]
31 |   },
32 |   {
33 |    "cell_type": "code",
34 |    "execution_count": 14,
35 |    "metadata": {},
36 |    "outputs": [],
37 |    "source": [
38 |     "x = flatten(inputs)\n",
39 |     "x = dense1(x)\n",
40 |     "outputs1 = dense2(x)\n",
41 |     "outputs2 = dense3(x)\n",
42 |     "\n",
43 |     "model = keras.Model(inputs=inputs, outputs=[outputs1, outputs2], name=\"mnist_model\")"
44 |    ]
45 |   },
46 |   {
47 |    "cell_type": "code",
48 |    "execution_count": 15,
49 |    "metadata": {},
50 |    "outputs": [
51 |     {
52 |      "name": "stdout",
53 |      "output_type": "stream",
54 |      "text": [
55 |       "Model: \"mnist_model\"\n",
56 |       "__________________________________________________________________________________________________\n",
57 |       "Layer (type)                    Output Shape         Param #     Connected to                     \n",
58 |       "==================================================================================================\n",
59 |       "input_2 (InputLayer)            [(None, 28, 28)]     0                                            \n",
60 |       "__________________________________________________________________________________________________\n",
61 |       "flatten_1 (Flatten)             (None, 784)          0           input_2[0][0]                    \n",
62 |       "__________________________________________________________________________________________________\n",
63 |       "dense_1 (Dense)                 (None, 128)          100480      flatten_1[0][0]                  \n",
64 |       "__________________________________________________________________________________________________\n",
65 |       "category_output (Dense)         (None, 10)           1290        dense_1[0][0]                    \n",
66 | 
"__________________________________________________________________________________________________\n", 67 | "leftright_output (Dense) (None, 1) 129 dense_1[0][0] \n", 68 | "==================================================================================================\n", 69 | "Total params: 101,899\n", 70 | "Trainable params: 101,899\n", 71 | "Non-trainable params: 0\n", 72 | "__________________________________________________________________________________________________\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "model.summary()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 16, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# loss and optimizer\n", 87 | "loss1 = keras.losses.SparseCategoricalCrossentropy(from_logits=False)\n", 88 | "loss2 = keras.losses.BinaryCrossentropy(from_logits=False)\n", 89 | "optim = keras.optimizers.Adam(lr=0.001)\n", 90 | "metrics = [\"accuracy\"]\n", 91 | "\n", 92 | "losses = {\n", 93 | " \"category_output\": loss1,\n", 94 | " \"leftright_output\": loss2,\n", 95 | "}\n", 96 | "\n", 97 | "model.compile(loss=losses, optimizer=optim, metrics=metrics)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 17, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "uint8 [5 0 4 1 9 2 1 3 1 4 3 5 3 6 1 7 2 8 6 9]\n", 110 | "uint8 [0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 1 1 1]\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "# create data with 2 labels\n", 116 | "mnist = keras.datasets.mnist\n", 117 | "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", 118 | "x_train, x_test = x_train / 255.0, x_test / 255.0\n", 119 | "\n", 120 | "# 0=left, 1=right\n", 121 | "y_leftright = np.zeros(y_train.shape, dtype=np.uint8)\n", 122 | "for idx, y in enumerate(y_train):\n", 123 | " if y > 5:\n", 124 | " y_leftright[idx] = 1\n", 125 | "\n", 126 | "print(y_train.dtype, y_train[0:20])\n", 127 | "print(y_leftright.dtype, y_leftright[0:20])\n", 128 | "\n", 129 | "y= {\"category_output\": y_train,\n", 130 | " \"leftright_output\": y_leftright }" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 18, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "Epoch 1/5\n", 143 | "938/938 - 2s - loss: 0.4943 - category_output_loss: 0.3031 - leftright_output_loss: 0.1912 - category_output_accuracy: 0.9149 - leftright_output_accuracy: 0.9284\n", 144 | "Epoch 2/5\n", 145 | "938/938 - 2s - loss: 0.2341 - category_output_loss: 0.1377 - leftright_output_loss: 0.0964 - category_output_accuracy: 0.9601 - leftright_output_accuracy: 0.9673\n", 146 | "Epoch 3/5\n", 147 | "938/938 - 2s - loss: 0.1747 - category_output_loss: 0.0988 - leftright_output_loss: 0.0760 - category_output_accuracy: 0.9704 - leftright_output_accuracy: 0.9744\n", 148 | "Epoch 4/5\n", 149 | "938/938 - 2s - loss: 0.1410 - category_output_loss: 0.0773 - leftright_output_loss: 0.0637 - category_output_accuracy: 0.9767 - leftright_output_accuracy: 0.9786\n", 150 | "Epoch 5/5\n", 151 | "938/938 - 2s - loss: 0.1161 - category_output_loss: 0.0618 - leftright_output_loss: 0.0542 - category_output_accuracy: 0.9818 - leftright_output_accuracy: 0.9819\n" 152 | ] 153 | }, 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "" 158 | ] 159 | }, 160 | "execution_count": 18, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "# training\n", 167 | 
"model.fit(x_train, y=y, epochs=5,\n", 168 | " batch_size=64, verbose=2)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 19, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "2" 180 | ] 181 | }, 182 | "execution_count": 19, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "# list with 2 predictions\n", 189 | "predictions = model.predict(x_test)\n", 190 | "len(predictions)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 20, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "prediction_category = predictions[0]\n", 200 | "prediction_lr = predictions[1]\n", 201 | "\n", 202 | "pr_cat = prediction_category[0:20]\n", 203 | "prediction_lr = prediction_lr[0:20]\n", 204 | "\n", 205 | "labels_cat = np.argmax(pr_cat, axis=1)\n", 206 | "labels_lr = np.array([1 if p >= 0.5 else 0 for p in prediction_lr])" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 21, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "name": "stdout", 216 | "output_type": "stream", 217 | "text": [ 218 | "[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]\n", 219 | "[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]\n", 220 | "[1 0 0 0 0 0 0 1 1 1 0 1 1 0 0 0 1 1 0 0]\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "print(y_test[0:20])\n", 226 | "print(labels_cat)\n", 227 | "print(labels_lr)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [] 236 | } 237 | ], 238 | "metadata": { 239 | "kernelspec": { 240 | "display_name": "tf", 241 | "language": "python", 242 | "name": "tf" 243 | }, 244 | "language_info": { 245 | "codemirror_mode": { 246 | "name": "ipython", 247 | "version": 3 248 | }, 249 | "file_extension": ".py", 250 | "mimetype": "text/x-python", 251 | "name": "python", 252 | "nbconvert_exporter": "python", 253 | "pygments_lexer": "ipython3", 254 | "version": "3.8.5" 255 | } 256 | }, 257 | "nbformat": 4, 258 | "nbformat_minor": 4 259 | } 260 | -------------------------------------------------------------------------------- /07_functional_API.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 3 | 4 | import tensorflow as tf 5 | from tensorflow import keras 6 | import numpy as np 7 | 8 | # a a a b a 9 | # | | \ / / \ 10 | # b b c b c 11 | # | / \ | \ / 12 | # c c d d d 13 | 14 | # model: Sequential: one input, one output 15 | model = keras.models.Sequential([ 16 | keras.layers.Flatten(input_shape=(28,28)), 17 | keras.layers.Dense(128, activation='relu'), 18 | keras.layers.Dense(10), 19 | ]) 20 | 21 | print(model.summary()) 22 | 23 | # create model with functional API 24 | # Advantages: 25 | # - Models with multiple inputs and outputs 26 | # - Shared layers 27 | # - Extract and reuse nodes in the graph of layers 28 | # - Model are callable like layers (put model into sequential) 29 | # start by creating an Input node 30 | inputs = keras.Input(shape=(28,28)) 31 | 32 | flatten = keras.layers.Flatten() 33 | dense1 = keras.layers.Dense(128, activation='relu') 34 | dense2 = keras.layers.Dense(10) 35 | 36 | x = flatten(inputs) 37 | x = dense1(x) 38 | outputs = dense2(x) 39 | 40 | # or with multiple outputs 41 | #dense2_2 = keras.layers.Dense(1) 42 | #outputs2 = dense2_2(x) 43 | #outputs = [output, outputs2] 44 | 45 | model = keras.Model(inputs=inputs, 
46 | 
47 | print(model.summary())
48 | 
49 | # convert functional to sequential model
50 | # only works if the layers graph is linear.
51 | new_model = keras.models.Sequential()
52 | for layer in model.layers:
53 |     new_model.add(layer)
54 | 
55 | # convert sequential to functional
56 | inputs = keras.Input(shape=(28,28))
57 | x = new_model.layers[0](inputs)
58 | for layer in new_model.layers[1:]:
59 |     x = layer(x)
60 | outputs = x
61 | 
62 | model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")
63 | print(model.summary())
64 | 
65 | 
66 | # access inputs, outputs for model
67 | # access input + output for layer
68 | # access all layers
69 | inputs = model.inputs
70 | outputs = model.outputs
71 | print(inputs)
72 | print(outputs)
73 | 
74 | input0 = model.layers[0].input
75 | output0 = model.layers[0].output
76 | print(input0)
77 | print(output0)
78 | 
79 | # Example: Transfer Learning:
80 | base_model = keras.applications.VGG16()
81 | 
82 | x = base_model.layers[-2].output
83 | new_outputs = keras.layers.Dense(1)(x)
84 | 
85 | new_model = keras.Model(inputs=base_model.inputs, outputs=new_outputs)
86 | 
--------------------------------------------------------------------------------
/08_09_Star_Wars_Project.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": null,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "import os\n",
10 |     "import math\n",
11 |     "import random\n",
12 |     "import shutil\n",
13 |     "\n",
14 |     "import numpy as np\n",
15 |     "import pandas as pd\n",
16 |     "import matplotlib.pyplot as plt\n",
17 |     "import seaborn as sn\n",
18 |     "\n",
19 |     "import tensorflow as tf\n",
20 |     "from tensorflow import keras\n",
21 |     "from tensorflow.keras import layers"
22 |    ]
23 |   },
24 |   {
25 |    "cell_type": "code",
26 |    "execution_count": null,
27 |    "metadata": {},
28 |    "outputs": [],
29 |    "source": [
30 |     "# download the data from kaggle: \n",
31 |     "# https://www.kaggle.com/ihelon/lego-minifigures-tensorflow-tutorial\n",
32 |     "# move the folder into your project folder and create a backup of\n",
33 |     "# the star-wars images at 'lego/star-wars-images/'\n",
34 |     "BASE_DIR = 'lego/star-wars-images/'\n",
35 |     "names = [\"YODA\", \"LUKE SKYWALKER\", \"R2-D2\", \"MACE WINDU\", \"GENERAL GRIEVOUS\"]\n",
36 |     "\n",
37 |     "tf.random.set_seed(1)"
38 |    ]
39 |   },
40 |   {
41 |    "cell_type": "code",
42 |    "execution_count": null,
43 |    "metadata": {},
44 |    "outputs": [],
45 |    "source": [
46 |     "# Reorganize the folder structure:\n",
47 |     "if not os.path.isdir(BASE_DIR + 'train/'):\n",
48 |     "    for name in names:\n",
49 |     "        os.makedirs(BASE_DIR + 'train/' + name)\n",
50 |     "        os.makedirs(BASE_DIR + 'val/' + name)\n",
51 |     "        os.makedirs(BASE_DIR + 'test/' + name)"
52 |    ]
53 |   },
54 |   {
55 |    "cell_type": "code",
56 |    "execution_count": null,
57 |    "metadata": {},
58 |    "outputs": [],
59 |    "source": [
60 |     "# Move the image files\n",
61 |     "orig_folders = [\"0001/\", \"0002/\", \"0003/\", \"0004/\", \"0005/\"]\n",
62 |     "for folder_idx, folder in enumerate(orig_folders):\n",
63 |     "    files = os.listdir(BASE_DIR + folder)\n",
64 |     "    number_of_images = len(files)\n",
65 |     "    n_train = int((number_of_images * 0.6) + 0.5)\n",
66 |     "    n_valid = int((number_of_images*0.25) + 0.5)\n",
67 |     "    n_test = number_of_images - n_train - n_valid\n",
68 |     "    print(number_of_images, n_train, n_valid, n_test)\n",
69 |     "    for idx, file in enumerate(files):\n",
70 |     "        file_name = BASE_DIR + folder + file\n",
71 |     "        if idx < n_train:\n",
72 |     "            shutil.move(file_name, BASE_DIR + \"train/\" + names[folder_idx])\n",
73 |     "        elif idx < n_train + n_valid:\n",
74 |     "            shutil.move(file_name, BASE_DIR + \"val/\" + names[folder_idx])\n",
75 |     "        else:\n",
76 |     "            shutil.move(file_name, BASE_DIR + \"test/\" + names[folder_idx])"
77 |    ]
78 |   },
79 |   {
80 |    "cell_type": "code",
81 |    "execution_count": null,
82 |    "metadata": {},
83 |    "outputs": [],
84 |    "source": [
85 |     "# Generate batches of tensor image data with\n",
86 |     "# optional real-time data augmentation.\n",
87 |     "\n",
88 |     "# preprocessing_function\n",
89 |     "# rescale=1./255 -> [0,1]\n",
90 |     "train_gen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)\n",
91 |     "#    rotation_range=20,\n",
92 |     "#    horizontal_flip=True,\n",
93 |     "#    width_shift_range=0.2, height_shift_range=0.2,\n",
94 |     "#    shear_range=0.2, zoom_range=0.2)\n",
95 |     "\n",
96 |     "valid_gen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)\n",
97 |     "test_gen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)\n",
98 |     "\n",
99 |     "train_batches = train_gen.flow_from_directory(\n",
100 |     "    'lego/star-wars-images/train',\n",
101 |     "    target_size=(256, 256),\n",
102 |     "    class_mode='sparse',\n",
103 |     "    batch_size=4,\n",
104 |     "    shuffle=True,\n",
105 |     "    color_mode=\"rgb\",\n",
106 |     "    classes=names \n",
107 |     ")\n",
108 |     "\n",
109 |     "val_batches = valid_gen.flow_from_directory(\n",
110 |     "    'lego/star-wars-images/val',\n",
111 |     "    target_size=(256, 256),\n",
112 |     "    class_mode='sparse',\n",
113 |     "    batch_size=4,\n",
114 |     "    shuffle=False,\n",
115 |     "    color_mode=\"rgb\",\n",
116 |     "    classes=names\n",
117 |     ")\n",
118 |     "\n",
119 |     "test_batches = test_gen.flow_from_directory(\n",
120 |     "    'lego/star-wars-images/test',\n",
121 |     "    target_size=(256, 256),\n",
122 |     "    class_mode='sparse',\n",
123 |     "    batch_size=4,\n",
124 |     "    shuffle=False,\n",
125 |     "    color_mode=\"rgb\",\n",
126 |     "    classes=names\n",
127 |     ")"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": null,
133 |    "metadata": {},
134 |    "outputs": [],
135 |    "source": [
136 |     "train_batch = train_batches[0]\n",
137 |     "print(train_batch[0].shape)\n",
138 |     "print(train_batch[1])\n",
139 |     "test_batch = test_batches[0]\n",
140 |     "print(test_batch[0].shape)\n",
141 |     "print(test_batch[1])"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": null,
147 |    "metadata": {},
148 |    "outputs": [],
149 |    "source": [
150 |     "def show(batch, pred_labels=None):\n",
151 |     "    plt.figure(figsize=(10,10))\n",
152 |     "    for i in range(4):\n",
153 |     "        plt.subplot(2,2,i+1)\n",
154 |     "        plt.xticks([])\n",
155 |     "        plt.yticks([])\n",
156 |     "        plt.grid(False)\n",
157 |     "        plt.imshow(batch[0][i], cmap=plt.cm.binary)\n",
158 |     "        # batch[1][i] is the (float) class label,\n",
159 |     "        # cast it to int to index into the names list\n",
160 |     "        lbl = names[int(batch[1][i])]\n",
161 |     "        if pred_labels is not None:\n",
162 |     "            lbl += \"/ Pred:\" + names[int(pred_labels[i])]\n",
163 |     "        plt.xlabel(lbl)\n",
164 |     "    plt.show()\n",
165 |     "\n",
166 |     "show(test_batch)"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": null,
172 |    "metadata": {},
173 |    "outputs": [],
174 |    "source": [
175 |     "show(train_batch)"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": null,
181 |    "metadata": {},
182 |    "outputs": [],
183 |    "source": [
184 |     "model = keras.models.Sequential()\n",
185 |     "model.add(layers.Conv2D(32, (3,3), 
strides=(1,1), padding=\"valid\", activation='relu', input_shape=(256, 256,3)))\n", 186 | "model.add(layers.MaxPool2D((2,2)))\n", 187 | "model.add(layers.Conv2D(64, 3, activation='relu'))\n", 188 | "model.add(layers.MaxPool2D((2,2)))\n", 189 | "model.add(layers.Flatten())\n", 190 | "model.add(layers.Dense(64, activation='relu'))\n", 191 | "model.add(layers.Dense(5))\n", 192 | "print(model.summary())" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "# loss and optimizer\n", 202 | "loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", 203 | "optim = keras.optimizers.Adam(lr=0.001)\n", 204 | "metrics = [\"accuracy\"]\n", 205 | "\n", 206 | "model.compile(optimizer=optim, loss=loss, metrics=metrics)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "# training\n", 216 | "epochs = 30\n", 217 | "\n", 218 | "# callbacks\n", 219 | "early_stopping = keras.callbacks.EarlyStopping(\n", 220 | " monitor=\"val_loss\",\n", 221 | " patience=5,\n", 222 | " verbose=2\n", 223 | ")\n", 224 | "\n", 225 | "history = model.fit(train_batches, validation_data=val_batches,\n", 226 | " callbacks=[early_stopping],\n", 227 | " epochs=epochs, verbose=2)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "model.save(\"lego_model.h5\")" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "# plot loss and acc\n", 246 | "plt.figure(figsize=(16, 6))\n", 247 | "plt.subplot(1, 2, 1)\n", 248 | "plt.plot(history.history['loss'], label='train loss')\n", 249 | "plt.plot(history.history['val_loss'], label='valid loss')\n", 250 | "plt.grid()\n", 251 | "plt.legend(fontsize=15)\n", 252 | "\n", 253 | "plt.subplot(1, 2, 2)\n", 254 | "plt.plot(history.history['accuracy'], label='train acc')\n", 255 | "plt.plot(history.history['val_accuracy'], label='valid acc')\n", 256 | "plt.grid()\n", 257 | "plt.legend(fontsize=15);" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "# evaluate on test data\n", 267 | "model.evaluate(test_batches, verbose=2)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "# make some predictions\n", 277 | "predictions = model.predict(test_batches)\n", 278 | "predictions = tf.nn.softmax(predictions)\n", 279 | "labels = np.argmax(predictions, axis=1)\n", 280 | "\n", 281 | "print(test_batches[0][1])\n", 282 | "print(labels[0:4])" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "show(test_batches[0], labels[0:4])" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "# Transfer Learning" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "vgg_model = tf.keras.applications.vgg16.VGG16()\n", 308 | "print(type(vgg_model))\n", 309 | "vgg_model.summary()\n", 310 | "\n", 311 | "# try out different ones, e.g. 
MobileNetV2\n", 312 | "#tl_model = tf.keras.applications.MobileNetV2()\n", 313 | "#print(type(tl_model))\n", 314 | "#tl_model.summary()\n" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "# convert to Sequential model, omit the last layer\n", 324 | "# this works with VGG16 because the structure is linear\n", 325 | "model = keras.models.Sequential()\n", 326 | "for layer in vgg_model.layers[0:-1]:\n", 327 | " model.add(layer)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "model.summary()" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "# set trainable=False for all layers\n", 346 | "# we don't want to train them again\n", 347 | "for layer in model.layers:\n", 348 | " layer.trainable = False\n", 349 | "model.summary()" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "# add a last classification layer for our use case with 5 classes\n", 359 | "model.add(layers.Dense(5))" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": null, 365 | "metadata": {}, 366 | "outputs": [], 367 | "source": [ 368 | "# loss and optimizer\n", 369 | "loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", 370 | "optim = keras.optimizers.Adam(lr=0.001)\n", 371 | "metrics = [\"accuracy\"]\n", 372 | "\n", 373 | "model.compile(optimizer=optim, loss=loss, metrics=metrics)" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "# get the preprocessing function of this model\n", 383 | "preprocess_input = tf.keras.applications.vgg16.preprocess_input" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "# Generate batches of tensor image data with real-time data augmentation.\n", 393 | "\n", 394 | "train_gen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input)\n", 395 | "valid_gen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input)\n", 396 | "test_gen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input)\n", 397 | "\n", 398 | "train_batches = train_gen.flow_from_directory(\n", 399 | " 'lego/star-wars-images/train',\n", 400 | " target_size=(224, 224),\n", 401 | " class_mode='sparse',\n", 402 | " batch_size=4,\n", 403 | " shuffle=True,\n", 404 | " color_mode=\"rgb\",\n", 405 | " classes=names \n", 406 | ")\n", 407 | "\n", 408 | "val_batches = valid_gen.flow_from_directory(\n", 409 | " 'lego/star-wars-images/val',\n", 410 | " target_size=(224, 224),\n", 411 | " class_mode='sparse',\n", 412 | " batch_size=4,\n", 413 | " shuffle=True,\n", 414 | " color_mode=\"rgb\",\n", 415 | " classes=names\n", 416 | ")\n", 417 | "\n", 418 | "test_batches = test_gen.flow_from_directory(\n", 419 | " 'lego/star-wars-images/test',\n", 420 | " target_size=(224, 224),\n", 421 | " class_mode='sparse',\n", 422 | " batch_size=4,\n", 423 | " shuffle=False,\n", 424 | " color_mode=\"rgb\",\n", 425 | " classes=names\n", 426 | ")" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | 
"metadata": {}, 433 | "outputs": [], 434 | "source": [ 435 | "epochs = 30\n", 436 | "\n", 437 | "early_stopping = keras.callbacks.EarlyStopping(\n", 438 | " monitor=\"val_loss\",\n", 439 | " patience=5,\n", 440 | " verbose=2\n", 441 | ")\n", 442 | "\n", 443 | "model.fit(train_batches, validation_data=val_batches,\n", 444 | " callbacks=[early_stopping],\n", 445 | " epochs=epochs, verbose=2)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": {}, 452 | "outputs": [], 453 | "source": [ 454 | "model.evaluate(test_batches, verbose=2)" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": null, 460 | "metadata": {}, 461 | "outputs": [], 462 | "source": [] 463 | } 464 | ], 465 | "metadata": { 466 | "kernelspec": { 467 | "display_name": "tf", 468 | "language": "python", 469 | "name": "tf" 470 | }, 471 | "language_info": { 472 | "codemirror_mode": { 473 | "name": "ipython", 474 | "version": 3 475 | }, 476 | "file_extension": ".py", 477 | "mimetype": "text/x-python", 478 | "name": "python", 479 | "nbconvert_exporter": "python", 480 | "pygments_lexer": "ipython3", 481 | "version": "3.8.5" 482 | } 483 | }, 484 | "nbformat": 4, 485 | "nbformat_minor": 4 486 | } 487 | -------------------------------------------------------------------------------- /10_rnn.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" 3 | 4 | import tensorflow as tf 5 | from tensorflow import keras 6 | from tensorflow.keras import layers 7 | 8 | mnist = keras.datasets.mnist 9 | 10 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 11 | x_train, x_test = x_train / 255.0, x_test / 255.0 12 | # 28, 28 -> treat image as sequence 13 | # input_size=28 14 | # seq_length=28 15 | 16 | 17 | # RNN Outputs and states 18 | # ---------------------- 19 | # By default, the output of a RNN layer contains a single vector per sample. 20 | # This vector is the RNN cell output corresponding to the last timestep, 21 | # containing information about the entire input sequence. 22 | # The shape of this output is (N, units) 23 | # 24 | # A RNN layer can also return the entire sequence of outputs 25 | # for each sample (one vector per timestep per sample), 26 | # if you set return_sequences=True. The shape of this output 27 | # is (N, timesteps, units). 
28 | 
29 | 
30 | # model
31 | model = keras.models.Sequential()
32 | model.add(keras.Input(shape=(28,28))) # seq_length, input_size
33 | #model.add(layers.SimpleRNN(128, return_sequences=True, activation='relu')) # N, 28, 128
34 | model.add(layers.LSTM(128, return_sequences=False, activation='relu')) # N, 128
35 | model.add(layers.Dense(10))
36 | print(model.summary())
37 | 
38 | # loss and optimizer
39 | loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
40 | optim = keras.optimizers.Adam(lr=0.001)
41 | metrics = ["accuracy"]
42 | 
43 | model.compile(loss=loss, optimizer=optim, metrics=metrics)
44 | 
45 | # training
46 | batch_size = 64
47 | epochs = 5
48 | 
49 | model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)
50 | 
51 | # evaluate
52 | model.evaluate(x_test, y_test, batch_size=batch_size, verbose=2)
53 | 
54 | 
55 | 
56 | 
--------------------------------------------------------------------------------
/11_NLP.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 5,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "import tensorflow as tf\n",
10 |     "from tensorflow import keras\n",
11 |     "import pandas as pd\n",
12 |     "import numpy as np\n",
13 |     "import os\n",
14 |     "import matplotlib.pyplot as plt\n",
15 |     "import time"
16 |    ]
17 |   },
18 |   {
19 |    "cell_type": "code",
20 |    "execution_count": 6,
21 |    "metadata": {},
22 |    "outputs": [],
23 |    "source": [
24 |     "# https://www.kaggle.com/c/nlp-getting-started : NLP Disaster Tweets\n",
25 |     "df = pd.read_csv(\"data/twitter_train.csv\")"
26 |    ]
27 |   },
28 |   {
29 |    "cell_type": "code",
30 |    "execution_count": 7,
31 |    "metadata": {},
32 |    "outputs": [
33 |     {
34 |      "data": {
35 |       "text/plain": [
36 |        "(7613, 5)"
37 |       ]
38 |      },
39 |      "execution_count": 7,
40 |      "metadata": {},
41 |      "output_type": "execute_result"
42 |     }
43 |    ],
44 |    "source": [
45 |     "df.shape"
46 |    ]
47 |   },
48 |   {
49 |    "cell_type": "code",
50 |    "execution_count": 8,
51 |    "metadata": {},
52 |    "outputs": [
53 |     {
54 |      "data": {
55 |       "text/html": [
56 |        "<div>\n",
57 |        "<p>(stripped pandas HTML rendering of df.head(); the same table\n",
58 |        "is shown in the text/plain output below)</p>\n",
124 |        "</div>
" 125 | ], 126 | "text/plain": [ 127 | " id keyword location text \\\n", 128 | "0 1 NaN NaN Our Deeds are the Reason of this #earthquake M... \n", 129 | "1 4 NaN NaN Forest fire near La Ronge Sask. Canada \n", 130 | "2 5 NaN NaN All residents asked to 'shelter in place' are ... \n", 131 | "3 6 NaN NaN 13,000 people receive #wildfires evacuation or... \n", 132 | "4 7 NaN NaN Just got sent this photo from Ruby #Alaska as ... \n", 133 | "\n", 134 | " target \n", 135 | "0 1 \n", 136 | "1 1 \n", 137 | "2 1 \n", 138 | "3 1 \n", 139 | "4 1 " 140 | ] 141 | }, 142 | "execution_count": 8, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "df.head()" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 9, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "name": "stdout", 158 | "output_type": "stream", 159 | "text": [ 160 | "3271\n", 161 | "4342\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "print((df.target == 1).sum()) # Disaster\n", 167 | "print((df.target == 0).sum()) # No Disaster" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 10, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/plain": [ 178 | "'!\"#$%&\\'()*+,-./:;<=>?@[\\\\]^_`{|}~'" 179 | ] 180 | }, 181 | "execution_count": 10, 182 | "metadata": {}, 183 | "output_type": "execute_result" 184 | } 185 | ], 186 | "source": [ 187 | "# Preprocessing\n", 188 | "import re\n", 189 | "import string\n", 190 | "\n", 191 | "def remove_URL(text):\n", 192 | " url = re.compile(r\"https?://\\S+|www\\.\\S+\")\n", 193 | " return url.sub(r\"\", text)\n", 194 | "\n", 195 | "# https://stackoverflow.com/questions/34293875/how-to-remove-punctuation-marks-from-a-string-in-python-3-x-using-translate/34294022\n", 196 | "def remove_punct(text):\n", 197 | " translator = str.maketrans(\"\", \"\", string.punctuation)\n", 198 | " return text.translate(translator)\n", 199 | "\n", 200 | "string.punctuation" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 11, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "@bbcmtd Wholesale Markets ablaze http://t.co/lHYXEOHY6C\n", 213 | "t\n", 214 | "@bbcmtd Wholesale Markets ablaze \n" 215 | ] 216 | } 217 | ], 218 | "source": [ 219 | "pattern = re.compile(r\"https?://(\\S+|www)\\.\\S+\")\n", 220 | "for t in df.text:\n", 221 | " matches = pattern.findall(t)\n", 222 | " for match in matches:\n", 223 | " print(t)\n", 224 | " print(match)\n", 225 | " print(pattern.sub(r\"\", t))\n", 226 | " if len(matches) > 0:\n", 227 | " break" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 12, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "df[\"text\"] = df.text.map(remove_URL) # map(lambda x: remove_URL(x))\n", 237 | "df[\"text\"] = df.text.map(remove_punct)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 13, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "name": "stderr", 247 | "output_type": "stream", 248 | "text": [ 249 | "[nltk_data] Downloading package stopwords to\n", 250 | "[nltk_data] /Users/patrickPatricklober/nltk_data...\n", 251 | "[nltk_data] Package stopwords is already up-to-date!\n" 252 | ] 253 | } 254 | ], 255 | "source": [ 256 | "# remove stopwords\n", 257 | "# pip install nltk\n", 258 | "import nltk\n", 259 | "nltk.download('stopwords')\n", 260 | "from nltk.corpus import stopwords\n", 261 | "\n", 262 | 
"# Stop Words: A stop word is a commonly used word (such as “the”, “a”, “an”, “in”) that a search engine\n", 263 | "# has been programmed to ignore, both when indexing entries for searching and when retrieving them \n", 264 | "# as the result of a search query.\n", 265 | "stop = set(stopwords.words(\"english\"))\n", 266 | "\n", 267 | "# https://stackoverflow.com/questions/5486337/how-to-remove-stop-words-using-nltk-or-python\n", 268 | "def remove_stopwords(text):\n", 269 | " filtered_words = [word.lower() for word in text.split() if word.lower() not in stop]\n", 270 | " return \" \".join(filtered_words)" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 14, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "data": { 280 | "text/plain": [ 281 | "{'a',\n", 282 | " 'about',\n", 283 | " 'above',\n", 284 | " 'after',\n", 285 | " 'again',\n", 286 | " 'against',\n", 287 | " 'ain',\n", 288 | " 'all',\n", 289 | " 'am',\n", 290 | " 'an',\n", 291 | " 'and',\n", 292 | " 'any',\n", 293 | " 'are',\n", 294 | " 'aren',\n", 295 | " \"aren't\",\n", 296 | " 'as',\n", 297 | " 'at',\n", 298 | " 'be',\n", 299 | " 'because',\n", 300 | " 'been',\n", 301 | " 'before',\n", 302 | " 'being',\n", 303 | " 'below',\n", 304 | " 'between',\n", 305 | " 'both',\n", 306 | " 'but',\n", 307 | " 'by',\n", 308 | " 'can',\n", 309 | " 'couldn',\n", 310 | " \"couldn't\",\n", 311 | " 'd',\n", 312 | " 'did',\n", 313 | " 'didn',\n", 314 | " \"didn't\",\n", 315 | " 'do',\n", 316 | " 'does',\n", 317 | " 'doesn',\n", 318 | " \"doesn't\",\n", 319 | " 'doing',\n", 320 | " 'don',\n", 321 | " \"don't\",\n", 322 | " 'down',\n", 323 | " 'during',\n", 324 | " 'each',\n", 325 | " 'few',\n", 326 | " 'for',\n", 327 | " 'from',\n", 328 | " 'further',\n", 329 | " 'had',\n", 330 | " 'hadn',\n", 331 | " \"hadn't\",\n", 332 | " 'has',\n", 333 | " 'hasn',\n", 334 | " \"hasn't\",\n", 335 | " 'have',\n", 336 | " 'haven',\n", 337 | " \"haven't\",\n", 338 | " 'having',\n", 339 | " 'he',\n", 340 | " 'her',\n", 341 | " 'here',\n", 342 | " 'hers',\n", 343 | " 'herself',\n", 344 | " 'him',\n", 345 | " 'himself',\n", 346 | " 'his',\n", 347 | " 'how',\n", 348 | " 'i',\n", 349 | " 'if',\n", 350 | " 'in',\n", 351 | " 'into',\n", 352 | " 'is',\n", 353 | " 'isn',\n", 354 | " \"isn't\",\n", 355 | " 'it',\n", 356 | " \"it's\",\n", 357 | " 'its',\n", 358 | " 'itself',\n", 359 | " 'just',\n", 360 | " 'll',\n", 361 | " 'm',\n", 362 | " 'ma',\n", 363 | " 'me',\n", 364 | " 'mightn',\n", 365 | " \"mightn't\",\n", 366 | " 'more',\n", 367 | " 'most',\n", 368 | " 'mustn',\n", 369 | " \"mustn't\",\n", 370 | " 'my',\n", 371 | " 'myself',\n", 372 | " 'needn',\n", 373 | " \"needn't\",\n", 374 | " 'no',\n", 375 | " 'nor',\n", 376 | " 'not',\n", 377 | " 'now',\n", 378 | " 'o',\n", 379 | " 'of',\n", 380 | " 'off',\n", 381 | " 'on',\n", 382 | " 'once',\n", 383 | " 'only',\n", 384 | " 'or',\n", 385 | " 'other',\n", 386 | " 'our',\n", 387 | " 'ours',\n", 388 | " 'ourselves',\n", 389 | " 'out',\n", 390 | " 'over',\n", 391 | " 'own',\n", 392 | " 're',\n", 393 | " 's',\n", 394 | " 'same',\n", 395 | " 'shan',\n", 396 | " \"shan't\",\n", 397 | " 'she',\n", 398 | " \"she's\",\n", 399 | " 'should',\n", 400 | " \"should've\",\n", 401 | " 'shouldn',\n", 402 | " \"shouldn't\",\n", 403 | " 'so',\n", 404 | " 'some',\n", 405 | " 'such',\n", 406 | " 't',\n", 407 | " 'than',\n", 408 | " 'that',\n", 409 | " \"that'll\",\n", 410 | " 'the',\n", 411 | " 'their',\n", 412 | " 'theirs',\n", 413 | " 'them',\n", 414 | " 'themselves',\n", 415 | " 'then',\n", 416 | " 'there',\n", 
417 | " 'these',\n", 418 | " 'they',\n", 419 | " 'this',\n", 420 | " 'those',\n", 421 | " 'through',\n", 422 | " 'to',\n", 423 | " 'too',\n", 424 | " 'under',\n", 425 | " 'until',\n", 426 | " 'up',\n", 427 | " 've',\n", 428 | " 'very',\n", 429 | " 'was',\n", 430 | " 'wasn',\n", 431 | " \"wasn't\",\n", 432 | " 'we',\n", 433 | " 'were',\n", 434 | " 'weren',\n", 435 | " \"weren't\",\n", 436 | " 'what',\n", 437 | " 'when',\n", 438 | " 'where',\n", 439 | " 'which',\n", 440 | " 'while',\n", 441 | " 'who',\n", 442 | " 'whom',\n", 443 | " 'why',\n", 444 | " 'will',\n", 445 | " 'with',\n", 446 | " 'won',\n", 447 | " \"won't\",\n", 448 | " 'wouldn',\n", 449 | " \"wouldn't\",\n", 450 | " 'y',\n", 451 | " 'you',\n", 452 | " \"you'd\",\n", 453 | " \"you'll\",\n", 454 | " \"you're\",\n", 455 | " \"you've\",\n", 456 | " 'your',\n", 457 | " 'yours',\n", 458 | " 'yourself',\n", 459 | " 'yourselves'}" 460 | ] 461 | }, 462 | "execution_count": 14, 463 | "metadata": {}, 464 | "output_type": "execute_result" 465 | } 466 | ], 467 | "source": [ 468 | "stop" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": 15, 474 | "metadata": {}, 475 | "outputs": [], 476 | "source": [ 477 | "df[\"text\"] = df.text.map(remove_stopwords)" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": 16, 483 | "metadata": {}, 484 | "outputs": [ 485 | { 486 | "data": { 487 | "text/plain": [ 488 | "0 deeds reason earthquake may allah forgive us\n", 489 | "1 forest fire near la ronge sask canada\n", 490 | "2 residents asked shelter place notified officer...\n", 491 | "3 13000 people receive wildfires evacuation orde...\n", 492 | "4 got sent photo ruby alaska smoke wildfires pou...\n", 493 | " ... \n", 494 | "7608 two giant cranes holding bridge collapse nearb...\n", 495 | "7609 ariaahrary thetawniest control wild fires cali...\n", 496 | "7610 m194 0104 utc5km volcano hawaii\n", 497 | "7611 police investigating ebike collided car little...\n", 498 | "7612 latest homes razed northern california wildfir...\n", 499 | "Name: text, Length: 7613, dtype: object" 500 | ] 501 | }, 502 | "execution_count": 16, 503 | "metadata": {}, 504 | "output_type": "execute_result" 505 | } 506 | ], 507 | "source": [ 508 | "df.text" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": 17, 514 | "metadata": {}, 515 | "outputs": [], 516 | "source": [ 517 | "from collections import Counter\n", 518 | "\n", 519 | "# Count unique words\n", 520 | "def counter_word(text_col):\n", 521 | " count = Counter()\n", 522 | " for text in text_col.values:\n", 523 | " for word in text.split():\n", 524 | " count[word] += 1\n", 525 | " return count\n", 526 | "\n", 527 | "\n", 528 | "counter = counter_word(df.text)" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": 18, 534 | "metadata": {}, 535 | "outputs": [ 536 | { 537 | "data": { 538 | "text/plain": [ 539 | "17971" 540 | ] 541 | }, 542 | "execution_count": 18, 543 | "metadata": {}, 544 | "output_type": "execute_result" 545 | } 546 | ], 547 | "source": [ 548 | "len(counter)" 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": 19, 554 | "metadata": {}, 555 | "outputs": [ 556 | { 557 | "data": { 558 | "text/plain": [ 559 | "Counter({'deeds': 2,\n", 560 | " 'reason': 20,\n", 561 | " 'earthquake': 50,\n", 562 | " 'may': 88,\n", 563 | " 'allah': 9,\n", 564 | " 'forgive': 2,\n", 565 | " 'us': 164,\n", 566 | " 'forest': 65,\n", 567 | " 'fire': 250,\n", 568 | " 'near': 54,\n", 569 | " 'la': 25,\n", 570 | " 'ronge': 
1,\n", 571 | " 'sask': 1,\n", 572 | " 'canada': 11,\n", 573 | " 'residents': 8,\n", 574 | " 'asked': 9,\n", 575 | " 'shelter': 6,\n", 576 | " 'place': 26,\n", 577 | " 'notified': 1,\n", 578 | " 'officers': 8,\n", 579 | " 'evacuation': 50,\n", 580 | " 'orders': 11,\n", 581 | " 'expected': 15,\n", 582 | " '13000': 4,\n", 583 | " 'people': 196,\n", 584 | " 'receive': 2,\n", 585 | " 'wildfires': 11,\n", 586 | " 'california': 117,\n", 587 | " 'got': 112,\n", 588 | " 'sent': 13,\n", 589 | " 'photo': 41,\n", 590 | " 'ruby': 1,\n", 591 | " 'alaska': 6,\n", 592 | " 'smoke': 48,\n", 593 | " 'pours': 1,\n", 594 | " 'school': 66,\n", 595 | " 'rockyfire': 4,\n", 596 | " 'update': 37,\n", 597 | " 'hwy': 9,\n", 598 | " '20': 26,\n", 599 | " 'closed': 20,\n", 600 | " 'directions': 1,\n", 601 | " 'due': 31,\n", 602 | " 'lake': 14,\n", 603 | " 'county': 38,\n", 604 | " 'cafire': 2,\n", 605 | " 'flood': 56,\n", 606 | " 'disaster': 152,\n", 607 | " 'heavy': 20,\n", 608 | " 'rain': 44,\n", 609 | " 'causes': 13,\n", 610 | " 'flash': 21,\n", 611 | " 'flooding': 50,\n", 612 | " 'streets': 8,\n", 613 | " 'manitou': 1,\n", 614 | " 'colorado': 16,\n", 615 | " 'springs': 5,\n", 616 | " 'areas': 9,\n", 617 | " 'im': 299,\n", 618 | " 'top': 54,\n", 619 | " 'hill': 7,\n", 620 | " 'see': 105,\n", 621 | " 'woods': 2,\n", 622 | " 'theres': 45,\n", 623 | " 'emergency': 157,\n", 624 | " 'happening': 12,\n", 625 | " 'building': 30,\n", 626 | " 'across': 20,\n", 627 | " 'street': 24,\n", 628 | " 'afraid': 5,\n", 629 | " 'tornado': 31,\n", 630 | " 'coming': 51,\n", 631 | " 'area': 39,\n", 632 | " 'three': 29,\n", 633 | " 'died': 28,\n", 634 | " 'heat': 45,\n", 635 | " 'wave': 34,\n", 636 | " 'far': 28,\n", 637 | " 'haha': 20,\n", 638 | " 'south': 27,\n", 639 | " 'tampa': 4,\n", 640 | " 'getting': 56,\n", 641 | " 'flooded': 4,\n", 642 | " 'hah': 3,\n", 643 | " 'wait': 26,\n", 644 | " 'second': 26,\n", 645 | " 'live': 60,\n", 646 | " 'gonna': 43,\n", 647 | " 'fvck': 1,\n", 648 | " 'raining': 4,\n", 649 | " 'florida': 6,\n", 650 | " 'tampabay': 1,\n", 651 | " '18': 11,\n", 652 | " '19': 13,\n", 653 | " 'days': 28,\n", 654 | " 'ive': 43,\n", 655 | " 'lost': 23,\n", 656 | " 'count': 3,\n", 657 | " 'bago': 2,\n", 658 | " 'myanmar': 18,\n", 659 | " 'arrived': 8,\n", 660 | " 'damage': 52,\n", 661 | " 'bus': 37,\n", 662 | " '80': 3,\n", 663 | " 'multi': 2,\n", 664 | " 'car': 90,\n", 665 | " 'crash': 119,\n", 666 | " 'breaking': 39,\n", 667 | " 'whats': 29,\n", 668 | " 'man': 110,\n", 669 | " 'love': 100,\n", 670 | " 'fruits': 2,\n", 671 | " 'summer': 42,\n", 672 | " 'lovely': 8,\n", 673 | " 'fast': 22,\n", 674 | " 'goooooooaaaaaal': 1,\n", 675 | " 'ridiculous': 4,\n", 676 | " 'london': 15,\n", 677 | " 'cool': 31,\n", 678 | " 'skiing': 1,\n", 679 | " 'wonderful': 5,\n", 680 | " 'day': 109,\n", 681 | " 'looooool': 1,\n", 682 | " 'wayi': 1,\n", 683 | " 'cant': 102,\n", 684 | " 'eat': 7,\n", 685 | " 'shit': 56,\n", 686 | " 'nyc': 12,\n", 687 | " 'last': 83,\n", 688 | " 'week': 37,\n", 689 | " 'girlfriend': 6,\n", 690 | " 'cooool': 1,\n", 691 | " 'like': 345,\n", 692 | " 'pasta': 2,\n", 693 | " 'end': 42,\n", 694 | " 'bbcmtd': 1,\n", 695 | " 'wholesale': 4,\n", 696 | " 'markets': 7,\n", 697 | " 'ablaze': 28,\n", 698 | " 'always': 46,\n", 699 | " 'try': 19,\n", 700 | " 'bring': 17,\n", 701 | " 'metal': 13,\n", 702 | " 'rt': 107,\n", 703 | " 'africanbaze': 1,\n", 704 | " 'newsnigeria': 1,\n", 705 | " 'flag': 21,\n", 706 | " 'set': 48,\n", 707 | " 'aba': 14,\n", 708 | " 'crying': 9,\n", 709 | " 'plus': 8,\n", 710 | " 'side': 23,\n", 711 | " 
'look': 73,\n", 1559 | "          ...})" 1560 | ] 1561 | }, 1562 | "execution_count": 19, 1563 | "metadata": {}, 1564 | "output_type": "execute_result" 1565 | } 1566 | ], 1567 | "source": [ 1568 | "counter" 1569 | ] 1570 | },
1571 | { 1572 | "cell_type": "code", 1573 | "execution_count": 20, 1574 | "metadata": {}, 1575 | "outputs": [ 1576 | { 1577 | "data": { 1578 | "text/plain": [ 1579 | "[('like', 345), ('im', 299), ('amp', 298), ('fire', 250), ('get', 229)]" 1580 | ] 1581 | }, 1582 | "execution_count": 20, 1583 | "metadata": {}, 1584 | "output_type": "execute_result" 1585 | } 1586 | ], 1587 | "source": [ 1588 | "counter.most_common(5)" 1589 | ] 1590 | },
1591 | { 1592 | "cell_type": "code", 1593 | "execution_count": 22, 1594 | "metadata": {}, 1595 | "outputs": [], 1596 | "source": [ 1597 | "num_unique_words = len(counter)" 1598 | ] 1599 | },
1600 | { 1601 | "cell_type": "code", 1602 | "execution_count": 23, 1603 | "metadata": {}, 1604 | "outputs": [], 1605 | "source": [ 1606 | "# Split the dataset into a training set and a validation set (80/20)\n", 1607 | "train_size = int(df.shape[0] * 0.8)\n", 1608 | "\n", 1609 | "train_df = df[:train_size]\n", 1610 | "val_df = df[train_size:]\n", 1611 | "\n", 1612 | "# split into texts and labels\n", 1613 | "train_sentences = train_df.text.to_numpy()\n", 1614 | "train_labels = train_df.target.to_numpy()\n", 1615 | "val_sentences = val_df.text.to_numpy()\n", 1616 | "val_labels = val_df.target.to_numpy()" 1617 | ] 1618 | },
1619 | { 1620 | "cell_type": "code", 1621 | "execution_count": 24, 1622 | "metadata": {}, 1623 | "outputs": [ 1624 | { 1625 | "data": { 1626 | "text/plain": [ 1627 | "((6090,), (1523,))" 1628 | ] 1629 | }, 1630 | "execution_count": 24, 1631 | "metadata": {}, 1632 | "output_type": "execute_result" 1633 | } 1634 | ], 1635 | "source": [ 1636 | "train_sentences.shape, val_sentences.shape" 1637 | ] 1638 | },
1639 | { 1640 | "cell_type": "code", 1641 | "execution_count": 25, 1642 | "metadata": {}, 1643 | "outputs": [], 1644 | "source": [ 1645 | "# Tokenize\n", 1646 | "from tensorflow.keras.preprocessing.text import Tokenizer\n", 1647 | "\n", 1648 | "# vectorize the text corpus by turning each text into a sequence of integers\n", 1649 | "tokenizer = Tokenizer(num_words=num_unique_words)\n", 1650 | "tokenizer.fit_on_texts(train_sentences) # fit only on the training sentences to avoid leaking validation vocabulary" 1651 | ] 1652 | },
1653 | { 1654 | "cell_type": "code", 1655 | "execution_count": 26, 1656 | "metadata": {}, 1657 | "outputs": [], 1658 | "source": [ 1659 | "# each word gets a unique integer index\n", 1660 | "word_index = tokenizer.word_index" 1661 | ] 1662 | },
1663 | { 1664 | "cell_type": "code", 1665 | "execution_count": 27, 1666 | "metadata": {}, 1667 | "outputs": [ 1668 | { 1669 | "data": { 1670 | "text/plain": [ 1671 | "{'like': 1,\n", 1672 | " 'amp': 2,\n", 1673 | " 'fire': 3,\n", 1674 | " 'im': 4,\n", 1675 | " 'get': 5,\n", 2671 | " ...}" 2672 | ] 2673 | }, 2674 | "execution_count": 27, 2675 | "metadata": {}, 2676 | "output_type": "execute_result" 2677 | } 2678 | ], 2679 | "source": [ 2680 | "word_index" 2681 | ] 2682 | },
2683 | { 2684 | "cell_type": "code", 2685 | "execution_count": 28, 2686 | "metadata": {}, 2687 | "outputs": [], 2688 | "source": [ 2689 | "train_sequences = tokenizer.texts_to_sequences(train_sentences)\n", 2690 | "val_sequences = tokenizer.texts_to_sequences(val_sentences)" 2691 | ] 2692 | },
2693 | { 2694 | "cell_type": "code", 2695 | "execution_count": 29, 2696 | "metadata": {}, 2697 | "outputs": [ 2698 | { 2699 | "name": "stdout", 2700 | "output_type": "stream", 2701 | "text": [ 2702 | "['three people died heat wave far'\n", 2703 | " 
'haha south tampa getting flooded hah wait second live south tampa gonna gonna fvck flooding'\n", 2704 | " 'raining flooding florida tampabay tampa 18 19 days ive lost count'\n", 2705 | " 'flood bago myanmar arrived bago'\n", 2706 | " 'damage school bus 80 multi car crash breaking']\n", 2707 | "[[520, 8, 395, 156, 297, 411], [749, 470, 2248, 138, 2249, 2813, 521, 611, 188, 470, 2248, 189, 189, 5679, 117], [2814, 117, 1884, 5680, 2248, 1285, 1450, 522, 256, 644, 2815], [99, 3742, 612, 1451, 3742], [111, 91, 336, 3743, 3744, 52, 22, 312]]\n" 2708 | ] 2709 | } 2710 | ], 2711 | "source": [ 2712 | "print(train_sentences[10:15])\n", 2713 | "print(train_sequences[10:15])" 2714 | ] 2715 | }, 2716 | { 2717 | "cell_type": "code", 2718 | "execution_count": 30, 2719 | "metadata": {}, 2720 | "outputs": [ 2721 | { 2722 | "data": { 2723 | "text/plain": [ 2724 | "((6090, 20), (1523, 20))" 2725 | ] 2726 | }, 2727 | "execution_count": 30, 2728 | "metadata": {}, 2729 | "output_type": "execute_result" 2730 | } 2731 | ], 2732 | "source": [ 2733 | "# Pad the sequences to have the same length\n", 2734 | "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", 2735 | "\n", 2736 | "# Max number of words in a sequence\n", 2737 | "max_length = 20\n", 2738 | "\n", 2739 | "train_padded = pad_sequences(train_sequences, maxlen=max_length, padding=\"post\", truncating=\"post\")\n", 2740 | "val_padded = pad_sequences(val_sequences, maxlen=max_length, padding=\"post\", truncating=\"post\")\n", 2741 | "train_padded.shape, val_padded.shape" 2742 | ] 2743 | }, 2744 | { 2745 | "cell_type": "code", 2746 | "execution_count": 31, 2747 | "metadata": {}, 2748 | "outputs": [ 2749 | { 2750 | "data": { 2751 | "text/plain": [ 2752 | "array([520, 8, 395, 156, 297, 411, 0, 0, 0, 0, 0, 0, 0,\n", 2753 | " 0, 0, 0, 0, 0, 0, 0], dtype=int32)" 2754 | ] 2755 | }, 2756 | "execution_count": 31, 2757 | "metadata": {}, 2758 | "output_type": "execute_result" 2759 | } 2760 | ], 2761 | "source": [ 2762 | "train_padded[10]" 2763 | ] 2764 | }, 2765 | { 2766 | "cell_type": "code", 2767 | "execution_count": 32, 2768 | "metadata": {}, 2769 | "outputs": [ 2770 | { 2771 | "name": "stdout", 2772 | "output_type": "stream", 2773 | "text": [ 2774 | "three people died heat wave far\n", 2775 | "[520, 8, 395, 156, 297, 411]\n", 2776 | "[520 8 395 156 297 411 0 0 0 0 0 0 0 0 0 0 0 0\n", 2777 | " 0 0]\n" 2778 | ] 2779 | } 2780 | ], 2781 | "source": [ 2782 | "print(train_sentences[10])\n", 2783 | "print(train_sequences[10])\n", 2784 | "print(train_padded[10])" 2785 | ] 2786 | }, 2787 | { 2788 | "cell_type": "code", 2789 | "execution_count": 33, 2790 | "metadata": {}, 2791 | "outputs": [], 2792 | "source": [ 2793 | "# Check reversing the indices\n", 2794 | "\n", 2795 | "# flip (key, value)\n", 2796 | "reverse_word_index = dict([(idx, word) for (word, idx) in word_index.items()])" 2797 | ] 2798 | }, 2799 | { 2800 | "cell_type": "code", 2801 | "execution_count": 34, 2802 | "metadata": {}, 2803 | "outputs": [ 2804 | { 2805 | "data": { 2806 | "text/plain": [ 2807 | "{1: 'like',\n", 2808 | " 2: 'amp',\n", 2809 | " 3: 'fire',\n", 2810 | " 4: 'im',\n", 2811 | " 5: 'get',\n", 2812 | " 6: 'via',\n", 2813 | " 7: 'new',\n", 2814 | " 8: 'people',\n", 2815 | " 9: 'news',\n", 2816 | " 10: 'dont',\n", 2817 | " 11: 'emergency',\n", 2818 | " 12: 'one',\n", 2819 | " 13: '2',\n", 2820 | " 14: 'us',\n", 2821 | " 15: 'video',\n", 2822 | " 16: 'disaster',\n", 2823 | " 17: 'burning',\n", 2824 | " 18: 'body',\n", 2825 | " 19: 'would',\n", 2826 | " 20: 'buildings',\n", 2827 | " 
21: 'police',\n",
       " 22: 'crash',\n",
       " 23: 'first',\n",
       " 24: 'california',\n",
       " 25: 'still',\n",
       " 26: 'man',\n",
       " 27: 'got',\n",
       " 28: 'know',\n",
       " 29: 'day',\n",
       " 30: 'back',\n",
       " ...}"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reverse_word_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "def decode(sequence):\n",
    "    return \" \".join([reverse_word_index.get(idx, \"?\") for idx in sequence])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[520, 8, 395, 156, 297, 411]\n",
      "three people died heat wave far\n"
     ]
    }
   ],
   "source": [
    "decoded_text = decode(train_sequences[10])\n",
    "\n",
    "print(train_sequences[10])\n",
    "print(decoded_text)"
   ]
  },
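  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Quick sanity check before building the model (an added sketch, not part of the original run).\n",
    "# Every padded sequence should have length max_length, and the largest word index must stay\n",
    "# below the vocabulary size passed to the Embedding layer in the next cell.\n",
    "# Uses train_padded, max_length, and num_unique_words from the preprocessing cells above.\n",
    "print(train_padded.shape)  # (num_train_sentences, max_length)\n",
    "print(train_padded.max(), \"<\", num_unique_words)\n",
    "assert train_padded.shape[1] == max_length"
   ]
  },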
"embedding (Embedding) (None, 20, 32) 575072 \n", 3864 | "_________________________________________________________________\n", 3865 | "lstm (LSTM) (None, 64) 24832 \n", 3866 | "_________________________________________________________________\n", 3867 | "dense (Dense) (None, 1) 65 \n", 3868 | "=================================================================\n", 3869 | "Total params: 599,969\n", 3870 | "Trainable params: 599,969\n", 3871 | "Non-trainable params: 0\n", 3872 | "_________________________________________________________________\n" 3873 | ] 3874 | } 3875 | ], 3876 | "source": [ 3877 | "# Create LSTM model\n", 3878 | "from tensorflow.keras import layers\n", 3879 | "\n", 3880 | "# Embedding: https://www.tensorflow.org/tutorials/text/word_embeddings\n", 3881 | "# Turns positive integers (indexes) into dense vectors of fixed size. (other approach could be one-hot-encoding)\n", 3882 | "\n", 3883 | "# Word embeddings give us a way to use an efficient, dense representation in which similar words have \n", 3884 | "# a similar encoding. Importantly, you do not have to specify this encoding by hand. An embedding is a \n", 3885 | "# dense vector of floating point values (the length of the vector is a parameter you specify).\n", 3886 | "\n", 3887 | "model = keras.models.Sequential()\n", 3888 | "model.add(layers.Embedding(num_unique_words, 32, input_length=max_length))\n", 3889 | "\n", 3890 | "# The layer will take as input an integer matrix of size (batch, input_length),\n", 3891 | "# and the largest integer (i.e. word index) in the input should be no larger than num_words (vocabulary size).\n", 3892 | "# Now model.output_shape is (None, input_length, 32), where `None` is the batch dimension.\n", 3893 | "\n", 3894 | "\n", 3895 | "model.add(layers.LSTM(64, dropout=0.1))\n", 3896 | "model.add(layers.Dense(1, activation=\"sigmoid\"))\n", 3897 | "\n", 3898 | "model.summary()" 3899 | ] 3900 | }, 3901 | { 3902 | "cell_type": "code", 3903 | "execution_count": 38, 3904 | "metadata": {}, 3905 | "outputs": [], 3906 | "source": [ 3907 | "loss = keras.losses.BinaryCrossentropy(from_logits=False)\n", 3908 | "optim = keras.optimizers.Adam(lr=0.001)\n", 3909 | "metrics = [\"accuracy\"]\n", 3910 | "\n", 3911 | "model.compile(loss=loss, optimizer=optim, metrics=metrics)" 3912 | ] 3913 | }, 3914 | { 3915 | "cell_type": "code", 3916 | "execution_count": 39, 3917 | "metadata": {}, 3918 | "outputs": [ 3919 | { 3920 | "name": "stdout", 3921 | "output_type": "stream", 3922 | "text": [ 3923 | "Epoch 1/20\n", 3924 | "191/191 - 6s - loss: 0.5540 - accuracy: 0.7043 - val_loss: 0.5121 - val_accuracy: 0.7656\n", 3925 | "Epoch 2/20\n", 3926 | "191/191 - 5s - loss: 0.2869 - accuracy: 0.8877 - val_loss: 0.5320 - val_accuracy: 0.7735\n", 3927 | "Epoch 3/20\n", 3928 | "191/191 - 6s - loss: 0.1635 - accuracy: 0.9473 - val_loss: 0.6233 - val_accuracy: 0.7511\n", 3929 | "Epoch 4/20\n", 3930 | "191/191 - 5s - loss: 0.1111 - accuracy: 0.9658 - val_loss: 0.8674 - val_accuracy: 0.7603\n", 3931 | "Epoch 5/20\n", 3932 | "191/191 - 6s - loss: 0.0858 - accuracy: 0.9744 - val_loss: 0.7517 - val_accuracy: 0.7525\n", 3933 | "Epoch 6/20\n", 3934 | "191/191 - 6s - loss: 0.0741 - accuracy: 0.9760 - val_loss: 1.0622 - val_accuracy: 0.7564\n", 3935 | "Epoch 7/20\n", 3936 | "191/191 - 6s - loss: 0.0598 - accuracy: 0.9785 - val_loss: 0.8945 - val_accuracy: 0.7393\n", 3937 | "Epoch 8/20\n", 3938 | "191/191 - 6s - loss: 0.0475 - accuracy: 0.9788 - val_loss: 1.0891 - val_accuracy: 0.7446\n", 3939 | "Epoch 9/20\n", 3940 | "191/191 - 6s - loss: 
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/20\n",
      "191/191 - 6s - loss: 0.5540 - accuracy: 0.7043 - val_loss: 0.5121 - val_accuracy: 0.7656\n",
      "Epoch 2/20\n",
      "191/191 - 5s - loss: 0.2869 - accuracy: 0.8877 - val_loss: 0.5320 - val_accuracy: 0.7735\n",
      "Epoch 3/20\n",
      "191/191 - 6s - loss: 0.1635 - accuracy: 0.9473 - val_loss: 0.6233 - val_accuracy: 0.7511\n",
      "Epoch 4/20\n",
      "191/191 - 5s - loss: 0.1111 - accuracy: 0.9658 - val_loss: 0.8674 - val_accuracy: 0.7603\n",
      "Epoch 5/20\n",
      "191/191 - 6s - loss: 0.0858 - accuracy: 0.9744 - val_loss: 0.7517 - val_accuracy: 0.7525\n",
      "Epoch 6/20\n",
      "191/191 - 6s - loss: 0.0741 - accuracy: 0.9760 - val_loss: 1.0622 - val_accuracy: 0.7564\n",
      "Epoch 7/20\n",
      "191/191 - 6s - loss: 0.0598 - accuracy: 0.9785 - val_loss: 0.8945 - val_accuracy: 0.7393\n",
      "Epoch 8/20\n",
      "191/191 - 6s - loss: 0.0475 - accuracy: 0.9788 - val_loss: 1.0891 - val_accuracy: 0.7446\n",
      "Epoch 9/20\n",
      "191/191 - 6s - loss: 0.0433 - accuracy: 0.9813 - val_loss: 1.2928 - val_accuracy: 0.7452\n",
      "Epoch 10/20\n",
      "191/191 - 6s - loss: 0.0429 - accuracy: 0.9803 - val_loss: 1.1937 - val_accuracy: 0.7413\n",
      "Epoch 11/20\n",
      "191/191 - 6s - loss: 0.0389 - accuracy: 0.9828 - val_loss: 1.1027 - val_accuracy: 0.7406\n",
      "Epoch 12/20\n",
      "191/191 - 6s - loss: 0.0373 - accuracy: 0.9831 - val_loss: 1.4668 - val_accuracy: 0.7301\n",
      "Epoch 13/20\n",
      "191/191 - 5s - loss: 0.0327 - accuracy: 0.9833 - val_loss: 1.3277 - val_accuracy: 0.7341\n",
      "Epoch 14/20\n",
      "191/191 - 5s - loss: 0.0366 - accuracy: 0.9836 - val_loss: 1.1498 - val_accuracy: 0.7295\n",
      "Epoch 15/20\n",
      "191/191 - 7s - loss: 0.0353 - accuracy: 0.9841 - val_loss: 1.5738 - val_accuracy: 0.7183\n",
      "Epoch 16/20\n",
      "191/191 - 7s - loss: 0.0362 - accuracy: 0.9826 - val_loss: 1.3879 - val_accuracy: 0.7360\n",
      "Epoch 17/20\n",
      "191/191 - 5s - loss: 0.0364 - accuracy: 0.9839 - val_loss: 1.2604 - val_accuracy: 0.7406\n",
      "Epoch 18/20\n",
      "191/191 - 5s - loss: 0.0376 - accuracy: 0.9846 - val_loss: 1.4514 - val_accuracy: 0.7413\n",
      "Epoch 19/20\n",
      "191/191 - 4s - loss: 0.0303 - accuracy: 0.9847 - val_loss: 1.7029 - val_accuracy: 0.7374\n",
      "Epoch 20/20\n",
      "191/191 - 5s - loss: 0.0347 - accuracy: 0.9831 - val_loss: 1.7891 - val_accuracy: 0.7387\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<tensorflow.python.keras.callbacks.History at 0x...>"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(train_padded, train_labels, epochs=20, validation_data=(val_padded, val_labels), verbose=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictions = model.predict(train_padded)\n",
    "# threshold the sigmoid probabilities at 0.5 to get hard class labels\n",
    "predictions = [1 if p > 0.5 else 0 for p in predictions]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['three people died heat wave far'\n",
      " 'haha south tampa getting flooded hah wait second live south tampa gonna gonna fvck flooding'\n",
      " 'raining flooding florida tampabay tampa 18 19 days ive lost count'\n",
      " 'flood bago myanmar arrived bago'\n",
      " 'damage school bus 80 multi car crash breaking' 'whats man' 'love fruits'\n",
      " 'summer lovely' 'car fast' 'goooooooaaaaaal']\n",
      "[1 1 1 1 1 0 0 0 0 0]\n",
      "[1, 1, 1, 1, 1, 0, 0, 0, 0, 0]\n"
     ]
    }
   ],
   "source": [
    "print(train_sentences[10:20])\n",
    "\n",
    "print(train_labels[10:20])\n",
    "print(predictions[10:20])"
   ]
  },
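  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For a single summary number on held-out data (an added sketch): model.evaluate returns\n",
    "# the compiled loss and metrics -- here binary cross-entropy and accuracy -- on the\n",
    "# validation split (val_padded / val_labels from above).\n",
    "val_loss, val_acc = model.evaluate(val_padded, val_labels, verbose=0)\n",
    "print(f\"val_loss={val_loss:.4f}, val_acc={val_acc:.4f}\")"
   ]
  },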
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tf",
   "language": "python",
   "name": "tf"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
TensorFlow beginner tutorials from my YouTube channel.
--------------------------------------------------------------------------------
/slides/CNN.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patrickloeber/tensorflow-course/c8ef0fe262ca2a1eec2156453dd100812ec2b7c3/slides/CNN.pdf
--------------------------------------------------------------------------------
/slides/NeuralNet_TF.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patrickloeber/tensorflow-course/c8ef0fe262ca2a1eec2156453dd100812ec2b7c3/slides/NeuralNet_TF.pdf
--------------------------------------------------------------------------------
/slides/RNN.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/patrickloeber/tensorflow-course/c8ef0fe262ca2a1eec2156453dd100812ec2b7c3/slides/RNN.pdf
--------------------------------------------------------------------------------