├── .gitignore
├── Arabic_CNN.ipynb
├── Arabic_LSTM.ipynb
├── Comparison.ipynb
├── WorkOnVersion.ipynb
├── history
│   ├── fasttext-true.db
│   ├── fasttext.db
│   ├── hist_sg_true_10.json
│   ├── lstm_results.db
│   └── results.csv
├── readme.md
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | data/
4 | img/
5 | arabic_bins/
6 | *.py[cod]
7 | *$py.class
8 | web*
9 | *.npy
10 | *.csv
11 | # C extensions
12 | *.so
13 | .idea
14 | # Distribution / packaging
15 | .Python
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | .hypothesis/
53 | .pytest_cache/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # pyenv
81 | .python-version
82 |
83 | # celery beat schedule file
84 | celerybeat-schedule
85 |
86 | # SageMath parsed files
87 | *.sage.py
88 |
89 | # Environments
90 | .env
91 | .venv
92 | env/
93 | venv/
94 | ENV/
95 | env.bak/
96 | venv.bak/
97 |
98 | # Spyder project settings
99 | .spyderproject
100 | .spyproject
101 |
102 | # Rope project settings
103 | .ropeproject
104 |
105 | # mkdocs documentation
106 | /site
107 |
108 | # mypy
109 | .mypy_cache/
110 |
111 | # corpus text
112 | *.txt
113 |
114 | # word segmentation results
115 | *.json
116 | *.csv
117 |
118 | # trained models
119 | *.bin
120 | *.vec
121 |
--------------------------------------------------------------------------------
/Comparison.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Results and Comparison of CNN models"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas \n",
17 | "import matplotlib\n",
18 | "import matplotlib.pyplot as plt\n"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "lines = pandas.read_csv('./history/results.csv', index_col=0)"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 3,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "data": {
37 | "text/html": [
38 | "
\n",
39 | "\n",
52 | "
\n",
53 | " \n",
54 | " \n",
55 | " | \n",
56 | " train acc | \n",
57 | " train acc std | \n",
58 | " test acc | \n",
59 | " test acc std | \n",
60 | " train loss | \n",
61 | " train loss std | \n",
62 | " test loss | \n",
63 | " test loss std | \n",
64 | "
\n",
65 | " \n",
66 | " \n",
67 | " \n",
68 | " w2v sg train | \n",
69 | " 96.202 | \n",
70 | " 0.640 | \n",
71 | " 94.924 | \n",
72 | " 0.100 | \n",
73 | " 12.032 | \n",
74 | " 2.020 | \n",
75 | " 16.286 | \n",
76 | " 0.390 | \n",
77 | "
\n",
78 | " \n",
79 | " w2v cbow No train | \n",
80 | " 95.083 | \n",
81 | " 0.040 | \n",
82 | " 94.514 | \n",
83 | " 0.070 | \n",
84 | " 15.751 | \n",
85 | " 0.084 | \n",
86 | " 19.071 | \n",
87 | " 0.190 | \n",
88 | "
\n",
89 | " \n",
90 | " fasttext sg No train | \n",
91 | " 95.916 | \n",
92 | " 0.009 | \n",
93 | " 95.382 | \n",
94 | " 0.017 | \n",
95 | " 12.907 | \n",
96 | " 0.012 | \n",
97 | " 14.690 | \n",
98 | " 0.019 | \n",
99 | "
\n",
100 | " \n",
101 | " fasttext sg train | \n",
102 | " 96.865 | \n",
103 | " 0.012 | \n",
104 | " 96.166 | \n",
105 | " 0.018 | \n",
106 | " 10.102 | \n",
107 | " 0.058 | \n",
108 | " 12.812 | \n",
109 | " 0.045 | \n",
110 | "
\n",
111 | " \n",
112 | "
\n",
113 | "
"
114 | ],
115 | "text/plain": [
116 | " train acc train acc std test acc test acc std \\\n",
117 | "w2v sg train 96.202 0.640 94.924 0.100 \n",
118 | "w2v cbow No train 95.083 0.040 94.514 0.070 \n",
119 | "fasttext sg No train 95.916 0.009 95.382 0.017 \n",
120 | "fasttext sg train 96.865 0.012 96.166 0.018 \n",
121 | "\n",
122 | " train loss train loss std test loss test loss std \n",
123 | "w2v sg train 12.032 2.020 16.286 0.390 \n",
124 | "w2v cbow No train 15.751 0.084 19.071 0.190 \n",
125 | "fasttext sg No train 12.907 0.012 14.690 0.019 \n",
126 | "fasttext sg train 10.102 0.058 12.812 0.045 "
127 | ]
128 | },
129 | "execution_count": 3,
130 | "metadata": {},
131 | "output_type": "execute_result"
132 | }
133 | ],
134 | "source": [
135 | "lines.head()"
136 | ]
137 | },
138 | {
139 | "cell_type": "markdown",
140 | "metadata": {},
141 | "source": [
142 | "## Test accuracy"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 11,
148 | "metadata": {},
149 | "outputs": [
150 | {
151 | "data": {
152 | "image/png": "\n",
153 | "text/plain": [
154 | ""
155 | ]
156 | },
157 | "metadata": {},
158 | "output_type": "display_data"
159 | }
160 | ],
161 | "source": [
162 | "x = list(lines.index)\n",
163 | "means = list(lines['test acc'])\n",
164 | "stds = list(lines['test acc std'])\n",
165 | "\n",
166 | "plt.figure(num=1, figsize=(16, 8))\n",
167 | "plt.rc('xtick', labelsize=16) \n",
168 | "plt.rc('ytick', labelsize=16) \n",
169 | "plt.errorbar(x, means, stds, fmt='o', color='white', ecolor='black', elinewidth=15, capsize=10);\n"
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {},
175 | "source": [
176 | "## Test loss"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 13,
182 | "metadata": {},
183 | "outputs": [
184 | {
185 | "data": {
186 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAA74AAAHaCAYAAAAnhJVkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3XuYZVddJ/zvL4kChuEewiUTGifAACMypFBwQAICchuBkTeQAIKSjsDIAOo7DlGwGCBeESEaIGE0KNIyGWVAbiLkQmCIvBUE5PJy7aLlmgbCrXMhpNf8sXclJ0VV96nuU13Vqz+f5znPqbP32nuvc+qcdfZ3r73XqdZaAAAAoFeHbXQFAAAAYD0JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4dsdEVmHSrW92qbdmyZaOrAQAAwDq45JJLvtZaO+pAb3dTBd8tW7ZkYWFho6sBAADAOqiqz2/Edp3qDAAAQNcEXwAAALom+AIAANA1wRcAAICuCb4AAAB0TfAFAACga4IvAAAAXRN8AQAA6JrgCwAAQNcEXwAAALom+AIAANA1wRcAAICuCb4AAAB0TfAFAACga4IvAAAAXRN8AQAA6JrgCwAAQNcEXwAAALom+ALso/n5+VTVzG7z8/Mb/ZQAALpUrbWNrsO15ubm2sLCwkZXA2DNqmqfl91M7TAAwHqqqktaa3MHert6fAEAAOia4AsAAEDXBF8AAAC6JvgCAADQNcEXAACArgm+AAAAdE3wBVgnJ510UrZv355rrrkm27dvz0knnbTRVQIAOCQdsdEVAOjRSSedlLPPPjtHHnlkkmTLli05++yzkyTbtm3byKoBABxypurxrapjquqMqnp/VV1eVa2qtqxQ7o5V9b+q6ptVtauqzq+qA/7jxAAb7fTTT7829C458sgjc/rpp29QjQAADl3Tnup8XJITk1yW5KKVClTVLZO8N8m/S/LLSZ4wzjq/qu66n/UEOKgce+yxa5oOAMD6mTb4vqe1dnRr7RFJzl2lzDOSHJ3kUa21N7TW/i7Jo5JcnuSF+19VgIPHjh071jQdAID1M1Xwba3tnqLYfZJ8urX2mYnldmXoIX5UVbmeGDhknHbaadm1a9f1pu3atSunnXbaBtUIAODQNctRna9J8r0Vpl+V5EZJ/s0MtwWwqW3bti1bt27N4uJidu/encXFxWzdutXAVgAAG2CWvbCfTPKQqrpla+3rSVJVhyX5iXH+LWa4LYBNb9u2bYIuAMAmMMse31eN6/uLqvo3VXXbJK9Icsdx/oqnS1fVqVW1UFULO3funGF1AAAAYIbBt7X2uSRPTHJ8ks8k+VKS+yZ52Vjky6ssd1Zrba61NnfUUUfNqjoAAACQZLY9vmmt/U2S2ye5W5LjWmvHJ7lxkn9prRnKFAAAgANu5iMtt9auSfKJJKmq2yV5fJI/mPV2AAAAYBpTB9+qetz45/Hj/cOrameSna21C6vqh5L8fpILk3w7yd2TPC/Jx5K8dHZVBgAAgOmtpcf33GWPzxzvL0xyQpKW5E5JTk5ysyRfSPJnSU5vra30M0cAAACw7qYOvq212sv87yd51H7XCAAAAGZopoNbARxK5ufnU1Wp2uNxwb1aWsf8/PxsKgYAwPVUa22j63Ctubm5trCwsNHVAAAAYB1U1SWttbkDvV09vgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRf
AAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAujZV8K2qY6rqjKp6f1VdXlWtqrasUO7YqnptVe0Yy32qql5cVUfOuuIAAAAwjSOmLHdckhOTXJLkoiQPXV5gDLfvSvJDSZ6fZEeSeyd5YZI7JXn8DOoLAAAAazJt8H1Pa+3oJKmqU7JC8E3yHzIE3J9trb1znHZ+Vd0iya9X1Y+01i7f7xoDAADAGkx1qnNrbfcUxX54vP/2sunfHLdTa6gXAAAAzMQsB7d6V5JPJ/m9qrpbVd24qh6U5NlJXtVa2zXDbQEAAMBUZhZ8W2tXJrnfuM6PJflOkncneUuSX1ltuao6taoWqmph586ds6oOAAAAJJlh8K2qGyZ5Q5JbJ3lykgck+X8zDGr1p6st11o7q7U211qbO+qoo2ZVHQAAAEgy/eBW03hakhOSHNda++w47T1V9a0kZ1XVq1prH57h9gAAAGCvZnmN748luWwi9C75wHh/1xluCwAAAKYyy+D7lSQ3r6rjlk3/yfH+izPcFgAAAExl6lOdq+px45/Hj/cPr6qdSXa21i5Mck6SX03ytqp6SZIdSeaSPD/JJUneN6tKAwAAwLTWco3vucsenzneX5jkhNbaYlXdJ8l8khcnuVWSf0lyVpKXTPlbwAAAADBTUwff1lpNUebjSU7crxoBAADADM3yGl8AAADYdARfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADo2lTBt6qOqaozqur9VXV5VbWq2rKszPw4faXbletReQAAANibI6Ysd1ySE5NckuSiJA9docxrkrxj2bQjx2lv3tcKAgAAwP6YNvi+p7V2dJJU1SlZIfi21r6Q5AuT06rqyeM2Xruf9QQAAIB9MtWpzq213fu4/qck+WqSv9/H5QEAAGC/rNvgVlV1TJIHJvmr1tr312s7AAAAsCfrOarzk8f17/E056o6taoWqmph586d61gdAAAADkXrGXx/Ick/tdY+sqdCrbWzWmtzrbW5o446ah2rAwAAwKFoXYJvVf1Ekn8bg1oBAACwwdarx/cpSb6f5PXrtH4AAACYysyDb1X9cJInJHlba81FuwAAAGyoaX/HN1X1uPHP48f7h1fVziQ7W2sXThR9VJJbxGnOAAAAbAJTB98k5y57fOZ4f2GSEyamPyXJN5K8Zd+rBQAAALMxdfBtrdWU5R6979UBAACA2VrPnzMCAACADSf4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8E0yPz+fqprZbX5+fqOfEgAAAKNqrW10Ha41NzfXFhYWNrQOVbXPy26m1xIAAGCzqapLWmtzB3q7enwBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADo
muALAABA1wRfAAAAuib4TuGkk07K9u3bc80112T79u056aSTNrpKAAAATOmIja7AZnfSSSfl7LPPzpFHHpkk2bJlS84+++wkybZt2zayagAAAExBj+9enH766deG3iVHHnlkTj/99A2qEQAAAGsh+O7Fscceu6bpAAAAbC6C717s2LFjTdMBAADYXATfvTjttNOya9eu603btWtXTjvttA2qEQAAAGsh+O7Ftm3bsnXr1iwuLmb37t1ZXFzM1q1bDWwFAABwkKjW2kbX4Vpzc3NtYWFhQ+tQVfu87GZ6LQEAADabqrqktTZ3oLerxxcAAICuCb4AAAB0TfAFAACga4IvAAAAXRN8AQAA6JrgCwAAQNcEXwAAALo2VfCtqmOq6oyqen9VXV5Vraq2rFL2rlV1blV9raquqKpPVtWzZ1lpAAAAmNa0Pb7HJTkxyWVJLlqtUFXNJfnHJDdIckqSRyR5aZLD96+aAAAAsG+OmLLce1prRydJVZ2S5KHLC1TVYUlem+TdrbXHTsw6f79rCQAAAPtoquDbWts9RbETktwtydP3p0IAAAAwS7Mc3Op+4/0Nq+riqrq6qi6tqldU1Y1muB0AAACY2iyD7+3G+zckeWeShyT5/QzX+r5+tYWq6tSqWqiqhZ07d86wOgAAADD9Nb7TWArRr2utvWD8+4KqOjzJ71bV3VprH1++UGvtrCRnJcnc3FybYX0AAABgpj2+Xx/v/2HZ9HeO9/ec4bYAAABgKrMMvh8b75f32tZ4P80AWQAAADBTswy+b09yVZKHLZv+s+P9wgy3BQAAAFOZ+hrfqnrc+Ofx4/3Dq2pnkp2ttQtba1+vqt9J8vyq+naS85LMJXlBkte21j4zy4oDAADANNYyuNW5yx6fOd5fmOE3fJPkvyf5TpJnJvn1JF9O8gdJXrTvVQQAAIB9N/Wpzq21WuV2wkSZ1lr7o9baca21H26t3aG19oLW2tXrUvsZmZ+fT1WlqvZeeA+W1jE/Pz+bigEAALDfqrXN8wtCc3NzbWHBpcAAAAA9qqpLWmtzB3q7sxzcCgAAADYdwRcAAICuCb4AAAB0TfAFAACga4IvAAAAXRN8AQAA6JrgCwAAQNcEXwAAALom+AIAANA1wRcAAICuCb4AAAB0TfAFAACga4IvAAAAXRN8AQAA6JrgCwAAQNcEXwAAALom+AIAANA1wRcAAICuCb4AAAB0TfAFAACga4IvAAAAXRN8AQAA6JrgCwAAQNcEXwAAALom+AIAANA1wRcAAICuCb4AAAB0TfAFAACga4IvAAAAXRN8AQAA6JrgCwAAQNcEXwAAALom+AIAANA1wRcAAICuCb4AAAB0TfAFAACga4IvAAAAXRN8AQAA6JrgCwAAQNcEXwAAALom+AIAANA1wRcAAICuCb4AAAB0TfAFAACga4IvAAAAXRN8AQAA6JrgCwAAQNemCr5VdUxVnVFV76+qy6uqVdWWFcq1VW73nHXFAQAAYBpHTFnuuCQnJrkkyUVJHrqHsuckefWyaZ9ac80AAABgBqYNvu9prR2dJFV1SvYcfL/YWrt4v2sGAAAAMzDVqc6ttd3rXREAAABYD+sxuNUzquqq8Vrg86rq/uuwDQAAAJjKrIPv65I8M8mDk5ya5JZJzquqE1ZboKpOraqFqlrYuXPnjKsDAADAoW7aa3yn0lp78sTDi6rqTUk+muTFSe63yjJnJTkrSebm5tos6wMAAADr+ju+rbXvJHlrknuv53YAAABgNesafEeVRE8uAAAAG2Jdg29V3STJI5P843puBwAAAFYz9TW+VfW48c/jx/uHV9XOJDtbaxdW1a8nuUuS85N8Kckdkvx6ktskeeLsqgwAAADTW8vgVucue3zmeH9hkhOSfDLJY8fbTZN8O8n7kjyttfaB/asmAAAA7Jupg29rrfYy/++S/N1+1wgAAABm6EAMbgUAAAAbRvAFAACga4IvAAAAXRN8AQAA6JrgCwAAQNcEXwAA
ALom+AIAANA1wRcAAICuCb4AAAB0TfAFAACga4IvAMABMj8/n6pa8ba4uLjiMouLi6suMz8/f0DrD3CwqtbaRtfhWnNzc21hYWGjqwEAsO6q6nqPr7nmmhx22A/2SezevTuHH3749aZtpv03gLWoqktaa3MHert6fAEADpDJHt/lduzYseIyK03X4wuwNnp8AQAAOCD0+AIAAMA6EHwBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADo2lTBt6qOqaozqur9VXV5VbWq2rKXZZ43lnvvLCoKAAAA+2LaHt/jkpyY5LIkF+2tcFX9aJLfTHLpvlcNAAAA9t+0wfc9rbWjW2uPSHLuFOVfmeSvknxin2sGAAAAMzBV8G2t7Z52hVV1cpJ7JXnevlYKAAAAZmWmg1tV1c2TvCzJf22tfWOW6wYAAIB9MetRnf8gyaeSnDPtAlV1alUtVNXCzp07Z1wdAAAADnUzC75Vdf8kv5DkGa21Nu1yrbWzWmtzrbW5o446albVAQAAgCTJETNc16uT/I8kX6iqm02s//Dx8RWttatmuD0AAADYq1kG37uOt6evMO+yJM9N8scz3B4AAADs1SyD7wNXmPbHSQ5P8qwkn5nhtgAAAGAqUwffqnrc+Ofx4/3Dq2pnkp2ttQtbaxessMw3kxyx0jwAAAA4ENbS43vussdnjvcXJjlhJrUBAACAGZs6+LbWaq0rb62dsNZlAAAAYJZm/Tu+AAAAsKkIvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0DXBFwAAgK4JvgAAAHRN8AUAAKBrgi8AAABdE3wBAADomuALAABA1wRfAAAAuib4AgAA0LWpgm9VHVNVZ1TV+6vq8qpqVbVlWZk7VNWbqurzVXVFVX2tqi6oqoevR8UBAABgGtP2+B6X5MQklyW5aJUyN07ytSS/leQRSZ6W5LtJ3lZV/2k/6wkAAAD75Igpy72ntXZ0klTVKUkeurxAa+1jGcLutarqrUm2J/nFJH+7f1UFAACAtZuqx7e1tntfVt5a+36SbyW5el+WBwAAgP01bY/v1KrqsAyB+lZJtia5c5Jnz3o7AAAAMI2ZB98kv5/k18a/v5vkCa21d69WuKpOTXJqkhx77LHrUB0AAAAOZevxc0Z/nOTeSf5jkrcneX1VPWq1wq21s1prc621uaOOOmodqgMAAMChbOY9vq21LyT5wvjwLVV1QZI/TPKWWW8LAAAA9mY9enyXW8jwc0gAAABwwK1r8B0Hurpfks+u53YAAABgNVOf6lxVjxv/PH68f3hV7Uyys7V2YVXNJ7lFkvcl+UqS22T4Xd+fSHLyzGoMAAAAa7CWa3zPXfb4zPH+wiQnJPlgkuckeUKSm2YIvx9Ocv/W2vv2r5oAAACwb6YOvq212sv8Nyd5837XCAAAAGboQAxuBQAAABtG8AUAANjE5ufnU1Ur3k4++eTs
2rXreuV37dqVk08+edVl5ufnN+aJbKBqrW10Ha41NzfXFhYWNroaAAAAm1LV9a9A3b59e7Zs2fID5RYXF3PHO97xetM2Q/arqktaa3MHert6fAEAAA5Sxx577JqmH6oEXwAAgIPUjh071jT9UCX4AgAAHKROO+20Fa/xPe200zaoRpuT4AsAALCJTQ5utdy2bduydevWLC4uZvfu3VlcXMzWrVuzbdu2HyhrcKtNwuBWAAAA/TK4FQAAAKwDwRcAAICuCb4AAAB0TfAFAACga4IvAAAAXRN8AQAA6JrgCwAAQNcEXwAAALom+AIAANA1wRcAAICuCb4AAAB0TfAFAACga4IvAAAAXRN8AQAA6JrgCwAAQNcEXwAAALom+AIAANA1wRcAAICuVWtto+twrarameTzG12PPbhVkq9tdCWAg462A1gr7QawLw6GtuMOrbWjDvRGN1Xw3eyqaqG1NrfR9QAOLtoOYK20G8C+0HaszqnOAAAAdE3wBQAAoGuC79qctdEVAA5K2g5grbQbwL7QdqzCNb4AAAB0TY8vAAAAXRN8AQAA6NqmD75V9TNV9bqq+mxVXTHev7Kqbr3RdZtGVd2squar6l7rsO4tVdWq6qmzXjesl83yma6qxap63YHc5jTG9qKNr8sPLZt33Cw/81X11Kr6pVmsa4V1X1BVF6zHujl4VdUvVdWnq+p7VfXNGa/7OVX1n1aY/piq+tVZbmuV7c9X1YPWezvTGD9/rar+YoV5p4zztsxoW+v2vMd6zq/Huumf9ubAOJjagE0ffJM8Pcktk7w4ycOS/E6Sn0tycVXdeCMrNqWbJfntJDMPvkm+nOS+Sd66DuuG9XKwf6YPlB9N8rR13sZTk6xL8E3yzPEGSZKqul2GQVf+T5IHJXnwjDfxnCQ/sCOa5DFJ1n1HNMN3/abYEZ3wxKq62zpvYz2f932TvGad1k3HtDcH1EHTBhwxqxWto2e21nZOPL6wqj6V5MIkJyb5s42p1vqoqhu01q6apuxY7uJ1rhLM2iH1md4P70zyW1V1Tmvtyo2uzFrapiRprX18PevDQelOSQ5P8trW2ns3ujKHgA8luV2SFyX5+Q2uS5J9akfs47CvtDeb0Ia3Aa21A3JLMpekJbnfxLRnjdNePDHtTuO0R+xhXTcayzx/fHzbJN9P8qwVyv5GkquTHLWH9d07yT8k+XqSy5N8LsmZy8o8OMk/JbkyyWeSnJLknCSLe1jvlrGey29PHedfkOS9Sf7juO6rkjx3nPcrSd6f5BtJvpkh4D5ylfU/dWLaOUm+kOTfJ7lofD6fTvL0A/W/djs0bpv5Mz2We8D4uf5Wkl1JPpzkaRPzF5O8LsnW8TN9ZZIPJnngCut60rj8lUm+luQvk9x2Yv6fJPnMsmUuGZ/TcRPTXpLk0owj6q9S7/lxubkku5P82sS845Z/5sfpDxvbiyvG5/u/k9xlL6/PBSu0TReM8546Pv7pJOeObdCHxnn3TvK/xnbmiiSfTHJ6khutsP4LJh6fMK7z58bX62tJdo7/g5tt9PvZbX1v43fT8vfbOeO8JyQ5b3w/fDfD9+FTVljHs5N8YnzfXZZkIcljx3mLK61/le0uTqzzVklemeSLGb6D//8kp07MP2x8Ly8muenE9B8b6/EH4+OVvuvn9/B63DnJG8f24MokO8bP2hETZe6V4Xv8yiT/kuS0JC9M0qZ4vS/IsH/x7LEucxPzThmnbZmYVkmeO36ev5fhjLI/SXKTvWxn1eed6/ZH7puh1+2KJC9f4//8eq9jrmsf75ThbLfvJvl8khckOWyj3+dum+O2yuf+nHGe9maG7c3B1gYcyB7fD2bYeXpQhsY4499X5Prd4w9Kck2GF381DxjvP5EkrbUvV9W7kjw5yRnLyj4pyTva9XuYrjWeWvn3ST6QYWfvOxkC5U9NlLlbhhf3Axn+UT+c5PlJbpphx3Q1X85wGsTfZjid883j9M9OlLlzkldkOCL7uQxB
N2MdXpPhzX9EhnD8lqp6RGvt7XvYZpLcJMnrk/xxkv+e5BeTvLKqPtlaO38vy8K0NuVnOkmq6tFJ/ibJ+5L8coaQdfckd1hhu8cn+c0MX0K/keTtVfXjrbVPjus6Ncmrk7whyfMy9KCcnuQnq+perbXvZmi8/3NVHdta21FVN09yz4nX4jMTr8X5bWy99+JDGb6Y/ltVndVa+84qz/VhGdqn85I8PsmNM3zu31tV92ytfXGV9T8zQ+g8fHyNkuTby8r8VZJtSR6X684QOnas2zkZ2su7Z/jC+dEM7ePevDzJW5KcnOQuSX4/w/vjKVMsy8HrRRkOBr0iyX/O0H4sfYZ/NMPBlN/N8J3600leU1U3aq29Kkmq6olJXprhvX1RhoNl90hyi3Edj03ytgwHqObHaUvrPyrDAZufGx9fNa7zJhnaiBuNy2xP8rMZvi9v0Fo7o7W2u6qWDny9OskTqupGSf46yccytB3JsGP3/gyfi1eP076wh9fjLRnaz2dkaJ9un+QRGS9Bq6pbJXl3ki8l+YUMYfS5GfYN1uJVGU67XLq0ZDUvydC+/WmSv0tytwz/sx+vqge01lbb19nb875phtfqDzPsSF8xTt/r/3wv3pjkz5O8LMP+0Qsz7Kz/+RTL0j/tzfWtZ3tzcLUBB/gIzJsy7PQlw4v9jQxvrKuT3Hic/tdJLt7DOv5VhiMkH8/1j1Q8McMRgLtMTLvnOO3EPaxvqdfqHnso8/oMb+gfmZh22wxHRRZXW24st2Vc/ykrzLtg/Gffcy/rOCzDTuc7k7xphXU/dWLaOeO0B05Mu0GGN/pZB/L/7db/bZN+pivDAaOF7OHo31jme0mOXVaXbyT5y/Hx4Um+uvQcJ8rdb6zHfxkf32L8LD9lfPyYDEeI/0eSbeO0G4+vyx7Pvsh1RzOPyHBg7PtJXjDO+4Ee3/F5fnrZa3fHcVt/tJdtXZDkvStMf+q4nZftZfka6/mk8fnfctm6L5h4fMK4ztcuW8efZGhLV+0Fd+vjluHMqZbkhD2UWfq+OzvJh5e9Tz64l/UvJnndCtPPSfKFFaY/f3zv3WnZ9LMzfGdOfqYeO9b9FzNcN/jdJHdetlzLxNkue6jnrcayP7eHMqeP7dMxE9NuNLaQyzp4AAAKqklEQVRHbYptXPvZzjBWQEvy0+Pj6/X4ju3XlRl7xCbW8aS91XNPzzvX7Y88ei/Lr/g/n1j3/MTj+aX/w7Jy/5zknRv9HnfbPDftzbXlDkR7c9C0AQd6cKvzk9y3qm6YYQf2ZhmO9l+V5P5jmRMy9Fz8gKo6IkPvw+2TPKG19v2J2W/M8MZ48sS0J2c47e/NWd2nMxwFeXVVPamq/vUKZe6T5G2ttcuXJrTWvpyh235/LbbWPrR8YlUdX1VvqaqvZtjxvTrJQzL0kOzN5W2iZ7cN59J/OkNPDczSZvxM3yVDz+5r2uq9FEsubq3tWHrQhl7Vt2Y4grm0rltn6PnMRLn3Zji15gHj428k+Uiu6+l+UIZrlt+V5IHjtJ/O0LCv+FqspLX2qSSvTfJrVXWL5fOr6sgMpye9YfK1a61tz3Bk+QHTbmsVb1xhmzepqt+rqs9m+D9fneHU78pw6tHeLB+M758zHJw7ej/rykGqqu5UVduq6osZ3k9XZwhnk993/1+Se1bVGVX14Kr6kRls+mFJ/jHJ9qo6YumW4SywW2bo9UyStNbemKE345UZLo941vj53Bdfz3CG1+9W1daqWulzc58k72+tXdtz0lq7Ivs2mOU5ST6VoVd3JffJ8BlcPsr9X2fY/9ifduT7GXqbrmfK//meLH8dPhr7OExBe7Pu7c1ym6oNONDB97wMjetPZdgZ/HBr7asZTpN8YFXdPcPOz/nLF6yqwzLsAD44yWNaax+ZnD+G0r/JMIJhVdXhSU5Kcm7bw8AwrbVvjXX5UpIzk+yoqo9W1eRAELfNcF78cl+d7mnv0ZeXTxjD97szHIV9VobX695J3pHkhlOs
87IVpl015bKwFpvuM53hCyTZ82k/S1b6DH81QxBPrjut6Qc+p0m+MjE/GV6LpZD7wAzP+fwkR4+XSzwwyZf24cvrhRle499YYd7NMwTOaeq3L1Za759nGJn7FRkOxt07w6lkyXRtzDeWPV4a5EL7dAgaLzf6hyQ/nuS/ZThgdu8Mg9zdYKLoX2Q4Te8nM+wofqOq/rb27yd5bp3hgNTVy27njvNvuaz8a8c6XZrhTLB90obuiYdkOFvjd5J8qqo+V1XPmCg2s/2O1to1GS5HuF9VPXyFIiu2c+PBtK9n/9qRS8ftX2sN//M9Wakd0YawR9qb9W9vVrCp2oADParzP2fozn9QhsGXlno+zsswmuu/ZOhqf98Ky74qw/Vrj2utvXuV9f9lhuvE7pehi/6247Q9Gntcf3488jKX4TqX/zle5/fRDF8GK/3G6Cx6KNoK0x6W4Zz4EyePvszoiBPM0mb8TH9tvL/9HksNVvoMH51h4Inkuob1NiuUu02GL5Il5yd5blXdN8N1r+e11r5SVZ/I8Po8KCscANibNlwz/OoMA94tP2p6WYY2ZLX6fX2t21u++ckHY8/+ozOcdvTyiek/tp/b4dB13wxnaNy/TYy8On4fX2vceXt1hrOzbp7koRkuq3hDhp3TffH1DDt7z15l/icn6vMjGXbKPprhzIbfzXAN3D5prX0uyS9UVWXY+fuVJGdW1WIbxvGY9X7H/8ywg/niDG3vpMl27mNLE8f/wS2zf+3ISvs4U/3PYR1obw5Me3O9za8wbcPagAPa4zu+kS7McOTh/rn+TvK/z3BO+z9OnlKcJFX10gzd37/YWvvfe9jE+Rl6eZ483haz5wF1ltfv+20YNvv5GV6bu46zLk7yiMngWVW3TfIfpljtUm/GjaatR5Kl7Vw9sb07T7k9OGA26Wf6U2O5U8ZGfk/uM3l5Q1X9qySPzDBQQzJ8EX01ywZtqqqfytBoXzgx+T0ZBml6UYbw/dFx+nkZBrm7Z9ZwmvMyL8nw5fGbkxNba7syDODx/4w94kv1u0OGXvjJ+q3kqqytbbpBhuuer142/alrWAdMWun77uYZDrCsqLV2WWvtDRnC3L+bmLXa+3m16e9I8m+T7GitLaxwmxxQ7uUZDqY9Osl/TfLsGgaWm/S9Vbazqjb4UK773c+l53NxhstIjlkqOw5y88i1rH9yO0l+K8OlEct/2ujiDK/R8sHpHp+hg2Rv7chan/ea/+cwI9qb9WlvDpo2YCOOrp2XYdTAyVFeP5hhNNEHZhhB7VpV9RsZ/kF/luTTVXWfidk7W2vXjpDchtHQ/irDCKU/lGFglpWONEyu/1FJTs3w8x/bkxyZ5L9kGK10aef3xRlGNf37qvrDDDt/z8+wQ7y3awi/muEozxOq6iMZflZle2ttT0dQ35XhnPi/GAPCbTOc7rgjB/70dNibTfWZbq21qnpOhtHUz6uqV2UYnO6uSW7dWvvtieJfTfLOqprPdaM6H5khvKa1dk1VvSDDUd/XZbgG7vYZguinMzF6YGvtW1X1wSQ/k+F07KV6np/rTgVec4/vuO5Lq+rlGUZEXO75Ga51eUtVnZlhEK0XZrgW+qV7WfXHkzyzqh6fYbT577RxNOtV6vGtqro4wzXHX84Q8H8p0/Wuw0r+T4a24k+r6rczfP5+K8N766ZLharqrFz3vXxphoHfnpxh0MclH09y//F7/StJvtZaWxyn32I8tW8hyZWttX/OMBro45NcVFUvy3Cg68gMO6f3b609etz2z2c4UPfksefkFVX10CTnVNU9WmuXTmz/kVX1jgxnY3yptfal5U+4qu6RYcf2DRlGfD88w8Gj7+e6g2N/lOFUy7+vqhdmaJ9+dbzfYxu4mtbaW6vqfRlGkp2c/o2q+qMkz6uqXRlGq71rhn2f92bv1/lN9bwnTPU/h3WgvVmf9ubgaQPagR9l7a4ZXsSLl01/U1YYfS0r/9bk0u2cFdZ/94n5e/wdy7H8XTK8
GbZnGG1tZ4ZG/yeXlXtIhp/wuCrDReK/nGHgl3+aYhuPyfCmuDoTI7JmlRFVx3knZhjp9soMpx49Ict+Nzh7+B3fFdZ3QSZGWHVzm9Vts32mJ5ZbOrX4u+Ptw5kYBTDX/Y7vKRlC31UZfkfuQSusa+nnBa7KcCDrer/jO1Hu98Z6Pn1i2tKIz4tT1nt+XMcRy6bfLMMpidf7zI/zlv+O75umbP9uM7Z33xnXe8E4/anj4+NWWGZLkrePy1yaYfTLRy7/Xy9vc3LdqM4PXra+pW1t2ej3stv63rLKKKvjZ/WfxvfvZzMcfJ7PxGiiGS55uGB8z12V4Tv7ZZn4ndkMO5BLv19/bXuSYadqW667NGBxYpmbj+vZnqHX4tJxHc8Z5//r8XP3umV1PirD6YFvyzgieYazsi7J8L3dssrvamY4pfC1Gc5OuXxc/4VJfnZZuXtlCJ5XZrj84vkZdmAvm+K1viArj9j+gFzXnm6ZmL7S7/j+afbyO757et5ZZX9k2v/5WO56r2NWbx/PyZRtrNuhcdPeXLvsgWhvDpo2YOnFY43GC7M/k+StrbWnbXR9AIB+jZczfDBDz9LPbHR9gH712t4YSGBKVXVGhq75LyW5XYaL02+e4WgIAMDMVNWLMhxg/3yGQaZOSXKPJI/YyHoB/TlU2hvBd3o3zHAa49EZTk/4QIZT9j6yx6UAANauZfgZotuNf38kw0+/vX1DawX06JBob5zqDAAAQNeMEAwAAEDXBF8AAAC6JvgCAADQNcEXAACArgm+AAAAdO3/Alxj9fqUf1YXAAAAAElFTkSuQmCC\n",
187 | "text/plain": [
188 | ""
189 | ]
190 | },
191 | "metadata": {},
192 | "output_type": "display_data"
193 | }
194 | ],
195 | "source": [
196 | "x = list(lines.index)\n",
197 | "means = list(lines['test loss'])\n",
198 | "stds = list(lines['test loss std'])\n",
199 | "\n",
200 | "plt.figure(num=1, figsize=(16, 8))\n",
201 | "plt.rc('xtick', labelsize=16) \n",
202 | "plt.rc('ytick', labelsize=16) \n",
203 | "plt.errorbar(x, means, stds, fmt='o', color='white', ecolor='black', elinewidth=15, capsize=10);\n"
204 | ]
205 | },
206 | {
207 | "cell_type": "markdown",
208 | "metadata": {},
209 | "source": [
210 | "## Train-Test accuracy difference"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": 16,
216 | "metadata": {},
217 | "outputs": [
218 | {
219 | "data": {
220 | "image/png": "\n",
221 | "text/plain": [
222 | ""
223 | ]
224 | },
225 | "metadata": {},
226 | "output_type": "display_data"
227 | }
228 | ],
229 | "source": [
230 | "x = list(lines.index)\n",
231 | "means = list(lines['train acc'] - lines['test acc'])\n",
232 | "stds = [0]*len(x)\n",
233 | "\n",
234 | "plt.figure(num=1, figsize=(16, 8))\n",
235 | "plt.rc('xtick', labelsize=16) \n",
236 | "plt.rc('ytick', labelsize=16) \n",
237 | "plt.errorbar(x, means, stds, fmt='o', color='red', ecolor='black', elinewidth=15, capsize=10);\n"
238 | ]
239 | },
240 | {
241 | "cell_type": "markdown",
242 | "metadata": {},
243 | "source": [
244 | "## Train-Test loss difference"
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": 18,
250 | "metadata": {},
251 | "outputs": [
252 | {
253 | "data": {
254 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAA8MAAAHaCAYAAAAkM5tMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xm45FddJ/73Jwlbi8gWEKPplodF4MeiNgrK0gkoCLI4IjA2KCBGQVFRZlwiGJCIoyIuI0qrY1h6AB0GiaKCkDQjSMAOskVkM90QCSQhgEiHkJDz++N8b7pyuUvd27f7due8Xs9TT9069V1O1a06Ve/vOd9T1VoLAAAAjOS4za4AAAAAHGnCMAAAAMMRhgEAABiOMAwAAMBwhGEAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDgnbHYFVnPrW9+6bdu2bbOrAQAAwAY7//zzL2utnbgZ+z7qw/C2bduyd+/eza4GAAAAG6yq9m/Wvg2TBgAAYDjCMAAAAMMRhgEAABiOMAwAAMBwhGEAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDjCMAAAAMMRhgEAABiOMAwAAMBwhGEAAACGIwwDAAAwHGEYAACA4QjD67V7d7JtW3Lccf169+7NrhEAAABzOmGzK3BM2r07Oe205MCBfnv//n47SXbu3Lx6AQAAMBc9w+tx+ukHg/CCAwd6OQAAAEc9YXgZO3bsSFUteblm//4l17lm//5l19mxY8eRfQAAAAAsyzDpZezZs2f5O7dt60OjFzlu69a0ffsOV5UAAADYIHqG1+PMM5MtW65btmVLLwcAAOCoJwyvx86dya5dydatSVW/3rXL5FkAAADHCMOk12vnTuEXAADgGKVnGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcNYVhqvq76qqVdXz51j2xlX1m1V1cVVdUVVvr6oHrGe/AAAAsBHWHIar6r8muecaVvnTJD+a5DlJvjfJxUneUFX3Wuu+AQAAYCOsKQxX1c2TvCjJz865/D2T/GCSZ7bW/ri19uYkj03ysSTPW2NdAQAAYEOstWf4N5Jc0Fp75ZzLPzLJVUlevVDQWrs6yauSPKSqbrTG/QMAAMAhO2HeBavqfkl+KGsbIn23JBe21g4sKr8gyQ2T3GH6GwAAAI6YuXqGq+oGSV6S5Ldaax9cw/ZvmeQzS5RfPnP/Uvs7rar2VtXeSy+9dA27AwAAgNXNO0z655PcJMmZa9x+JWnLlC+rtbartba9tbb9xBNPXOMuAQAAYGWrDpOuqpOTnJ7kqUlutOg83xtNk2p9vrX25SVWvzzJyUuU32LmfgAAADii5ukZvn2SGyd5RfqQ54VLkjxr+vvuy6x7QZJvrKoti8rvmuRLST6y1goDAADAoZonDL87ySlLXJIekE/J8qH27CQ3SPIDCwVVdUKSxyV5Y2vtyvVVGwAAANZv1WHSrbXPJtmzuLyqkmR/a23PdHtrko8meV5r7XnTuu+uqlcn+Z1pEq4LkzwtyTcm2bkxDwEAAADWZu6fVppDJTk+X9nb/OT0ibeen+TmSd6T5KGttXdt4L4BAABgbusOw621WnR7X5aYJbq1dkWSn50uAAAAsOnm/WklAAAAuN4QhgEAABiOMAwAAMBwhGEAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDjCMAAAAMMRhgEAABiOMAwAAMBwhGEAAACGIwwD
AAAwHGEYAACA4QjDAAAADEcYBgAAYDjCMAAAAMMRhgEAABiOMAwAAMBwhGEAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDjCMAAAAMMRhgEAABiOMAwAAMBwhGEAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDjCMAAAAMMRhgEAABiOMAwAAMBwhGEAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDjCMAAAAMMRhgEAABiOMAwAAMBwhGEAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDjCMAAAAMMRhgEAABiOMAwAAMBwhGEAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDjCMAAAAMMRhgEAABiOMAwAAMBwhGEAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDjCMAAAAMMRhgEAABiOMAwAAMBw5grDVfWQqjqnqj5ZVVdW1UVV9edVdddV1ttWVW2Zy8035iEAAADA2pww53K3THJ+khcnuTTJyUl+Icl5VXX31tr+VdZ/QZKzF5V9fi0VBQAAgI0yVxhurb0yyStny6rqnUn+NcljkrxwlU38W2vtvHXVEAAAADbYoZwz/Onp+qqNqAgAAAAcKWsKw1V1fFXdsKrumOQlST6Z5FVzrPqCqrq6qj5XVWdX1d3XU1kAAADYCPOeM7zgHUm+dfr7I0lOba1dssLyV6aH5jemn2v8TUl+Kck/VtW3tdY+sNRKVXVaktOS5OSTT15jFQEAAGBl1Vqbf+GquyS5WZLbJ3lWktsmuV9rbd8atvENSS5IcnZr7QmrLb99+/a2d+/euesIAADAsaGqzm+tbd+Mfa9pmHRr7QOttXdME2o9KMlN02eVXss2Pp7krUnuvZb1AAAAYKOsewKt1tpn04dK32Edq1eS+bukAQAAYAOtOwxX1W3TzwH+6BrXOznJd6affwwAAABH3FwTaFXVa5O8K8l7k/xHkjsleWaSqzP9xnBVPTDJm5M8pbX2sqnshemB++3pE2jdOckvJrkmya9t5AMBAACAec07m/R5SR6b5OeS3DDJx5PsSfKCmcmzKsnxuW5v8wVJnpbkSUm+OsllSc5J8tzW2gcPreoAAACwPmuaTXozmE0aAADg+umYmU0aAAAArg+EYQAAAIYjDAMAADAcYRgAAIDhCMMAAAAMRxgGAABgOMIwAAAAwxGGAQAAGI4wDAAAwHCEYQAAAIYjDAMAADAcYRgAAIDhCMMAAAAMRxgGAABgOMIwAAAAwxGGAQAAGI4wDAAAwHCEYQAAAIYjDAMcSbt3J9u2Jccd1693797sGgEADOmEza4AwDB2705OOy05cKDf3r+/306SnTs3r14AAAPSMwxwpJx++sEgvODAgV4OAMARJQwDbKAdO3akqpa8XLN//5LrXLN//7Lr7Nix48g+AACAQRgmDbCB9uzZs/yd27b1odGLHLd1a9q+fYerSgAALEHPMMCRcuaZyZYt1y3bsqWXAwBwRAnDAEfKzp3Jrl3J1q1JVb/etcvkWQAAm8AwaYAjaedO4RcA4CigZxgAAIDhCMMAAAAMRxgGAABgOMIwAAAAwxGGAQAAGI4wDAAAwHCEYQAAAIYjDAMAADAcYRgAAIDhCMMAAAAMRxgGAABgOMIwAAAAwxGGAQAAGI4wDAAAwHCEYQAAAIYjDAMAADAcYRgAAIDhCMMAAAAMRxgGAABgOMIwAAAAwxGGAQAAGI4wDAAAwHCEYQAAAIYjDAMAADAcYRgAAIDhCMMAAAAMRxgGAABgOMIwAAAAwxGGAQAAGI4wDAAAwHCEYQAAAIYjDAMAADAcYRgAAIDhCMMAAAAMRxgGAABgOMIwAAAAwxGGAQAAGI4wDAAAwHCEYQAAAIYjDAMAADAcYRgAAIDhzBWGq+ohVXVOVX2yqq6sqouq6s+r6q5zrHuLqvqTqrqsqr5QVW+qqrsfetUBAABgfebtGb5lkvOT
/GSS707yi0nuluS8qtq63EpVVUnOTvLQJM9I8v1JbpDk3Kr6+kOoNwAAAKzbCfMs1Fp7ZZJXzpZV1TuT/GuSxyR54TKrPjLJ/ZKc2lo7d1rv7UkuTPLfk/zU+qoNAAAA63co5wx/erq+aoVlHpnkEwtBOElaa59L8ldJHnUI+wYAAIB1W1MYrqrjq+qGVXXHJC9J8skkr1phlbslef8S5RckObmqbrqW/QMAAMBGWGvP8DuSXJnkQ0nukT78+ZIVlr9lks8sUX75dH2LpVaqqtOqam9V7b300kvXWEUAAABY2VrD8BOT3CfJDyb5jyR/X1XbVli+krRlypfVWtvVWtveWtt+4oknrrGKAAAAsLI1heHW2gdaa++YJtR6UJKbJvmFFVa5PL13eLGFHuGleo0BAADgsFr3BFqttc8m+UiSO6yw2AXp5w0vdtckH2ut/ed69w8AAADrte4wXFW3TfJNST66wmJnJzmpqh44s97Nkjxiug8AAACOuLl+Z7iqXpvkXUnem36u8J2SPDPJ1Zl+Y3gKvG9O8pTW2sumVc9O8vYkr6iq/5Y+LPoX088Z/o2NexgAAAAwv7nCcJLzkjw2yc8luWGSjyfZk+QFrbV90zKV5PjM9Da31q6pqu9N8ltJXpzkxunh+JTW2sc3oP4AAACwZtXaUpM9Hz22b9/e9u7du9nVAAAAYINV1fmtte2bse91nzMMAAAAxyphGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAMD1ze7dybZtyXHH9evduze7RkedEza7AgAAAGyg3buT005LDhzot/fv77eTZOfOzavXUUbPMAAAwPXJ6acfDMILDhzo5VxLGAYAADjG7NixI1W15OWa/fuXXOea/fuXXWfHjh1H9gEcBQyTBgAAOMbs2bNn+Tu3betDoxc5buvWtH37DleVjjl6hgEAAK5Pzjwz2bLlumVbtvRyriUMAwAAXJ/s3Jns2pVs3ZpU9etdu0yetYhh0gAAANc3O3cKv6vQMwwAAMBwhGEAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDjCMAAAAMMRhgEAABiOMAwAAMBwhGEAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDjCMAAAAMMRhgEAABiOMAwAAMBwhGEAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDjCMAAAAMNZNQxX1WOq6jVVtb+qrqiqD1bVC6rqq+dYty1zudfGVB8AAADW7oQ5lnlWko8l+aUkFyX55iRnJDmlqr6jtXbNKuufleQli8o+tLZqAgAAwMaZJww/orV26cztt1TV5UlemmRHknNWWf/fW2vnrbN+AAAAsOFWHSa9KAgv+Kfp+qSNrQ4AAAAcfuudQOuB0/UH5lj2aVV1ZVUdqKpzqur+69wnAAAAbIg1h+GqOinJ85K8qbW2d5XFX5Hk6UkenOS0JLdKck5V7VhlH6dV1d6q2nvppUt1TAMAAMD6VWtt/oWrbppkT5KvS/JtrbWL1rSzPgP1+5N8vLV2v3nW2b59e9u7d7XMDQAAwLGmqs5vrW3fjH3P3TNcVTdOcnaS2yd5yFqDcJK01j6f5PVJ7r3WdQEAAGCjzDObdKrqBklek+Tbkjy4tfa+Q9hnJZm/OxoAAAA22KphuKqOS7I7yYOSPPxQfiapqm6W5OFJ3rHebQAAAMChmqdn+A+S/ECSM5N8oaruM3Pf
Ra21i6pqa5KPJnlea+15SVJVz0py5yTnJvlEkq1JnpXka5Ps3LiHAAAAAGszTxj+nun69Oky67lJzkgf+nx8rnsO8geTfN90+Zok/5HkbUl+pLX2zvVXGQAAAA7NqmG4tbZtjmX2pQfi2bK/SvJX660YAAAAHC5r/p1hAAAAONYJwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4q4bhqnpMVb2mqvZX1RVV9cGqekFVffUc6964qn6zqi6e1n17VT1gY6oOAAAA6zNPz/Czknw5yS8leWiSP0zytCR/X1Wrrf+nSX40yXOSfG+Si5O8oarute4aAwAAwCE6YY5lHtFau3Tm9luq6vIkL02yI8k5S61UVfdM8oNJntJa+7Op7C1JLkjyvCSPPIR6AwAAwLqt2jO8KAgv+Kfp+qQVVn1kkquSvHpmW1cneVWSh1TVjdZQTwAAANgw651A64HT9QdWWOZuSS5srR1YVH5BkhsmucM69w0AAACHZM1huKpOSh/m/KbW2t4VFr1lks8sUX75zP3L7eO0qtpbVXsvvXSpjmkAAABYvzWF4aq6aZLXJbk6yZNXWzxJW6Z8Ra21Xa217a217SeeeOJaqggAAACrmjsMV9WNk5yd5PZJHtJau2iVVS7P0r2/t5i5HwAAAI64ucJwVd0gyWuSfFuSh7XW3jfHahck+caq2rKo/K5JvpTkI2upKAAAAGyUVcPw9FvCu5M8KMmjWmvnzbnts5PcIMkPzGzrhCSPS/LG1tqVa68uAAAAHLp5fmf4D9ID7ZlJvlBV95m576LW2kVVtTXJR5M8r7X2vCRprb27ql6d5HemnuULkzwtyTcm2bmRDwIAAADWYp5h0t8zXZ+e5O2LLk+d7qskxy+xvScn+bMkz0/y+iTfkOShrbV3HVq1AQAAYP1W7RlurW2bY5l9WWKW6NbaFUl+droAAADAUWHNvzMMAAAAxzphGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAAMBxhGAAAgOEIwwAAAAxHGAYAAGA4wjAAAADDEYYBAAAYjjAMAADAcIRhAAAAhiMMAwAczXbvTrZtS447rl/v3r3ZNQK4XjhhsysAAMAydu9OTjstOXCg396/v99Okp07N69eANcDeoYBAI5Wp59+MAgvOHCglwNwSIRhAIBNtGPHjlTVkpdr9u9fcp1r9u9fdp0dO3Yc2QcAcIwyTBoAYBPt2bNn+Tu3betDoxc5buvWtH37DleVAIagZxgA4Gh15pnJli3XLduypZcDcEiEYQCAo9XOncmuXcnWrUlVv961y+RZABvAMGkAgKPZzp3CL8BhoGcYAACA4QjDAAAADEcYBgAAYDjCMAAAAMMRhgEAABiOMAwA
AMBwhGEAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDjCMAAAAMMRhgEAABiOMAwAAMBwhGEAAACGU621za7Diqrq0iT7N7seK7h1kss2uxLAMUfbAayVdgNYj6O97djaWjtxM3Z81Ifho11V7W2tbd/segDHFm0HsFbaDWA9tB3LM0waAACA4QjDAAAADEcYPnS7NrsCwDFJ2wGslXYDWA9txzKcMwwAAMBw9AwDAAAwHGEYAACA4RyTYbiqHlRVr6iqj1bVFdP1H1bVbTa7bvOoqptX1RlV9S2HYdvbqqpV1ZM2ettwuBwt7+mq2ldVrziS+5zH1F606Xm5waL77rCR7/mqelJVPWUjtrXEtvdU1Z7DsW2OXVX1lKr6cFV9qao+u8Hb/pmq+i9LlD+6qn52I/e1zP7PqKpTD/d+5jG9/1pVvWyJ+5463bdtg/Z12B73VM8zDse2uf7T3hwZx1IbcEyG4SQ/nuRWSZ6f5KFJXpDkkUnOq6qbbmbF5nTzJL+SZMPDcJKLk9w3yesPw7bhcDnW39NHyu2T/Mhh3seTkhyWMJzk6dMFkiRV9XXpE7v8Y5JTkzx4g3fxM0m+4stpkkcnOexfTtM/64+KL6czdlbVXQ/zPg7n475vkj85TNvmekx7c0QdM23ACRu1oSPs6a21S2duv6WqPpTkLUkem+R/bU61Do+qulFr7cp5lp2WO+8wVwk22lDv6UPwxiS/XFVntda+uNmVWUvblCSttX85nPXhmHTHJMcneWlr7a2bXZkBvDvJ1yX51STfv8l1SbKudsR3HNZLe3MU2uw2YFN7hqtq+9TVfb+ZsmdMZc+fKbvjVPawJFn0pXnBP03XJ03r3K6qrq6qZyyx35+vqquq6sQV6nbvqvr7qvp0VR2oqn+rqhcvWubBVfXPVfXFqvrINMzorKrat8J2tyW5cLr5x9PjunaI4zSM6a1V9Yhp21dm6kmpqp+sqrdX1eVV9dmqOq+qHr54+4uHTE51uqiqvrmq/mF6PB+uqh9frp6wHkfze3pa7oHT+/pzVfWFqnpPVX1FT2tV/ej0nv5iVb2rqk5ZYpknTOt/saouq6qXV9XtZu7/n1X1kUXrnD897jvMlJ1ZVZdUVa1U98np6V9kf2K1BavqoVN7ccX0eP+yqu68yjp7kjwwyXfOtE17pvueNN1+QFX9RfXhZe+Y7rt3Vf2fqZ25oqo+WFW/VlU3Wbz9mhkmXVU7pm0+cnq+LquqS6sPmb/5HM8Hx7CqOivJnunmm6fXwlnTfY+vqnOm18N/Tp+HP7zENn66qj4wve4+U1V7q+r7pvv2Jdma3hO68Ho+a9rHDyc5aaZ838w2b139NI1/r6orq+pfq+q0mfuPm17L+6rqa2bK7z7V4zen2ws/13H6zH7OWOH5uFNVvXZqD75YVR+b3msnzCzzLdPn+Ber6uNV9UtV9dyZfa3mC0l+Lcl/qartKy1Y3TOn9/OXquri6X16s1XWW/Zx18HvI/etqn+sqiuS/MZ037z/8+s8j3XwNJI7VtXrp3X3V9VzqupYHQHJBtPefMVjOWztzbHWBmx2z/C7knw2vRt94QjNqUmuyHW71k9N8uUk/7DCth44XX8gSVprF1fVm5I8McnvL1r2CUn+bpkv4Kk+LPMNSd6ZPmTw80m2JfmOmWXumj4U+Z1JHp/khkmeneRrklyzQj0vTh9C8X/Th4KePZV/dGaZOyX5vfQjt/+W5PKpfFv6sIB96f+7RyT566p6WGvtb1fYZ5LcLMn/TvI7SZ6X5MlJ/rCqPthaO3eVdWFeR+V7Okmq6lFJXpPkbUl+LMllSe6W/uG1eL/fmh48r0zy80n+tqru2Vr74LSt05K8JMmrk/xiekD9tSTfXlXf0lr7zyTnJPmJqjq5tfaxqrpFknvNPBcLQfnUJOe2+X7n7t1J/iLJL1TVrtba55d5rA9Nb5/OSfK4JDdNf9+/taru1Vr792W2//Qkr0g/cv5jU9l/
LFpmd5JXJnlMDn6GnDzV7az09vJuSZ6TPqz78XM8rt9N8tdJfjDJndM/GL+c/gWC669fTXJ++ufdT6S3Hwvv4dsn+T9Jfj39M/UBSf6kqm7SWvujJKmqnUlemP7a/ockN0lyjyS3nLbxfUn+Jsl7kpwxlS1s/8Qk904/HSPp7/VMQe9t07bOSD94/ZD0z8sbtdZ+v7V2TVU9YdruS5I8vvqBn1cluSC97Uj6UL63p78vXjKVXbTC8/HX6e3n09Lbp5OSPCxTx0VV3TrJm5N8IskPJflSkmemfzdYiz9KH7K5cFrKcs5Mb9/+IMlfJblr+v/snlX1wNbact91VnvcX5P+XP1Wkl9KbxOTOf7nq3htkj9L8qL070fPTfLxqQy0N9d1ONubY6sNaK1t6iXJ69K/CCb9H3B5+ovtqiQ3ncpfleS8Fbbx1Un+Ncm/JDlhpnxnkpbkzjNl95rKHrvC9rZPy9xjhWX+d/qLfMtM2e2SfDHJvlUe87Zp+09d4r490wvgXqts47j0L6JvTPK6Jbb9pJmys6ayU2bKbpT+4t+12a8Bl+vX5Sh9T1f6QaS9SY5bYbl96Q3+yYvqcnmSl0+3j0/yqYXHOLPc/aZ6/NR0+5bTe/mHp9uPTvKZJH+a5JVT2U2n5+XHV3lOz5i2fUL6wbKrkzxnuu8OS7zn9yb58KLn7hunff32Kvvak+StS5Q/adrPi1ZZv6Z6PmF6/LdatO09M7d3TNt86aJt/M/0trQ2+/Xscngv6efstSQ7Vlhm4fPuj5O8Z9Hr5F2rbH9fklcsUX5WkouWKH/29Nq746LyP07/zJx9T33fVPcnp5+H+J9J7rRovZbk+XM8D7eeln3kCsv82tQ+ff1M2U2m9qjNsY9r39vpcw+0JA+Ybj91ur1tun3L6Xk4a9E2nrBaPVd63Dn4feRRq6y/5P98ZttnzNw+Y+H/sGi59yV542a/xl2Onov25trljkR7c8y0AUfD8JFzk9y3qm6c/qX25um9Alcmuf+0zI70Ho6vMHXnvzL9iMbjW2tXz9z92vQXyxNnyp6Y5HM52CO7lA+nHy15SfWhkN+wxDL3SfI3rbUDCwWttYvTT8o/VPtaa+9eXFhV31pVf11Vn0r/MnxVku9K70kmrM6jAAAJ3klEQVRZzYE20wPc+tj8D6f36MBGOhrf03dO7wH+k7Z8b8aC81prH1u40Xrv6+vTj3QubOs26T2kmVnurUn2Z+rRbq1dnuS9Odgjfmr6OdBvSrIw7PoB6Y39ks/FUlprH0ry0iQ/V1W3XHx/VX1V+uR8r5597lprF6YfgX7gvPtaxmuX2OfNqup/VNVH0//PVyV5eXowvuMc21w84d/70g/Y3fYQ68oxahru9sqq+vf019NV6YFt9vPun5Lcq6p+v/ppS1s2YNcPTR/+f2FVnbBwSR8tdqv03tEkSWvttem9Hn+Y5EeTPGN6f67Hp9NHgv169dM0lnrf3CfJ21tr1/awtNauyPomzDwryYfSe3+Xcp/09+Di2fVflf7941DakavTe6WuY87/+UoWPw/vj+84zEF7c9jbm8WOqjbgaAjD56Q3uN+R/gXxPa21T6UPsTylqu6W/oXo3MUrTuPAX5p+pOfRrbX3zt4/BdXXpI/fr6o6Psl/TfIXbYXJZ1prn5vq8okkL07ysap6f1XNTjZxuySXLLH6p+Z72Cu6eHHBFMjfnH609hnpz9e9k/xdkhvPsc3PLFF25Zzrwlocde/p9A+VZOUhQwuWeg9/KtO5yzk4JOor3qdJPjlzf9Kfi4Xge0r6Yz43yW2nUy1OSfKJdXygPTf9Of75Je67RXoInad+67HUdv8sfUbw30s/QHfvHDyveZ425vJFtxcm0tA+DWg6Venvk9wzyS+kH0S7d/pEejeaWfRl6UP8vj39y+PlVfV/69B+Hug26Qeprlp0+Yvp/lstWv6lU50uSR8xti6td2N8V/qojhck+VD1uUqeNrPYhn3vaK19Of1U
hvtV1fcssciS7dx0gO3TObR25JJp/9daw/98JUu1I9oQVqS9OfztzRKOqjZgs88ZTnoPwGXpvSbfnIM9JOekzyL78fRu+rctse4fpZ8P95jW2puX2f7L0887u1969/7tprIVTT2z3z8dodmeft7Mn0/nDb4//QNiqd9A3YiejLZE2UPTx9g/dvYozQYdmYKNdDS+py+brk9acaluqffwbZMsnGe70Nh+7RLLfW36h8uCc5M8s6rum34e7TmttU9W1QfSn59Ts8RBgdW0fg7yS5L8ZL7y6Opn0tuQ5er36bXub/HuZ29MIwAelT5k6Xdnyu9+iPthXPdNH8lx/zYz42vNTOySXPuF7iXpo7hukeS700/JeHX6F9b1+HT6F8CfXub+D87UZ0v6F7X3p4+A+PX0c+rWpbX2b0l+qKoq/QvhTyZ5cVXta31ekI3+3vHn6V86n5/e9s6abecuWCic/ge3yqG1I0t9x5nrfw6HgfbmyLQ319n9EmWb1gZses/w9OJ6S/oRivvnul+cvzl9jPw7ZocjJ0lVvTC96/zJrbW/XGEX56b3Bj1xuuzLypP2LK7f1a1P4f3s9OfrLtNd5yV52GwYrT6T7HfOsdmFXo+brLjUdS3s56qZ/d1pzv3BEXOUvqc/NC331KnhX8l9Zk+NqKqvTvLw9Mkgkv7h9Kksmhiqqr4jvSF/y0zx/0ufCOpX0wP5+6fyc9In0rtX1jBEepEz0z9QTp8tbK19IX2SkB+Yes4X6rc1vbd+tn5LuTJra5tulH4e9VWLyp+0hm3ArKU+726RftBlSa21z7TWXp0e8P6/mbuWez0vV/53Sb4pycdaa3uXuMxOWve76QfYHpXkvyf56eqT18360jL7WVbr3p2Dv0u68HjOSz8F5esXlp0m0nl41mFqq385/bSKxT+zdF76c7R4ArzHpXekrNaOrPVxr/l/DhtEe3N42ptjpg04Wo64nZM+W+Hs7LLvSp/F9JT0mduuVVU/n/5P+19JPlxV95m5+9LW2rUzM7c+C9vu9JlRb5A++ctSRyRmt/+9SU5L8pfpM7t9VZKfSp8ldeEL8fPTZ1N9Q1X9VvoXwmenf0le7ZzET6UfDXp8Vb03/acOLmytrXSk9U3pY+xfNoWG26UPlfxYjoKDGrDIUfWebq21qvqZ9Fncz6mqP0qfAO8uSW7TWvuVmcU/leSN1aftX5hN+qvSA21aa1+uquekHx1+Rfo5dSelh9MPZ2bWwtba56rqXUkelD6Ue6Ge5+bgMOI19wxP276kqn43fSbGxZ6dfu7MX1f/SbibprcXn0s/kr2Sf0ny9Kp6XPos959v0yzay9Tjc1V1Xvo5zBenh/6nZL5eeFjKP6a3FX9QVb+S/v775fTX1uxPi+zKwc/lS9Inl3ti+sSSC/4lyf2nz/VPJrmstbZvKr/lNCxwb5Ivttbelz4L6eOS/ENVvSj94NdXpX9hvX9r7VHTvr8//eDdE6celt+rqu9OclZV3aO1dsnM/h9eVX+XPmrjE621Tyx+wFV1j/Qvu69On2n++PQDSlfn4AGz304fpvmGqnpuevv0s9P1im3gclprr6+qt6XPYDtbfnlV/XaSX6yqL6TPknuX9O8+b83q5w3O9bhnzPU/h8NAe3N42ptjpw1oR8fsbndJf2LPW1T+uiwx61v6jIhtmctZS2z/bjP333mO+tw5/QVyYfosb5emfxB8+6Llviv950SuTD8R/cfSJ5f55zn28ej0F8pVmZkJNsvM5Drd99j0GXa/mD5s6fHpk2Dsm1lm2+z22sGZ25aaxW5PZmZ2dXHZqMvR9p6eWW9hWPJ/Tpf3ZGb2wUwzQaZ/6CxMBvXPSU5dYlsLP3VwZfrBrZcnud0Sy/2PqZ4/PlO2MNP0vjnrfca0jRMWld88fTjjdd7z030PTf/QviI9BL9uzvbva6f27vPTdvdM5U+abt9hiXW2JfnbaZ1L0mfdfPji//XiNicHZ5N+8KLtLexr22a/
ll0O7yXLzO46vVf/eXr9fjT9gPQZmZnFNP10iT3Ta+7K9M/sFyW52cwy35R+QO7AbHuS/kXrlTl4WsG+mXVuMW3nwvTejUumbfzMdP83TO+7Vyyq84npQwv/JtNM6Omjt85P/9xumZkBddG6t0k/H/BDU10vT+99fcii5b4lPYx+Mf3UjWenf6n9zBzP9Z4sPVP8A3OwPd02U17pwzA/OD0PF6cf5LzZHPta8nFnme8j8/7Pp+Wu8zxm+fbxrMzZxrqMcdHeXLvukWhvjpk2YOHJYwNMJ39/JMnrW2s/stn1AQCuv6ZTId6V3gP1oM2uD3D9dX1tb46WYdLHpKr6/fRu/U8k+br0E+BvkX7UBABgw1TVr6YfdN+fPpHVU5PcI8nDNrNewPXPKO2NMHxobpw+BPK26UMb3pk+3O+9K64FALB2Lf0nkb5u+vu96T9D97ebWivg+miI9sYwaQAAAIZjFmIAAACGIwwDAAAwHGEYAACA4QjDAAAADEcYBgAAYDj/P9clKLuCH0T6AAAAAElFTkSuQmCC\n",
255 | "text/plain": [
256 | ""
257 | ]
258 | },
259 | "metadata": {},
260 | "output_type": "display_data"
261 | }
262 | ],
263 | "source": [
264 | "x = list(lines.index)\n",
265 | "means = list(lines['test loss'] - lines['train loss'])\n",
266 | "stds = [0]*4\n",
267 | "\n",
268 | "plt.figure(num=1, figsize=(16, 8))\n",
269 | "plt.rc('xtick', labelsize=16) \n",
270 | "plt.rc('ytick', labelsize=16) \n",
271 | "plt.errorbar(x, means, stds, fmt='o', color='red', ecolor='black', elinewidth=15, capsize=10);\n"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": null,
277 | "metadata": {},
278 | "outputs": [],
279 | "source": []
280 | }
281 | ],
282 | "metadata": {
283 | "kernelspec": {
284 | "display_name": "Python 3",
285 | "language": "python",
286 | "name": "python3"
287 | },
288 | "language_info": {
289 | "codemirror_mode": {
290 | "name": "ipython",
291 | "version": 3
292 | },
293 | "file_extension": ".py",
294 | "mimetype": "text/x-python",
295 | "name": "python",
296 | "nbconvert_exporter": "python",
297 | "pygments_lexer": "ipython3",
298 | "version": "3.6.5"
299 | }
300 | },
301 | "nbformat": 4,
302 | "nbformat_minor": 2
303 | }
304 |
--------------------------------------------------------------------------------
/WorkOnVersion.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## WorkOnVersion"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "name": "stderr",
17 | "output_type": "stream",
18 | "text": [
19 | "Using TensorFlow backend.\n"
20 | ]
21 | }
22 | ],
23 | "source": [
24 | "from keras.layers import Embedding, Dense, Dropout, Input#, LSTM, Bidirectional\n",
25 | "from keras.layers import MaxPooling1D, Conv1D, Flatten\n",
26 | "from keras.preprocessing import sequence#, text\n",
27 | "from keras.models import Model\n",
28 | "from keras.utils import np_utils\n",
29 | "from keras.callbacks import Callback\n",
30 | "\n",
31 | "from gensim.models.keyedvectors import KeyedVectors\n",
32 | "\n",
33 | "from time import time\n",
34 | "import pandas as pd\n",
35 | "import numpy as np\n",
36 | "import matplotlib.pyplot as plt\n",
37 | "\n",
38 | "from sklearn import preprocessing\n",
39 | "from sklearn.model_selection import train_test_split\n",
40 | "from sklearn.metrics import (\n",
41 | " confusion_matrix as confmat,\n",
42 | " classification_report as creport\n",
43 | ")\n",
44 | "\n",
45 | "from utils import *\n",
46 | "\n",
47 | "np.random.seed(42)"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "metadata": {},
53 | "source": [
54 | "## Language Model and Data"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 2,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "algo = 'cbow' # or 'sg'"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 3,
69 | "metadata": {
70 | "scrolled": true
71 | },
72 | "outputs": [
73 | {
74 | "name": "stdout",
75 | "output_type": "stream",
76 | "text": [
77 | "Dataset splited.\n",
78 | "Label categories: ['0' '1' '2' '3' '4']\n",
79 | "Converting data to trainable form...\n",
80 | "Number of training examples: 89382\n",
81 | "Number of testing examples: 22346\n"
82 | ]
83 | }
84 | ],
85 | "source": [
86 | "# language model\n",
87 | "WORD_MODEL, _, MAX_FEATURES, EMBED_SIZE = get_init_parameters('./arabic_bins/web_{}_300'.format(algo))\n",
88 | "\n",
89 | "# load data\n",
90 | "data_paths = ['arabic_dataset_classifiction.csv']\n",
91 | "x_train, x_test, y_train, y_test, MAX_TEXT_LENGTH = split_datasets(data_paths, test_size=0.2, seed=42)\n",
92 | "CLASSES_LIST = np.unique(y_train)\n",
93 | "print('Label categories: ' + str(CLASSES_LIST))\n",
94 | "\n",
95 | "# \n",
96 | "x_train, x_test, y_train, y_test, train_y_cat, word_index = class_str_2_ind(x_train, x_test, \n",
97 | " y_train, y_test,\n",
98 | " CLASSES_LIST, MAX_FEATURES,\n",
99 | " MAX_TEXT_LENGTH)\n",
100 | "test_cat_y = np_utils.to_categorical(y_test, len(CLASSES_LIST))"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 4,
106 | "metadata": {},
107 | "outputs": [
108 | {
109 | "data": {
110 | "image/png": "\n",
111 | "text/plain": [
112 | ""
113 | ]
114 | },
115 | "metadata": {},
116 | "output_type": "display_data"
117 | }
118 | ],
119 | "source": [
120 | "LABELS = (\n",
121 | " 'Culture', 'Events', 'Business', 'Politics', 'Sports'\n",
122 | ")\n",
123 | "\n",
124 | "tmp = np.concatenate((y_train, y_test))\n",
125 | "unique, counts = np.unique(tmp, return_counts=True)\n",
126 | "del tmp\n",
127 | "plt.figure(\"Target Pie\", figsize=(10, 10))\n",
128 | "plt.title(\"Pie plot of the class frequencies\")\n",
129 | "plt.pie(counts, labels=LABELS)\n",
130 | "plt.legend(unique)\n",
131 | "plt.savefig(\"./img/stats_classes.png\")\n",
132 | "plt.show();"
133 | ]
134 | },
135 | {
136 | "cell_type": "markdown",
137 | "metadata": {},
138 | "source": [
139 | "**Try fewer timesteps**"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 5,
145 | "metadata": {},
146 | "outputs": [
147 | {
148 | "name": "stdout",
149 | "output_type": "stream",
150 | "text": [
151 | "Original sequence length : 5049\n"
152 | ]
153 | }
154 | ],
155 | "source": [
156 | "# Sequence length\n",
157 | "print(\"Original sequence length : \"+str(MAX_TEXT_LENGTH))\n",
158 | "\n",
159 | "# CHANGE MAX LENGTH (timesteps)\n",
160 | "MAX_TEXT_LENGTH = 2000"
161 | ]
162 | },
163 | {
164 | "cell_type": "code",
165 | "execution_count": 6,
166 | "metadata": {},
167 | "outputs": [],
168 | "source": [
169 | "def get_model(embedding_weights, word_index, vocab_dim, max_length, print_summary=True):\n",
170 | " inp = Input(shape=(max_length,))\n",
171 | " model = Embedding(input_dim=len(word_index)+1,\n",
172 | " output_dim=vocab_dim,\n",
173 | " trainable=False,\n",
174 | " weights=[embedding_weights])(inp)\n",
175 | "\n",
176 | " model = Conv1D(filters=32, kernel_size=5, padding='same', activation='relu')(model)\n",
177 | " model = MaxPooling1D(pool_size=2)(model)\n",
178 | " model = Flatten()(model)\n",
179 | " model = Dropout(0.1)(model)\n",
180 | " model = Dense(200, activation='relu')(model)\n",
181 | " model = Dropout(0.1)(model)\n",
182 | " model = Dense(5, activation='softmax')(model)\n",
183 | " model = Model(inputs=inp, outputs=model)\n",
184 | " \n",
185 | " model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])\n",
186 | " if print_summary:\n",
187 | " model.summary()\n",
188 | " return model\n",
189 | "\n",
190 | "\n",
191 | "def get_main_model(word_index, WORD_MODEL, EMBED_SIZE, MAX_TEXT_LENGTH):\n",
192 | " tmp = get_embedding_matrix(word_index, WORD_MODEL, EMBED_SIZE)\n",
193 | " model = get_model(tmp, word_index, EMBED_SIZE, MAX_TEXT_LENGTH, print_summary=True)\n",
194 | " return model\n",
195 | "\n",
196 | "\n",
197 | "class TestCallback(Callback):\n",
198 | " def __init__(self, test_data):\n",
199 | " self.test_data = test_data\n",
200 | "\n",
201 | " def on_epoch_end(self, epoch, logs={}):\n",
202 | " x, y = self.test_data\n",
203 | " loss, acc = self.model.evaluate(x, y, verbose=0)\n",
204 | " print('\\nTesting loss: {}, acc: {}\\n'.format(loss, acc))\n",
205 | "\n",
206 | "def train_fit_predict(model, x_train, x_test, y_train, y_test, batch_size, epochs, TestCallback=TestCallback):\n",
207 | " history = model.fit(x_train, y_train,\n",
208 | " batch_size=batch_size,\n",
209 | " epochs=epochs, verbose=1,\n",
210 | " validation_data=(x_test, y_test))\n",
211 | " #callbacks=[TestCallback((x_test, y_test))])\n",
212 | " return history, model"
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": 7,
218 | "metadata": {},
219 | "outputs": [
220 | {
221 | "name": "stdout",
222 | "output_type": "stream",
223 | "text": [
224 | "Building embedding matrix...\n",
225 | "Embedding matrix built.\n",
226 | "_________________________________________________________________\n",
227 | "Layer (type) Output Shape Param # \n",
228 | "=================================================================\n",
229 | "input_1 (InputLayer) (None, 2000) 0 \n",
230 | "_________________________________________________________________\n",
231 | "embedding_1 (Embedding) (None, 2000, 300) 115239600 \n",
232 | "_________________________________________________________________\n",
233 | "conv1d_1 (Conv1D) (None, 2000, 32) 48032 \n",
234 | "_________________________________________________________________\n",
235 | "max_pooling1d_1 (MaxPooling1 (None, 1000, 32) 0 \n",
236 | "_________________________________________________________________\n",
237 | "flatten_1 (Flatten) (None, 32000) 0 \n",
238 | "_________________________________________________________________\n",
239 | "dropout_1 (Dropout) (None, 32000) 0 \n",
240 | "_________________________________________________________________\n",
241 | "dense_1 (Dense) (None, 200) 6400200 \n",
242 | "_________________________________________________________________\n",
243 | "dropout_2 (Dropout) (None, 200) 0 \n",
244 | "_________________________________________________________________\n",
245 | "dense_2 (Dense) (None, 5) 1005 \n",
246 | "=================================================================\n",
247 | "Total params: 121,688,837\n",
248 | "Trainable params: 6,449,237\n",
249 | "Non-trainable params: 115,239,600\n",
250 | "_________________________________________________________________\n"
251 | ]
252 | }
253 | ],
254 | "source": [
255 | "model = get_main_model(word_index, WORD_MODEL, EMBED_SIZE, MAX_TEXT_LENGTH)"
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": 8,
261 | "metadata": {},
262 | "outputs": [],
263 | "source": [
264 | "del WORD_MODEL\n",
265 | "del word_index"
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "metadata": {},
271 | "source": [
272 | "## Start Learning"
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": null,
278 | "metadata": {
279 | "scrolled": true
280 | },
281 | "outputs": [],
282 | "source": [
283 | "time_start = time()\n",
284 | "history, model = train_fit_predict(model,\n",
285 | " x_train[:, :MAX_TEXT_LENGTH],\n",
286 | " x_test[:, :MAX_TEXT_LENGTH],\n",
287 | " train_y_cat, test_cat_y,\n",
288 | " batch_size=10, epochs=20)\n",
289 | "time_start = time() - time_start\n",
290 | "\n",
291 | "print(\"Took : \"+str(np.round(time_start, 2))+\" (s)\") "
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": 10,
297 | "metadata": {
298 | "scrolled": true
299 | },
300 | "outputs": [
301 | {
302 | "data": {
303 | "text/plain": [
304 | "dict_keys(['acc', 'val_loss', 'val_acc', 'loss'])"
305 | ]
306 | },
307 | "execution_count": 10,
308 | "metadata": {},
309 | "output_type": "execute_result"
310 | }
311 | ],
312 | "source": [
313 | "history.history.keys()"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": 11,
319 | "metadata": {},
320 | "outputs": [
321 | {
322 | "name": "stdout",
323 | "output_type": "stream",
324 | "text": [
325 | "22346/22346 [==============================] - 3s 121us/step\n"
326 | ]
327 | },
328 | {
329 | "data": {
330 | "text/plain": [
331 | "[0.42280422965826653, 0.9407947731137564]"
332 | ]
333 | },
334 | "execution_count": 11,
335 | "metadata": {},
336 | "output_type": "execute_result"
337 | }
338 | ],
339 | "source": [
340 | "model.evaluate(x_test[:, :MAX_TEXT_LENGTH], test_cat_y)"
341 | ]
342 | },
343 | {
344 | "cell_type": "code",
345 | "execution_count": 12,
346 | "metadata": {},
347 | "outputs": [
348 | {
349 | "name": "stdout",
350 | "output_type": "stream",
351 | "text": [
352 | "Confusion Matrix :\n",
353 | "\n",
354 | " [[2631 21 25 61 31]\n",
355 | " [ 36 3181 22 76 26]\n",
356 | " [ 66 30 2564 182 47]\n",
357 | " [ 154 121 272 3570 52]\n",
358 | " [ 45 23 12 21 9077]]\n",
359 | "\n",
360 | " precision recall f1-score support\n",
361 | "\n",
362 | " 0 0.90 0.95 0.92 2769\n",
363 | " 1 0.94 0.95 0.95 3341\n",
364 | " 2 0.89 0.89 0.89 2889\n",
365 | " 3 0.91 0.86 0.88 4169\n",
366 | " 4 0.98 0.99 0.99 9178\n",
367 | "\n",
368 | " micro avg 0.94 0.94 0.94 22346\n",
369 | " macro avg 0.92 0.93 0.93 22346\n",
370 | "weighted avg 0.94 0.94 0.94 22346\n",
371 | "\n"
372 | ]
373 | }
374 | ],
375 | "source": [
376 | "y_pred = np.argmax(model.predict(x_test[:, :MAX_TEXT_LENGTH]), axis=1)\n",
377 | "# y_pred = np_utils.to_categorical(np.argmax(y_pred, axis=1), len(CLASSES_LIST))\n",
378 | "\n",
379 | "cm = confmat(np.argmax(test_cat_y, axis=1), y_pred)\n",
380 | "# tn, fp, fn, tp = confmat().ravel()\n",
381 | "print(\"Confusion Matrix :\\n\\n {}\\n\".format(cm))\n",
382 | "print(creport(np.argmax(test_cat_y, axis=1), y_pred))"
383 | ]
384 | },
385 | {
386 | "cell_type": "markdown",
387 | "metadata": {},
388 | "source": [
389 | "**Early Stopping**"
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": 13,
395 | "metadata": {
396 | "scrolled": false
397 | },
398 | "outputs": [
399 | {
400 | "name": "stdout",
401 | "output_type": "stream",
402 | "text": [
403 | "Accuracy on train : 95.06 %\n",
404 | "Accuracy on test : 94.55 %\n",
405 | "Loss on train : 15.79 %\n",
406 | "Loss on test : 18.8 %\n"
407 | ]
408 | }
409 | ],
410 | "source": [
411 | "n = np.argmin(history.history['val_loss'])\n",
412 | "\n",
413 | "print(\"Accuracy on train : {} %\".format(np.round(history.history['acc'][n]*100, 2)))\n",
414 | "print(\"Accuracy on test : {} %\".format(np.round(history.history['val_acc'][n]*100, 2)))\n",
415 | "print(\"Loss on train : {} %\".format(np.round(history.history['loss'][n]*100, 2)))\n",
416 | "print(\"Loss on test : {} %\".format(np.round(history.history['val_loss'][n]*100, 2)))"
417 | ]
418 | },
419 | {
420 | "cell_type": "code",
421 | "execution_count": 16,
422 | "metadata": {},
423 | "outputs": [
424 | {
425 | "data": {
426 | "image/png": "\n",
427 | "text/plain": [
428 | ""
429 | ]
430 | },
431 | "metadata": {
432 | "needs_background": "light"
433 | },
434 | "output_type": "display_data"
435 | }
436 | ],
437 | "source": [
438 | "plt.figure(\"Loss Plot\", figsize=(16, 8))\n",
439 | "plt.plot(range(1, len(history.history['loss'])+1), history.history['loss'], label=\"train loss\")\n",
440 | "plt.plot(range(1, len(history.history['val_loss'])+1), history.history['val_loss'], label=\"test loss\")\n",
441 | "plt.plot(n+1,history.history[\"val_loss\"][n],\"r*\", label=\"Lowest loss\")\n",
442 | "plt.legend()\n",
443 | "plt.title(\"Learning Curve\")\n",
444 | "plt.ylabel(\"loss (cross_entropy)\")\n",
445 | "plt.xlabel(\"epochs\")\n",
446 | "#plt.savefig(\"./img/loss_500_2000_sg_true_40.png\")\n",
447 | "plt.show();"
448 | ]
449 | },
450 | {
451 | "cell_type": "code",
452 | "execution_count": 17,
453 | "metadata": {},
454 | "outputs": [
455 | {
456 | "data": {
457 | "image/png": "\n",
458 | "text/plain": [
459 | ""
460 | ]
461 | },
462 | "metadata": {
463 | "needs_background": "light"
464 | },
465 | "output_type": "display_data"
466 | }
467 | ],
468 | "source": [
469 | "plt.figure(\"Accuracy Plot\", figsize=(16, 8))\n",
470 | "plt.plot(range(1, len(history.history['acc'])+1), history.history['acc'], label=\"train accuracy\")\n",
471 | "plt.plot(range(1, len(history.history['val_acc'])+1), history.history['val_acc'], label=\"test accuracy\")\n",
472 | "plt.plot(n+1,history.history[\"val_acc\"][n],\"r*\", label=\"Opt. Acc. (csp. Lowest loss)\")\n",
473 | "plt.legend()\n",
474 | "plt.title(\"Accuracy Curve\")\n",
475 | "plt.ylabel(\"accuracy\")\n",
476 | "plt.xlabel(\"epochs\")\n",
477 | "#plt.savefig(\"./img/acc_500_2000_sg_true_40.png\")\n",
478 | "plt.show()"
479 | ]
480 | }
481 | ],
482 | "metadata": {
483 | "kernelspec": {
484 | "display_name": "Python 3",
485 | "language": "python",
486 | "name": "python3"
487 | },
488 | "language_info": {
489 | "codemirror_mode": {
490 | "name": "ipython",
491 | "version": 3
492 | },
493 | "file_extension": ".py",
494 | "mimetype": "text/x-python",
495 | "name": "python",
496 | "nbconvert_exporter": "python",
497 | "pygments_lexer": "ipython3",
498 | "version": "3.5.3"
499 | }
500 | },
501 | "nbformat": 4,
502 | "nbformat_minor": 2
503 | }
504 |
--------------------------------------------------------------------------------
/history/fasttext-true.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ismailhachimi/Text-Classification/dabc42fe970ce28c9b85edc889cadfaad56dc998/history/fasttext-true.db
--------------------------------------------------------------------------------
/history/fasttext.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ismailhachimi/Text-Classification/dabc42fe970ce28c9b85edc889cadfaad56dc998/history/fasttext.db
--------------------------------------------------------------------------------
/history/hist_sg_true_10.json:
--------------------------------------------------------------------------------
1 | {"histories": [{"val_acc": [0.9367671994783987, 0.9432113057954836, 0.9165846162203826, 0.9159133550058726, 0.9499686679481169, 0.9488946495653212, 0.9484023919445244, 0.9447328309966726, 0.9499686675480141, 0.9483576411421207], "acc": [0.910518889413808, 0.9465664159051016, 0.9546888571237809, 0.9621735860500612, 0.9693003014106487, 0.9760018752385247, 0.9810252587333955, 0.9865073477658269, 0.9898413529607529, 0.9926830889573658], "val_loss": [0.19893465467018095, 0.18101920540518676, 0.24057828282207552, 0.2646416127645017, 0.1646435671975449, 0.18138429793427174, 0.18727710825099395, 0.20537735617229325, 0.20423660150087222, 0.20904433203058764], "loss": [0.27414566198557483, 0.17081394367577207, 0.14223958309651527, 0.11899526745341552, 0.09691917481857915, 0.07759046197950178, 0.060777262810199045, 0.04593987015431495, 0.03527709956128818, 0.027566617913299563]}, {"val_acc": [0.934798167053379, 0.9461201041374063, 0.9453145908144288, 0.9495211604255428, 0.9495211603188487, 0.9489841519756688, 0.9501029198218575, 0.9477311310020842, 0.9487156474013086, 0.9490289023512961], "acc": [0.9088630751460528, 0.946599979671217, 0.9552146898281368, 0.9624085329103416, 0.9685395217339812, 0.9754648542538079, 0.9813832727365438, 0.9861940855637531, 0.989886104617787, 0.9923138870299563], "val_loss": [0.21034979094379064, 0.16769331665991927, 0.1705687659888736, 0.15609147102216162, 0.16108275763479318, 0.16875199289308485, 0.174681825734911, 0.18506558913010457, 0.19078036113703456, 0.20959828927517998], "loss": [0.2785527036560972, 0.1712198272376549, 0.14186773919252704, 0.11916453988589172, 0.098177855091003, 0.07812796809028388, 0.06121879085688519, 0.04665120442804122, 0.03537503705908532, 0.027780377439364092]}, {"val_acc": [0.9360064367977836, 0.939675997446892, 0.9473283739618314, 0.9473283743405955, 0.9475968786482204, 0.9437483148428445, 0.9496106620890318, 0.9422715403373652, 0.9473283737537781, 0.948312890179676], "acc": [0.9055178829452887, 
0.9459622672747889, 0.9549126159531035, 0.9623078413185799, 0.9694681205012986, 0.9757781164998941, 0.9815287159319913, 0.9861493337533428, 0.9890805732507427, 0.9914300400130324], "val_loss": [0.20531874990100438, 0.19220247038972801, 0.16304211552942177, 0.1645599808087397, 0.1654309294998345, 0.19074049428895298, 0.187963104372525, 0.21775771949886533, 0.20203972681571786, 0.21165696710526635], "loss": [0.285667455385242, 0.17124268602094297, 0.14260289921352617, 0.11918736431204081, 0.09737023335265163, 0.07739137735897424, 0.060746379107189774, 0.04680705731217625, 0.03709099437761636, 0.03063528629821756]}, {"val_acc": [0.9411527718990244, 0.941466027169094, 0.9496554126513738, 0.9494764096018004, 0.9499686679214434, 0.9494764098685355, 0.9484918934426375, 0.9485366443250617, 0.9506846799330223, 0.9504609260810454], "acc": [0.9096797946207431, 0.9463762209192493, 0.9547447968624536, 0.9626994190611695, 0.9697366309236374, 0.9763598892083302, 0.9817412865596417, 0.9859143871037879, 0.9895169027637314, 0.9917768660452395], "val_loss": [0.19676309556084068, 0.1886305352102378, 0.1624925240959776, 0.16485557859602978, 0.16408765362897673, 0.172538202239932, 0.1911536731829714, 0.18826788601572564, 0.19869800438589064, 0.20909186407930852], "loss": [0.27950917622885624, 0.17291577551998558, 0.14372235047811688, 0.11901762566447298, 0.0963168946510863, 0.07688624654451189, 0.059588463489451855, 0.04668067241640392, 0.03612873973065506, 0.02834626403102065]}, {"val_acc": [0.9390047360510021, 0.9442405730237853, 0.9321131222166776, 0.9347086653418777, 0.9497449140908052, 0.9492526555844475, 0.9510426851466084, 0.9490289019725321, 0.9515349434395779, 0.9497001628082782], "acc": [0.9084379337457115, 0.9462195897228525, 0.9541518360096947, 0.9618491360797695, 0.9688415961424969, 0.9759794993166482, 0.9814727762106568, 0.9860374543706905, 0.9892595802256426, 0.9922691352928998], "val_loss": [0.20577427319250877, 0.17671398141888092, 0.21065657086855072, 
0.2050068801912736, 0.1681154088287906, 0.16931459838592916, 0.1764996162501668, 0.18438988332639236, 0.1882768616327003, 0.20438481822349935], "loss": [0.2816499243872643, 0.1725734329471178, 0.14408687920883959, 0.12051941940290962, 0.0991861337199054, 0.07911792923607175, 0.059949337735451365, 0.04624032346338096, 0.03607873275707087, 0.02818731296541775]}]}
--------------------------------------------------------------------------------
/history/lstm_results.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ismailhachimi/Text-Classification/dabc42fe970ce28c9b85edc889cadfaad56dc998/history/lstm_results.db
--------------------------------------------------------------------------------
/history/results.csv:
--------------------------------------------------------------------------------
1 | ,train acc,train acc std,test acc,test acc std,train loss,train loss std,test loss,test loss std
2 | w2v sg train,96.202,0.64,94.924,0.1,12.032,2.02,16.286,0.39
3 | w2v cbow No train,95.083,0.04,94.514,0.07,15.751,0.084,19.071,0.19
4 | fasttext sg No train,95.916,0.009,95.382,0.017,12.907,0.012,14.690,0.019
5 | fasttext sg train,96.865,0.012,96.166,0.018,10.102,0.058,12.812,0.045
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 |
2 | # Comparative Study of CNN and RNN for Arabic Text Classification
3 |
4 | **Under supervision of Mr. [Stéphane Canu](https://scholar.google.fr/citations?user=PpibCZUAAAAJ&hl=fr&oi=ao)**
5 |
6 | This project is a step towards understanding the state of the art of text classification in deep learning. As a result, we compare CNN and RNN to understand the pros and cons of each architecture.
7 |
8 | ## Project Plan
9 | 1. [Introduction](#1-introduction)
10 | 2. [State of the art](#2-state-of-the-art)
11 | 1. [Word representation](#i-word-representation)
12 | 2. [Text classification](#ii-text-classification)
13 | 3. [Data for Arabic text classification](#3-data-for-arabic-text-classification)
14 | 4. [Implementation and results](#4-implementation-and-results)
15 | 5. [Perspectives](#5-perspectives)
16 | 6. [Conclusion](#6-conclusion)
17 | 7. [Bibliography](#7-bibliography)
18 |
19 | ## 1. Introduction
20 |
21 | This project is an assembly of research and learning processes which aims to understand the state-of-the-art of text classification in deep learning as it requires us to look for a relevant data source, go through word representation models and compare them as a first step. In the second phase, we compare text classification based technologies and we try to understand how they work. Finally, we explain in detail text classification in deep learning and propose an attention-based architecture as a result of this project.
22 |
23 | ## 2. State of the art
24 |
25 | ### i. Word representation
26 |
27 | Word representation has been a major shift in NLP research: it underpins many recent advances in text analysis tasks such as text classification, topic modeling and intent recognition.
28 |
29 | In this project, we try to survey the state of the art of Arabic text classification using language models and deep learning. We use language models because they are simple to integrate into a neural network and because they represent words in a vector space, which helps to compute dependencies between words.
30 |
31 | The main state of the art Word Representation models are :
32 |
33 | > - [Word2Vec](https://arxiv.org/pdf/1310.4546) (2013)
34 | > - [GloVe](https://nlp.stanford.edu/pubs/glove.pdf) (2014)
35 | > - [FastText](https://arxiv.org/pdf/1607.04606) (2017)
36 | > - [ELMo](https://arxiv.org/pdf/1802.05365) (2018)
37 |
38 | For the first part, we use a pretrained Word2Vec language model taken from this [Github Repository](https://github.com/bakrianoo/aravec), which is the outcome of the research paper cited in the [Citation](#citation) section below. Thanks to [bakrianoo - Abu Bakr Soliman](https://github.com/bakrianoo).
39 |
40 | #### Citation
41 |
42 | > Abu Bakr Soliman, Kareem Eisa, and Samhaa R. El-Beltagy, “AraVec: A set of Arabic Word Embedding Models for use in Arabic NLP”, in proceedings of the 3rd International Conference on Arabic Computational Linguistics (ACLing 2017), Dubai, UAE, 2017.
43 |
44 | ### ii. Text classification
45 |
46 | In this part, we explain the state-of-the-art of text classification in deep learning by going through the relevant research result of previous years.
47 |
48 | ## 3. Data for Arabic text classification
49 |
50 | In this part, we cite the text data that we chose to train our text classification model, using the Word2Vec word embedding model mentioned in this [Section](#i-word-representation).
51 |
52 | The dataset is a collection of Arabic texts covering the modern Arabic language used in newspaper articles. We thank Mohamed BINIZ for making it available.
53 |
54 | #### Citation
55 |
56 | > Mohamed, BINIZ (2018), “DataSet for Arabic Classification”, Mendeley Data, v2
57 | > [DOI](http://dx.doi.org/10.17632/v524p5dhpj.2)
58 | > Dataset: DataSet for Arabic Classification
59 |
60 | ## 4. Implementation and results
61 |
62 | ## 5. Perspectives
63 |
64 | As future work, we plan to focus on Attention Models using CNN.
65 |
66 | ## 6. Conclusion
67 |
68 | A brief summary of the work done during the project period, the achieved results and the future improvements that might be applied to our solution.
69 |
70 | ## 7. Bibliography
71 |
72 | > [1] Piotr Bojanowski, Edouard Grave, Armand Joulin, and Tomas Mikolov. Enriching word vectors with subword information. arXiv preprint arXiv:1607.04606, 2016.
73 | >
74 | > [2] Junyoung Chung, Caglar Gulcehre, KyungHyun Cho, and Yoshua Bengio. Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555, 2014.
75 | >
76 | > [3] Jeffrey L Elman. Finding structure in time. Cognitive science, 14(2):179–211, 1990.
77 | >
78 | > [4] Sepp Hochreiter and Jürgen Schmidhuber. Long short-term memory. Neural computation, 9(8):1735–1780, 1997.
79 | >
80 | > [5] Yann LeCun, Léon Bottou, Yoshua Bengio, and Patrick Haffner. Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86(11):2278–2324, 1998.
81 | >
82 | > [6] Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg S Corrado, and Jeff Dean. Distributed representations of words and phrases and their compositionality. In Advances in neural information processing systems, pages 3111–3119, 2013.
83 | >
84 | > [7] Jeffrey Pennington, Richard Socher, and Christopher Manning. Glove: Global vectors for word representation. In Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP), pages 1532–1543, 2014.
85 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import io
3 |
4 | from keras.layers import Embedding, Dense, LSTM, Dropout, Input
5 | #from keras.layers import GRU, MaxPooling1D, Conv1D, Flatten
6 | from keras.preprocessing import text, sequence
7 | from keras.models import Model
8 | from keras.utils import np_utils
9 |
10 | from gensim.models.keyedvectors import KeyedVectors
11 |
12 | import pandas as pd
13 | import numpy as np
14 |
15 | from sklearn import preprocessing
16 | from sklearn.model_selection import train_test_split
17 |
def get_embedding_matrix(word_index, embedding_index, vocab_dim):
    """Build an embedding matrix for the words listed in ``word_index``.

    Args:
        word_index: Mapping of word -> integer row index in the matrix.
        embedding_index: Object exposing ``get_vector(word)``. A lookup that
            raises ``KeyError`` is treated as an out-of-vocabulary word.
        vocab_dim: Number of columns of the matrix (embedding size).

    Returns:
        A ``numpy`` array of shape ``(len(word_index) + 1, vocab_dim)``.
        Rows of words without a vector are left filled with zeros.
    """
    print('Building embedding matrix...')
    embedding_matrix = np.zeros((len(word_index) + 1, vocab_dim))
    for word, i in word_index.items():
        try:
            embedding_matrix[i] = embedding_index.get_vector(word)
        except KeyError:
            # Out-of-vocabulary word: keep its all-zero row. Any other error
            # (e.g. a dimension mismatch) is a real problem and must surface.
            continue
    print('Embedding matrix built.')
    return embedding_matrix
28 |
29 |
def get_model_first(embedding_weights, word_index, vocab_dim, max_length,
                    print_summary=True, n_classes=5):
    """Build and compile the stacked-LSTM text classifier.

    The network is: frozen embedding -> LSTM (sequences) -> dropout ->
    LSTM -> dropout -> dense ReLU -> dropout -> softmax.

    Args:
        embedding_weights: Initial weights for the embedding layer; passed
            as ``weights=[embedding_weights]`` and not trained.
        word_index: Word -> index mapping; only its length is used, to size
            the embedding input dimension (``len(word_index) + 1``).
        vocab_dim: Embedding size, also used as the number of LSTM units.
        max_length: Length of the input sequences.
        print_summary: When true, print the Keras model summary.
        n_classes: Number of output classes of the softmax layer. Defaults
            to 5, the value that was previously hard-coded.

    Returns:
        The compiled Keras ``Model`` (categorical cross-entropy, Adam,
        accuracy metric).
    """
    inp = Input(shape=(max_length,))
    x = Embedding(input_dim=len(word_index) + 1,
                  output_dim=vocab_dim,
                  trainable=False,
                  weights=[embedding_weights])(inp)

    x = LSTM(vocab_dim, return_sequences=True)(x)
    x = Dropout(0.2)(x)
    x = LSTM(vocab_dim, return_sequences=False)(x)
    x = Dropout(0.1)(x)
    x = Dense(int(vocab_dim/10), activation='relu')(x)
    x = Dropout(0.1)(x)
    x = Dense(n_classes, activation='softmax')(x)

    model = Model(inputs=inp, outputs=x)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    if print_summary:
        model.summary()
    return model
49 |
def train_fit_predict(model, x_train, x_test, y_train, batch_size, epochs):
    """Fit ``model`` on the training data, then predict on the test inputs.

    Args:
        model: Object exposing ``fit`` and ``predict``.
        x_train: Training inputs.
        x_test: Inputs to predict on after training.
        y_train: Training targets.
        batch_size: Batch size forwarded to ``model.fit``.
        epochs: Number of epochs forwarded to ``model.fit``.

    Returns:
        A tuple ``(history, score, model)``: the value returned by
        ``model.fit``, the value returned by ``model.predict(x_test)``, and
        the same model object that was passed in.
    """
    fit_result = model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=1,
    )
    predictions = model.predict(x_test)
    return fit_result, predictions, model
56 |
def get_init_parameters(path, ext=None):
    """Load word vectors from ``path`` and derive the basic parameters.

    Args:
        path: Location of the saved vectors.
        ext: When equal to ``'vec'`` the file is read with
            ``KeyedVectors.load_word2vec_format``; otherwise with
            ``KeyedVectors.load``.

    Returns:
        A tuple ``(word_model, index_dict, n_words, vocab_dim)`` where
        ``word_model`` is the ``.wv`` attribute of the loaded object,
        ``index_dict`` maps each word to its position in ``index2word``
        plus one, ``n_words`` is ``len(word_model.vocab)`` and ``vocab_dim``
        is the length of the first word's vector.
    """
    loader = KeyedVectors.load_word2vec_format if ext == 'vec' else KeyedVectors.load
    word_model = loader(path).wv

    n_words = len(word_model.vocab)
    first_word = word_model.index2word[0]
    vocab_dim = word_model[first_word].shape[0]

    # Indices start at 1, so index 0 is never assigned to a word.
    index_dict = {word_model.index2word[pos]: pos + 1 for pos in range(n_words)}
    return word_model, index_dict, n_words, vocab_dim
68 |
def get_max_length(text_data, return_line=False):
    """Find the largest whitespace-separated token count among the lines.

    Args:
        text_data: Iterable of strings.
        return_line: When true, also return the first line reaching the
            maximum.

    Returns:
        The maximum token count, or ``(line, count)`` when ``return_line``
        is true. For empty input (or lines without tokens) the count is 0
        and the line is the empty string.
    """
    longest, best = "", 0
    for candidate in text_data:
        n_tokens = len(candidate.split())
        if n_tokens <= best:
            continue
        longest, best = candidate, n_tokens
    return (longest, best) if return_line else best
81 |
def split_datasets(data_paths, test_size, header=True, seed=42):
    """Read ``text,label`` files and split them into train and test sets.

    Each non-blank line is split on ``','``; the first field is the text and
    the second field (without its trailing newline) is the label.

    Args:
        data_paths: Iterable of paths of the files to read.
        test_size: Forwarded to ``train_test_split``.
        header: When true, the first line of *every* file is skipped.
            (Previously only the first file's header was skipped, so header
            rows of later files ended up in the data.)
        seed: Random state forwarded to ``train_test_split``.

    Returns:
        ``(x_train, x_test, y_train, y_test, max_length)`` where
        ``max_length`` is the largest token count over all texts read.
    """
    x = []
    y = []
    for data_path in data_paths:
        # Explicit UTF-8 so the text decodes the same way on every platform.
        with io.open(data_path, 'r', encoding='utf-8') as f:
            if header:
                next(f, None)  # drop this file's header line, if any
            for line in f:
                if not line.strip():
                    continue  # ignore blank lines (e.g. trailing newline)
                temp = line.split(',')
                x.append(temp[0])
                y.append(temp[1].replace('\n', ''))
    max_length = get_max_length(x)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=seed)
    print('Dataset splited.')
    return x_train, x_test, y_train, y_test, max_length
98 |
def get_train_test(train_raw_text, test_raw_text, n_words, max_length):
    """Tokenize and pad the raw train/test texts.

    The tokenizer is fitted on the training texts only and then applied to
    both sets. Sequences are padded and truncated at the end (``'post'``)
    to ``max_length``.

    Args:
        train_raw_text: Training texts.
        test_raw_text: Test texts.
        n_words: Passed to the tokenizer as ``num_words``.
        max_length: Target length of every padded sequence.

    Returns:
        ``(train_sequences, test_sequences, word_index)`` where
        ``word_index`` is the fitted tokenizer's word index.
    """
    tokenizer = text.Tokenizer(num_words=n_words)
    tokenizer.fit_on_texts(list(train_raw_text))

    pad_options = dict(maxlen=max_length, padding='post', truncating='post')
    train_padded = sequence.pad_sequences(
        tokenizer.texts_to_sequences(train_raw_text), **pad_options)
    test_padded = sequence.pad_sequences(
        tokenizer.texts_to_sequences(test_raw_text), **pad_options)
    return train_padded, test_padded, tokenizer.word_index
108 |
def class_str_2_ind(x_train, x_test, y_train, y_test, classes, n_words, max_length):
    """Turn raw texts and string labels into arrays usable for training.

    Labels are encoded with a ``LabelEncoder`` fitted on ``classes``; the
    training labels are additionally one-hot encoded. Texts are tokenized
    and padded through ``get_train_test``.

    Args:
        x_train: Training texts.
        x_test: Test texts.
        y_train: Training labels (values taken from ``classes``).
        y_test: Test labels (values taken from ``classes``).
        classes: All class labels; its length sets the one-hot width.
        n_words: Forwarded to ``get_train_test``.
        max_length: Forwarded to ``get_train_test``.

    Returns:
        ``(x_vec_train, x_vec_test, y_train, y_test, train_y_cat,
        word_index)`` with integer-encoded ``y_train``/``y_test`` and the
        one-hot ``train_y_cat``.
    """
    print('Converting data to trainable form...')
    label_encoder = preprocessing.LabelEncoder()
    label_encoder.fit(classes)
    encoded_train = label_encoder.transform(y_train)
    encoded_test = label_encoder.transform(y_test)
    one_hot_train = np_utils.to_categorical(encoded_train, len(classes))

    padded = get_train_test(x_train, x_test, n_words, max_length)
    x_vec_train, x_vec_test, word_index = padded
    print('Number of training examples: ' + str(len(x_vec_train)))
    print('Number of testing examples: ' + str(len(x_vec_test)))
    return x_vec_train, x_vec_test, encoded_train, encoded_test, one_hot_train, word_index
120 |
def get_main_model(word_index, WORD_MODEL, EMBED_SIZE, MAX_TEXT_LENGTH):
    """Build the embedding matrix and the compiled classifier in one call.

    Args:
        word_index: Word -> index mapping.
        WORD_MODEL: Word-vector object passed to ``get_embedding_matrix``.
        EMBED_SIZE: Embedding dimension.
        MAX_TEXT_LENGTH: Length of the model's input sequences.

    Returns:
        The compiled model returned by ``get_model_first`` (its summary is
        printed).
    """
    embedding_matrix = get_embedding_matrix(word_index, WORD_MODEL, EMBED_SIZE)
    # The previous call targeted ``get_model``, which this module neither
    # defines nor imports (NameError); ``get_model_first`` is the builder
    # with the matching signature.
    model = get_model_first(embedding_matrix, word_index, EMBED_SIZE,
                            MAX_TEXT_LENGTH, print_summary=True)
    return model
125 |
def get_embedding_vectors(vectors, index_dict, n_words, vocab_dim):
    """Assemble a weight matrix from a word -> vector lookup.

    Args:
        vectors: Object indexable by word, returning that word's vector.
        index_dict: Mapping of word -> row index in the result.
        n_words: Number of words; the result has ``n_words + 1`` rows.
        vocab_dim: Number of columns (vector size).

    Returns:
        A ``numpy`` array of shape ``(n_words + 1, vocab_dim)``; rows not
        referenced by ``index_dict`` stay zero.
    """
    weights = np.zeros((n_words + 1, vocab_dim))
    for token in index_dict:
        row = index_dict[token]
        weights[row, :] = vectors[token]
    return weights
131 |
def load_vectors(fname):
    """Read a text-format word-vector file into a dictionary.

    The first line must hold two integers (parsed but otherwise unused);
    every following line is ``word v1 v2 ...`` separated by single spaces.

    Args:
        fname: Path of the vector file (read as UTF-8, undecodable bytes
            are ignored).

    Returns:
        Dict mapping each word to a ``list`` of floats. A list is returned
        instead of a lazy ``map`` object so each vector can be read more
        than once and converted to an array.

    Raises:
        ValueError: If the header line is not two integers or a component
            is not a valid float.
    """
    # ``with`` guarantees the file is closed, even if parsing fails.
    with io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore') as fin:
        # Header validation only: raises if it is not exactly two integers.
        _n_vectors, _dim = map(int, fin.readline().split())
        data = {}
        for line in fin:
            tokens = line.rstrip().split(' ')
            data[tokens[0]] = [float(value) for value in tokens[1:]]
    return data
140 |
141 |
142 | # ==========Deprecated Code===========
143 |
144 |
def error_on_batch(res, y_test):
    """Compare arg-max predictions with arg-max targets.

    NOTE: despite the name, the value returned is the fraction of rows
    whose predicted class equals the real class (matches divided by the
    number of predictions), not an error rate.

    Args:
        res: Iterable of per-class score rows (predictions).
        y_test: Iterable of per-class target rows.

    Returns:
        Number of matching rows divided by ``len(res)`` predictions.
    """
    predicted = [np.argmax(row) for row in res]
    expected = [np.argmax(row) for row in y_test]
    hits = sum(1 for guess, truth in zip(predicted, expected) if guess == truth)
    return hits / len(predicted)
151 |
def convert_data(data, vocab, index):
    """Map each whitespace-separated word of ``data`` to its index.

    Args:
        data: Text to convert.
        vocab: Container of known words (tested with ``in``).
        index: Mapping of word -> index; must contain the key ``''``,
            whose value is used for words not in ``vocab``.

    Returns:
        List of indices, one per word of ``data``.
    """
    return [index[token] if token in vocab else index[''] for token in data.split()]
--------------------------------------------------------------------------------