├── requirements.txt ├── svm_model.sav ├── media ├── news.png └── result.gif ├── system_design.png ├── vectorizer.pickle ├── LICENSE ├── app.py ├── README.md ├── stopword.txt └── Burmese_News_Classification.ipynb /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | matplotlib 3 | numpy 4 | scikit-learn 5 | pandas 6 | pyidaungsu 7 | -------------------------------------------------------------------------------- /svm_model.sav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThuraAung1601/Automatic-Myanmar-News-Classification/HEAD/svm_model.sav -------------------------------------------------------------------------------- /media/news.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThuraAung1601/Automatic-Myanmar-News-Classification/HEAD/media/news.png -------------------------------------------------------------------------------- /media/result.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThuraAung1601/Automatic-Myanmar-News-Classification/HEAD/media/result.gif -------------------------------------------------------------------------------- /system_design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThuraAung1601/Automatic-Myanmar-News-Classification/HEAD/system_design.png -------------------------------------------------------------------------------- /vectorizer.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThuraAung1601/Automatic-Myanmar-News-Classification/HEAD/vectorizer.pickle -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Thura Aung 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
"""Streamlit front end (app.py) for the Myanmar news classifier.

At start-up the script reads ``stopword.txt``, ``svm_model.sav`` and
``vectorizer.pickle`` from the working directory.  It then renders a text
area and a "Predict" button; pressing the button tokenizes the text, removes
stop words, vectorizes the result and shows the predicted news category.
"""
import pickle

import numpy as np
import pyidaungsu as pds
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer

# One stop word per line; trailing whitespace/newlines are stripped.
with open("./stopword.txt", encoding='utf8') as stopwordsfile:
    stopwordslist = [line.rstrip() for line in stopwordsfile]

# Set copy of the list so each membership test is O(1) instead of O(n).
_STOPWORD_SET = frozenset(stopwordslist)

# Categories the UI knows how to report.
_KNOWN_CATEGORIES = ('Politics', 'Sports', 'Entertainment', 'Business')


def stop_word(sentence):
    """Return *sentence* with every stop word removed.

    The sentence is split on whitespace, words found in the stop-word list
    are dropped, and the remaining words are re-joined with single spaces.
    """
    return ' '.join(
        word for word in sentence.split() if word not in _STOPWORD_SET
    )


def tokenize(line):
    """Segment *line* into words and return them as one space-joined string.

    Word segmentation is delegated to ``pyidaungsu.tokenize(form="word")``;
    stop words are removed afterwards.

    NOTE(review): the training notebook also runs a ``clean_sentence()`` step
    (underscores to spaces; digits, punctuation and Latin characters removed)
    before tokenizing, which this app does not do -- confirm whether that
    difference is intended.
    NOTE(review): the notebook fitted the vectorizer with its own
    ``tokenizer=tokenize`` callable that returns a token list, whereas this
    function returns a string -- verify which callable the unpickled
    vectorizer ends up using.
    """
    words = pds.tokenize(line, form="word")
    return stop_word(' '.join(str(word) for word in words))


# Load the trained model and the fitted vectorizer from disk.  ``with`` makes
# sure the file handles are closed (the bare ``open()`` calls leaked them).
# SECURITY: unpickling executes arbitrary code; only load these files from a
# trusted copy of the repository.
filename = './svm_model.sav'
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

with open("vectorizer.pickle", "rb") as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

st.title('Automatic News Classification System for Myanmar Language')
st.subheader("Input the News content below")
raw_text = st.text_area("Enter your news Content Here", height=200)
predict_btt = st.button("Predict")

if predict_btt:
    if not raw_text.strip():
        # Nothing to classify: vectorizing an empty string would still make
        # the model emit some label, which would be meaningless.
        st.warning("Please enter some news content first.")
    else:
        # Tokenize only when a prediction is actually requested.
        sentence = tokenize(raw_text)
        data = vectorizer.transform([sentence]).toarray()
        prediction = loaded_model.predict(data)
        # predict() returns one label per input row; exactly one row is sent.
        label = prediction[0]
        if label in _KNOWN_CATEGORIES:
            st.text(f"This is {label} News")
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Automatic Myanmar News Classification 2 | 3 | ## Project Overview 4 | 5 | Automatic Myanmar News Classification System using Linear SVM. I also experimented with other machine learning algorithms - Logistic Regression, Multinomial Naive Bayes, Random Forest and Decision Tree. The weighted F-score is highest when using Linear SVM. 6 | - A.H.Khine, K.T.Nwet and K.M.Soe, in "Automatic Myanmar News Classification", proposed a system based on Naive Bayes. I used their dataset for training the model.[1] 7 | - Tokenization is done using the pyidaungsu library, which is based on fasttext. 8 | - The vectorizer I used is TF-IDF. 9 | - N-gram for TF-IDF is Unigram + Bigram 10 | 11 | ## System Design 12 | 13 | - I use the system design proposed in Nwet, Khin & Darren, Seth, Machine Learning Algorithms for Myanmar News Classification [2] 14 | 15 | ![System](system_design.png) 16 | 17 | ## Dataset 18 | 19 | Dataset is taken from Aye Hnin Khine's [repository](https://github.com/ayehninnkhine/MyanmarNewsClassificationSystem) 20 | ![Dataset](media/news.png) 21 | 22 | ## Experiments 23 | 24 | - For feature extraction, vectorize text data using TF-IDF vectorizer available in scikit-learn 25 | - Then train on different machine learning models for classification 26 | 27 | | Model | F1-score | 28 | |:---------------------------:|:------------:| 29 | | Decision Tree | 67% | 30 | | Random Forest | 82% | 31 | | Multinomial Naive Bayes | 84% | 32 | | Logistic Regression | 86% | 33 | | **Linear SVM** | **88%** | 34 | 35 | ## Demonstration 36 | 37 | Demonstration available [HERE](https://share.streamlit.io/thuraaung1601/automatic-myanmar-news-classification/main/app.py) 38 | ![Demo](media/result.gif) 39 | 40 | ## How to run demo 41 | - Download this repository 42 | - Install requirements 43 | ```{r, engine='bash', 
count_lines} 44 | tra@thura-pc:~$ pip install -r requirements.txt 45 | ``` 46 | - Run the main notebook - Burmese_News_Classification.ipynb for training 47 | - For Demo 48 | ```{r, engine='bash', count_lines} 49 | tra@thura-pc:~$ streamlit run app.py 50 | ``` 51 | ## Future Work 52 | - More data is needed 53 | - Test with Hybrid methods and Deep Learning Approaches 54 | 55 | ## References 56 | [1] A.H.Khine, K.T.Nwet, K.M.Soe, Automatic Myanmar News Classification, 15th Proceedings of International Conference on Computer Applications, February 2017, pp. 401-408 57 |
58 | [2] Nwet, Khin & Darren, Seth. (2019). MACHINE LEARNING ALGORITHMS FOR MYANMAR NEWS CLASSIFICATION. Journal of Natural Language Processing. 8. 17-24. 59 | 60 | 61 | -------------------------------------------------------------------------------- /stopword.txt: -------------------------------------------------------------------------------- 1 | က 2 | ကတည်းက 3 | ကတော့ 4 | ကပ 5 | ကဘာ 6 | ကရ 7 | ကလ 8 | ကလူ 9 | ကာ 10 | ကာလ 11 | ကား 12 | ကို 13 | ကိုယ့် 14 | ကိုယ်တိုင် 15 | ကုန် 16 | ကေ 17 | ကော 18 | ကောင်း 19 | ကဲ 20 | ကျ 21 | ကျန 22 | ကျပ် 23 | ကျော် 24 | ကျွန်တော့် 25 | ကျွန်တော် 26 | ကျွန်မ 27 | ကြ 28 | ကြည့် 29 | ကြာ 30 | ကြာတော့ 31 | ကြား 32 | ကြိမ်မြောက် 33 | ကြီး 34 | ကြောင့် 35 | ကြောင်း 36 | ခ 37 | ခင် 38 | ခဏခဏ 39 | ခန့် 40 | ခါ 41 | ခိုင်း 42 | ခု 43 | ခုချိန် 44 | ခေါ် 45 | ခဲ့ 46 | ခံ 47 | ချ 48 | ချက် 49 | ချင် 50 | ချင်း 51 | ချိန် 52 | ချုပ် 53 | ခြ 54 | ခြင်း 55 | ခွင့် 56 | စ 57 | စက 58 | စစ် 59 | စဉ် 60 | စတင် 61 | စရာ 62 | စသည့် 63 | စာ 64 | စား 65 | စိတ် 66 | စိုး 67 | စီ 68 | စီး 69 | စု 70 | စုံ 71 | စေ 72 | စေသော 73 | စွာ 74 | ဆ 75 | ဆက် 76 | ဆက်စပ် 77 | ဆန 78 | ဆို 79 | ဆိုင် 80 | ဆိုင်ရာ 81 | ဆိုပြီး 82 | ဆိုသည် 83 | ဆီ 84 | ည 85 | ညနေ 86 | ညီ 87 | တ 88 | တကယ် 89 | တက် 90 | တချို့ 91 | တခြား 92 | တင် 93 | တင်ပြ 94 | တစ် 95 | တစ်ဆင့် 96 | တစ်ဦး 97 | တည်း 98 | တတ် 99 | တန 100 | တယ် 101 | တာ 102 | တာကို 103 | တို 104 | တိုင်း 105 | တို့ 106 | တို့သည် 107 | တိုး 108 | တီ 109 | တုန်း 110 | တော 111 | တောင် 112 | တော့ 113 | တော် 114 | တော်တော်လေး 115 | တဲ့ 116 | တွင် 117 | တွေ 118 | တွေ့ 119 | ထ 120 | ထက် 121 | ထင် 122 | ထည့် 123 | ထပ် 124 | ထား 125 | ထားသည် 126 | ထိ 127 | ထို 128 | ထိုသို့ 129 | ထို့အပြင် 130 | ထုတ် 131 | ထူး 132 | ထောင် 133 | ထဲ 134 | ထဲက 135 | ထံ 136 | ထွက် 137 | ထွန်း 138 | ထွေ 139 | ထွေထွေထူးထူး 140 | ဒ 141 | ဒါ 142 | ဒါကို 143 | ဒါကြောင့် 144 | ဒါတွေ 145 | ဒါတွေက 146 | ဒါနဲ့ 147 | ဒါပေမဲ့ 148 | ဒါလေး 149 | ဒါ့အပြင် 150 | ဒီ 151 | ဒီထက် 152 | ဒီနေ့ 153 | ဒီမှာ 154 | ဒီလို 155 | ဒု 156 | န 157 | နက် 158 | နဂို 159 | နည်း 160 
| နား 161 | နိုင် 162 | နိုင်သည် 163 | နီ 164 | နေ 165 | နေကျ 166 | နောက် 167 | နောက်ဆုံး 168 | နောက်တစ်ခု 169 | နောက်ထပ် 170 | နောက်ပြီး 171 | နော့ 172 | နော် 173 | နေ့ 174 | နေ့စဉ် 175 | နဲ့ 176 | နှင့် 177 | နှင့်အတူ 178 | နှစ် 179 | ပ 180 | ပင 181 | ပတ 182 | ပတ်သက် 183 | ပါ 184 | ပါပ 185 | ပါဝင် 186 | ပါသည် 187 | ပိတ် 188 | ပို 189 | ပိုင်း 190 | ပိုမို 191 | ပို့ 192 | ပုံ 193 | ပေ 194 | ပေါ 195 | ပေါင်း 196 | ပေါ့ 197 | ပေါ် 198 | ပေး 199 | ပေးလိုက် 200 | ပဲ 201 | ပျော် 202 | ပြ 203 | ပြန် 204 | ပြီ 205 | ပြီး 206 | ပြီးခဲ့သည့် 207 | ပြီးတော့ 208 | ပြု 209 | ပြော 210 | ပြောကြား 211 | ပြောသည် 212 | ပြဲ 213 | ပွဲ 214 | ဖ 215 | ဖက် 216 | ဖို့ 217 | ဖူး 218 | ဖော် 219 | ဖြင့် 220 | ဖြစ် 221 | ဖြစ်ကြောင်း 222 | ဖြစ်တယ် 223 | ဖြစ်ပါတယ် 224 | ဖြစ်ပြီး 225 | ဖြစ်လာ 226 | ဖြစ်သည် 227 | ဖွ 228 | ဖွင့် 229 | ဖွဲ့ 230 | ဘ 231 | ဘက် 232 | ဘယ 233 | ဘယ်လို 234 | ဘဝ 235 | ဘာ 236 | ဘူး 237 | ဘဲ 238 | မ 239 | မက 240 | မင်း 241 | မစ 242 | မည့် 243 | မည် 244 | မည်သို့ 245 | မန 246 | မမ 247 | မယ့် 248 | မယ် 249 | မရှိ 250 | မရှိသလောက် 251 | မရှိသေး 252 | မသိ 253 | မဟုတ် 254 | မာ 255 | မိ 256 | မိမိ 257 | မေ 258 | မေး 259 | မဲ့ 260 | များ 261 | များသည် 262 | မျိုး 263 | မျှ 264 | မြင့် 265 | မြင် 266 | မှ 267 | မှစ၍ 268 | မှန် 269 | မှာ 270 | မှု 271 | ယ 272 | ယခင် 273 | ယခု 274 | ယခုခေတ် 275 | ယင်း 276 | ယင်းကဲ့သို့ 277 | ယင်းသို့ 278 | ယူ 279 | ယောက် 280 | ရ 281 | ရက် 282 | ရင် 283 | ရင်း 284 | ရတာ 285 | ရန် 286 | ရပ် 287 | ရရှိ 288 | ရရှိမည် 289 | ရလာ 290 | ရာ 291 | ရာတွင် 292 | ရာ၌ 293 | ရီ 294 | ရေ 295 | ရော 296 | ရောက် 297 | ရောက်ရှိ 298 | ရေး 299 | ရဲ့ 300 | ရွယ် 301 | ရှင့် 302 | ရှင် 303 | ရှင်း 304 | ရှာ 305 | ရှိ 306 | ရှိပါတယ် 307 | ရှိသည် 308 | ရှေ့ 309 | လ 310 | လက် 311 | လက်ရှိ 312 | လည်း 313 | လမ်း 314 | လာ 315 | လာမည့် 316 | လာရောက် 317 | လား 318 | လိမ့် 319 | လို 320 | လိုက် 321 | လိုက်ပါ 322 | လို့ 323 | လုပ် 324 | လုံး 325 | လုံးဝ 326 | လူ 327 | လောက် 328 | လောလောဆယ် 329 | လေး 330 | လဲ 331 | လဲဆို 332 | လျက် 333 | လျှင် 334 | လွန် 335 | ဝ 336 | ဝင် 337 | ဝင်း 
338 | သ 339 | သက် 340 | သက်ဆိုင်ရာ 341 | သင့် 342 | သစ် 343 | သည့် 344 | သည့်အတွက် 345 | သည် 346 | သည်နှင့် 347 | သတ် 348 | သဖြင့် 349 | သလို 350 | သာ 351 | သား 352 | သိ 353 | သိန်း 354 | သိပ် 355 | သိရ 356 | သိရသည် 357 | သိရှိ 358 | သို့ 359 | သို့မဟုတ် 360 | သို့သော် 361 | သုံး 362 | သူ 363 | သူမ 364 | သူများ 365 | သူ့ 366 | သေ 367 | သေသေချာချာ 368 | သော 369 | သောကြောင့် 370 | သော်လည်း 371 | သေး 372 | သွား 373 | ဟန် 374 | ဟာ 375 | ဟို 376 | ဟီး 377 | ဟု 378 | ဟုတ် 379 | ဟူ 380 | ဟော 381 | ဟဲဟဲ 382 | အ 383 | အက 384 | အကြိမ် 385 | အခ 386 | အခါ 387 | အခု 388 | အခုတလော 389 | အခုလို 390 | အခုဟာ 391 | အချက် 392 | အချို့ 393 | အခြား 394 | အင် 395 | အစ 396 | အစွမ်းကုန် 397 | အဆိုပါ 398 | အတူ 399 | အတွက် 400 | အတွင်း 401 | အထက်ပါ 402 | အထိ 403 | အထူး 404 | အထူးသဖြင့် 405 | အနေ 406 | အနေဖြင့် 407 | အပ 408 | အပါအဝင် 409 | အပေါ် 410 | အပြင် 411 | အဖြစ် 412 | အမ 413 | အမှတ် 414 | အရ 415 | အရင် 416 | အရင်က 417 | အရမ်း 418 | အလိုက် 419 | အလိုလို 420 | အား 421 | အားဖြင့် 422 | အားလုံး 423 | အို 424 | အောက် 425 | အောင် 426 | အေး 427 | အဲ 428 | အဲဒီ 429 | အဲဒီလို 430 | အဲ့ 431 | ဥပမာ 432 | ဦး 433 | ၌ 434 | ၍ 435 | ၎င်း 436 | ၏ 437 | -------------------------------------------------------------------------------- /Burmese_News_Classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "News-Classification.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [ 9 | "sHY9s9gh0maj", 10 | "KaZwIKyk1eUo", 11 | "-JLcmb_l1pu8", 12 | "-Jd2eK2u1iRS" 13 | ] 14 | }, 15 | "kernelspec": { 16 | "name": "python3", 17 | "display_name": "Python 3" 18 | }, 19 | "language_info": { 20 | "name": "python" 21 | } 22 | }, 23 | "cells": [ 24 | { 25 | "cell_type": "markdown", 26 | "source": [ 27 | "Dataset : https://github.com/ayehninnkhine/MyanmarNewsClassificationSystem\n", 28 | "\n", 29 | "Demo Website : 
https://share.streamlit.io/thuraaung1601/automatic-myanmar-news-classification/main/app.py\n", 30 | "\n", 31 | "Github : https://github.com/ThuraAung1601/Automatic-Myanmar-News-Classification" 32 | ], 33 | "metadata": { 34 | "id": "hoFwvAmn21uP" 35 | } 36 | }, 37 | { 38 | "cell_type": "code", 39 | "source": [ 40 | "import pandas as pd\n", 41 | "import numpy as np\n", 42 | "from matplotlib import pyplot as plt\n", 43 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 44 | "from sklearn.model_selection import train_test_split" 45 | ], 46 | "metadata": { 47 | "id": "toQF-eMHkbW-" 48 | }, 49 | "execution_count": 1, 50 | "outputs": [] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "source": [ 55 | "### Data preparation" 56 | ], 57 | "metadata": { 58 | "id": "8IseFn5p1z3u" 59 | } 60 | }, 61 | { 62 | "cell_type": "code", 63 | "source": [ 64 | "dataset = \"./mm-news-classification-dataset.csv\"" 65 | ], 66 | "metadata": { 67 | "id": "Pjl6r4vpMYCx" 68 | }, 69 | "execution_count": 2, 70 | "outputs": [] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "source": [ 75 | "data = pd.read_csv(dataset)\n", 76 | "print(data.head())" 77 | ], 78 | "metadata": { 79 | "colab": { 80 | "base_uri": "https://localhost:8080/" 81 | }, 82 | "id": "C5cEOwIjMTEI", 83 | "outputId": "56131687-27d1-42fd-af2d-36d36c62b68e" 84 | }, 85 | "execution_count": 3, 86 | "outputs": [ 87 | { 88 | "output_type": "stream", 89 | "name": "stdout", 90 | "text": [ 91 | " Unnamed: 0 News Category\n", 92 | "0 0 ဒီမိုကရေစီ_ရ_မှ_အမျှဝေ_ပါ_ဟု_မဝင်းမော်ဦး_တောင်... Politics\n", 93 | "1 1 ဒီမိုကရေစီ_အရေး_လူ့အခွင့်အရေး_တောင်းဆို_ဆန္ဒပြ... Politics\n", 94 | "2 2 ၂၀၁၅_ခုနှစ်_အထွေထွေရွေးကောက်ပွဲ_တွင်_အမျိုးသား... Politics\n", 95 | "3 3 လာမည့်_စက်တင်ဘာ_၁၉_ရက်_တွင်_မဝင်းမော်ဦး_ကျဆုံး... Politics\n", 96 | "4 4 တပ်မတော်ကာကွယ်ရေးဦးစီးချုပ်_ဗိုလ်ချုပ်မှူးကြီး... 
Politics\n" 97 | ] 98 | } 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "source": [ 104 | "data.isnull().sum()\n" 105 | ], 106 | "metadata": { 107 | "colab": { 108 | "base_uri": "https://localhost:8080/" 109 | }, 110 | "id": "7EvKVTDWNRI1", 111 | "outputId": "75a28cd4-f659-496d-a31b-06ebabceb2ac" 112 | }, 113 | "execution_count": 4, 114 | "outputs": [ 115 | { 116 | "output_type": "execute_result", 117 | "data": { 118 | "text/plain": [ 119 | "Unnamed: 0 0\n", 120 | "News 0\n", 121 | "Category 0\n", 122 | "dtype: int64" 123 | ] 124 | }, 125 | "metadata": {}, 126 | "execution_count": 4 127 | } 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "source": [ 133 | "data['category_id'] = data['Category'].factorize()[0]\n", 134 | "colslist = ['Index','News', 'Category', 'category_id']\n", 135 | "data.columns = colslist\n", 136 | "data.groupby('Category').Index.count().plot.bar(ylim=0)" 137 | ], 138 | "metadata": { 139 | "colab": { 140 | "base_uri": "https://localhost:8080/", 141 | "height": 359 142 | }, 143 | "id": "5GaEbXWJNY6I", 144 | "outputId": "2d4ccf5f-6b04-4ad7-cbbe-b90b50b62902" 145 | }, 146 | "execution_count": 5, 147 | "outputs": [ 148 | { 149 | "output_type": "execute_result", 150 | "data": { 151 | "text/plain": [ 152 | "" 153 | ] 154 | }, 155 | "metadata": {}, 156 | "execution_count": 5 157 | }, 158 | { 159 | "output_type": "display_data", 160 | "data": { 161 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAFECAYAAADLDO40AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAdZ0lEQVR4nO3de5wcZZ3v8c+XAIJCJCyzLCZg0A0ociTAiCgHRXG5qSDrDVwB0TW6goJ6dEGPL1APyvEuXmCjRMALiAssUVGIrMDKGmUSYwi3Q7gtycYwigsIyEL4nj/6GSniTDLT3Zmanvq+X69+TdVT1d2/7lfmO5WnnqpHtomIiGbYqO4CIiJi/CT0IyIaJKEfEdEgCf2IiAZJ6EdENMjGdRewPttss41nzpxZdxkRET1j0aJFv7XdN9y2CR/6M2fOZGBgoO4yIiJ6hqS7RtqW7p2IiAZJ6EdENEhCPyKiQRL6ERENktCPiGiQhH5ERIMk9CMiGiShHxHRIAn9iIgGmfBX5G4IM0/6Yd0lrNedp7+y7hIiYhLKkX5ERIMk9CMiGiShHxHRIOsNfUnbS/qppBsl3SDphNK+taQFkm4tP6eVdkk6Q9JySUsl7VF5rWPK/rdKOmbDfayIiBjOaE7kPga83/ZiSVsCiyQtAN4CXGn7dEknAScB/wgcDMwqjxcCZwIvlLQ1cArQD7i8znzbv+/2h4qIgAzaGM56Q9/2KmBVWX5A0k3AdOAwYL+y27nAVbRC/zDgPNsGFkraStJ2Zd8Ftu8FKH84DgLO7+LniXHWC79U0DujofJ9xoY2pj59STOB3YFfANuWPwgAvwG2LcvTgbsrT1tR2kZqH+595kgakDQwODg4lhIjImIdRh36krYALgJOtH1/dVs5qne3irI913a/7f6+vmFn/IqIiDaMKvQlbUIr8L9t++LSvLp021B+3lPaVwLbV54+o7SN1B4REeNkNKN3BJwN3GT7c5VN84GhETjHAJdW2o8uo3j2Bu4r3UCXAwdImlZG+hxQ2iIiYpyMZvTOPsBRwPWSlpS2DwGnAxdKehtwF/CGsu0y4BBgOfAQcCyA7XslfRy4ruz3saGTuhERMT5GM3rnZ4BG2Lz/MPsbOG6E15oHzBtLgRER0T25IjciokES+hERDZLQj4hokIR+RESDJPQjIhokoR8R0SAJ/YiIBknoR0Q0SEI/IqJBEvoREQ2S0I+IaJCEfkREgyT0IyIaJKEfEdEgCf2IiAZJ6EdENMhopkucJ+keScsqbd+VtKQ87hyaUUvSTEkPV7adVXnOnpKul7Rc0hllGsaIiBhHo5ku8Rzgy8B5Qw223zi0LOmzwH2V/W+zPXuY1zkTeDvwC1pTKh4E/GjsJUdERLvWe6Rv+xpg2Llsy9H6G4Dz1/UakrYDptpeWKZTPA94zdjLjYiITnTap78vsNr2rZW2HSX9StLVkvYtbdOBFZV9VpS2YUmaI2lA0sDg4GCHJUZExJBOQ/9InnyUvwrYwfbuwPuA70iaOtYXtT3Xdr/t/r6+vg5LjIiIIaPp0x+WpI2BvwX2HGqz/QjwSFleJOk2YCdgJTCj8vQZpS0iIsZRJ0f6rwButv2nbhtJfZKmlOVnAbOA222vAu6XtHc5D3A0cGkH7x0REW0YzZDN84GfAztLWiHpbWXTEfz5CdyXAEvLEM5/Bt5pe+gk8LuArwPLgdvIyJ2IiHG33u4d20eO0P6WYdouAi4aYf8BYNcx1hcREV2UK3IjIhokoR8R0SAJ/YiIBknoR0Q0SEI/IqJBEvoREQ2S0I+IaJCEfkREgyT0IyIaJKEfEdEgCf2IiAZJ6EdENEhCPyKiQRL6ERENktCPiGiQhH5ERIOMZuaseZLukbSs0naqpJWSlpTHIZVtJ0taLukWSQdW2g8qbcslndT9jxIREeszmiP9c4CDhmn/vO3Z5XEZgKRdaE2j+LzynK9KmlLmzf0KcDCwC3Bk2Tc
iIsbRaKZLvEbSzFG+3mHABbYfAe6QtBzYq2xbbvt2AEkXlH1vHHPFERHRtk769I+XtLR0/0wrbdOBuyv7rChtI7UPS9IcSQOSBgYHBzsoMSIiqtoN/TOBZwOzgVXAZ7tWEWB7ru1+2/19fX3dfOmIiEZbb/fOcGyvHlqW9DXgB2V1JbB9ZdcZpY11tEdExDhp60hf0naV1cOBoZE984EjJD1F0o7ALOCXwHXALEk7StqU1sne+e2XHRER7Vjvkb6k84H9gG0krQBOAfaTNBswcCfwDgDbN0i6kNYJ2seA42yvKa9zPHA5MAWYZ/uGrn+aiIhYp9GM3jlymOaz17H/acBpw7RfBlw2puoiIqKrckVuRESDJPQjIhokoR8R0SAJ/YiIBknoR0Q0SEI/IqJBEvoREQ2S0I+IaJCEfkREgyT0IyIaJKEfEdEgCf2IiAZJ6EdENEhCPyKiQRL6ERENktCPiGiQ9Ya+pHmS7pG0rNL2aUk3S1oq6RJJW5X2mZIelrSkPM6qPGdPSddLWi7pDEnaMB8pIiJGMpoj/XOAg9ZqWwDsavv5wP8DTq5su8327PJ4Z6X9TODttObNnTXMa0ZExAa23tC3fQ1w71ptV9h+rKwuBGas6zXKROpTbS+0beA84DXtlRwREe3qRp/+W4EfVdZ3lPQrSVdL2re0TQdWVPZZUdqGJWmOpAFJA4ODg10oMSIioMPQl/Rh4DHg26VpFbCD7d2B9wHfkTR1rK9re67tftv9fX19nZQYEREVG7f7RElvAV4F7F+6bLD9CPBIWV4k6TZgJ2AlT+4CmlHaIiJiHLV1pC/pIOCDwKG2H6q090maUpafReuE7e22VwH3S9q7jNo5Gri04+ojImJM1nukL+l8YD9gG0krgFNojdZ5CrCgjLxcWEbqvAT4mKRHgceBd9oeOgn8LlojgTandQ6geh4gIiLGwXpD3/aRwzSfPcK+FwEXjbBtANh1TNVFRERX5YrciIgGSehHRDRIQj8iokES+hERDZLQj4hokIR+RESDJPQjIhokoR8R0SAJ/YiIBknoR0Q0SEI/IqJBEvoREQ2S0I+IaJCEfkREgyT0IyIaJKEfEdEgowp9SfMk3SNpWaVta0kLJN1afk4r7ZJ0hqTlkpZK2qPynGPK/rdKOqb7HyciItZltEf65wAHrdV2EnCl7VnAlWUd4GBac+POAuYAZ0LrjwStqRZfCOwFnDL0hyIiIsbHqELf9jXAvWs1HwacW5bPBV5TaT/PLQuBrSRtBxwILLB9r+3fAwv48z8kERGxAXXSp7+t7VVl+TfAtmV5OnB3Zb8VpW2k9j8jaY6kAUkDg4ODHZQYERFVXTmRa9uAu/Fa5fXm2u633d/X19etl42IaLxOQn916bah/LyntK8Etq/sN6O0jdQeERHjpJPQnw8MjcA5Bri00n50GcWzN3Bf6Qa6HDhA0rRyAveA0hYREeNk49HsJOl8YD9gG0kraI3COR24UNLbgLuAN5TdLwMOAZYDDwHHAti+V9LHgevKfh+zvfbJ4YiI2IBGFfq2jxxh0/7D7GvguBFeZx4wb9TVRUREV+WK3IiIBknoR0Q0SEI/IqJBEvoREQ2S0I+IaJCEfkREgyT0IyIaJKEfEdEgCf2IiAZJ6EdENEhCPyKiQRL6ERENktCPiGiQhH5ERIMk9CMiGiShHxHRIG2HvqSdJS2pPO6XdKKkUyWtrLQfUnnOyZKWS7pF0oHd+QgRETFao5o5azi2bwFmA0iaQmuS80toTY/4edufqe4vaRfgCOB5wDOAn0jayfaadmuIiIix6Vb3zv7AbbbvWsc+hwEX2H7E9h205tDdq0vvHxERo9Ct0D8COL+yfrykpZLmSZpW2qYDd1f2WVHa/oykOZIGJA0MDg52qcSIiOg49CVtChwKfK80nQk8m1bXzyrgs2N9Tdtzbffb7u/r6+u0xIiIKLpxpH8wsNj2agDbq22vsf048DWe6MJZCWxfed6M0hY
REeOkG6F/JJWuHUnbVbYdDiwry/OBIyQ9RdKOwCzgl114/4iIGKW2R+8ASHoa8DfAOyrNn5I0GzBw59A22zdIuhC4EXgMOC4jdyIixldHoW/7QeAv1mo7ah37nwac1sl7RkRE+3JFbkREgyT0IyIaJKEfEdEgCf2IiAZJ6EdENEhCPyKiQRL6ERENktCPiGiQhH5ERIMk9CMiGiShHxHRIAn9iIgGSehHRDRIQj8iokES+hERDZLQj4hokG5MjH6npOslLZE0UNq2lrRA0q3l57TSLklnSFouaamkPTp9/4iIGL1uHem/zPZs2/1l/STgStuzgCvLOrQmUZ9VHnOAM7v0/hERMQobqnvnMODcsnwu8JpK+3luWQhstdZE6hERsQF1I/QNXCFpkaQ5pW1b26vK8m+AbcvydODuynNXlLYnkTRH0oCkgcHBwS6UGBER0OHE6MX/tL1S0l8CCyTdXN1o25I8lhe0PReYC9Df3z+m50ZExMg6PtK3vbL8vAe4BNgLWD3UbVN+3lN2XwlsX3n6jNIWERHjoKPQl/Q0SVsOLQMHAMuA+cAxZbdjgEvL8nzg6DKKZ2/gvko3UEREbGCddu9sC1wiaei1vmP7x5KuAy6U9DbgLuANZf/LgEOA5cBDwLEdvn9ERIxBR6Fv+3Zgt2HafwfsP0y7geM6ec+IiGhfrsiNiGiQhH5ERIMk9CMiGiShHxHRIAn9iIgGSehHRDRIQj8iokES+hERDZLQj4hokIR+RESDJPQjIhokoR8R0SAJ/YiIBknoR0Q0SEI/IqJBEvoREQ3SduhL2l7STyXdKOkGSSeU9lMlrZS0pDwOqTznZEnLJd0i6cBufICIiBi9TmbOegx4v+3FZZ7cRZIWlG2ft/2Z6s6SdgGOAJ4HPAP4iaSdbK/poIaIiBiDto/0ba+yvbgsPwDcBExfx1MOAy6w/YjtO2jNk7tXu+8fERFj15U+fUkzgd2BX5Sm4yUtlTRP0rTSNh24u/K0FYzwR0LSHEkDkgYGBwe7UWJERNCF0Je0BXARcKLt+4EzgWcDs4FVwGfH+pq259rut93f19fXaYkREVF0FPqSNqEV+N+2fTGA7dW219h+HPgaT3ThrAS2rzx9RmmLiIhx0snoHQFnAzfZ/lylfbvKbocDy8ryfOAISU+RtCMwC/hlu+8fERFj18nonX2Ao4DrJS0pbR8CjpQ0GzBwJ/AOANs3SLoQuJHWyJ/jMnInImJ8tR36tn8GaJhNl63jOacBp7X7nhER0ZlckRsR0SAJ/YiIBknoR0Q0SEI/IqJBEvoREQ2S0I+IaJCEfkREgyT0IyIaJKEfEdEgCf2IiAZJ6EdENEhCPyKiQRL6ERENktCPiGiQhH5ERIMk9CMiGmTcQ1/SQZJukbRc0knj/f4REU02rqEvaQrwFeBgYBdaUyvuMp41REQ02Xgf6e8FLLd9u+3/Bi4ADhvnGiIiGku2x+/NpNcBB9n++7J+FPBC28evtd8cYE5Z3Rm4ZdyKbM82wG/rLmISyffZXfk+u6sXvs9n2u4bbkPbE6NvSLbnAnPrrmO0JA3Y7q+7jski32d35fvsrl7/Pse7e2clsH1lfUZpi4iIcTDeoX8dMEvSjpI2BY4A5o9zDRERjTWu3Tu2H5N0PHA5MAWYZ/uG8axhA+mZrqgeke+zu/J9dldPf5/jeiI3IiLqlStyIyIaJKEfEdEgCf2IiAZJ6EdEjJKkaZKeX3cdnUjot0nSCZKmquVsSYslHVB3Xb1K0jdH0xajI+lpkjYqyztJOlTSJnXX1YskXVV+17cGFgNfk/S5uutqV0K/fW+1fT9wADANOAo4vd6Setrzqivl5nx71lTLZHANsJmk6cAVtP59nlNrRb3r6eV3/W+B82y/EHhFzTW1LaHfPpWfhwDfLNcbaB37xzAknSzpAeD5ku4vjweAe4BLay6vl8n2Q7SC6qu2X89af1hj1DaWtB3wBuAHdRfTqYR++xZJuoJW6F8
uaUvg8Zpr6jm2P2l7S+DTtqeWx5a2/8L2yXXX18Mk6UXA3wE/LG1Taqynl32U1gWly21fJ+lZwK0119S2XJzVptJfOhu43fZ/lf6+GbaX1lxazypdEc+kcqW47Wvqq6h3SXop8H7gWtv/twTVibbfU3NpPUfSPravXV9br0jot0nSPsAS2w9KejOwB/BF23fVXFpPknQ6rXsx3QisKc22fWh9VUWApMW291hfW6+YkLdW7hFnArtJ2o3WEdXXgfOAl9ZaVe86HNjZ9iN1FzIZSFoAvN72f5X1acAFtg+st7LeUbrHXgz0SXpfZdNUerirLH367XvMrf8mHQZ82fZXgC1rrqmX3Q5kSGH39A0FPoDt3wPb1lhPL9oU2ILWwfGWlcf9wOtqrKsjOdJv3wOSTqY1FG7f0sef0GrfQ8ASSVcCfzraTx9029ZI2sH2fwBIeiYZaDAmtq+W9DPg+bY/Wnc93ZLQb98bgTfRGq//G0k7AJ+uuaZeNp/MrdBNHwZ+JulqWkOJ9+WJKUhjlGyvkfSMuuvoppzI7UA5eppl+yeSngpMsf1A3XX1KkmbAzvYnuhzIvcESdsAe5fVhbYn+ryuE5KkM4HpwPeAB4fabV9cW1EdSJ9+myS9Hfhn4J9K03TgX+qrqLdJejWwBPhxWZ8tKUf+YyTpOeXnHsAOwH+Wxw6lLcZuM+B3wMuBV5fHq2qtqAM50m+TpCXAXsAvbO9e2q63/T/qraw3SVpE65fqqsr3ucz2rvVW1lskzbU9R9JPh9ls2y8f96JiQkmffvsesf3fUuvOC5I2BvIXtH2P2r5v6PsscuJxjGwP9dsfbPuP1W2SNquhpJ4naQbwJWCf0vRvwAm2V9RXVfvSvdO+qyV9CNhc0t/Q6u/7fs019bIbJL0JmCJplqQvAf9ed1E9bLjvLt9ne75Ba5DBM8rj+6WtJ6V7p01liObbaN1lU7TuzfF15wttSzkR/mGe/H1+fO2j1Vg3SX9F6/zSt2iNLhv6r9NU4Czbz6mrtl4laYnt2etr6xUJ/YhJRNIxwFuAfmCgsukB4JxeHXFSp3LtyDeA80vTkcCxtvevr6r2JfTbVO69cypP3CBMtE6UPavOunqVpH7gQ8BMnnzDtZ6epagukl5r+6K665gMytDsLwEvKk3XAu8ZuvCt1yT02yTpZuC9wCKeuEEYtn9XW1E9TNItwAeA66mcwM0N7MZG0pttf0vS+xlmYIHtnp3xKbojo3fad5/tH9VdxCQyaDvj8jv3tPJzi1qrmETKbam/SOtCNwM/B95r+/ZaC2tTjvTbVG4FPAW4mCffK2ZxbUX1MEn70+orXfveO+mDjlpJWgh8hSf69I8A3l2mTew5Cf025eKX7pL0LeA5wA080b1j22+tr6reI+mMdW3PDezGTtLStc8tSfq17d3qqqkT6d5pk+2X1V3DJPMC2zvXXcQksKjuAiahH0k6CbiAVvfOG4HLymx52L63zuLGKkf6Y1Q5Ufa+4bbnRFl7JH2D1jy5N9Zdy2QiaQsA23+ou5ZeJemOsjgUltXLxntuxF6O9Mdu6ERZJkzprr1p3U//Dlp9+kNDYDNksw2SdgW+CWzdWtUgcLTtG+qtrHdIegFwt+0dy/oxwGuBO4FTe+0If0iO9GNCKGOh/0yGbLZH0r8DH7b907K+H/AJ2y+utbAeImkx8Arb90p6Ca3unXcDs4Hn2u7J2bNy7502SfqUpKmSNpF0paTBMkF6tKGE+wrgUVr/jR56RHueNhT4ALav4on/pcboTKkczb8RmGv7ItsfAf66xro6ktBv3wG276d1X+07af0j+ECtFfUwSe8GVgMLgB+Wxw9qLaq33S7pI5Jmlsf/pjUPcYzelHL3XID9gX+tbOvZrvGeLXwCGPruXgl8b5jbAsfYnADsnCuau+atwEdpXUdiWrcDzvDXsTmf1t10fws8TOs7RNJfA/fVWVgnEvrt+0G5FcPDwD9I6gNyR8j23U0P/yJNFOW
e+e+k9T/P64H323603qp6k+3Tys3WtgOuqNxBdyNaffs9KSdyO1DG6d5XJk9+KjDV9m/qrqsXSTob2JlWt071itwMgR0DSd+ldV7k34CDgTttn1hvVTGR5Ei/TZKOrixXN503/tVMCv9RHpuWR7Rnl6EpO8sf0l/WXE9MMAn99r2gsrwZrRM9i0not8X2R+uuYZL4U1eO7cdyninWlu6dLpG0FXCB7YPqrqWXSPqC7RMlfZ/hbwV8aA1l9SxJa4AHh1aBzYGHeOJit6l11RYTQ470u+dBYMe6i+hB3yw/P1NrFZOE7Sl11xATW0K/TWsdmW4E7AJcWF9Fvcn2ovLz6rpriWiCdO+0SdJLK6uPAXfZXlFXPb1O0izgk7T+eG421N5rN7OKmOhypN+m6pGppG2AXFTUmW8ApwCfB14GHEuuGI/ouvxSjZGkvSVdJeliSbtLWgYsA1ZLyknc9m1u+0pa//u8y/aptK52joguypH+2H0Z+BDwdFr34jjY9kJJz6F12faP6yyuhz0iaSPgVknHAyvJPK8RXZc+/TGStMT27LJ8k+3nVrb9yvbu9VXXu8q9y28CtgI+DkwFPmX7F7UWFjHJpHtn7B6vLD+81rb8BW3fTNt/sL3C9rG2XwvsUHdREZNNjvTHqHLxS/XCF8r6ZrY3qau2XiZpse091tcWEZ1Jn/4Y5eKX7pJ0MHAIMF3SGZVNU2kNhY2ILkroR93+ExgADgUWVdofAN5bS0URk1i6d6J2kqYA37T9prpriZjsciI3amd7DbC9pNxSOWIDS/dOTBR3ANdKms8Td4nMJCoRXZbQj4nitvLYCNiy5loiJq306ceEIumpth9a/54R0Y706ceEIOlFkm4Ebi7ru0n6as1lRUw6Cf2YKL4AHEi5W6ntXwMvqbWiiEkooR8Thu2712paU0shEZNYTuTGRHG3pBcDlrQJcAKtG7BFRBflRG5MCGUimi8Cr6B1H6MrgPfYvrfWwiImmYR+TAiS9rF97fraIqIzCf2YEHKXzYjxkT79qJWkFwEvBvokva+yaSqQO5pGdFlCP+q2Ka1pETfmyVfi3g+8rpaKIiaxdO/EhCDpmbbvqruOiMkuR/oxUTxF0lxgJpV/l7ZfXltFEZNQjvRjQpD0a+AsWhOp/OmiLNuLRnxSRIxZQj8mBEmLbO9Zdx0Rk11CPyYESacC9wCXAI8MtefirIjuSujHhCDpjmGabftZ415MxCSW0I+IaJDcZTNqJemDleXXr7XtE+NfUcTkltCPuh1RWT55rW0HjWchEU2Q0I+6aYTl4dYjokMJ/aibR1gebj0iOpQTuVErSWuAB2kd1W8ODE2KLmAz25vUVVvEZJTQj4hokHTvREQ0SEI/IqJBEvrRCJL+StIFkm6TtEjSZZJ2GmHfrSS9a7xrjBgPCf2Y9CSJ1j19rrL97HJjt5OBbUd4ylbABg99Sbm1eYy7hH40wcuAR22fNdRg+9fAryRdKWmxpOslHVY2nw48W9ISSZ8GkPQBSddJWirpo0OvI+kjkm6R9DNJ50v6X6V9tqSFZf9LJE0r7VdJ+oKkAeDDku6QtEnZNrW6HrEh5EgjmmBXWvfpX9sfgcNt3y9pG2ChpPnAScCutmcDSDoAmAXsRWso6XxJLwEeBl4L7AZsAiyuvM95wLttXy3pY8ApwIll26a2+8trzwReCfwLrauTL7b9aBc/e8STJPSjyQR8ogT448B0hu/yOaA8flXWt6D1R2BL4FLbfwT+KOn7AJKeDmxl++qy/7nA9yqv993K8teBD9IK/WOBt3fhc0WMKKEfTXADw0+y/ndAH7Cn7Ucl3QlsNsx+Aj5p+5+e1CidOMy+o/Hg0ILtayXNlLQfMMX2sjZfM2JU0qcfTfCvtObgnTPUIOn5wDOBe0rgv6ysAzxA6yh+yOXAWyVtUZ47XdJfAtcCr5a0Wdn2KgDb9wG/l7Rvef5RwNWM7DzgO8A3OvycEeuVI/2Y9Gxb0uHAFyT
9I62+/DuBU4EzJF0PDAA3l/1/J+laScuAH9n+gKTnAj9vDQTiD8CbbV9XzgEsBVYD1wP3lbc9BjhL0lOB22l13Yzk28D/Ac7v4seOGFZuwxDRAUlb2P5DCfdrgDm2F4/xNV4HHGb7qA1SZERFjvQjOjNX0i60zgWc20bgfwk4GDhkQxQXsbYc6UdENEhO5EZENEhCPyKiQRL6ERENktCPiGiQhH5ERIP8f6Q+5HDmGfiMAAAAAElFTkSuQmCC\n", 162 | "text/plain": [ 163 | "
" 164 | ] 165 | }, 166 | "metadata": { 167 | "needs_background": "light" 168 | } 169 | } 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "source": [ 175 | "stopwordslist = []\n", 176 | "slist = []\n", 177 | "with open(\"/content/stopword.txt\", encoding = 'utf8') as stopwordsfile:\n", 178 | " stopwords = stopwordsfile.readlines()\n", 179 | " slist.extend(stopwords)\n", 180 | " for w in range(len(slist)):\n", 181 | " temp = slist[w]\n", 182 | " stopwordslist.append(temp.rstrip())\n" 183 | ], 184 | "metadata": { 185 | "id": "EeNVew5TZN2X" 186 | }, 187 | "execution_count": 6, 188 | "outputs": [] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "source": [ 193 | "!pip install pyidaungsu" 194 | ], 195 | "metadata": { 196 | "id": "1pjG3DQYUOS7" 197 | }, 198 | "execution_count": null, 199 | "outputs": [] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "source": [ 204 | "import pyidaungsu as pds\n", 205 | "import re\n", 206 | "CleanPattern = re.compile(r'\\d+|[၊။!-/:-@[-`{-~\\t ]|[A-za-z0-9]')\n", 207 | "def clean_sentence(sentence):\n", 208 | " sentence = sentence.replace(\"_\",\" \")\n", 209 | " sent = CleanPattern.sub(\" \",sentence)\n", 210 | " return sent\n", 211 | "\n", 212 | "def stop_word(sentence):\n", 213 | " new_sentence = []\n", 214 | " for word in sentence.split():\n", 215 | " if word not in stopwordslist:\n", 216 | " new_sentence.append(word)\n", 217 | " return(' '.join(new_sentence))\n", 218 | "\n", 219 | "def tokenize(line):\n", 220 | " line = clean_sentence(line)\n", 221 | " sentence = pds.tokenize(line,form=\"word\")\n", 222 | " sentence = ' '.join([str(elem) for elem in sentence])\n", 223 | " sentence = stop_word(sentence)\n", 224 | " return sentence\n", 225 | " \n", 226 | "data['News'] = data['News'].apply(tokenize)\n", 227 | "data.head(10)" 228 | ], 229 | "metadata": { 230 | "colab": { 231 | "base_uri": "https://localhost:8080/", 232 | "height": 363 233 | }, 234 | "id": "BeMZFKPtYSo7", 235 | "outputId": 
"28acc76b-369e-4e09-a8b0-bb0f2c383843" 236 | }, 237 | "execution_count": 7, 238 | "outputs": [ 239 | { 240 | "output_type": "execute_result", 241 | "data": { 242 | "text/html": [ 243 | "\n", 244 | "
\n", 245 | "
\n", 246 | "
\n", 247 | "\n", 260 | "\n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | "
IndexNewsCategorycategory_id
00ဒီမိုကရေစီ အမျှ ဝေ မော်ဦး တောင်းဆို အရေးတော်ပု...Politics0
11ဒီမိုကရေစီ အရေး လူ့ အခွင့် အရေး တောင်းဆို ဆန္ဒ...Politics0
22ခုနှစ် အထွေထွေ ရွေးကောက်ပွဲ အမျိုးသား ဒီမိုကရေ...Politics0
33စက်တင်ဘာ မဝင်း မော်ဦး ကျဆုံး ပြည့် ကျဆုံး ပတ်လ...Politics0
44တပ်မတော် ကာကွယ် ဦးစီးချုပ် ဗိုလ်ချုပ်မှူးကြီး ...Politics0
55တွေ့ဆုံ မြန်မာ နိုင်ငံ ငြိမ်းချမ်း ရာစုပင်လုံ ...Politics0
66ယနေ့ ခရီးစဉ် မြန်မာ နိုင်ငံ အမေရိကန် နိုင်ငံခြ...Politics0
77အလုပ်သမား နေပြည်တော် ဆက်လက်ချီတက်Politics0
88မန္တလေး ဒေသ ကြီးစဉ့် ကိုင် မြို့ဖါးလင်ပိုး ကျေ...Politics0
99ခုံသမာဓိကောင်စီ ကြားနာ စစ်ဆေး ခံယူ အလုပ်သမား လ...Politics0
\n", 343 | "
\n", 344 | " \n", 354 | " \n", 355 | " \n", 392 | "\n", 393 | " \n", 417 | "
\n", 418 | "
\n", 419 | " " 420 | ], 421 | "text/plain": [ 422 | " Index ... category_id\n", 423 | "0 0 ... 0\n", 424 | "1 1 ... 0\n", 425 | "2 2 ... 0\n", 426 | "3 3 ... 0\n", 427 | "4 4 ... 0\n", 428 | "5 5 ... 0\n", 429 | "6 6 ... 0\n", 430 | "7 7 ... 0\n", 431 | "8 8 ... 0\n", 432 | "9 9 ... 0\n", 433 | "\n", 434 | "[10 rows x 4 columns]" 435 | ] 436 | }, 437 | "metadata": {}, 438 | "execution_count": 7 439 | } 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 8, 445 | "metadata": { 446 | "id": "fcc99695" 447 | }, 448 | "outputs": [], 449 | "source": [ 450 | "data = data[[\"News\", \"Category\"]]\n", 451 | "\n", 452 | "x = np.array(data[\"News\"])\n", 453 | "y = np.array(data[\"Category\"])" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "source": [ 459 | "def tokenize(line):\n", 460 | " sentence = pds.tokenize(line,form=\"word\")\n", 461 | " return sentence\n", 462 | "\n", 463 | "vectorizer = TfidfVectorizer(tokenizer=tokenize,ngram_range=(1,2))\n", 464 | "X = vectorizer.fit_transform(x)" 465 | ], 466 | "metadata": { 467 | "id": "v0tB4xSlfeGN" 468 | }, 469 | "execution_count": 9, 470 | "outputs": [] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "source": [ 475 | "X.shape" 476 | ], 477 | "metadata": { 478 | "colab": { 479 | "base_uri": "https://localhost:8080/" 480 | }, 481 | "id": "5gzGmt11ZYup", 482 | "outputId": "827a0bf1-92ab-4de3-eb36-d6231c1d6a87" 483 | }, 484 | "execution_count": 10, 485 | "outputs": [ 486 | { 487 | "output_type": "execute_result", 488 | "data": { 489 | "text/plain": [ 490 | "(8115, 20178)" 491 | ] 492 | }, 493 | "metadata": {}, 494 | "execution_count": 10 495 | } 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "source": [ 501 | "import pickle\n", 502 | "pickle.dump(vectorizer, open(\"vectorizer.pickle\", \"wb\"))" 503 | ], 504 | "metadata": { 505 | "id": "3c3VEefWcubb" 506 | }, 507 | "execution_count": 11, 508 | "outputs": [] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "source": [ 513 | 
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)" 514 | ], 515 | "metadata": { 516 | "id": "ASJopf6UXlYx" 517 | }, 518 | "execution_count": 16, 519 | "outputs": [] 520 | }, 521 | { 522 | "cell_type": "markdown", 523 | "source": [ 524 | "### Naive Bayes " 525 | ], 526 | "metadata": { 527 | "id": "B2IxNcGx0KA7" 528 | } 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": 17, 533 | "metadata": { 534 | "id": "290c3389", 535 | "outputId": "90274a87-64b3-46a4-977a-ae617e9989a1", 536 | "colab": { 537 | "base_uri": "https://localhost:8080/" 538 | } 539 | }, 540 | "outputs": [ 541 | { 542 | "output_type": "execute_result", 543 | "data": { 544 | "text/plain": [ 545 | "MultinomialNB()" 546 | ] 547 | }, 548 | "metadata": {}, 549 | "execution_count": 17 550 | } 551 | ], 552 | "source": [ 553 | "from sklearn.naive_bayes import MultinomialNB\n", 554 | "\n", 555 | "model = MultinomialNB()\n", 556 | "model.fit(X_train,y_train)" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "source": [ 562 | "# predict\n", 563 | "y_pred = model.predict(X_test)" 564 | ], 565 | "metadata": { 566 | "id": "2quuFvRuR98-" 567 | }, 568 | "execution_count": 18, 569 | "outputs": [] 570 | }, 571 | { 572 | "cell_type": "code", 573 | "source": [ 574 | "# classification report: per-class precision, recall, F1-score and overall accuracy\n", 575 | "\n", 576 | "from sklearn import metrics\n", 577 | "from sklearn.metrics import classification_report \n", 578 | "\n", 579 | "print(f\"Classification report for classifier {model}:\\n\"\n", 580 | " f\"{metrics.classification_report(y_test, y_pred)}\\n\")" 581 | ], 582 | "metadata": { 583 | "colab": { 584 | "base_uri": "https://localhost:8080/" 585 | }, 586 | "id": "UVLDJrpXRSsQ", 587 | "outputId": "e3287fd3-fdfe-489f-a710-5be057e71963" 588 | }, 589 | "execution_count": 19, 590 | "outputs": [ 591 | { 592 | "output_type": "stream", 593 | "name": "stdout", 594 | "text": [ 595 | "Classification report for classifier MultinomialNB():\n", 596 | " precision recall f1-score 
support\n", 597 | "\n", 598 | " Business 0.78 0.91 0.84 412\n", 599 | "Entertainment 0.89 0.80 0.84 405\n", 600 | " Politics 0.82 0.86 0.84 386\n", 601 | " Sports 0.93 0.83 0.88 420\n", 602 | "\n", 603 | " accuracy 0.85 1623\n", 604 | " macro avg 0.86 0.85 0.85 1623\n", 605 | " weighted avg 0.86 0.85 0.85 1623\n", 606 | "\n", 607 | "\n" 608 | ] 609 | } 610 | ] 611 | }, 612 | { 613 | "cell_type": "markdown", 614 | "source": [ 615 | "### Linear SVM" 616 | ], 617 | "metadata": { 618 | "id": "sHY9s9gh0maj" 619 | } 620 | }, 621 | { 622 | "cell_type": "code", 623 | "source": [ 624 | "# Import classifiers and performance metrics\n", 625 | "from sklearn import svm, metrics\n", 626 | "\n", 627 | "# linear kernel model\n", 628 | "\n", 629 | "svm_model = svm.SVC(kernel='linear')\n", 630 | "svm_model.fit(X_train, y_train)\n", 631 | "\n", 632 | "# predict\n", 633 | "y_pred = svm_model.predict(X_test)" 634 | ], 635 | "metadata": { 636 | "id": "PqfOxTQU9vIc" 637 | }, 638 | "execution_count": 20, 639 | "outputs": [] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "source": [ 644 | "# classification report: per-class precision, recall, F1-score and overall accuracy\n", 645 | "\n", 646 | "from sklearn import metrics\n", 647 | "from sklearn.metrics import classification_report \n", 648 | "\n", 649 | "print(f\"Classification report for classifier {svm_model}:\\n\"\n", 650 | " f\"{metrics.classification_report(y_test, y_pred)}\\n\")" 651 | ], 652 | "metadata": { 653 | "colab": { 654 | "base_uri": "https://localhost:8080/" 655 | }, 656 | "id": "ysrSrhlY-vjG", 657 | "outputId": "9ec74602-83ef-48d4-e6c5-7badca2169bc" 658 | }, 659 | "execution_count": 21, 660 | "outputs": [ 661 | { 662 | "output_type": "stream", 663 | "name": "stdout", 664 | "text": [ 665 | "Classification report for classifier SVC(kernel='linear'):\n", 666 | " precision recall f1-score support\n", 667 | "\n", 668 | " Business 0.90 0.87 0.89 412\n", 669 | "Entertainment 0.77 0.92 0.84 405\n", 670 | " Politics 0.87 0.83 0.85 386\n", 671 | " Sports 0.91 0.81 0.86 420\n", 
672 | "\n", 673 | " accuracy 0.86 1623\n", 674 | " macro avg 0.86 0.86 0.86 1623\n", 675 | " weighted avg 0.86 0.86 0.86 1623\n", 676 | "\n", 677 | "\n" 678 | ] 679 | } 680 | ] 681 | }, 682 | { 683 | "cell_type": "markdown", 684 | "source": [ 685 | "### Random Forest " 686 | ], 687 | "metadata": { 688 | "id": "KaZwIKyk1eUo" 689 | } 690 | }, 691 | { 692 | "cell_type": "code", 693 | "source": [ 694 | "from sklearn.ensemble import RandomForestClassifier\n", 695 | "\n", 696 | "# Random forest classifier\n", 697 | "rf_model = RandomForestClassifier(n_estimators=100, n_jobs=1)\n", 698 | "rf_model.fit(X_train,y_train)\n", 699 | "\n", 700 | "# predict\n", 701 | "y_pred = rf_model.predict(X_test)" 702 | ], 703 | "metadata": { 704 | "id": "blkLzOMLFpL9" 705 | }, 706 | "execution_count": 22, 707 | "outputs": [] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "source": [ 712 | "# classification report: per-class precision, recall, F1-score and overall accuracy\n", 713 | "\n", 714 | "from sklearn import metrics\n", 715 | "from sklearn.metrics import classification_report \n", 716 | "\n", 717 | "print(f\"Classification report for classifier {rf_model}:\\n\"\n", 718 | " f\"{metrics.classification_report(y_test, y_pred)}\\n\")" 719 | ], 720 | "metadata": { 721 | "colab": { 722 | "base_uri": "https://localhost:8080/" 723 | }, 724 | "id": "Lw9sJzlxHWBD", 725 | "outputId": "9ccd627b-63c1-47af-f547-577407106c14" 726 | }, 727 | "execution_count": 23, 728 | "outputs": [ 729 | { 730 | "output_type": "stream", 731 | "name": "stdout", 732 | "text": [ 733 | "Classification report for classifier RandomForestClassifier(n_jobs=1):\n", 734 | " precision recall f1-score support\n", 735 | "\n", 736 | " Business 0.87 0.79 0.83 412\n", 737 | "Entertainment 0.72 0.89 0.80 405\n", 738 | " Politics 0.79 0.80 0.79 386\n", 739 | " Sports 0.90 0.77 0.83 420\n", 740 | "\n", 741 | " accuracy 0.81 1623\n", 742 | " macro avg 0.82 0.81 0.81 1623\n", 743 | " weighted avg 0.82 0.81 0.81 1623\n", 744 | "\n", 745 | "\n" 746 | ] 747 | } 748 | ] 749 | }, 750 | { 
751 | "cell_type": "markdown", 752 | "source": [ 753 | "### Decision Tree" 754 | ], 755 | "metadata": { 756 | "id": "-JLcmb_l1pu8" 757 | } 758 | }, 759 | { 760 | "cell_type": "code", 761 | "source": [ 762 | "from sklearn.tree import DecisionTreeClassifier\n", 763 | "\n", 764 | "dt_model = DecisionTreeClassifier(random_state=0)\n", 765 | "dt_model.fit(X_train,y_train)\n", 766 | "\n", 767 | "# predict\n", 768 | "y_pred = dt_model.predict(X_test)" 769 | ], 770 | "metadata": { 771 | "id": "j48GgUZqHbBF" 772 | }, 773 | "execution_count": 24, 774 | "outputs": [] 775 | }, 776 | { 777 | "cell_type": "code", 778 | "source": [ 779 | "# classification report: per-class precision, recall, F1-score and overall accuracy\n", 780 | "\n", 781 | "from sklearn import metrics\n", 782 | "from sklearn.metrics import classification_report \n", 783 | "\n", 784 | "print(f\"Classification report for classifier {dt_model}:\\n\"\n", 785 | " f\"{metrics.classification_report(y_test, y_pred)}\\n\")" 786 | ], 787 | "metadata": { 788 | "colab": { 789 | "base_uri": "https://localhost:8080/" 790 | }, 791 | "id": "hwrlX85QH2Ig", 792 | "outputId": "3150e0c9-cf92-4f18-d059-7b8363b0ad4c" 793 | }, 794 | "execution_count": 25, 795 | "outputs": [ 796 | { 797 | "output_type": "stream", 798 | "name": "stdout", 799 | "text": [ 800 | "Classification report for classifier DecisionTreeClassifier(random_state=0):\n", 801 | " precision recall f1-score support\n", 802 | "\n", 803 | " Business 0.76 0.72 0.74 412\n", 804 | "Entertainment 0.66 0.82 0.73 405\n", 805 | " Politics 0.75 0.70 0.73 386\n", 806 | " Sports 0.81 0.69 0.75 420\n", 807 | "\n", 808 | " accuracy 0.74 1623\n", 809 | " macro avg 0.74 0.74 0.74 1623\n", 810 | " weighted avg 0.74 0.74 0.74 1623\n", 811 | "\n", 812 | "\n" 813 | ] 814 | } 815 | ] 816 | }, 817 | { 818 | "cell_type": "markdown", 819 | "source": [ 820 | "### KNN" 821 | ], 822 | "metadata": { 823 | "id": "-Jd2eK2u1iRS" 824 | } 825 | }, 826 | { 827 | "cell_type": "code", 828 | "source": [ 829 | "from sklearn.neighbors import 
KNeighborsClassifier\n", 830 | "error = []\n", 831 | "best_k = dict()\n", 832 | "\n", 833 | "# Calculating error for K values from 1 to 19 (range(1, 20) excludes 20)\n", 834 | "for i in range(1, 20):\n", 835 | " knn = KNeighborsClassifier(n_neighbors=i, n_jobs=-1)\n", 836 | " knn.fit(X_train, y_train)\n", 837 | " pred_i = knn.predict(X_test)\n", 838 | " error.append(np.mean(pred_i != y_test))\n", 839 | " best_k[i] = np.mean(pred_i != y_test)\n", 840 | " \n", 841 | "best_k = sorted(best_k.items(), key=lambda k: k[1])[0][0]\n", 842 | "knn_classifier = KNeighborsClassifier(n_neighbors=best_k, n_jobs=-1)\n", 843 | "knn_classifier.fit(X_train, y_train)\n", 844 | "y_pred = knn_classifier.predict(X_test)" 845 | ], 846 | "metadata": { 847 | "id": "w6tofWVrIQqz" 848 | }, 849 | "execution_count": 26, 850 | "outputs": [] 851 | }, 852 | { 853 | "cell_type": "code", 854 | "source": [ 855 | "import matplotlib.pyplot as plt\n", 856 | "\n", 857 | "plt.figure(figsize=(12, 6))\n", 858 | "plt.plot(range(1, 20), error, color='red', linestyle='dashdot', marker='o',markerfacecolor='green', markersize=10)\n", 859 | "plt.title('Error Rate K Value')\n", 860 | "plt.xlabel('K Value')\n", 861 | "plt.ylabel('Mean Error')\n", 862 | "plt.show()" 863 | ], 864 | "metadata": { 865 | "colab": { 866 | "base_uri": "https://localhost:8080/", 867 | "height": 404 868 | }, 869 | "id": "-nyJcsNVInu1", 870 | "outputId": "9d3417bf-2723-4382-ccb1-dc7a3f59422a" 871 | }, 872 | "execution_count": 27, 873 | "outputs": [ 874 | { 875 | "output_type": "display_data", 876 | "data": { 877 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAtcAAAGDCAYAAADgeTwhAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdeXyU1dn/8c+VEIJJCMjiDiibG0KUCFhBi1v1cWuVhwoGl6dCgRortVJ5rGvdWrQqgrjwUBcWa+NuQSs/asUlCCiQ4oIGyuJS2ZeQhJCc3x9nKAGTySTMzD2TfN+v1/2amXvOfd9X6Otlr5xc5zrmnENERERERPZfStABiIiIiIg0FkquRURERESiRMm1iIiIiEiUKLkWEREREYkSJdciIiIiIlGi5FpEREREJEqUXIuISEIxs7fN7Jqg4xARaQgl1yIiETCzf5lZqZltr3ZMjHMMb5tZWejZ683sRTM7NMJrf2hma/fj2Xtdb2bNQ89/z8yy9xl7k5m9U8M92pnZTjPr0dA4REQSnZJrEZHIXeicy6p2XFvTIDNrVsO51Po8KMz4a51zWUBXIAu4vz73jQYzSwdeBFoD5zjntu4zZBrwAzM7ap/zlwFFzrl/xiFMEZFAKLkWEdlPZnZVaAb3QTPbANxuZk+Z2WQzm2VmJcBAMzs2NPu82cyWmdlF1e7xvfHhnumc2wy8DORUu8fVZvapmW0zsxVm9vPQ+UxgNnBYtVn3w8wsJTTLXGxmG8zseTNrU8fPmgG8BjQDznfOldQQ21pgLjBsn6+uAJ4xswPN7HUzW2dmm0Lvj6jlebeb2bRqn480M7f7Fxgza2Vm/2dm35jZV2Z2V31/kRERiSYl1yIi0dEXWAEcDNwdOjc09L4lMB+flP4NOAjIB6ab2dHV7lF9/LvhHmZmbYFLgC+rnf4OuADIBq4GHjSzk0IJ8HnA19Vm3b8OxfBj4HTgMGATMCnMY9PxSXoZcLFzrjTM2KepllyHfs4cYAb+/3v+BHQCOgKlQENLbJ4CduFn8k8EzgFUry0igVFyLSISuZdDs867j+HVvvvaOfeIc25XtaTzFefce865KnximQXc55zb6ZybC7wODKl2j/+Md86V1RLDBDPbAqwH2uETZACcc391zhU77x/4RH5AmJ9nJHCzc26tc64cuB0YVFNZS0hL4BTg6dD4cF4CDjazH4Q+XwHMds6tc85tcM694Jzb4Zzbhv+F4vQ67vc9ZnYw8F/A9c65Eufcd8CD+PITEZFAKLkWEYncj51zrasdT1b7bk0N46ufOwxYE0q0d1sFHF7HPfZ1nXOuFdATOBD4TzmFmZ1nZoVmttHMNuMTz3Zh7tUJeGn3LwvAp0Alfva9JuvxievTZvajcEE653YAfwGuMDMDLgeeCcWZYWaPm9kqM9sKvAO0bkA5RycgDfim2s/wOP4vAyIigVByLSISHa6Oc18DHcys+n93OwJf1XGPmh/mXBFwFzDJvHTgBfwCx4Odc62BWYCFufca4Lx9fmFo4Zz7qoaxu5/7IjAcKDCzsHXh+NKQwcDZ+Fnv10LnbwCOBvo657KB00Ln7Xt3gBIgo9rnQ/aJvxxoVy3+bOfc8XXEJSISM0quRUTiYz6wAxhrZmlm9kPgQuC5/bjn0/hZ5ouA5via6HXALjM7D19/vNu/gbZm1qrauceAu82sE4CZtTezi+t6qHNuJnAt8IqZnRpm6DxgM/AE8JxzbmfofEt8nfXm0ALK28LcYzFwmpl1DMU+rloc3+BLXx4ws+zQAs0uZlbvEhMRkWhRci0iErnXbO8+1y9FemEosbwQv7BwPfAocIVz7rOGBhO658PALaHa5euA5/ELE4cCr1Yb+xkwE1gRKqE4LHTtq8DfzGwbUIhfmBnJs5/Gz0D/1cz61DLG4UtBOoVed3sIOAD/71AIvBHmOW8BfwaWAovwderVXYH/xeKT0M9dAETU+1tEJBbM/7d
PRERERET2l2auRURERESiRMm1iIiIiEiUKLkWEREREYkSJdciIiIiIlGi5FpEREREJEpq2+I26bRr184deeSRQYchIiIiIo3cokWL1jvn2tf0XaNJro888kgWLlwYdBgiIiIi0siZ2aravlNZiIiIiIhIlCi5FhERERGJEiXXIiIiIiJRouRaRERERCRKlFyLiIiIiESJkmsRERERkShRci0iIiIiEiVKrkVEREQkeRQXU54/mtK22VSlplDaNpvy/NFQXBx0ZICSaxERERFJFrNnU9K7JxOKptAjbxvNb3b0yNvGhKIplPTuCbNnBx0h5pwLOoaoyM3NddqhUURERKSRKi6mpHdPzhq0g8IO3/+63xqYU5BB5qKl0KVLTEMxs0XOudyavtPMtYiIiIgkvPKHHuDRnIoaE2uAwg4wuVcF5RMejG9g+1ByLSIiIiIJr2rGNB7rVRF2zOScCiqnPxuniGqm5FpEREREEl765u2sahV+zOpW0GLz9vgEVAsl1yIiIiKS2JYto7xFMzptCT+s4xYoa50Vn5hqoeRaRERERBLXW29Bjx6k7Kpi5CILO3TU4jRSLx8Wp8BqpuRaRERERBKHc3D//TBliv98+unwwAOkv/sBo/95AP3W1HxZvzUwakka6deNiV+sNVByLSIiIiLBW7vWv5r5ftVvv+0/N28Ov/oVnHwymTMLmFOQwfi5aXTeCM0qofNGGD83zbfhm1kQ8zZ8dVGfaxEREREJxq5d8NprMGECvPsurFoFhx0GO3ZARkbN1xQXUz7hQSqnP0uLzdspa51F6uXD/Ix1nBLrcH2um8UlAhERERGR3TZu9GUfkybB6tXQsSPcfTdkZvrva0usAbp0If3hifDwRD80DuHWh5JrEREREYmPpUvhkUdg+nQoLYWBA+Ghh+DCC6FZ40hLG8dPISIiIiKJq7ISzjvPd/444AAYNgyuvRZOOCHoyKJOybWIiIiIRN/69fDGG5CXB6mp0KsXnH02/Oxn0KZN0NHFjLqFiIiIiEjdiospzx9NadtsqlJTKG2bTXn+aCgu3nvc7mYZU6f6GeoVK/zn8ePhxhsbdWINSq5FREREpC6zZ1PSuycTiqbQI28bzW929MjbxoSiKZT07gmvvw4FBXDaaTBjhr9m+HAoKoLOnYONPc5UFiIiIiIitSsupmTIIM4atIPCDntOr2gDYwdW8GLXCuZcehGZOx0cdZQvAQE48EB/NDFKrkVERESkVuUPPcCjORV7JdbVFXaAybmQ3/a/SH/p1T3JdROlshARERERqVXVjGk81qsi7JjJuY7K999t8ok1KLkWERERkeo2boTt2/37WbNI37iNVa3CX7K6FbTYvD32sSUBJdciIiIiTVV5OXz4od/YJS8PunWDtm3h5Zf990cdRfkBaXTaEv42HbdAWeus2MebBJRci4iIiCSySFvgRWLHDr874nXXQd++kJ3tX6+7DubOhR494J57IDfXjz/2WFJ+dg0jl6SFve2oxWmkXj6sAT9c42Nudy/CJJebm+sWLlwYdBgiIiIi0TN7NiVDBvFoTgWP9apgVSvotAVGLklj9OI0MmcW+J0Pw7n7bujY0fec3r4dWrXyuyTm5kKfPj657tsXDj8czL5/fXExJb17fq9byG791sCcggwyFy2FLl2i83MnODNb5JzLrfE7JdciIiIiNSgupvyhB6iaMY30zdspb51FytA80q+/IT5JZH2S2sMPh8WLYf58fzRvDk895Qfm5sJJJ8ETT/jPn3/u429Wj6ZxoSR/cq8KJudUsLqVLwUZtTiNUUsiTPIbkXDJtcpCREREEkE0//Qv+6+uTVNmz455CBG1wOtRSnn/U3x5xymnwPXXwz/+sWeXRIDCwj2JNcDRR9cvsQY47zwyFy0lP2cERdOzKb8nhaLp2eTnjPDJfRNKrOuimWsREZGgReNP/xI98SiD2LULSkrCHqXXjabHsG2sCLNbeOeNUPREKhnX3bB3eYf
ElMpCREREEpXqWRNOef5oJhRNYezA2ns7j5+TSv4Bp5N+8SW1J8i33gq9esGbb/oFg6+/7rtxPPywn2GuQ5UZzX/rqAzTOrpZJZTfk0LKrsqG/KjSQOGSa+3QKCIiEqCI/vTfq4L8CQ+S/vDE+AYXlHjXOpeUwMqV8K9/wcqVVE39Px67po5NU06qZPSjc+GtuXtOHnAAZGbuObZu9edbt4YTT4S0UMeNU06BO+/ce2z1IyMDMjMpPyWXTlvCz1zvboGXsX//AhJFmrkWEREJUGnbbHrkRfCn/+nZZKyvo9lwYxCLEpmqKl+7npkJhx0G33wDY8b4hHrlSli3bu/hQPNbqHvG+G4j5etv9iTEKdFdyhbRDPrcNPJzRjSdX7wShMpCREREEs2OHZCRQVVqCs1vjuBP/3cbKZVV8YsvCA0tkamshK++2pMs7z5OOsmXX+zaBS1awLhx8Lvf+R0I+/SBo47acxx55H/elx7XLTF+4VHJUMJSWYiIiEjQtmzx/YUBBg70pQIvvUR566zI/vSf1cL/6X/DBr+r3mGHxSPquKpXiYxLgU8/9Un06tVQUW1218wv6jvkEP+5WTOYMQNOOMF/btMGvvyy1jhShuYxckn4GeO4bJrSpQuZMwuYU1cLPCXWCUUz1yIiIrGwbh28/bbf9W7uXNi8Gb791id+jz0G6elw9dWR/en//6WSf9zVpD/2JPzhD3DTTb604eCDYdMmn7RHuSQhCPUqkTnmBJ9QV5993n107Oj7PDdUos0YFxdTPuFBKqc/S4vN2ylrnUXq5cNIv26MEuuABFYWYmbnAg8DqcAU59x9+3z/IDAw9DEDOMg519rMcoDJQDZQCdztnPtzuGcpuRYRkUBt3gzvvLMnmS4q8udbtoTTToMzzoBf/MIn1dXVN5H74guftA8f7gf8+Mfw4YdwwQVw4YVw5pm+/jcZOOdnkEMbn1RNmkjz30ZQ6xyP7hjaNEXCCCS5NrNUYDlwNrAWWAAMcc59Usv4fOBE59z/mFl3wDnnvjCzw4BFwLHOuc21PU/JtYiINEhDO1Ns3+67P6Snw5NPwsiRfuHcAQfAqaf6ZPqMM6B377o37NifRO7Pf4aCAt/ubds2X1t85pk+2b7gAjjiiIb/20Tb+vXQtq2fvb//frjnHj/zDpCZSemuMnr8vDL4WufdNGMstQhqh8Y+wJfOuRXOuZ3Ac8DFYcYPAWYCOOeWO+e+CL3/GvgOaB/DWEVEpCmqzy58ZWV7EsGFC+HAA+Gtt/znvn3hllv8znibNvnz48b585HshLc/u9/99Kfwl7/4xPWtt2DECPjkExg1Cjp08Iv6nnkm8n+TaO0UWVYGH3zga8QBnn0W2rf3NdIAnTrBoEEwZQosXQpbtpAyfAQjl6SFvW1cap1369KF9IcnkrF+Cym7KslYv8V35VBiLWHEcuZ6EHCuc+6a0OdhQF/n3LU1jO0EFAJHOOcq9/muD/A0cLxzrmqf70YAIwA6duzYe9WqVTH5WUREpBGKpBzj+RZkXjMKFi+G99/37dvuvdcnjnfeCVdcAcccE//Y6+KcX+z32mt+45LBgyE/3ye6v/kN/OpXcNxx37+uoW3wqqp8uUqovIP582HJEt+l49lnIS/PJ+cvvABXXulrxWuSaLXOIrUIqiykPsn1b/CJdf4+5w8F3gaudM4VhnueykJERKQ+IlpI+CbkfwjpJ+T4Dh+XXAL9+8cxyih75x1flz1nDpx8MixY4GfhL7gAdu6sX2K7cSM8+KBPpBcs8DXnAFlZ/t59+viZ+wEDoF27yGNUrbMkgaCS61OA251zPwp9HgfgnLu3hrEfA79wzr1f7Vw2PrG+xzlXUNfzlFyLiEh9RNyZYlpLMjZsjV9gsbZzpy9VSUmBm2/2dc9A+UFtmdB9E2PPqr2X9vi3jPxmp5L+9jxfc96mjZ8B79vXH336wLHHQmqYFYmRUK2zJLigkutm+AWNZwJf4Rc0DnXOLdtn3DH
AG8BRLhSMmTUHZgOvOeceiuR5Sq5FRKQ+It68JR6dKYLiHHz+Obz2GqW3jItsMeHUdDK2lfkTpaV+AadIExPIgkbn3C7gWuBN4FPgeefcMjO708wuqjb0MuA5t3eWPxg4DbjKzBaHjpxYxSoiIk1PedYBdKqj4UTHLVDWOis+AQXBzNeM33gj6RVVrGoVfvjqVtCitFoZjRJrke+J6Q6NzrlZwKx9zt26z+fba7huGjAtlrGJiEgTtG6d7z99xhmkDLuCkR8/ydgza5+VjmtnioBFvFNk6yySpIu2SCCSfzsnERGpn2i1WksmpaVw332+XvenP4WyMtLH/JrRS9Ppt6bmS/qtgVFL0nydbxPgt/xOoDZ4IklKybWISFNSn77OjUFVFUyf7ksfxo2DH/7Qd8xo0QK6dCFzZgFzCjIYPzeNzht9jXXnjTB+bprvjDGzoMksoEu//gZGL07TLxsi+ymm25/HkxY0iojUoan1EH7nHbjhBt9q7qST/I6AAwd+f5w6U+yhNngiEQmkW0i8KbkWEQkvor7Oc9PIzxnhd6FLVsuX+41SXn7Zb/19zz1w+eW+9ZzUTb9siNRJybWIiETe13l6Nhnr62ijkcjOOMNvajJunN9RUR0tRCTKwiXXMe0WIiIiiSN98/bIWq1t3h6fgKKlrAweecRvsX3oofD445CdXfsW2yIiMaS/kYmINAXTplGelhJZX+dWWXD77bByZVxC229ffeV3GvzLX/znbt2UWItIYJRci4g0Rhs3wtNP+24ZAB9+SErLbEZ+FP4/+6MWp5E68Az43e/2tOb7/HN49VUoKYlx0PXwzju+rhp8HfCnn8J11wUbk4gISq5FRBqPrVthS2hq+s034aqrYP58/3n8eNILFzC6qEXdrdZ+fz/8+99w+un+i2efhYsvhnbt4Pzz4bHHYO3amP84NVq+HH7yEx/bjBmwfr0/r4V2IpIglFyLiCSzkhL485/hkkvgoIPgiSf8+YsugkWLoF8//zk9vX59ndu1g7TQhiK33gpz5sDPfw6ffQajRkGHDnDiif67Dz/cM0MeK+vX+5np44/3sdx9t59Rb9cuts8VEakndQsREUk2ZWXwxhvw3HPw2muwY4dfyDd4MFx9NfTqFf76/Wm15pxPsF97zR/vv+8T69xc36EDoLISUlOj97M+8ohPprdtgxEjfD24aqpFJEBqxSci0hjMmeNLNF5+2ZeAtGsHgwb57bwHDIheQlsfGzb4XR3LyuCaa3yi3bWrf/+//xv+2uJiyh96gKoZ00jfvJ3y1lmkDM0j/fobfJL/wgvw61/Dv/7ly1H+8Ac47ri4/FgiIuGES65VFiIiEi/FxZTnj6a0bTZVqSmUts2mPH/0noWD+6qshA8+2PP5iSfglVfg0kv9zPXXX8PkyX5L7yASa4C2bX0LvGuu8Z9LS+HHP4YePfznFSv8TPpvfwuFhXvKRyLZhn3JEmjd2v9S8frrSqxFJClo5lpEJB5C20o/mlPBY70qWNUKOm2BkUvSGL242rbSVVU+qU5Lg4kTIT/fL+Lr1g2+/RYOPNDXTyeLjz7yG7m8957/uQ46CAYMoOTN1znrp+Xht2F/fwEcfXRwvziIiNRCM9ciIkEqLqZkyCDOGrSDsQMrWNEGKlNhRRsYO7CCswbtoOSnl8DPfgadOvlaavAlH88/77fwBjjkkORKrAFOOgn+8Q/47juYPh3OOIPyWa/xaM+aE2uAwg4wuVcF5Y8/qsRaRJKOkmsRkRgrf+gBHs2pCJ9M9iij/Jk/QU7O3sn0f/9349i+u00bGDoUZs6k6oB0HqtxvmePyTkVVE5/Nj6xiYhEkZJrEZEYq5oxjcd6VYQdM/lkqMzO8h04Bg6MU2TBaLTbsIuIoORaRCS2nnqK9E3bIksmtyTQDogxVN46K7Jt2FtnxScgEZEoUnItIrK/duyAefPggQd8r+kuXfZsFb5yJeXNU5RMVpMyNI+RS9LCjhm1OI3Uy4f
FKSIRkehRci0iTUN92+DVprISli2DqVP9joUnngjZ2XDaab4n84IFcPLJe7Yhv+MOUob/XMlkNenX38DoxWl1b8N+3Zj4BiYiEgVqxScijV+kbfBqsn2777Ocm+sXGj77LFxxhf+uVSvo0wf69vWvffrUvHNgcTElvXty1qAd4VvPLVpa9w6JjUXof5PJvSqYnFPB6lZ+9n7U4jRGLanjfxMRkYBph0YRabrqk9gecggsWgTz5/uNT845B7780veYfvxxv/X211/7ZLtPH+jeHVIi/AOgksnv259t2EVEAqTkWkSarPL80UwomsLYgbV36xj/lpH/RRvS12/as4PgDTfA/feDc35nwZyc/W+Jp2RSRKRRUHItIk1WadtseuRtY0Wb2sd03ghFTzQjY+z/7invaN8+fkGKiEhSCZdcN4t3MCIi8RRxT+WKKrjjjvgEJSIijZa6hYhIo6aeyiIiEk9KrkWkcVq/HpYsUU9lERGJKyXXItK4VFbCY4/B0UfD0KGkXzdGPZVFRCRulFyLSOMxf77vOT1qFPTsCc8/D926kTmzgDkFGYyfm0bnjdCs0i9iHD83zbfhm1mgbh0iIhIVSq5FJPmtWwfXXAP9+sE338DMmTB3Lhx/vP/+vPPIXLSU/JwRFE3PpvyeFIqmZ5OfM8L3t25q/aVFRCRm1IpPRJJXZaXf3OW3v4Vt2+D66+HWW6Fly6AjExGRRkyt+ESkcbr0UnjlFRg4ECZOhOOOCzoiERFp4pRci0hyWbfOz0y3aAHDh8OQITB4MJgFHZmIiIhqrkUkiXzzDXTvDg884D+ffz789KdKrEVEJGEouRaRxPfVV/710EPhxhvhkkuCjUdERKQWSq5FJHF99x1cfTV07gyffebP/e//wrHHBhuXiIhILZRci0ji2bXLL1Ds3h2mT4cxY+CII4KOSkREpE5a0CgiieW99+AXv4AlS+Css+CRR+CYY4KOSkREJCIxnbk2s3PN7HMz+9LMbqrh+wfNbHHoWG5mm6t9d6WZfRE6roxlnCKSAP79b7jySujfHzZsgL/8Bf72NyXWIiKSVGKWXJtZKjAJOA84DhhiZns1oXXOjXHO5TjncoBHgBdD17YBbgP6An2A28zswFjFKiIxVFxMef5oSttmU5WaQmnbbMrzR0Nx8Z4xb7zhS0BmzoSbbvL11YMGqQuIiIgknVjOXPcBvnTOrXDO7QSeAy4OM34IMDP0/kfAW865jc65TcBbwLkxjFVEYmH2bEp692RC0RR65G2j+c2OHnnbmFA0hZLePeHVV/2444/3G8EUFcG990JmZrBxi4iINFAsa64PB9ZU+7wWPxP9PWbWCTgKmBvm2sNruG4EMAKgY8eO+x+xiERPcTElQwZx1qAdFHbYc3pFGxg7sIIXu1YwZ/BPyPzn59C1K7z8cnCxioiIREmidAu5DChwzlXW5yLn3BPOuVznXG779u1jFJqINET5Qw/waE7FXol1dYUdYHKuHyciItJYxDK5/gqo/n+rR4TO1eQy9pSE1PdaEUlAVTOm8VivirBjJveuovK5GXGKSEREJPZimVwvALqZ2VFm1hyfQL+67yAzOwY4EPig2uk3gXPM7MDQQsZzQudEJEmkb97Oqlbhx6xuBS02b49PQCIiInEQs+TaObcLuBafFH8KPO+cW2Zmd5rZRdWGXgY855xz1a7dCPwOn6AvAO4MnRORJFHeOotOW8KP6bgFylpnxScgERGROIjpJjLOuVnArH3O3brP59truXYqMDVmwYlITKUMzWPk4icZe8auWseMWpxG6uXD4hiViIhIbCXKgkYRaWTSR4xi9Pwq+q2p+ft+a2DUkjTSrxsT38BERERiSMm1iERfZSXccguZOx1z/pzO+LlpdN4IzSqh80YYPzeNOQUZZM4sgC5dgo5WREQkapRci0j0/fa38MorMGECmYuXkZ8zgqLp2ZTfk0LR9Gzyc0aQuWgpnHde0JGKiIhEVUxrrkWkibr
8cmjZEq69FoD0hyfCwxMByAgyLhERkRhTci0i0VNcDJ07Q48e/hAREWliVBYiItGxfDn07AkPPhh0JCIiIoFRci0i0dG1q6+1vvzyoCMREREJjMpCRGT/bN4MW7dCx44wblzQ0YiIiARKM9ci0nA7d8Kll8KAAVBWFnQ0IiIigdPMtYg0jHMwYgTMnQtPPw0tWgQdkYiISOA0cy0iDfO73/mk+vbb4Yorgo5GREQkISi5FpH6mzYNbrvNJ9W33hp0NCIiIglDybWI1M/bb8P//A8MHAhPPglmQUckIiKSMJRci0jkPv0UfvIT33bvhRegefOgIxIREUkoSq5FJDKbN8P55/uEetYsOPDAoCMSERFJOOoWIiKRadUKhg+Hs86CI48MOhoREZGEpORaRMKrrISvv4YOHbRJjIiISB1UFiIi4d15J+Tk+ARbREREwtLMtYiEd+WVkJEBhx0WdCQiIiIJTzPXIlKzTz/1uzB27gy/+U3Q0YiIiCQFJdci8n2LFkFuri8JERERkYgpuRaRva1eDRdcAO3awYgRQUcjIiKSVFRzLSJ7bNnie1nv2AFz5sChhwYdkYiISFJRci0iXkUFDBoEn30Gb7wBxx8fdEQiIiJJR8m1iPiFiyNH+tnqP/0Jzjwz6IhERESSkmquRQTuvRemToVbboGrrgo6GhERkaSl5FqkqSsogJtvhrw8uOOOoKMRERFJakquRZq6U06BUaNgyhQwCzoaERGRpKbkWqSp+uYbqKyEww+HRx+F9PSgIxIREUl6Sq5FmqLt26F/f/j5z4OOREREpFFRtxCRpigrC371K78Lo4iIiESNkmuRpqSqClasgK5d4Re/CDoaERGRRkdlISKNVXEx5fmjKW2bTVVqCqVtsynv0xt69YKVK4OOTkREpFFSci3SGM2eTUnvnkwomkKPvG00v9nRI28bE9IXU+J2wqefBh2hiIhIo6TkWiTaapoxzh8NxcVxe37JkEGcNWgHYwdWsKINVKbCijYw9hw4a+guSob+d/ziERERaUKUXItEU20zxkVTKOndE2bPjnkI5Q89wKM5FRR2qPn7wg4wuVcF5RMejHksIiIiTY0554KOISpyc3PdwoULg2Ni2+8AACAASURBVA5DmrLiYkp69+SsQTtqTGz7rYE5BRlkLloKXbrU//6VlbBhA3z3nT9KSuDCC/13Dz3k+1b//veUts2mR942VrSp/VadN0LR9Gwy1m+pfxwiIiJNnJktcs7V2HJL3UJEoiTSGeP8CQ+S/vBE2P2LrRl88QUsXw7nn+/PPfkkzJkD69btSabXr99zDUDLlrB1q3//xRfwr38BkL55O6tahY91dStosXl7w39YERERqVFMk2szOxd4GEgFpjjn7qthzGDgdsABS5xzQ0Pn/wCcjy9deQv4pWss0+zSKFXNmMZjeRVhx0zOqWD045PhhZd9wvzNN9C2LTz1FPz+97BzJ6Sk+AWHixfDQQfB0UfDgAH+ffWjffs9N5406T9vy1tn0WlL+JnrjlugrHUWGfv5M4uIiMjewibXZpYK/N459+v63jh07STgbGAtsMDMXnXOfVJtTDdgHHCqc26TmR0UOv8D4FSgZ2jou8DpwNv1jUMkXiKeMd5ZBeec4xNkM//FiBEwaNCegX/8oz8aIGVoHiOXTGHswNoT/VGL00i9fFiD7i8iIiK1C5tcO+cqzax/A+/dB/jSObcCwMyeAy4GPqk2ZjgwyTm3KfS873Y/GmgBNAcMSAP+3cA4ROIi4hnjNtlkTJ269xedOvkjCtKvv4HRvZ/mxa41l6j0WwOjlqSRPnVMVJ4nIiIie0TSLeRjM3vVzIaZ2SW7jwiuOxxYU+3z2tC56roD3c3sPTMrDJWR4Jz7APg78E3oeNM5973GvGY2wswWmtnCdevWRRCSSIw4R0r6AYxcZGGHxWXGuEsXMmcWMKcgg/Fz0+i8EZpV+kWM4+em+UWVMwsatqhSREREwookuW4BbAD
OAC4MHRdE6fnNgG7AD4EhwJNm1trMugLHAkfgE/IzzGzAvhc7555wzuU653LbV68/FYmXoiK/pbgZ6WN+zeglzem3puah/5kxvi4OM8bnnUfmoqXk54ygaHo25fekUDQ9m/ycEb5byXnnxT4GERGRJqjOBY3OuasbeO+vgOp/lD4idK66tcB851wFsNLMlrMn2S50zm0HMLPZwCnAvAbGIhJdq1fDzTfDtGkwYwYMGQI33khmjx7MGTKIyb0qmJxTwepWvhRk1OI0Ri1Ji++McZcuvivJwxMBtHhRREQkDuqcuTazI8zsJTP7LnS8YGZHRHDvBUA3MzvKzJoDlwGv7jPmZXwijZm1w5eJrABWA6ebWTMzS8MvZtR+zRK8rVth3Djo3h0KCvz73e3zQDPGIiIiTVwkrfj+BMwA/jv0OS907uxwFznndpnZtcCb+FZ8U51zy8zsTmChc+7V0HfnmNknQCVwo3Nug5kV4MtQivCLG99wzr1W/x9PJEoqKnzv6dtv972nhw2Du+6Cjh2/P1YzxiIiIk1WnTs0mtli51xOXeeCph0aJSacg9deg7Fj4fPP4fTT4YEHoHfvoCMTERGRgITboTGSBY0bzCzPzFJDRx5+gaNI4/fSS3Dxxf79K6/A3/+uxFpERERqFUly/T/AYOBbfFu8QUBDFzmKJL41a2DuXP/+oov87olFRf69hW+1JyIiIk1bJDs03uOcuyhO8YgE75pr4LPPoLgYmjWDK68MOiIRERFJEmFnrp1zlUCnULcPkcZp1y547DH4+mv/+eGH4Z13fGItIiIiUg+RZA8rgPfM7FWgZPdJ59wfYxaVSDw4B3/9K9x4o5+p3rIFfvMbOOaYoCMTERGRJBVJcl0cOlKAlrENRyROPv4YbrjBL1Ds3t0vVrzwwqCjEhERkSQXSc11d+fc5XGKR6Thiospf+gBqmZMI33zdspbZ5EyNI/062/YsyvimjXw29/Cs89C27YwcSKMGAFpacHGLiIiIo1C2OTaOVdpZp3MrLlzbme8ghKpt9mzKRkyiEdzKngsr4JVraDTlm2MXDKF0b2fJvP/noGPPoI//tGXg4wd63dXbNUq6MhFRESkEVHNtSS/4mJKhgzirEE7KOyw5/SKNjB2YAUvdq1gzv/kkVlSAZddBnffDZ06BReviIiINFqquZakV/7QAzyaU7FXYl1dYQeYnFNJfpfLSZ/6dHyDExERkSalzu3Pa7zIrJlzblcM4mkwbX/edJW2zaZH3jZWtKl9TOeNUDQ9m4z1W+IXmIiIiDRKDdr+3Mzerfb+2X2+/jBKsYnst/TN21lVR+n06lbQYvP2+AQkIiIiTVa4TWQyq73vsc932gNaEkZ56yw61TEh3XELlLXOik9AIiIi0mSFS65dLe9r+iwSmJSheYxcEr6V3qjFaaRePixOEYmIiEhTFW5BY2sz+wk+AW9tZpeEzhug/mWSMNKvv4HRvZ/mxa41L2rstwZGLUkjfeqY+AcnIiIiTUq45PofwEXV3lffvu6dmEUkUl9dupA5s4A5QwYx+YRyJp9UyepWvhRk1OI0Ri1JI3NmwZ6NZERERERipEHdQhKRuoUIxcWUX3oxlZ8so0VlCmUHZpF6+TDSrxujxFpERESiJly3kEj6XIskhy5dSE9pDqefBW+9RUbQ8YiIiEiTE25BoySD4mLK80dT2jabqtQUSttmU54/GoqLg44s/kpKYOlS6Ncv6EhERESkiVJyncxmz6akd08mFE2hR942mt/s6JG3jQlFUyjp3RNmzw46wvhatAgqK5Vci4iISGAiKgsxsx8AR1Yf75x7JkYxSSSKiykZMoizBu3Yq0PGijYwdmAFL3atYM6QQWQuWtp06o0LC/1r377BxiEiIiJNVp0z16HdGe8H+gMnh44aC7glfsofeoBHc2puPQdQ2AEm96qgfMKD8Q0sSF26wM9+Bu3aBR2JiIiINFF1dgsxs0+B41yCtxVpat1CSttm0yN
vGyva1D6m80Yomp5Nxvo6ti8UERERkYiF6xYSSc31P4FDohuS7K/0zdtZVcdWPqtbQYvN2+MTUNB27IBNm4KOQkRERJq4SJLrdsAnZvammb26+4h1YBJeeessOtUxId1xC5S1zopPQEH761+hTRvfLUREREQkIJEsaLw91kFI/aUMzWPkkimMHVhR65hRi9NIvXxYHKMKUK9ecM89cMwxQUciIiIiTZh2aExWxcWUnNiDswaX1biosd8amFOQ0bS6hYiIiIjEwX7VXJtZPzNbYGbbzWynmVWa2dbohyn10qULmd2PZ84zMH5uGp03QrNKv4hx/Nw0n1jPLGgaifXOnTBrFmzRwk0REREJViRlIROBy4C/4FvwXQF0j2VQEoElS2DRIjJvuon8HdsYPf1ZWmzcSlmLZqQOH0H61DFNI7EG/29x/vnwl7/AoEFBRyMiIiJNWESbyDjnvjSzVOdcJfAnM/sYGBfb0CSsXr3gvffgpJNIb9ECHp4IFRVkpKUFHVn87d48RjszioiISMAiSa53mFlzYLGZ/QH4Bm2bHqzyckhPhx/8YO/zTTGxBp9cH344HHFE0JGIiIhIExdJkjwsNO5aoAToAFway6AkjPJyOOEEuP/+73/nHFx6qe+a0ZQUFmrWWkRERBJCncm1c24VYMChzrk7nHO/cs59GfvQpEbl5XD22b4sZF9m8M03fnFfU/Hdd7BihZJrERERSQiRdAu5EFgMvBH6nKNNZAKUnQ2TJvkEuyYDBsCCBVBWFt+4gjJ/vn9Vci0iIiIJIJKykNuBPsBmAOfcYuCoGMYktZkyBd59N/yY/v19a7oFC+ITU9AKC6FZMzjppKAjEREREYkoua5wzu3bQLhx7DyTTNauhfx8mDw5/LhTT/Wv8+bFPqZEUFjoS2QyMoKORERERCSi5HqZmQ0FUs2sm5k9Arwf47hkX7fdBlVVcNdd4ce1aQPHH1/3DHdjUFkJH36okhARERFJGJG04ssHbgbKgZnAm8DvYhmU7GPZMnjqKfjlL+GoCCpyBgyAGTN88pmaGvPwApOaCp995n9OERERkQQQSbeQHc65m51zJzvnckPvI1otZ2bnmtnnZvalmd1Uy5jBZvaJmS0zsxnVznc0s7+Z2aeh74+M9IdqdG66CVq2hJtvjmx8//6wdSv885+xjSsRHH44dOwYdBQiIiIiQJiZ67o6gjjnLgr3vZmlApOAs4G1wAIze9U590m1Md3wOz2e6pzbZGYHVbvFM8Ddzrm3zCwLqKrzp2mM/vEPeP11uPdeaNs2smv69/ev8+bV3LKvsXj8cUhJgeHDg45EREREBAhfFnIKsAZfCjIf3+u6PvoAXzrnVgCY2XPAxcAn1cYMByY55zYBOOe+C409DmjmnHsrdH57PZ/dODgHY8f62dlf/jLy6zp1gquu8q+N2Ysv+k4hSq5FREQkQYRLrg/BzzoPAYYCfwVmOueWRXjvw/HJ+W5rgb77jOkOYGbvAanA7c65N0LnN5vZi/i2f3OAm5xzexXXmtkIYARAx8ZYGlBQ4Bfs/d//wQEH1O/aP/0pNjElkjffbDr9vEVERCQp1Fpz7ZyrdM694Zy7EugHfAm8bWbXRvH5zYBuwA/xSfyTZtY6dH4A8GvgZKAzcFUNMT4RqgPPbd++fRTDShB//KPv/HHllQ27ft062LYtujElmhYtgo5ARERE5D/CLmg0s3QzuwSYBvwCmAC8FOG9vwI6VPt8ROhcdWuBV51zFc65lcByfLK9FljsnFvhnNsFvAw0vV1C3ngDnn++YR0/PvsMDjoIXor0f64k8+ijMHiwOoWIiIhIQgm3oPEZoAcwC7jDOVff1hMLgG5mdhQ+qb4MX15S3cv4Ges/mVk7fDnICvxukK3NrL1zbh1wBrCwns9PXmVlkJYGrVr5oyG6d4fx4xtvD+hZs2DlysbdalBERESSTriZ6zz8LPIvgffNbGvo2GZmW+u6cWjG+Vp8X+xPgeedc8v
M7E4z291p5E1gg5l9AvwduNE5tyFUW/1r4P+ZWRF+MeWTDf0hk86998KJJ0JJScPvkZICv/61T7IbG+f8zoyN9RcHERERSVq1zlw75yLZvTEs59ws/Mx39XO3VnvvgF+Fjn2vfQvoub8xJKWTToLSUsjM3L/7bN0Kb78Np50GrVtHJbSEUFwMGzYouRYREZGEs98JtMTAxRfDH/6w//dZutTf6+239/9eiaSw0L8quRYREZEEo+Q6kXz+Odx11/6Vg1R38smQng7vvhud+yWKwkLIyoLjjgs6EhEREZG9KLlOJOPG+RnraCXX6ek+wZ43Lzr3SxSFhdCnjxYzioiISMJRcp0o3nvPt80bO9a30IuWAQPgo4+il7AHbccOWLJEJSEiIiKSkJRcJ4Ld25wfeiiMGRPde/fvD7t2wfz50b1vUD76yP88Sq5FREQkASm5TgSvvALvvw+3377/HUL29YMfgFnjqbs2g3POgb59g45ERERE5HvMd8NLfrm5uW7hwiTcZ2bXLjjhBP++qAia1dodseF69fKlJm+9Ff17i4iIiDQxZrbIOZdb03eauQ7a1Kl+q/L77otNYg2+7vqDD3win+y2bQs6AhEREZFaKbkOUkkJ3HYbnHoqXHRR3eMbqn9//6xly2L3jHhYu9ZvB//MM0FHIiIiIlKjGE2VSkRefBG+/RZeeMHXEsfKBRf45xx8cOyeEQ9pab4u/eSTg45EREREpEaquQ7axx/DiScGHYWIiIiIREg114lo61b/Gq/EevZsuOIK3/YvWX34IWzZEnQUIiIiIrVSch2EL7+Eww/3ZSHxsmaNX9S4fn38nhlNFRVw+ulwxx1BRyIiIiJSKyXXQcjIgCFD4JRT4vfM4cPhiy+gffv4PTOali6FsjJtHiMiIiIJTQsag3DYYfDEE/F9ZiwXTMZDYaF/VXItIiIiCUwz1/G0e5vzjz4K5vm//71v+5eMCgv99vAdOgQdiYiIiEitlFzH06xZMH68r30OQmqq32b922+Def7+KCz0s9bJPgMvIiIijZqS63iprISbboKuXWHEiGBiGDDAv777bjDPb6j16/0iUJWEiIiISIJTch0vzzwD//wn3HOP3wwlCCeeCAcckHzJ9fz5/lXJtYiIiCQ4JdfxUFoKt9wCffrAoEHBxdG8uU9Q580LLoaGKCz0JS29ewcdiYiIiEhYSq7j4eGH4auvfL110DXD/fvD4sWwbVuwcdRHYSH07AmZmUFHIiIiIhKWWvHF2oYNcN99cMEFcNppQUfj666rqvyiynPOCTqayEyZkryb34iIiEiTopnrWLv7bj9LfN99QUfi9esHKSnJVXfdqZNKQkRERCQpKLmOpR07YNo0uOoqOP74oKPxWrb0CxuTpe767bdh4kTYuTPoSERERETqpLKQWMrIgE8+8WUYiWTEiOSpuX71VXjqKfjFL4KORERERKRO5pwLOoaoyM3NdQsXLgw6jD02bYLWrYNfwJjsnIONG6Ft26AjEREREQHAzBY553Jr+k5lIbEyeDD8138FHUXtNm+GlSuDjqJuZkqsRUREJGkouY4F5yAvDy67LOhIate3L4wZE3QU4X3wAQwdCqtXBx2JiIiISERUcx0LZnDllUFHEd748dCuXdBRhPf3v8PMmTBpUtCRiIiIiEREyXW0FRT4mdbrroNmCfzPe9FFQUdQt8JCOOYYOPDAoCMRERERiYjKQqKprAxuuAGmT/e9pBNZVRXMng0ffhh0JDVzzifX/foFHYmIiIhIxBI8A0wykyb5Wes//CHxk2sz33974sSgI6nZypWwbp2SaxEREUkqCZ4BJpFNm/xujD/6EZx5ZtDR1M3Mb4WeqDs1Fhb61759g41DREREpB6UXDdUcTHl+aMpbZtNVWoKpUccQvnWTXDttUFHFrn+/f0M8VdfBR3J9xUW+k14evQIOhIRERGRiCm5bojZsynp3ZMJRVPokbeN5jc7elyzkwn9jJK8n/pa5mQ
wYIB/TcTZ68JCOPnkxF4UKiIiIrIPJdf1VVxMyZBBnDVoB2MHVrCiDVSmwoo2MPZsx1mDdlAyZBAUFwcdad169YLMTJg3L+hI9lZaCh9/rHprERERSTpKruup/KEHeDSngsIONX9f2AEm96qgfMKD8Q2sIZo1g1NOSbyZ6/Xr/az6aacFHYmIiIhIvSi5rqeqGdN4rFdF2DGTcyqonP5snCLaTwMGwNKlfjv0RNGhA8ydm9jbx4uIiIjUIKbJtZmda2afm9mXZnZTLWMGm9knZrbMzGbs8122ma01s4TpF5e+eTurWoUfs7oVtNi8PT4B7a/+/X1P6Q8+CDqSPXbtCjoCERERkQaJWXJtZqnAJOA84DhgiJkdt8+YbsA44FTn3PHA9fvc5nfAO7GKsSHKW2fRaUv4MR23QFnrrPgEtL/69vXlIYsXBx3JHt27w001/i4mIiIiktBiOXPdB/jSObfCObcTeA64eJ8xw4FJzrlNAM6573Z/YWa9gYOBv8UwxnpLGZrHyCVpYceMWpxG6uXD4hTRfsrM9K34xo0LOhJv1y4YOlT9rUVERCQpxTK5PhxYU+3z2tC56roD3c3sPTMrNLNzAcwsBXgA+HW4B5jZCDNbaGYL161bF8XQa5d+/Q2MXpxGvzU1f99vDYxakkb6dWPiEk9UHHRQ0BHs0awZ3HUX/OQnQUciIiIiUm9BL2hsBnQDfggMAZ40s9bAaGCWc25tuIudc08453Kdc7nt27ePebAAdOlC5swC5hRkMH5uGp03QrNK6LwRxs9NY05BBpkzC6BLl/jEEw0rV8LgwbBwYdCR+Fi2J0m9uoiIiMg+YrlDx1dA9YZ1R4TOVbcWmO+cqwBWmtlyfLJ9CjDAzEYDWUBzM9vunEuMQtzzziNz0VLyJzzI6OnP0mLzdspaZ5F6+TDSp45JrsQaoGVLmD8fvv466Ejgqqtg587EWmApIiIiEiFzzsXmxmbNgOXAmfikegEw1Dm3rNqYc4Ehzrkrzawd8DGQ45zbUG3MVUCucy7svuK5ubluYSLMvErD7doF2dkwYgQ89FDQ0YiIiIjUyMwWOedya/ouZmUhzrldwLXAm8CnwPPOuWVmdqeZXRQa9iawwcw+Af4O3Fg9sZY4c84fQSkq8rszamdGERERSVKxLAvBOTcLmLXPuVurvXfAr0JHbfd4CngqNhHKf7zzju/SMXs2nHBCMDEUFvpXJdciIiKSpIJe0CiJ4ogjfEu+efOCi6GwEA4+GDp1Ci4GERERkf2g5Fq8o46Cww6Dd98NLobCQj9rbRZcDCIiIiL7Qcm1eGZ+K/R584Kpu96wAZYvV0mIiIiIJDUl17LHgAGwdi2sXh3/Z3/4oX/VzowiIiKSxJRcyx79+/vXIEpDCgshJQVya+xqIyIiIpIUlFzLHiec4PtMB7Go8Wc/gxdf9BvaiIiIiCSpmLbikySTmgo/+EEwM9cdO/pDREREJIlp5lr2NmAALFvmFxjGy5o18PjjsH59/J4pIiIiEgOauZa9nX8+lJdDVVX8nvn3v8PIkXD66dCuXfyeKyIiIhJlSq5lb716+SOehg3z5SidO8f3uSIiIiJRprIQ+b5t22D+/Pg9zwy6dvXdQkRERESSmLIZ+b677vK116WlsX/W9u1w5ZWwYEHsnyUiIiISY0qu5fuuvhpefx2axaFqaOFCeOYZLWYUERGRRkE11/J9xxzjj3goLPSvffrE53kiIiIiMaSZa6lZYaGfUY7Hc7p3h7ZtY/8sERERkRhTci01e+YZuPZaqKyM3TOc88l1v36xe4aIiIhIHCm5lpr17++7hixdGrtnrFoF//63kmsRERFpNJRcS80GDPCvsdwKfXe9tZJrERERaSSUXEvNOnSAjh1jn1wfcACccELsniEiIiISR0qupXYDBsC8eb42Ohbmz4fc3Pi0/BMRERGJAyXXUrv+/eGbb2Dlyujf2znIzIQf/jD69xYREREJiKYMpXa7667nzYP
OnaN7bzOYMye69xQREREJmGaupXbHHgsHHhibuutYlZqIiIiIBEjJtdQuJcWXhixfHv17X3UVXHJJ9O8rIiIiEiCVhUh4M2b42uho69EDduyI/n1FREREAqTkWsLLyorNfW+8MTb3FREREQmQykKkbsOHw333Re9+mzZBaWn07iciIiKSIJRcS922bPFboUfL738P7dtDRUX07ikiIiKSAFQWInV7/vno3q+wEI47DtLSontfERERkYBp5loiV1m5//fYtQsWLoR+/fb/XiIiIiIJRsm11G3nTujaFe66a//vtWwZlJQouRYREZFGScm11K15c981ZN68/b9XYaF/VXItIiIijZCSa4nMgAE+Md7fRYiFhX4x41FHRScuERERkQSi5Foi07+/L+dYvHj/7lNY6GetzaITl4iIiEgCUXItkenf37+++27D77FpE3z2mUpCREREpNFSci2ROfxwX8qxP3XXH37oX5Vci4iISCOl5FoiN2CAn7l2rmHX9+oFU6fCySdHNy4RERGRBKHkWiLXvz+sWwfLlzfs+kMOgauvhpYtoxuXiIiISIKIaXJtZuea2edm9qWZ3VTLmMFm9omZLTOzGaFzOWb2QejcUjP7aSzjlAgNGOBfG1J3XVUFTz8Na9ZENyYRERGRBBKz5NrMUoFJwHnAccAQMztunzHdgHHAqc6544HrQ1/tAK4InTsXeMjMWscqVonQ0UfDbbdBbm79r/3iC7jqKvjb36IeloiIiEiiaBbDe/cBvnTOrQAws+eAi4FPqo0ZDkxyzm0CcM59F3r9T92Bc+5rM/sOaA9sjmG8UhczuP32hl3brRt8+qnvcS0iIiLSSMWyLORwoHoNwNrQueq6A93N7D0zKzSzc/e9iZn1AZoDxTV8N8LMFprZwnXr1kUxdKlVWRm8/bZvq1cfKSlwzDHQtm1MwhIRERFJBEEvaGwGdAN+CAwBnqxe/mFmhwLPAlc756r2vdg594RzLtc5l9teM6LxsWQJDBwIc+bU77o77oA33ohNTCIiIiIJIpbJ9VdAh2qfjwidq24t8KpzrsI5txJYjk+2MbNs4K/Azc65whjGKfVx0knw2mtwzjmRX1NSAnfeCe+/H7u4RERERBJALJPrBUA3MzvKzJoDlwGv7jPmZfysNWbWDl8msiI0/iXgGedcQQxjlPpKS4MLLoBWrSK/ZtEi3y2kb9/YxSUiIiKSAGKWXDvndgHXAm8CnwLPO+eWmdmdZnZRaNibwAYz+wT4O3Cjc24DMBg4DbjKzBaHjpxYxSr1tHIl3HMPbN0a2fjC0B8elFyLiIhII2euobvtJZjc3Fy3cOHCoMNoGubMgbPP9jXUP/pR3eMvuQSKinw7PhEREZEkZ2aLnHM19iYOekGjJKN+/SA1NbLNZJyDDz7w14iIiIg0ckqupf6ysuDEE2HevLrHrlkD336r5FpERESaBCXX0jD9+8P8+bBzZ/hxu+utlVyLiIhIE6DkWhpmwAC/ocxHH4UfV1gILVpAz57xiUtEREQkQEqupWFOPdW/1lUa8v/bu/9YucoygePfpz9obWvaKgYVK0gRzS6xlTa0i8XUAJVWomiMgpTfmxVQFNSsqxLXuInBRVatBkwVEn9gUVGwMRSpyyYC9qJt01IsCFRbKakXdsuPvaWU/nj845zCcJm5be8cZjoz309yM3Pe9z33PvPkvec+98w75wwMwAknFJfwkyRJ6nIW1xqeww6DY47Z94caFy8+8Ls5SpIkdSiLaw3fnDlw993FDWKGEtGaeCRJktrM4lrDd+KJMGIEbNlSv3/x4mLMwEBr45IkSWoTi2sN38KF0N8Phx9ev3/cOJg8ubh0nyRJUg+wuNbwjRo19JKPhQth6dLWxSNJktRmFtdqzjXXwLve9dL2557b9zWwJUmSuozFtZozdixMnAjbt7+4/bbbiva1a9sTlyRJUhtYXKs5F1wAt9wCr3jFi9v7+mDXruJyfZIkST3C4lrVGHz
m+p57YNq0lxbdkiRJXcziWs07/3yYPfuF7d274fe/f3GbJElSD7C4VvOOzS2yLQAACY5JREFUOgrWrYMnnii2168vrm1tcS1JknqMxbWaN2cOZMKKFcV2X1/xaHEtSZJ6jMW1mjdrVnHN6zvvLLb7+uDVr4apU9sblyRJUotZXKt548bBjBlw113Fdl9fcdZ6qBvMSJIkdSGLa1VjzpziQ4z9/cWaa5eESJKkHmRxrWocfTQ7dj/H9mPexJ6A7V+7kh2XXgIbNrQ7MkmSpJaxuFbzli1j22c/xaJZcOx52znkCjj23G0sWvc9ts14Gyxb1u4IJUmSWiIys90xVGLmzJm5cuXKdofRezZsYNuMt3HyB5+hb8pLu2c/Ar+5aRzjV93rBxwlSVJXiIhVmTmzXp9nrtWUHd+4mmum76xbWAP0TYFrp+1kx6KvtzYwSZKkNrC4VlP2/PhHfGfaziHHXDt9J7tv+GGLIpIkSWofi2s1ZcyTA2yaOPSYv06EsU8OtCYgSZKkNrK4VlN2TJrAEU8NPeaNT8Gzkya0JiBJkqQ2srhWU0Z8ZCEXrR095JiL14xm5FlntygiSZKk9rG4VlPGXPZpLlkzmtmP1O+f/QhcvHY0Yz5xeWsDkyRJagOLazVn6lTGL7mJ39w0jqvuGM1RW2HUbjhqK1x1x+jiMnxLbvIyfJIkqSeMancA6gLz5zN+1b1cuujrXHLDDxn75ADPTprAyLPOZsz1l1tYS5KknuFNZCRJkqQD4E1kJEmSpBawuJYkSZIqYnEtSZIkVcTiWpIkSaqIxbUkSZJUEYtrSZIkqSIW15IkSVJFLK4lSZKkinTNTWQi4nFgU7vj6AKHAv/b7iC6iPmsnjmtlvmsnjmtlvmsnjlt3hGZ+Zp6HV1TXKsaEbGy0R2HdODMZ/XMabXMZ/XMabXMZ/XM6cvLZSGSJElSRSyuJUmSpIpYXGuwxe0OoMuYz+qZ02qZz+qZ02qZz+qZ05eRa64lSZKkinjmWpIkSaqIxXWPiYgpEfE/EbE+Iv4YEZ+sM2ZuRDwVEWvKry+2I9ZOEhEbI2Jdma+VdfojIhZFxMMRcW9EHNeOODtFRLylZv6tiYinI+KyQWOcp0OIiOsj4rGIuK+m7VURsTwiHiofJzfY99xyzEMRcW7roj64NcjpVRHxQPl7fXNETGqw75DHiF7UIJ9fiohHa36vFzTY99SI+FN5TP231kV9cGuQ05/U5HNjRKxpsK9ztCIuC+kxEfE64HWZuToiXgmsAk7PzPU1Y+YCn8nM09oUZseJiI3AzMyse93Q8g/EpcACYBbwzcyc1boIO1dEjAQeBWZl5qaa9rk4TxuKiHcCA8APMvPYsu0/ga2ZeWVZkEzOzM8O2u9VwEpgJpAUx4gZmflES1/AQahBTucBd2Tmroj4KsDgnJbjNjLEMaIXNcjnl4CBzPzaEPuNBB4ETgE2A38Azqz9O9ar6uV0UP/VwFOZ+eU6fRtxjlbCM9c9JjO3ZObq8vn/A/cDh7c3qp7wPoqDXWZmHzCp/EdH+3YSsKG2sNa+ZeZvga2Dmt8HfL98/n3g9Dq7vhtYnplby4J6OXDqyxZoB6mX08y8PTN3lZt9wBtaHliHajBH98fxwMOZ+efMfA64kWJu97yhchoRAXwIWNLSoHqQxXUPi4gjgbcD99Tp/qeIWBsRyyLiH1saWGdK4PaIWBUR/1Kn/3DgkZrtzfhPzf46g8Z/DJynB+awzNxSPv8bcFidMc7V4bsAWNagb1/HCL3g4+Uym+sbLF1yjg7PiUB/Zj7UoN85WhGL6x4VEROAnwOXZebTg7pXU9zWcxrwLeCWVsfXgeZk5nHAfOBj5VtzalJEHAK8F/hZnW7naROyWBPousCKRMQXgF3ADQ2GeIzYP9cCU4HpwBbg6vaG01XOZOiz1s7Rilhc96CIGE1RWN+Qmb8Y3J+ZT2fmQPn8VmB0RBza4jA7SmY+Wj4+BtxM8bZlrUeBKTXbbyj
bNLT5wOrM7B/c4Twdlv69y5HKx8fqjHGuHqCIOA84DTgrG3yQaT+OEQIysz8zd2fmHuC71M+Tc/QARcQo4APATxqNcY5Wx+K6x5Rrrq4D7s/M/2ow5rXlOCLieIp58n+ti7KzRMT48sOhRMR4YB5w36BhS4FziouGxGyKD5RsQfvS8EyL83RYlgJ7r/5xLvDLOmN+DcyLiMnlW/LzyjbVERGnAv8KvDczn2kwZn+OEeL5f/r2ej/18/QH4M0R8aby3a0zKOa2GjsZeCAzN9frdI5Wa1S7A1DLvQM4G1hXczmezwNvBMjM7wAfBC6OiF3AduCMRmdjBBTrVm8u67xRwI8z87aIuAiez+mtFFcKeRh4Bji/TbF2jPIAfwrw0Zq22pw6T4cQEUuAucChEbEZ+HfgSuCnEXEhsIniw01ExEzgosz858zcGhH/QVHAAHw5M4fzobOu0yCnnwPGAMvLY0BfZl4UEa8HvpeZC2hwjGjDSzioNMjn3IiYTrFkaSPl739tPssrs3yc4p++kcD1mfnHNryEg069nGbmddT57Ipz9OXjpfgkSZKkirgsRJIkSaqIxbUkSZJUEYtrSZIkqSIW15IkSVJFLK4lSZKkilhcS1KHioiBmucLIuLBiDiipu3IiNgcESMG7bcmImY1+J5HRoTXt5WkYbK4lqQOFxEnAYuA+Zm5aW97Zm4E/gqcWDP2rcArM/OeVscpSb3A4lqSOlhEvJPiNtGnZeaGOkOWUNxAYq8zgBvLM9R3RsTq8uuEOt/7vIj4ds32ryJibvl8XkSsKPf9WURMqPSFSVKHsriWpM41BrgFOD0zH2gw5qfA6RGx9468H6YouB8DTsnM48q2Rfv7QyPiUOAK4ORy/5XAp4b3EiSpu3j7c0nqXDuB3wEXAp+sNyAz+8s11CdFRD+wKzPvi4iJwLfLW03vBo45gJ87G/gH4O7ydsmHACuG/zIkqXtYXEtS59oDfAj474j4fGZ+pcG4vUtD+svnAJeX29Mo3sV8ts5+u3jxO5xjy8cAlmfmmc2FL0ndx2UhktTBMvMZ4D3AWRFxYYNhvwAWUCz/uLFsmwhsycw9wNnAyDr7bQSmR8SIiJgCHF+29wHviIijASJifEQcyJlvSepanrmWpA6XmVsj4lTgtxHxeGYuHdT/ZESsAF6bmX8um68Bfh4R5wC3AdvqfOu7gb8A64H7gdXl93s8Is4DlkTEmHLsFcCDFb80Seo4kZntjkGSJEnqCi4LkSRJkipicS1JkiRVxOJakiRJqojFtSRJklQRi2tJkiSpIhbXkiRJUkUsriVJkqSKWFxLkiRJFfk79wGGHALFGmkAAAAASUVORK5CYII=\n", 878 | "text/plain": [ 879 | "
" 880 | ] 881 | }, 882 | "metadata": { 883 | "needs_background": "light" 884 | } 885 | } 886 | ] 887 | }, 888 | { 889 | "cell_type": "code", 890 | "source": [ 891 | "# confusion matrix and accuracy\n", 892 | "\n", 893 | "from sklearn import metrics\n", 894 | "from sklearn.metrics import classification_report \n", 895 | "\n", 896 | "print(f\"Classification report for classifier {knn_classifier}:\\n\"\n", 897 | " f\"{metrics.classification_report(y_test, y_pred)}\\n\")" 898 | ], 899 | "metadata": { 900 | "colab": { 901 | "base_uri": "https://localhost:8080/" 902 | }, 903 | "id": "FkDutc8kIrqx", 904 | "outputId": "8c4f931f-abf1-4d57-c3d2-0404213f7fb6" 905 | }, 906 | "execution_count": null, 907 | "outputs": [ 908 | { 909 | "output_type": "stream", 910 | "name": "stdout", 911 | "text": [ 912 | "Classification report for classifier KNeighborsClassifier(n_jobs=-1, n_neighbors=2):\n", 913 | " precision recall f1-score support\n", 914 | "\n", 915 | " Business 0.65 0.60 0.62 412\n", 916 | "Entertainment 0.50 0.80 0.61 415\n", 917 | " Politics 0.66 0.58 0.62 383\n", 918 | " Sports 0.90 0.50 0.64 413\n", 919 | "\n", 920 | " accuracy 0.62 1623\n", 921 | " macro avg 0.68 0.62 0.62 1623\n", 922 | " weighted avg 0.68 0.62 0.62 1623\n", 923 | "\n", 924 | "\n" 925 | ] 926 | } 927 | ] 928 | }, 929 | { 930 | "cell_type": "markdown", 931 | "source": [ 932 | "### Post-processing" 933 | ], 934 | "metadata": { 935 | "id": "ejrxYOKN2Scy" 936 | } 937 | }, 938 | { 939 | "cell_type": "code", 940 | "source": [ 941 | "# save the model to disk\n", 942 | "import pickle\n", 943 | "\n", 944 | "filename = 'svm_model.sav'\n", 945 | "pickle.dump(svm_model, open(filename, 'wb'))\n", 946 | "\n", 947 | "# load the model from disk\n", 948 | "loaded_model = pickle.load(open(filename, 'rb'))" 949 | ], 950 | "metadata": { 951 | "id": "eqhzRO67IzQN" 952 | }, 953 | "execution_count": 28, 954 | "outputs": [] 955 | }, 956 | { 957 | "cell_type": "code", 958 | "source": [ 959 | "line = \"ဒီကနေ့ ဒီဇင်ဘာလ ၂၁ ရက်နေ့ 
နေ့လယ်ပိုင်းမှာလည်း ရေဦးနဲ့ တန့်ဆည်မြို့နယ် နယ်နမိတ်ထိစပ်နေတဲ့နေရာနားက လိပ်ခြံရွာက ပီဒီအက်ဖ်စခန်းတွေလို့ ယူဆတဲ့နေရာတွေကို စစ်ကောင်စီတပ်ရဲ့ ရဟတ်ယာဉ်တွေက ပစ်ခတ်တာတွေ ဆက်လုပ်ခဲ့တယ်လို့ ဒေသခံတွေထံက သိရပါတယ်။\"" 960 | ], 961 | "metadata": { 962 | "id": "oaxUseLEviBk" 963 | }, 964 | "execution_count": 29, 965 | "outputs": [] 966 | }, 967 | { 968 | "cell_type": "code", 969 | "source": [ 970 | "stopwordslist = []\n", 971 | "slist = []\n", 972 | "with open(\"./stopword.txt\", encoding = 'utf8') as stopwordsfile:\n", 973 | " stopwords = stopwordsfile.readlines()\n", 974 | " slist.extend(stopwords)\n", 975 | "\n", 976 | " for w in range(len(slist)):\n", 977 | " temp = slist[w]\n", 978 | " stopwordslist.append(temp.rstrip())\n" 979 | ], 980 | "metadata": { 981 | "id": "8m8FXyx8xkFE" 982 | }, 983 | "execution_count": 31, 984 | "outputs": [] 985 | }, 986 | { 987 | "cell_type": "code", 988 | "source": [ 989 | "def stop_word(sentence):\n", 990 | " new_sentence = []\n", 991 | " for word in sentence.split():\n", 992 | " if word not in stopwordslist:\n", 993 | " new_sentence.append(word)\n", 994 | " return(' '.join(new_sentence))" 995 | ], 996 | "metadata": { 997 | "id": "UypfZVHfxkFG" 998 | }, 999 | "execution_count": 32, 1000 | "outputs": [] 1001 | }, 1002 | { 1003 | "cell_type": "code", 1004 | "source": [ 1005 | "import pyidaungsu as pds\n", 1006 | "\n", 1007 | "def tokenize(line):\n", 1008 | " sentence = pds.tokenize(line,form=\"word\")\n", 1009 | " sentence = ' '.join([str(elem) for elem in sentence])\n", 1010 | " sentence = stop_word(sentence)\n", 1011 | " return sentence" 1012 | ], 1013 | "metadata": { 1014 | "id": "P1snC2NE2jsC" 1015 | }, 1016 | "execution_count": 33, 1017 | "outputs": [] 1018 | }, 1019 | { 1020 | "cell_type": "code", 1021 | "source": [ 1022 | "tokenize(line)" 1023 | ], 1024 | "metadata": { 1025 | "colab": { 1026 | "base_uri": "https://localhost:8080/", 1027 | "height": 53 1028 | }, 1029 | "id": "htTGYa8Ovlkk", 1030 | "outputId": 
"d52cc97e-a1ce-489a-f4d2-bd41c3e930f4" 1031 | }, 1032 | "execution_count": 34, 1033 | "outputs": [ 1034 | { 1035 | "output_type": "execute_result", 1036 | "data": { 1037 | "application/vnd.google.colaboratory.intrinsic+json": { 1038 | "type": "string" 1039 | }, 1040 | "text/plain": [ 1041 | "'ဒီဇင်ဘာ ၂၁ ရက်နေ့ နေ့လယ် ရေဦး တန့်ဆည် မြို့နယ် နယ်နမိတ် ထိစပ် နေရာ နားကလိပ်ခြံ ရွာ ပီဒီအက်ဖ်စခန်း ယူဆ နေရာ စစ်ကောင်စီ တပ် ရဟတ်ယာဉ် ပစ်ခတ် ဆက်လုပ် ဒေသခံ ။'" 1042 | ] 1043 | }, 1044 | "metadata": {}, 1045 | "execution_count": 34 1046 | } 1047 | ] 1048 | }, 1049 | { 1050 | "cell_type": "code", 1051 | "source": [ 1052 | "user = input(\"Enter a Text: \")\n", 1053 | "user = tokenize(user)\n", 1054 | "data = vectorizer.transform([user]).toarray()\n", 1055 | "output = loaded_model.predict(data)\n", 1056 | "print(output)" 1057 | ], 1058 | "metadata": { 1059 | "id": "QFiTpHpivoce", 1060 | "colab": { 1061 | "base_uri": "https://localhost:8080/" 1062 | }, 1063 | "outputId": "d9b613ec-3d4d-41f7-e5d5-6a1989f408d7" 1064 | }, 1065 | "execution_count": 35, 1066 | "outputs": [ 1067 | { 1068 | "output_type": "stream", 1069 | "name": "stdout", 1070 | "text": [ 1071 | "Enter a Text: ဒီဇင်ဘာ ၂၁ ရက်နေ့ နေ့လယ် ရေဦး တန့်ဆည် မြို့နယ် နယ်နမိတ် ထိစပ် နေရာ နားကလိပ်ခြံ ရွာ ပီဒီအက်ဖ်စခန်း ယူဆ နေရာ စစ်ကောင်စီ တပ် ရဟတ်ယာဉ် ပစ်ခတ် ဆက်လုပ် ဒေသခံ\n", 1072 | "['Politics']\n" 1073 | ] 1074 | } 1075 | ] 1076 | }, 1077 | { 1078 | "cell_type": "code", 1079 | "source": [ 1080 | "" 1081 | ], 1082 | "metadata": { 1083 | "id": "LMHLsUtDaLe3" 1084 | }, 1085 | "execution_count": null, 1086 | "outputs": [] 1087 | } 1088 | ] 1089 | } 1090 | --------------------------------------------------------------------------------