├── Data mining project.pdf ├── Data ├── DS1.csv ├── DS2.csv ├── DS3.csv └── testData.csv └── FinalProject.ipynb /Data mining project.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mohabyoussef09/Arabic-Sentiment-Analysis/f732a000e7761f34065544544d9fcd366ea7ddb3/Data mining project.pdf -------------------------------------------------------------------------------- /Data/DS1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mohabyoussef09/Arabic-Sentiment-Analysis/f732a000e7761f34065544544d9fcd366ea7ddb3/Data/DS1.csv -------------------------------------------------------------------------------- /Data/DS2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mohabyoussef09/Arabic-Sentiment-Analysis/f732a000e7761f34065544544d9fcd366ea7ddb3/Data/DS2.csv -------------------------------------------------------------------------------- /Data/DS3.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mohabyoussef09/Arabic-Sentiment-Analysis/f732a000e7761f34065544544d9fcd366ea7ddb3/Data/DS3.csv -------------------------------------------------------------------------------- /Data/testData.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mohabyoussef09/Arabic-Sentiment-Analysis/f732a000e7761f34065544544d9fcd366ea7ddb3/Data/testData.csv -------------------------------------------------------------------------------- /FinalProject.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 40, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import nltk\n", 12 | "import re\n", 13 | "import csv\n", 14 | "from collections import Counter\n", 15 | "import numpy as np\n", 16 | "from sklearn.metrics import accuracy_score\n", 17 | "from sklearn.naive_bayes import MultinomialNB\n", 18 | "from sklearn.naive_bayes import GaussianNB\n", 19 | "from sklearn.feature_extraction.text import CountVectorizer\n", 20 | "from sklearn import metrics\n", 21 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 22 | "from sklearn import svm\n", 23 | "from sklearn.metrics import classification_report\n", 24 | "import ArStemmerLib as lib\n", 25 | "stemmer= lib.ArStemmer()\n", 26 | "stemmer.loadDicts([\"general.txt\", \"lex_list.txt\", \"wiki_general.txt\"])\n", 27 | "stemmer.loadDict(\"wiki_dict.txt\")" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 41, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "def readNames(filePath):\n", 39 | " names=[]\n", 40 | " f=open(filePath,'r')\n", 41 | " for i in f:\n", 42 | " i=i.strip()\n", 43 | " names.append(i.lower())\n", 44 | " return names\n" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 42, 50 | "metadata": { 51 | "collapsed": true 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "def StopWords(filePath):\n", 56 | " stop_words=[]\n", 57 | " f=open(filePath,'r',encoding=\"utf-8\") \n", 58 | " for i in f:\n", 59 | " i=i.strip()\n", 60 | " stop_words.append(i)\n", 61 | " return stop_words" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 43, 67 | "metadata": { 68 | "collapsed": true 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "def removeStopWords(tweet,stop_words):\n", 73 | " new_tweet=''\n", 74 | " for word in tweet.split():\n", 75 | " if word not in stop_words:\n", 76 | " new_tweet+=word+' '\n", 77 | " return new_tweet" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 44, 83 | "metadata": { 84 | "collapsed": true 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "#stem the word\n", 89 | "def stem(word):\n", 90 | " return stemmer.stem(word)\n" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 45, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/plain": [ 101 | "''" 102 | ] 103 | }, 104 | "execution_count": 45, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "def removeDuplicates(s):\n", 111 | " if s!='':\n", 112 | " a=s[0]\n", 113 | " for i in s:\n", 114 | " if i != a[-1]:\n", 115 | " a=a+i\n", 116 | " return a\n", 117 | " else:\n", 118 | " return ''\n", 119 | "removeDuplicates('')" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 46, 125 | "metadata": { 126 | "collapsed": true 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "def changePolToNum(pol):\n", 131 | " if pol=='pos':\n", 132 | " return 1\n", 133 | " elif pol=='neg':\n", 134 | " return -1\n", 135 | " else:\n", 136 | " return 0\n", 137 | " " 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 86, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "def preProcessing(tweet):\n", 149 | " \n", 150 | " \n", 151 | " #remove lower for some english words in the tweet\n", 152 | " #tweet=tweet.lower()\n", 153 | " ##remove links\n", 154 | " tweet=re.sub(r\"http\\S+ | www\\S+\" , \"لينك\", tweet)\n", 155 | " ##remove # chracter '#messi'-->'messi'\n", 156 | " tweet=re.sub(r\"#\" , \"\", tweet)\n", 157 | " ##remove mentions (@messi)\n", 158 | " tweet=re.sub(r'@\\S+','منشن',tweet)\n", 159 | " #remove consecutive chracters from a string ('شكراااااا','شكرا')\n", 160 | " tweet=removeDuplicates(tweet)\n", 161 | " ##remove stop words()\n", 162 | " stop_words=StopWords('negators.txt')\n", 163 | " tweet=removeStopWords(tweet,stop_words)\n", 164 | " #remove special chracters,#remove words length less than 2 chracters , first and last chracter of a word is not number\n", 165 | " tweet=''.join(e+' ' for e in tweet.split() if e.isalnum() and len(e) >= 3 and not(e[0].isdigit() or e[-1].isdigit()))\n", 166 | " #stem words\n", 167 | " t=''\n", 168 | " for i in tweet.split():\n", 169 | " t+=stem(i)+' '\n", 170 | " tweet=t\n", 171 | " \n", 172 | " return tweet\n" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "# Data set 1" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "# TfidfVectorizer" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 79, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "name": "stdout", 196 | "output_type": "stream", 197 | "text": [ 198 | "DS1 svm acc 0.385065885798\n", 199 | "('DS1 svm kernel linear acc ', 0.53440702781844807)\n", 200 | "DS1 naive 0.472913616398\n", 201 | "DS1 multinomianl naive 0.543191800878\n" 202 | ] 203 | } 204 | ], 205 | "source": [ 206 | "with open(\"DS1.csv\", 'r') as file:\n", 207 | " train = list(csv.reader(file))\n", 208 | "train=train[1:] #remove header row\n", 209 | "train_data=[preProcessing(t[0]) for t in train]\n", 210 | "train_label=[changePolToNum(t[1]) for t in train]\n", 211 | "vectorizer = TfidfVectorizer()\n", 212 | "train_vectors = vectorizer.fit_transform(train_data)\n", 213 | "\n", 214 | "with open(\"testData.csv\", 'r') as file:\n", 215 | " test = list(csv.reader(file))\n", 216 | "test=test[1:]\n", 217 | "test_data=[preProcessing(t[0]) for t in test]\n", 218 | "test_label=[changePolToNum(t[1]) for t in test]\n", 219 | "test_vectors = vectorizer.transform(test_data)\n", 220 | "\n", 221 | "clf = svm.SVC()\n", 222 | "clf.fit(train_vectors, train_label)\n", 223 | "pred=clf.predict(test_vectors)\n", 224 | "print(\"DS1 svm acc \",accuracy_score(test_label, pred))\n", 225 | "\n", 226 | "\n", 227 | "#kernel svc\n", 228 | "clf = svm.SVC(kernel='linear')\n", 229 | "clf.fit(train_vectors, train_label)\n", 230 | "pred=clf.predict(test_vectors)\n", 231 | "print((\"DS1 svm kernel linear acc \",accuracy_score(test_label, pred)))\n", 232 | " \n", 233 | "#naive bayes\n", 234 | "clf = GaussianNB()\n", 235 | "clf.fit(train_vectors.toarray(), train_label)\n", 236 | "pred=clf.predict(test_vectors.toarray())\n", 237 | "print(\"DS1 naive \",accuracy_score(test_label, pred)) \n", 238 | " \n", 239 | "#multinomial naive bayes\n", 240 | "clf = MultinomialNB()\n", 241 | "clf.fit(train_vectors.toarray(), train_label)\n", 242 | "pred=clf.predict(test_vectors.toarray())\n", 243 | "print(\"DS1 multinomianl naive \",accuracy_score(test_label, pred))\n", 244 | "\n", 245 | " \n" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "# CountVectorizer" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 80, 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | "DS1 svm acc 0.385065885798\n", 265 | "('DS1 svm kernel linear acc ', 0.51537335285505126)\n", 266 | "DS1 naive 0.449487554905\n", 267 | "DS1 multinomianl naive 0.559297218155\n" 268 | ] 269 | } 270 | ], 271 | "source": [ 272 | "with open(\"DS1.csv\", 'r') as file:\n", 273 | " train = list(csv.reader(file))\n", 274 | "train=train[1:]\n", 275 | "train_data=[preProcessing(t[0]) for t in train]\n", 276 | "train_label=[changePolToNum(t[1]) for t in train]\n", 277 | "vectorizer = CountVectorizer()\n", 278 | "train_vectors = vectorizer.fit_transform(train_data)\n", 279 | "\n", 280 | "with open(\"testData.csv\", 'r') as file:\n", 281 | " test = list(csv.reader(file))\n", 282 | "test=test[1:]\n", 283 | "test_data=[preProcessing(t[0]) for t in test]\n", 284 | "test_label=[changePolToNum(t[1]) for t in test]\n", 285 | "test_vectors = vectorizer.transform(test_data)\n", 286 | "\n", 287 | "clf = svm.SVC()\n", 288 | "clf.fit(train_vectors, train_label)\n", 289 | "pred=clf.predict(test_vectors)\n", 290 | "print(\"DS1 svm acc \",accuracy_score(test_label, pred))\n", 291 | "\n", 292 | "\n", 293 | "#kernel svc\n", 294 | "clf = svm.SVC(kernel='linear')\n", 295 | "clf.fit(train_vectors, train_label)\n", 296 | "pred=clf.predict(test_vectors)\n", 297 | "print((\"DS1 svm kernel linear acc \",accuracy_score(test_label, pred)))\n", 298 | " \n", 299 | "#naive bayes\n", 300 | "clf = GaussianNB()\n", 301 | "clf.fit(train_vectors.toarray(), train_label)\n", 302 | "pred=clf.predict(test_vectors.toarray())\n", 303 | "print(\"DS1 naive \",accuracy_score(test_label, pred)) \n", 304 | " \n", 305 | "#multinomial naive bayes\n", 306 | "clf = MultinomialNB()\n", 307 | "clf.fit(train_vectors.toarray(), train_label)\n", 308 | "pred=clf.predict(test_vectors.toarray())\n", 309 | "print(\"DS1 multinomianl naive \",accuracy_score(test_label, pred))\n", 310 | "\n", 311 | " \n" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": { 317 | "collapsed": true 318 | }, 319 | "source": [ 320 | "# Data set 2" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "# TfidfVectorizer" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 81, 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "name": "stdout", 337 | "output_type": "stream", 338 | "text": [ 339 | "DS2 svm acc 0.333821376281\n", 340 | "DS2 svm kernel linear acc 0.541727672035\n", 341 | "DS2 naive 0.469985358712\n", 342 | "DS2 multinomianl naive 0.544655929722\n" 343 | ] 344 | } 345 | ], 346 | "source": [ 347 | "with open(\"DS2.csv\", 'r') as file:\n", 348 | " train = list(csv.reader(file))\n", 349 | "train=train[1:]\n", 350 | "train_data=[preProcessing(t[0]) for t in train]\n", 351 | "train_label=[changePolToNum(t[1]) for t in train]\n", 352 | "vectorizer = TfidfVectorizer()\n", 353 | "train_vectors = vectorizer.fit_transform(train_data)\n", 354 | "\n", 355 | "with open(\"testData.csv\", 'r') as file:\n", 356 | " test = list(csv.reader(file))\n", 357 | "test=test[1:]\n", 358 | "test_data=[preProcessing(t[0]) for t in test]\n", 359 | "test_label=[changePolToNum(t[1]) for t in test]\n", 360 | "test_vectors = vectorizer.transform(test_data)\n", 361 | "\n", 362 | "clf = svm.SVC()\n", 363 | "clf.fit(train_vectors, train_label)\n", 364 | "pred=clf.predict(test_vectors)\n", 365 | "print(\"DS2 svm acc \",accuracy_score(test_label, pred))\n", 366 | "\n", 367 | "\n", 368 | "#kernel svc\n", 369 | "clf = svm.SVC(kernel='linear')\n", 370 | "clf.fit(train_vectors, train_label)\n", 371 | "pred=clf.predict(test_vectors)\n", 372 | "print(\"DS2 svm kernel linear acc \",accuracy_score(test_label, pred))\n", 373 | " \n", 374 | "#naive bayes\n", 375 | "clf = GaussianNB()\n", 376 | "clf.fit(train_vectors.toarray(), train_label)\n", 377 | "pred=clf.predict(test_vectors.toarray())\n", 378 | "print(\"DS2 naive \",accuracy_score(test_label, pred)) \n", 379 | " \n", 380 | "#multinomial naive bayes\n", 381 | "clf = MultinomialNB()\n", 382 | "clf.fit(train_vectors.toarray(), train_label)\n", 383 | "pred=clf.predict(test_vectors.toarray())\n", 384 | "print(\"DS2 multinomianl naive \",accuracy_score(test_label, pred))\n", 385 | "\n", 386 | " \n" 387 | ] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "metadata": {}, 392 | "source": [ 393 | "# CountVectorizer" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 82, 399 | "metadata": {}, 400 | "outputs": [ 401 | { 402 | "name": "stdout", 403 | "output_type": "stream", 404 | "text": [ 405 | "DS1 svm acc 0.333821376281\n", 406 | "('DS1 svm kernel linear acc ', 0.53440702781844807)\n", 407 | "DS1 naive 0.474377745242\n", 408 | "DS1 multinomianl naive 0.538799414348\n" 409 | ] 410 | } 411 | ], 412 | "source": [ 413 | "with open(\"DS2.csv\", 'r') as file:\n", 414 | " train = list(csv.reader(file))\n", 415 | "train=train[1:]\n", 416 | "train_data=[preProcessing(t[0]) for t in train]\n", 417 | "train_label=[changePolToNum(t[1]) for t in train]\n", 418 | "vectorizer = CountVectorizer()\n", 419 | "train_vectors = vectorizer.fit_transform(train_data)\n", 420 | "\n", 421 | "with open(\"testData.csv\", 'r') as file:\n", 422 | " test = list(csv.reader(file))\n", 423 | "test=test[1:]\n", 424 | "test_data=[preProcessing(t[0]) for t in test]\n", 425 | "test_label=[changePolToNum(t[1]) for t in test]\n", 426 | "test_vectors = vectorizer.transform(test_data)\n", 427 | "\n", 428 | "clf = svm.SVC()\n", 429 | "clf.fit(train_vectors, train_label)\n", 430 | "pred=clf.predict(test_vectors)\n", 431 | "print(\"DS1 svm acc \",accuracy_score(test_label, pred))\n", 432 | "\n", 433 | "\n", 434 | "#kernel svc\n", 435 | "clf = svm.SVC(kernel='linear')\n", 436 | "clf.fit(train_vectors, train_label)\n", 437 | "pred=clf.predict(test_vectors)\n", 438 | "print((\"DS1 svm kernel linear acc \",accuracy_score(test_label, pred)))\n", 439 | " \n", 440 | "#naive bayes\n", 441 | "clf = GaussianNB()\n", 442 | "clf.fit(train_vectors.toarray(), train_label)\n", 443 | "pred=clf.predict(test_vectors.toarray())\n", 444 | "print(\"DS1 naive \",accuracy_score(test_label, pred)) \n", 445 | " \n", 446 | "#multinomial naive bayes\n", 447 | "clf = MultinomialNB()\n", 448 | "clf.fit(train_vectors.toarray(), train_label)\n", 449 | "pred=clf.predict(test_vectors.toarray())\n", 450 | "print(\"DS1 multinomianl naive \",accuracy_score(test_label, pred))\n", 451 | "\n", 452 | " \n" 453 | ] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": {}, 458 | "source": [ 459 | "# Data set 3" 460 | ] 461 | }, 462 | { 463 | "cell_type": "markdown", 464 | "metadata": {}, 465 | "source": [ 466 | "# TfidfVectorizer" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 83, 472 | "metadata": {}, 473 | "outputs": [ 474 | { 475 | "name": "stdout", 476 | "output_type": "stream", 477 | "text": [ 478 | "DS3 svm acc 0.385065885798\n", 479 | "DS3 svm kernel linear acc 0.516837481698\n", 480 | "DS3 naive 0.433382137628\n", 481 | "DS3 multinomianl naive 0.537335285505\n" 482 | ] 483 | } 484 | ], 485 | "source": [ 486 | "with open(\"DS3.csv\", 'r') as file:\n", 487 | " train = list(csv.reader(file))\n", 488 | "train=train[1:]\n", 489 | "train_data=[preProcessing(t[0]) for t in train]\n", 490 | "train_label=[changePolToNum(t[1]) for t in train]\n", 491 | "vectorizer = TfidfVectorizer()\n", 492 | "train_vectors = vectorizer.fit_transform(train_data)\n", 493 | "\n", 494 | "with open(\"testData.csv\", 'r') as file:\n", 495 | " test = list(csv.reader(file))\n", 496 | "test=test[1:]\n", 497 | "test_data=[preProcessing(t[0]) for t in test]\n", 498 | "test_label=[changePolToNum(t[1]) for t in test]\n", 499 | "test_vectors = vectorizer.transform(test_data)\n", 500 | "\n", 501 | "clf = svm.SVC()\n", 502 | "clf.fit(train_vectors, train_label)\n", 503 | "pred=clf.predict(test_vectors)\n", 504 | "print(\"DS3 svm acc \",accuracy_score(test_label, pred))\n", 505 | "\n", 506 | "\n", 507 | "#kernel svc\n", 508 | "clf = svm.SVC(kernel='linear')\n", 509 | "clf.fit(train_vectors, train_label)\n", 510 | "pred=clf.predict(test_vectors)\n", 511 | "print(\"DS3 svm kernel linear acc \",accuracy_score(test_label, pred))\n", 512 | " \n", 513 | "#naive bayes\n", 514 | "clf = GaussianNB()\n", 515 | "clf.fit(train_vectors.toarray(), train_label)\n", 516 | "pred=clf.predict(test_vectors.toarray())\n", 517 | "print(\"DS3 naive \",accuracy_score(test_label, pred)) \n", 518 | " \n", 519 | "#multinomial naive bayes\n", 520 | "clf = MultinomialNB()\n", 521 | "clf.fit(train_vectors.toarray(), train_label)\n", 522 | "pred=clf.predict(test_vectors.toarray())\n", 523 | "print(\"DS3 multinomianl naive \",accuracy_score(test_label, pred))\n", 524 | "\n", 525 | " \n" 526 | ] 527 | }, 528 | { 529 | "cell_type": "markdown", 530 | "metadata": {}, 531 | "source": [ 532 | "# CountVectorizer" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 84, 538 | "metadata": {}, 539 | "outputs": [ 540 | { 541 | "name": "stdout", 542 | "output_type": "stream", 543 | "text": [ 544 | "DS3 svm acc 0.385065885798\n", 545 | "DS3 svm kernel linear acc 0.481698389458\n", 546 | "DS3 naive 0.440702781845\n", 547 | "DS3 multinomianl naive 0.551976573939\n" 548 | ] 549 | } 550 | ], 551 | "source": [ 552 | "with open(\"DS3.csv\", 'r') as file:\n", 553 | " train = list(csv.reader(file))\n", 554 | "train=train[1:]\n", 555 | "train_data=[preProcessing(t[0]) for t in train]\n", 556 | "train_label=[changePolToNum(t[1]) for t in train]\n", 557 | "vectorizer = CountVectorizer()\n", 558 | "train_vectors = vectorizer.fit_transform(train_data)\n", 559 | "\n", 560 | "with open(\"testData.csv\", 'r') as file:\n", 561 | " test = list(csv.reader(file))\n", 562 | "test=test[1:]\n", 563 | "test_data=[preProcessing(t[0]) for t in test]\n", 564 | "test_label=[changePolToNum(t[1]) for t in test]\n", 565 | "test_vectors = vectorizer.transform(test_data)\n", 566 | "\n", 567 | "clf = svm.SVC()\n", 568 | "clf.fit(train_vectors, train_label)\n", 569 | "pred=clf.predict(test_vectors)\n", 570 | "print(\"DS3 svm acc \",accuracy_score(test_label, pred))\n", 571 | "\n", 572 | "\n", 573 | "#kernel svc\n", 574 | "clf = svm.SVC(kernel='linear')\n", 575 | "clf.fit(train_vectors, train_label)\n", 576 | "pred=clf.predict(test_vectors)\n", 577 | "print(\"DS3 svm kernel linear acc \",accuracy_score(test_label, pred))\n", 578 | " \n", 579 | "#naive bayes\n", 580 | "clf = GaussianNB()\n", 581 | "clf.fit(train_vectors.toarray(), train_label)\n", 582 | "pred=clf.predict(test_vectors.toarray())\n", 583 | "print(\"DS3 naive \",accuracy_score(test_label, pred)) \n", 584 | " \n", 585 | "#multinomial naive bayes\n", 586 | "clf = MultinomialNB()\n", 587 | "clf.fit(train_vectors.toarray(), train_label)\n", 588 | "pred=clf.predict(test_vectors.toarray())\n", 589 | "print(\"DS3 multinomianl naive \",accuracy_score(test_label, pred))\n", 590 | "\n", 591 | " \n" 592 | ] 593 | } 594 | ], 595 | "metadata": { 596 | "kernelspec": { 597 | "display_name": "Python 3", 598 | "language": "python", 599 | "name": "python3" 600 | }, 601 | "language_info": { 602 | "codemirror_mode": { 603 | "name": "ipython", 604 | "version": 3 605 | }, 606 | "file_extension": ".py", 607 | "mimetype": "text/x-python", 608 | "name": "python", 609 | "nbconvert_exporter": "python", 610 | "pygments_lexer": "ipython3", 611 | "version": "3.6.2" 612 | } 613 | }, 614 | "nbformat": 4, 615 | "nbformat_minor": 2 616 | } 617 | --------------------------------------------------------------------------------