├── .gitignore └── sentiment.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | trainingandtestdata 2 | -------------------------------------------------------------------------------- /sentiment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# A simple sentiment prototype" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import os # manipulate paths\n", 19 | "import pandas as pd # SQL-like operations and convenience functions\n", 20 | "import joblib # save and load models" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "Download the Sentiment140 data from [their website](http://help.sentiment140.com/for-students) and set `DATA_DIR` to the directory in which you have put the `CSV` files." 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "DATA_DIR = \"trainingandtestdata\"\n", 39 | "training_csv_file = os.path.join(DATA_DIR, 'training.1600000.processed.noemoticon.csv')" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "## A peek at the data" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "names = ('polarity', 'id', 'date', 'query', 'author', 'text')\n", 58 | "df = pd.read_csv(training_csv_file, encoding='latin1', names=names)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/html": [ 71 | "
\n", 72 | "\n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | "
polarityiddatequeryauthortext
001467810369Mon Apr 06 22:19:45 PDT 2009NO_QUERY_TheSpecialOne_@switchfoot http://twitpic.com/2y1zl - Awww, that's a bummer. You shoulda got David Carr of Third Day to do it. ;D
101467810672Mon Apr 06 22:19:49 PDT 2009NO_QUERYscotthamiltonis upset that he can't update his Facebook by texting it... and might cry as a result School today also. Blah!
201467810917Mon Apr 06 22:19:53 PDT 2009NO_QUERYmattycus@Kenichan I dived many times for the ball. Managed to save 50% The rest go out of bounds
301467811184Mon Apr 06 22:19:57 PDT 2009NO_QUERYElleCTFmy whole body feels itchy and like its on fire
401467811193Mon Apr 06 22:19:57 PDT 2009NO_QUERYKaroli@nationwideclass no, it's not behaving at all. i'm mad. why am i here? because I can't see you all over there.
\n", 132 | "
" 133 | ], 134 | "text/plain": [ 135 | " polarity id date query \\\n", 136 | "0 0 1467810369 Mon Apr 06 22:19:45 PDT 2009 NO_QUERY \n", 137 | "1 0 1467810672 Mon Apr 06 22:19:49 PDT 2009 NO_QUERY \n", 138 | "2 0 1467810917 Mon Apr 06 22:19:53 PDT 2009 NO_QUERY \n", 139 | "3 0 1467811184 Mon Apr 06 22:19:57 PDT 2009 NO_QUERY \n", 140 | "4 0 1467811193 Mon Apr 06 22:19:57 PDT 2009 NO_QUERY \n", 141 | "\n", 142 | " author \\\n", 143 | "0 _TheSpecialOne_ \n", 144 | "1 scotthamilton \n", 145 | "2 mattycus \n", 146 | "3 ElleCTF \n", 147 | "4 Karoli \n", 148 | "\n", 149 | " text \n", 150 | "0 @switchfoot http://twitpic.com/2y1zl - Awww, that's a bummer. You shoulda got David Carr of Third Day to do it. ;D \n", 151 | "1 is upset that he can't update his Facebook by texting it... and might cry as a result School today also. Blah! \n", 152 | "2 @Kenichan I dived many times for the ball. Managed to save 50% The rest go out of bounds \n", 153 | "3 my whole body feels itchy and like its on fire \n", 154 | "4 @nationwideclass no, it's not behaving at all. i'm mad. why am i here? because I can't see you all over there. " 155 | ] 156 | }, 157 | "execution_count": 4, 158 | "metadata": {}, 159 | "output_type": "execute_result" 160 | } 161 | ], 162 | "source": [ 163 | "pd.options.display.max_colwidth = 140 # allow wide columns\n", 164 | "df.head() # show first 5 rows" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 5, 170 | "metadata": { 171 | "collapsed": false 172 | }, 173 | "outputs": [ 174 | { 175 | "data": { 176 | "text/html": [ 177 | "
\n", 178 | "\n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | "
polarityiddatequeryauthortext
159999542193601966Tue Jun 16 08:40:49 PDT 2009NO_QUERYAmandaMarie1028Just woke up. Having no school is the best feeling ever
159999642193601969Tue Jun 16 08:40:49 PDT 2009NO_QUERYTheWDBoardsTheWDB.com - Very cool to hear old Walt interviews! ♫ http://blip.fm/~8bmta
159999742193601991Tue Jun 16 08:40:49 PDT 2009NO_QUERYbpbabeAre you ready for your MoJo Makeover? Ask me for details
159999842193602064Tue Jun 16 08:40:49 PDT 2009NO_QUERYtinydiamondzHappy 38th Birthday to my boo of alll time!!! Tupac Amaru Shakur
159999942193602129Tue Jun 16 08:40:50 PDT 2009NO_QUERYRyanTrevMorrishappy #charitytuesday @theNSPCC @SparksCharity @SpeakingUpH4H
\n", 238 | "
" 239 | ], 240 | "text/plain": [ 241 | " polarity id date query \\\n", 242 | "1599995 4 2193601966 Tue Jun 16 08:40:49 PDT 2009 NO_QUERY \n", 243 | "1599996 4 2193601969 Tue Jun 16 08:40:49 PDT 2009 NO_QUERY \n", 244 | "1599997 4 2193601991 Tue Jun 16 08:40:49 PDT 2009 NO_QUERY \n", 245 | "1599998 4 2193602064 Tue Jun 16 08:40:49 PDT 2009 NO_QUERY \n", 246 | "1599999 4 2193602129 Tue Jun 16 08:40:50 PDT 2009 NO_QUERY \n", 247 | "\n", 248 | " author \\\n", 249 | "1599995 AmandaMarie1028 \n", 250 | "1599996 TheWDBoards \n", 251 | "1599997 bpbabe \n", 252 | "1599998 tinydiamondz \n", 253 | "1599999 RyanTrevMorris \n", 254 | "\n", 255 | " text \n", 256 | "1599995 Just woke up. Having no school is the best feeling ever \n", 257 | "1599996 TheWDB.com - Very cool to hear old Walt interviews! ♫ http://blip.fm/~8bmta \n", 258 | "1599997 Are you ready for your MoJo Makeover? Ask me for details \n", 259 | "1599998 Happy 38th Birthday to my boo of alll time!!! Tupac Amaru Shakur \n", 260 | "1599999 happy #charitytuesday @theNSPCC @SparksCharity @SpeakingUpH4H " 261 | ] 262 | }, 263 | "execution_count": 5, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "df.tail()" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 6, 275 | "metadata": { 276 | "collapsed": false 277 | }, 278 | "outputs": [], 279 | "source": [ 280 | "df['polarity'] = df['polarity'].replace({0: -1, 4: 1})  # map labels 0/4 to -1/+1; plain assignment, because a chained inplace replace does not update df under pandas Copy-on-Write\n", 281 | "text = df['text']\n", 282 | "target = df['polarity'].values" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 7, 288 | "metadata": { 289 | "collapsed": false 290 | }, 291 | "outputs": [ 292 | { 293 | "name": "stdout", 294 | "output_type": "stream", 295 | "text": [ 296 | "1600000 1600000\n" 297 | ] 298 | } 299 | ], 300 | "source": [ 301 | "print(len(target), len(text))" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "## Train the model\n", 309 | 
"\n", 310 | "Set 20% of the data aside to test the trained model" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 8, 316 | "metadata": { 317 | "collapsed": false 318 | }, 319 | "outputs": [], 320 | "source": [ 321 | "from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in scikit-learn 0.20\n", 322 | "\n", 323 | "text_train, text_validation, target_train, target_validation = (\n", 324 | "    train_test_split(text, target, test_size=0.2, random_state=42)\n", 325 | ")" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "Build a pipeline" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 9, 338 | "metadata": { 339 | "collapsed": false 340 | }, 341 | "outputs": [], 342 | "source": [ 343 | "from sklearn.feature_extraction.text import CountVectorizer\n", 344 | "from sklearn.feature_selection import SelectKBest, chi2\n", 345 | "from sklearn.linear_model import LogisticRegressionCV\n", 346 | "from sklearn.pipeline import Pipeline\n", 347 | "\n", 348 | "vectorizer = CountVectorizer(ngram_range=(1, 2), max_features=100000)\n", 349 | "feature_selector = SelectKBest(chi2, k=5000)\n", 350 | "classifier = LogisticRegressionCV(n_jobs=4)" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "This next cell took ~3 minutes to run on my machine" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 10, 363 | "metadata": { 364 | "collapsed": false 365 | }, 366 | "outputs": [], 367 | "source": [ 368 | "if os.path.exists('model.pkl'):  # reuse the cached fit; delete model.pkl to retrain after changing the pipeline or the data\n", 369 | "    sentiment_pipeline = joblib.load('model.pkl')\n", 370 | "else:\n", 371 | "    sentiment_pipeline = Pipeline([  # steps are passed as a list of (name, estimator) pairs\n", 372 | "        ('v', vectorizer),\n", 373 | "        ('f', feature_selector),\n", 374 | "        ('c', classifier)\n", 375 | "    ])\n", 376 | "    sentiment_pipeline.fit(text_train, target_train)\n", 377 | "    joblib.dump(sentiment_pipeline, 'model.pkl');" 378 | ] 379 | }, 380 | { 381 | "cell_type": 
"markdown", 382 | "metadata": { 383 | "collapsed": false 384 | }, 385 | "source": [ 386 | "## Test the model" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 11, 392 | "metadata": { 393 | "collapsed": false 394 | }, 395 | "outputs": [ 396 | { 397 | "name": "stdout", 398 | "output_type": "stream", 399 | "text": [ 400 | "[-1 1 -1 1 -1]\n" 401 | ] 402 | } 403 | ], 404 | "source": [ 405 | "print(sentiment_pipeline.predict(['bad', 'good', \"didnt like\", \"today was a good day\", \"i hate this product\"]))" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 12, 411 | "metadata": { 412 | "collapsed": false 413 | }, 414 | "outputs": [ 415 | { 416 | "name": "stdout", 417 | "output_type": "stream", 418 | "text": [ 419 | "1 -1 \t @chrishasboobs AHHH I HOPE YOUR OK!!! \n", 420 | "1 -1 \t @misstoriblack cool , i have no tweet apps for my razr 2\n", 421 | "1 -1 \t @TiannaChaos i know just family drama. its lame.hey next time u hang out with kim n u guys like have a sleepover or whatever, ill call u\n", 422 | "-1 -1 \t School email won't open and I have geography stuff on there to revise! *Stupid School* :'(\n", 423 | "1 -1 \t upper airways problem \n", 424 | "-1 -1 \t Going to miss Pastor's sermon on Faith... \n", 425 | "1 1 \t on lunch....dj should come eat with me \n", 426 | "-1 -1 \t @piginthepoke oh why are you feeling like that? \n", 427 | "-1 -1 \t gahh noo!peyton needs to live!this is horrible \n", 428 | "1 1 \t @mrstessyman thank you glad you like it! 
There is a product review bit on the site Enjoy knitting it!\n" 429 | ] 430 | } 431 | ], 432 | "source": [ 433 | "for tweet, label in zip(text_validation[:10], target_validation[:10]):  # distinct loop names so the full `text`/`target` data from the preparation cell is not overwritten\n", 434 | "    print(sentiment_pipeline.predict([tweet])[0], label, '\\t', tweet)" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": 13, 440 | "metadata": { 441 | "collapsed": false 442 | }, 443 | "outputs": [ 444 | { 445 | "data": { 446 | "text/plain": [ 447 | "0.79942812500000004" 448 | ] 449 | }, 450 | "execution_count": 13, 451 | "metadata": {}, 452 | "output_type": "execute_result" 453 | } 454 | ], 455 | "source": [ 456 | "sentiment_pipeline.score(text_validation, target_validation)" 457 | ] 458 | }, 459 | { 460 | "cell_type": "markdown", 461 | "metadata": {}, 462 | "source": [ 463 | "## What did the model learn?" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 14, 469 | "metadata": { 470 | "collapsed": false 471 | }, 472 | "outputs": [], 473 | "source": [ 474 | "feature_names = sentiment_pipeline.steps[0][1].get_feature_names()  # vocabulary of the vectorizer step\n", 475 | "feature_names = [feature_names[i] for i in  # keep only the features retained by the selector step\n", 476 | "                 sentiment_pipeline.steps[1][1].get_support(indices=True)]\n", 477 | "\n", 478 | "def show_most_informative_features(feature_names, clf, n=1000):  # print the n lowest and n highest coefficients of clf side by side\n", 479 | "    coefs_with_fns = sorted(zip(clf.coef_[0], feature_names))  # ascending by coefficient\n", 480 | "    top = zip(coefs_with_fns[:n], coefs_with_fns[:-(n + 1):-1])  # pair i-th lowest with i-th highest\n", 481 | "    for (coef_1, fn_1), (coef_2, fn_2) in top:\n", 482 | "        print(\"\\t%.4f\\t%-15s\\t\\t%.4f\\t%-15s\" % (coef_1, fn_1, coef_2, fn_2))" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 15, 488 | "metadata": { 489 | "collapsed": false 490 | }, 491 | "outputs": [ 492 | { 493 | "name": "stdout", 494 | "output_type": "stream", 495 | "text": [ 496 | "\t-3.4628\tnot happy \t\t2.6750\tno problem \n", 497 | "\t-3.2794\tclean me \t\t2.6244\tno worries \n", 498 | "\t-2.9125\tnot looking \t\t2.5657\tcannot wait \n", 499 | 
"\t-2.9124\tinaperfectworld\t\t2.3216\tcant wait \n", 500 | "\t-2.9110\tsad \t\t2.2859\tno prob \n", 501 | "\t-2.8527\tpassed away \t\t2.2517\tsmiling \n", 502 | "\t-2.6970\tsadly \t\t2.1924\tnothing wrong \n", 503 | "\t-2.6421\tnot nice \t\t2.1093\tnot bad \n", 504 | "\t-2.6333\tgutted \t\t2.0198\tsad sad \n", 505 | "\t-2.6265\tnot cool \t\t1.9122\tcongratulations\n", 506 | "\t-2.6044\tno luck \t\t1.7787\tfuzzball \n", 507 | "\t-2.6044\tdisappointing \t\t1.7751\tno probs \n", 508 | "\t-2.4926\theartbreaking \t\t1.7722\twelcome \n", 509 | "\t-2.4771\tsadd \t\t1.7376\tmusicmonday \n", 510 | "\t-2.4683\theartbroken \t\t1.6985\thate hate \n", 511 | "\t-2.4286\tboohoo \t\t1.6670\tyayyy \n", 512 | "\t-2.4184\trip \t\t1.6627\tsmile \n", 513 | "\t-2.4156\tnot fun \t\t1.6291\twoooo \n", 514 | "\t-2.4029\tpoor \t\t1.6259\tthankyou \n", 515 | "\t-2.3998\tdontyouhate \t\t1.5961\tjust sayin \n", 516 | "\t-2.3856\tbummer \t\t1.5958\tsmiles \n", 517 | "\t-2.3646\tdepressed \t\t1.5762\tproud \n", 518 | "\t-2.3349\tbummed \t\t1.5680\theheh \n", 519 | "\t-2.3247\tcancelled \t\t1.5599\tfollowfriday \n", 520 | "\t-2.3185\tunfortunately \t\t1.5565\tlaughter \n", 521 | "\t-2.3119\tnot excited \t\t1.5494\tthanks \n", 522 | "\t-2.3048\tdepressing \t\t1.5486\tamusing \n", 523 | "\t-2.2977\tupsetting \t\t1.5291\tmy pleasure \n", 524 | "\t-2.2942\theadache \t\t1.5291\tgracias \n", 525 | "\t-2.2716\tnooooo \t\t1.5015\tyayy \n", 526 | "\t-2.2643\tpakcricket \t\t1.4982\tgoodsex \n", 527 | "\t-2.2612\tfuneral \t\t1.4792\tfeel free \n", 528 | "\t-2.2482\tdevastated \t\t1.4786\tgiggle \n", 529 | "\t-2.2342\tboooo \t\t1.4735\thehehe \n", 530 | "\t-2.2308\thurts \t\t1.4685\tthumbs up \n", 531 | "\t-2.2298\tair france \t\t1.4580\tgrin \n", 532 | "\t-2.2223\truined \t\t1.4309\tblessed \n", 533 | "\t-2.2030\tlet down \t\t1.4294\tcan wait \n", 534 | "\t-2.1970\tmisses \t\t1.4087\thihi \n", 535 | "\t-2.1958\tnoooooo \t\t1.4024\tthank \n", 536 | "\t-2.1841\ttaking forever \t\t1.3995\tyummm \n", 537 | 
"\t-2.1810\tmissin \t\t1.3947\tyey \n", 538 | "\t-2.1793\tpoorly \t\t1.3946\tcongrats \n", 539 | "\t-2.1780\thomesick \t\t1.3920\twoohoo \n", 540 | "\t-2.1776\tbooooo \t\t1.3889\tblessings \n", 541 | "\t-2.1724\tnot good \t\t1.3815\thonored \n", 542 | "\t-2.1713\tfed up \t\t1.3811\tcrack me \n", 543 | "\t-2.1649\ttoothache \t\t1.3810\twooooo \n", 544 | "\t-2.1486\tdissapointed \t\t1.3710\texcellent \n", 545 | "\t-2.1344\tftl \t\t1.3682\tfeels good \n", 546 | "\t-2.1287\tdisappointed \t\t1.3526\twhoo \n", 547 | "\t-2.1198\tnot enjoying \t\t1.3477\theey \n", 548 | "\t-2.1184\tsaddest \t\t1.3444\tfor following \n", 549 | "\t-2.1134\twhat wrong \t\t1.3400\tsocial media \n", 550 | "\t-2.1131\tso mean \t\t1.3279\theehee \n", 551 | "\t-2.1052\tunhappy \t\t1.3276\tthx \n", 552 | "\t-2.0992\tunfair \t\t1.3275\tteehee \n", 553 | "\t-2.0963\tmissing \t\t1.3240\tdon forget \n", 554 | "\t-2.0849\tsick \t\t1.3150\tpleasure \n", 555 | "\t-2.0778\tsaddened \t\t1.3132\tcheers \n", 556 | "\t-2.0753\tmeanie \t\t1.3095\ttnx \n", 557 | "\t-2.0658\tfarrah \t\t1.2997\theya \n", 558 | "\t-2.0646\tsadness \t\t1.2949\tsweetest \n", 559 | "\t-2.0491\tbooo \t\t1.2829\thowdy \n", 560 | "\t-2.0476\thuhuhu \t\t1.2727\tawesomeness \n", 561 | "\t-2.0465\tdied \t\t1.2688\tappreciated \n", 562 | "\t-2.0284\tnever reply \t\t1.2670\treward \n", 563 | "\t-2.0279\tnot fair \t\t1.2670\tloving \n", 564 | "\t-2.0278\tcondolences \t\t1.2657\tpeaceful \n", 565 | "\t-2.0121\tsadface \t\t1.2645\tglad \n", 566 | "\t-2.0119\tsucks \t\t1.2506\tstay tuned \n", 567 | "\t-2.0107\tcries \t\t1.2493\trelaxed \n", 568 | "\t-2.0044\twouldn let \t\t1.2486\tnice work \n", 569 | "\t-2.0039\tmigraine \t\t1.2422\ttommcfly http \n", 570 | "\t-1.9938\tdreading \t\t1.2394\talike \n", 571 | "\t-1.9892\thates \t\t1.2366\thehe \n", 572 | "\t-1.9805\tlonely \t\t1.2365\thooray \n", 573 | "\t-1.9644\tno fun \t\t1.2337\texcited \n", 574 | "\t-1.9478\tnot funny \t\t1.2332\tsad just \n", 575 | "\t-1.9455\tnot feeling \t\t1.2325\tfound 
you \n", 576 | "\t-1.9449\tcanceled \t\t1.2285\theh \n", 577 | "\t-1.9410\twon let \t\t1.2267\tblessing \n", 578 | "\t-1.9253\tnoooooooooo \t\t1.2233\tstoked \n", 579 | "\t-1.9144\ttragic \t\t1.2158\tyaay \n", 580 | "\t-1.9073\tfathers day \t\t1.2130\tfabulous \n", 581 | "\t-1.9018\tisn good \t\t1.2127\tgoood \n", 582 | "\t-1.9017\tdrop by \t\t1.2110\tsad but \n", 583 | "\t-1.8982\twon work \t\t1.2010\ttoo funny \n", 584 | "\t-1.8905\thappy father \t\t1.1997\tugh hate \n", 585 | "\t-1.8784\tcramps \t\t1.1996\tftw \n", 586 | "\t-1.8782\tsold out \t\t1.1994\tyaaay \n", 587 | "\t-1.8722\tmiss \t\t1.1990\thappy \n", 588 | "\t-1.8709\tinjured \t\t1.1939\tnp \n", 589 | "\t-1.8683\tgood bye \t\t1.1890\tgoooood \n", 590 | "\t-1.8656\tunfortunate \t\t1.1853\twhy thank \n", 591 | "\t-1.8655\tiran \t\t1.1853\twoot \n", 592 | "\t-1.8644\tate all \t\t1.1847\tthankful \n", 593 | "\t-1.8603\tupset \t\t1.1839\tyay \n", 594 | "\t-1.8535\tunloved \t\t1.1838\tlearning how \n", 595 | "\t-1.8508\tjury duty \t\t1.1798\tdont forget \n", 596 | "\t-1.8486\tbreaks my \t\t1.1762\tadorable \n", 597 | "\t-1.8436\tno new \t\t1.1711\tyummy \n", 598 | "\t-1.8378\tsux \t\t1.1674\tgrateful \n", 599 | "\t-1.8346\tbroke \t\t1.1577\thee \n", 600 | "\t-1.8344\tcrying \t\t1.1564\tyumm \n", 601 | "\t-1.8307\tneda \t\t1.1521\tthanx \n", 602 | "\t-1.8211\tnot liking \t\t1.1422\thello \n", 603 | "\t-1.8092\tnot found \t\t1.1393\tgod is \n", 604 | "\t-1.8071\tnot coming \t\t1.1370\tnicely \n", 605 | "\t-1.8069\tcrashes \t\t1.1367\tgoodnight \n", 606 | "\t-1.8017\tstill no \t\t1.1294\tlovin \n", 607 | "\t-1.7991\thorrible \t\t1.1248\tquote \n", 608 | "\t-1.7977\twont let \t\t1.1238\twonderful \n", 609 | "\t-1.7931\tnot feelin \t\t1.1224\trocks \n", 610 | "\t-1.7902\twhy won \t\t1.1196\tsick but \n", 611 | "\t-1.7810\tno good \t\t1.1174\tawesome \n", 612 | "\t-1.7777\thiccups \t\t1.1163\tkindly \n", 613 | "\t-1.7744\tcoughing \t\t1.1138\tgooood \n", 614 | "\t-1.7707\tfeel well \t\t1.1127\tgotta love \n", 615 | 
"\t-1.7699\talas \t\t1.1048\tinspired \n", 616 | "\t-1.7675\thayfever \t\t1.1039\thow sweet \n", 617 | "\t-1.7651\tthrowing up \t\t1.1033\tsure thing \n", 618 | "\t-1.7627\thappy fathers \t\t1.1033\tbed night \n", 619 | "\t-1.7620\tbnp \t\t1.0981\tgoodluck \n", 620 | "\t-1.7545\tnoooo \t\t1.0960\thandy \n", 621 | "\t-1.7483\tnooooooo \t\t1.0896\tyum \n", 622 | "\t-1.7462\tdownside \t\t1.0884\twe come \n", 623 | "\t-1.7451\tnowhere to \t\t1.0882\tdon mind \n", 624 | "\t-1.7428\tunfortunatly \t\t1.0875\tbom dia \n", 625 | "\t-1.7419\twishin \t\t1.0772\thilarious \n", 626 | "\t-1.7418\twhy must \t\t1.0713\tbe sad \n", 627 | "\t-1.7414\tthere goes \t\t1.0621\tyessss \n", 628 | "\t-1.7411\twhyyyy \t\t1.0602\tpraise \n", 629 | "\t-1.7394\twhats wrong \t\t1.0596\thugh \n", 630 | "\t-1.7342\tdidn win \t\t1.0503\tenjoying \n", 631 | "\t-1.7320\tnever got \t\t1.0500\tabout time \n", 632 | "\t-1.7300\tsadder \t\t1.0498\tpleased \n", 633 | "\t-1.7286\twon come \t\t1.0462\tat last \n", 634 | "\t-1.7226\tawful \t\t1.0451\trelaxing \n", 635 | "\t-1.7158\tfailed \t\t1.0433\tw00t \n", 636 | "\t-1.7142\tstinks \t\t1.0407\tfunniest \n", 637 | "\t-1.7112\tughhh \t\t1.0399\tgo girl \n", 638 | "\t-1.7044\tughhhh \t\t1.0365\topinion \n", 639 | "\t-1.7043\touchie \t\t1.0329\tlaughing \n", 640 | "\t-1.7031\tfrustrated \t\t1.0308\tyou want \n", 641 | "\t-1.6914\thurting \t\t1.0285\ttetris \n", 642 | "\t-1.6889\tboooooo \t\t1.0215\tknew you \n", 643 | "\t-1.6819\tbroken \t\t1.0143\tcute \n", 644 | "\t-1.6807\tstruggling \t\t1.0130\thola \n", 645 | "\t-1.6773\tbad luck \t\t1.0125\tpositive \n", 646 | "\t-1.6762\tshame \t\t1.0092\tcolorblindfish \n", 647 | "\t-1.6724\tsummer school \t\t1.0091\thiya \n", 648 | "\t-1.6713\thmph \t\t1.0075\tfantastic \n", 649 | "\t-1.6679\tfml \t\t1.0065\tre good \n", 650 | "\t-1.6664\tugh \t\t1.0054\tshine \n", 651 | "\t-1.6660\trainin \t\t1.0046\tcome join \n", 652 | "\t-1.6660\tdisabled \t\t1.0037\tgreetings \n", 653 | "\t-1.6639\tlaid off \t\t1.0035\tamazing 
\n", 654 | "\t-1.6604\tdisappointment \t\t1.0033\taww thanks \n", 655 | "\t-1.6586\tis down \t\t1.0015\tbrilliant \n", 656 | "\t-1.6524\tfeeling ill \t\t1.0001\trealhughjackman\n", 657 | "\t-1.6439\tpity \t\t0.9999\tpumped \n", 658 | "\t-1.6387\tterrible \t\t0.9986\tgoodmorning \n", 659 | "\t-1.6372\tyucky \t\t0.9980\tthe poor \n", 660 | "\t-1.6360\twon see \t\t0.9963\tpower of \n", 661 | "\t-1.6326\tneglected \t\t0.9934\tmade my \n", 662 | "\t-1.6319\theartburn \t\t0.9892\tcool \n", 663 | "\t-1.6310\touch \t\t0.9884\tawsome \n", 664 | "\t-1.6304\tnot ready \t\t0.9882\tthanking \n", 665 | "\t-1.6298\tnot showing \t\t0.9870\tttyl \n", 666 | "\t-1.6268\tanswer me \t\t0.9856\tloves \n", 667 | "\t-1.6252\tnot very \t\t0.9851\twooo \n", 668 | "\t-1.6218\tignoring me \t\t0.9826\tdays until \n", 669 | "\t-1.6205\tstung \t\t0.9800\tbliss \n", 670 | "\t-1.6190\tdisappeared \t\t0.9800\tlike plan \n", 671 | "\t-1.6179\tneed hug \t\t0.9782\thandsome \n", 672 | "\t-1.6171\t3gs \t\t0.9779\tmultiply \n", 673 | "\t-1.6163\tmiserable \t\t0.9768\tenjoy \n", 674 | "\t-1.6092\tclosed \t\t0.9748\tfamiliar \n", 675 | "\t-1.6086\tache \t\t0.9724\tunique \n", 676 | "\t-1.6056\tinfection \t\t0.9723\tcurious \n", 677 | "\t-1.6049\tbronchitis \t\t0.9647\tit fun \n", 678 | "\t-1.6043\tnoes \t\t0.9637\twoo \n", 679 | "\t-1.5960\twithout me \t\t0.9627\tthnx \n", 680 | "\t-1.5950\tsucky \t\t0.9620\tglad could \n", 681 | "\t-1.5946\tstranded \t\t0.9564\tugh im \n", 682 | "\t-1.5928\tdisconnected \t\t0.9535\tcutest \n", 683 | "\t-1.5886\tfeel ill \t\t0.9521\thappiness \n", 684 | "\t-1.5856\thate \t\t0.9500\ttry it \n", 685 | "\t-1.5785\tthe er \t\t0.9489\tyou wanna \n", 686 | "\t-1.5742\tdown today \t\t0.9475\tadore \n", 687 | "\t-1.5730\tloosing \t\t0.9464\tjust wanted \n", 688 | "\t-1.5664\tran over \t\t0.9427\tsweet \n", 689 | "\t-1.5642\tbad times \t\t0.9425\tinspiring \n", 690 | "\t-1.5633\tmms \t\t0.9415\tprefer \n", 691 | "\t-1.5568\tbut but \t\t0.9397\tmmmm \n", 692 | "\t-1.5544\tgoodbyes 
\t\t0.9371\tgood work \n", 693 | "\t-1.5473\tgotta wait \t\t0.9363\tmyweakness \n", 694 | "\t-1.5438\tick \t\t0.9359\tluck \n", 695 | "\t-1.5396\tbed alone \t\t0.9332\tnew favorite \n", 696 | "\t-1.5373\tlosing \t\t0.9327\tbetter now \n", 697 | "\t-1.5360\tno fair \t\t0.9310\tlovely \n", 698 | "\t-1.5343\tnot well \t\t0.9282\taw thanks \n", 699 | "\t-1.5329\tcoursework \t\t0.9282\tcutie \n", 700 | "\t-1.5326\tan end \t\t0.9278\tsleep tight \n", 701 | "\t-1.5317\tscrewed \t\t0.9212\t17 again \n", 702 | "\t-1.5306\tnot available \t\t0.9189\trelax \n", 703 | "\t-1.5290\thaving trouble \t\t0.9166\tproductive \n", 704 | "\t-1.5226\trest in \t\t0.9163\tholla \n", 705 | "\t-1.5206\tfather day \t\t0.9150\theyyy \n", 706 | "\t-1.5198\tsaddens \t\t0.9115\tfunny \n", 707 | "\t-1.5198\targh \t\t0.9104\thelps \n", 708 | "\t-1.5182\tcrappy \t\t0.9103\tis fun \n", 709 | "\t-1.5177\ttwitter won \t\t0.9093\tback later \n", 710 | "\t-1.5155\tdammit \t\t0.9079\tahaha \n", 711 | "\t-1.5145\tgloomy \t\t0.9044\tbeauty \n", 712 | "\t-1.5111\tsuffering \t\t0.9027\tlove being \n", 713 | "\t-1.5094\tiranelection \t\t0.9027\teveryone should\n", 714 | "\t-1.5080\tworried \t\t0.8922\tbored bored \n", 715 | "\t-1.5058\tbut live \t\t0.8915\theyy \n", 716 | "\t-1.5047\tleaving me \t\t0.8865\tjust need \n", 717 | "\t-1.5046\tdangit \t\t0.8852\tya tomorrow \n", 718 | "\t-1.5038\tyou thank \t\t0.8835\tneat \n", 719 | "\t-1.5016\tspilled \t\t0.8834\tcompliment \n", 720 | "\t-1.5006\tnever answer \t\t0.8832\tmmmmm \n", 721 | "\t-1.5000\tnever see \t\t0.8816\tsooo excited \n", 722 | "\t-1.4992\tshitty \t\t0.8781\tgorgeous \n", 723 | "\t-1.4918\tsunburned \t\t0.8779\tiamdiddy \n", 724 | "\t-1.4899\tdamnit \t\t0.8765\tits great \n", 725 | "\t-1.4888\tto reality \t\t0.8718\tty \n", 726 | "\t-1.4884\tscratched \t\t0.8713\twas sick \n", 727 | "\t-1.4871\tgot cold \t\t0.8705\tthat cute \n", 728 | "\t-1.4843\tswollen \t\t0.8701\tyou might \n", 729 | "\t-1.4841\ttear \t\t0.8686\thahaha \n", 730 | 
"\t-1.4803\tnot playing \t\t0.8636\tlets \n", 731 | "\t-1.4756\tfail \t\t0.8604\tbom \n", 732 | "\t-1.4734\tisnt working \t\t0.8591\twww \n", 733 | "\t-1.4717\tstolen \t\t0.8573\tlovee \n", 734 | "\t-1.4706\tsome reason \t\t0.8565\tperfect \n", 735 | "\t-1.4673\tsicky \t\t0.8551\talways good \n", 736 | "\t-1.4667\tcried \t\t0.8543\tgood evening \n", 737 | "\t-1.4634\tregents \t\t0.8522\tnice \n", 738 | "\t-1.4620\ttears \t\t0.8509\tgreat news \n", 739 | "\t-1.4580\tshattered \t\t0.8495\tof course \n", 740 | "\t-1.4539\tfrustrating \t\t0.8492\tfly with \n", 741 | "\t-1.4533\texpired \t\t0.8460\tloves it \n", 742 | "\t-1.4530\tburnt my \t\t0.8415\tentertaining \n", 743 | "\t-1.4522\tbeing mean \t\t0.8328\tgiveaway \n", 744 | "\t-1.4521\tno1 \t\t0.8316\tgreat meeting \n", 745 | "\t-1.4501\tforgot my \t\t0.8311\tjoin me \n", 746 | "\t-1.4488\tiphone update \t\t0.8261\thurts like \n", 747 | "\t-1.4467\tnoooooooo \t\t0.8250\tfeedback \n", 748 | "\t-1.4457\trained \t\t0.8210\thumor \n", 749 | "\t-1.4448\tgoodbye \t\t0.8171\tme laugh \n", 750 | "\t-1.4441\thumid \t\t0.8170\tsurprised \n", 751 | "\t-1.4433\tnooo \t\t0.8162\tha \n", 752 | "\t-1.4407\tcrushed \t\t0.8156\tjust wondering \n", 753 | "\t-1.4402\tstressed \t\t0.8130\tgood choice \n", 754 | "\t-1.4381\twheres \t\t0.8130\teven better \n", 755 | "\t-1.4345\thow come \t\t0.8119\tanytime \n", 756 | "\t-1.4337\tcry \t\t0.8114\tin case \n", 757 | "\t-1.4261\tdivorce \t\t0.8092\tmrtweet \n", 758 | "\t-1.4230\tboo \t\t0.8079\tfeatured \n", 759 | "\t-1.4220\tfroze \t\t0.8063\tfresh \n", 760 | "\t-1.4217\tsuspended \t\t0.8047\there come \n", 761 | "\t-1.4217\twishing could \t\t0.8013\trecommendation \n", 762 | "\t-1.4189\towie \t\t0.7996\thello there \n", 763 | "\t-1.4181\tbad day \t\t0.7986\thi \n", 764 | "\t-1.4167\tdelayed \t\t0.7975\tcelebration \n", 765 | "\t-1.4162\tsob \t\t0.7955\tnighty \n", 766 | "\t-1.4138\tno sun \t\t0.7954\taha \n", 767 | "\t-1.4124\tmissed \t\t0.7952\tcertainly \n", 768 | "\t-1.4108\thuhu 
\t\t0.7949\tpeace \n", 769 | "\t-1.4101\trubbish \t\t0.7940\twent well \n", 770 | "\t-1.4077\tashamed \t\t0.7929\thell yeah \n", 771 | "\t-1.4057\tdrained \t\t0.7923\twatch out \n", 772 | "\t-1.4051\tno go \t\t0.7921\tsuccess \n", 773 | "\t-1.4023\tbut nobody \t\t0.7921\tlaughs \n", 774 | "\t-1.3996\t30 tag \t\t0.7866\tð¼ð \n", 775 | "\t-1.3966\tsomething wrong\t\t0.7837\tenjoyed \n", 776 | "\t-1.3957\tcannot \t\t0.7827\ttalented \n", 777 | "\t-1.3948\twe lost \t\t0.7826\tclever \n", 778 | "\t-1.3945\targhh \t\t0.7825\t100 followers \n", 779 | "\t-1.3933\tignored \t\t0.7816\tchillin \n", 780 | "\t-1.3931\twoe \t\t0.7809\tgreat week \n", 781 | "\t-1.3926\twhy can \t\t0.7797\thow are \n", 782 | "\t-1.3891\ttorn \t\t0.7788\tlove this \n", 783 | "\t-1.3880\tfailing \t\t0.7787\tgenius \n", 784 | "\t-1.3865\tmuch pain \t\t0.7783\tcoolest \n", 785 | "\t-1.3855\tworst \t\t0.7768\tget sick \n", 786 | "\t-1.3772\tsniffle \t\t0.7768\tmuch better \n", 787 | "\t-1.3769\tnot able \t\t0.7765\thttp blip \n", 788 | "\t-1.3739\tdisgusting \t\t0.7763\tyesss \n", 789 | "\t-1.3739\targ \t\t0.7761\tconversation \n", 790 | "\t-1.3731\tbugger \t\t0.7752\tchatting \n", 791 | "\t-1.3728\tbit my \t\t0.7731\tsweet dreams \n", 792 | "\t-1.3723\tno word \t\t0.7729\tuseful \n", 793 | "\t-1.3708\tvet \t\t0.7720\tchilling \n", 794 | "\t-1.3683\tlost my \t\t0.7718\tcase you \n", 795 | "\t-1.3681\tleaves \t\t0.7704\tmmm \n", 796 | "\t-1.3603\tmourning \t\t0.7700\tbeautiful \n", 797 | "\t-1.3602\ttoo fast \t\t0.7700\thaha \n", 798 | "\t-1.3601\treally ill \t\t0.7692\tthe lord \n", 799 | "\t-1.3596\tgrrrrrr \t\t0.7683\thahahaha \n", 800 | "\t-1.3589\tgrrr \t\t0.7677\tworth it \n", 801 | "\t-1.3576\tripped \t\t0.7668\tright back \n", 802 | "\t-1.3499\tasthma \t\t0.7639\theadache and \n", 803 | "\t-1.3480\tgross \t\t0.7625\tin mind \n", 804 | "\t-1.3422\tdies \t\t0.7617\tof fun \n", 805 | "\t-1.3421\tpainful \t\t0.7610\tyour thinking \n", 806 | "\t-1.3420\twasn able \t\t0.7599\thas great \n", 807 | 
"\t-1.3412\tyuck \t\t0.7598\tcelebrating \n", 808 | "\t-1.3398\tthrow up \t\t0.7589\tappreciate that\n", 809 | "\t-1.3391\tcouldn make \t\t0.7572\tlakers \n", 810 | "\t-1.3380\tsurgery \t\t0.7538\tlong as \n", 811 | "\t-1.3380\tdestroyed \t\t0.7514\twhoop \n", 812 | "\t-1.3377\tcant \t\t0.7497\trockin \n", 813 | "\t-1.3367\tdislike \t\t0.7480\tinteresting \n", 814 | "\t-1.3358\thating \t\t0.7466\tlet get \n", 815 | "\t-1.3346\tis ill \t\t0.7466\tclassic \n", 816 | "\t-1.3345\tstressing \t\t0.7433\trecommend \n", 817 | "\t-1.3339\tunable \t\t0.7425\tdon worry \n", 818 | "\t-1.3319\truining \t\t0.7401\thah \n", 819 | "\t-1.3305\tno iphone \t\t0.7400\train rain \n", 820 | "\t-1.3302\tcrashed \t\t0.7390\trocking \n", 821 | "\t-1.3290\ted mcmahon \t\t0.7384\tsweetheart \n", 822 | "\t-1.3233\tpoisoning \t\t0.7357\tcoming soon \n", 823 | "\t-1.3231\tstorming \t\t0.7347\tfinally \n", 824 | "\t-1.3223\tblister \t\t0.7339\tjust thought \n", 825 | "\t-1.3212\thurt \t\t0.7318\tsad and \n", 826 | "\t-1.3198\tos3 \t\t0.7301\tfollow them \n", 827 | "\t-1.3194\twhat waste \t\t0.7295\tlove \n", 828 | "\t-1.3181\thorrid \t\t0.7288\trules \n", 829 | "\t-1.3180\tallergic \t\t0.7282\tvery happy \n", 830 | "\t-1.3177\tsprained \t\t0.7274\tdm me \n", 831 | "\t-1.3162\tboo for \t\t0.7269\tgreatest \n", 832 | "\t-1.3155\ttrackle \t\t0.7253\ttwittering \n", 833 | "\t-1.3148\twas hoping \t\t0.7241\taww thank \n", 834 | "\t-1.3136\trefuses \t\t0.7206\tmariahcarey \n", 835 | "\t-1.3121\tisn working \t\t0.7174\tnow following \n", 836 | "\t-1.3116\tburned \t\t0.7161\tawww thanks \n", 837 | "\t-1.3115\tsleepless night\t\t0.7144\tsmart \n", 838 | "\t-1.3095\tnightmare \t\t0.7137\ttasty \n", 839 | "\t-1.3058\tbad news \t\t0.7119\tdont worry \n", 840 | "\t-1.3055\tno power \t\t0.7110\tgreat to \n", 841 | "\t-1.3053\tdoesn look \t\t0.7109\thave beautiful \n", 842 | "\t-1.3051\tno bueno \t\t0.7100\tblast \n", 843 | "\t-1.3047\tdon like \t\t0.7089\tbeing sick \n", 844 | "\t-1.3023\tconcert but 
\t\t0.7080\tbed good \n", 845 | "\t-1.3023\tcancel \t\t0.7071\tso exciting \n", 846 | "\t-1.3016\turgh \t\t0.7066\tpoor guy \n", 847 | "\t-1.2999\tdeleted \t\t0.7056\tbirthday hope \n", 848 | "\t-1.2976\tbleeding \t\t0.7043\tsad he \n", 849 | "\t-1.2953\tsry \t\t0.7042\tcheck them \n", 850 | "\t-1.2939\toh noes \t\t0.7038\tmy dear \n", 851 | "\t-1.2939\tkeeps crashing \t\t0.7034\tfeel sad \n", 852 | "\t-1.2933\tmad that \t\t0.7030\tmy headache \n", 853 | "\t-1.2906\tnauseous \t\t0.7022\tthat right \n", 854 | "\t-1.2899\tcan find \t\t0.7016\tquotes \n", 855 | "\t-1.2895\tbye to \t\t0.7014\tam happy \n", 856 | "\t-1.2892\tso mad \t\t0.6997\ttweeters \n", 857 | "\t-1.2891\tanxiety \t\t0.6989\tto hurt \n", 858 | "\t-1.2891\tcant find \t\t0.6988\t½ï \n", 859 | "\t-1.2890\tno friends \t\t0.6985\tstyle \n", 860 | "\t-1.2880\tnever gonna \t\t0.6980\treally cute \n", 861 | "\t-1.2875\tnot impressed \t\t0.6973\tperhaps \n", 862 | "\t-1.2858\tnot here \t\t0.6969\twell worth \n", 863 | "\t-1.2851\tfeel for \t\t0.6963\texciting \n", 864 | "\t-1.2832\thasnt \t\t0.6960\tmuch fun \n", 865 | "\t-1.2832\tlast show \t\t0.6947\tsurprise \n", 866 | "\t-1.2812\tlink didn \t\t0.6940\tchallenge \n", 867 | "\t-1.2785\tsniff \t\t0.6939\thave great \n", 868 | "\t-1.2778\thacked \t\t0.6936\ttweeties \n", 869 | "\t-1.2761\tsigh \t\t0.6898\tto ya \n", 870 | "\t-1.2759\tsorry \t\t0.6896\thappy sunday \n", 871 | "\t-1.2750\tis acting \t\t0.6881\tliked it \n", 872 | "\t-1.2674\texpensive \t\t0.6878\tfor follow \n", 873 | "\t-1.2664\tscared \t\t0.6868\tmorning good \n", 874 | "\t-1.2646\tsucked \t\t0.6865\t140 \n", 875 | "\t-1.2619\tdidn get \t\t0.6864\tbride \n", 876 | "\t-1.2616\tstill waiting \t\t0.6862\ttweet was \n", 877 | "\t-1.2615\tdamaged \t\t0.6856\ttune \n", 878 | "\t-1.2614\tannoyed \t\t0.6855\tthe follow \n", 879 | "\t-1.2595\toww \t\t0.6847\tgood \n", 880 | "\t-1.2569\tblows \t\t0.6844\tdoing great \n", 881 | "\t-1.2566\tbut me \t\t0.6840\ttomfelton \n", 882 | "\t-1.2554\tmessed 
\t\t0.6838\tdiversity \n", 883 | "\t-1.2553\tnever going \t\t0.6827\tcool http \n", 884 | "\t-1.2539\thit my \t\t0.6822\tgreat \n", 885 | "\t-1.2529\tnone \t\t0.6815\tfollow \n", 886 | "\t-1.2529\tstrep \t\t0.6797\tgreat job \n", 887 | "\t-1.2525\tfalling apart \t\t0.6795\tvery cute \n", 888 | "\t-1.2516\tmigraines \t\t0.6794\tdepends \n", 889 | "\t-1.2504\tgonna rain \t\t0.6792\tindeed \n", 890 | "\t-1.2496\tso sorry \t\t0.6791\tfollow him \n", 891 | "\t-1.2494\tblocked \t\t0.6775\tsick of \n", 892 | "\t-1.2491\tlast episode \t\t0.6768\ttell all \n", 893 | "\t-1.2490\tdull \t\t0.6747\tthe ff \n", 894 | "\t-1.2489\tnot getting \t\t0.6713\tremember to \n", 895 | "\t-1.2473\tby myself \t\t0.6706\thi there \n", 896 | "\t-1.2434\tboo hoo \t\t0.6704\tit great \n", 897 | "\t-1.2433\targhhh \t\t0.6697\tadventure \n", 898 | "\t-1.2402\tjailbreak \t\t0.6690\tsang \n", 899 | "\t-1.2398\tcouldn go \t\t0.6685\tmuseum \n", 900 | "\t-1.2390\tcoulda \t\t0.6671\thurts from \n", 901 | "\t-1.2385\tattacked \t\t0.6669\tfor asking \n", 902 | "\t-1.2374\twishes he \t\t0.6656\tstrawberry \n", 903 | "\t-1.2364\tcloudy \t\t0.6650\ttoo cute \n", 904 | "\t-1.2362\tbe missed \t\t0.6650\tdig \n", 905 | "\t-1.2353\tfreezing \t\t0.6650\tchill \n", 906 | "\t-1.2350\tim lonely \t\t0.6632\tyoungq \n", 907 | "\t-1.2339\tlost \t\t0.6629\tlove my \n", 908 | "\t-1.2332\tdavid carradine\t\t0.6628\tkeep the \n", 909 | "\t-1.2316\tfeeling down \t\t0.6622\tcreative \n", 910 | "\t-1.2308\tsuicide \t\t0.6613\tdelicious \n", 911 | "\t-1.2301\twish \t\t0.6610\tgood plan \n", 912 | "\t-1.2276\tneed new \t\t0.6606\tcheck this \n", 913 | "\t-1.2276\twhy does \t\t0.6604\tall is \n", 914 | "\t-1.2270\tfell \t\t0.6594\tjoin us \n", 915 | "\t-1.2263\tpostponed \t\t0.6593\thave nice \n", 916 | "\t-1.2262\twhy did \t\t0.6578\the he \n", 917 | "\t-1.2235\tsunburnt \t\t0.6570\tsong of \n", 918 | "\t-1.2227\tallergies \t\t0.6569\tsexy \n", 919 | "\t-1.2199\t447 \t\t0.6561\tgot my \n", 920 | "\t-1.2163\tburnt 
\t\t0.6556\ttwitterverse \n", 921 | "\t-1.2162\tisn going \t\t0.6544\tjoining \n", 922 | "\t-1.2145\tis leaving \t\t0.6538\tand eating \n", 923 | "\t-1.2143\tfever \t\t0.6533\theaven \n", 924 | "\t-1.2140\tmy hopes \t\t0.6530\tsmile on \n", 925 | "\t-1.2135\twas looking \t\t0.6527\tannoying you \n", 926 | "\t-1.2135\tarent \t\t0.6526\tre very \n", 927 | "\t-1.2134\tfor maintenance\t\t0.6526\they \n", 928 | "\t-1.2111\toh no \t\t0.6524\tre cool \n", 929 | "\t-1.2109\tsore \t\t0.6517\tallen \n", 930 | "\t-1.2108\twhere is \t\t0.6516\tits good \n", 931 | "\t-1.2102\tdentist \t\t0.6516\tblessed day \n", 932 | "\t-1.2080\twhy wont \t\t0.6493\tadded \n", 933 | "\t-1.2079\taww man \t\t0.6491\tbanksyart2 \n", 934 | "\t-1.2069\twishing was \t\t0.6482\tsome fun \n", 935 | "\t-1.2066\tdepression \t\t0.6466\tworship \n", 936 | "\t-1.2059\toh dear \t\t0.6448\tat http \n", 937 | "\t-1.2038\tsuck \t\t0.6447\thope to \n", 938 | "\t-1.2035\tcool especially\t\t0.6444\ttake look \n", 939 | "\t-1.2033\tuncomfortable \t\t0.6439\tlistening \n", 940 | "\t-1.2030\trained out \t\t0.6428\tall good \n", 941 | "\t-1.2028\tgetting old \t\t0.6425\tworry \n", 942 | "\t-1.2017\tstill havent \t\t0.6424\trecommendations\n", 943 | "\t-1.2004\tnot having \t\t0.6422\tbirthday wishes\n", 944 | "\t-1.2000\twish was \t\t0.6421\thahah \n", 945 | "\t-1.1979\tthrew up \t\t0.6403\tsir \n", 946 | "\t-1.1960\tughhhhh \t\t0.6397\tmay the \n", 947 | "\t-1.1959\ticky \t\t0.6396\thave wonderful \n", 948 | "\t-1.1936\tacting up \t\t0.6390\tlove it \n", 949 | "\t-1.1928\twont work \t\t0.6368\tretweet \n", 950 | "\t-1.1928\tcouldnt \t\t0.6365\tgeek \n", 951 | "\t-1.1919\twhat happened \t\t0.6363\ttweeps \n", 952 | "\t-1.1915\tworrying \t\t0.6358\tyou too \n", 953 | "\t-1.1906\tsaddens me \t\t0.6349\tappreciate \n", 954 | "\t-1.1896\tgrr \t\t0.6338\tmyspace com \n", 955 | "\t-1.1866\twon load \t\t0.6333\thow cute \n", 956 | "\t-1.1822\thit by \t\t0.6324\thappy mother \n", 957 | "\t-1.1783\tughh \t\t0.6324\tspreading 
\n", 958 | "\t-1.1779\tiphone software\t\t0.6316\tincredible \n", 959 | "\t-1.1779\tstuck \t\t0.6292\tis perfect \n", 960 | "\t-1.1769\tunwell \t\t0.6275\tnothing like \n", 961 | "\t-1.1767\tbut no \t\t0.6261\tall love \n", 962 | "\t-1.1739\tvery sorry \t\t0.6255\tare great \n", 963 | "\t-1.1732\thaving issues \t\t0.6245\thad fun \n", 964 | "\t-1.1730\tbe long \t\t0.6238\tall about \n", 965 | "\t-1.1697\trunning out \t\t0.6234\tloves the \n", 966 | "\t-1.1683\terror \t\t0.6230\tmost welcome \n", 967 | "\t-1.1682\tcruel \t\t0.6213\tsick so \n", 968 | "\t-1.1676\twishing \t\t0.6194\tyes \n", 969 | "\t-1.1672\tewww \t\t0.6190\tcontent \n", 970 | "\t-1.1666\tall alone \t\t0.6185\trocked \n", 971 | "\t-1.1639\tsmh \t\t0.6181\tyou really \n", 972 | "\t-1.1622\tmissing her \t\t0.6178\tat www \n", 973 | "\t-1.1622\tno money \t\t0.6173\tplenty \n", 974 | "\t-1.1616\titchy \t\t0.6163\tor pay \n", 975 | "\t-1.1610\trash \t\t0.6158\tlyrics \n", 976 | "\t-1.1600\twisdom teeth \t\t0.6155\tjust ordered \n", 977 | "\t-1.1569\tis broken \t\t0.6148\tno sorry \n", 978 | "\t-1.1568\twhere did \t\t0.6146\tyou need \n", 979 | "\t-1.1566\twould but \t\t0.6141\tfave \n", 980 | "\t-1.1562\tgetting cold \t\t0.6125\tis loving \n", 981 | "\t-1.1560\tand stuck \t\t0.6119\twho came \n", 982 | "\t-1.1555\twhen will \t\t0.6116\tmorning how \n", 983 | "\t-1.1552\twimbledon \t\t0.6111\toh and \n", 984 | "\t-1.1543\tso confused \t\t0.6104\thurts and \n", 985 | "\t-1.1522\tshould ve \t\t0.6098\tspread the \n", 986 | "\t-1.1519\tpathetic \t\t0.6083\taplusk \n", 987 | "\t-1.1515\tfawcett \t\t0.6078\tnerd \n", 988 | "\t-1.1503\tissues with \t\t0.6076\tyup \n", 989 | "\t-1.1502\tthey took \t\t0.6072\tsick and \n", 990 | "\t-1.1501\tcouldn even \t\t0.6041\tgot twitter \n", 991 | "\t-1.1491\tisnt \t\t0.6027\tfine \n", 992 | "\t-1.1473\twish lived \t\t0.6025\thomemade \n", 993 | "\t-1.1468\taccident \t\t0.6018\task \n", 994 | "\t-1.1464\tnot working \t\t0.6006\thow love \n", 995 | "\t-1.1439\tboring 
\t\t0.5995\tbe sick \n" 996 | ] 997 | } 998 | ], 999 | "source": [ 1000 | "show_most_informative_features(feature_names, sentiment_pipeline.steps[2][1], n=500)" 1001 | ] 1002 | } 1003 | ], 1004 | "metadata": { 1005 | "kernelspec": { 1006 | "display_name": "Python 3", 1007 | "language": "python", 1008 | "name": "python3" 1009 | }, 1010 | "language_info": { 1011 | "codemirror_mode": { 1012 | "name": "ipython", 1013 | "version": 3 1014 | }, 1015 | "file_extension": ".py", 1016 | "mimetype": "text/x-python", 1017 | "name": "python", 1018 | "nbconvert_exporter": "python", 1019 | "pygments_lexer": "ipython3", 1020 | "version": "3.5.0" 1021 | } 1022 | }, 1023 | "nbformat": 4, 1024 | "nbformat_minor": 0 1025 | } 1026 | --------------------------------------------------------------------------------