├── .gitignore
└── sentiment.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | trainingandtestdata
2 |
--------------------------------------------------------------------------------
/sentiment.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# A simple sentiment prototype"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {
14 | "collapsed": false
15 | },
16 | "outputs": [],
17 | "source": [
18 | "import os # manipulate paths\n",
19 | "import pandas as pd # SQL-like operations and convenience functions\n",
20 | "import joblib # save and load models"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "Download the Sentiment140 data from [their website](http://help.sentiment140.com/for-students) and set `DATA_DIR` to the directory in which you have put the `CSV` files."
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 2,
33 | "metadata": {
34 | "collapsed": false
35 | },
36 | "outputs": [],
37 | "source": [
38 | "DATA_DIR = \"trainingandtestdata\"  # directory holding the downloaded Sentiment140 CSV files (listed in .gitignore)\n",
39 | "training_csv_file = os.path.join(DATA_DIR, 'training.1600000.processed.noemoticon.csv')  # path to the training CSV read below"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {},
45 | "source": [
46 | "## A peek at the data"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 3,
52 | "metadata": {
53 | "collapsed": false
54 | },
55 | "outputs": [],
56 | "source": [
57 | "names = ('polarity', 'id', 'date', 'query', 'author', 'text')  # column labels supplied by hand\n",
58 | "df = pd.read_csv(training_csv_file, encoding='latin1', names=names)  # passing names= makes pandas treat the first row as data, not as a header"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 4,
64 | "metadata": {
65 | "collapsed": false
66 | },
67 | "outputs": [
68 | {
69 | "data": {
70 | "text/html": [
71 | "
\n",
72 | "
\n",
73 | " \n",
74 | " \n",
75 | " | \n",
76 | " polarity | \n",
77 | " id | \n",
78 | " date | \n",
79 | " query | \n",
80 | " author | \n",
81 | " text | \n",
82 | "
\n",
83 | " \n",
84 | " \n",
85 | " \n",
86 | " 0 | \n",
87 | " 0 | \n",
88 | " 1467810369 | \n",
89 | " Mon Apr 06 22:19:45 PDT 2009 | \n",
90 | " NO_QUERY | \n",
91 | " _TheSpecialOne_ | \n",
92 | " @switchfoot http://twitpic.com/2y1zl - Awww, that's a bummer. You shoulda got David Carr of Third Day to do it. ;D | \n",
93 | "
\n",
94 | " \n",
95 | " 1 | \n",
96 | " 0 | \n",
97 | " 1467810672 | \n",
98 | " Mon Apr 06 22:19:49 PDT 2009 | \n",
99 | " NO_QUERY | \n",
100 | " scotthamilton | \n",
101 | " is upset that he can't update his Facebook by texting it... and might cry as a result School today also. Blah! | \n",
102 | "
\n",
103 | " \n",
104 | " 2 | \n",
105 | " 0 | \n",
106 | " 1467810917 | \n",
107 | " Mon Apr 06 22:19:53 PDT 2009 | \n",
108 | " NO_QUERY | \n",
109 | " mattycus | \n",
110 | " @Kenichan I dived many times for the ball. Managed to save 50% The rest go out of bounds | \n",
111 | "
\n",
112 | " \n",
113 | " 3 | \n",
114 | " 0 | \n",
115 | " 1467811184 | \n",
116 | " Mon Apr 06 22:19:57 PDT 2009 | \n",
117 | " NO_QUERY | \n",
118 | " ElleCTF | \n",
119 | " my whole body feels itchy and like its on fire | \n",
120 | "
\n",
121 | " \n",
122 | " 4 | \n",
123 | " 0 | \n",
124 | " 1467811193 | \n",
125 | " Mon Apr 06 22:19:57 PDT 2009 | \n",
126 | " NO_QUERY | \n",
127 | " Karoli | \n",
128 | " @nationwideclass no, it's not behaving at all. i'm mad. why am i here? because I can't see you all over there. | \n",
129 | "
\n",
130 | " \n",
131 | "
\n",
132 | "
"
133 | ],
134 | "text/plain": [
135 | " polarity id date query \\\n",
136 | "0 0 1467810369 Mon Apr 06 22:19:45 PDT 2009 NO_QUERY \n",
137 | "1 0 1467810672 Mon Apr 06 22:19:49 PDT 2009 NO_QUERY \n",
138 | "2 0 1467810917 Mon Apr 06 22:19:53 PDT 2009 NO_QUERY \n",
139 | "3 0 1467811184 Mon Apr 06 22:19:57 PDT 2009 NO_QUERY \n",
140 | "4 0 1467811193 Mon Apr 06 22:19:57 PDT 2009 NO_QUERY \n",
141 | "\n",
142 | " author \\\n",
143 | "0 _TheSpecialOne_ \n",
144 | "1 scotthamilton \n",
145 | "2 mattycus \n",
146 | "3 ElleCTF \n",
147 | "4 Karoli \n",
148 | "\n",
149 | " text \n",
150 | "0 @switchfoot http://twitpic.com/2y1zl - Awww, that's a bummer. You shoulda got David Carr of Third Day to do it. ;D \n",
151 | "1 is upset that he can't update his Facebook by texting it... and might cry as a result School today also. Blah! \n",
152 | "2 @Kenichan I dived many times for the ball. Managed to save 50% The rest go out of bounds \n",
153 | "3 my whole body feels itchy and like its on fire \n",
154 | "4 @nationwideclass no, it's not behaving at all. i'm mad. why am i here? because I can't see you all over there. "
155 | ]
156 | },
157 | "execution_count": 4,
158 | "metadata": {},
159 | "output_type": "execute_result"
160 | }
161 | ],
162 | "source": [
163 | "pd.options.display.max_colwidth = 140  # widen columns so the `text` column is not truncated in the display\n",
164 | "df.head()  # show first 5 rows"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": 5,
170 | "metadata": {
171 | "collapsed": false
172 | },
173 | "outputs": [
174 | {
175 | "data": {
176 | "text/html": [
177 | "\n",
178 | "
\n",
179 | " \n",
180 | " \n",
181 | " | \n",
182 | " polarity | \n",
183 | " id | \n",
184 | " date | \n",
185 | " query | \n",
186 | " author | \n",
187 | " text | \n",
188 | "
\n",
189 | " \n",
190 | " \n",
191 | " \n",
192 | " 1599995 | \n",
193 | " 4 | \n",
194 | " 2193601966 | \n",
195 | " Tue Jun 16 08:40:49 PDT 2009 | \n",
196 | " NO_QUERY | \n",
197 | " AmandaMarie1028 | \n",
198 | " Just woke up. Having no school is the best feeling ever | \n",
199 | "
\n",
200 | " \n",
201 | " 1599996 | \n",
202 | " 4 | \n",
203 | " 2193601969 | \n",
204 | " Tue Jun 16 08:40:49 PDT 2009 | \n",
205 | " NO_QUERY | \n",
206 | " TheWDBoards | \n",
207 | " TheWDB.com - Very cool to hear old Walt interviews! â« http://blip.fm/~8bmta | \n",
208 | "
\n",
209 | " \n",
210 | " 1599997 | \n",
211 | " 4 | \n",
212 | " 2193601991 | \n",
213 | " Tue Jun 16 08:40:49 PDT 2009 | \n",
214 | " NO_QUERY | \n",
215 | " bpbabe | \n",
216 | " Are you ready for your MoJo Makeover? Ask me for details | \n",
217 | "
\n",
218 | " \n",
219 | " 1599998 | \n",
220 | " 4 | \n",
221 | " 2193602064 | \n",
222 | " Tue Jun 16 08:40:49 PDT 2009 | \n",
223 | " NO_QUERY | \n",
224 | " tinydiamondz | \n",
225 | " Happy 38th Birthday to my boo of alll time!!! Tupac Amaru Shakur | \n",
226 | "
\n",
227 | " \n",
228 | " 1599999 | \n",
229 | " 4 | \n",
230 | " 2193602129 | \n",
231 | " Tue Jun 16 08:40:50 PDT 2009 | \n",
232 | " NO_QUERY | \n",
233 | " RyanTrevMorris | \n",
234 | " happy #charitytuesday @theNSPCC @SparksCharity @SpeakingUpH4H | \n",
235 | "
\n",
236 | " \n",
237 | "
\n",
238 | "
"
239 | ],
240 | "text/plain": [
241 | " polarity id date query \\\n",
242 | "1599995 4 2193601966 Tue Jun 16 08:40:49 PDT 2009 NO_QUERY \n",
243 | "1599996 4 2193601969 Tue Jun 16 08:40:49 PDT 2009 NO_QUERY \n",
244 | "1599997 4 2193601991 Tue Jun 16 08:40:49 PDT 2009 NO_QUERY \n",
245 | "1599998 4 2193602064 Tue Jun 16 08:40:49 PDT 2009 NO_QUERY \n",
246 | "1599999 4 2193602129 Tue Jun 16 08:40:50 PDT 2009 NO_QUERY \n",
247 | "\n",
248 | " author \\\n",
249 | "1599995 AmandaMarie1028 \n",
250 | "1599996 TheWDBoards \n",
251 | "1599997 bpbabe \n",
252 | "1599998 tinydiamondz \n",
253 | "1599999 RyanTrevMorris \n",
254 | "\n",
255 | " text \n",
256 | "1599995 Just woke up. Having no school is the best feeling ever \n",
257 | "1599996 TheWDB.com - Very cool to hear old Walt interviews! â« http://blip.fm/~8bmta \n",
258 | "1599997 Are you ready for your MoJo Makeover? Ask me for details \n",
259 | "1599998 Happy 38th Birthday to my boo of alll time!!! Tupac Amaru Shakur \n",
260 | "1599999 happy #charitytuesday @theNSPCC @SparksCharity @SpeakingUpH4H "
261 | ]
262 | },
263 | "execution_count": 5,
264 | "metadata": {},
265 | "output_type": "execute_result"
266 | }
267 | ],
268 | "source": [
269 | "df.tail()  # show last 5 rows"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 6,
275 | "metadata": {
276 | "collapsed": false
277 | },
278 | "outputs": [],
279 | "source": [
280 | "df['polarity'] = df['polarity'].replace({0: -1, 4: 1})  # map labels 0/4 to -1/+1; assign back, because an inplace call on a selected column does not reliably update df\n",
281 | "text = df['text']  # tweet strings used as model input\n",
282 | "target = df['polarity'].values  # labels as a NumPy array, aligned row-for-row with `text`"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": 7,
288 | "metadata": {
289 | "collapsed": false
290 | },
291 | "outputs": [
292 | {
293 | "name": "stdout",
294 | "output_type": "stream",
295 | "text": [
296 | "1600000 1600000\n"
297 | ]
298 | }
299 | ],
300 | "source": [
301 | "print(len(target), len(text))  # sanity check: one label per tweet"
302 | ]
303 | },
304 | {
305 | "cell_type": "markdown",
306 | "metadata": {},
307 | "source": [
308 | "## Train the model\n",
309 | "\n",
310 | "Set 20% of the data aside to test the trained model"
311 | ]
312 | },
313 | {
314 | "cell_type": "code",
315 | "execution_count": 8,
316 | "metadata": {
317 | "collapsed": false
318 | },
319 | "outputs": [],
320 | "source": [
321 | "from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in scikit-learn 0.20\n",
322 | "\n",
323 | "# Hold out 20% of the rows; the fixed random_state keeps the split reproducible across runs\n",
324 | "text_train, text_validation, target_train, target_validation = train_test_split(\n",
325 | "    text, target, test_size=0.2, random_state=42)"
326 | ]
327 | },
328 | {
329 | "cell_type": "markdown",
330 | "metadata": {},
331 | "source": [
332 | "Build a pipeline"
333 | ]
334 | },
335 | {
336 | "cell_type": "code",
337 | "execution_count": 9,
338 | "metadata": {
339 | "collapsed": false
340 | },
341 | "outputs": [],
342 | "source": [
343 | "from sklearn.feature_extraction.text import CountVectorizer\n",
344 | "from sklearn.feature_selection import SelectKBest, chi2\n",
345 | "from sklearn.linear_model import LogisticRegressionCV\n",
346 | "from sklearn.pipeline import Pipeline\n",
347 | "\n",
348 | "vectorizer = CountVectorizer(ngram_range=(1, 2), max_features=100000)  # count unigrams and bigrams, vocabulary capped at 100,000 entries\n",
349 | "feature_selector = SelectKBest(chi2, k=5000)  # keep the 5,000 features with the highest chi-squared score\n",
350 | "classifier = LogisticRegressionCV(n_jobs=4)  # logistic regression with built-in cross-validation, using 4 parallel jobs"
351 | ]
352 | },
353 | {
354 | "cell_type": "markdown",
355 | "metadata": {},
356 | "source": [
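357 | "This next cell took ~3 minutes to run on my machine. The fitted pipeline is cached in `model.pkl`, so later runs load it instead of retraining; delete the file to retrain."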
358 | ]
359 | },
360 | {
361 | "cell_type": "code",
362 | "execution_count": 10,
363 | "metadata": {
364 | "collapsed": false
365 | },
366 | "outputs": [],
367 | "source": [
368 | "MODEL_PATH = 'model.pkl'  # cache file for the fitted pipeline; delete it to force retraining\n",
369 | "if os.path.exists(MODEL_PATH):\n",
370 | "    # NOTE: joblib.load unpickles the file, which can execute arbitrary code -- only load a model.pkl you created yourself.\n",
371 | "    # The cache is not invalidated when the data or the pipeline settings change.\n",
372 | "    sentiment_pipeline = joblib.load(MODEL_PATH)\n",
373 | "else:\n",
374 | "    # Pipeline documents `steps` as a list of (name, estimator) pairs: vectorize -> select features -> classify\n",
375 | "    sentiment_pipeline = Pipeline([('v', vectorizer), ('f', feature_selector), ('c', classifier)])\n",
376 | "    sentiment_pipeline.fit(text_train, target_train)\n",
377 | "    joblib.dump(sentiment_pipeline, MODEL_PATH)"
378 | ]
379 | },
380 | {
381 | "cell_type": "markdown",
382 | "metadata": {
383 | "collapsed": false
384 | },
385 | "source": [
386 | "## Test the model"
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": 11,
392 | "metadata": {
393 | "collapsed": false
394 | },
395 | "outputs": [
396 | {
397 | "name": "stdout",
398 | "output_type": "stream",
399 | "text": [
400 | "[-1 1 -1 1 -1]\n"
401 | ]
402 | }
403 | ],
404 | "source": [
405 | "print(sentiment_pipeline.predict(['bad', 'good', \"didnt like\", \"today was a good day\", \"i hate this product\"]))  # quick smoke test on hand-written phrases"
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": 12,
411 | "metadata": {
412 | "collapsed": false
413 | },
414 | "outputs": [
415 | {
416 | "name": "stdout",
417 | "output_type": "stream",
418 | "text": [
419 | "1 -1 \t @chrishasboobs AHHH I HOPE YOUR OK!!! \n",
420 | "1 -1 \t @misstoriblack cool , i have no tweet apps for my razr 2\n",
421 | "1 -1 \t @TiannaChaos i know just family drama. its lame.hey next time u hang out with kim n u guys like have a sleepover or whatever, ill call u\n",
422 | "-1 -1 \t School email won't open and I have geography stuff on there to revise! *Stupid School* :'(\n",
423 | "1 -1 \t upper airways problem \n",
424 | "-1 -1 \t Going to miss Pastor's sermon on Faith... \n",
425 | "1 1 \t on lunch....dj should come eat with me \n",
426 | "-1 -1 \t @piginthepoke oh why are you feeling like that? \n",
427 | "-1 -1 \t gahh noo!peyton needs to live!this is horrible \n",
428 | "1 1 \t @mrstessyman thank you glad you like it! There is a product review bit on the site Enjoy knitting it!\n"
429 | ]
430 | }
431 | ],
432 | "source": [
433 | "for tweet, label in zip(text_validation[:10], target_validation[:10]):  # distinct names so the dataset-level `text`/`target` are not overwritten\n",
434 | "    print(sentiment_pipeline.predict([tweet])[0], label, '\\t', tweet)  # columns: predicted label, actual label, tweet"
435 | ]
436 | },
437 | {
438 | "cell_type": "code",
439 | "execution_count": 13,
440 | "metadata": {
441 | "collapsed": false
442 | },
443 | "outputs": [
444 | {
445 | "data": {
446 | "text/plain": [
447 | "0.79942812500000004"
448 | ]
449 | },
450 | "execution_count": 13,
451 | "metadata": {},
452 | "output_type": "execute_result"
453 | }
454 | ],
455 | "source": [
456 | "sentiment_pipeline.score(text_validation, target_validation)  # accuracy on the held-out validation split"
457 | ]
458 | },
459 | {
460 | "cell_type": "markdown",
461 | "metadata": {},
462 | "source": [
463 | "## What did the model learn?"
464 | ]
465 | },
466 | {
467 | "cell_type": "code",
468 | "execution_count": 14,
469 | "metadata": {
470 | "collapsed": false
471 | },
472 | "outputs": [],
473 | "source": [
474 | "fitted_vectorizer = sentiment_pipeline.steps[0][1]  # get_feature_names() was removed in scikit-learn 1.2, so prefer get_feature_names_out()\n",
475 | "all_feature_names = fitted_vectorizer.get_feature_names_out() if hasattr(fitted_vectorizer, 'get_feature_names_out') else fitted_vectorizer.get_feature_names()\n",
476 | "feature_names = [all_feature_names[i] for i in sentiment_pipeline.steps[1][1].get_support(indices=True)]  # keep only the names of the features the selector retained\n",
477 | "\n",
478 | "def show_most_informative_features(feature_names, clf, n=1000):\n",
479 | "    \"\"\"Print the n lowest and n highest coefficients of clf.coef_[0], with their feature names, side by side.\"\"\"\n",
480 | "    ranked = sorted(zip(clf.coef_[0], feature_names))  # ascending by coefficient\n",
481 | "    for (low_coef, low_name), (high_coef, high_name) in zip(ranked[:n], ranked[:-(n + 1):-1]):\n",
482 | "        print(\"\\t%.4f\\t%-15s\\t\\t%.4f\\t%-15s\" % (low_coef, low_name, high_coef, high_name))"
483 | ]
484 | },
485 | {
486 | "cell_type": "code",
487 | "execution_count": 15,
488 | "metadata": {
489 | "collapsed": false
490 | },
491 | "outputs": [
492 | {
493 | "name": "stdout",
494 | "output_type": "stream",
495 | "text": [
496 | "\t-3.4628\tnot happy \t\t2.6750\tno problem \n",
497 | "\t-3.2794\tclean me \t\t2.6244\tno worries \n",
498 | "\t-2.9125\tnot looking \t\t2.5657\tcannot wait \n",
499 | "\t-2.9124\tinaperfectworld\t\t2.3216\tcant wait \n",
500 | "\t-2.9110\tsad \t\t2.2859\tno prob \n",
501 | "\t-2.8527\tpassed away \t\t2.2517\tsmiling \n",
502 | "\t-2.6970\tsadly \t\t2.1924\tnothing wrong \n",
503 | "\t-2.6421\tnot nice \t\t2.1093\tnot bad \n",
504 | "\t-2.6333\tgutted \t\t2.0198\tsad sad \n",
505 | "\t-2.6265\tnot cool \t\t1.9122\tcongratulations\n",
506 | "\t-2.6044\tno luck \t\t1.7787\tfuzzball \n",
507 | "\t-2.6044\tdisappointing \t\t1.7751\tno probs \n",
508 | "\t-2.4926\theartbreaking \t\t1.7722\twelcome \n",
509 | "\t-2.4771\tsadd \t\t1.7376\tmusicmonday \n",
510 | "\t-2.4683\theartbroken \t\t1.6985\thate hate \n",
511 | "\t-2.4286\tboohoo \t\t1.6670\tyayyy \n",
512 | "\t-2.4184\trip \t\t1.6627\tsmile \n",
513 | "\t-2.4156\tnot fun \t\t1.6291\twoooo \n",
514 | "\t-2.4029\tpoor \t\t1.6259\tthankyou \n",
515 | "\t-2.3998\tdontyouhate \t\t1.5961\tjust sayin \n",
516 | "\t-2.3856\tbummer \t\t1.5958\tsmiles \n",
517 | "\t-2.3646\tdepressed \t\t1.5762\tproud \n",
518 | "\t-2.3349\tbummed \t\t1.5680\theheh \n",
519 | "\t-2.3247\tcancelled \t\t1.5599\tfollowfriday \n",
520 | "\t-2.3185\tunfortunately \t\t1.5565\tlaughter \n",
521 | "\t-2.3119\tnot excited \t\t1.5494\tthanks \n",
522 | "\t-2.3048\tdepressing \t\t1.5486\tamusing \n",
523 | "\t-2.2977\tupsetting \t\t1.5291\tmy pleasure \n",
524 | "\t-2.2942\theadache \t\t1.5291\tgracias \n",
525 | "\t-2.2716\tnooooo \t\t1.5015\tyayy \n",
526 | "\t-2.2643\tpakcricket \t\t1.4982\tgoodsex \n",
527 | "\t-2.2612\tfuneral \t\t1.4792\tfeel free \n",
528 | "\t-2.2482\tdevastated \t\t1.4786\tgiggle \n",
529 | "\t-2.2342\tboooo \t\t1.4735\thehehe \n",
530 | "\t-2.2308\thurts \t\t1.4685\tthumbs up \n",
531 | "\t-2.2298\tair france \t\t1.4580\tgrin \n",
532 | "\t-2.2223\truined \t\t1.4309\tblessed \n",
533 | "\t-2.2030\tlet down \t\t1.4294\tcan wait \n",
534 | "\t-2.1970\tmisses \t\t1.4087\thihi \n",
535 | "\t-2.1958\tnoooooo \t\t1.4024\tthank \n",
536 | "\t-2.1841\ttaking forever \t\t1.3995\tyummm \n",
537 | "\t-2.1810\tmissin \t\t1.3947\tyey \n",
538 | "\t-2.1793\tpoorly \t\t1.3946\tcongrats \n",
539 | "\t-2.1780\thomesick \t\t1.3920\twoohoo \n",
540 | "\t-2.1776\tbooooo \t\t1.3889\tblessings \n",
541 | "\t-2.1724\tnot good \t\t1.3815\thonored \n",
542 | "\t-2.1713\tfed up \t\t1.3811\tcrack me \n",
543 | "\t-2.1649\ttoothache \t\t1.3810\twooooo \n",
544 | "\t-2.1486\tdissapointed \t\t1.3710\texcellent \n",
545 | "\t-2.1344\tftl \t\t1.3682\tfeels good \n",
546 | "\t-2.1287\tdisappointed \t\t1.3526\twhoo \n",
547 | "\t-2.1198\tnot enjoying \t\t1.3477\theey \n",
548 | "\t-2.1184\tsaddest \t\t1.3444\tfor following \n",
549 | "\t-2.1134\twhat wrong \t\t1.3400\tsocial media \n",
550 | "\t-2.1131\tso mean \t\t1.3279\theehee \n",
551 | "\t-2.1052\tunhappy \t\t1.3276\tthx \n",
552 | "\t-2.0992\tunfair \t\t1.3275\tteehee \n",
553 | "\t-2.0963\tmissing \t\t1.3240\tdon forget \n",
554 | "\t-2.0849\tsick \t\t1.3150\tpleasure \n",
555 | "\t-2.0778\tsaddened \t\t1.3132\tcheers \n",
556 | "\t-2.0753\tmeanie \t\t1.3095\ttnx \n",
557 | "\t-2.0658\tfarrah \t\t1.2997\theya \n",
558 | "\t-2.0646\tsadness \t\t1.2949\tsweetest \n",
559 | "\t-2.0491\tbooo \t\t1.2829\thowdy \n",
560 | "\t-2.0476\thuhuhu \t\t1.2727\tawesomeness \n",
561 | "\t-2.0465\tdied \t\t1.2688\tappreciated \n",
562 | "\t-2.0284\tnever reply \t\t1.2670\treward \n",
563 | "\t-2.0279\tnot fair \t\t1.2670\tloving \n",
564 | "\t-2.0278\tcondolences \t\t1.2657\tpeaceful \n",
565 | "\t-2.0121\tsadface \t\t1.2645\tglad \n",
566 | "\t-2.0119\tsucks \t\t1.2506\tstay tuned \n",
567 | "\t-2.0107\tcries \t\t1.2493\trelaxed \n",
568 | "\t-2.0044\twouldn let \t\t1.2486\tnice work \n",
569 | "\t-2.0039\tmigraine \t\t1.2422\ttommcfly http \n",
570 | "\t-1.9938\tdreading \t\t1.2394\talike \n",
571 | "\t-1.9892\thates \t\t1.2366\thehe \n",
572 | "\t-1.9805\tlonely \t\t1.2365\thooray \n",
573 | "\t-1.9644\tno fun \t\t1.2337\texcited \n",
574 | "\t-1.9478\tnot funny \t\t1.2332\tsad just \n",
575 | "\t-1.9455\tnot feeling \t\t1.2325\tfound you \n",
576 | "\t-1.9449\tcanceled \t\t1.2285\theh \n",
577 | "\t-1.9410\twon let \t\t1.2267\tblessing \n",
578 | "\t-1.9253\tnoooooooooo \t\t1.2233\tstoked \n",
579 | "\t-1.9144\ttragic \t\t1.2158\tyaay \n",
580 | "\t-1.9073\tfathers day \t\t1.2130\tfabulous \n",
581 | "\t-1.9018\tisn good \t\t1.2127\tgoood \n",
582 | "\t-1.9017\tdrop by \t\t1.2110\tsad but \n",
583 | "\t-1.8982\twon work \t\t1.2010\ttoo funny \n",
584 | "\t-1.8905\thappy father \t\t1.1997\tugh hate \n",
585 | "\t-1.8784\tcramps \t\t1.1996\tftw \n",
586 | "\t-1.8782\tsold out \t\t1.1994\tyaaay \n",
587 | "\t-1.8722\tmiss \t\t1.1990\thappy \n",
588 | "\t-1.8709\tinjured \t\t1.1939\tnp \n",
589 | "\t-1.8683\tgood bye \t\t1.1890\tgoooood \n",
590 | "\t-1.8656\tunfortunate \t\t1.1853\twhy thank \n",
591 | "\t-1.8655\tiran \t\t1.1853\twoot \n",
592 | "\t-1.8644\tate all \t\t1.1847\tthankful \n",
593 | "\t-1.8603\tupset \t\t1.1839\tyay \n",
594 | "\t-1.8535\tunloved \t\t1.1838\tlearning how \n",
595 | "\t-1.8508\tjury duty \t\t1.1798\tdont forget \n",
596 | "\t-1.8486\tbreaks my \t\t1.1762\tadorable \n",
597 | "\t-1.8436\tno new \t\t1.1711\tyummy \n",
598 | "\t-1.8378\tsux \t\t1.1674\tgrateful \n",
599 | "\t-1.8346\tbroke \t\t1.1577\thee \n",
600 | "\t-1.8344\tcrying \t\t1.1564\tyumm \n",
601 | "\t-1.8307\tneda \t\t1.1521\tthanx \n",
602 | "\t-1.8211\tnot liking \t\t1.1422\thello \n",
603 | "\t-1.8092\tnot found \t\t1.1393\tgod is \n",
604 | "\t-1.8071\tnot coming \t\t1.1370\tnicely \n",
605 | "\t-1.8069\tcrashes \t\t1.1367\tgoodnight \n",
606 | "\t-1.8017\tstill no \t\t1.1294\tlovin \n",
607 | "\t-1.7991\thorrible \t\t1.1248\tquote \n",
608 | "\t-1.7977\twont let \t\t1.1238\twonderful \n",
609 | "\t-1.7931\tnot feelin \t\t1.1224\trocks \n",
610 | "\t-1.7902\twhy won \t\t1.1196\tsick but \n",
611 | "\t-1.7810\tno good \t\t1.1174\tawesome \n",
612 | "\t-1.7777\thiccups \t\t1.1163\tkindly \n",
613 | "\t-1.7744\tcoughing \t\t1.1138\tgooood \n",
614 | "\t-1.7707\tfeel well \t\t1.1127\tgotta love \n",
615 | "\t-1.7699\talas \t\t1.1048\tinspired \n",
616 | "\t-1.7675\thayfever \t\t1.1039\thow sweet \n",
617 | "\t-1.7651\tthrowing up \t\t1.1033\tsure thing \n",
618 | "\t-1.7627\thappy fathers \t\t1.1033\tbed night \n",
619 | "\t-1.7620\tbnp \t\t1.0981\tgoodluck \n",
620 | "\t-1.7545\tnoooo \t\t1.0960\thandy \n",
621 | "\t-1.7483\tnooooooo \t\t1.0896\tyum \n",
622 | "\t-1.7462\tdownside \t\t1.0884\twe come \n",
623 | "\t-1.7451\tnowhere to \t\t1.0882\tdon mind \n",
624 | "\t-1.7428\tunfortunatly \t\t1.0875\tbom dia \n",
625 | "\t-1.7419\twishin \t\t1.0772\thilarious \n",
626 | "\t-1.7418\twhy must \t\t1.0713\tbe sad \n",
627 | "\t-1.7414\tthere goes \t\t1.0621\tyessss \n",
628 | "\t-1.7411\twhyyyy \t\t1.0602\tpraise \n",
629 | "\t-1.7394\twhats wrong \t\t1.0596\thugh \n",
630 | "\t-1.7342\tdidn win \t\t1.0503\tenjoying \n",
631 | "\t-1.7320\tnever got \t\t1.0500\tabout time \n",
632 | "\t-1.7300\tsadder \t\t1.0498\tpleased \n",
633 | "\t-1.7286\twon come \t\t1.0462\tat last \n",
634 | "\t-1.7226\tawful \t\t1.0451\trelaxing \n",
635 | "\t-1.7158\tfailed \t\t1.0433\tw00t \n",
636 | "\t-1.7142\tstinks \t\t1.0407\tfunniest \n",
637 | "\t-1.7112\tughhh \t\t1.0399\tgo girl \n",
638 | "\t-1.7044\tughhhh \t\t1.0365\topinion \n",
639 | "\t-1.7043\touchie \t\t1.0329\tlaughing \n",
640 | "\t-1.7031\tfrustrated \t\t1.0308\tyou want \n",
641 | "\t-1.6914\thurting \t\t1.0285\ttetris \n",
642 | "\t-1.6889\tboooooo \t\t1.0215\tknew you \n",
643 | "\t-1.6819\tbroken \t\t1.0143\tcute \n",
644 | "\t-1.6807\tstruggling \t\t1.0130\thola \n",
645 | "\t-1.6773\tbad luck \t\t1.0125\tpositive \n",
646 | "\t-1.6762\tshame \t\t1.0092\tcolorblindfish \n",
647 | "\t-1.6724\tsummer school \t\t1.0091\thiya \n",
648 | "\t-1.6713\thmph \t\t1.0075\tfantastic \n",
649 | "\t-1.6679\tfml \t\t1.0065\tre good \n",
650 | "\t-1.6664\tugh \t\t1.0054\tshine \n",
651 | "\t-1.6660\trainin \t\t1.0046\tcome join \n",
652 | "\t-1.6660\tdisabled \t\t1.0037\tgreetings \n",
653 | "\t-1.6639\tlaid off \t\t1.0035\tamazing \n",
654 | "\t-1.6604\tdisappointment \t\t1.0033\taww thanks \n",
655 | "\t-1.6586\tis down \t\t1.0015\tbrilliant \n",
656 | "\t-1.6524\tfeeling ill \t\t1.0001\trealhughjackman\n",
657 | "\t-1.6439\tpity \t\t0.9999\tpumped \n",
658 | "\t-1.6387\tterrible \t\t0.9986\tgoodmorning \n",
659 | "\t-1.6372\tyucky \t\t0.9980\tthe poor \n",
660 | "\t-1.6360\twon see \t\t0.9963\tpower of \n",
661 | "\t-1.6326\tneglected \t\t0.9934\tmade my \n",
662 | "\t-1.6319\theartburn \t\t0.9892\tcool \n",
663 | "\t-1.6310\touch \t\t0.9884\tawsome \n",
664 | "\t-1.6304\tnot ready \t\t0.9882\tthanking \n",
665 | "\t-1.6298\tnot showing \t\t0.9870\tttyl \n",
666 | "\t-1.6268\tanswer me \t\t0.9856\tloves \n",
667 | "\t-1.6252\tnot very \t\t0.9851\twooo \n",
668 | "\t-1.6218\tignoring me \t\t0.9826\tdays until \n",
669 | "\t-1.6205\tstung \t\t0.9800\tbliss \n",
670 | "\t-1.6190\tdisappeared \t\t0.9800\tlike plan \n",
671 | "\t-1.6179\tneed hug \t\t0.9782\thandsome \n",
672 | "\t-1.6171\t3gs \t\t0.9779\tmultiply \n",
673 | "\t-1.6163\tmiserable \t\t0.9768\tenjoy \n",
674 | "\t-1.6092\tclosed \t\t0.9748\tfamiliar \n",
675 | "\t-1.6086\tache \t\t0.9724\tunique \n",
676 | "\t-1.6056\tinfection \t\t0.9723\tcurious \n",
677 | "\t-1.6049\tbronchitis \t\t0.9647\tit fun \n",
678 | "\t-1.6043\tnoes \t\t0.9637\twoo \n",
679 | "\t-1.5960\twithout me \t\t0.9627\tthnx \n",
680 | "\t-1.5950\tsucky \t\t0.9620\tglad could \n",
681 | "\t-1.5946\tstranded \t\t0.9564\tugh im \n",
682 | "\t-1.5928\tdisconnected \t\t0.9535\tcutest \n",
683 | "\t-1.5886\tfeel ill \t\t0.9521\thappiness \n",
684 | "\t-1.5856\thate \t\t0.9500\ttry it \n",
685 | "\t-1.5785\tthe er \t\t0.9489\tyou wanna \n",
686 | "\t-1.5742\tdown today \t\t0.9475\tadore \n",
687 | "\t-1.5730\tloosing \t\t0.9464\tjust wanted \n",
688 | "\t-1.5664\tran over \t\t0.9427\tsweet \n",
689 | "\t-1.5642\tbad times \t\t0.9425\tinspiring \n",
690 | "\t-1.5633\tmms \t\t0.9415\tprefer \n",
691 | "\t-1.5568\tbut but \t\t0.9397\tmmmm \n",
692 | "\t-1.5544\tgoodbyes \t\t0.9371\tgood work \n",
693 | "\t-1.5473\tgotta wait \t\t0.9363\tmyweakness \n",
694 | "\t-1.5438\tick \t\t0.9359\tluck \n",
695 | "\t-1.5396\tbed alone \t\t0.9332\tnew favorite \n",
696 | "\t-1.5373\tlosing \t\t0.9327\tbetter now \n",
697 | "\t-1.5360\tno fair \t\t0.9310\tlovely \n",
698 | "\t-1.5343\tnot well \t\t0.9282\taw thanks \n",
699 | "\t-1.5329\tcoursework \t\t0.9282\tcutie \n",
700 | "\t-1.5326\tan end \t\t0.9278\tsleep tight \n",
701 | "\t-1.5317\tscrewed \t\t0.9212\t17 again \n",
702 | "\t-1.5306\tnot available \t\t0.9189\trelax \n",
703 | "\t-1.5290\thaving trouble \t\t0.9166\tproductive \n",
704 | "\t-1.5226\trest in \t\t0.9163\tholla \n",
705 | "\t-1.5206\tfather day \t\t0.9150\theyyy \n",
706 | "\t-1.5198\tsaddens \t\t0.9115\tfunny \n",
707 | "\t-1.5198\targh \t\t0.9104\thelps \n",
708 | "\t-1.5182\tcrappy \t\t0.9103\tis fun \n",
709 | "\t-1.5177\ttwitter won \t\t0.9093\tback later \n",
710 | "\t-1.5155\tdammit \t\t0.9079\tahaha \n",
711 | "\t-1.5145\tgloomy \t\t0.9044\tbeauty \n",
712 | "\t-1.5111\tsuffering \t\t0.9027\tlove being \n",
713 | "\t-1.5094\tiranelection \t\t0.9027\teveryone should\n",
714 | "\t-1.5080\tworried \t\t0.8922\tbored bored \n",
715 | "\t-1.5058\tbut live \t\t0.8915\theyy \n",
716 | "\t-1.5047\tleaving me \t\t0.8865\tjust need \n",
717 | "\t-1.5046\tdangit \t\t0.8852\tya tomorrow \n",
718 | "\t-1.5038\tyou thank \t\t0.8835\tneat \n",
719 | "\t-1.5016\tspilled \t\t0.8834\tcompliment \n",
720 | "\t-1.5006\tnever answer \t\t0.8832\tmmmmm \n",
721 | "\t-1.5000\tnever see \t\t0.8816\tsooo excited \n",
722 | "\t-1.4992\tshitty \t\t0.8781\tgorgeous \n",
723 | "\t-1.4918\tsunburned \t\t0.8779\tiamdiddy \n",
724 | "\t-1.4899\tdamnit \t\t0.8765\tits great \n",
725 | "\t-1.4888\tto reality \t\t0.8718\tty \n",
726 | "\t-1.4884\tscratched \t\t0.8713\twas sick \n",
727 | "\t-1.4871\tgot cold \t\t0.8705\tthat cute \n",
728 | "\t-1.4843\tswollen \t\t0.8701\tyou might \n",
729 | "\t-1.4841\ttear \t\t0.8686\thahaha \n",
730 | "\t-1.4803\tnot playing \t\t0.8636\tlets \n",
731 | "\t-1.4756\tfail \t\t0.8604\tbom \n",
732 | "\t-1.4734\tisnt working \t\t0.8591\twww \n",
733 | "\t-1.4717\tstolen \t\t0.8573\tlovee \n",
734 | "\t-1.4706\tsome reason \t\t0.8565\tperfect \n",
735 | "\t-1.4673\tsicky \t\t0.8551\talways good \n",
736 | "\t-1.4667\tcried \t\t0.8543\tgood evening \n",
737 | "\t-1.4634\tregents \t\t0.8522\tnice \n",
738 | "\t-1.4620\ttears \t\t0.8509\tgreat news \n",
739 | "\t-1.4580\tshattered \t\t0.8495\tof course \n",
740 | "\t-1.4539\tfrustrating \t\t0.8492\tfly with \n",
741 | "\t-1.4533\texpired \t\t0.8460\tloves it \n",
742 | "\t-1.4530\tburnt my \t\t0.8415\tentertaining \n",
743 | "\t-1.4522\tbeing mean \t\t0.8328\tgiveaway \n",
744 | "\t-1.4521\tno1 \t\t0.8316\tgreat meeting \n",
745 | "\t-1.4501\tforgot my \t\t0.8311\tjoin me \n",
746 | "\t-1.4488\tiphone update \t\t0.8261\thurts like \n",
747 | "\t-1.4467\tnoooooooo \t\t0.8250\tfeedback \n",
748 | "\t-1.4457\trained \t\t0.8210\thumor \n",
749 | "\t-1.4448\tgoodbye \t\t0.8171\tme laugh \n",
750 | "\t-1.4441\thumid \t\t0.8170\tsurprised \n",
751 | "\t-1.4433\tnooo \t\t0.8162\tha \n",
752 | "\t-1.4407\tcrushed \t\t0.8156\tjust wondering \n",
753 | "\t-1.4402\tstressed \t\t0.8130\tgood choice \n",
754 | "\t-1.4381\twheres \t\t0.8130\teven better \n",
755 | "\t-1.4345\thow come \t\t0.8119\tanytime \n",
756 | "\t-1.4337\tcry \t\t0.8114\tin case \n",
757 | "\t-1.4261\tdivorce \t\t0.8092\tmrtweet \n",
758 | "\t-1.4230\tboo \t\t0.8079\tfeatured \n",
759 | "\t-1.4220\tfroze \t\t0.8063\tfresh \n",
760 | "\t-1.4217\tsuspended \t\t0.8047\there come \n",
761 | "\t-1.4217\twishing could \t\t0.8013\trecommendation \n",
762 | "\t-1.4189\towie \t\t0.7996\thello there \n",
763 | "\t-1.4181\tbad day \t\t0.7986\thi \n",
764 | "\t-1.4167\tdelayed \t\t0.7975\tcelebration \n",
765 | "\t-1.4162\tsob \t\t0.7955\tnighty \n",
766 | "\t-1.4138\tno sun \t\t0.7954\taha \n",
767 | "\t-1.4124\tmissed \t\t0.7952\tcertainly \n",
768 | "\t-1.4108\thuhu \t\t0.7949\tpeace \n",
769 | "\t-1.4101\trubbish \t\t0.7940\twent well \n",
770 | "\t-1.4077\tashamed \t\t0.7929\thell yeah \n",
771 | "\t-1.4057\tdrained \t\t0.7923\twatch out \n",
772 | "\t-1.4051\tno go \t\t0.7921\tsuccess \n",
773 | "\t-1.4023\tbut nobody \t\t0.7921\tlaughs \n",
774 | "\t-1.3996\t30 tag \t\t0.7866\tð¼ð \n",
775 | "\t-1.3966\tsomething wrong\t\t0.7837\tenjoyed \n",
776 | "\t-1.3957\tcannot \t\t0.7827\ttalented \n",
777 | "\t-1.3948\twe lost \t\t0.7826\tclever \n",
778 | "\t-1.3945\targhh \t\t0.7825\t100 followers \n",
779 | "\t-1.3933\tignored \t\t0.7816\tchillin \n",
780 | "\t-1.3931\twoe \t\t0.7809\tgreat week \n",
781 | "\t-1.3926\twhy can \t\t0.7797\thow are \n",
782 | "\t-1.3891\ttorn \t\t0.7788\tlove this \n",
783 | "\t-1.3880\tfailing \t\t0.7787\tgenius \n",
784 | "\t-1.3865\tmuch pain \t\t0.7783\tcoolest \n",
785 | "\t-1.3855\tworst \t\t0.7768\tget sick \n",
786 | "\t-1.3772\tsniffle \t\t0.7768\tmuch better \n",
787 | "\t-1.3769\tnot able \t\t0.7765\thttp blip \n",
788 | "\t-1.3739\tdisgusting \t\t0.7763\tyesss \n",
789 | "\t-1.3739\targ \t\t0.7761\tconversation \n",
790 | "\t-1.3731\tbugger \t\t0.7752\tchatting \n",
791 | "\t-1.3728\tbit my \t\t0.7731\tsweet dreams \n",
792 | "\t-1.3723\tno word \t\t0.7729\tuseful \n",
793 | "\t-1.3708\tvet \t\t0.7720\tchilling \n",
794 | "\t-1.3683\tlost my \t\t0.7718\tcase you \n",
795 | "\t-1.3681\tleaves \t\t0.7704\tmmm \n",
796 | "\t-1.3603\tmourning \t\t0.7700\tbeautiful \n",
797 | "\t-1.3602\ttoo fast \t\t0.7700\thaha \n",
798 | "\t-1.3601\treally ill \t\t0.7692\tthe lord \n",
799 | "\t-1.3596\tgrrrrrr \t\t0.7683\thahahaha \n",
800 | "\t-1.3589\tgrrr \t\t0.7677\tworth it \n",
801 | "\t-1.3576\tripped \t\t0.7668\tright back \n",
802 | "\t-1.3499\tasthma \t\t0.7639\theadache and \n",
803 | "\t-1.3480\tgross \t\t0.7625\tin mind \n",
804 | "\t-1.3422\tdies \t\t0.7617\tof fun \n",
805 | "\t-1.3421\tpainful \t\t0.7610\tyour thinking \n",
806 | "\t-1.3420\twasn able \t\t0.7599\thas great \n",
807 | "\t-1.3412\tyuck \t\t0.7598\tcelebrating \n",
808 | "\t-1.3398\tthrow up \t\t0.7589\tappreciate that\n",
809 | "\t-1.3391\tcouldn make \t\t0.7572\tlakers \n",
810 | "\t-1.3380\tsurgery \t\t0.7538\tlong as \n",
811 | "\t-1.3380\tdestroyed \t\t0.7514\twhoop \n",
812 | "\t-1.3377\tcant \t\t0.7497\trockin \n",
813 | "\t-1.3367\tdislike \t\t0.7480\tinteresting \n",
814 | "\t-1.3358\thating \t\t0.7466\tlet get \n",
815 | "\t-1.3346\tis ill \t\t0.7466\tclassic \n",
816 | "\t-1.3345\tstressing \t\t0.7433\trecommend \n",
817 | "\t-1.3339\tunable \t\t0.7425\tdon worry \n",
818 | "\t-1.3319\truining \t\t0.7401\thah \n",
819 | "\t-1.3305\tno iphone \t\t0.7400\train rain \n",
820 | "\t-1.3302\tcrashed \t\t0.7390\trocking \n",
821 | "\t-1.3290\ted mcmahon \t\t0.7384\tsweetheart \n",
822 | "\t-1.3233\tpoisoning \t\t0.7357\tcoming soon \n",
823 | "\t-1.3231\tstorming \t\t0.7347\tfinally \n",
824 | "\t-1.3223\tblister \t\t0.7339\tjust thought \n",
825 | "\t-1.3212\thurt \t\t0.7318\tsad and \n",
826 | "\t-1.3198\tos3 \t\t0.7301\tfollow them \n",
827 | "\t-1.3194\twhat waste \t\t0.7295\tlove \n",
828 | "\t-1.3181\thorrid \t\t0.7288\trules \n",
829 | "\t-1.3180\tallergic \t\t0.7282\tvery happy \n",
830 | "\t-1.3177\tsprained \t\t0.7274\tdm me \n",
831 | "\t-1.3162\tboo for \t\t0.7269\tgreatest \n",
832 | "\t-1.3155\ttrackle \t\t0.7253\ttwittering \n",
833 | "\t-1.3148\twas hoping \t\t0.7241\taww thank \n",
834 | "\t-1.3136\trefuses \t\t0.7206\tmariahcarey \n",
835 | "\t-1.3121\tisn working \t\t0.7174\tnow following \n",
836 | "\t-1.3116\tburned \t\t0.7161\tawww thanks \n",
837 | "\t-1.3115\tsleepless night\t\t0.7144\tsmart \n",
838 | "\t-1.3095\tnightmare \t\t0.7137\ttasty \n",
839 | "\t-1.3058\tbad news \t\t0.7119\tdont worry \n",
840 | "\t-1.3055\tno power \t\t0.7110\tgreat to \n",
841 | "\t-1.3053\tdoesn look \t\t0.7109\thave beautiful \n",
842 | "\t-1.3051\tno bueno \t\t0.7100\tblast \n",
843 | "\t-1.3047\tdon like \t\t0.7089\tbeing sick \n",
844 | "\t-1.3023\tconcert but \t\t0.7080\tbed good \n",
845 | "\t-1.3023\tcancel \t\t0.7071\tso exciting \n",
846 | "\t-1.3016\turgh \t\t0.7066\tpoor guy \n",
847 | "\t-1.2999\tdeleted \t\t0.7056\tbirthday hope \n",
848 | "\t-1.2976\tbleeding \t\t0.7043\tsad he \n",
849 | "\t-1.2953\tsry \t\t0.7042\tcheck them \n",
850 | "\t-1.2939\toh noes \t\t0.7038\tmy dear \n",
851 | "\t-1.2939\tkeeps crashing \t\t0.7034\tfeel sad \n",
852 | "\t-1.2933\tmad that \t\t0.7030\tmy headache \n",
853 | "\t-1.2906\tnauseous \t\t0.7022\tthat right \n",
854 | "\t-1.2899\tcan find \t\t0.7016\tquotes \n",
855 | "\t-1.2895\tbye to \t\t0.7014\tam happy \n",
856 | "\t-1.2892\tso mad \t\t0.6997\ttweeters \n",
857 | "\t-1.2891\tanxiety \t\t0.6989\tto hurt \n",
858 | "\t-1.2891\tcant find \t\t0.6988\t½ï \n",
859 | "\t-1.2890\tno friends \t\t0.6985\tstyle \n",
860 | "\t-1.2880\tnever gonna \t\t0.6980\treally cute \n",
861 | "\t-1.2875\tnot impressed \t\t0.6973\tperhaps \n",
862 | "\t-1.2858\tnot here \t\t0.6969\twell worth \n",
863 | "\t-1.2851\tfeel for \t\t0.6963\texciting \n",
864 | "\t-1.2832\thasnt \t\t0.6960\tmuch fun \n",
865 | "\t-1.2832\tlast show \t\t0.6947\tsurprise \n",
866 | "\t-1.2812\tlink didn \t\t0.6940\tchallenge \n",
867 | "\t-1.2785\tsniff \t\t0.6939\thave great \n",
868 | "\t-1.2778\thacked \t\t0.6936\ttweeties \n",
869 | "\t-1.2761\tsigh \t\t0.6898\tto ya \n",
870 | "\t-1.2759\tsorry \t\t0.6896\thappy sunday \n",
871 | "\t-1.2750\tis acting \t\t0.6881\tliked it \n",
872 | "\t-1.2674\texpensive \t\t0.6878\tfor follow \n",
873 | "\t-1.2664\tscared \t\t0.6868\tmorning good \n",
874 | "\t-1.2646\tsucked \t\t0.6865\t140 \n",
875 | "\t-1.2619\tdidn get \t\t0.6864\tbride \n",
876 | "\t-1.2616\tstill waiting \t\t0.6862\ttweet was \n",
877 | "\t-1.2615\tdamaged \t\t0.6856\ttune \n",
878 | "\t-1.2614\tannoyed \t\t0.6855\tthe follow \n",
879 | "\t-1.2595\toww \t\t0.6847\tgood \n",
880 | "\t-1.2569\tblows \t\t0.6844\tdoing great \n",
881 | "\t-1.2566\tbut me \t\t0.6840\ttomfelton \n",
882 | "\t-1.2554\tmessed \t\t0.6838\tdiversity \n",
883 | "\t-1.2553\tnever going \t\t0.6827\tcool http \n",
884 | "\t-1.2539\thit my \t\t0.6822\tgreat \n",
885 | "\t-1.2529\tnone \t\t0.6815\tfollow \n",
886 | "\t-1.2529\tstrep \t\t0.6797\tgreat job \n",
887 | "\t-1.2525\tfalling apart \t\t0.6795\tvery cute \n",
888 | "\t-1.2516\tmigraines \t\t0.6794\tdepends \n",
889 | "\t-1.2504\tgonna rain \t\t0.6792\tindeed \n",
890 | "\t-1.2496\tso sorry \t\t0.6791\tfollow him \n",
891 | "\t-1.2494\tblocked \t\t0.6775\tsick of \n",
892 | "\t-1.2491\tlast episode \t\t0.6768\ttell all \n",
893 | "\t-1.2490\tdull \t\t0.6747\tthe ff \n",
894 | "\t-1.2489\tnot getting \t\t0.6713\tremember to \n",
895 | "\t-1.2473\tby myself \t\t0.6706\thi there \n",
896 | "\t-1.2434\tboo hoo \t\t0.6704\tit great \n",
897 | "\t-1.2433\targhhh \t\t0.6697\tadventure \n",
898 | "\t-1.2402\tjailbreak \t\t0.6690\tsang \n",
899 | "\t-1.2398\tcouldn go \t\t0.6685\tmuseum \n",
900 | "\t-1.2390\tcoulda \t\t0.6671\thurts from \n",
901 | "\t-1.2385\tattacked \t\t0.6669\tfor asking \n",
902 | "\t-1.2374\twishes he \t\t0.6656\tstrawberry \n",
903 | "\t-1.2364\tcloudy \t\t0.6650\ttoo cute \n",
904 | "\t-1.2362\tbe missed \t\t0.6650\tdig \n",
905 | "\t-1.2353\tfreezing \t\t0.6650\tchill \n",
906 | "\t-1.2350\tim lonely \t\t0.6632\tyoungq \n",
907 | "\t-1.2339\tlost \t\t0.6629\tlove my \n",
908 | "\t-1.2332\tdavid carradine\t\t0.6628\tkeep the \n",
909 | "\t-1.2316\tfeeling down \t\t0.6622\tcreative \n",
910 | "\t-1.2308\tsuicide \t\t0.6613\tdelicious \n",
911 | "\t-1.2301\twish \t\t0.6610\tgood plan \n",
912 | "\t-1.2276\tneed new \t\t0.6606\tcheck this \n",
913 | "\t-1.2276\twhy does \t\t0.6604\tall is \n",
914 | "\t-1.2270\tfell \t\t0.6594\tjoin us \n",
915 | "\t-1.2263\tpostponed \t\t0.6593\thave nice \n",
916 | "\t-1.2262\twhy did \t\t0.6578\the he \n",
917 | "\t-1.2235\tsunburnt \t\t0.6570\tsong of \n",
918 | "\t-1.2227\tallergies \t\t0.6569\tsexy \n",
919 | "\t-1.2199\t447 \t\t0.6561\tgot my \n",
920 | "\t-1.2163\tburnt \t\t0.6556\ttwitterverse \n",
921 | "\t-1.2162\tisn going \t\t0.6544\tjoining \n",
922 | "\t-1.2145\tis leaving \t\t0.6538\tand eating \n",
923 | "\t-1.2143\tfever \t\t0.6533\theaven \n",
924 | "\t-1.2140\tmy hopes \t\t0.6530\tsmile on \n",
925 | "\t-1.2135\twas looking \t\t0.6527\tannoying you \n",
926 | "\t-1.2135\tarent \t\t0.6526\tre very \n",
927 | "\t-1.2134\tfor maintenance\t\t0.6526\they \n",
928 | "\t-1.2111\toh no \t\t0.6524\tre cool \n",
929 | "\t-1.2109\tsore \t\t0.6517\tallen \n",
930 | "\t-1.2108\twhere is \t\t0.6516\tits good \n",
931 | "\t-1.2102\tdentist \t\t0.6516\tblessed day \n",
932 | "\t-1.2080\twhy wont \t\t0.6493\tadded \n",
933 | "\t-1.2079\taww man \t\t0.6491\tbanksyart2 \n",
934 | "\t-1.2069\twishing was \t\t0.6482\tsome fun \n",
935 | "\t-1.2066\tdepression \t\t0.6466\tworship \n",
936 | "\t-1.2059\toh dear \t\t0.6448\tat http \n",
937 | "\t-1.2038\tsuck \t\t0.6447\thope to \n",
938 | "\t-1.2035\tcool especially\t\t0.6444\ttake look \n",
939 | "\t-1.2033\tuncomfortable \t\t0.6439\tlistening \n",
940 | "\t-1.2030\trained out \t\t0.6428\tall good \n",
941 | "\t-1.2028\tgetting old \t\t0.6425\tworry \n",
942 | "\t-1.2017\tstill havent \t\t0.6424\trecommendations\n",
943 | "\t-1.2004\tnot having \t\t0.6422\tbirthday wishes\n",
944 | "\t-1.2000\twish was \t\t0.6421\thahah \n",
945 | "\t-1.1979\tthrew up \t\t0.6403\tsir \n",
946 | "\t-1.1960\tughhhhh \t\t0.6397\tmay the \n",
947 | "\t-1.1959\ticky \t\t0.6396\thave wonderful \n",
948 | "\t-1.1936\tacting up \t\t0.6390\tlove it \n",
949 | "\t-1.1928\twont work \t\t0.6368\tretweet \n",
950 | "\t-1.1928\tcouldnt \t\t0.6365\tgeek \n",
951 | "\t-1.1919\twhat happened \t\t0.6363\ttweeps \n",
952 | "\t-1.1915\tworrying \t\t0.6358\tyou too \n",
953 | "\t-1.1906\tsaddens me \t\t0.6349\tappreciate \n",
954 | "\t-1.1896\tgrr \t\t0.6338\tmyspace com \n",
955 | "\t-1.1866\twon load \t\t0.6333\thow cute \n",
956 | "\t-1.1822\thit by \t\t0.6324\thappy mother \n",
957 | "\t-1.1783\tughh \t\t0.6324\tspreading \n",
958 | "\t-1.1779\tiphone software\t\t0.6316\tincredible \n",
959 | "\t-1.1779\tstuck \t\t0.6292\tis perfect \n",
960 | "\t-1.1769\tunwell \t\t0.6275\tnothing like \n",
961 | "\t-1.1767\tbut no \t\t0.6261\tall love \n",
962 | "\t-1.1739\tvery sorry \t\t0.6255\tare great \n",
963 | "\t-1.1732\thaving issues \t\t0.6245\thad fun \n",
964 | "\t-1.1730\tbe long \t\t0.6238\tall about \n",
965 | "\t-1.1697\trunning out \t\t0.6234\tloves the \n",
966 | "\t-1.1683\terror \t\t0.6230\tmost welcome \n",
967 | "\t-1.1682\tcruel \t\t0.6213\tsick so \n",
968 | "\t-1.1676\twishing \t\t0.6194\tyes \n",
969 | "\t-1.1672\tewww \t\t0.6190\tcontent \n",
970 | "\t-1.1666\tall alone \t\t0.6185\trocked \n",
971 | "\t-1.1639\tsmh \t\t0.6181\tyou really \n",
972 | "\t-1.1622\tmissing her \t\t0.6178\tat www \n",
973 | "\t-1.1622\tno money \t\t0.6173\tplenty \n",
974 | "\t-1.1616\titchy \t\t0.6163\tor pay \n",
975 | "\t-1.1610\trash \t\t0.6158\tlyrics \n",
976 | "\t-1.1600\twisdom teeth \t\t0.6155\tjust ordered \n",
977 | "\t-1.1569\tis broken \t\t0.6148\tno sorry \n",
978 | "\t-1.1568\twhere did \t\t0.6146\tyou need \n",
979 | "\t-1.1566\twould but \t\t0.6141\tfave \n",
980 | "\t-1.1562\tgetting cold \t\t0.6125\tis loving \n",
981 | "\t-1.1560\tand stuck \t\t0.6119\twho came \n",
982 | "\t-1.1555\twhen will \t\t0.6116\tmorning how \n",
983 | "\t-1.1552\twimbledon \t\t0.6111\toh and \n",
984 | "\t-1.1543\tso confused \t\t0.6104\thurts and \n",
985 | "\t-1.1522\tshould ve \t\t0.6098\tspread the \n",
986 | "\t-1.1519\tpathetic \t\t0.6083\taplusk \n",
987 | "\t-1.1515\tfawcett \t\t0.6078\tnerd \n",
988 | "\t-1.1503\tissues with \t\t0.6076\tyup \n",
989 | "\t-1.1502\tthey took \t\t0.6072\tsick and \n",
990 | "\t-1.1501\tcouldn even \t\t0.6041\tgot twitter \n",
991 | "\t-1.1491\tisnt \t\t0.6027\tfine \n",
992 | "\t-1.1473\twish lived \t\t0.6025\thomemade \n",
993 | "\t-1.1468\taccident \t\t0.6018\task \n",
994 | "\t-1.1464\tnot working \t\t0.6006\thow love \n",
995 | "\t-1.1439\tboring \t\t0.5995\tbe sick \n"
996 | ]
997 | }
998 | ],
999 | "source": [
1000 | "show_most_informative_features(feature_names, sentiment_pipeline.steps[2][1], n=500)"
1001 | ]
1002 | }
1003 | ],
1004 | "metadata": {
1005 | "kernelspec": {
1006 | "display_name": "Python 3",
1007 | "language": "python",
1008 | "name": "python3"
1009 | },
1010 | "language_info": {
1011 | "codemirror_mode": {
1012 | "name": "ipython",
1013 | "version": 3
1014 | },
1015 | "file_extension": ".py",
1016 | "mimetype": "text/x-python",
1017 | "name": "python",
1018 | "nbconvert_exporter": "python",
1019 | "pygments_lexer": "ipython3",
1020 | "version": "3.5.0"
1021 | }
1022 | },
1023 | "nbformat": 4,
1024 | "nbformat_minor": 0
1025 | }
1026 |
--------------------------------------------------------------------------------