"""Bag-of-words sentiment analyzer.

Trains an NLTK Naive Bayes classifier on the English opinion lexicon (every
lexicon entry becomes a one-word feature set labelled 'positive' or
'negative') and evaluates it against the rt-polarity review sentences.

Data files, resolved relative to the current working directory:
    opinion-lexicon-English/positive-words.txt
    opinion-lexicon-English/negative-words.txt
    polarity-data/rt-polaritydata/rt-polarity-pos.txt
    polarity-data/rt-polaritydata/rt-polarity-neg.txt

Importing or running this module loads the lexicon, trains the classifier,
prints summary statistics and runs the evaluation.
"""
from __future__ import division

__author__ = 'shekhargulati'

import collections
import os

import nltk.classify
import nltk.metrics
from nltk.classify import NaiveBayesClassifier


ENGLISH_OPINION_LEXICON_LOCATION = os.path.join('opinion-lexicon-English')
POS_WORDS_FILE = os.path.join(ENGLISH_OPINION_LEXICON_LOCATION, 'positive-words.txt')
NEG_WORDS_FILE = os.path.join(ENGLISH_OPINION_LEXICON_LOCATION, 'negative-words.txt')

# positive-words.txt opens with 34 ';' comment lines followed by one blank
# line, so the first word sits on line 36.
# NOTE(review): negative-words.txt is assumed to share that layout - confirm.
LEXICON_HEADER_LINES = 35


def _read_lines(path):
    """Return every line of *path* (newlines kept) and close the file."""
    with open(path, 'r') as handle:
        return handle.readlines()


def _load_lexicon(path, label, header_lines=LEXICON_HEADER_LINES):
    """Build NLTK training samples from a lexicon file.

    :param path: lexicon file holding one word per line after its header.
    :param label: sentiment label attached to every word in the file.
    :param header_lines: number of leading lines to skip.
    :return: list of ``({word: True}, label)`` tuples.
    """
    return [({line.rstrip(): True}, label)
            for line in _read_lines(path)[header_lines:]]


# Resulting shape:
# pos_words = [({'amazing': True}, 'positive'), ({'great': True}, 'positive')]
# neg_words = [({'pathetic': True}, 'negative')]
pos_words = _load_lexicon(POS_WORDS_FILE, 'positive')
neg_words = _load_lexicon(NEG_WORDS_FILE, 'negative')

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("First 5 positive words %s " % pos_words[:5])
print("First 5 negative words %s" % neg_words[:5])

print("Number of positive words %d" % len(pos_words))

print("Number of negative words %d" % len(neg_words))

all_words_with_sentiment = pos_words + neg_words

print("Total number of words %d" % len(all_words_with_sentiment))

classifier = NaiveBayesClassifier.train(all_words_with_sentiment)


def to_dictionary(words):
    """Return a ``{word: True}`` feature dictionary for *words*."""
    return {word: True for word in words}


# Gold-labelled ``[features, expected_sentiment]`` pairs collected by
# predict_sentiment(); consumed by the accuracy computation below.
test_data = []


def predict_sentiment(text, expected_sentiment=None):
    """Classify *text* and return the predicted sentiment label.

    :param text: raw text; it is split on whitespace into word features.
    :param expected_sentiment: optional gold label. When given, the sample
        is recorded in ``test_data`` for the accuracy computation.
    :return: the label chosen by the module-level ``classifier``.
    """
    text_to_classify = to_dictionary(text.split())
    result = classifier.classify(text_to_classify)
    # Unlabelled predictions must not enter the gold set: a None label can
    # never match a prediction and would only drag the accuracy down.
    if expected_sentiment is not None:
        test_data.append([text_to_classify, expected_sentiment])
    return result


POLARITY_DATA_DIR = os.path.join('polarity-data', 'rt-polaritydata')
POSITIVE_REVIEWS_FILE = os.path.join(POLARITY_DATA_DIR, 'rt-polarity-pos.txt')
NEGATIVE_REVIEWS_FILE = os.path.join(POLARITY_DATA_DIR, 'rt-polarity-neg.txt')


def _format_metric(value):
    """Format a metric with two decimals, or 'n/a' when it is undefined.

    nltk.metrics.precision / recall return None when the set they divide by
    is empty; '%.2f' % None would raise TypeError.
    """
    return 'n/a' if value is None else '%.2f' % value


def run_sentiment_analysis_on_rt():
    """Evaluate the classifier on the rt-polarity reviews and print metrics.

    Prints the number of misclassified reviews per file, then accuracy and
    per-label precision and recall.
    """
    # One shared reference/prediction mapping across both files. Keeping a
    # separate pair per file makes every predicted set a subset of its
    # reference set, which pins precision at 1.00 regardless of the errors.
    expected = collections.defaultdict(set)
    predicted = collections.defaultdict(set)

    evaluation_plan = (
        ('positive', POSITIVE_REVIEWS_FILE, 'negative',
         "Total Negative found in positive reviews %s"),
        ('negative', NEGATIVE_REVIEWS_FILE, 'positive',
         "Total Positive found in negative reviews %s"),
    )
    for label, path, wrong_label, message in evaluation_plan:
        misclassified = 0
        for index, review in enumerate(_read_lines(path)):
            # (label, index) keeps identifiers unique across the two files.
            sample_id = (label, index)
            expected[label].add(sample_id)
            actual_sentiment = predict_sentiment(review, label)
            predicted[actual_sentiment].add(sample_id)
            if actual_sentiment == wrong_label:
                misclassified += 1
        print(message % misclassified)

    print('accuracy: %.2f' % nltk.classify.util.accuracy(classifier, test_data))
    print('pos precision: %s' % _format_metric(
        nltk.metrics.precision(expected['positive'], predicted['positive'])))
    print('pos recall: %s' % _format_metric(
        nltk.metrics.recall(expected['positive'], predicted['positive'])))
    print('neg precision: %s' % _format_metric(
        nltk.metrics.precision(expected['negative'], predicted['negative'])))
    print('neg recall: %s' % _format_metric(
        nltk.metrics.recall(expected['negative'], predicted['negative'])))


run_sentiment_analysis_on_rt()
24 | ; See the paper below: 25 | ; 26 | ; Bing Liu. "Sentiment Analysis and Subjectivity." An chapter in 27 | ; Handbook of Natural Language Processing, Second Edition, 28 | ; (editors: N. Indurkhya and F. J. Damerau), 2010. 29 | ; 30 | ; 2. You will notice many misspelled words in the list. They are not 31 | ; mistakes. They are included as these misspelled words appear 32 | ; frequently in social media content. 33 | ; 34 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 35 | 36 | a+ 37 | abound 38 | abounds 39 | abundance 40 | abundant 41 | accessable 42 | accessible 43 | acclaim 44 | acclaimed 45 | acclamation 46 | accolade 47 | accolades 48 | accommodative 49 | accomodative 50 | accomplish 51 | accomplished 52 | accomplishment 53 | accomplishments 54 | accurate 55 | accurately 56 | achievable 57 | achievement 58 | achievements 59 | achievible 60 | acumen 61 | adaptable 62 | adaptive 63 | adequate 64 | adjustable 65 | admirable 66 | admirably 67 | admiration 68 | admire 69 | admirer 70 | admiring 71 | admiringly 72 | adorable 73 | adore 74 | adored 75 | adorer 76 | adoring 77 | adoringly 78 | adroit 79 | adroitly 80 | adulate 81 | adulation 82 | adulatory 83 | advanced 84 | advantage 85 | advantageous 86 | advantageously 87 | advantages 88 | adventuresome 89 | adventurous 90 | advocate 91 | advocated 92 | advocates 93 | affability 94 | affable 95 | affably 96 | affectation 97 | affection 98 | affectionate 99 | affinity 100 | affirm 101 | affirmation 102 | affirmative 103 | affluence 104 | affluent 105 | afford 106 | affordable 107 | affordably 108 | afordable 109 | agile 110 | agilely 111 | agility 112 | agreeable 113 | agreeableness 114 | agreeably 115 | all-around 116 | alluring 117 | alluringly 118 | altruistic 119 | altruistically 120 | amaze 121 | amazed 122 | amazement 123 | amazes 124 | amazing 125 | amazingly 126 | ambitious 127 | ambitiously 128 | ameliorate 129 | amenable 130 | amenity 131 | amiability 132 | amiabily 133 | 
amiable 134 | amicability 135 | amicable 136 | amicably 137 | amity 138 | ample 139 | amply 140 | amuse 141 | amusing 142 | amusingly 143 | angel 144 | angelic 145 | apotheosis 146 | appeal 147 | appealing 148 | applaud 149 | appreciable 150 | appreciate 151 | appreciated 152 | appreciates 153 | appreciative 154 | appreciatively 155 | appropriate 156 | approval 157 | approve 158 | ardent 159 | ardently 160 | ardor 161 | articulate 162 | aspiration 163 | aspirations 164 | aspire 165 | assurance 166 | assurances 167 | assure 168 | assuredly 169 | assuring 170 | astonish 171 | astonished 172 | astonishing 173 | astonishingly 174 | astonishment 175 | astound 176 | astounded 177 | astounding 178 | astoundingly 179 | astutely 180 | attentive 181 | attraction 182 | attractive 183 | attractively 184 | attune 185 | audible 186 | audibly 187 | auspicious 188 | authentic 189 | authoritative 190 | autonomous 191 | available 192 | aver 193 | avid 194 | avidly 195 | award 196 | awarded 197 | awards 198 | awe 199 | awed 200 | awesome 201 | awesomely 202 | awesomeness 203 | awestruck 204 | awsome 205 | backbone 206 | balanced 207 | bargain 208 | beauteous 209 | beautiful 210 | beautifullly 211 | beautifully 212 | beautify 213 | beauty 214 | beckon 215 | beckoned 216 | beckoning 217 | beckons 218 | believable 219 | believeable 220 | beloved 221 | benefactor 222 | beneficent 223 | beneficial 224 | beneficially 225 | beneficiary 226 | benefit 227 | benefits 228 | benevolence 229 | benevolent 230 | benifits 231 | best 232 | best-known 233 | best-performing 234 | best-selling 235 | better 236 | better-known 237 | better-than-expected 238 | beutifully 239 | blameless 240 | bless 241 | blessing 242 | bliss 243 | blissful 244 | blissfully 245 | blithe 246 | blockbuster 247 | bloom 248 | blossom 249 | bolster 250 | bonny 251 | bonus 252 | bonuses 253 | boom 254 | booming 255 | boost 256 | boundless 257 | bountiful 258 | brainiest 259 | brainy 260 | brand-new 261 | brave 262 | bravery 263 | 
bravo 264 | breakthrough 265 | breakthroughs 266 | breathlessness 267 | breathtaking 268 | breathtakingly 269 | breeze 270 | bright 271 | brighten 272 | brighter 273 | brightest 274 | brilliance 275 | brilliances 276 | brilliant 277 | brilliantly 278 | brisk 279 | brotherly 280 | bullish 281 | buoyant 282 | cajole 283 | calm 284 | calming 285 | calmness 286 | capability 287 | capable 288 | capably 289 | captivate 290 | captivating 291 | carefree 292 | cashback 293 | cashbacks 294 | catchy 295 | celebrate 296 | celebrated 297 | celebration 298 | celebratory 299 | champ 300 | champion 301 | charisma 302 | charismatic 303 | charitable 304 | charm 305 | charming 306 | charmingly 307 | chaste 308 | cheaper 309 | cheapest 310 | cheer 311 | cheerful 312 | cheery 313 | cherish 314 | cherished 315 | cherub 316 | chic 317 | chivalrous 318 | chivalry 319 | civility 320 | civilize 321 | clarity 322 | classic 323 | classy 324 | clean 325 | cleaner 326 | cleanest 327 | cleanliness 328 | cleanly 329 | clear 330 | clear-cut 331 | cleared 332 | clearer 333 | clearly 334 | clears 335 | clever 336 | cleverly 337 | cohere 338 | coherence 339 | coherent 340 | cohesive 341 | colorful 342 | comely 343 | comfort 344 | comfortable 345 | comfortably 346 | comforting 347 | comfy 348 | commend 349 | commendable 350 | commendably 351 | commitment 352 | commodious 353 | compact 354 | compactly 355 | compassion 356 | compassionate 357 | compatible 358 | competitive 359 | complement 360 | complementary 361 | complemented 362 | complements 363 | compliant 364 | compliment 365 | complimentary 366 | comprehensive 367 | conciliate 368 | conciliatory 369 | concise 370 | confidence 371 | confident 372 | congenial 373 | congratulate 374 | congratulation 375 | congratulations 376 | congratulatory 377 | conscientious 378 | considerate 379 | consistent 380 | consistently 381 | constructive 382 | consummate 383 | contentment 384 | continuity 385 | contrasty 386 | contribution 387 | convenience 388 | 
convenient 389 | conveniently 390 | convience 391 | convienient 392 | convient 393 | convincing 394 | convincingly 395 | cool 396 | coolest 397 | cooperative 398 | cooperatively 399 | cornerstone 400 | correct 401 | correctly 402 | cost-effective 403 | cost-saving 404 | counter-attack 405 | counter-attacks 406 | courage 407 | courageous 408 | courageously 409 | courageousness 410 | courteous 411 | courtly 412 | covenant 413 | cozy 414 | creative 415 | credence 416 | credible 417 | crisp 418 | crisper 419 | cure 420 | cure-all 421 | cushy 422 | cute 423 | cuteness 424 | danke 425 | danken 426 | daring 427 | daringly 428 | darling 429 | dashing 430 | dauntless 431 | dawn 432 | dazzle 433 | dazzled 434 | dazzling 435 | dead-cheap 436 | dead-on 437 | decency 438 | decent 439 | decisive 440 | decisiveness 441 | dedicated 442 | defeat 443 | defeated 444 | defeating 445 | defeats 446 | defender 447 | deference 448 | deft 449 | deginified 450 | delectable 451 | delicacy 452 | delicate 453 | delicious 454 | delight 455 | delighted 456 | delightful 457 | delightfully 458 | delightfulness 459 | dependable 460 | dependably 461 | deservedly 462 | deserving 463 | desirable 464 | desiring 465 | desirous 466 | destiny 467 | detachable 468 | devout 469 | dexterous 470 | dexterously 471 | dextrous 472 | dignified 473 | dignify 474 | dignity 475 | diligence 476 | diligent 477 | diligently 478 | diplomatic 479 | dirt-cheap 480 | distinction 481 | distinctive 482 | distinguished 483 | diversified 484 | divine 485 | divinely 486 | dominate 487 | dominated 488 | dominates 489 | dote 490 | dotingly 491 | doubtless 492 | dreamland 493 | dumbfounded 494 | dumbfounding 495 | dummy-proof 496 | durable 497 | dynamic 498 | eager 499 | eagerly 500 | eagerness 501 | earnest 502 | earnestly 503 | earnestness 504 | ease 505 | eased 506 | eases 507 | easier 508 | easiest 509 | easiness 510 | easing 511 | easy 512 | easy-to-use 513 | easygoing 514 | ebullience 515 | ebullient 516 | ebulliently 517 | 
ecenomical 518 | economical 519 | ecstasies 520 | ecstasy 521 | ecstatic 522 | ecstatically 523 | edify 524 | educated 525 | effective 526 | effectively 527 | effectiveness 528 | effectual 529 | efficacious 530 | efficient 531 | efficiently 532 | effortless 533 | effortlessly 534 | effusion 535 | effusive 536 | effusively 537 | effusiveness 538 | elan 539 | elate 540 | elated 541 | elatedly 542 | elation 543 | electrify 544 | elegance 545 | elegant 546 | elegantly 547 | elevate 548 | elite 549 | eloquence 550 | eloquent 551 | eloquently 552 | embolden 553 | eminence 554 | eminent 555 | empathize 556 | empathy 557 | empower 558 | empowerment 559 | enchant 560 | enchanted 561 | enchanting 562 | enchantingly 563 | encourage 564 | encouragement 565 | encouraging 566 | encouragingly 567 | endear 568 | endearing 569 | endorse 570 | endorsed 571 | endorsement 572 | endorses 573 | endorsing 574 | energetic 575 | energize 576 | energy-efficient 577 | energy-saving 578 | engaging 579 | engrossing 580 | enhance 581 | enhanced 582 | enhancement 583 | enhances 584 | enjoy 585 | enjoyable 586 | enjoyably 587 | enjoyed 588 | enjoying 589 | enjoyment 590 | enjoys 591 | enlighten 592 | enlightenment 593 | enliven 594 | ennoble 595 | enough 596 | enrapt 597 | enrapture 598 | enraptured 599 | enrich 600 | enrichment 601 | enterprising 602 | entertain 603 | entertaining 604 | entertains 605 | enthral 606 | enthrall 607 | enthralled 608 | enthuse 609 | enthusiasm 610 | enthusiast 611 | enthusiastic 612 | enthusiastically 613 | entice 614 | enticed 615 | enticing 616 | enticingly 617 | entranced 618 | entrancing 619 | entrust 620 | enviable 621 | enviably 622 | envious 623 | enviously 624 | enviousness 625 | envy 626 | equitable 627 | ergonomical 628 | err-free 629 | erudite 630 | ethical 631 | eulogize 632 | euphoria 633 | euphoric 634 | euphorically 635 | evaluative 636 | evenly 637 | eventful 638 | everlasting 639 | evocative 640 | exalt 641 | exaltation 642 | exalted 643 | exaltedly 
644 | exalting 645 | exaltingly 646 | examplar 647 | examplary 648 | excallent 649 | exceed 650 | exceeded 651 | exceeding 652 | exceedingly 653 | exceeds 654 | excel 655 | exceled 656 | excelent 657 | excellant 658 | excelled 659 | excellence 660 | excellency 661 | excellent 662 | excellently 663 | excels 664 | exceptional 665 | exceptionally 666 | excite 667 | excited 668 | excitedly 669 | excitedness 670 | excitement 671 | excites 672 | exciting 673 | excitingly 674 | exellent 675 | exemplar 676 | exemplary 677 | exhilarate 678 | exhilarating 679 | exhilaratingly 680 | exhilaration 681 | exonerate 682 | expansive 683 | expeditiously 684 | expertly 685 | exquisite 686 | exquisitely 687 | extol 688 | extoll 689 | extraordinarily 690 | extraordinary 691 | exuberance 692 | exuberant 693 | exuberantly 694 | exult 695 | exultant 696 | exultation 697 | exultingly 698 | eye-catch 699 | eye-catching 700 | eyecatch 701 | eyecatching 702 | fabulous 703 | fabulously 704 | facilitate 705 | fair 706 | fairly 707 | fairness 708 | faith 709 | faithful 710 | faithfully 711 | faithfulness 712 | fame 713 | famed 714 | famous 715 | famously 716 | fancier 717 | fancinating 718 | fancy 719 | fanfare 720 | fans 721 | fantastic 722 | fantastically 723 | fascinate 724 | fascinating 725 | fascinatingly 726 | fascination 727 | fashionable 728 | fashionably 729 | fast 730 | fast-growing 731 | fast-paced 732 | faster 733 | fastest 734 | fastest-growing 735 | faultless 736 | fav 737 | fave 738 | favor 739 | favorable 740 | favored 741 | favorite 742 | favorited 743 | favour 744 | fearless 745 | fearlessly 746 | feasible 747 | feasibly 748 | feat 749 | feature-rich 750 | fecilitous 751 | feisty 752 | felicitate 753 | felicitous 754 | felicity 755 | fertile 756 | fervent 757 | fervently 758 | fervid 759 | fervidly 760 | fervor 761 | festive 762 | fidelity 763 | fiery 764 | fine 765 | fine-looking 766 | finely 767 | finer 768 | finest 769 | firmer 770 | first-class 771 | first-in-class 772 | 
first-rate 773 | flashy 774 | flatter 775 | flattering 776 | flatteringly 777 | flawless 778 | flawlessly 779 | flexibility 780 | flexible 781 | flourish 782 | flourishing 783 | fluent 784 | flutter 785 | fond 786 | fondly 787 | fondness 788 | foolproof 789 | foremost 790 | foresight 791 | formidable 792 | fortitude 793 | fortuitous 794 | fortuitously 795 | fortunate 796 | fortunately 797 | fortune 798 | fragrant 799 | free 800 | freed 801 | freedom 802 | freedoms 803 | fresh 804 | fresher 805 | freshest 806 | friendliness 807 | friendly 808 | frolic 809 | frugal 810 | fruitful 811 | ftw 812 | fulfillment 813 | fun 814 | futurestic 815 | futuristic 816 | gaiety 817 | gaily 818 | gain 819 | gained 820 | gainful 821 | gainfully 822 | gaining 823 | gains 824 | gallant 825 | gallantly 826 | galore 827 | geekier 828 | geeky 829 | gem 830 | gems 831 | generosity 832 | generous 833 | generously 834 | genial 835 | genius 836 | gentle 837 | gentlest 838 | genuine 839 | gifted 840 | glad 841 | gladden 842 | gladly 843 | gladness 844 | glamorous 845 | glee 846 | gleeful 847 | gleefully 848 | glimmer 849 | glimmering 850 | glisten 851 | glistening 852 | glitter 853 | glitz 854 | glorify 855 | glorious 856 | gloriously 857 | glory 858 | glow 859 | glowing 860 | glowingly 861 | god-given 862 | god-send 863 | godlike 864 | godsend 865 | gold 866 | golden 867 | good 868 | goodly 869 | goodness 870 | goodwill 871 | goood 872 | gooood 873 | gorgeous 874 | gorgeously 875 | grace 876 | graceful 877 | gracefully 878 | gracious 879 | graciously 880 | graciousness 881 | grand 882 | grandeur 883 | grateful 884 | gratefully 885 | gratification 886 | gratified 887 | gratifies 888 | gratify 889 | gratifying 890 | gratifyingly 891 | gratitude 892 | great 893 | greatest 894 | greatness 895 | grin 896 | groundbreaking 897 | guarantee 898 | guidance 899 | guiltless 900 | gumption 901 | gush 902 | gusto 903 | gutsy 904 | hail 905 | halcyon 906 | hale 907 | hallmark 908 | hallmarks 909 | hallowed 
910 | handier 911 | handily 912 | hands-down 913 | handsome 914 | handsomely 915 | handy 916 | happier 917 | happily 918 | happiness 919 | happy 920 | hard-working 921 | hardier 922 | hardy 923 | harmless 924 | harmonious 925 | harmoniously 926 | harmonize 927 | harmony 928 | headway 929 | heal 930 | healthful 931 | healthy 932 | hearten 933 | heartening 934 | heartfelt 935 | heartily 936 | heartwarming 937 | heaven 938 | heavenly 939 | helped 940 | helpful 941 | helping 942 | hero 943 | heroic 944 | heroically 945 | heroine 946 | heroize 947 | heros 948 | high-quality 949 | high-spirited 950 | hilarious 951 | holy 952 | homage 953 | honest 954 | honesty 955 | honor 956 | honorable 957 | honored 958 | honoring 959 | hooray 960 | hopeful 961 | hospitable 962 | hot 963 | hotcake 964 | hotcakes 965 | hottest 966 | hug 967 | humane 968 | humble 969 | humility 970 | humor 971 | humorous 972 | humorously 973 | humour 974 | humourous 975 | ideal 976 | idealize 977 | ideally 978 | idol 979 | idolize 980 | idolized 981 | idyllic 982 | illuminate 983 | illuminati 984 | illuminating 985 | illumine 986 | illustrious 987 | ilu 988 | imaculate 989 | imaginative 990 | immaculate 991 | immaculately 992 | immense 993 | impartial 994 | impartiality 995 | impartially 996 | impassioned 997 | impeccable 998 | impeccably 999 | important 1000 | impress 1001 | impressed 1002 | impresses 1003 | impressive 1004 | impressively 1005 | impressiveness 1006 | improve 1007 | improved 1008 | improvement 1009 | improvements 1010 | improves 1011 | improving 1012 | incredible 1013 | incredibly 1014 | indebted 1015 | individualized 1016 | indulgence 1017 | indulgent 1018 | industrious 1019 | inestimable 1020 | inestimably 1021 | inexpensive 1022 | infallibility 1023 | infallible 1024 | infallibly 1025 | influential 1026 | ingenious 1027 | ingeniously 1028 | ingenuity 1029 | ingenuous 1030 | ingenuously 1031 | innocuous 1032 | innovation 1033 | innovative 1034 | inpressed 1035 | insightful 1036 | 
insightfully 1037 | inspiration 1038 | inspirational 1039 | inspire 1040 | inspiring 1041 | instantly 1042 | instructive 1043 | instrumental 1044 | integral 1045 | integrated 1046 | intelligence 1047 | intelligent 1048 | intelligible 1049 | interesting 1050 | interests 1051 | intimacy 1052 | intimate 1053 | intricate 1054 | intrigue 1055 | intriguing 1056 | intriguingly 1057 | intuitive 1058 | invaluable 1059 | invaluablely 1060 | inventive 1061 | invigorate 1062 | invigorating 1063 | invincibility 1064 | invincible 1065 | inviolable 1066 | inviolate 1067 | invulnerable 1068 | irreplaceable 1069 | irreproachable 1070 | irresistible 1071 | irresistibly 1072 | issue-free 1073 | jaw-droping 1074 | jaw-dropping 1075 | jollify 1076 | jolly 1077 | jovial 1078 | joy 1079 | joyful 1080 | joyfully 1081 | joyous 1082 | joyously 1083 | jubilant 1084 | jubilantly 1085 | jubilate 1086 | jubilation 1087 | jubiliant 1088 | judicious 1089 | justly 1090 | keen 1091 | keenly 1092 | keenness 1093 | kid-friendly 1094 | kindliness 1095 | kindly 1096 | kindness 1097 | knowledgeable 1098 | kudos 1099 | large-capacity 1100 | laud 1101 | laudable 1102 | laudably 1103 | lavish 1104 | lavishly 1105 | law-abiding 1106 | lawful 1107 | lawfully 1108 | lead 1109 | leading 1110 | leads 1111 | lean 1112 | led 1113 | legendary 1114 | leverage 1115 | levity 1116 | liberate 1117 | liberation 1118 | liberty 1119 | lifesaver 1120 | light-hearted 1121 | lighter 1122 | likable 1123 | like 1124 | liked 1125 | likes 1126 | liking 1127 | lionhearted 1128 | lively 1129 | logical 1130 | long-lasting 1131 | lovable 1132 | lovably 1133 | love 1134 | loved 1135 | loveliness 1136 | lovely 1137 | lover 1138 | loves 1139 | loving 1140 | low-cost 1141 | low-price 1142 | low-priced 1143 | low-risk 1144 | lower-priced 1145 | loyal 1146 | loyalty 1147 | lucid 1148 | lucidly 1149 | luck 1150 | luckier 1151 | luckiest 1152 | luckiness 1153 | lucky 1154 | lucrative 1155 | luminous 1156 | lush 1157 | luster 1158 | lustrous 
1159 | luxuriant 1160 | luxuriate 1161 | luxurious 1162 | luxuriously 1163 | luxury 1164 | lyrical 1165 | magic 1166 | magical 1167 | magnanimous 1168 | magnanimously 1169 | magnificence 1170 | magnificent 1171 | magnificently 1172 | majestic 1173 | majesty 1174 | manageable 1175 | maneuverable 1176 | marvel 1177 | marveled 1178 | marvelled 1179 | marvellous 1180 | marvelous 1181 | marvelously 1182 | marvelousness 1183 | marvels 1184 | master 1185 | masterful 1186 | masterfully 1187 | masterpiece 1188 | masterpieces 1189 | masters 1190 | mastery 1191 | matchless 1192 | mature 1193 | maturely 1194 | maturity 1195 | meaningful 1196 | memorable 1197 | merciful 1198 | mercifully 1199 | mercy 1200 | merit 1201 | meritorious 1202 | merrily 1203 | merriment 1204 | merriness 1205 | merry 1206 | mesmerize 1207 | mesmerized 1208 | mesmerizes 1209 | mesmerizing 1210 | mesmerizingly 1211 | meticulous 1212 | meticulously 1213 | mightily 1214 | mighty 1215 | mind-blowing 1216 | miracle 1217 | miracles 1218 | miraculous 1219 | miraculously 1220 | miraculousness 1221 | modern 1222 | modest 1223 | modesty 1224 | momentous 1225 | monumental 1226 | monumentally 1227 | morality 1228 | motivated 1229 | multi-purpose 1230 | navigable 1231 | neat 1232 | neatest 1233 | neatly 1234 | nice 1235 | nicely 1236 | nicer 1237 | nicest 1238 | nifty 1239 | nimble 1240 | noble 1241 | nobly 1242 | noiseless 1243 | non-violence 1244 | non-violent 1245 | notably 1246 | noteworthy 1247 | nourish 1248 | nourishing 1249 | nourishment 1250 | novelty 1251 | nurturing 1252 | oasis 1253 | obsession 1254 | obsessions 1255 | obtainable 1256 | openly 1257 | openness 1258 | optimal 1259 | optimism 1260 | optimistic 1261 | opulent 1262 | orderly 1263 | originality 1264 | outdo 1265 | outdone 1266 | outperform 1267 | outperformed 1268 | outperforming 1269 | outperforms 1270 | outshine 1271 | outshone 1272 | outsmart 1273 | outstanding 1274 | outstandingly 1275 | outstrip 1276 | outwit 1277 | ovation 1278 | 
overjoyed 1279 | overtake 1280 | overtaken 1281 | overtakes 1282 | overtaking 1283 | overtook 1284 | overture 1285 | pain-free 1286 | painless 1287 | painlessly 1288 | palatial 1289 | pamper 1290 | pampered 1291 | pamperedly 1292 | pamperedness 1293 | pampers 1294 | panoramic 1295 | paradise 1296 | paramount 1297 | pardon 1298 | passion 1299 | passionate 1300 | passionately 1301 | patience 1302 | patient 1303 | patiently 1304 | patriot 1305 | patriotic 1306 | peace 1307 | peaceable 1308 | peaceful 1309 | peacefully 1310 | peacekeepers 1311 | peach 1312 | peerless 1313 | pep 1314 | pepped 1315 | pepping 1316 | peppy 1317 | peps 1318 | perfect 1319 | perfection 1320 | perfectly 1321 | permissible 1322 | perseverance 1323 | persevere 1324 | personages 1325 | personalized 1326 | phenomenal 1327 | phenomenally 1328 | picturesque 1329 | piety 1330 | pinnacle 1331 | playful 1332 | playfully 1333 | pleasant 1334 | pleasantly 1335 | pleased 1336 | pleases 1337 | pleasing 1338 | pleasingly 1339 | pleasurable 1340 | pleasurably 1341 | pleasure 1342 | plentiful 1343 | pluses 1344 | plush 1345 | plusses 1346 | poetic 1347 | poeticize 1348 | poignant 1349 | poise 1350 | poised 1351 | polished 1352 | polite 1353 | politeness 1354 | popular 1355 | portable 1356 | posh 1357 | positive 1358 | positively 1359 | positives 1360 | powerful 1361 | powerfully 1362 | praise 1363 | praiseworthy 1364 | praising 1365 | pre-eminent 1366 | precious 1367 | precise 1368 | precisely 1369 | preeminent 1370 | prefer 1371 | preferable 1372 | preferably 1373 | prefered 1374 | preferes 1375 | preferring 1376 | prefers 1377 | premier 1378 | prestige 1379 | prestigious 1380 | prettily 1381 | pretty 1382 | priceless 1383 | pride 1384 | principled 1385 | privilege 1386 | privileged 1387 | prize 1388 | proactive 1389 | problem-free 1390 | problem-solver 1391 | prodigious 1392 | prodigiously 1393 | prodigy 1394 | productive 1395 | productively 1396 | proficient 1397 | proficiently 1398 | profound 1399 | 
profoundly 1400 | profuse 1401 | profusion 1402 | progress 1403 | progressive 1404 | prolific 1405 | prominence 1406 | prominent 1407 | promise 1408 | promised 1409 | promises 1410 | promising 1411 | promoter 1412 | prompt 1413 | promptly 1414 | proper 1415 | properly 1416 | propitious 1417 | propitiously 1418 | pros 1419 | prosper 1420 | prosperity 1421 | prosperous 1422 | prospros 1423 | protect 1424 | protection 1425 | protective 1426 | proud 1427 | proven 1428 | proves 1429 | providence 1430 | proving 1431 | prowess 1432 | prudence 1433 | prudent 1434 | prudently 1435 | punctual 1436 | pure 1437 | purify 1438 | purposeful 1439 | quaint 1440 | qualified 1441 | qualify 1442 | quicker 1443 | quiet 1444 | quieter 1445 | radiance 1446 | radiant 1447 | rapid 1448 | rapport 1449 | rapt 1450 | rapture 1451 | raptureous 1452 | raptureously 1453 | rapturous 1454 | rapturously 1455 | rational 1456 | razor-sharp 1457 | reachable 1458 | readable 1459 | readily 1460 | ready 1461 | reaffirm 1462 | reaffirmation 1463 | realistic 1464 | realizable 1465 | reasonable 1466 | reasonably 1467 | reasoned 1468 | reassurance 1469 | reassure 1470 | receptive 1471 | reclaim 1472 | recomend 1473 | recommend 1474 | recommendation 1475 | recommendations 1476 | recommended 1477 | reconcile 1478 | reconciliation 1479 | record-setting 1480 | recover 1481 | recovery 1482 | rectification 1483 | rectify 1484 | rectifying 1485 | redeem 1486 | redeeming 1487 | redemption 1488 | refine 1489 | refined 1490 | refinement 1491 | reform 1492 | reformed 1493 | reforming 1494 | reforms 1495 | refresh 1496 | refreshed 1497 | refreshing 1498 | refund 1499 | refunded 1500 | regal 1501 | regally 1502 | regard 1503 | rejoice 1504 | rejoicing 1505 | rejoicingly 1506 | rejuvenate 1507 | rejuvenated 1508 | rejuvenating 1509 | relaxed 1510 | relent 1511 | reliable 1512 | reliably 1513 | relief 1514 | relish 1515 | remarkable 1516 | remarkably 1517 | remedy 1518 | remission 1519 | remunerate 1520 | renaissance 1521 
| renewed 1522 | renown 1523 | renowned 1524 | replaceable 1525 | reputable 1526 | reputation 1527 | resilient 1528 | resolute 1529 | resound 1530 | resounding 1531 | resourceful 1532 | resourcefulness 1533 | respect 1534 | respectable 1535 | respectful 1536 | respectfully 1537 | respite 1538 | resplendent 1539 | responsibly 1540 | responsive 1541 | restful 1542 | restored 1543 | restructure 1544 | restructured 1545 | restructuring 1546 | retractable 1547 | revel 1548 | revelation 1549 | revere 1550 | reverence 1551 | reverent 1552 | reverently 1553 | revitalize 1554 | revival 1555 | revive 1556 | revives 1557 | revolutionary 1558 | revolutionize 1559 | revolutionized 1560 | revolutionizes 1561 | reward 1562 | rewarding 1563 | rewardingly 1564 | rich 1565 | richer 1566 | richly 1567 | richness 1568 | right 1569 | righten 1570 | righteous 1571 | righteously 1572 | righteousness 1573 | rightful 1574 | rightfully 1575 | rightly 1576 | rightness 1577 | risk-free 1578 | robust 1579 | rock-star 1580 | rock-stars 1581 | rockstar 1582 | rockstars 1583 | romantic 1584 | romantically 1585 | romanticize 1586 | roomier 1587 | roomy 1588 | rosy 1589 | safe 1590 | safely 1591 | sagacity 1592 | sagely 1593 | saint 1594 | saintliness 1595 | saintly 1596 | salutary 1597 | salute 1598 | sane 1599 | satisfactorily 1600 | satisfactory 1601 | satisfied 1602 | satisfies 1603 | satisfy 1604 | satisfying 1605 | satisified 1606 | saver 1607 | savings 1608 | savior 1609 | savvy 1610 | scenic 1611 | seamless 1612 | seasoned 1613 | secure 1614 | securely 1615 | selective 1616 | self-determination 1617 | self-respect 1618 | self-satisfaction 1619 | self-sufficiency 1620 | self-sufficient 1621 | sensation 1622 | sensational 1623 | sensationally 1624 | sensations 1625 | sensible 1626 | sensibly 1627 | sensitive 1628 | serene 1629 | serenity 1630 | sexy 1631 | sharp 1632 | sharper 1633 | sharpest 1634 | shimmering 1635 | shimmeringly 1636 | shine 1637 | shiny 1638 | significant 1639 | silent 1640 
| simpler 1641 | simplest 1642 | simplified 1643 | simplifies 1644 | simplify 1645 | simplifying 1646 | sincere 1647 | sincerely 1648 | sincerity 1649 | skill 1650 | skilled 1651 | skillful 1652 | skillfully 1653 | slammin 1654 | sleek 1655 | slick 1656 | smart 1657 | smarter 1658 | smartest 1659 | smartly 1660 | smile 1661 | smiles 1662 | smiling 1663 | smilingly 1664 | smitten 1665 | smooth 1666 | smoother 1667 | smoothes 1668 | smoothest 1669 | smoothly 1670 | snappy 1671 | snazzy 1672 | sociable 1673 | soft 1674 | softer 1675 | solace 1676 | solicitous 1677 | solicitously 1678 | solid 1679 | solidarity 1680 | soothe 1681 | soothingly 1682 | sophisticated 1683 | soulful 1684 | soundly 1685 | soundness 1686 | spacious 1687 | sparkle 1688 | sparkling 1689 | spectacular 1690 | spectacularly 1691 | speedily 1692 | speedy 1693 | spellbind 1694 | spellbinding 1695 | spellbindingly 1696 | spellbound 1697 | spirited 1698 | spiritual 1699 | splendid 1700 | splendidly 1701 | splendor 1702 | spontaneous 1703 | sporty 1704 | spotless 1705 | sprightly 1706 | stability 1707 | stabilize 1708 | stable 1709 | stainless 1710 | standout 1711 | state-of-the-art 1712 | stately 1713 | statuesque 1714 | staunch 1715 | staunchly 1716 | staunchness 1717 | steadfast 1718 | steadfastly 1719 | steadfastness 1720 | steadiest 1721 | steadiness 1722 | steady 1723 | stellar 1724 | stellarly 1725 | stimulate 1726 | stimulates 1727 | stimulating 1728 | stimulative 1729 | stirringly 1730 | straighten 1731 | straightforward 1732 | streamlined 1733 | striking 1734 | strikingly 1735 | striving 1736 | strong 1737 | stronger 1738 | strongest 1739 | stunned 1740 | stunning 1741 | stunningly 1742 | stupendous 1743 | stupendously 1744 | sturdier 1745 | sturdy 1746 | stylish 1747 | stylishly 1748 | stylized 1749 | suave 1750 | suavely 1751 | sublime 1752 | subsidize 1753 | subsidized 1754 | subsidizes 1755 | subsidizing 1756 | substantive 1757 | succeed 1758 | succeeded 1759 | succeeding 1760 | succeeds 
1761 | succes 1762 | success 1763 | successes 1764 | successful 1765 | successfully 1766 | suffice 1767 | sufficed 1768 | suffices 1769 | sufficient 1770 | sufficiently 1771 | suitable 1772 | sumptuous 1773 | sumptuously 1774 | sumptuousness 1775 | super 1776 | superb 1777 | superbly 1778 | superior 1779 | superiority 1780 | supple 1781 | support 1782 | supported 1783 | supporter 1784 | supporting 1785 | supportive 1786 | supports 1787 | supremacy 1788 | supreme 1789 | supremely 1790 | supurb 1791 | supurbly 1792 | surmount 1793 | surpass 1794 | surreal 1795 | survival 1796 | survivor 1797 | sustainability 1798 | sustainable 1799 | swank 1800 | swankier 1801 | swankiest 1802 | swanky 1803 | sweeping 1804 | sweet 1805 | sweeten 1806 | sweetheart 1807 | sweetly 1808 | sweetness 1809 | swift 1810 | swiftness 1811 | talent 1812 | talented 1813 | talents 1814 | tantalize 1815 | tantalizing 1816 | tantalizingly 1817 | tempt 1818 | tempting 1819 | temptingly 1820 | tenacious 1821 | tenaciously 1822 | tenacity 1823 | tender 1824 | tenderly 1825 | terrific 1826 | terrifically 1827 | thank 1828 | thankful 1829 | thinner 1830 | thoughtful 1831 | thoughtfully 1832 | thoughtfulness 1833 | thrift 1834 | thrifty 1835 | thrill 1836 | thrilled 1837 | thrilling 1838 | thrillingly 1839 | thrills 1840 | thrive 1841 | thriving 1842 | thumb-up 1843 | thumbs-up 1844 | tickle 1845 | tidy 1846 | time-honored 1847 | timely 1848 | tingle 1849 | titillate 1850 | titillating 1851 | titillatingly 1852 | togetherness 1853 | tolerable 1854 | toll-free 1855 | top 1856 | top-notch 1857 | top-quality 1858 | topnotch 1859 | tops 1860 | tough 1861 | tougher 1862 | toughest 1863 | traction 1864 | tranquil 1865 | tranquility 1866 | transparent 1867 | treasure 1868 | tremendously 1869 | trendy 1870 | triumph 1871 | triumphal 1872 | triumphant 1873 | triumphantly 1874 | trivially 1875 | trophy 1876 | trouble-free 1877 | trump 1878 | trumpet 1879 | trust 1880 | trusted 1881 | trusting 1882 | trustingly 
1883 | trustworthiness 1884 | trustworthy 1885 | trusty 1886 | truthful 1887 | truthfully 1888 | truthfulness 1889 | twinkly 1890 | ultra-crisp 1891 | unabashed 1892 | unabashedly 1893 | unaffected 1894 | unassailable 1895 | unbeatable 1896 | unbiased 1897 | unbound 1898 | uncomplicated 1899 | unconditional 1900 | undamaged 1901 | undaunted 1902 | understandable 1903 | undisputable 1904 | undisputably 1905 | undisputed 1906 | unencumbered 1907 | unequivocal 1908 | unequivocally 1909 | unfazed 1910 | unfettered 1911 | unforgettable 1912 | unity 1913 | unlimited 1914 | unmatched 1915 | unparalleled 1916 | unquestionable 1917 | unquestionably 1918 | unreal 1919 | unrestricted 1920 | unrivaled 1921 | unselfish 1922 | unwavering 1923 | upbeat 1924 | upgradable 1925 | upgradeable 1926 | upgraded 1927 | upheld 1928 | uphold 1929 | uplift 1930 | uplifting 1931 | upliftingly 1932 | upliftment 1933 | upscale 1934 | usable 1935 | useable 1936 | useful 1937 | user-friendly 1938 | user-replaceable 1939 | valiant 1940 | valiantly 1941 | valor 1942 | valuable 1943 | variety 1944 | venerate 1945 | verifiable 1946 | veritable 1947 | versatile 1948 | versatility 1949 | vibrant 1950 | vibrantly 1951 | victorious 1952 | victory 1953 | viewable 1954 | vigilance 1955 | vigilant 1956 | virtue 1957 | virtuous 1958 | virtuously 1959 | visionary 1960 | vivacious 1961 | vivid 1962 | vouch 1963 | vouchsafe 1964 | warm 1965 | warmer 1966 | warmhearted 1967 | warmly 1968 | warmth 1969 | wealthy 1970 | welcome 1971 | well 1972 | well-backlit 1973 | well-balanced 1974 | well-behaved 1975 | well-being 1976 | well-bred 1977 | well-connected 1978 | well-educated 1979 | well-established 1980 | well-informed 1981 | well-intentioned 1982 | well-known 1983 | well-made 1984 | well-managed 1985 | well-mannered 1986 | well-positioned 1987 | well-received 1988 | well-regarded 1989 | well-rounded 1990 | well-run 1991 | well-wishers 1992 | wellbeing 1993 | whoa 1994 | wholeheartedly 1995 | wholesome 1996 | 
whooa 1997 | whoooa 1998 | wieldy 1999 | willing 2000 | willingly 2001 | willingness 2002 | win 2003 | windfall 2004 | winnable 2005 | winner 2006 | winners 2007 | winning 2008 | wins 2009 | wisdom 2010 | wise 2011 | wisely 2012 | witty 2013 | won 2014 | wonder 2015 | wonderful 2016 | wonderfully 2017 | wonderous 2018 | wonderously 2019 | wonders 2020 | wondrous 2021 | woo 2022 | work 2023 | workable 2024 | worked 2025 | works 2026 | world-famous 2027 | worth 2028 | worth-while 2029 | worthiness 2030 | worthwhile 2031 | worthy 2032 | wow 2033 | wowed 2034 | wowing 2035 | wows 2036 | yay 2037 | youthful 2038 | zeal 2039 | zenith 2040 | zest 2041 | zippy 2042 | -------------------------------------------------------------------------------- /opinionreviews_sentiment_analyzer.py: -------------------------------------------------------------------------------- 1 | __author__ = 'shekhargulati' 2 | 3 | """ 4 | 1. Read the positive review file, tokenize it, and add it to a list of tuple. Tuple ([list of words],label) 5 | 2. Read the negative review file, tokenize it, and add it to a list of tuple. Tuple ([list of words],label) 6 | 3. Create training and test data from the actual data. Training data 75% and Test Date 25% 7 | 4. Train the classifer 8 | 5. 
# Classify using the classifier
# """
# NOTE: the two lines above are the tail of the module docstring that opens on
# the preceding physical line (step 5 of the outline followed by the closing
# quotes). They are kept as comment lines so the text is preserved and this
# section still parses on its own.


def _split_index(tokens, cutoff):
    """Return the index that separates the training slice from the test slice.

    :param tokens: sequence of labelled samples.
    :param cutoff: fraction (e.g. ``0.75``) of ``tokens`` used for training.
    :return: ``floor(cutoff * len(tokens))`` as an ``int``.
    """
    import math

    return int(math.floor(cutoff * len(tokens)))


def tokenize_file_and_apply_label(filename, label):
    """Turn every line of ``filename`` into a ``(features, label)`` tuple.

    A line is split on whitespace, tokens shorter than three characters are
    discarded, and the remaining tokens are lower-cased and converted to a
    ``{word: True}`` feature dict via ``list_to_dict``.

    :param filename: path of a text file with one review per line.
    :param label: label attached to every line of the file.
    :return: list of ``(feature_dict, label)`` tuples, one per line.
    """
    # The context manager closes the handle as soon as the file has been
    # consumed; the original left it open until garbage collection.
    with open(filename, 'r') as reviews:
        return [
            (list_to_dict([word.lower() for word in line.split() if len(word) >= 3]), label)
            for line in reviews
        ]


def list_to_dict(words):
    """Return a ``{word: True}`` feature dict for the given words."""
    return {word: True for word in words}


def get_training_data(pos_tokens, neg_tokens, cutoff):
    """Return the leading ``cutoff`` fraction of both sample lists.

    :param pos_tokens: positively labelled samples.
    :param neg_tokens: negatively labelled samples.
    :param cutoff: fraction of each list to keep.
    :return: positive training samples followed by negative training samples.
    """
    return (pos_tokens[:_split_index(pos_tokens, cutoff)]
            + neg_tokens[:_split_index(neg_tokens, cutoff)])


def get_test_data(pos_tokens, neg_tokens, cutoff):
    """Return what remains of both sample lists after the ``cutoff`` fraction.

    :param pos_tokens: positively labelled samples.
    :param neg_tokens: negatively labelled samples.
    :param cutoff: fraction of each list that is skipped.
    :return: positive test samples followed by negative test samples.
    """
    return (pos_tokens[_split_index(pos_tokens, cutoff):]
            + neg_tokens[_split_index(neg_tokens, cutoff):])


def all_words_in_training_data(training_data):
    """Return every feature word of ``training_data`` as one flat list.

    Words are emitted in sample order and are not de-duplicated across samples.
    """
    return [word for item in training_data for word in item[0]]


if __name__ == "__main__":
    import os

    BASE_DIR = os.path.join("polarity-data", "rt-polaritydata")
    POSITIVE_REVIEWS_FILE = os.path.join(BASE_DIR, "rt-polarity-pos.txt")
    pos_tokens = tokenize_file_and_apply_label(POSITIVE_REVIEWS_FILE, "positive")

    NEGATIVE_REVIEWS_FILE = os.path.join(BASE_DIR, "rt-polarity-neg.txt")
    neg_tokens = tokenize_file_and_apply_label(NEGATIVE_REVIEWS_FILE, "negative")

    # Use the same 75/25 split point for both halves. Training with a cutoff
    # of 1 put every test sample into the training set as well, so the metrics
    # were computed on data the classifier had already seen.
    training_data = get_training_data(pos_tokens, neg_tokens, 0.75)
    test_data = get_test_data(pos_tokens, neg_tokens, 0.75)

    # Parenthesised form prints the same text on Python 2 and also parses on
    # Python 3.
    print("Training Data %d, Test Data %d" % (len(training_data), len(test_data)))

    # word_features = get_word_features(all_words_in_training_data(training_data))
    # feature_training_set = 
feature_training_set(word_features, training_data) 65 | 66 | from nltk.classify import NaiveBayesClassifier 67 | 68 | classifier = NaiveBayesClassifier.train(training_data) 69 | 70 | import collections 71 | 72 | expected_set = collections.defaultdict(set) 73 | actual_set = collections.defaultdict(set) 74 | 75 | for index, item in enumerate(test_data): 76 | expected_set[item[1]].add(index) 77 | sentiment = classifier.classify(item[0]) 78 | actual_set[sentiment].add(index) 79 | 80 | import nltk 81 | 82 | print 'accuracy: %.2f' % nltk.classify.util.accuracy(classifier, test_data) 83 | print 'pos precision: %.2f' % nltk.metrics.precision(expected_set['positive'], actual_set['positive']) 84 | print 'pos recall: %.2f' % nltk.metrics.recall(expected_set['positive'], actual_set['positive']) 85 | print 'neg precision: %.2f' % nltk.metrics.precision(expected_set['negative'], actual_set['negative']) 86 | print 'neg recall: %.2f' % nltk.metrics.recall(expected_set['negative'], actual_set['negative']) 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /polarity-data/rt-polaritydata.README.1.0.txt: -------------------------------------------------------------------------------- 1 | 2 | ======= 3 | 4 | Introduction 5 | 6 | This README v1.0 (June, 2005) for the v1.0 sentence polarity dataset comes 7 | from the URL 8 | http://www.cs.cornell.edu/people/pabo/movie-review-data . 9 | 10 | ======= 11 | 12 | Citation Info 13 | 14 | This data was first used in Bo Pang and Lillian Lee, 15 | ``Seeing stars: Exploiting class relationships for sentiment categorization 16 | with respect to rating scales.'', Proceedings of the ACL, 2005. 
17 | 18 | @InProceedings{Pang+Lee:05a, 19 | author = {Bo Pang and Lillian Lee}, 20 | title = {Seeing stars: Exploiting class relationships for sentiment 21 | categorization with respect to rating scales}, 22 | booktitle = {Proceedings of the ACL}, 23 | year = 2005 24 | } 25 | 26 | ======= 27 | 28 | Data Format Summary 29 | 30 | - rt-polaritydata.tar.gz: contains this readme and two data files that 31 | were used in the experiments described in Pang/Lee ACL 2005. 32 | 33 | Specifically: 34 | * rt-polarity.pos contains 5331 positive snippets 35 | * rt-polarity.neg contains 5331 negative snippets 36 | 37 | Each line in these two files corresponds to a single snippet (usually 38 | containing roughly one single sentence); all snippets are down-cased. 39 | The snippets were labeled automatically, as described below (see 40 | section "Label Decision"). 41 | 42 | Note: The original source files from which the data in 43 | rt-polaritydata.tar.gz was derived can be found in the subjective 44 | part (Rotten Tomatoes pages) of subjectivity_html.tar.gz (released 45 | with subjectivity dataset v1.0). 46 | 47 | 48 | ======= 49 | 50 | Label Decision 51 | 52 | We assumed snippets (from Rotten Tomatoes webpages) for reviews marked with 53 | ``fresh'' are positive, and those for reviews marked with ``rotten'' are 54 | negative. 
55 | -------------------------------------------------------------------------------- /polarity-data/rt-polaritydata/rt-polarity-neg.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shekhargulati/sentiment-analysis-python/2edddd53c3c34e26149fed3dd9c41830a8c6e1cb/polarity-data/rt-polaritydata/rt-polarity-neg.txt -------------------------------------------------------------------------------- /polarity-data/rt-polaritydata/rt-polarity-pos.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shekhargulati/sentiment-analysis-python/2edddd53c3c34e26149fed3dd9c41830a8c6e1cb/polarity-data/rt-polaritydata/rt-polarity-pos.txt -------------------------------------------------------------------------------- /sentiment.py: -------------------------------------------------------------------------------- 1 | __author__ = 'shekhargulati' 2 | 3 | import os, re, math, collections 4 | import nltk.metrics 5 | from nltk.classify import NaiveBayesClassifier 6 | 7 | POLARITY_DATA_DIR = os.path.join('polarity-data', 'rt-polaritydata') 8 | POSITIVE_REVIEWS = os.path.join(POLARITY_DATA_DIR, 'rt-polarity-pos.txt') 9 | NEGATIVE_REVIEWS = os.path.join(POLARITY_DATA_DIR, 'rt-polarity-neg.txt') 10 | 11 | 12 | def evaluate_features(feature_select): 13 | pos_features = [] 14 | neg_features = [] 15 | 16 | for line in open(POSITIVE_REVIEWS, 'r'): 17 | pos_words = re.findall(r"[\w']+|[.,!?;]", line.rstrip()) 18 | pos_features.append([feature_select(pos_words), 'pos']) 19 | 20 | for line in open(NEGATIVE_REVIEWS, 'r'): 21 | neg_words = re.findall(r"[\w']+|[.,!?;]", line.rstrip()) 22 | neg_features.append([feature_select(neg_words), 'neg']) 23 | 24 | print("len of positive features %d" % len(pos_features)) 25 | pos_cutoff = int(math.floor(len(pos_features) * 3 / 4)) 26 | neg_cutoff = int(math.floor(len(neg_features) * 3 / 4)) 27 | 28 | print("pos_cutoff %d 
neg_cutoff %d" % (pos_cutoff, neg_cutoff)) 29 | 30 | training_data = pos_features[:pos_cutoff] + neg_features[:neg_cutoff] 31 | test_data = pos_features[pos_cutoff:] + neg_features[neg_cutoff:] 32 | 33 | classifier = NaiveBayesClassifier.train(training_data) 34 | 35 | reference_set = collections.defaultdict(set) 36 | test_set = collections.defaultdict(set) 37 | 38 | for index, (features, label) in enumerate(test_data): 39 | reference_set[label].add(index) 40 | predicted = classifier.classify(features) 41 | test_set[predicted].add(index) 42 | 43 | print 'train on %d instances, test on %d instances' % (len(training_data), len(test_data)) 44 | print 'accuracy:', nltk.classify.util.accuracy(classifier, test_data) 45 | print 'pos precision:', nltk.metrics.precision(reference_set['pos'], test_set['pos']) 46 | print 'pos recall:', nltk.metrics.recall(reference_set['pos'], test_set['pos']) 47 | print 'neg precision:', nltk.metrics.precision(reference_set['neg'], test_set['neg']) 48 | print 'neg recall:', nltk.metrics.recall(reference_set['neg'], test_set['neg']) 49 | classifier.show_most_informative_features(10) 50 | 51 | 52 | #creates a feature selection mechanism that uses all words 53 | def make_full_dict(words): 54 | return dict([(word, True) for word in words]) 55 | 56 | #tries using all words as the feature selection mechanism 57 | print 'using all words as features' 58 | evaluate_features(make_full_dict) 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /tweet_sentiment.py: -------------------------------------------------------------------------------- 1 | __author__ = 'shekhargulati' 2 | 3 | pos_tweets = [('I love this car', 'positive'), 4 | ('This view is amazing', 'positive'), 5 | ('I feel great this morning', 'positive'), 6 | ('I am so excited about the concert', 'positive'), 7 | ('He is my best friend', 'positive'), 8 | ('This movie was great', 'positive'), 9 | ('This movie was 
not pathetic', 'positive')] 10 | 11 | neg_tweets = [('I do not like this car', 'negative'), 12 | ('This view is horrible', 'negative'), 13 | ('I feel tired this morning', 'negative'), 14 | ('I am not looking forward to the concert', 'negative'), 15 | ('He is my enemy', 'negative'), 16 | ('This is a pathetic movie', 'negative')] 17 | 18 | tweets_with_sentiment = [] 19 | for (tweet, sentiment) in pos_tweets + neg_tweets: 20 | filtered_tweet_words = [word.lower() for word in tweet.split() if len(word) >= 3] 21 | tweets_with_sentiment.append((filtered_tweet_words, sentiment)) 22 | 23 | print tweets_with_sentiment 24 | 25 | all_words = [] 26 | for words, sentiment in tweets_with_sentiment: 27 | all_words.extend(words) 28 | 29 | import nltk 30 | 31 | fd = nltk.FreqDist(all_words) 32 | word_features = fd.keys() 33 | 34 | print word_features 35 | 36 | # Extract Features 37 | 38 | 39 | def extract_features(document): 40 | unique_words_in_document = set(document) 41 | features = {} 42 | for word_feature in word_features: 43 | features['contains(%s)' % word_feature] = (word_feature in unique_words_in_document) 44 | return features 45 | 46 | 47 | training_set = nltk.classify.apply_features(extract_features, tweets_with_sentiment) 48 | 49 | print training_set 50 | 51 | classifier = nltk.classify.NaiveBayesClassifier.train(training_set) 52 | 53 | classifier.show_most_informative_features(10) 54 | 55 | test_tweet = "#RajaNatwarlal is a pathetic movie" 56 | 57 | features_test_tweet = extract_features(test_tweet.split()) 58 | print features_test_tweet 59 | print classifier.classify(features_test_tweet) 60 | 61 | --------------------------------------------------------------------------------