├── Code ├── football_analyzer.py ├── html │ ├── DataMiningResults.html │ ├── images │ │ └── bg.gif │ ├── main.css │ └── template │ │ ├── Template copy.html │ │ └── Template.html ├── html_creator.py ├── sentiment_analyzer.py ├── sentiment_word_files │ ├── AFINN-111.txt │ ├── AFINN-96.txt │ ├── AFINN-README.txt │ ├── Nielsen2009Responsible_emotion.csv │ ├── Nielsen2010Responsible_english.csv │ ├── tweets_negative.txt │ ├── tweets_neutral.txt │ └── tweets_positive.txt ├── twitter_aggregator.py └── twitter_sentiment_analysis.tmproj ├── README.md └── Report ├── Paper.tmproj ├── README.txt ├── bibbase.bib ├── chapters ├── 1_introduction.tex ├── 2_design_of_the_program.tex ├── 3_implementation.tex ├── 4_results.tex ├── appendix1.tex └── appendix2.tex ├── command.shell ├── figure ├── OneNote Table Of Contents.onetoc2 ├── dtu_A1_UK.eps ├── dtu_A1_UK.pdf ├── dtu_elektro_A_UK.eps ├── dtu_elektro_A_UK.pdf ├── dtu_informatics_A_UK-eps-converted-to.pdf ├── dtu_informatics_A_UK.eps ├── test2.png └── testfigure.png ├── images ├── ClassOverview.png ├── Poster.png └── webpage.png ├── master2010.aux ├── master2010.bbl ├── master2010.blg ├── master2010.lof ├── master2010.log ├── master2010.lot ├── master2010.nlo ├── master2010.pdf ├── master2010.synctex.gz ├── master2010.tcp ├── master2010.tex ├── master2010.thm ├── master2010.toc ├── master2010.tps ├── nomencl.cfg └── style ├── Mythesis.sty ├── thesisdef.log ├── thesisdef.sty ├── thesislayout.log └── thesislayout.sty /Code/football_analyzer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | 02820 Python Programming 5 | 6 | twitter_sentiment_analysis.py 7 | 8 | Created by Elvar Orn Unnthorsson on 07-12-2011 9 | Copyright (c) 2011 ellioman inc. All rights reserved. 10 | """ 11 | 12 | import sys 13 | import os 14 | import twitter 15 | import nltk 16 | import thread 17 | from twitter_aggregator import * 18 | from html_creator import * 19 | from sentiment_analyzer import * 20 | 21 | class FootballAnalyzer: 22 | 23 | """ 24 | FootballAnalyzer is searches twitter for tweets, performs a 25 | sentiment analysis of each tweet harvested and creates a webpage, 26 | in the folder \"html\" that shows the results. 27 | """ 28 | 29 | def __init__( self, search_terms = [], pages = 3, results_per_page = 50 ): 30 | """ 31 | Input: search_terms. A list of search terms to for searching Twitter. 32 | Input: pages. A Number that determines how many pages of tweets to search for. 33 | Input: results_per_page. A Number which determines how many tweet results should be on each page. 34 | Constructs a new FootballAnalyzer instance. 35 | """ 36 | 37 | self.html_filename = "DataMiningResults.html" 38 | self.template_filename = "template.html" 39 | self.task_finished = False 40 | self.search_terms = search_terms 41 | self.pages = pages 42 | self.results_per_page = results_per_page 43 | 44 | 45 | def run( self ): 46 | """ 47 | run( self ): 48 | Runs the football analyzer. Searches, analyzes and creates the webpage. 49 | """ 50 | 51 | print "=======================\nData mining starting\n=======================\n" 52 | tweets = self.__search() 53 | analyzed_tweets = self.__analyze( tweets ) 54 | self.__create_webpage( analyzed_tweets ) 55 | 56 | print "=======================\nData mining successful\n=======================\n" 57 | 58 | 59 | def __search( self ): 60 | """ 61 | __search( self ): 62 | Creates a search aggregator instance and performs a twitter search using the 63 | the search parameters given in the constructor. Returns a list of the tweets harvested. 64 | Return: A list of all tweets harvested. 65 | """ 66 | 67 | try: 68 | print "Searching..." 69 | self.__start_task(); 70 | self.aggregator = TwitterAggregator() 71 | self.aggregator.twitter_search( search_terms=self.search_terms, pages=self.pages, results_per_page=self.results_per_page ) 72 | 73 | tweets = {} 74 | for term in self.search_terms: 75 | tweets[term] = self.aggregator.get_tweets( search_terms = [ term ], return_all_tweets = True ) 76 | self.__end_task(); 77 | print "Search complete" 78 | 79 | return tweets 80 | 81 | except: 82 | raise Exception ("Unknown error in FootballAnalyzer::__search") 83 | 84 | 85 | def __analyze( self, tweets ): 86 | """ 87 | __analyze( self, tweets ): 88 | Input: tweets. A list of tweets strings 89 | Creates a sentiment analyzer instance and uses it to analyze each tweet harvested 90 | by the twitter aggregator. it returns a list of the analyzed tweets. 91 | Return: A list of the tweets analyzed. 92 | """ 93 | 94 | try: 95 | print "Analyzing the data..." 96 | self.__start_task(); 97 | self.analyzer = SentimentAnalyzer() 98 | analyzed_tweets = self.analyzer.analyze( tweets ) 99 | self.__end_task(); 100 | print "Analyzing complete" 101 | 102 | self.analyzer.show_most_informative_features( 20 ) 103 | 104 | return analyzed_tweets 105 | 106 | except: 107 | raise Exception ("Unknown error in FootballAnalyzer::__analyze") 108 | 109 | 110 | def __create_webpage( self, analyzed_tweets ): 111 | """ 112 | __create_webpage( self, analyzed_tweets ): 113 | Input: analyzed_tweets. A list of tweets strings 114 | Creates a webpage with statistics gathered from the tweet aggregator and analyzer, 115 | a word cloud with the 30 most used words in the tweets and list of each tweet harvested 116 | which are colored green, red and white depending on the results from the analyzer. 117 | """ 118 | 119 | try: 120 | print "Creating HTML page..." 121 | self.__start_task(); 122 | 123 | # A Statistic dictionary, used to print out the information on the results webpage. 124 | stats = {} 125 | stats["search_parameters"] = self.search_terms 126 | stats["tweets_count"] = self.aggregator.tweets_count 127 | stats["positive_count"] = self.analyzer.get_analysis_result( "positive" ) 128 | stats["negative_count"] = self.analyzer.get_analysis_result( "negative" ) 129 | stats["neutral_count"] = self.analyzer.get_analysis_result( "neutral" ) 130 | 131 | html_page = HTMLCreator( self.html_filename, self.template_filename, analyzed_tweets, stats ) 132 | html_page.create_html() 133 | self.__end_task(); 134 | 135 | print "Creating HTML page complete\n" 136 | 137 | except: 138 | raise Exception ("Unknown error in FootballAnalyzer::__create_webpage") 139 | 140 | 141 | def __start_task( self ): 142 | """ 143 | __start_task( self ): 144 | Creates a thread which displays dots while a function in the FootballAnalyzer is running. 145 | """ 146 | 147 | self.task_finished = False 148 | thread.start_new_thread( self.__print_time, ( 1.0, ) ) 149 | 150 | 151 | def __end_task( self ): 152 | """ 153 | __end_task( self ): 154 | Stops the thread created in the __start_task() function. 155 | """ 156 | 157 | self.task_finished = True 158 | time.sleep( 1.0 ) 159 | 160 | 161 | def __print_time( self, delay ): 162 | """ 163 | __print_time( self, delay ): 164 | Input: delay. A number which determines how much time should be between the dot printing 165 | Prints dots while a function in the FootballAnalyzer is running. 166 | """ 167 | 168 | while not self.task_finished: 169 | print "." 170 | time.sleep( delay ) 171 | 172 | 173 | if __name__ == '__main__': 174 | search = sys.argv[1:] 175 | if ( len(search) == 0 ): search = ["Wayne Rooney"] 176 | 177 | page_per_search = 3 178 | results_on_page = 10 179 | f = FootballAnalyzer( search_terms = search, pages = page_per_search, results_per_page = results_on_page ) 180 | f.run() -------------------------------------------------------------------------------- /Code/html/images/bg.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ellioman/Twitter-Sentiment-Analysis/e6ea4390897be68214b70c875cee205c1461630c/Code/html/images/bg.gif -------------------------------------------------------------------------------- /Code/html/main.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin:0 auto 0 auto; 3 | font-family: 'lucida grande',trebuchet,'trebuchet ms',verdana,arial,helvetica,sans-serif; 4 | font-size: 0.9em; 5 | color: #333333; 6 | background: #94e4e8 url(html/images/bg.gif) no-repeat top left; 7 | } 8 | 9 | .top { 10 | width: 100%; 11 | margin: 112px auto; 12 | } 13 | 14 | .content { 15 | width: 1000px; 16 | background-color: #ffffff; 17 | border-right: 1px solid #cccccc; 18 | border-bottom: 1px solid #cccccc; 19 | padding: 10px 20px 40px 20px; margin: 0 auto; 20 | border: 2px solid #000; 21 | } 22 | 23 | .top .content h1 { 24 | border-bottom: 1px solid #CCC; 25 | width: 500px; 26 | } 27 | 28 | .tweet-positive { 29 | padding: 10px 10px 20px 10px; 30 | margin-left: 10px; 31 | background-color: #e7ffd7; 32 | height: 90px; 33 | border: 1px solid #e0e0e0; 34 | } 35 | 36 | .tweet-negative { 37 | padding: 10px 10px 20px 10px; 38 | background-color: #ffd7d7; 39 | height: 90px; 40 | border: 1px solid #e0e0e0; 41 | } 42 | 43 | .tweet-neutral { 44 | padding: 10px 10px 20px 10px; 45 | background-color: #ffffff; 46 | height: 90px; 47 | border: 1px solid #e0e0e0; 48 | } 49 | 50 | .left-tweets { 51 | width:32%; 52 | } 53 | 54 | .right-tweets { 55 | width:32%; 56 | float:right; 57 | margin-right: 13px; 58 | } 59 | 60 | .right-content { 61 | float:right; 62 | width: 100px; 63 | } 64 | 65 | .left-content { 66 | 67 | } 68 | 69 | .img { 70 | width: 58px; 71 | float: left; 72 | display: inline; 73 | margin-bottom: 5px; 74 | padding-top: 7px; 75 | } 76 | 77 | .clear { 78 | clear:both; 79 | margin-top: 5px; 80 | } 81 | 82 | #stats { 83 | padding-left: 25px; 84 | width: 300px; 85 | } 86 | 87 | 88 | ul.search-parameters { 89 | list-style-type: square; 90 | } 91 | 92 | #cloud { 93 | padding: 10px; 94 | line-height: 2em; 95 | text-align: center; 96 | width:550px; 97 | text-align: center; 98 | margin: 0; 99 | border: 1px solid #e0e0e0; 100 | float: right; 101 | } 102 | 103 | #cloud li { padding: 0px; } 104 | #cloud li { display: inline; } 105 | #cloud li.tag1 { font-size: 0.150em; font-weight: lighter; } 106 | #cloud li.tag2 { font-size: 0.300em; font-weight: lighter; } 107 | #cloud li.tag3 { font-size: 0.450em; font-weight: lighter; } 108 | #cloud li.tag4 { font-size: 0.600em; font-weight: lighter; } 109 | #cloud li.tag5 { font-size: 0.750em; font-weight: lighter; } 110 | #cloud li.tag6 { font-size: 0.900em; font-weight: normal; } 111 | #cloud li.tag7 { font-size: 1.050em; font-weight: normal; } 112 | #cloud li.tag8 { font-size: 1.200em; font-weight: normal; } 113 | #cloud li.tag9 { font-size: 1.350em; font-weight: normal; } 114 | #cloud li.tag10 { font-size: 1.500em; font-weight: normal; } 115 | #cloud li.tag11 { font-size: 1.650em; font-weight: bold; } 116 | #cloud li.tag12 { font-size: 1.800em; font-weight: bold; } 117 | #cloud li.tag13 { font-size: 1.950em; font-weight: bold; } 118 | #cloud li.tag14 { font-size: 2.100em; font-weight: bold; } 119 | #cloud li.tag15 { font-size: 2.250em; font-weight: bold; } 120 | #cloud li.tag16 { font-size: 2.400em; font-weight: bolder; } 121 | #cloud li.tag17 { font-size: 2.550em; font-weight: bolder; } 122 | #cloud li.tag18 { font-size: 2.700em; font-weight: bolder; } 123 | #cloud li.tag19 { font-size: 2.850em; font-weight: bolder; } 124 | #cloud li.tag20 { font-size: 3.000em; font-weight: bolder; } 125 | #cloud li.tag21 { font-size: 3.150em; font-weight: bolder; } 126 | #cloud li.tag22 { font-size: 3.300em; font-weight: bolder; } 127 | #cloud li.tag23 { font-size: 3.250em; font-weight: bolder; } 128 | #cloud li.tag24 { font-size: 3.600em; font-weight: bolder; } 129 | #cloud li.tag25 { font-size: 3.750em; font-weight: bolder; } -------------------------------------------------------------------------------- /Code/html/template/Template copy.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Data Mining Results 6 | 7 | 8 | 9 | 10 | 11 |
12 |
13 |

Tweets results

14 |
15 |
16 |
  • @chaskaborek
  • 17 |
  • rooney
  • 18 |
  • manutd
  • 19 |
  • mufc
  • 20 |
  • mufc
  • 21 |
  • mufc
  • 22 |
23 |
24 |
25 |
26 |
27 |

28 | Lorem Ipsum is simply dummy text of the printing and typesetting industry. 29 | Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, 30 | when an unknown printer took a galley of type and scrambled it to make a type 31 | specimen book. It has survived not only five centuries, but also the leap into 32 | electronic typesetting, remaining essentially unchanged. It was popularised in 33 | the 1960s with the release of Letraset sheets containing Lorem Ipsum passages. 34 |

35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 | Lorem Ipsum is simply dummy text of the printing and typesetting 44 | Lorem Ipsum has been the industry's standard dummy text ever 45 | when an unknown printer took a galley of type and scrambled it 46 | specimen book. It has survived not only five centuries, but 47 |
48 |
49 | Lorem Ipsum is simply dummy text of the printing and typesetting 50 | Lorem Ipsum has been the industry's standard dummy text ever 51 | when an unknown printer took a galley of type and scrambled it 52 | specimen book. It has survived not only five centuries, but 53 |
54 |
55 | Lorem Ipsum is simply dummy text of the printing and typesetting 56 | Lorem Ipsum has been the industry's standard dummy text ever 57 | when an unknown printer took a galley of type and scrambled it 58 | specimen book. It has survived not only five centuries, but 59 |
60 |
61 | Lorem Ipsum is simply dummy text of the printing and typesetting 62 | Lorem Ipsum has been the industry's standard dummy text ever 63 | when an unknown printer took a galley of type and scrambled it 64 | specimen book. It has survived not only five centuries, but 65 |
66 |
67 |
68 |
69 | 70 | -------------------------------------------------------------------------------- /Code/html/template/Template.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Data Mining Results 5 | 6 | 7 | 8 | 9 | 10 |
11 |
12 |

Results From Football Analyzer

13 |
14 |
15 |
    16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 | 32 | -------------------------------------------------------------------------------- /Code/html_creator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | html_creator.py 5 | 6 | Created by Elvar Orn Unnthorsson on 07-12-2011 7 | Copyright (c) 2011 ellioman inc. All rights reserved. 8 | """ 9 | 10 | import sys 11 | import os 12 | import nltk 13 | import random 14 | from cgi import escape 15 | from os.path import join as pjoin 16 | from mako.template import Template 17 | 18 | 19 | class HTMLCreator(object): 20 | 21 | """ 22 | HTMLCreator creates a HTML webpage that displays statistics, word cloud and a list of all 23 | tweets harvested. Must provide the class with the following: 24 | * Name of the html page to create 25 | * Name of the template for the html to follow. The template must have: 26 | *