├── README.md
├── basic-sentiment.rb
├── sentislang.txt
├── sentiwords.txt
└── tweet-search-sentiment.rb

/README.md:
--------------------------------------------------------------------------------
Basic-Tweet-Sentiment-Analyzer
==============================

This software enables a user to search the Twitter API for various search terms
and perform basic sentiment analysis over the results.
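

Usage
=====

Assuming Ruby and the `json` gem are installed (the scripts also use
`rubygems`, `net/http`, and `uri` from the standard library), run the
analyzer from the project directory:

    ruby tweet-search-sentiment.rb

The script prompts for a search term, pulls matching tweets from the Twitter
search API, classifies each one as negative, neutral, or positive, and prints
a summary of the counts along with an overall sentiment percentage.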


Contact Information
===================

The Ruby code in this project was originally written by Christopher MacLellan
in 2010. If you have any questions, he can be
reached at maclellan (dot) christopher (at) gmail (dot) com.


There is a blog post that references this GitHub repository at
[Christopia.net](http://www.christopia.net/2011/06/05/sentiment-analysis-of-tweets-using-ruby/).
--------------------------------------------------------------------------------
/basic-sentiment.rb:
--------------------------------------------------------------------------------
#############################################################################
# Filename: basic-sentiment.rb
# Copyright: Christopher MacLellan 2010
# Description: This code adds functions to the String class for calculating
#              the sentiment value of strings. It is not called directly by
#              the tweet-search-sentiment.rb program but is included for
#              possible future use.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#############################################################################


class String
  @@sentihash = {}

  #####################################################################
  # Function that returns the sentiment value for a given string.
  # This value is the sum of the sentiment values of each of the words.
  # Stop words are NOT removed.
  #
  # return:float -- sentiment value of the current string
  #####################################################################
  def get_sentiment
    sentiment_total = 0.0

    # tokenize the string
    tokens = self.split

    for token in tokens do
      sentiment_value = @@sentihash[token]

      if sentiment_value
        # for debugging purposes
        #puts "#{token} => #{sentiment_value}"

        sentiment_total += sentiment_value
      end
    end

    return sentiment_total
  end

  #####################################################################
  # load the specified tab-separated sentiment file into the
  # class-level @@sentihash (word => sentiment score)
  #
  # filename:string -- name of the file to load
  #####################################################################
  def load_senti_file(filename)
    # load the word file
    file = File.new(filename)
    while (line = file.gets)
      parsedline = line.chomp.split("\t")
      sentiscore = parsedline[0]
      text = parsedline[1]
      @@sentihash[text] = sentiscore.to_f
    end
    file.close
  end

end
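
# Minimal usage sketch: assumes the bundled 'sentiwords.txt' (tab-separated
# "score<TAB>word" lines) sits in the working directory. Running this file
# directly loads those scores and prints the sentiment of a sample sentence.
if __FILE__ == $0
  "".load_senti_file('sentiwords.txt')
  puts "this is a great and wonderful example".get_sentiment
end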
--------------------------------------------------------------------------------
/sentislang.txt:
--------------------------------------------------------------------------------
-1.0 %-(
-1.0 )-:
-1.0 ):
-1.0 )o:
-1.0 8-0
-1.0 8/
-1.0 8\
-1.0 8c
-1.0 :'(
-1.0 :'-(
-1.0 :(
-1.0 :*(
-1.0 :,(
-1.0 :-(
-1.0 :-/
-1.0 :-S
-1.0 :-\
-0.50 :-|
-0.50 :/
-0.25 :O
-0.25 :S
-0.25 :\
-0.25 :|
-1.0 =(
-1.0 >:(
-1.0 D:
-1.0 sux
1.0 (o;
1.00 8-)
1.0 ;)
1.0 ;o)
1.0 %-)
1.0 (-:
1.0 (:
1.0 (o:
1.0 8)
1.0 :)
1.0 :-D
1.0 :-P
1.0 :D
1.0 :P
1.0 :P
1.0 :]
1.0 :o)
1.0 :p
1.0 ;^)
1.0 <3
1.0 <3
1.0 =)
1.0 =]
1.0 >:)
1.0 >:D
1.0 >=D
1.0 ^_^
1.0 }:)
--------------------------------------------------------------------------------
/tweet-search-sentiment.rb:
--------------------------------------------------------------------------------
#############################################################################
# Filename: tweet-search-sentiment.rb
# Copyright: Christopher MacLellan 2010
# Description: This program will ask for a search term, search twitter for
#              it, then perform sentiment analysis of the tweets.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#############################################################################


require 'rubygems'
require 'json'
require 'net/http'
require 'uri'

#########################################################################
# Function takes a search term and uses the twitter search url to access
# tweets with the given search term. It then parses the JSON responses
# and collects the individual tweets into an array of hashes, which is
# returned.
#
# search_term:string -- term to search twitter for.
# max_results:int    -- maximum number of tweets to collect.
# return:array       -- discovered tweets as an array of hashes.
#########################################################################
def get_tweet_hash(search_term, max_results = 2000)

  # the search API returns at most 100 results per page
  results_per_page = [max_results, 100].min

  done = false
  page = 1

  output = []

  # Encode search term for URL
  search_term = URI.escape(search_term)

  while (not done)

    # Construct the search URL
    search_url = "http://search.twitter.com/search.json?q=#{search_term}&rpp=#{results_per_page}&page=#{page}"

    # prints out the url being used... useful for debugging.
    puts search_url

    # Request the tweets from twitter search. I got the url for this here:
    # http://dev.twitter.com/pages/using_search
    resp = Net::HTTP.get_response(URI.parse(search_url))

    # Parse the data from JSON into a ruby hash.
    data = resp.body
    result = JSON.parse(data)

    # Raise exception if there is an error getting data from twitter
    if result.has_key? 'Error'
      raise "Error accessing tweet data"
    end

    if result['results']
      # trims off any amount over the max_results
      if max_results < (output.size + result['results'].size)
        cutpoint = max_results - output.size

        # for debugging purposes
        #puts cutpoint
        #puts result['results'][0,cutpoint]

        for tweet in result['results'][0,cutpoint]
          output.push(tweet)
        end
      else
        for tweet in result['results']
          output.push(tweet)
        end
      end
    end

    page += 1

    if output.size >= max_results or result['results'].nil? or result['results'].size == 0
      done = true
    end
  end

  return output
end
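
# For reference, each element of the array returned above is one parsed tweet
# hash from the search API. A minimal sketch of the fields this script relies
# on (values are illustrative only):
#
#   { 'from_user' => 'some_user', 'text' => 'having a great day :)' }
#
# Real responses carry many additional fields, which are simply ignored here.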


#####################################################################
# load the specified sentiment file into a hash
#
# filename:string -- name of the tab-separated file to load
# return:hash     -- hash mapping each word/symbol to its sentiment score
#####################################################################
def load_senti_file(filename)
  sentihash = {}
  # load the word file
  file = File.new(filename)
  while (line = file.gets)
    parsedline = line.chomp.split("\t")
    sentiscore = parsedline[0]
    text = parsedline[1]
    sentihash[text] = sentiscore.to_f
  end
  file.close

  return sentihash
end


#####################################################################
# Function analyzes the sentiment of a tweet. Very basic. This just
# imports a list of words with sentiment scores from file and uses
# these to perform the analysis.
#
# text:string -- string to analyze the sentiment of
# return:int  -- 0 means negative, 1 means neutral, and 2 means positive
#####################################################################
def analyze_sentiment(text)

  # load the word file (words -> sentiment score)
  sentihash = load_senti_file('sentiwords.txt')

  # load the symbol file (smilies and ascii symbols -> sentiment score)
  sentihash.merge!(load_senti_file('sentislang.txt'))

  # tokenize the text
  tokens = text.split

  # Check the sentiment value of each token against the sentihash.
  # Since each word has a positive or negative numeric sentiment value
  # we can just sum the values of all the sentimental words. If the sum
  # is positive then we say the tweet is positive. If it is negative we
  # say the tweet is negative.
  sentiment_total = 0.0

  for token in tokens do

    sentiment_value = sentihash[token]

    if sentiment_value
      # for debugging purposes
      #puts "#{token} => #{sentiment_value}"

      sentiment_total += sentiment_value
    end
  end

  # threshold for classification
  threshold = 0.0

  # if less than the negative threshold classify as negative
  if sentiment_total < (-1 * threshold)
    return 0
  # if greater than the positive threshold classify as positive
  elsif sentiment_total > threshold
    return 2
  # otherwise classify as neutral and print the tweet for inspection
  else
    puts '---------------------------------------------------------------'
    puts text
    puts '---------------------------------------------------------------'
    return 1
  end
end


def get_search_term_and_analyze

  # Get search term from user
  print "Enter search term: "
  search_term = gets.chomp

  # Get the tweets from twitter using the specified search term
  puts "Accessing tweets using search term: #{search_term}..."
  result = get_tweet_hash(search_term, 100)

  negative = 0
  neutral = 0
  positive = 0

  for tweet in result do
    # puts "From #{tweet['from_user']}: #{tweet['text']}"
    sentiment = analyze_sentiment(tweet['text'])
    if sentiment == 0
      negative += 1
    elsif sentiment == 1
      neutral += 1
    elsif sentiment == 2
      positive += 1
    end
  end

  puts "Number of tweets analyzed: #{result.size}"
  puts "Negative tweets: #{negative}"
  puts "Neutral tweets: #{neutral}"
  puts "Positive tweets: #{positive}"

  # guard against division by zero when every tweet is neutral
  if positive + negative == 0
    puts "Search term \"#{search_term}\" had a completely neutral sentiment."
  elsif positive >= negative
    puts "Search term \"#{search_term}\" had a #{((100.0 * positive) / (positive + negative)).round(0)}% positive sentiment."
  else
    puts "Search term \"#{search_term}\" had a #{((100.0 * negative) / (positive + negative)).round(0)}% negative sentiment."
  end

end


def display_license

  puts "Copyright (C) 2010 Christopher MacLellan"
  puts "This program comes with ABSOLUTELY NO WARRANTY."
  puts "This is free software, and you are welcome to redistribute it"
  puts "under certain conditions, as outlined in the GNU GPL v3."

end


# Functions to call when the program is loaded
display_license
get_search_term_and_analyze
--------------------------------------------------------------------------------