├── README.md
├── basic-sentiment.rb
├── sentislang.txt
├── sentiwords.txt
└── tweet-search-sentiment.rb
/README.md:
--------------------------------------------------------------------------------
1 | Basic-Tweet-Sentiment-Analyzer
2 | ==============================
3 |
4 | This software enables a user to search the Twitter API for various search terms
5 | and perform basic sentiment analysis over the results.
6 |
7 |
8 | Contact Information
9 | ===================
10 |
11 | The Ruby code in this project was originally written by Christopher MacLellan
12 | in 2010. If you have any questions, he can be
13 | reached at maclellan (dot) christopher (at) gmail (dot) com.
14 |
15 |
16 | There is a blog post that references this GitHub repository at
17 | [Christopia.net](http://www.christopia.net/2011/06/05/sentiment-analysis-of-tweets-using-ruby/)
18 |
19 |
--------------------------------------------------------------------------------
/basic-sentiment.rb:
--------------------------------------------------------------------------------
1 | #############################################################################
2 | # Filename: basic-sentiment.rb
3 | # Copyright: Christopher MacLellan 2010
4 | # Description: This code adds functions to the string class for calculating
5 | # the sentivalue of strings. It is not called directly by the
6 | # tweet-search-sentiment.rb program but is included for possible
7 | # future use.
8 | #
9 | # This program is free software: you can redistribute it and/or modify
10 | # it under the terms of the GNU General Public License as published by
11 | # the Free Software Foundation, either version 3 of the License, or
12 | # (at your option) any later version.
13 | #
14 | # This program is distributed in the hope that it will be useful,
15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | # GNU General Public License for more details.
18 | #
19 | # You should have received a copy of the GNU General Public License
20 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 | #############################################################################
22 |
23 |
24 | class String
25 | @@sentihash = {}
26 |
27 | #####################################################################
28 | # Function that returns the sentiment value for a given string.
29 | # This value is the sum of the sentiment values of each of the words.
30 | # Stop words are NOT removed.
31 | #
32 | # return:float -- sentiment value of the current string
33 | #####################################################################
34 | def get_sentiment
35 | sentiment_total = 0.0
36 |
37 | #tokenize the string
38 | tokens = self.split
39 |
40 | for token in tokens do
41 | sentiment_value = @@sentihash[token]
42 |
43 | if sentiment_value
44 |
45 | # for debugging purposes
46 | #puts "#{token} => #{sentiment_value}"
47 |
48 | sentiment_total += sentiment_value
49 | end
50 | end
51 |
52 | return sentiment_total
53 |
54 | end
55 |
56 | #####################################################################
57 | # load the specified sentiment file into a hash
58 | #
59 | # filename:string -- name of file to load
60 | # Entries are loaded into the class-level @@sentihash, which is shared
61 | # by all String instances; the method itself returns nothing useful.
62 | #####################################################################
63 | def load_senti_file (filename)
64 | # load the word file
65 | file = File.new(filename)
66 | while (line = file.gets)
67 | parsedline = line.chomp.split("\t")
68 | sentiscore = parsedline[0]
69 | text = parsedline[1]
70 | @@sentihash[text] = sentiscore.to_f
71 | end
72 | file.close
73 | end
74 |
75 | end
76 |
--------------------------------------------------------------------------------
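A usage sketch (not part of the repository): the String extension above can be driven from a small script or an irb session. The require_relative call, the empty-string receiver, and the sample sentence are illustrative assumptions; the two lexicon files ship with the project.

    # Hypothetical driver for basic-sentiment.rb
    require_relative 'basic-sentiment'

    # load_senti_file is an instance method that fills the shared @@sentihash,
    # so it can be called on any string; both lexicons end up in one hash.
    ''.load_senti_file('sentiwords.txt')
    ''.load_senti_file('sentislang.txt')

    text = 'what a great day :)'
    puts "#{text} => #{text.get_sentiment}"  # a positive sum suggests positive sentiment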
/sentislang.txt:
--------------------------------------------------------------------------------
1 | -1.0 %-(
2 | -1.0 )-:
3 | -1.0 ):
4 | -1.0 )o:
5 | -1.0 8-0
6 | -1.0 8/
7 | -1.0 8\
8 | -1.0 8c
9 | -1.0 :'(
10 | -1.0 :'-(
11 | -1.0 :(
12 | -1.0 :*(
13 | -1.0 :,(
14 | -1.0 :-(
15 | -1.0 :-/
16 | -1.0 :-S
17 | -1.0 :-\
18 | -0.50 :-|
19 | -0.50 :/
20 | -0.25 :O
21 | -0.25 :S
22 | -0.25 :\
23 | -0.25 :|
24 | -1.0 =(
25 | -1.0 >:(
26 | -1.0 D:
27 | -1.0 sux
28 | 1.0 (o;
29 | 1.00 8-)
30 | 1.0 ;)
31 | 1.0 ;o)
32 | 1.0 %-)
33 | 1.0 (-:
34 | 1.0 (:
35 | 1.0 (o:
36 | 1.0 8)
37 | 1.0 :)
38 | 1.0 :-D
39 | 1.0 :-P
40 | 1.0 :D
41 | 1.0 :P
42 | 1.0 :P
43 | 1.0 :]
44 | 1.0 :o)
45 | 1.0 :p
46 | 1.0 ;^)
47 | 1.0 <3
48 | 1.0 <3
49 | 1.0 =)
50 | 1.0 =]
51 | 1.0 >:)
52 | 1.0 >:D
53 | 1.0 >=D
54 | 1.0 ^_^
55 | 1.0 }:)
56 |
--------------------------------------------------------------------------------
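Each line of sentislang.txt (and of sentiwords.txt) is a sentiment score and a token separated by a single tab, which is exactly what load_senti_file splits on. A minimal sketch for extending the lexicon; the added emoticon is a made-up example:

    # Append a hypothetical new positive entry: score <TAB> token, one per line.
    File.open('sentislang.txt', 'a') do |f|
      f.puts "1.0\t\\o/"
    end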
/tweet-search-sentiment.rb:
--------------------------------------------------------------------------------
1 | #############################################################################
2 | # Filename: tweet-search-sentiment.rb
3 | # Copyright: Christopher MacLellan 2010
4 | # Description: This program will ask for a search term, search twitter for it,
5 | # then perform sentiment analysis of the tweets.
6 | #
7 | # This program is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # This program is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 | #############################################################################
20 |
21 |
22 | require 'rubygems'
23 | require 'json'
24 | require 'net/http'
25 | require 'uri'
26 |
27 | #########################################################################
28 | # Function takes a search term and uses the twitter search url to access
29 | # tweets with the given search term. It then converts these tweets from
30 | # JSON into ruby hashes, which are collected into an array and returned.
31 | #
32 | # search_term:string -- term to search twitter for.
33 | # return:array -- up to max_results discovered tweets, as an array of hashes.
34 | #########################################################################
35 | def get_tweet_hash( search_term, max_results = 2000)
36 |
37 | results_per_page = 100  # the twitter search API caps results per page at 100
38 | results_per_page = max_results if max_results < 100
39 |
40 | done = false
41 | page = 1
42 | num_results = 0
43 |
44 | output = []
45 |
46 | # Encode search term for URL
47 | search_term = URI.escape(search_term)
48 |
49 | while (not done)
50 |
51 | # Construct the search URL
52 | search_url = "http://search.twitter.com/search.json?q=#{search_term}&rpp=#{results_per_page}&page=#{page}"
53 |
54 | # prints out the url being used... useful for debugging.
55 | puts search_url
56 |
57 | # Request the tweets from twitter search. I got the url for this here: http://dev.twitter.com/pages/using_search
58 | resp = Net::HTTP.get_response(URI.parse(search_url))
59 |
60 | # Parse the data into from JSON into ruby hash.
61 | data = resp.body
62 | result = JSON.parse(data)
63 |
64 | # Raise exception if there is an error getting data from twitter
65 | if result.has_key? 'error'
66 | raise "Error assessing tweet data"
67 | end
68 |
69 | if result['results']
70 | # trims off any amount over the max_results
71 | if max_results < (output.size + result['results'].size)
72 | cutpoint = max_results - output.size
73 | #puts cutpoint                       # for debugging purposes
74 | #puts result['results'][0,cutpoint]  # for debugging purposes
75 | for tweet in result['results'][0,cutpoint]
76 | output.push(tweet)
77 | end
78 | else
79 | for tweet in result['results']
80 | output.push(tweet)
81 | end
82 | end
83 | end
84 |
85 | page += 1
86 |
87 | if output.size >= max_results or result['results'].size == 0
88 | done = true
89 | end
90 | end
91 | return output
92 | end
93 |
94 |
95 | #####################################################################
96 | # load the specified sentiment file into a hash
97 | #
98 | # filename:string -- name of file to load
99 | # (the hash is created inside the function rather than passed in)
100 | # return:hash -- new hash mapping tokens to their sentiment scores
101 | #####################################################################
102 | def load_senti_file (filename)
103 | sentihash = {}
104 | # load the word file
105 | file = File.new(filename)
106 | while (line = file.gets)
107 | parsedline = line.chomp.split("\t")
108 | sentiscore = parsedline[0]
109 | text = parsedline[1]
110 | sentihash[text] = sentiscore.to_f
111 | end
112 | file.close
113 |
114 | return sentihash
115 | end
116 |
117 |
118 | #####################################################################
119 | # Function analyzes the sentiment of a tweet. Very basic. This just
120 | # imports a list of words with sentiment scores from file and uses
121 | # these to perform the analysis.
122 | #
123 | # tweet: string -- string to analyze the sentiment of
124 | # return: int -- 0 means negative, 1 means neutral, and 2 means positive
125 | #####################################################################
126 | def analyze_sentiment ( text )
127 |
128 | # load the word file (words -> sentiment score)
129 | sentihash = load_senti_file ('sentiwords.txt')
130 |
131 | # load the symbol file (smiles and ascii symbols -> sentiment score)
132 | sentihash.merge!(load_senti_file ('sentislang.txt'))
133 |
134 | # tokenize the text
135 | tokens = text.split
136 |
137 | # Check the sentiment value of each token against the sentihash.
138 | # Since each word has a positive or negative numeric sentiment value
139 | # we can just sum the values of all the sentimental words. If it is
140 | # positive then we say the tweet is positive. If it is negative we
141 | # say the tweet is negative.
142 | sentiment_total = 0.0
143 |
144 | for token in tokens do
145 |
146 | sentiment_value = sentihash[token]
147 |
148 | if sentiment_value
149 |
150 | # for debugging purposes
151 | #puts "#{token} => #{sentiment_value}"
152 |
153 | sentiment_total += sentiment_value
154 |
155 | end
156 | end
157 |
158 | # threshold for classification
159 | threshold = 0.0
160 |
161 | # if less than the negative threshold, classify as negative
162 | if sentiment_total < (-1 * threshold)
163 | return 0
164 | # if greater than the positive threshold, classify as positive
165 | elsif sentiment_total > threshold
166 | return 2
168 | # otherwise classify as neutral (and print the tweet so borderline cases can be inspected)
168 | else
169 | puts '---------------------------------------------------------------'
170 | puts text
171 | puts '---------------------------------------------------------------'
172 | return 1
173 | end
174 | end
175 |
176 |
177 | def get_search_term_and_analyze
178 |
179 | # Get search term from user
180 | print "Enter search term: "
181 | search_term = gets.chomp
182 |
183 | # Get the hash from twitter using the specified search term
184 | puts "Accessing tweets using search term: #{search_term}..."
185 | result = get_tweet_hash( search_term, 100)
186 |
187 | negative = 0
188 | neutral = 0
189 | positive = 0
190 |
191 | for tweet in result do
192 | # puts "From #{tweet['from_user']}: #{tweet['text']}"
193 | sentiment = analyze_sentiment( tweet['text'] )
194 | if sentiment == 0
195 | negative += 1
196 | elsif sentiment == 1
197 | neutral += 1
198 | elsif sentiment == 2
199 | positive += 1
200 | end
201 | end
202 | puts "Number of tweets analyzed: #{result.size}"
203 | puts "Negative tweets: #{negative}"
204 | puts "Neutral tweets: #{neutral}"
205 | puts "Positive tweets: #{positive}"
206 |
207 | if positive >= negative
208 | puts "Search term \"#{search_term}\" had a #{((100.0 * positive) / (positive+negative)).round(0)}\% positive sentiment."
209 | else
210 | puts "Search term \"#{search_term}\" had a #{((100.0 * negative) / (positive+negative)).round(0)}\% negative sentiment."
211 | end
212 |
213 | end
214 |
215 | def display_license
216 |
217 | puts "Copyright (C) 2010 Christopher MacLellan"
218 | puts "This program comes with ABSOLUTELY NO WARRANTY."
219 | puts "This is free software, and you are welcome to redistribute it"
220 | puts "under certain conditions; outlined in the GNU GPL v3."
221 |
222 | end
223 |
224 | # Functions to call when program is loaded
225 | display_license
226 | get_search_term_and_analyze
227 |
--------------------------------------------------------------------------------
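The functions above can also be exercised without the interactive prompt. A rough sketch, assuming the file is loaded with the two trailing calls (display_license and get_search_term_and_analyze) removed or commented out, and that sentiwords.txt and sentislang.txt are in the working directory:

    # Classify a few hard-coded strings instead of live tweets.
    samples = [
      'I love this phone :)',
      'worst customer service ever :(',
      'waiting for the bus'
    ]
    counts = Hash.new(0)
    samples.each { |text| counts[analyze_sentiment(text)] += 1 }
    puts "negative=#{counts[0]} neutral=#{counts[1]} positive=#{counts[2]}"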