├── LICENCE.txt
├── README.md
├── example-results.txt
├── sentitext.php
├── vader_sentiment_lexicon.txt
└── vadersentiment.php


/LICENCE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2014 cjhutto
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abusby/php-vadersentiment/87d4a3970f30af10169406e9308fa1a5fba2efdc/README.md


--------------------------------------------------------------------------------
/example-results.txt:
--------------------------------------------------------------------------------
 1 | # --- output for the above example code ---
 2 | VADER is smart, handsome, and funny.
 3 |     {'neg': 0.0, 'neu': 0.254, 'pos': 0.746, 'compound': 0.8316}
 4 | VADER is smart, handsome, and funny!
 5 |     {'neg': 0.0, 'neu': 0.248, 'pos': 0.752, 'compound': 0.8439}
 6 | VADER is very smart, handsome, and funny.
 7 |     {'neg': 0.0, 'neu': 0.299, 'pos': 0.701, 'compound': 0.8545}
 8 | VADER is VERY SMART, handsome, and FUNNY.
 9 |     {'neg': 0.0, 'neu': 0.246, 'pos': 0.754, 'compound': 0.9227}
10 | VADER is VERY SMART, handsome, and FUNNY!!!
11 |     {'neg': 0.0, 'neu': 0.233, 'pos': 0.767, 'compound': 0.9342}
12 | VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!
13 |     {'neg': 0.0, 'neu': 0.294, 'pos': 0.706, 'compound': 0.9469}
14 | The book was good.
15 |     {'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}
16 | The book was kind of good.
17 |     {'neg': 0.0, 'neu': 0.657, 'pos': 0.343, 'compound': 0.3832}
18 | The plot was good, but the characters are uncompelling and the dialog is not great.
19 |     {'neg': 0.327, 'neu': 0.579, 'pos': 0.094, 'compound': -0.7042}
20 | A really bad, horrible book.
21 |     {'neg': 0.791, 'neu': 0.209, 'pos': 0.0, 'compound': -0.8211}
22 | At least it isn't a horrible book.
23 |     {'neg': 0.0, 'neu': 0.637, 'pos': 0.363, 'compound': 0.431}
24 | :) and :D
25 |     {'neg': 0.0, 'neu': 0.124, 'pos': 0.876, 'compound': 0.7925}
26 | 
27 |     {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0}
28 | Today sux
29 |     {'neg': 0.714, 'neu': 0.286, 'pos': 0.0, 'compound': -0.3612}
30 | Today sux!
31 |     {'neg': 0.736, 'neu': 0.264, 'pos': 0.0, 'compound': -0.4199}
32 | Today SUX!
33 |     {'neg': 0.779, 'neu': 0.221, 'pos': 0.0, 'compound': -0.5461}
34 | Today kinda sux! But I'll get by, lol
35 |     {'neg': 0.195, 'neu': 0.531, 'pos': 0.274, 'compound': 0.2228}


--------------------------------------------------------------------------------
/sentitext.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /*
  3 | 		Identify sentiment-relevant string-level properties of input text.
  4 | */
  5 | const PUNC_LIST = [".", "!", "?", ",", ";", ":", "-", "'", "\"",
  6 |              "!!", "!!!", "??", "???", "?!?", "!?!", "?!?!", "!?!?"];
  7 | class SentiText {
  8 | 	
  9 | 	private $text = "";
 10 | 	public $words_and_emoticons = null;
 11 | 	public $is_cap_diff = null;
 12 | 	
 13 | 	
 14 | 	
 15 |    function __construct($text){
 16 |         //checking that is string
 17 | 		//if (!isinstance(text, str)){
 18 |         //    text = str(text.encode('utf-8'));
 19 | 		//}
 20 |         $this->text = $text;
 21 |         $this->words_and_emoticons = $this->_words_and_emoticons();
 22 |         // doesn't separate words from\
 23 | 		// adjacent punctuation (keeps emoticons & contractions)
 24 |         $this->is_cap_diff = $this->allcap_differential($this->words_and_emoticons);
 25 | 	}
 26 | 	
 27 | 	/*
 28 | 		Remove all punctation from a string
 29 | 	*/
 30 | 	function strip_punctuation($string) {
 31 | 		//$string = strtolower($string);
 32 | 	   return preg_replace("/[[:punct:]]+/", "", $string);
 33 | 	}
 34 | 	
 35 | 	function array_count_values_of($haystack, $needle) {
 36 | 		if(!in_array($needle,$haystack)){
 37 | 			return 0;
 38 | 		}
 39 | 		$counts = array_count_values($haystack);
 40 | 		return $counts[$needle];
 41 | 	}
 42 | 	
 43 | 	/*
 44 | 		Check whether just some words in the input are ALL CAPS
 45 | 
 46 | 		:param list words: The words to inspect
 47 | 		:returns: `True` if some but not all items in `words` are ALL CAPS
 48 | 	*/
 49 | 	private function allcap_differential($words){
 50 | 
 51 | 		$is_different = false;
 52 | 		$allcap_words = 0;
 53 | 		foreach($words as $word){
 54 | 			//ctype is affected by the local of the processor see manual for more details
 55 | 			if(ctype_upper($word)){
 56 | 				$allcap_words += 1;
 57 | 			}
 58 | 		}
 59 | 		$cap_differential = count($words) - $allcap_words;
 60 | 		if ($cap_differential > 0 && $cap_differential < count($words)){
 61 | 			$is_different = true;
 62 | 		}
 63 | 		return $is_different;
 64 | 	}
 65 | 	
 66 |     function _words_only(){
 67 | 		$text_mod = $this->strip_punctuation($this->text);
 68 |         // removes punctuation (but loses emoticons & contractions)
 69 |         $words_only = preg_split('/\s+/',$text_mod);
 70 |         # get rid of empty items or single letter "words" like 'a' and 'I'
 71 | 		$works_only = array_filter($words_only,function($word){ return strlen($word) > 1; });
 72 |         return $words_only;
 73 | 	}
 74 | 
 75 |     function _words_and_emoticons(){
 76 | 		
 77 |         $wes = preg_split('/\s+/',$this->text);
 78 | 		
 79 |         # get rid of residual empty items or single letter words
 80 | 		$wes = array_filter($wes,function($word){ return strlen($word) > 1; });
 81 | 		//Need to remap the indexes of the array
 82 | 		$wes = array_values ($wes);
 83 | 		$words_only = $this->_words_only();
 84 | 		
 85 | 		foreach($words_only as $word){
 86 | 			
 87 | 			foreach(PUNC_LIST as $punct){
 88 | 				
 89 | 				//replace all punct + word combinations with word
 90 |                 $pword = $punct .$word;
 91 | 			
 92 | 				
 93 |                 $x1 = $this->array_count_values_of($wes,$pword);
 94 |                 while ($x1 > 0){
 95 |                     $i = array_search($pword,$wes);
 96 |                     unset($wes[$i]);
 97 | 					array_splice($wes,$i,0,$word);
 98 |                     $x1 = $this->array_count_values_of($wes,$pword);
 99 | 				}
100 | 				//Do the same as above but word then punct
101 |                 $wordp = $word . $punct;
102 |                 $x2 = $this->array_count_values_of($wes, $wordp);
103 |                 while ($x2 > 0){
104 |                     $i = array_search($wordp, $wes);
105 |                     unset($wes[$i]);
106 | 					array_splice($wes,$i,0,$word);
107 |                     $x2 = $this->array_count_values_of($wes, $wordp);
108 | 				}
109 | 			}
110 | 		}
111 | 
112 |         return $wes;
113 | 	}
114 | }
115 | ?>


--------------------------------------------------------------------------------
/vadersentiment.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | require_once "sentitext.php";
  4 | 
  5 | //Constants
  6 | 
  7 | // (empirically derived mean sentiment intensity rating increase for booster words)
  8 | define("B_INCR",0.293);
  9 | define("B_DECR",-0.293);
 10 | 
 11 | // (empirically derived mean sentiment intensity rating increase for using
 12 | // ALLCAPs to emphasize a word)
 13 | define("C_INCR", 0.733);
 14 | 
 15 | define("N_SCALAR", -0.74);
 16 | 
 17 | // for removing punctuation
 18 | //REGEX_REMOVE_PUNCTUATION = re.compile('[%s]' % re.escape(string.punctuation))
 19 | 
 20 | 
 21 | 
 22 | 			 
 23 | 			 
 24 | const NEGATE = ["aint", "arent", "cannot", "cant", "couldnt", "darent", "didnt", "doesnt",
 25 | 	"ain't", "aren't", "can't", "couldn't", "daren't", "didn't", "doesn't",
 26 | 	"dont", "hadnt", "hasnt", "havent", "isnt", "mightnt", "mustnt", "neither",
 27 | 	"don't", "hadn't", "hasn't", "haven't", "isn't", "mightn't", "mustn't",
 28 | 	"neednt", "needn't", "never", "none", "nope", "nor", "not", "nothing", "nowhere",
 29 | 	"oughtnt", "shant", "shouldnt", "uhuh", "wasnt", "werent",
 30 | 	"oughtn't", "shan't", "shouldn't", "uh-uh", "wasn't", "weren't",
 31 | 	"without", "wont", "wouldnt", "won't", "wouldn't", "rarely", "seldom", "despite"];
 32 | 
 33 | //booster/dampener 'intensifiers' or 'degree adverbs'
 34 | //http://en.wiktionary.org/wiki/Category:English_degree_adverbs
 35 | 
 36 | const BOOSTER_DICT = ["absolutely"=> B_INCR, "amazingly"=> B_INCR, "awfully"=> B_INCR, "completely"=> B_INCR, "considerably"=> B_INCR,
 37 |  "decidedly"=> B_INCR, "deeply"=> B_INCR, "effing"=> B_INCR, "enormously"=> B_INCR,
 38 |  "entirely"=> B_INCR, "especially"=> B_INCR, "exceptionally"=> B_INCR, "extremely"=> B_INCR,
 39 |  "fabulously"=> B_INCR, "flipping"=> B_INCR, "flippin"=> B_INCR,
 40 |  "fricking"=> B_INCR, "frickin"=> B_INCR, "frigging"=> B_INCR, "friggin"=> B_INCR, "fully"=> B_INCR, "fucking"=> B_INCR,
 41 |  "greatly"=> B_INCR, "hella"=> B_INCR, "highly"=> B_INCR, "hugely"=> B_INCR, "incredibly"=> B_INCR,
 42 |  "intensely"=> B_INCR, "majorly"=> B_INCR, "more"=> B_INCR, "most"=> B_INCR, "particularly"=> B_INCR,
 43 |  "purely"=> B_INCR, "quite"=> B_INCR, "really"=> B_INCR, "remarkably"=> B_INCR,
 44 |  "so"=> B_INCR, "substantially"=> B_INCR,
 45 |  "thoroughly"=> B_INCR, "totally"=> B_INCR, "tremendously"=> B_INCR,
 46 |  "uber"=> B_INCR, "unbelievably"=> B_INCR, "unusually"=> B_INCR, "utterly"=> B_INCR,
 47 |  "very"=> B_INCR,
 48 |  "almost"=> B_DECR, "barely"=> B_DECR, "hardly"=> B_DECR, "just enough"=> B_DECR,
 49 |  "kind of"=> B_DECR, "kinda"=> B_DECR, "kindof"=> B_DECR, "kind-of"=> B_DECR,
 50 |  "less"=> B_DECR, "little"=> B_DECR, "marginally"=> B_DECR, "occasionally"=> B_DECR, "partly"=> B_DECR,
 51 |  "scarcely"=> B_DECR, "slightly"=> B_DECR, "somewhat"=> B_DECR,
 52 |  "sort of"=> B_DECR, "sorta"=> B_DECR, "sortof"=> B_DECR, "sort-of"=> B_DECR];
 53 | 
 54 | // check for special case idioms using a sentiment-laden keyword known to SAGE
 55 | const SPECIAL_CASE_IDIOMS = ["the shit"=> 3, "the bomb"=> 3, "bad ass"=> 1.5, "yeah right"=> -2,
 56 |                        "cut the mustard"=> 2, "kiss of death"=> -1.5, "hand to mouth"=> -2];
 57 | ##Static methods##
 58 | 
 59 | /*
 60 |     Normalize the score to be between -1 and 1 using an alpha that
 61 |     approximates the max expected value
 62 | */
 63 | function normalize($score, $alpha=15){
 64 | 	$norm_score = $score/sqrt(($score*$score) + $alpha);
 65 |     return $norm_score;
 66 | }
 67 | 
 68 | 
 69 | 
 70 | 
 71 | 
 72 | 
 73 | 
 74 |     
 75 | 
 76 | /*
 77 | 	Give a sentiment intensity score to sentences.
 78 | */
 79 | 
 80 | class SentimentIntensityAnalyzer{
 81 | 
 82 | 	private $lexicon_file = "";
 83 | 	private $lexicon = "";
 84 | 	
 85 | 	private $current_sentitext = null;
 86 | 	
 87 |     function __construct($lexicon_file="vader_sentiment_lexicon.txt"){
 88 | 		//Not sure about this as it forces lexicon file to be in the same directory as executing script
 89 |         $this->lexicon_file = realpath(dirname(__FILE__)) . "\\" . $lexicon_file;
 90 |         $this->lexicon = $this->make_lex_dict();
 91 | 	}
 92 | 	
 93 | 	
 94 | 	/*
 95 | 		Determine if input contains negation words
 96 | 	*/
 97 | 	function IsNegated($wordToTest, $include_nt=true){
 98 | 		
 99 | 		if(in_array($wordToTest,NEGATE)){
100 | 			return true;
101 | 		}
102 | 
103 | 		if ($include_nt) {
104 | 			if (strpos($wordToTest,"n't")){
105 | 				return true;
106 | 			}
107 | 		}
108 | 
109 | 		return false;
110 | 	}
111 | 	
112 | 
113 | 	/*
114 | 		Convert lexicon file to a dictionary
115 | 	*/
116 |     function make_lex_dict(){
117 |         $lex_dict = [];
118 |         $fp = fopen($this->lexicon_file,"r");
119 | 		if(!$fp){
120 | 			die("Cannot load lexicon file");
121 | 		}
122 | 		
123 | 		while (($line = fgets($fp, 4096)) !== false) {
124 |            
125 |             list($word, $measure) = explode("\t",trim($line));
126 | 			//.strip().split('\t')[0:2]
127 | 			$lex_dict[$word] = $measure;
128 | 			//lex_dict[word] = float(measure)
129 | 		}
130 |         return $lex_dict;
131 | 	}
132 | 	
133 | 	
134 | 	private function IsKindOf($firstWord,$secondWord){
135 | 		return "kind" === strtolower($firstWord) && "of" === strtolower($secondWord);
136 | 	}
137 | 	
138 | 	private function IsBoosterWord($word){
139 | 		return array_key_exists(strtolower($word),BOOSTER_DICT);
140 | 	}
141 | 	
142 | 	private function getBoosterScaler($word){
143 | 		return BOOSTER_DICT[strtolower($word)];
144 | 	}
145 | 	
146 | 	private function IsInLexicon($word){
147 | 		$lowercase = strtolower($word);
148 | 		return array_key_exists($lowercase,$this->lexicon);
149 | 	}
150 | 	private function IsUpperCaseWord($word){
151 | 		return ctype_upper($word);
152 | 	}
153 | 	
154 | 	private function getValenceFromLexicon($word){
155 | 		return $this->lexicon[strtolower($word)];
156 | 	}
157 | 	
158 | 	private function getTargetWordFromContext($wordInContext){
159 | 		return $wordInContext[count($wordInContext)-1];
160 | 	}
161 | 
162 | 	/*
163 | 		Gets the precedding two words to check for emphasis
164 | 	*/
165 | 	private function getWordInContext($wordList,$currentWordPosition){
166 | 		$precedingWordList =[];
167 | 		
168 | 		//push the actual word on to the context list
169 | 		array_unshift($precedingWordList,$wordList[$currentWordPosition]);
170 | 		//If the word position is greater than 2 then we know we are not going to overflow
171 | 		if(($currentWordPosition-1)>=0){
172 | 			array_unshift($precedingWordList,$wordList[$currentWordPosition-1]);
173 | 		}else{
174 | 			array_unshift($precedingWordList,"");
175 | 		}
176 | 		if(($currentWordPosition-2)>=0){
177 | 			array_unshift($precedingWordList,$wordList[$currentWordPosition-2]);
178 | 		}else{
179 | 			array_unshift($precedingWordList,"");
180 | 		}
181 | 		if(($currentWordPosition-3)>=0){
182 | 			array_unshift($precedingWordList,$wordList[$currentWordPosition-3]);
183 | 		}else{
184 | 			array_unshift($precedingWordList,"");
185 | 		}
186 | 		return $precedingWordList;
187 | 	}
188 | 	
189 | 	
190 | 	/*
191 | 		Return a float for sentiment strength based on the input text.
192 |         Positive values are positive valence, negative value are negative
193 |         valence.
194 | 	*/	
195 |     function getSentiment($text){
196 |         $this->current_sentitext = new SentiText($text);
197 |   
198 |         $sentiments = [];
199 |         $words_and_emoticons = $this->current_sentitext->words_and_emoticons;
200 | 
201 | 		for($i=0;$i<count($words_and_emoticons)-1;$i++){
202 | 			
203 |             $valence = 0.0;
204 |             $wordBeingTested = $words_and_emoticons[$i];
205 | 			
206 | 			//If this is a booster word add a 0 valances then go to next word as it does not express sentiment directly
207 |            /* if ($this->IsBoosterWord($wordBeingTested)){
208 | 				echo "\t\tThe word is a booster word: setting sentiment to 0.0\n";
209 | 			}*/
210 | 			
211 | 			//If the word is not in the Lexicon then it does not express sentiment. So just ignore it.
212 | 			if($this->IsInLexicon($wordBeingTested)){
213 | 				//Special case because kind is in the lexicon so the modifier kind of needs to be skipped
214 | 				if("kind" !=$words_and_emoticons[$i] && "of" != $words_and_emoticons[$i+1]){
215 | 					$valence = $this->getValenceFromLexicon($wordBeingTested);
216 | 
217 | 					$wordInContext = $this->getWordInContext($words_and_emoticons,$i);
218 | 					//If we are here then we have a word that enhance booster words
219 | 					$valence = $this->adjustBoosterSentiment($wordInContext,$valence);
220 | 				}
221 | 
222 | 				
223 | 			}
224 | 			array_push($sentiments,$valence);
225 | 		}
226 | 		//Once we have a sentiment for each word adjust the sentimest if but is present
227 |         $sentiments = $this->_but_check($words_and_emoticons, $sentiments);
228 | 
229 |         return $this->score_valence($sentiments, $text);
230 | 	}
231 | 	
232 | 	
233 | 	
234 | 	
235 | 	private function applyValenceCapsBoost($targetWord,$valence){
236 | 		if($this->IsUpperCaseWord($targetWord) && $this->current_sentitext->is_cap_diff){
237 | 			if($valence > 0){
238 | 				$valence += C_INCR;
239 | 			}
240 | 			else{
241 | 				$valence -= C_INCR;
242 | 			}
243 | 		}
244 | 		return $valence;
245 | 	}
246 | 	
247 | 	/*
248 | 		Check if the preceding words increase, decrease, or negate/nullify the
249 | 		valence
250 | 	 */
251 | 	private function boosterScaleAdjustment($word, $valence){
252 | 		$scalar = 0.0;
253 | 		if(!$this->IsBoosterWord($word)){
254 | 			return $scalar;
255 | 		}
256 | 		
257 | 		$scalar = $this->getBoosterScaler($word);
258 | 		
259 | 		if ($valence < 0){
260 | 			$scalar *= -1;
261 | 		}
262 | 	   //check if booster/dampener word is in ALLCAPS (while others aren't)
263 | 		$scalar = $this->applyValenceCapsBoost($word,$scalar);
264 | 		
265 | 		return $scalar;
266 | 	}
267 | 	
268 | 	// dampen the scalar modifier of preceding words and emoticons
269 | 	// (excluding the ones that immediately preceed the item) based
270 | 	// on their distance from the current item.
271 | 	private function dampendBoosterScalerByPosition($booster,$position){
272 | 		if(0===$booster){
273 | 			return $booster;
274 | 		}
275 | 		if(1==$position){
276 | 			return $booster*0.95;
277 | 		}
278 | 		if(2==$position){
279 | 			return $booster*0.9;
280 | 		}
281 | 		return $booster;
282 | 	}
283 |     
284 | 	
285 | 	private function adjustBoosterSentiment($wordInContext,$valence){
286 |         //The target word is always the last word
287 | 		$targetWord = $this->getTargetWordFromContext($wordInContext);
288 | 
289 | 		//check if sentiment laden word is in ALL CAPS (while others aren't) and apply booster
290 | 		$valence = $this->applyValenceCapsBoost($targetWord,$valence);
291 | 		
292 | 		$valence = $this->modifyValenceBasedOnContext($wordInContext,$valence);
293 | 		return $valence;
294 | 	}
295 | 		
296 | 	private function modifyValenceBasedOnContext($wordInContext,$valence){
297 | 
298 | 			$wordToTest = $this->getTargetWordFromContext($wordInContext);
299 | 			//if($this->IsInLexicon($wordToTest)){
300 | 			//	continue;
301 | 			//}
302 | 			for($i=0;$i<count($wordInContext)-1;$i++){
303 | 				$scalarValue = $this->boosterScaleAdjustment($wordInContext[$i], $valence);
304 | 				$scalarValue = $this->dampendBoosterScalerByPosition($scalarValue,$i);
305 | 				$valence = $valence+$scalarValue;
306 | 			}
307 | 
308 | 			
309 | 			$valence = $this->_never_check($wordInContext, $valence);
310 | 
311 | 			$valence = $this->_idioms_check($wordInContext, $valence);
312 | 
313 | 				# future work: consider other sentiment-laden idioms
314 | 				# other_idioms =
315 | 				# {"back handed": -2, "blow smoke": -2, "blowing smoke": -2,
316 | 				#  "upper hand": 1, "break a leg": 2,
317 | 				#  "cooking with gas": 2, "in the black": 2, "in the red": -2,
318 | 				#  "on the ball": 2,"under the weather": -2}
319 | 
320 | 			$valence = $this->_least_check($wordInContext, $valence);
321 | 			
322 | 		
323 |         return $valence;
324 | 	}
325 | 	
326 |     function _least_check($wordInContext, $valence){
327 |         # check for negation case using "least"
328 | 		//if the previous word is least"
329 |         if(strtolower($wordInContext[2]) == "least"){
330 | 			//but not "at least {word}" "very least {word}"
331 |             if (strtolower($wordInContext[1]) != "at" && strtolower($wordInContext[1]) != "very"){
332 |                 $valence = $valence*N_SCALAR;
333 | 			}
334 | 		}
335 |         return $valence;
336 | 	}
337 | 
338 | 	
339 |     function _but_check($words_and_emoticons, $sentiments){
340 |         # check for modification in sentiment due to contrastive conjunction 'but'
341 | 		$bi = array_search("but",$words_and_emoticons);
342 | 		if(!$bi){
343 | 			$bi = array_search("BUT",$words_and_emoticons);
344 | 		}
345 |         if($bi){
346 | 			for($si=0;$si<count($sentiments);$si++){
347 | 				if($si<$bi){
348 | 					$sentiments[$si] = $sentiments[$si]*0.5;
349 | 				}elseif($si> $bi){
350 | 					$sentiments[$si] = $sentiments[$si]*1.5;
351 | 				}
352 | 			}
353 | 		}
354 |         return $sentiments;
355 | 	}
356 | 
357 |     function _idioms_check($wordInContext, $valence){
358 |         $onezero = sprintf("%s %s",$wordInContext[2], $wordInContext[3]);
359 | 
360 |         $twoonezero = sprintf("%s %s %s",$wordInContext[1],
361 |                                        $wordInContext[2], $wordInContext[3]);
362 | 
363 |         $twoone = sprintf("%s %s",$wordInContext[1], $wordInContext[2]);
364 | 
365 |         $threetwoone = sprintf("%s %s %s",$wordInContext[0],
366 |                                         $wordInContext[1], $wordInContext[2]);
367 | 
368 |         $threetwo = sprintf("%s %s",$wordInContext[0], $wordInContext[1]);
369 | 
370 | 		$zeroone = sprintf("%s %s",$wordInContext[3], $wordInContext[2]);
371 | 		
372 | 		$zeroonetwo = sprintf("%s %s %s",$wordInContext[3], $wordInContext[2], $wordInContext[1]);
373 | 		
374 |         $sequences = [$onezero, $twoonezero, $twoone, $threetwoone, $threetwo];
375 | 
376 |         foreach($sequences as $seq){
377 |             if (array_key_exists(strtolower($seq), SPECIAL_CASE_IDIOMS)){
378 |                 $valence = SPECIAL_CASE_IDIOMS[$seq];
379 |                 break;
380 | 			}
381 | 			
382 | 			
383 | /*
384 | 			Positive idioms check.  Not implementing it yet
385 | 			if(count($words_and_emoticons)-1 > $i){
386 | 				$zeroone = sprintf("%s %s",$words_and_emoticons[$i], $words_and_emoticons[$i+1]);
387 | 			   if (in_array($zeroone, SPECIAL_CASE_IDIOMS)){
388 | 					$valence = SPECIAL_CASE_IDIOMS[$zeroone];
389 | 				}
390 | 			}
391 | 			if(count($words_and_emoticons)-1 > $i+1){
392 | 				$zeroonetwo = sprintf("%s %s %s",$words_and_emoticons[$i], $words_and_emoticons[$i+1], $words_and_emoticons[$i+2]);
393 | 				if (in_array($zeroonetwo, SPECIAL_CASE_IDIOMS)){
394 | 					$valence = SPECIAL_CASE_IDIOMS[$zeroonetwo];
395 | 				}
396 | 			}
397 | */
398 | 
399 | 			// check for booster/dampener bi-grams such as 'sort of' or 'kind of'
400 | 			if($this->IsBoosterWord($threetwo) || $this->IsBoosterWord($twoone)){
401 | 				$valence = $valence+B_DECR;
402 | 			}
403 | 		}
404 |         return $valence;
405 | 	}
406 | 
407 |     function _never_check($wordInContext,$valance){
408 | 		//If the sentiment word is preceded by never so/this we apply a modifier
409 | 		$neverModifier = 0;
410 | 		if("never" == $wordInContext[0]){
411 | 			$neverModifier = 1.25;
412 | 		}else if("never" == $wordInContext[1]){
413 | 			$neverModifier = 1.5;
414 | 		}
415 | 		if("so" == $wordInContext[1] || "so"== $wordInContext[2] || "this" == $wordInContext[1] || "this" == $wordInContext[2]){
416 | 			$valance *= $neverModifier;
417 | 		}
418 | 		
419 | 		//if any of the words in context are negated words apply negative scaler
420 | 		foreach($wordInContext as $wordToCheck){
421 | 			if($this->IsNegated($wordToCheck)){
422 | 				$valance *= B_DECR;
423 | 			}
424 | 		}
425 | 		
426 | 
427 |         return $valance;
428 | 	}
429 | 	
430 |     function _punctuation_emphasis($sum_s, $text){
431 |         # add emphasis from exclamation points and question marks
432 |         $ep_amplifier = $this->_amplify_ep($text);
433 |         $qm_amplifier = $this->_amplify_qm($text);
434 |         $punct_emph_amplifier = $ep_amplifier+$qm_amplifier;
435 |         return $punct_emph_amplifier;
436 | 	}
437 |     
438 | 	function _amplify_ep($text){
439 |         # check for added emphasis resulting from exclamation points (up to 4 of them)
440 |         $ep_count = substr_count($text,"!");
441 |         if ($ep_count > 4){
442 |             $ep_count = 4;
443 | 		}
444 |         # (empirically derived mean sentiment intensity rating increase for
445 |         # exclamation points)
446 |         $ep_amplifier = $ep_count*0.292;
447 |         return $ep_amplifier;
448 | 	}
449 | 
450 |     function _amplify_qm($text){
451 |         # check for added emphasis resulting from question marks (2 or 3+)
452 |         $qm_count = substr_count ($text,"?");
453 |         $qm_amplifier = 0;
454 |         if ($qm_count > 1){
455 |             if ($qm_count <= 3){
456 |                 # (empirically derived mean sentiment intensity rating increase for
457 |                 # question marks)
458 |                 $qm_amplifier = $qm_count*0.18;
459 |             }else{
460 |                 $qm_amplifier = 0.96;
461 | 			}
462 | 		}
463 |         return $qm_amplifier;
464 | 	}
465 | 
466 |     function _sift_sentiment_scores($sentiments){
467 |         # want separate positive versus negative sentiment scores
468 |         $pos_sum = 0.0;
469 |         $neg_sum = 0.0;
470 |         $neu_count = 0;
471 |         foreach($sentiments as $sentiment_score){
472 |             if($sentiment_score > 0){
473 |                 $pos_sum += $sentiment_score +1; # compensates for neutral words that are counted as 1
474 | 			}
475 |             if ($sentiment_score < 0){
476 |                 $neg_sum += $sentiment_score -1; # when used with math.fabs(), compensates for neutrals
477 | 			}
478 |             if ($sentiment_score == 0){
479 |                 $neu_count += 1;
480 | 			}
481 | 		}
482 |         return [$pos_sum, $neg_sum, $neu_count];
483 | 	}
484 |     
485 | 	function score_valence($sentiments, $text){
486 |         if ($sentiments){
487 |             $sum_s = array_sum($sentiments);
488 |             # compute and add emphasis from punctuation in text
489 |             $punct_emph_amplifier = $this->_punctuation_emphasis($sum_s, $text);
490 |             if ($sum_s > 0){
491 |                 $sum_s += $punct_emph_amplifier;
492 | 			}
493 |             elseif  ($sum_s < 0){
494 |                 $sum_s -= $punct_emph_amplifier;
495 | 			}
496 | 
497 |             $compound = normalize($sum_s);
498 |             # discriminate between positive, negative and neutral sentiment scores
499 |             list($pos_sum, $neg_sum, $neu_count) = $this->_sift_sentiment_scores($sentiments);
500 | 
501 |             if ($pos_sum > abs($neg_sum)){
502 |                 $pos_sum += $punct_emph_amplifier;
503 | 			}
504 |             elseif ($pos_sum < abs($neg_sum)){
505 |                 $neg_sum -= $punct_emph_amplifier;
506 | 			}
507 | 
508 |             $total = $pos_sum + abs($neg_sum) + $neu_count;
509 |             $pos =abs($pos_sum / $total);
510 |             $neg = abs($neg_sum / $total);
511 |             $neu = abs($neu_count / $total);
512 | 
513 |         }else{
514 |             $compound = 0.0;
515 | 			$pos = 0.0;
516 |             $neg = 0.0;
517 |             $neu = 0.0;
518 | 		}
519 | 
520 |         $sentiment_dict = 
521 |             ["neg" => round($neg, 3),
522 |              "neu" => round($neu, 3),
523 |              "pos" => round($pos, 3),
524 |              "compound" => round($compound, 4)];
525 | 
526 |         return $sentiment_dict;
527 | 	}
528 | }
529 | 	
530 | ?>


--------------------------------------------------------------------------------