├── .gitignore ├── LICENSE ├── README.md ├── sentistrength └── __init__.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | SentiStrength.jar 2 | SentiStrengthData/ 3 | __pycache__ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Yong Zhun Hung 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python-SentiStrength 2 | Python 3 Wrapper for SentiStrength, reads a single or multiple input with options for binary class or scale output. 
3 | 4 | Ensure that you have the SentiStrength.jar file and the SentiStrengthData language folder; if you do not, you can download the data from http://sentistrength.wlv.ac.uk/. For the jar file, you will have to email Dr. Mike Thelwall. 5 | 6 | ## Installation 7 | 8 | Pip: 9 | 10 | ```sh 11 | pip install sentistrength 12 | ``` 13 | 14 | 15 | ## Examples 16 | 17 | Example use (single string): 18 | 19 | ```python 20 | >>> from sentistrength import PySentiStr 21 | >>> senti = PySentiStr() 22 | >>> result = senti.getSentiment('What a lovely day') 23 | >>> print(result) 24 | 25 | ... [1] 26 | ``` 27 | 28 | 29 | Example use (list of strings or pandas Series): 30 | 31 | ```python 32 | >>> from sentistrength import PySentiStr 33 | >>> senti = PySentiStr() 34 | >>> str_arr = ['What a lovely day', 'What a bad day'] 35 | >>> result = senti.getSentiment(str_arr, score='scale') 36 | >>> print(result) 37 | 38 | ... [1, -1] 39 | # OR, if you want dual scoring (a score each for positive rating and negative rating) 40 | >>> result = senti.getSentiment(str_arr, score='dual') 41 | >>> print(result) 42 | 43 | ... [(2, -1), (1, -2)] 44 | # OR, if you want binary scoring (1 for positive sentence, -1 for negative sentence) 45 | >>> result = senti.getSentiment(str_arr, score='binary') 46 | >>> print(result) 47 | 48 | ... [1, -1] 49 | # OR, if you want trinary scoring (positive rating, negative rating and the overall classification: 1 positive, 0 neutral, -1 negative) 50 | >>> result = senti.getSentiment(str_arr, score='trinary') 51 | >>> print(result) 52 | 53 | ...
import subprocess
import shlex
import os.path
import sys
import pandas as pd
from os import getcwd


class PySentiStr:
    """Wrapper that scores text by piping it through the SentiStrength jar.

    Both the jar location and the language-data folder must be set with
    ``setSentiStrengthPath`` and ``setSentiStrengthLanguageFolderPath``
    before ``getSentiment`` is called.
    """

    # Output modes understood by getSentiment.
    _SCORE_MODES = ('scale', 'binary', 'trinary', 'dual')

    def __init__(self):
        pass

    def setSentiStrengthPath(self, ss_Path):
        """Store the path of the SentiStrength jar file."""
        self.SentiStrengthLocation = ss_Path

    def setSentiStrengthLanguageFolderPath(self, sslf_Path):
        """Store the language-data folder path, ending with a forward slash.

        Raises:
            ValueError: if ``sslf_Path`` is empty.
        """
        if not sslf_Path:
            raise ValueError("Language folder path must not be empty.")
        # Ensure it has a forward slash at the end
        if not sslf_Path.endswith('/'):
            sslf_Path += '/'
        self.SentiStrengthLanguageFolder = sslf_Path

    def getSentiment(self, df_text, score='scale'):
        """Score one or more texts with SentiStrength (run in trinary mode).

        Args:
            df_text: a single string, a list of strings or a pandas Series
                of strings. Line breaks inside a text are removed because
                SentiStrength reads one text per input line.
            score: output format, one of
                'scale'   -- positive + negative score per text (-4 to 4),
                'binary'  -- 1 if positive >= abs(negative), else -1,
                'trinary' -- (positive, negative, third value) tuple, where
                             the third value is whatever SentiStrength
                             prints last in trinary mode,
                'dual'    -- (positive, negative) tuple.

        Returns:
            A list with one entry per input text. For an unrecognised
            ``score`` a message string is returned instead (kept for
            backward compatibility) and SentiStrength is not launched.

        Raises:
            AssertionError: if either path has not been set.
            Exception: if SentiStrength's output cannot be parsed as rows of
                three numbers; the message carries its stdout, or its stderr
                when stdout is empty.
        """
        # Explicit raises instead of ``assert`` so the checks survive ``-O``.
        if not hasattr(self, 'SentiStrengthLocation'):
            raise AssertionError(
                "Set path using setSentiStrengthPath(path) function.")
        if not hasattr(self, 'SentiStrengthLanguageFolder'):
            raise AssertionError(
                "Set path using setSentiStrengthLanguageFolderPath(path) function.")

        # Reject a bad mode before paying for a JVM start-up.
        if score not in self._SCORE_MODES:
            return ("Argument 'score' takes in either 'scale', 'binary', "
                    "'trinary' or 'dual'")

        if not isinstance(df_text, pd.Series):
            df_text = pd.Series(df_text)
        df_text = df_text.str.replace('\n', '').str.replace('\r', '')
        conc_text = '\n'.join(df_text)

        # An argument list avoids shell-style quoting problems with paths
        # that contain quotes or other special characters.
        command = [
            'java', '-jar', self.SentiStrengthLocation,
            'stdin', 'sentidata', self.SentiStrengthLanguageFolder,
            'trinary',
        ]
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # Spaces are sent as '+', as in the original wrapper.
        payload = conc_text.replace(' ', '+').encode('utf-8')
        stdout_byte, stderr_byte = process.communicate(payload)

        stdout_text = stdout_byte.decode('utf-8').rstrip().replace('\t', ' ')
        # Splitting on any whitespace copes with tabs, '\n' and '\r\n' alike,
        # so scores on adjacent lines can never be glued together.
        tokens = stdout_text.split()
        try:
            values = [int(float(token)) for token in tokens]
        except ValueError as err:
            raise Exception(stdout_text) from err

        if not values or len(values) % 3 != 0:
            # Typically the JVM failed to start; stderr explains why.
            stderr_text = stderr_byte.decode('utf-8', errors='replace').strip()
            raise Exception(stdout_text or stderr_text)

        rows = [values[i:i + 3] for i in range(0, len(values), 3)]
        if score == 'scale':  # Returns from -4 to 4
            return [row[0] + row[1] for row in rows]
        if score == 'binary':  # 1 if positive, -1 if negative
            return [1 if row[0] >= abs(row[1]) else -1 for row in rows]
        if score == 'trinary':  # All three values reported per text
            return [tuple(row) for row in rows]
        # score == 'dual': positive and negative score only
        return [tuple(row[:2]) for row in rows]
name="sentistrength", 8 | version="0.0.9", 9 | author="Zhun Hung", 10 | author_email="yongzhunhung@gmail.com", 11 | description="Python 3 Wrapper for SentiStrength, reads a single or multiple input with options for binary class or scale output.", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/zhunhung/pysentistrength", 15 | packages=setuptools.find_packages(), 16 | classifiers=[ 17 | "Programming Language :: Python :: 3", 18 | "License :: OSI Approved :: MIT License", 19 | "Operating System :: OS Independent", 20 | ], 21 | ) --------------------------------------------------------------------------------