├── Git Cheat Sheet.pdf
├── Git Commands.docx
├── README.md
├── Useful Git Commands.pdf
├── lexicon_analyzer.py
├── main.py
├── ml_analyzer.py
├── model_eval.py
├── nltk_analyzer.py
├── sentiment_comparison.py
├── user_interface.py
└── utils
    ├── data_visualization.py
    └── text_preprocessing.py

/Git Cheat Sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lixavi/sentiment-scope/a257802af1f93c6cdf2da8d9f64dca5675819c9c/Git Cheat Sheet.pdf
--------------------------------------------------------------------------------
/Git Commands.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lixavi/sentiment-scope/a257802af1f93c6cdf2da8d9f64dca5675819c9c/Git Commands.docx
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Sentiment Scope

SentimentScope is a tool for detailed sentiment analysis of text, combining the Natural Language Toolkit (NLTK), spaCy, lexicon-based analysis, and machine learning classifiers.

## Overview

Sentiment analysis, also known as opinion mining, is the process of determining whether the sentiment expressed in a piece of text is positive, negative, or neutral. SentimentScope provides a comprehensive solution for analyzing sentiment in texts using various techniques and libraries.

## Features

- Sentiment analysis using NLTK (Natural Language Toolkit)
- Sentiment analysis using spaCy
- Lexicon-based sentiment analysis
- Sentiment analysis using machine learning classifiers
- Text preprocessing utilities
- Data loading and management
- Data visualization of sentiment distribution
- User interaction through a command-line interface (CLI)
- Model evaluation metrics for performance assessment
--------------------------------------------------------------------------------
/Useful Git Commands.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lixavi/sentiment-scope/a257802af1f93c6cdf2da8d9f64dca5675819c9c/Useful Git Commands.pdf
--------------------------------------------------------------------------------
/lexicon_analyzer.py:
--------------------------------------------------------------------------------
class LexiconAnalyzer:
    def __init__(self, lexicon):
        # lexicon: dict mapping words to sentiment scores
        # (positive values for positive words, negative for negative ones)
        self.lexicon = lexicon

    def analyze_sentiment(self, text):
        # Sum the scores of all known words; unknown words contribute 0
        words = text.lower().split()
        sentiment_score = sum(self.lexicon.get(word, 0) for word in words)

        if sentiment_score > 0:
            sentiment = 'positive'
        elif sentiment_score < 0:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'

        return sentiment
--------------------------------------------------------------------------------
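A minimal usage sketch for LexiconAnalyzer. The toy word-score lexicon below is made up for illustration; real sentiment lexicons such as AFINN are far larger.

    lexicon = {"good": 1, "great": 2, "bad": -1, "terrible": -2}
    analyzer = LexiconAnalyzer(lexicon)

    print(analyzer.analyze_sentiment("The food was good but the service was terrible"))
    # 'negative': the summed score is 1 + (-2) = -1
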
/main.py:
--------------------------------------------------------------------------------
# main.py
from nltk_analyzer import NLTKAnalyzer
from analysis.spacy_analyzer import SpacyAnalyzer  # import path as used in user_interface.py
from utils.text_preprocessing import TextPreprocessor

def main():
    # Read text from a file
    with open("data/sample_text.txt", "r") as file:
        text = file.read()

    # Preprocess the text: tokenize, drop stopwords and punctuation, stem
    preprocessor = TextPreprocessor()
    preprocessed_text = " ".join(preprocessor.preprocess_text(text))

    # Perform sentiment analysis using NLTK
    nltk_sentiment, _ = NLTKAnalyzer().analyze_sentiment(preprocessed_text)
    print("NLTK Sentiment:", nltk_sentiment)

    # Perform sentiment analysis using spaCy
    spacy_sentiment, _ = SpacyAnalyzer().analyze_sentiment(preprocessed_text)
    print("spaCy Sentiment:", spacy_sentiment)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/ml_analyzer.py:
--------------------------------------------------------------------------------
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

class MLAnalyzer:
    def __init__(self):
        self.vectorizer = TfidfVectorizer()
        self.classifier = LinearSVC()

    def train(self, texts, labels):
        # Learn the TF-IDF vocabulary from the training texts, then fit the classifier
        X = self.vectorizer.fit_transform(texts)
        self.classifier.fit(X, labels)

    def analyze_sentiment(self, text):
        # Vectorize with the already-fitted vocabulary and return the predicted label,
        # e.g. 'positive', 'negative', or 'neutral' as seen during training
        X = self.vectorizer.transform([text])
        return self.classifier.predict(X)[0]
--------------------------------------------------------------------------------
/model_eval.py:
--------------------------------------------------------------------------------
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

class ModelEvaluation:
    def evaluate_model(self, true_labels, predicted_labels):
        # Weighted averaging accounts for label imbalance across the sentiment classes
        return {
            'Accuracy': accuracy_score(true_labels, predicted_labels),
            'Precision': precision_score(true_labels, predicted_labels, average='weighted'),
            'Recall': recall_score(true_labels, predicted_labels, average='weighted'),
            'F1 Score': f1_score(true_labels, predicted_labels, average='weighted'),
        }
--------------------------------------------------------------------------------
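A sketch of training and evaluating MLAnalyzer end to end. The six training texts and their labels are toy data for illustration; any corpus labeled 'positive'/'negative'/'neutral' would slot in the same way.

    train_texts = ["I love this product", "This is awful", "It works as expected",
                   "Absolutely fantastic experience", "Worst purchase ever", "It arrived on time"]
    train_labels = ["positive", "negative", "neutral",
                    "positive", "negative", "neutral"]

    ml = MLAnalyzer()
    ml.train(train_texts, train_labels)
    print(ml.analyze_sentiment("fantastic, I love it"))  # most likely 'positive'

    # Score held-out predictions with ModelEvaluation
    evaluator = ModelEvaluation()
    true_labels = ["positive", "negative"]
    predicted = [ml.analyze_sentiment(t) for t in ["I love this", "this is awful"]]
    print(evaluator.evaluate_model(true_labels, predicted))
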
/nltk_analyzer.py:
--------------------------------------------------------------------------------
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk

class NLTKAnalyzer:
    def __init__(self):
        # Fetch required corpora/models up front; vader_lexicon is needed by
        # SentimentIntensityAnalyzer, so download before constructing it
        nltk.download('punkt')
        nltk.download('averaged_perceptron_tagger')
        nltk.download('maxent_ne_chunker')
        nltk.download('words')
        nltk.download('vader_lexicon')
        self.sid = SentimentIntensityAnalyzer()

    def analyze_sentiment(self, text):
        # Split into sentences, then derive POS tags and named entities per sentence
        sentences = sent_tokenize(text)
        tokenized_sentences = [word_tokenize(sentence) for sentence in sentences]
        pos_tagged_sentences = [pos_tag(tokens) for tokens in tokenized_sentences]
        named_entities = [ne_chunk(pos_tags) for pos_tags in pos_tagged_sentences]

        # Average the VADER compound score over all sentences
        compound_scores = [self.sid.polarity_scores(sentence)['compound']
                           for sentence in sentences]
        overall_score = sum(compound_scores) / len(compound_scores) if compound_scores else 0.0

        # Conventional VADER cutoffs: >= 0.05 positive, <= -0.05 negative
        if overall_score >= 0.05:
            sentiment = 'positive'
        elif overall_score <= -0.05:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'

        return sentiment, named_entities
--------------------------------------------------------------------------------
/sentiment_comparison.py:
--------------------------------------------------------------------------------
class SentimentComparison:
    def compare_sentiments(self, sentiment1, sentiment2):
        if sentiment1 == sentiment2:
            return "Sentiments are the same."
        return "Sentiments are different."

    def compare_multiple_texts(self, texts, analyzers):
        # Run each text through its paired analyzer, then compare every pair of results
        sentiments = [analyzer.analyze_sentiment(text)
                      for text, analyzer in zip(texts, analyzers)]
        comparison_results = []

        for i in range(len(sentiments) - 1):
            for j in range(i + 1, len(sentiments)):
                comparison_results.append(self.compare_sentiments(sentiments[i], sentiments[j]))

        return comparison_results
--------------------------------------------------------------------------------
/user_interface.py:
--------------------------------------------------------------------------------
from nltk_analyzer import NLTKAnalyzer
from analysis.spacy_analyzer import SpacyAnalyzer  # not included in this snapshot
from utils.data_manager import DataManager         # not included in this snapshot
from utils.data_visualization import DataVisualization

class UserInterface:
    def __init__(self):
        self.nltk_analyzer = NLTKAnalyzer()
        self.spacy_analyzer = SpacyAnalyzer()
        self.data_manager = DataManager()
        self.data_visualization = DataVisualization()

    def analyze_sentiment(self, text):
        # Both analyzers return (sentiment, extra); only the label is reported here
        nltk_sentiment, _ = self.nltk_analyzer.analyze_sentiment(text)
        spacy_sentiment, _ = self.spacy_analyzer.analyze_sentiment(text)

        print("Sentiment Analysis Results:")
        print("NLTK Analyzer:", nltk_sentiment)
        print("spaCy Analyzer:", spacy_sentiment)

        return nltk_sentiment, spacy_sentiment

    def visualize_sentiment_distribution(self, sentiments):
        self.data_visualization.plot_sentiment_distribution(sentiments)

    def load_text_from_file(self, file_path):
        return self.data_manager.read_text_file(file_path)

    def save_sentiment_results(self, file_path, nltk_sentiment, spacy_sentiment):
        results = f"NLTK Sentiment: {nltk_sentiment}\nspaCy Sentiment: {spacy_sentiment}"
        self.data_manager.save_text_file(file_path, results)
--------------------------------------------------------------------------------
/utils/data_visualization.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt

class DataVisualization:
    def plot_sentiment_distribution(self, sentiments):
        # Count the occurrences of each sentiment
        sentiment_counts = {sentiment: sentiments.count(sentiment)
                            for sentiment in set(sentiments)}

        # Fix the color per label so the mapping does not depend on set() ordering
        color_map = {'positive': 'green', 'neutral': 'blue', 'negative': 'red'}
        labels = list(sentiment_counts.keys())
        counts = [sentiment_counts[label] for label in labels]
        colors = [color_map.get(label, 'gray') for label in labels]

        plt.figure(figsize=(8, 6))
        plt.bar(labels, counts, color=colors)
        plt.title('Sentiment Distribution')
        plt.xlabel('Sentiment')
        plt.ylabel('Count')
        plt.show()
--------------------------------------------------------------------------------
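A quick sketch of plotting a batch of results. The sentiment list here is made-up sample data; in practice it would come from one of the analyzers above.

    viz = DataVisualization()
    sample = ['positive', 'positive', 'negative', 'neutral', 'positive']
    viz.plot_sentiment_distribution(sample)  # bar chart: 3 positive, 1 negative, 1 neutral
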
/utils/text_preprocessing.py:
--------------------------------------------------------------------------------
import string

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

class TextPreprocessor:
    def __init__(self):
        # Fetch the resources that word_tokenize and stopwords depend on
        nltk.download('stopwords')
        nltk.download('punkt')
        self.stop_words = set(stopwords.words('english'))
        self.stemmer = PorterStemmer()

    def preprocess_text(self, text):
        # Tokenization (lowercased)
        tokens = word_tokenize(text.lower())

        # Remove punctuation and stopwords
        filtered_tokens = [token for token in tokens
                           if token not in string.punctuation
                           and token not in self.stop_words]

        # Stemming
        return [self.stemmer.stem(token) for token in filtered_tokens]
--------------------------------------------------------------------------------
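A usage sketch for TextPreprocessor; the input sentence and the expected Porter stems are illustrative.

    preprocessor = TextPreprocessor()
    print(preprocessor.preprocess_text("The movies were surprisingly good, but the endings felt rushed!"))
    # expected: ['movi', 'surprisingli', 'good', 'end', 'felt', 'rush']
    # stopwords ("the", "were", "but") and punctuation are gone; remaining tokens are stemmed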