├── file4
├── file3
├── File
└── file2

/file4:
--------------------------------------------------------------------------------
from openpyxl import load_workbook
from openpyxl.styles import PatternFill

# Reload the exported Excel file (written by /file3)
wb = load_workbook("predicted_sentiment_results.xlsx")
ws = wb.active

# Header row is 1; data starts from row 2.
# Column B holds actual_sentiment and column C holds predicted_sentiment,
# matching the column order of the DataFrame exported in /file3.
green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")  # Light green
red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")    # Light red

for row in range(2, ws.max_row + 1):
    actual = ws[f"B{row}"].value
    predicted = ws[f"C{row}"].value

    if actual == predicted:
        ws[f"C{row}"].fill = green_fill  # Correct prediction
    else:
        ws[f"C{row}"].fill = red_fill    # Incorrect prediction

# Save the formatted workbook under a new name
wb.save("predicted_sentiment_results_colored.xlsx")
print("🎨 Colored prediction results exported to 'predicted_sentiment_results_colored.xlsx'")
--------------------------------------------------------------------------------
/file3:
--------------------------------------------------------------------------------
# Evaluation and Excel export. This snippet continues the pipeline in /file2:
# model, tokenizer, X_test, y_test and label_map are the objects created there.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

# Evaluate
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)

# Class names ordered by their numeric label (0, 1, 2) so they line up with
# the class order used by scikit-learn's report and confusion matrix
label_names = sorted(label_map, key=label_map.get)

print("Classification Report:\n", classification_report(y_test, y_pred, target_names=label_names))

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=label_names, yticklabels=label_names)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

# Create DataFrame for export
reverse_label_map = {v: k for k, v in label_map.items()}
test_messages = X_test  # padded sequences
original_texts = tokenizer.sequences_to_texts(test_messages)
df_results = pd.DataFrame({
    'original_message': original_texts,
    'actual_sentiment': [reverse_label_map[i] for i in y_test],
    'predicted_sentiment': [reverse_label_map[i] for i in y_pred]
})

# Export prediction results to Excel
df_results.to_excel("predicted_sentiment_results.xlsx", index=False)
print("✅ Prediction results exported to 'predicted_sentiment_results.xlsx'")
--------------------------------------------------------------------------------
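The two scripts above hand off through the exported spreadsheet: /file3 writes predicted_sentiment_results.xlsx and /file4 colour-codes it. As a quick sanity check on that hand-off, the file can be reloaded and the accuracy recomputed directly from its columns; a minimal sketch, assuming the file and column names used above (the df_check variable is illustrative only):

import pandas as pd

# Recompute accuracy from the exported spreadsheet; the result should match the
# share of green-filled cells produced by /file4.
df_check = pd.read_excel("predicted_sentiment_results.xlsx")
accuracy = (df_check["actual_sentiment"] == df_check["predicted_sentiment"]).mean()
print(f"Accuracy recomputed from the spreadsheet: {accuracy:.2%}")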
/File:
--------------------------------------------------------------------------------
# NLP for Sentiment Analysis in Internal Communications using Synthetic Data

import random
import pandas as pd
import numpy as np
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Generate Synthetic Data
def generate_synthetic_messages(n=500):
    positive_templates = [
        "Great job on the project!",
        "I'm really impressed with the results.",
        "Keep up the excellent work.",
        "The presentation was very well done.",
        "Thanks for your support and dedication.",
    ]
    neutral_templates = [
        "Please attend the meeting at 2 PM.",
        "This is to inform you of the new update.",
        "Check the latest figures attached.",
        "Let’s reschedule our one-on-one.",
        "The document is ready for review.",
    ]
    negative_templates = [
        "We need to address performance issues.",
        "This is below expectations.",
        "I’m disappointed with the delivery.",
        "We missed the deadline again.",
        "There were multiple errors in the report.",
    ]

    data = []
    for _ in range(n):
        sentiment = random.choices(["positive", "neutral", "negative"], weights=[0.4, 0.3, 0.3])[0]
        if sentiment == "positive":
            msg = random.choice(positive_templates)
        elif sentiment == "neutral":
            msg = random.choice(neutral_templates)
        else:
            msg = random.choice(negative_templates)
        data.append((msg, sentiment))
    return pd.DataFrame(data, columns=["message", "sentiment"])

# 2. Preprocess Text
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", '', text)
    text = re.sub(r'@\w+|#', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    return text.strip()

# 3. Main Pipeline
def main():
    df = generate_synthetic_messages(1000)
    df['clean_text'] = df['message'].apply(preprocess_text)

    # Encode target
    label_map = {'positive': 2, 'neutral': 1, 'negative': 0}
    df['label'] = df['sentiment'].map(label_map)
    # Class names ordered by numeric label so they match scikit-learn's class order
    label_names = sorted(label_map, key=label_map.get)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(df['clean_text'], df['label'], test_size=0.2, random_state=42)

    # Vectorize
    vectorizer = TfidfVectorizer()
    X_train_tfidf = vectorizer.fit_transform(X_train)
    X_test_tfidf = vectorizer.transform(X_test)

    # Train classifier
    model = LogisticRegression()
    model.fit(X_train_tfidf, y_train)

    # Predict
    y_pred = model.predict(X_test_tfidf)

    # Evaluate
    print("Classification Report:\n", classification_report(y_test, y_pred, target_names=label_names))
    cm = confusion_matrix(y_test, y_pred)

    # Confusion Matrix Plot
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=label_names, yticklabels=label_names)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.show()

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
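Because the baseline in /File is a linear model over TF-IDF features, its per-class weights are directly interpretable. The hypothetical helper below (not part of the repo) lists the highest-weight terms for each sentiment class, assuming the model, vectorizer and label_map objects created inside main():

import numpy as np

def top_terms_per_class(model, vectorizer, label_map, top_k=5):
    """Print the strongest TF-IDF features per class for a fitted LogisticRegression."""
    feature_names = np.array(vectorizer.get_feature_names_out())
    name_by_label = {v: k for k, v in label_map.items()}
    for cls, coefs in zip(model.classes_, model.coef_):  # one coefficient row per class
        top_idx = np.argsort(coefs)[::-1][:top_k]
        print(f"{name_by_label[cls]}: {', '.join(feature_names[top_idx])}")

# Example call, after model.fit(...) in main():
# top_terms_per_class(model, vectorizer, label_map)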
/file2:
--------------------------------------------------------------------------------
import random
import pandas as pd
import numpy as np
import re
import string
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# 1. Generate Synthetic Internal Communication Messages
def generate_synthetic_messages(n=1000):
    positive = [
        "Great job on the project!",
        "Really impressed with your dedication.",
        "Well done on the client report.",
        "Excellent work on the deployment.",
        "Appreciate the quick response."
    ]
    neutral = [
        "The meeting is rescheduled to 3 PM.",
        "Submit the timesheet by Friday.",
        "Reminder: team check-in tomorrow.",
        "The report has been sent to HR.",
        "Your access has been approved."
    ]
    negative = [
        "This performance is below expectations.",
        "We missed the target again.",
        "There are serious issues with the delivery.",
        "The client is not satisfied.",
        "Your response was delayed."
    ]

    data = []
    for _ in range(n):
        sentiment = random.choices(["positive", "neutral", "negative"], weights=[0.4, 0.3, 0.3])[0]
        if sentiment == "positive":
            msg = random.choice(positive)
        elif sentiment == "neutral":
            msg = random.choice(neutral)
        else:
            msg = random.choice(negative)
        data.append((msg, sentiment))
    return pd.DataFrame(data, columns=["message", "sentiment"])

# 2. Text Preprocessing
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r"http\S+|www\S+", '', text)
    text = re.sub(r'@\w+|#', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    return text.strip()

# 3. Main Pipeline
def main():
    # Load and clean data
    df = generate_synthetic_messages(1500)
    df['clean_text'] = df['message'].apply(preprocess_text)

    label_map = {'positive': 2, 'neutral': 1, 'negative': 0}
    df['label'] = df['sentiment'].map(label_map)
    # Class names ordered by numeric label so they match scikit-learn's class order
    label_names = sorted(label_map, key=label_map.get)

    # Tokenization
    tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
    tokenizer.fit_on_texts(df['clean_text'])
    sequences = tokenizer.texts_to_sequences(df['clean_text'])

    max_len = max(len(seq) for seq in sequences)
    padded = pad_sequences(sequences, maxlen=max_len, padding='post')

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(padded, df['label'], test_size=0.2, random_state=42)

    # One-hot encode labels
    y_train_cat = to_categorical(y_train, num_classes=3)
    y_test_cat = to_categorical(y_test, num_classes=3)

    # Model
    model = Sequential()
    model.add(Embedding(input_dim=5000, output_dim=64, input_length=max_len))
    model.add(LSTM(64, return_sequences=False))
    model.add(Dropout(0.3))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(3, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train
    history = model.fit(X_train, y_train_cat, epochs=10, validation_data=(X_test, y_test_cat), batch_size=32)

    # Evaluate
    y_pred_prob = model.predict(X_test)
    y_pred = np.argmax(y_pred_prob, axis=1)

    print("Classification Report:\n", classification_report(y_test, y_pred, target_names=label_names))

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=label_names, yticklabels=label_names)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.show()

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
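None of the scripts shows how the trained LSTM would score a new, unseen message. A hedged sketch of single-message inference follows, assuming the model, tokenizer, max_len and label_map created in /file2's main() and its preprocess_text helper; predict_sentiment is a hypothetical name:

import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

def predict_sentiment(message, model, tokenizer, max_len, label_map):
    """Classify one raw message with the trained LSTM from /file2."""
    reverse_label_map = {v: k for k, v in label_map.items()}
    cleaned = preprocess_text(message)                      # same cleaning as at training time
    seq = tokenizer.texts_to_sequences([cleaned])
    padded = pad_sequences(seq, maxlen=max_len, padding='post')
    probs = model.predict(padded, verbose=0)
    return reverse_label_map[int(np.argmax(probs, axis=1)[0])]

# Example: predict_sentiment("The client is not satisfied.", model, tokenizer, max_len, label_map)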