├── file4
├── file3
├── File
└── file2

/file4:
--------------------------------------------------------------------------------
from openpyxl import load_workbook
from openpyxl.styles import PatternFill

# Reload the exported Excel file (written by /file3)
wb = load_workbook("predicted_sentiment_results.xlsx")
ws = wb.active

# Header row is 1; data starts from row 2.
# Column B holds actual_sentiment and column C holds predicted_sentiment,
# matching the column order of the DataFrame exported in /file3.
green_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")  # Light green
red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")    # Light red

for row in range(2, ws.max_row + 1):
    actual = ws[f"B{row}"].value
    predicted = ws[f"C{row}"].value

    if actual == predicted:
        ws[f"C{row}"].fill = green_fill  # Correct prediction
    else:
        ws[f"C{row}"].fill = red_fill    # Incorrect prediction

# Save the formatted workbook under a new name
wb.save("predicted_sentiment_results_colored.xlsx")
print("🎨 Colored prediction results exported to 'predicted_sentiment_results_colored.xlsx'")
--------------------------------------------------------------------------------
/file3:
--------------------------------------------------------------------------------
# Evaluation and Excel export. This snippet continues the pipeline in /file2:
# model, tokenizer, X_test, y_test and label_map are the objects created there.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

# Evaluate
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)

# Class names ordered by their numeric label (0, 1, 2) so they line up with
# the class order used by scikit-learn's report and confusion matrix
label_names = sorted(label_map, key=label_map.get)

print("Classification Report:\n", classification_report(y_test, y_pred, target_names=label_names))

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=label_names, yticklabels=label_names)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

# Create DataFrame for export
reverse_label_map = {v: k for k, v in label_map.items()}
test_messages = X_test  # padded sequences
original_texts = tokenizer.sequences_to_texts(test_messages)
df_results = pd.DataFrame({
    'original_message': original_texts,
    'actual_sentiment': [reverse_label_map[i] for i in y_test],
    'predicted_sentiment': [reverse_label_map[i] for i in y_pred]
})

# Export prediction results to Excel
df_results.to_excel("predicted_sentiment_results.xlsx", index=False)
print("✅ Prediction results exported to 'predicted_sentiment_results.xlsx'")
--------------------------------------------------------------------------------
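The two scripts above hand off through the exported spreadsheet: /file3 writes predicted_sentiment_results.xlsx and /file4 colour-codes it. As a quick sanity check on that hand-off, the file can be reloaded and the accuracy recomputed directly from its columns; a minimal sketch, assuming the file and column names used above (the df_check variable is illustrative only):

import pandas as pd

# Recompute accuracy from the exported spreadsheet; the result should match the
# share of green-filled cells produced by /file4.
df_check = pd.read_excel("predicted_sentiment_results.xlsx")
accuracy = (df_check["actual_sentiment"] == df_check["predicted_sentiment"]).mean()
print(f"Accuracy recomputed from the spreadsheet: {accuracy:.2%}")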
/File:
--------------------------------------------------------------------------------
# NLP for Sentiment Analysis in Internal Communications using Synthetic Data

import random
import pandas as pd
import numpy as np
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Generate Synthetic Data
def generate_synthetic_messages(n=500):
    positive_templates = [
        "Great job on the project!",
        "I'm really impressed with the results.",
        "Keep up the excellent work.",
        "The presentation was very well done.",
        "Thanks for your support and dedication.",
    ]
    neutral_templates = [
        "Please attend the meeting at 2 PM.",
        "This is to inform you of the new update.",
        "Check the latest figures attached.",
        "Let’s reschedule our one-on-one.",
        "The document is ready for review.",
    ]
    negative_templates = [
        "We need to address performance issues.",
        "This is below expectations.",
        "I’m disappointed with the delivery.",
        "We missed the deadline again.",
        "There were multiple errors in the report.",
    ]

    data = []
    for _ in range(n):
        sentiment = random.choices(["positive", "neutral", "negative"], weights=[0.4, 0.3, 0.3])[0]
        if sentiment == "positive":
            msg = random.choice(positive_templates)
        elif sentiment == "neutral":
            msg = random.choice(neutral_templates)
        else:
            msg = random.choice(negative_templates)
        data.append((msg, sentiment))
    return pd.DataFrame(data, columns=["message", "sentiment"])

# 2. Preprocess Text
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r"http\S+|www\S+|https\S+", '', text)
    text = re.sub(r'@\w+|#', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    return text.strip()

# 3. Main Pipeline
def main():
    df = generate_synthetic_messages(1000)
    df['clean_text'] = df['message'].apply(preprocess_text)

    # Encode target
    label_map = {'positive': 2, 'neutral': 1, 'negative': 0}
    df['label'] = df['sentiment'].map(label_map)
    # Class names ordered by numeric label so they match scikit-learn's class order
    label_names = sorted(label_map, key=label_map.get)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(df['clean_text'], df['label'], test_size=0.2, random_state=42)

    # Vectorize
    vectorizer = TfidfVectorizer()
    X_train_tfidf = vectorizer.fit_transform(X_train)
    X_test_tfidf = vectorizer.transform(X_test)

    # Train classifier
    model = LogisticRegression()
    model.fit(X_train_tfidf, y_train)

    # Predict
    y_pred = model.predict(X_test_tfidf)

    # Evaluate
    print("Classification Report:\n", classification_report(y_test, y_pred, target_names=label_names))
    cm = confusion_matrix(y_test, y_pred)

    # Confusion Matrix Plot
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=label_names, yticklabels=label_names)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.show()

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
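Because the baseline in /File is a linear model over TF-IDF features, its per-class weights are directly interpretable. The hypothetical helper below (not part of the repo) lists the highest-weight terms for each sentiment class, assuming the model, vectorizer and label_map objects created inside main():

import numpy as np

def top_terms_per_class(model, vectorizer, label_map, top_k=5):
    """Print the strongest TF-IDF features per class for a fitted LogisticRegression."""
    feature_names = np.array(vectorizer.get_feature_names_out())
    name_by_label = {v: k for k, v in label_map.items()}
    for cls, coefs in zip(model.classes_, model.coef_):  # one coefficient row per class
        top_idx = np.argsort(coefs)[::-1][:top_k]
        print(f"{name_by_label[cls]}: {', '.join(feature_names[top_idx])}")

# Example call, after model.fit(...) in main():
# top_terms_per_class(model, vectorizer, label_map)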
/file2:
--------------------------------------------------------------------------------
import random
import pandas as pd
import numpy as np
import re
import string
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# 1. Generate Synthetic Internal Communication Messages
def generate_synthetic_messages(n=1000):
    positive = [
        "Great job on the project!",
        "Really impressed with your dedication.",
        "Well done on the client report.",
        "Excellent work on the deployment.",
        "Appreciate the quick response."
    ]
    neutral = [
        "The meeting is rescheduled to 3 PM.",
        "Submit the timesheet by Friday.",
        "Reminder: team check-in tomorrow.",
        "The report has been sent to HR.",
        "Your access has been approved."
    ]
    negative = [
        "This performance is below expectations.",
        "We missed the target again.",
        "There are serious issues with the delivery.",
        "The client is not satisfied.",
        "Your response was delayed."
    ]

    data = []
    for _ in range(n):
        sentiment = random.choices(["positive", "neutral", "negative"], weights=[0.4, 0.3, 0.3])[0]
        if sentiment == "positive":
            msg = random.choice(positive)
        elif sentiment == "neutral":
            msg = random.choice(neutral)
        else:
            msg = random.choice(negative)
        data.append((msg, sentiment))
    return pd.DataFrame(data, columns=["message", "sentiment"])

# 2. Text Preprocessing
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r"http\S+|www\S+", '', text)
    text = re.sub(r'@\w+|#', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    return text.strip()

# 3. Main Pipeline
def main():
    # Load and clean data
    df = generate_synthetic_messages(1500)
    df['clean_text'] = df['message'].apply(preprocess_text)

    label_map = {'positive': 2, 'neutral': 1, 'negative': 0}
    df['label'] = df['sentiment'].map(label_map)
    # Class names ordered by numeric label so they match scikit-learn's class order
    label_names = sorted(label_map, key=label_map.get)

    # Tokenization
    tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
    tokenizer.fit_on_texts(df['clean_text'])
    sequences = tokenizer.texts_to_sequences(df['clean_text'])

    max_len = max(len(seq) for seq in sequences)
    padded = pad_sequences(sequences, maxlen=max_len, padding='post')

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(padded, df['label'], test_size=0.2, random_state=42)

    # One-hot encode labels
    y_train_cat = to_categorical(y_train, num_classes=3)
    y_test_cat = to_categorical(y_test, num_classes=3)

    # Model
    model = Sequential()
    model.add(Embedding(input_dim=5000, output_dim=64, input_length=max_len))
    model.add(LSTM(64, return_sequences=False))
    model.add(Dropout(0.3))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(3, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train
    history = model.fit(X_train, y_train_cat, epochs=10, validation_data=(X_test, y_test_cat), batch_size=32)

    # Evaluate
    y_pred_prob = model.predict(X_test)
    y_pred = np.argmax(y_pred_prob, axis=1)

    print("Classification Report:\n", classification_report(y_test, y_pred, target_names=label_names))

    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=label_names, yticklabels=label_names)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.show()

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
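None of the scripts shows how the trained LSTM would score a new, unseen message. A hedged sketch of single-message inference follows, assuming the model, tokenizer, max_len and label_map created in /file2's main() and its preprocess_text helper; predict_sentiment is a hypothetical name:

import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

def predict_sentiment(message, model, tokenizer, max_len, label_map):
    """Classify one raw message with the trained LSTM from /file2."""
    reverse_label_map = {v: k for k, v in label_map.items()}
    cleaned = preprocess_text(message)                      # same cleaning as at training time
    seq = tokenizer.texts_to_sequences([cleaned])
    padded = pad_sequences(seq, maxlen=max_len, padding='post')
    probs = model.predict(padded, verbose=0)
    return reverse_label_map[int(np.argmax(probs, axis=1)[0])]

# Example: predict_sentiment("The client is not satisfied.", model, tokenizer, max_len, label_map)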