"""SpamClassifier: train a Multinomial Naive Bayes model on SMS messages.

Pipeline, executed top to bottom at module level:

1. Load the tab-separated ``smsspamcollection/SMSSpamCollection`` file into
   a DataFrame with ``label`` and ``message`` columns.
2. Clean each message: keep letters only, lowercase, drop English stopwords
   and Porter-stem the remaining words.
3. Build a bag-of-words matrix limited to the 2500 most frequent terms.
4. Split 80/20 into train/test sets and fit ``MultinomialNB``.
5. Predict labels for the test set (``y_pred``).
"""
# Repository layout:
#   README.md                       -- "# SpamClassifier"
#   Spamclassifier.py               -- this script
#   smsspamcollection/
#       SMSSpamCollection           -- dataset
#       readme                      -- dataset description

import re

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Importing the dataset (tab-separated, no header row).
# NOTE(review): the file is read with pandas' default quoting rules; if
# messages contain bare double quotes, rows may be merged -- confirm the row
# count and consider quoting=csv.QUOTE_NONE.
messages = pd.read_csv('smsspamcollection/SMSSpamCollection', sep='\t',
                       names=["label", "message"])

# Data cleaning and preprocessing
nltk.download('stopwords')

ps = PorterStemmer()

# Loop invariants, built once: the stopword list as a set (O(1) membership
# instead of re-reading and scanning the list for every word) and the
# compiled "anything that is not a letter" pattern.
_STOP_WORDS = frozenset(stopwords.words('english'))
_NON_LETTERS = re.compile('[^a-zA-Z]')

corpus = []
for message in messages['message']:
    # Replace non-letters with spaces, lowercase, then tokenize on whitespace.
    words = _NON_LETTERS.sub(' ', message).lower().split()
    # Drop stopwords before stemming, then re-join into one cleaned string.
    stems = [ps.stem(word) for word in words if word not in _STOP_WORDS]
    corpus.append(' '.join(stems))

# Creating the Bag of Words model
cv = CountVectorizer(max_features=2500)
X = cv.fit_transform(corpus).toarray()

# Target vector: 1 for spam, 0 for ham. Encoded explicitly rather than by
# picking the second get_dummies column, whose position depends on label
# ordering and whose dtype differs between pandas versions.
y = (messages['label'] == 'spam').astype(int).values

# Train Test Split (80% train / 20% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0)

# Training model using Naive Bayes classifier
spam_detect_model = MultinomialNB().fit(X_train, y_train)

y_pred = spam_detect_model.predict(X_test)
/smsspamcollection/readme: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/SpamClassifier/5fa41eaf3ec0d21d3ffced673800d25b6b8ffc20/smsspamcollection/readme --------------------------------------------------------------------------------