├── Procfile
├── README.md
├── Reviews.csv
├── SentimentAnalysis.ipynb
├── app.py
├── images
    └── Application_gif.gif
├── nb_clf.pkl
├── preprocessing.py
├── requirements.txt
├── static
    └── css
    │   ├── grey.jpg
    │   └── style.css
├── templates
    ├── .txt
    └── home.html
└── tfidf_model.pkl


/Procfile:
--------------------------------------------------------------------------------
1 | web: gunicorn app:app
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SentimentAnalysis
 2 | 
 3 | 
 4 | ## About
 5 |  This is a text classification problem. The model predicts the sentiment of a customer's text as Positive, Negative, or Neutral.
 6 |  
 7 | ## Data
 8 |  The dataset used for this project is taken from Kaggle. You can get it [here](https://www.kaggle.com/snap/amazon-fine-food-reviews).
 9 | 
10 | ## Dependencies
11 | * Python
12 | * matplotlib
13 | * pandas
14 | * sklearn
15 | * Flask
16 | * NLTK
17 | 
18 | All dependencies can be installed using [pip](https://pip.pypa.io/en/stable/)
19 | 
20 | ## To use
21 | 
22 | * Download or clone the repository using `git clone https://github.com/Shareef-shaik/SentimentAnalysis.git`.
23 | * Install all the dependencies and run `python app.py` from the terminal.
24 | * Open http://127.0.0.1:5000/ in the browser. Enter some text and click predict to see the outcome.
25 | 
26 | ## Live API
27 | * I have deployed it on Heroku with the help of Flask. If you are interested, you can check it out here:
28 | [Sentiment Analysis](http://sentiment.shareefshaik.me)
29 | * I wrote a blog post covering all the steps involved in completing this project; you can read it [here](https://medium.com/towards-artificial-intelligence/sentiment-analysis-from-scratch-to-production-web-api-3382f19748e8) for a clearer understanding.
30 | 
31 | ## Application Demo
32 | 
33 | 
34 | ![Screenshot](images/Application_gif.gif)
35 | 
36 | 


--------------------------------------------------------------------------------
/Reviews.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:a88348c20ed3f85d647e8fbaac0a730ab2f09f95e5d1f4bcf1f9e3650ef624d7
3 | size 300904694
4 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from flask import Flask,render_template,url_for,request
 3 | import pickle
 4 | import preprocessing
 5 | 
 6 | # load the model from disk
 7 | clf = pickle.load(open('nb_clf.pkl', 'rb'))
 8 | cv=pickle.load(open('tfidf_model.pkl','rb'))
 9 | app = Flask(__name__)
10 | 
11 | @app.route('/')
12 | def home():  # Landing page: renders home.html without a `prediction` value, so none of its result branches match.
13 | 	return render_template('home.html')
14 | 
15 | @app.route('/predict',methods=['POST'])
16 | def predict():
17 |     if request.method == 'POST':
18 |         message = request.form['message']
19 |         if(len(message)>2):
20 |             text = [message]
21 |             data = preprocessing.text_Preprocessing(text)
22 |             vect = cv.transform(data)
23 |             my_prediction = clf.predict(vect)
24 |         else:
25 |             my_prediction=3
26 |         
27 |     return render_template('home.html',prediction = my_prediction)
28 | 
29 | 
30 | if __name__ == '__main__':  # Only reached via `python app.py`; the Procfile starts the app through gunicorn (`app:app`) instead.
31 | 	app.run(debug=True)  # NOTE(review): debug mode is hard-coded - treat this entry point as local-development only.
32 | 


--------------------------------------------------------------------------------
/images/Application_gif.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shareef-shaik/SentimentAnalysis/5dc8abf6d93cfb4f02626a7ed837654cc84e9a5a/images/Application_gif.gif


--------------------------------------------------------------------------------
/nb_clf.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shareef-shaik/SentimentAnalysis/5dc8abf6d93cfb4f02626a7ed837654cc84e9a5a/nb_clf.pkl


--------------------------------------------------------------------------------
/preprocessing.py:
--------------------------------------------------------------------------------
 1 | #import all necessary packages (the nltk.download calls below run as import-time side effects)
 2 | import nltk
 3 | nltk.download('punkt')    # tokenizer data; presumably what word_tokenize relies on
 4 | from nltk.stem import PorterStemmer
 5 | from nltk.tokenize import word_tokenize
 6 | from nltk.corpus import stopwords
 7 | nltk.download('stopwords')    # fetches the corpus that stopwords.words() reads on the next line
 8 | stop_words = set(stopwords.words('english'))
 9 | stop_words.remove("not")    # "not" is therefore never filtered out by text_Preprocessing
10 | import re
11 | from bs4 import BeautifulSoup
12 | 
13 | def text_Preprocessing(reviews):
14 |     
15 |     
16 |     """ This will clean the text data, remove html tags, remove special characters and then tokenize the reviews to apply Stemmer on each word token."""
17 |     
18 |     pre_processed_reviews=[]
19 |     
20 |     for review in reviews:
21 |         review= BeautifulSoup(review,'lxml').getText()    #remove html tags
22 |         review=re.sub('\S*\d\S*','',review).strip()
23 |         review=re.sub('[^A-Za-z]+',' ',review)        #remove special chars
24 |         review=re.sub("n't","not",review)
25 |         review=word_tokenize(str(review.lower())) #tokenize the reviews into word tokens
26 |         # now we will split the review into words and then check if these words are in the stop words if so we will remove them, if not we will join
27 |         review=' '.join(PorterStemmer().stem(word) for word in review if word not in stop_words)
28 |         pre_processed_reviews.append(review.strip())
29 |     return pre_processed_reviews
30 | 
31 | 
32 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | flask<=1.1.1
 2 | gunicorn==19.9.0
 3 | matplotlib>=1.4.3
 4 | pandas>=0.19
 5 | itsdangerous==1.1.0
 6 | Jinja2==2.10.1
 7 | MarkupSafe==1.1.1
 8 | Werkzeug==1.0.1
 9 | matplotlib>=1.4.3
10 | tqdm==4.19.6
11 | bs4==0.0.1
12 | lxml==3.4.2
13 | scikit-learn==0.23.1
14 | nltk==3.4.5
15 | 


--------------------------------------------------------------------------------
/static/css/grey.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shareef-shaik/SentimentAnalysis/5dc8abf6d93cfb4f02626a7ed837654cc84e9a5a/static/css/grey.jpg


--------------------------------------------------------------------------------
/static/css/style.css:
--------------------------------------------------------------------------------
 1 | @import url(https://fonts.googleapis.com/css?family=Open+Sans);
 2 | .btn { display: inline-block; *display: inline; *zoom: 1; padding: 4px 10px 4px; margin-bottom: 0; font-size: 13px; line-height: 18px; color: #333333; text-align: center;text-shadow: 0 1px 1px rgba(255, 255, 255, 0.75); vertical-align: middle; background-color: #f5f5f5; background-image: -moz-linear-gradient(top, #ffffff, #e6e6e6); background-image: -ms-linear-gradient(top, #ffffff, #e6e6e6); background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#ffffff), to(#e6e6e6)); background-image: -webkit-linear-gradient(top, #ffffff, #e6e6e6); background-image: -o-linear-gradient(top, #ffffff, #e6e6e6); background-image: linear-gradient(top, #ffffff, #e6e6e6); background-repeat: repeat-x; filter: progid:dximagetransform.microsoft.gradient(startColorstr=#ffffff, endColorstr=#e6e6e6, GradientType=0); border-color: #e6e6e6 #e6e6e6 #e6e6e6; border-color: rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.1) rgba(0, 0, 0, 0.25); border: 1px solid #e6e6e6; -webkit-border-radius: 4px; -moz-border-radius: 4px; border-radius: 4px; -webkit-box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.2), 0 1px 2px rgba(0, 0, 0, 0.05); -moz-box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.2), 0 1px 2px rgba(0, 0, 0, 0.05); box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.2), 0 1px 2px rgba(0, 0, 0, 0.05); cursor: pointer; *margin-left: .3em; }
 3 | .btn:hover, .btn:active, .btn.active, .btn.disabled, .btn[disabled] { background-color: #e6e6e6; }
 4 | .btn-large { padding: 9px 14px; font-size: 18px; line-height: normal; -webkit-border-radius: 5px; -moz-border-radius: 5px; border-radius: 5px; }
 5 | .btn:hover { color: #333333; text-decoration: none; background-color: #e6e6e6; background-position: 0 -15px; -webkit-transition: background-position 0.1s linear; -moz-transition: background-position 0.1s linear; -ms-transition: background-position 0.1s linear; -o-transition: background-position 0.1s linear; transition: background-position 0.1s linear; }
 6 | .btn-primary, .btn-primary:hover { text-shadow: 0 -1px 0 rgba(0, 0, 0, 0.25); color: #ffffff; }
 7 | .btn-primary.active { color: rgba(255, 255, 255, 0.75); }
 8 | .btn-primary { background-color: #4a77d4; background-image: -moz-linear-gradient(top, #6eb6de, #4a77d4); background-image: -ms-linear-gradient(top, #6eb6de, #4a77d4); background-image: -webkit-gradient(linear, 0 0, 0 100%, from(#6eb6de), to(#4a77d4)); background-image: -webkit-linear-gradient(top, #6eb6de, #4a77d4); background-image: -o-linear-gradient(top, #6eb6de, #4a77d4); background-image: linear-gradient(top, #6eb6de, #4a77d4); background-repeat: repeat-x; filter: progid:dximagetransform.microsoft.gradient(startColorstr=#6eb6de, endColorstr=#4a77d4, GradientType=0);  border: 1px solid #3762bc; text-shadow: 1px 1px 1px rgba(0,0,0,0.4); box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.2), 0 1px 2px rgba(0, 0, 0, 0.5); }
 9 | .btn-primary:hover, .btn-primary:active, .btn-primary.active, .btn-primary.disabled, .btn-primary[disabled] { filter: none; background-color: #4a77d4; }
10 | .btn-block { width: 100%; display:block; }
11 | 
12 | * { -webkit-box-sizing:border-box; -moz-box-sizing:border-box; -ms-box-sizing:border-box; -o-box-sizing:border-box; box-sizing:border-box; }
13 | 
14 | html { width: 100%; height:100%; overflow:hidden; }
15 | 
16 | body { 
17 | 	width: 100%;
18 | 	height: 100%;
19 | 	font-family: 'Open Sans', sans-serif;
20 | 	background-image: url("grey.jpg");
21 | 	background-size: cover;
22 | 	color: #fff;
23 | 	font-size: 25px;
24 | 	text-align:center;
25 | 	letter-spacing:1.2px;
26 | 
27 | }
28 | .container { 
29 | 	position: absolute;
30 | 	top:30%;
31 | 	left: 50%;
32 | 	margin: -150px 0 0 -150px;
33 | 	width:400px;
34 | 	height:400px;
35 | }
36 | 
37 | .container h1 { color: #fff; text-shadow: 0 0 10px rgba(0,0,0,0.3); letter-spacing:1px; text-align:center; }
38 | 
39 | input { 
40 | 	width: 25%; 
41 | 	margin-bottom: 10px; 
42 | 	background: rgba(0,0,0,0.3);
43 | 	border: none;
44 | 	outline: none;
45 | 	padding: 10px;
46 | 	font-size: 13px;
47 | 	color: #fff;
48 | 	text-shadow: 1px 1px 1px rgba(0,0,0,0.3);
49 | 	border: 1px solid rgba(0,0,0,0.3);
50 | 	border-radius: 4px;
51 | 	box-shadow: inset 0 -5px 45px rgba(100,100,100,0.2), 0 1px 1px rgba(255,255,255,0.2);
52 | 	-webkit-transition: box-shadow .5s ease;
53 | 	-moz-transition: box-shadow .5s ease;
54 | 	-o-transition: box-shadow .5s ease;
55 | 	-ms-transition: box-shadow .5s ease;
56 | 	transition: box-shadow .5s ease;
57 | }
58 | input:focus { box-shadow: inset 0 -5px 45px rgba(100,100,100,0.4), 0 1px 1px rgba(255,255,255,0.2); }
59 | 


--------------------------------------------------------------------------------
/templates/.txt:
--------------------------------------------------------------------------------
 1 | if request.method == 'POST':
 2 |         message = request.form['message']
 3 |         if len(message)>=1:
 4 |             data = [message]
 5 |             vect = cv.transform(data).toarray()
 6 |             my_prediction = clf.predict(vect)
 7 |         else:
 8 |             my_prediction="Please enter you message"
 9 | 	return render_template('home.html',prediction = my_prediction)
10 | 
11 | 
12 | 
13 | ##Naive bayes
14 | 
15 | from sklearn.naive_bayes import MultinomialNB
16 | nb_params={'alpha':[10**i for i in range(-5,5)]}
17 | nb=MultinomialNB()
18 | rndm_clf=RandomizedSearchCV(nb,nb_params)
19 | rndm_clf.fit(reviews_train_bow,sentiment_train)
20 | rndm_clf.best_params_
21 | 
22 | nb_model=MultinomialNB(alpha=0.001)
23 | nb_model.fit(reviews_train_bow,sentiment_train)
24 | train_fpr,train_tpr,threshold=roc_curve(sentiment_train,nb_model.predict_proba(reviews_train_bow)[:,1])
25 | test_fpr,test_tpr,threshold=roc_curve(sentiment_test,nb_model.predict_proba(reviews_test_bow)[:,1])
26 | plt.plot(train_fpr,train_tpr,label='Train Auc = '+str(auc(train_fpr,train_tpr)))
27 | plt.plot(test_fpr,test_tpr,label='Test AUC = '+str(auc(test_fpr,test_tpr)))
28 | plt.legend()
29 | 
30 | pickle.dump(nb_model,open('nb_model.pkl','wb'))
31 | 
32 | 
33 | background: -moz-radial-gradient(0% 100%, ellipse cover, rgba(104,128,138,.4) 10%,rgba(138,114,76,0) 40%),-moz-linear-gradient(top,  rgba(57,173,219,.25) 0%, rgba(42,60,87,.4) 100%), -moz-linear-gradient(-45deg,  #670d10 0%, #092756 100%);
34 | 	background: -webkit-radial-gradient(0% 100%, ellipse cover, rgba(104,128,138,.4) 10%,rgba(138,114,76,0) 40%), -webkit-linear-gradient(top,  rgba(57,173,219,.25) 0%,rgba(42,60,87,.4) 100%), -webkit-linear-gradient(-45deg,  #670d10 0%,#092756 100%);
35 | 	background: -o-radial-gradient(0% 100%, ellipse cover, rgba(104,128,138,.4) 10%,rgba(138,114,76,0) 40%), -o-linear-gradient(top,  rgba(57,173,219,.25) 0%,rgba(42,60,87,.4) 100%), -o-linear-gradient(-45deg,  #670d10 0%,#092756 100%);
36 | 	background: -ms-radial-gradient(0% 100%, ellipse cover, rgba(104,128,138,.4) 10%,rgba(138,114,76,0) 40%), -ms-linear-gradient(top,  rgba(57,173,219,.25) 0%,rgba(42,60,87,.4) 100%), -ms-linear-gradient(-45deg,  #670d10 0%,#092756 100%);
37 | 	background: -webkit-radial-gradient(0% 100%, ellipse cover, rgba(104,128,138,.4) 10%,rgba(138,114,76,0) 40%), linear-gradient(to bottom,  rgba(57,173,219,.25) 0%,rgba(42,60,87,.4) 100%), linear-gradient(135deg,  #670d10 0%,#092756 100%);
38 | 	filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#3E1D6D', endColorstr='#092756',GradientType=1 );
39 | 
40 | <!--background: #000000;


--------------------------------------------------------------------------------
/templates/home.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <title>ML API</title>
 6 |   <link href='https://fonts.googleapis.com/css?family=Pacifico' rel='stylesheet' type='text/css'>
 7 | <link href='https://fonts.googleapis.com/css?family=Arimo' rel='stylesheet' type='text/css'>
 8 | <link href='https://fonts.googleapis.com/css?family=Hind:300' rel='stylesheet' type='text/css'>
 9 | <link href='https://fonts.googleapis.com/css?family=Open+Sans+Condensed:300' rel='stylesheet' type='text/css'>
10 | <link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
11 |   
12 | 
13 | 	<title>Sentiment Detector</title>
14 | 	<!-- <link rel="stylesheet" type="text/css" href="../static/css/styles.css"> -->
15 | 	<!--<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='css/styles.css') }}">-->
16 | </head>
17 | 
18 | <body> 
19 | 
20 | 
21 | 	<header>
22 | 		<div id="brandname">
23 | 			   Machine Learning App with Flask
24 | 		</div>
25 | 
26 | 	</header>
27 | 
28 | 	<div class="container">
29 |         <h1>Sentiment Analysis</h1>
30 | 		<form action="{{ url_for('predict')}}" method="POST">
31 | 		<p>Enter Your Message Here</p>
32 | 		<!-- <input type="text" name="comment"/> -->
33 | 		<textarea placeholder="Enter your Text here...." name="message" rows="6" cols="50"></textarea>
34 | 		<br/>
35 | 
36 | 		<input type="submit" class="btn-info" value="predict">
37 | 		
38 | 		
39 | 	</form>
40 | 	<div class="my_prediction">
41 | 	{% if prediction == 1%}
42 | 	<h1 style="color:#008000;"><strong>Positive</strong></h1>
43 | 	<span style='font-size:100px;'>&#128522;</span>
44 | 
45 | 	{% elif prediction == 0%}
46 | 	<h1 style="color:#FF0000;"><strong>Negative</strong></h1>
47 | 	<span style='font-size:100px;'>&#128545;</span>
48 | 	
49 | 	{% elif prediction == 2%}
50 | 	<h2 style="color:white;"><strong>Neutral</strong></h2>
51 | 	<span style='font-size:100px;'>&#128528;</span>
52 | 	
53 | 	{% elif prediction == 3%}
54 |         <h2 style="color:red;"><strong>Please enter at least 3 characters and try again!</strong></h2>
55 | 	
56 | 	{% endif %}
57 | 	</div>
58 | </div>
59 | 
60 | 	
61 | 	
62 | 
63 | </body>
64 | </html>
65 | 


--------------------------------------------------------------------------------
/tfidf_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Shareef-shaik/SentimentAnalysis/5dc8abf6d93cfb4f02626a7ed837654cc84e9a5a/tfidf_model.pkl


--------------------------------------------------------------------------------