├── LICENSE
├── README.md
├── app.py
├── home.jpg
├── result.jpg
├── speech_classification1.pkl
├── speech_classification2.pkl
├── speech_classification3.pkl
├── speech_classification4.pkl
├── speech_classification5.pkl
├── speech_classification6.pkl
├── static
    └── styles.css
└── templates
    ├── home.html
    └── result.html


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Aniket Gupta
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Toxic Speech Classification
 2 | It is a full-fledged web application. Based on sentiment classification and using the NLTK library, it predicts how much of a speech is toxic, severe toxic, obscene, threat, insult, or identity hate.
 3 | 
 4 | ### What the Front-End looks like
 5 | [Home Page](https://github.com/anik8gupta/Toxic_Speech_Classification/blob/master/templates/home.html)             |  [Result Page](https://github.com/anik8gupta/Toxic_Speech_Classification/blob/master/templates/result.html)
 6 | :-------------------------:|:-------------------------:
 7 | ![](https://github.com/anik8gupta/Toxic_Speech_Classification/blob/master/home.jpg)  |  ![](https://github.com/anik8gupta/Toxic_Speech_Classification/blob/master/result.jpg)
 8 | 
 9 | The front end uses simple HTML & CSS, and the form sends the data with the POST method.
10 | 
11 | ### For the Back-End ([app.py](https://github.com/anik8gupta/Toxic_Speech_Classification/blob/master/app.py)), I use the Flask framework for Python
12 | 
13 | ### Main Libraries Used
14 | * [Pandas](https://pandas.pydata.org/pandas-docs/version/0.22/)
15 | * [Matplotlib.pyplot](https://matplotlib.org/3.1.0/api/_as_gen/matplotlib.pyplot.plot.html)
16 | * [NLTK](https://www.nltk.org/)
17 | * [Pickle](https://docs.python.org/3/library/pickle.html)
18 | 
19 | 
20 | 
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | # Import Libraries
  2 | from flask import Flask,render_template,url_for,request
  3 | import pandas as pd 
  4 | 
  5 | from sklearn.feature_extraction.text import CountVectorizer
  6 | 
  7 | from sklearn.externals import joblib
  8 | 
  9 | 
 10 | app = Flask(__name__)
 11 | 
 12 | @app.route('/')
 13 | def home():
 14 | 	return render_template('home.html')
 15 | 
 16 | @app.route('/predict',methods=['POST'])
 17 | def predict():
 18 | 	 
 19 |     import pickle as p
 20 |     # un-serializing model
 21 |     clf1 = p.load(open('speech_classification1.pkl', 'rb'))
 22 |     clf2 = p.load(open('speech_classification2.pkl', 'rb'))
 23 |     clf3 = p.load(open('speech_classification3.pkl', 'rb'))
 24 |     clf4 = p.load(open('speech_classification4.pkl', 'rb'))
 25 |     clf5 = p.load(open('speech_classification5.pkl', 'rb'))
 26 |     clf6 = p.load(open('speech_classification6.pkl', 'rb'))
 27 |     
 28 |     message = request.form['message']
 29 |     data = message
 30 | 
 31 |     import re
 32 |     import nltk
 33 |     from nltk.corpus import stopwords
 34 |     from nltk.stem.porter import PorterStemmer
 35 | 
 36 |     ps = PorterStemmer()
 37 |     #getting setences from speech#
 38 |     from nltk.tokenize import sent_tokenize
 39 |     tokenize=sent_tokenize(data)
 40 | 
 41 |     corpus3=[]
 42 | 
 43 |     for i in range(0, len(tokenize)):
 44 |         review3 = re.sub('[^a-zA-Z]', ' ', tokenize[i])
 45 |         review3 = review3.lower()
 46 |         review3 = review3.split()
 47 |         #review = [word for word in review if not word in set(stopwords.words('english'))]
 48 |         review3 = [ps.stem(word) for word in review3 if not word in set(stopwords.words('english'))]
 49 |         review3 = ' '.join(review3)
 50 |         corpus3.append(review3)
 51 | 
 52 |     #getting best 100 words
 53 |     cv3 = CountVectorizer(max_features = 100)
 54 |     X3 = cv3.fit_transform(corpus3).toarray()
 55 |     
 56 |     #predicting
 57 |     y_pred1 = clf1.predict(X3)
 58 |     y_pred2 = clf2.predict(X3)
 59 |     y_pred3 = clf3.predict(X3)
 60 |     y_pred4 = clf4.predict(X3)
 61 |     y_pred5 = clf5.predict(X3)
 62 |     y_pred6 = clf6.predict(X3)
 63 | 
 64 |     #conveting them in Data Frame
 65 |     y_pred1_df=pd.DataFrame(y_pred1)
 66 |     y_pred2_df=pd.DataFrame(y_pred2)
 67 |     y_pred3_df=pd.DataFrame(y_pred3)
 68 |     y_pred4_df=pd.DataFrame(y_pred4)
 69 |     y_pred5_df=pd.DataFrame(y_pred5)
 70 |     y_pred6_df=pd.DataFrame(y_pred6)
 71 | 
 72 | 
 73 |     f=y_pred6_df.iloc[:,0].values
 74 |     f2=y_pred5_df.iloc[:,0].values
 75 |     f3=y_pred4_df.iloc[:,0].values
 76 |     f4=y_pred3_df.iloc[:,0].values
 77 |     f5=y_pred2_df.iloc[:,0].values
 78 |     f6=y_pred1_df.iloc[:,0].values
 79 | 
 80 |     #making a final Submission Data frame
 81 |     submission = pd.DataFrame({'id':corpus3,'toxic':f,'severe_toxic':f2,
 82 |                            'obscene':f3,
 83 |                            'threat':f4,
 84 |                            'insult':f5,
 85 |                            'identity_hate':f6})
 86 | 
 87 |     #getting total of all rows#
 88 |     submission['total']=submission.sum(axis=1)
 89 |         
 90 |     #creating a normal column#
 91 |     a=[]
 92 |     for row in submission['total']:
 93 |         if row==0:
 94 |             a.append(1)
 95 |         else:
 96 |             a.append(0)
 97 |     submission['normal']=pd.DataFrame(a)
 98 | 
 99 |     #getting total of column#
100 |     total=submission[['toxic','severe_toxic','obscene','threat','insult','identity_hate','normal']].sum()
101 |     
102 |     
103 |     import matplotlib.pyplot as plt
104 |     import io
105 |     import base64
106 |     import urllib
107 |     #making and saving pie-chart
108 |     img = io.BytesIO() 
109 |     plt.pie(total)
110 |     plt.title("pie chart distribution")
111 |     plt.savefig(img, format='png')
112 |     img.seek(0)
113 | 
114 |     plot_data = urllib.parse.quote(base64.b64encode(img.read()).decode())
115 | 
116 |     #returning results with requested html page	
117 |     return render_template('result.html',normal=(total[6]/total.sum())*100,
118 |                            toxic=(total[0]/total.sum())*100,
119 |                            severe_toxic=(total[1]/total.sum())*100,
120 |                            obscene=(total[2]/total.sum())*100,
121 |                            threat=(total[3]/total.sum())*100,
122 |                            insult=(total[4]/total.sum())*100,
123 |                            identity_hate=(total[5]/total.sum())*100,plot_url=plot_data)
124 | 
125 | 
126 | 
# Start Flask's built-in server when this file is executed directly.
# NOTE(review): debug=True turns on Flask's debug mode, which the Flask
# documentation says must not be used in production -- confirm this entry
# point is only used for local development.
if __name__ == '__main__':
	app.run(debug=True)
129 | 


--------------------------------------------------------------------------------
/home.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/8e09edfd72ac05fd59f080dcc0ed6c0d77ab75eb/home.jpg


--------------------------------------------------------------------------------
/result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/8e09edfd72ac05fd59f080dcc0ed6c0d77ab75eb/result.jpg


--------------------------------------------------------------------------------
/speech_classification1.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/8e09edfd72ac05fd59f080dcc0ed6c0d77ab75eb/speech_classification1.pkl


--------------------------------------------------------------------------------
/speech_classification2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/8e09edfd72ac05fd59f080dcc0ed6c0d77ab75eb/speech_classification2.pkl


--------------------------------------------------------------------------------
/speech_classification3.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/8e09edfd72ac05fd59f080dcc0ed6c0d77ab75eb/speech_classification3.pkl


--------------------------------------------------------------------------------
/speech_classification4.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/8e09edfd72ac05fd59f080dcc0ed6c0d77ab75eb/speech_classification4.pkl


--------------------------------------------------------------------------------
/speech_classification5.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/8e09edfd72ac05fd59f080dcc0ed6c0d77ab75eb/speech_classification5.pkl


--------------------------------------------------------------------------------
/speech_classification6.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/8e09edfd72ac05fd59f080dcc0ed6c0d77ab75eb/speech_classification6.pkl


--------------------------------------------------------------------------------
/static/styles.css:
--------------------------------------------------------------------------------
 1 | body{
 2 | 	font:15px/1.5 Arial, Helvetica,sans-serif;
 3 | 	padding: 0px;
 4 | 	background-color:#ffffff;
 5 | }
 6 | 
 7 | .container{
 8 | 	width:100%;
 9 | 	margin: auto;
10 | 	overflow: hidden;
11 | }
12 | 
13 | header{
14 | 	background-image: linear-gradient(to right,#17ba3a,#178244);
15 | 	border-bottom:rgb(74, 255, 68) 3px solid;
16 | 	height:120px;
17 | 	width:100%;
18 | 	padding-top:30px;
19 | 
20 | }
21 | 
22 | .main-header{
23 | 			text-align:center;
24 | 			background-color: blue;
25 | 			height:100px;
26 | 			width:100%;
27 | 			margin:0px;
28 | 		}
29 | .brandname{
30 | 	
31 | 	font-size:10px;
32 | 	color: #fff;
33 | 	margin: 10px;
34 | 	text-align:center;
35 | }
36 | 
37 | header h2{
38 | 
39 | 	text-align:center;
40 | 	color:#fff;
41 | 	font-size:30px;
42 | 
43 | }
44 | 
45 | 
46 | 
/* Submit button: fixed-size pill with a green left-to-right gradient. */
.btn-info {
	background-image: linear-gradient(to right,#17ba3a,#178244);
	height:40px;
	width:100px;
	border-radius: 20px;
} /* Green gradient (both stops are greens, not blue) */
/* On hover the gradient start colour is set as a flat background. */
.btn-info:hover {background: #17ba3a;}
54 | 
55 | 
56 | .resultss{
57 | 	border-radius: 15px 50px;
58 |     background: #345fe4;
59 |     padding: 20px; 
60 |     width: 200px;
61 |     height: 150px;
62 | }


--------------------------------------------------------------------------------
/templates/home.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 | 	<title>Home</title>
 5 | 	<!-- <link rel="stylesheet" type="text/css" href="../static/css/styles.css"> -->
 6 | 	<link rel="stylesheet" type="text/css" href="../static/styles.css">
 7 | </head>
 8 | <body>
 9 | 
10 | 	<header>
11 | 		<div class="container">
12 | 		<h2>Sentimental Analysis in Speech</h2>
13 | 		<p class="brandname">Machine Learning App with Flask</p>
14 | 		
15 | 		
16 | 		
17 | 	</div>
18 | 	</header>
19 | 
20 | 	<div class="ml-container">
21 | 
22 | 		<form action="{{ url_for('predict')}}" method="POST">
23 | 		<p>Enter/Paste Your Speech Here</p>
24 | 		<!-- <input type="text" name="comment"/> -->
25 | 		<textarea name="message" rows="25" cols="140"></textarea>
26 | 		<br/>
27 | 
28 | 		<input type="submit" class="btn-info" value="predict">
29 | 		
30 | 	</form>
31 | 		
32 | 	</div>
33 | 
34 | 	
35 | 	
36 | 
37 | </body>
38 | </html>
39 | 


--------------------------------------------------------------------------------
/templates/result.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 | 	<title></title>
 5 |     <link rel="stylesheet" type="text/css" href="../static/styles.css">
 6 | </head>
 7 | <body>
 8 | 
 9 | 	<header>
10 | 		<div class="container">
11 | 		
12 | 		<h2>Sentimental Analysis in Speech</h2>
13 | 		
14 | 		<p class="brandname">ML App</p>
15 | 		
16 | 	</div>
17 | 	</header>
18 | 	<p style="color:blue;font-size:20;text-align: center;"><b>Results for Speech</b></p>
19 | 	<img src="data:image/png;base64, {{ plot_url }}" align="right">
20 | 	<div class="results">
21 | 
22 | 
23 | 		
24 | 	
25 | 	<h2 style="color:red;">Normal: {{normal}}%</h2>
26 | 	<h2 style="color:red;">Toxic: {{toxic}}%</h2>
27 | 	<h2 style="color:red;">Severe Toxic: {{severe_toxic}}%</h2>
28 | 	<h2 style="color:red;">Obscene: {{obscene}}%</h2>
29 | 	<h2 style="color:red;">Threat: {{threat}}%</h2>
30 | 	<h2 style="color:red;">Insult: {{insult}}%</h2>
31 | 	<h2 style="color:red;">Identity Hate: {{identity_hate}}%</h2>
32 | 
33 | 	</div>
34 | 
35 | 
36 | </body>
37 | </html>


--------------------------------------------------------------------------------