├── home.jpg
├── result.jpg
├── speech_classification1.pkl
├── speech_classification2.pkl
├── speech_classification3.pkl
├── speech_classification4.pkl
├── speech_classification5.pkl
├── speech_classification6.pkl
├── templates
├── home.html
└── result.html
├── LICENSE
├── static
└── styles.css
├── README.md
└── app.py
/home.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/HEAD/home.jpg
--------------------------------------------------------------------------------
/result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/HEAD/result.jpg
--------------------------------------------------------------------------------
/speech_classification1.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/HEAD/speech_classification1.pkl
--------------------------------------------------------------------------------
/speech_classification2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/HEAD/speech_classification2.pkl
--------------------------------------------------------------------------------
/speech_classification3.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/HEAD/speech_classification3.pkl
--------------------------------------------------------------------------------
/speech_classification4.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/HEAD/speech_classification4.pkl
--------------------------------------------------------------------------------
/speech_classification5.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/HEAD/speech_classification5.pkl
--------------------------------------------------------------------------------
/speech_classification6.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/anik8gupta/Toxic_Speech_Classification/HEAD/speech_classification6.pkl
--------------------------------------------------------------------------------
/templates/home.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Home
5 |
6 |
7 |
8 |
9 |
10 |
19 |
20 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/templates/result.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
Sentimental Analysis in Speech
13 |
14 |
ML App
15 |
16 |
17 |
18 | Results for Speech
19 |
20 |
21 |
22 |
23 |
24 |
25 |
Normal: {{normal}}%
26 | Toxic: {{toxic}}%
27 | Severe Toxic: {{severe_toxic}}%
28 | Obscene: {{obscene}}%
29 | Threat: {{threat}}%
30 | Insult: {{insult}}%
31 | Identity Hate: {{identity_hate}}%
32 |
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Aniket Gupta
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/static/styles.css:
--------------------------------------------------------------------------------
1 | body{
2 | font:15px/1.5 Arial, Helvetica,sans-serif;
3 | padding: 0px;
4 | background-color:#ffffff;
5 | }
6 |
7 | .container{
8 | width:100%;
9 | margin: auto;
10 | overflow: hidden;
11 | }
12 |
13 | header{
14 | background-image: linear-gradient(to right,#17ba3a,#178244);
15 | border-bottom:rgb(74, 255, 68) 3px solid;
16 | height:120px;
17 | width:100%;
18 | padding-top:30px;
19 |
20 | }
21 |
22 | .main-header{
23 | text-align:center;
24 | background-color: blue;
25 | height:100px;
26 | width:100%;
27 | margin:0px;
28 | }
29 | .brandname{
30 |
31 | font-size:10px;
32 | color: #fff;
33 | margin: 10px;
34 | text-align:center;
35 | }
36 |
37 | header h2{
38 |
39 | text-align:center;
40 | color:#fff;
41 | font-size:30px;
42 |
43 | }
44 |
45 |
46 |
47 | .btn-info {
48 | background-image: linear-gradient(to right,#17ba3a,#178244);
49 | height:40px;
50 | width:100px;
51 | border-radius: 20px;
52 | } /* Green gradient */
53 | .btn-info:hover {background: #17ba3a;}
54 |
55 |
56 | .resultss{
57 | border-radius: 15px 50px;
58 | background: #345fe4;
59 | padding: 20px;
60 | width: 200px;
61 | height: 150px;
62 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Toxic Speech Classification
2 | It is a full-fledged web application. Based on sentiment classification and using the NLTK library, it predicts how toxic, severely toxic, insulting, obscene, or threatening a speech is.
3 |
4 | ### What the Front-End looks like
5 | [Home Page](https://github.com/anik8gupta/Toxic_Speech_Classification/blob/master/templates/home.html) | [Result Page](https://github.com/anik8gupta/Toxic_Speech_Classification/blob/master/templates/result.html)
6 | :-------------------------:|:-------------------------:
7 |  | 
8 |
9 | Here, simple CSS and HTML are used, with the POST method to send the data.
10 |
11 | ### For the Back-End [app.py](https://github.com/anik8gupta/Toxic_Speech_Classification/blob/master/app.py), I use Python's Flask framework
12 |
13 | ### Main Libraries Used
14 | * [Pandas](https://pandas.pydata.org/pandas-docs/version/0.22/)
15 | * [Matplotlib.pyplot](https://matplotlib.org/3.1.0/api/_as_gen/matplotlib.pyplot.plot.html)
16 | * [NLTK](https://www.nltk.org/)
17 | * [Pickle](https://docs.python.org/3/library/pickle.html)
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | # Import Libraries
2 | from flask import Flask,render_template,url_for,request
3 | import pandas as pd
4 |
5 | from sklearn.feature_extraction.text import CountVectorizer
6 |
7 | from sklearn.externals import joblib
8 |
9 |
10 | app = Flask(__name__)
11 |
@app.route('/')
def home():
    """Render and return the ``home.html`` template for the site root."""
    landing_page = render_template('home.html')
    return landing_page
15 |
# Pickled classifiers, in file order (speech_classification1.pkl .. 6.pkl).
_MODEL_FILES = tuple('speech_classification%d.pkl' % n for n in range(1, 7))

# Label each classifier's predictions are reported under, in file order.
# NOTE(review): the wiring is reversed relative to the file numbering
# (model 1 -> 'identity_hate' ... model 6 -> 'toxic'). This reproduces the
# existing behaviour; confirm it against the training notebook.
_MODEL_LABELS = ('identity_hate', 'insult', 'threat',
                 'obscene', 'severe_toxic', 'toxic')

# Order in which label columns are totalled and drawn in the pie chart.
_LABEL_COLUMNS = ['toxic', 'severe_toxic', 'obscene',
                  'threat', 'insult', 'identity_hate']

# Lazily populated cache so the pickles are read once, not on every request.
_loaded_models = None


def _load_models():
    """Return the six unpickled classifiers, loading them on first use."""
    global _loaded_models
    if _loaded_models is None:
        import pickle

        models = []
        for path in _MODEL_FILES:
            # SECURITY: pickle.load can execute arbitrary code; these files
            # must only ever come from a trusted source.
            with open(path, 'rb') as model_file:
                models.append(pickle.load(model_file))
        _loaded_models = models
    return _loaded_models


def _clean_sentences(text):
    """Split *text* into sentences and normalise each one.

    Each sentence has non-letters replaced by spaces, is lower-cased, has
    English stop words removed and the remaining words Porter-stemmed.
    Returns one space-joined string per sentence.
    """
    import re
    from nltk.corpus import stopwords
    from nltk.stem.porter import PorterStemmer
    from nltk.tokenize import sent_tokenize

    stemmer = PorterStemmer()
    # Build the stop-word set once instead of once per word.
    stop_words = set(stopwords.words('english'))

    cleaned = []
    for sentence in sent_tokenize(text):
        words = re.sub('[^a-zA-Z]', ' ', sentence).lower().split()
        cleaned.append(
            ' '.join(stemmer.stem(word) for word in words
                     if word not in stop_words)
        )
    return cleaned


def _pie_chart_data(totals):
    """Render *totals* as a PNG pie chart; return it base64- and URL-encoded."""
    import base64
    import io
    import urllib.parse
    from matplotlib.figure import Figure

    # A standalone Figure avoids pyplot's global state, which would otherwise
    # accumulate wedges and memory across requests.
    figure = Figure()
    axes = figure.subplots()
    axes.pie(totals)
    axes.set_title("pie chart distribution")

    buffer = io.BytesIO()
    figure.savefig(buffer, format='png')
    return urllib.parse.quote(base64.b64encode(buffer.getvalue()).decode())


@app.route('/predict',methods=['POST'])
def predict():
    """Classify the submitted speech and render the result page.

    Reads the ``message`` form field, cleans it sentence by sentence, runs
    the six classifiers on bag-of-words features, and passes the percentage
    share of each label (plus ``normal`` for sentences with no label set)
    and an encoded pie chart to ``result.html``.

    Returns:
        The rendered ``result.html`` page, or a plain-text message with
        status 400 when the submitted text yields no usable vocabulary.
    """
    message = request.form['message']
    sentences = _clean_sentences(message)

    # NOTE(review): the vectorizer is fitted on the request text, so its
    # columns (and their count) need not match what the classifiers were
    # trained on. Presumably the training-time vectorizer should be
    # persisted and reused here -- confirm against the training code.
    vectorizer = CountVectorizer(max_features=100)
    try:
        features = vectorizer.fit_transform(sentences).toarray()
    except ValueError:
        # Raised when the text is empty or holds only stop words / symbols.
        return 'No classifiable words were found in the submitted text.', 400

    # One 0/1 column per label, one row per sentence.
    scores = pd.DataFrame({
        label: pd.DataFrame(model.predict(features)).iloc[:, 0].values
        for label, model in zip(_MODEL_LABELS, _load_models())
    })[_LABEL_COLUMNS]

    # A sentence is "normal" when none of the six labels fired for it.
    scores['normal'] = (scores[_LABEL_COLUMNS].sum(axis=1) == 0).astype(int)

    total = scores[_LABEL_COLUMNS + ['normal']].sum()
    grand_total = total.sum()
    percentages = {
        label: (total[label] / grand_total) * 100 for label in total.index
    }

    return render_template('result.html',
                           plot_url=_pie_chart_data(total),
                           **percentages)
124 |
125 |
126 |
if __name__ == '__main__':
    import os

    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary code, so it is enabled only on explicit opt-in
    # (e.g. FLASK_DEBUG=1) rather than unconditionally.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled)
129 |
--------------------------------------------------------------------------------