├── README.md
├── finishedProject
├── SMSSpamCollection.txt
├── app.py
├── functions.py
└── logo_spamFilter.png
└── starterFiles
├── SMSSpamCollection.txt
├── app.py
├── functions.py
└── logo_spamFilter.png
/README.md:
--------------------------------------------------------------------------------
1 | # SimpleSMSspamFilter_GUI
2 | The GUI Version of Simple SMS Spam Filter created with DearPy GUI
3 |
4 |
5 | This simple program labels user-provided strings as spam or not spam (ham)
6 |
7 | You can run it easily from your own command line terminal with "python app.py" (run it from inside the finishedProject directory)
8 |
9 |
10 | /finishedProject
11 |
12 | This directory contains the full version of the app.
13 |
14 | /starterFiles
15 |
16 | This directory contains only the logo, database, and functions.py
17 |
18 | app.py is incomplete, and is only useful to you if you follow the Python Simplified tutorial on Youtube:
19 |
20 | https://youtu.be/2RocXKPPx4o
21 |
22 |
23 | 
24 |
25 |
26 | Dependencies:
27 |
28 |
29 | - DearPy GUI
30 | - Pandas
31 | - NLTK
32 | - String
33 |
34 | Database from:
35 |
36 | http://www.dt.fee.unicamp.br/~tiago/smsspamcollection/
37 |
38 |
39 | Author: Mariya Sha
40 |
41 | Email: mariyasha888@gmail.com
42 |
43 |
44 | This project was created for learning purposes, so feel free to use
45 |
46 | whichever parts of this application you need to help you become better at programming.
47 |
48 |
49 | !!! * Important Notes * !!!
50 |
51 |
52 | If you get the following error:
53 |
54 | TypeError: function missing required argument 'pmax' (pos 4)
55 | Exception: Error parsing DearPyGui Marvel::draw_image command on line 16
56 |
57 |
58 | Try adjusting your draw_image function to:
59 |
60 | draw_image("logo", "logo_spamFilter.png", [0,0], [458,192])
61 |
62 |
63 | !!! * Important Notes for Mac Users * !!!
64 |
65 |
66 | add wrap=0 to your add_text() function:
67 |
68 | add_text("Please enter an SMS message of your choice to check if it's spam or not",
69 | color=[232,163,33], wrap=0)
70 |
71 |
72 | A Special Thank You to Jerimiah Ham who shared this solution on YouTube for the Important Notes above and globalizing the pred list!
73 |
74 | Another Special Thank You to DroidZed : Gaming - IT for suggesting the global pred list!
75 |
76 | Another Special Thank You to cyberblitz for helping with removing repeating code and saving everybody lots of typing!
77 |
78 |
79 | Watch full GUI tutorial on Youtube:
80 |
81 | https://youtu.be/2RocXKPPx4o
82 |
83 | Read publication on Medium (without GUI):
84 |
85 | https://medium.com/analytics-vidhya/how-to-build-a-simple-sms-spam-filter-with-python-ee777240fc
86 |
87 | See youtube Tutorial (without GUI):
88 |
89 | https://youtu.be/VDg8fCW8LdM
90 |
91 | Read DearPy GUI Documentation:
92 |
93 | https://hoffstadt.github.io/DearPyGui/index.html
94 |
95 | Connect on LinkedIn:
96 |
97 | https://www.linkedin.com/in/mariyasha888/
98 |
99 | Follow on Instagram:
100 |
101 | https://www.instagram.com/mariyasha888/
102 |
--------------------------------------------------------------------------------
/finishedProject/app.py:
--------------------------------------------------------------------------------
1 | #DearPyGUI Imports
2 | from dearpygui.core import *
3 | from dearpygui.simple import *
4 |
5 | #functions.py Imports
6 | from functions import categorize_words, pre_process, predict
7 |
#every prediction string displayed so far; the last entry is the newest one
pred = []


#button callback function
#runs each time the "Check" button is clicked
def check_spam(pred):
    '''
    Classify the text currently typed into the "Input" widget and show the
    verdict inside the "Simple SMS Spam Filter" window.

    pred is the list of prediction strings added so far. It is mutated in
    place: the new verdict is appended, and the previous verdict's widget
    (if any) is hidden before the new one is added.
    '''
    with window("Simple SMS Spam Filter"):
        if not pred:
            #first click only: nothing has been displayed yet, so lay out
            #the separator that sits above the prediction text
            add_spacing(count=12)
            add_separator()
            add_spacing(count=12)
        else:
            #a previous verdict is on screen - hide its widget
            #NOTE(review): the verdict string is also used as the widget
            #name; a verdict identical to an earlier (hidden) one may
            #collide with it - confirm against the DearPyGui version in use
            hide_item(pred[-1])

        #read the message, clean it up and classify it
        tokens = pre_process(get_value("Input"))
        verdict, verdict_colour = predict(tokens)

        #remember the verdict and display it to the user
        pred.append(verdict)
        add_text(verdict, color=verdict_colour)
30 |
#window object settings
#(viewport size, font scale, colour theme and inner padding are applied
#before any widget is created)
set_main_window_size(540, 720)
set_global_font_scale(1.25)
set_theme("Gold")
set_style_window_padding(30,30)

#build the single application window; check_spam() re-opens this same
#window by name to append the prediction text
with window("Simple SMS Spam Filter", width=520, height=677):
    print("GUI is running...")
    set_window_pos("Simple SMS Spam Filter", 0, 0)

    #image logo
    add_drawing("logo", width=520, height=290) #create some space for the image

    add_separator()
    add_spacing(count=12)
    #text instructions
    add_text("Please enter an SMS message of your choice to check if it's spam or not",
    color=[232,163,33])
    add_spacing(count=12)
    #collect input
    #the widget name "Input" is what check_spam() passes to get_value()
    add_input_text("Input", width=415, default_value="type message here!")
    add_spacing(count=12)
    #action button
    #the lambda discards the two callback arguments and forwards the
    #module-level pred list to check_spam()
    add_button("Check", callback=lambda x,y:check_spam(pred))

#place the image inside the space
draw_image("logo", "logo_spamFilter.png", [0, 240])

#IF THE PREVIOUS LINE OF CODE TRIGGERS AN ERROR TRY
#draw_image("logo", "logo_spamFilter.png", [0,0], [458,192])

#hand control to DearPyGui; the goodbye message below is printed only
#after start_dearpygui() returns
start_dearpygui()
print("Bye Bye, GUI")
64 |
--------------------------------------------------------------------------------
/finishedProject/functions.py:
--------------------------------------------------------------------------------
1 | #no need to change anything in this file!
2 |
3 | #SMS Spam Filter Imports
4 | import random
5 | import pandas as pd
6 | import string
7 | import nltk
8 | nltk.download('punkt')
9 | nltk.download('stopwords')
10 |
def categorize_words():
    '''
    Build the two word lists used for classification.

    Reads the module-level DataFrame `data` and flattens the 'processed'
    token lists of every row labelled 'spam' into one list, and those of
    every row labelled 'ham' into another. Duplicates are kept on purpose:
    how often a word occurs in each list is what predict() counts.

    Returns a (spam_words, ham_words) tuple of lists.
    '''
    def words_labelled(label):
        #concatenate the token lists of all messages carrying this label
        collected = []
        for tokens in data['processed'][data['label'] == label]:
            collected.extend(tokens)
        return collected

    return words_labelled('spam'), words_labelled('ham')
25 |
def predict(user_input):
    '''
    Decide whether a pre-processed message looks like spam.

    user_input is an iterable of words. Each word's occurrences in the
    module-level spam_words and ham_words lists are tallied, and the larger
    tally wins.

    Returns a (message, colour) tuple: the message states the verdict and
    the winning tally's share of all hits as a percentage rounded to two
    decimals; the colour is an RGB list (green for ham, red for spam,
    white when the tallies are equal).
    '''
    #text colours : ham is green, spam is red, undecided is white
    red = [220,50,50]
    green = [100,220,50]
    white = [255,255,255]

    spam_hits = 0
    ham_hits = 0
    for word in user_input:
        spam_hits += spam_words.count(word)
        ham_hits += ham_words.count(word)

    #equal tallies (including no hits at all): no verdict either way
    if ham_hits == spam_hits:
        return 'message could be spam, with 50% certainty', white

    if ham_hits > spam_hits:
        template = 'message is not spam, with {}% certainty'
        winning_hits = ham_hits
        colour = green
    else:
        template = 'message is spam, with {}% certainty'
        winning_hits = spam_hits
        colour = red

    #the denominator is non-zero here because the tallies differ
    certainty = round((winning_hits / (ham_hits + spam_hits)) * 100, 2)
    return template.format(certainty), colour
47 |
#English stop words, loaded on first use and then reused for every message
_STOP_WORDS = None


def _english_stop_words():
    '''
    Return the NLTK English stop words as a frozenset, loading them once.
    '''
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(nltk.corpus.stopwords.words('english'))
    return _STOP_WORDS


def pre_process(sms):
    '''
    Remove punctuation and stop words from the custom sms

    sms is the raw message string. Every character found in
    string.punctuation is dropped, the remaining characters are lower-cased,
    the text is split into tokens with nltk.tokenize.word_tokenize, and
    English stop words are filtered out.

    Returns the remaining tokens as a list, in their original order.
    '''
    #fetch the stop words once per call instead of once per token: the
    #original evaluated stopwords.words('english') inside the filter
    #condition, rebuilding and linearly scanning the list for every word
    stop_words = _english_stop_words()

    remove_punct = "".join(char.lower() for char in sms if char not in string.punctuation)
    tokens = nltk.tokenize.word_tokenize(remove_punct)
    return [word for word in tokens if word not in stop_words]
56 |
#load the tab-separated corpus (no header row): column one is the
#spam/ham label, column two is the raw message text
#NOTE(review): pandas' default quote handling may merge rows that contain
#an unbalanced double quote - confirm the row count against the raw file
data = pd.read_csv(
    'SMSSpamCollection.txt',
    sep='\t',
    header=None,
    names=["label", "sms"],
)

#tokenised, stop-word-free version of every message
data['processed'] = data['sms'].apply(pre_process)

#creating lists to store spam/non-spam associated words and their instances
spam_words, ham_words = categorize_words()
62 |
--------------------------------------------------------------------------------
/finishedProject/logo_spamFilter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MariyaSha/SimpleSMSspamFilter_GUI/7914eddc9397f50744775d68566ebab83489d390/finishedProject/logo_spamFilter.png
--------------------------------------------------------------------------------
/starterFiles/app.py:
--------------------------------------------------------------------------------
1 | #DearPyGUI Imports
2 | from dearpygui.core import *
3 | from dearpygui.simple import *
4 |
--------------------------------------------------------------------------------
/starterFiles/functions.py:
--------------------------------------------------------------------------------
1 | #no need to change anything in this file!
2 |
3 | #SMS Spam Filter Imports
4 | import random
5 | import pandas as pd
6 | import string
7 | import nltk
8 | nltk.download('punkt')
9 | nltk.download('stopwords')
10 |
def categorize_words():
    '''
    Build the two word lists used for classification.

    Reads the module-level DataFrame `data` and flattens the 'processed'
    token lists of every row labelled 'spam' into one list, and those of
    every row labelled 'ham' into another. Duplicates are kept on purpose:
    how often a word occurs in each list is what predict() counts.

    Returns a (spam_words, ham_words) tuple of lists.
    '''
    def words_labelled(label):
        #concatenate the token lists of all messages carrying this label
        collected = []
        for tokens in data['processed'][data['label'] == label]:
            collected.extend(tokens)
        return collected

    return words_labelled('spam'), words_labelled('ham')
25 |
def predict(user_input):
    '''
    Decide whether a pre-processed message looks like spam.

    user_input is an iterable of words. Each word's occurrences in the
    module-level spam_words and ham_words lists are tallied, and the larger
    tally wins.

    Returns a (message, colour) tuple: the message states the verdict and
    the winning tally's share of all hits as a percentage rounded to two
    decimals; the colour is an RGB list (green for ham, red for spam,
    white when the tallies are equal).
    '''
    #text colours : ham is green, spam is red, undecided is white
    red = [220,50,50]
    green = [100,220,50]
    white = [255,255,255]

    spam_hits = 0
    ham_hits = 0
    for word in user_input:
        spam_hits += spam_words.count(word)
        ham_hits += ham_words.count(word)

    #equal tallies (including no hits at all): no verdict either way
    if ham_hits == spam_hits:
        return 'message could be spam, with 50% certainty', white

    if ham_hits > spam_hits:
        template = 'message is not spam, with {}% certainty'
        winning_hits = ham_hits
        colour = green
    else:
        template = 'message is spam, with {}% certainty'
        winning_hits = spam_hits
        colour = red

    #the denominator is non-zero here because the tallies differ
    certainty = round((winning_hits / (ham_hits + spam_hits)) * 100, 2)
    return template.format(certainty), colour
47 |
#English stop words, loaded on first use and then reused for every message
_STOP_WORDS = None


def _english_stop_words():
    '''
    Return the NLTK English stop words as a frozenset, loading them once.
    '''
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(nltk.corpus.stopwords.words('english'))
    return _STOP_WORDS


def pre_process(sms):
    '''
    Remove punctuation and stop words from the custom sms

    sms is the raw message string. Every character found in
    string.punctuation is dropped, the remaining characters are lower-cased,
    the text is split into tokens with nltk.tokenize.word_tokenize, and
    English stop words are filtered out.

    Returns the remaining tokens as a list, in their original order.
    '''
    #fetch the stop words once per call instead of once per token: the
    #original evaluated stopwords.words('english') inside the filter
    #condition, rebuilding and linearly scanning the list for every word
    stop_words = _english_stop_words()

    remove_punct = "".join(char.lower() for char in sms if char not in string.punctuation)
    tokens = nltk.tokenize.word_tokenize(remove_punct)
    return [word for word in tokens if word not in stop_words]
56 |
#load the tab-separated corpus (no header row): column one is the
#spam/ham label, column two is the raw message text
#NOTE(review): pandas' default quote handling may merge rows that contain
#an unbalanced double quote - confirm the row count against the raw file
data = pd.read_csv(
    'SMSSpamCollection.txt',
    sep='\t',
    header=None,
    names=["label", "sms"],
)

#tokenised, stop-word-free version of every message
data['processed'] = data['sms'].apply(pre_process)

#creating lists to store spam/non-spam associated words and their instances
spam_words, ham_words = categorize_words()
62 |
--------------------------------------------------------------------------------
/starterFiles/logo_spamFilter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MariyaSha/SimpleSMSspamFilter_GUI/7914eddc9397f50744775d68566ebab83489d390/starterFiles/logo_spamFilter.png
--------------------------------------------------------------------------------