├── README.md ├── finishedProject ├── SMSSpamCollection.txt ├── app.py ├── functions.py └── logo_spamFilter.png └── starterFiles ├── SMSSpamCollection.txt ├── app.py ├── functions.py └── logo_spamFilter.png /README.md: -------------------------------------------------------------------------------- 1 | # SimpleSMSspamFilter_GUI 2 | The GUI Version of Simple SMS Spam Filter created with DearPy GUI 3 |
4 |
5 | This simple program labels user-provided strings as spam or not spam (ham) 6 |
7 | You can run it easily from your own command line terminal with "python Dearpy.py" 8 |
9 |
10 | /finishedProject 11 |
12 | This directory contains the full version of the app. 13 |
14 | /starterFiles 15 |
16 | This directory contains only the logo, database, and functions.py 17 |
18 | app.py is incomplete, and is only useful to you if you follow the Python Simplified tutorial on Youtube: 19 |
20 | https://youtu.be/2RocXKPPx4o 21 |
22 |
23 | ![DearPyGUI](https://user-images.githubusercontent.com/32107652/99757481-48d73a80-2aa4-11eb-9e11-29fdc96f6c06.jpg) 24 |
25 |
26 | Dependencies: 27 |
28 | 34 | Database from: 35 |
36 | http://www.dt.fee.unicamp.br/~tiago/smsspamcollection/ 37 |
38 |
39 | Author: Mariya Sha 40 |
41 | Email: mariyasha888@gmail.com 42 |
43 |
44 | This project was created for learning purposes, so feel free to use 45 |
46 | whichever parts of this applicatioin you need to help you become better at programming. 47 |
48 |
49 | !!! * Important Notes * !!! 50 |
51 |
52 | If you get the following error: 53 |
54 | TypeError: function missing required argument 'pmax' (pos 4) 55 | Exception: Error parsing DearPyGui Marvel::draw_image command on line 16 56 |
57 |
58 | Try adjusting your draw_image function to: 59 |
60 | draw_image("logo", "logo_spamFilter.png", [0,0], [458,192]) 61 |
62 |
63 | !!! * Important Notes for Mac Users * !!! 64 |
65 |
66 | add wrap=0 to your add_text() function: 67 |
68 | add_text("Please enter an SMS message of your choice to check if it's spam or not", 69 | color=[232,163,33]) 70 |
71 |
72 | A Special Thank You to Jerimiah Ham who shared this solution on YouTube for the Important Notes above and globalizing the pred list! 73 |
74 | Another Special Thank You to DroidZed : Gaming - IT for suggesting the global pred list! 75 |
76 | Another Special Thank You to cyberblitz for helping with removing repeating code and saving everybody lots of typing! 77 |
78 |
79 | Watch full GUI tutorial on Youtube: 80 |
81 | https://youtu.be/2RocXKPPx4o 82 |
83 | Read publication on Medium (without GUI): 84 |
85 | https://medium.com/analytics-vidhya/how-to-build-a-simple-sms-spam-filter-with-python-ee777240fc 86 |
87 | See youtube Tutorial (without GUI): 88 |
89 | https://youtu.be/VDg8fCW8LdM 90 |
91 | Read DearPy GUI Documentation: 92 |
93 | https://hoffstadt.github.io/DearPyGui/index.html 94 |
95 | Connect on LinkedIn: 96 |
97 | https://www.linkedin.com/in/mariyasha888/ 98 |
99 | Follow on Instagram: 100 |
101 | https://www.instagram.com/mariyasha888/ 102 | -------------------------------------------------------------------------------- /finishedProject/app.py: -------------------------------------------------------------------------------- 1 | #DearPyGUI Imports 2 | from dearpygui.core import * 3 | from dearpygui.simple import * 4 | 5 | #functions.py Imports 6 | from functions import categorize_words, pre_process, predict 7 | 8 | pred = [] 9 | #button callbak function 10 | #runs each time when the "Check" button is clicked 11 | def check_spam(pred): 12 | with window("Simple SMS Spam Filter"): 13 | if pred == []: 14 | #runs only once - the the button is first clicked 15 | #and pred[-1] widget doesn't exist 16 | add_spacing(count=12) 17 | add_separator() 18 | add_spacing(count=12) 19 | else: 20 | #hide prediction widget 21 | hide_item(pred[-1]) 22 | #collect input, pre-process and get prediction 23 | input_value = get_value("Input") 24 | input_value = pre_process(input_value) 25 | pred_text, text_colour = predict(input_value) 26 | #store prediction inside the pred list 27 | pred.append(pred_text) 28 | #display prediction to user 29 | add_text(pred[-1], color=text_colour) 30 | 31 | #window object settings 32 | set_main_window_size(540, 720) 33 | set_global_font_scale(1.25) 34 | set_theme("Gold") 35 | set_style_window_padding(30,30) 36 | 37 | with window("Simple SMS Spam Filter", width=520, height=677): 38 | print("GUI is running...") 39 | set_window_pos("Simple SMS Spam Filter", 0, 0) 40 | 41 | #image logo 42 | add_drawing("logo", width=520, height=290) #create some space for the image 43 | 44 | add_separator() 45 | add_spacing(count=12) 46 | #text instructions 47 | add_text("Please enter an SMS message of your choice to check if it's spam or not", 48 | color=[232,163,33]) 49 | add_spacing(count=12) 50 | #collect input 51 | add_input_text("Input", width=415, default_value="type message here!") 52 | add_spacing(count=12) 53 | #action button 54 | add_button("Check", callback=lambda x,y:check_spam(pred)) 55 | 56 | #place the image inside the space 57 | draw_image("logo", "logo_spamFilter.png", [0, 240]) 58 | 59 | #IF THE PREVIOUS LINE OF CODE TRIGGERS AN ERRROR TRY 60 | #draw_image("logo", "logo_spamFilter.png", [0,0], [458,192]) 61 | 62 | start_dearpygui() 63 | print("Bye Bye, GUI") 64 | -------------------------------------------------------------------------------- /finishedProject/functions.py: -------------------------------------------------------------------------------- 1 | #no need to change anything in this file! 2 | 3 | #SMS Spam Filter Imports 4 | import random 5 | import pandas as pd 6 | import string 7 | import nltk 8 | nltk.download('punkt') 9 | nltk.download('stopwords') 10 | 11 | def categorize_words(): 12 | ''' 13 | Catagorizes each spam/non-spam word into a corresponding list 14 | Repeating words in each list will help with categorizing 15 | ''' 16 | spam_words = [] 17 | ham_words = [] 18 | for sms in data['processed'][data['label'] == 'spam']: 19 | for word in sms: 20 | spam_words.append(word) 21 | for sms in data['processed'][data['label'] == 'ham']: 22 | for word in sms: 23 | ham_words.append(word) 24 | return spam_words, ham_words 25 | 26 | def predict(user_input): 27 | spam_counter = 0 28 | ham_counter = 0 29 | 30 | #add text colour : ham is green, spam is red 31 | red = [220,50,50] 32 | green = [100,220,50] 33 | 34 | for word in user_input: 35 | spam_counter += spam_words.count(word) 36 | ham_counter += ham_words.count(word) 37 | 38 | if ham_counter > spam_counter: 39 | #adding accuracy 40 | certainty = round((ham_counter / (ham_counter + spam_counter)) * 100, 2) 41 | return 'message is not spam, with {}% certainty'.format(certainty), green 42 | elif spam_counter > ham_counter: 43 | certainty = round((spam_counter / (ham_counter + spam_counter)) * 100, 2) 44 | return 'message is spam, with {}% certainty'.format(certainty), red 45 | else: 46 | return 'message could be spam, with 50% certainty', [255,255,255] 47 | 48 | def pre_process(sms): 49 | ''' 50 | Remove punctuation and stop words from the custom sms 51 | ''' 52 | remove_punct = "".join([word.lower() for word in sms if word not in string.punctuation]) 53 | tokenize = nltk.tokenize.word_tokenize(remove_punct) 54 | remove_stop_words = [word for word in tokenize if word not in nltk.corpus.stopwords.words('english')] 55 | return remove_stop_words 56 | 57 | data = pd.read_csv('SMSSpamCollection.txt', sep = '\t', header=None, names=["label", "sms"]) 58 | data['processed'] = data['sms'].apply(lambda x: pre_process(x)) 59 | 60 | #creating lists to store spam/non-spam associated words and their instances 61 | spam_words, ham_words = categorize_words() 62 | -------------------------------------------------------------------------------- /finishedProject/logo_spamFilter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MariyaSha/SimpleSMSspamFilter_GUI/7914eddc9397f50744775d68566ebab83489d390/finishedProject/logo_spamFilter.png -------------------------------------------------------------------------------- /starterFiles/app.py: -------------------------------------------------------------------------------- 1 | #DearPyGUI Imports 2 | from dearpygui.core import * 3 | from dearpygui.simple import * 4 | -------------------------------------------------------------------------------- /starterFiles/functions.py: -------------------------------------------------------------------------------- 1 | #no need to change anything in this file! 2 | 3 | #SMS Spam Filter Imports 4 | import random 5 | import pandas as pd 6 | import string 7 | import nltk 8 | nltk.download('punkt') 9 | nltk.download('stopwords') 10 | 11 | def categorize_words(): 12 | ''' 13 | Catagorizes each spam/non-spam word into a corresponding list 14 | Repeating words in each list will help with categorizing 15 | ''' 16 | spam_words = [] 17 | ham_words = [] 18 | for sms in data['processed'][data['label'] == 'spam']: 19 | for word in sms: 20 | spam_words.append(word) 21 | for sms in data['processed'][data['label'] == 'ham']: 22 | for word in sms: 23 | ham_words.append(word) 24 | return spam_words, ham_words 25 | 26 | def predict(user_input): 27 | spam_counter = 0 28 | ham_counter = 0 29 | 30 | #add text colour : ham is green, spam is red 31 | red = [220,50,50] 32 | green = [100,220,50] 33 | 34 | for word in user_input: 35 | spam_counter += spam_words.count(word) 36 | ham_counter += ham_words.count(word) 37 | 38 | if ham_counter > spam_counter: 39 | #adding accuracy 40 | certainty = round((ham_counter / (ham_counter + spam_counter)) * 100, 2) 41 | return 'message is not spam, with {}% certainty'.format(certainty), green 42 | elif spam_counter > ham_counter: 43 | certainty = round((spam_counter / (ham_counter + spam_counter)) * 100, 2) 44 | return 'message is spam, with {}% certainty'.format(certainty), red 45 | else: 46 | return 'message could be spam, with 50% certainty', [255,255,255] 47 | 48 | def pre_process(sms): 49 | ''' 50 | Remove punctuation and stop words from the custom sms 51 | ''' 52 | remove_punct = "".join([word.lower() for word in sms if word not in string.punctuation]) 53 | tokenize = nltk.tokenize.word_tokenize(remove_punct) 54 | remove_stop_words = [word for word in tokenize if word not in nltk.corpus.stopwords.words('english')] 55 | return remove_stop_words 56 | 57 | data = pd.read_csv('SMSSpamCollection.txt', sep = '\t', header=None, names=["label", "sms"]) 58 | data['processed'] = data['sms'].apply(lambda x: pre_process(x)) 59 | 60 | #creating lists to store spam/non-spam associated words and their instances 61 | spam_words, ham_words = categorize_words() 62 | -------------------------------------------------------------------------------- /starterFiles/logo_spamFilter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MariyaSha/SimpleSMSspamFilter_GUI/7914eddc9397f50744775d68566ebab83489d390/starterFiles/logo_spamFilter.png --------------------------------------------------------------------------------