├── README.md ├── finishedProject ├── SMSSpamCollection.txt ├── app.py ├── functions.py └── logo_spamFilter.png └── starterFiles ├── SMSSpamCollection.txt ├── app.py ├── functions.py └── logo_spamFilter.png /README.md: -------------------------------------------------------------------------------- 1 | # SimpleSMSspamFilter_GUI 2 | The GUI Version of Simple SMS Spam Filter created with DearPy GUI 3 |
4 |
5 | This simple program labels user-provided strings as spam or not spam (ham) 6 |
7 | You can run it easily from your own command line terminal with "python app.py" (run it from inside the finishedProject directory, so the database and logo files are found) 8 |
9 |
10 | /finishedProject 11 |
12 | This directory contains the full version of the app. 13 |
14 | /starterFiles 15 |
16 | This directory contains only the logo, database, and functions.py 17 |
18 | app.py is incomplete, and is only useful to you if you follow the Python Simplified tutorial on YouTube: 19 |
20 | https://youtu.be/2RocXKPPx4o 21 |
22 |
23 | ![DearPyGUI](https://user-images.githubusercontent.com/32107652/99757481-48d73a80-2aa4-11eb-9e11-29fdc96f6c06.jpg) 24 |
25 |
26 | Dependencies: 27 |
28 | 34 | Database from: 35 |
36 | http://www.dt.fee.unicamp.br/~tiago/smsspamcollection/ 37 |
38 |
39 | Author: Mariya Sha 40 |
41 | Email: mariyasha888@gmail.com 42 |
43 |
44 | This project was created for learning purposes, so feel free to use 45 |
46 | whichever parts of this application you need to help you become better at programming. 47 |
48 |
49 | Read publication on Medium (without GUI): 50 |
51 | https://medium.com/analytics-vidhya/how-to-build-a-simple-sms-spam-filter-with-python-ee777240fc 52 |
53 | Watch the YouTube tutorial (without GUI): 54 |
55 | https://youtu.be/VDg8fCW8LdM 56 |
57 | Read DearPy GUI Documentation: 58 |
59 | https://hoffstadt.github.io/DearPyGui/index.html 60 |
61 | Connect on LinkedIn: 62 |
# 63 | https://www.linkedin.com/in/mariyasha888/ 64 |
# --------------------------------------------------------------------------------
# /finishedProject/app.py:
# --------------------------------------------------------------------------------
#DearPyGUI Imports
from dearpygui.core import *
from dearpygui.simple import *

#functions.py Imports
from functions import categorize_words, pre_process, predict

#prediction texts displayed so far - the last entry names the widget that is currently visible
_shown_predictions = []


#button callback function
#runs each time the "Check" button is clicked
def check_spam(sender, data, pred=None):
    '''
    Classify the text currently typed into the "Input" widget and display the verdict

    sender, data: callback arguments supplied by DearPyGui (not used here)
    pred: optional list used as the history of displayed prediction texts;
          when omitted, a module-level history shared by all clicks is used

    Every prediction text doubles as the name of its text widget, so a text
    that was already displayed once is shown again instead of being re-added
    (re-adding would clash with the existing, hidden widget of the same name)
    '''
    history = _shown_predictions if pred is None else pred

    #collect input, pre-process and get prediction
    input_value = pre_process(get_value("Input"))
    pred_text, text_colour = predict(input_value)

    with window("Simple SMS Spam Filter"):
        if not history:
            #runs only once - when the button is first clicked
            #and no prediction widget exists yet
            add_spacing(count=12)
            add_separator()
            add_spacing(count=12)
        else:
            #hide the previous prediction widget
            hide_item(history[-1])

        #store prediction inside the history list
        history.append(pred_text)

        #display prediction to user
        if does_item_exist(pred_text):
            #same verdict as an earlier click - reveal its widget again
            show_item(pred_text)
        else:
            add_text(pred_text, color=text_colour)


#window object settings
set_main_window_size(540, 720)
set_global_font_scale(1.25)
set_theme("Gold")
set_style_window_padding(30,30)

with window("Simple SMS Spam Filter", width=520, height=677):
    print("GUI is running...")
    set_window_pos("Simple SMS Spam Filter", 0, 0)

    #image logo
    add_drawing("logo", width=520, height=290) #create some space for the image
    add_separator()
    add_spacing(count=12)
    #text instructions
    add_text("Please enter an SMS message of your choice to check if it's spam or not",
             color=[232,163,33])
    add_spacing(count=12)
    #collect input
    add_input_text("Input", width=415, default_value="type message here!")
    add_spacing(count=12)
    #action button
    add_button("Check", callback=check_spam)


draw_image("logo", "logo_spamFilter.png", [0, 240]) #place the image inside the space

start_dearpygui()
print("Bye Bye, GUI")

# --------------------------------------------------------------------------------
# /finishedProject/functions.py:
# --------------------------------------------------------------------------------
#no need to change anything in this file!

#SMS Spam Filter Imports
import csv
import random
import pandas as pd
import string
import nltk
from collections import Counter
nltk.download('punkt')
nltk.download('stopwords')

#look the stop words up once, instead of asking nltk for the list again for every single token
STOP_WORDS = frozenset(nltk.corpus.stopwords.words('english'))


def categorize_words():
    '''
    Categorizes each spam/non-spam word into a corresponding list
    Repeating words in each list will help with categorizing

    Reads the module-level "data" table (columns "label" and "processed")
    Returns a (spam_words, ham_words) tuple of lists
    '''
    spam_words = []
    ham_words = []
    for sms in data['processed'][data['label'] == 'spam']:
        spam_words.extend(sms)
    for sms in data['processed'][data['label'] == 'ham']:
        ham_words.extend(sms)
    return spam_words, ham_words


def predict(user_input):
    '''
    Decide whether a pre-processed message looks like spam

    user_input: list of words, as returned by pre_process
    Returns a (message, colour) tuple - the verdict text with its certainty,
    and the colour to display it in (ham is green, spam is red, a tie is white)
    '''
    #add text colour : ham is green, spam is red
    red = [220,50,50]
    green = [100,220,50]

    #how many times the words of the message appear in each category
    #(counters return 0 for words that never appeared in the database)
    spam_counter = sum(spam_counts[word] for word in user_input)
    ham_counter = sum(ham_counts[word] for word in user_input)

    if ham_counter > spam_counter:
        #adding accuracy
        certainty = round((ham_counter / (ham_counter + spam_counter)) * 100, 2)
        return 'message is not spam, with {}% certainty'.format(certainty), green
    elif spam_counter > ham_counter:
        certainty = round((spam_counter / (ham_counter + spam_counter)) * 100, 2)
        return 'message is spam, with {}% certainty'.format(certainty), red
    else:
        #equal counts - this includes messages with no known words at all
        return 'message could be spam, with 50% certainty', [255,255,255]


def pre_process(sms):
    '''
    Remove punctuation and stop words from the custom sms

    sms: the raw message text
    Returns the remaining lower-case words as a list
    '''
    remove_punct = "".join(char.lower() for char in sms if char not in string.punctuation)
    tokenize = nltk.tokenize.word_tokenize(remove_punct)
    return [word for word in tokenize if word not in STOP_WORDS]


#the database is plain tab-separated text, not quoted CSV:
#quote characters inside a message must not be treated as field delimiters
data = pd.read_csv('SMSSpamCollection.txt', sep='\t', header=None,
                   names=["label", "sms"], quoting=csv.QUOTE_NONE)
data['processed'] = data['sms'].apply(pre_process)

#creating lists to store spam/non-spam associated words and their instances
spam_words, ham_words = categorize_words()

#instances of every word per category, counted once so predict doesn't re-scan the lists
spam_counts = Counter(spam_words)
ham_counts = Counter(ham_words)

# --------------------------------------------------------------------------------
# /finishedProject/logo_spamFilter.png:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/guindosaros/SimpleSMSspamFilter_GUI/96c91cd8bd62c6e4a60b6e8d40a0714bcb2c7859/finishedProject/logo_spamFilter.png
# --------------------------------------------------------------------------------
# /starterFiles/app.py:
# --------------------------------------------------------------------------------
#DearPyGUI Imports
from dearpygui.core import *
from dearpygui.simple import *

# --------------------------------------------------------------------------------
# /starterFiles/functions.py:
# --------------------------------------------------------------------------------
#no need to change anything in this file!

#SMS Spam Filter Imports
import csv
import random
import pandas as pd
import string
import nltk
from collections import Counter
nltk.download('punkt')
nltk.download('stopwords')

#look the stop words up once, instead of asking nltk for the list again for every single token
STOP_WORDS = frozenset(nltk.corpus.stopwords.words('english'))


def categorize_words():
    '''
    Categorizes each spam/non-spam word into a corresponding list
    Repeating words in each list will help with categorizing

    Reads the module-level "data" table (columns "label" and "processed")
    Returns a (spam_words, ham_words) tuple of lists
    '''
    spam_words = []
    ham_words = []
    for sms in data['processed'][data['label'] == 'spam']:
        spam_words.extend(sms)
    for sms in data['processed'][data['label'] == 'ham']:
        ham_words.extend(sms)
    return spam_words, ham_words


def predict(user_input):
    '''
    Decide whether a pre-processed message looks like spam

    user_input: list of words, as returned by pre_process
    Returns a (message, colour) tuple - the verdict text with its certainty,
    and the colour to display it in (ham is green, spam is red, a tie is white)
    '''
    #add text colour : ham is green, spam is red
    red = [220,50,50]
    green = [100,220,50]

    #how many times the words of the message appear in each category
    #(counters return 0 for words that never appeared in the database)
    spam_counter = sum(spam_counts[word] for word in user_input)
    ham_counter = sum(ham_counts[word] for word in user_input)

    if ham_counter > spam_counter:
        #adding accuracy
        certainty = round((ham_counter / (ham_counter + spam_counter)) * 100, 2)
        return 'message is not spam, with {}% certainty'.format(certainty), green
    elif spam_counter > ham_counter:
        certainty = round((spam_counter / (ham_counter + spam_counter)) * 100, 2)
        return 'message is spam, with {}% certainty'.format(certainty), red
    else:
        #equal counts - this includes messages with no known words at all
        return 'message could be spam, with 50% certainty', [255,255,255]


def pre_process(sms):
    '''
    Remove punctuation and stop words from the custom sms

    sms: the raw message text
    Returns the remaining lower-case words as a list
    '''
    remove_punct = "".join(char.lower() for char in sms if char not in string.punctuation)
    tokenize = nltk.tokenize.word_tokenize(remove_punct)
    return [word for word in tokenize if word not in STOP_WORDS]


#the database is plain tab-separated text, not quoted CSV:
#quote characters inside a message must not be treated as field delimiters
data = pd.read_csv('SMSSpamCollection.txt', sep='\t', header=None,
                   names=["label", "sms"], quoting=csv.QUOTE_NONE)
data['processed'] = data['sms'].apply(pre_process)

#creating lists to store spam/non-spam associated words and their instances
spam_words, ham_words = categorize_words()

#instances of every word per category, counted once so predict doesn't re-scan the lists
spam_counts = Counter(spam_words)
ham_counts = Counter(ham_words)

# --------------------------------------------------------------------------------
# /starterFiles/logo_spamFilter.png:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/guindosaros/SimpleSMSspamFilter_GUI/96c91cd8bd62c6e4a60b6e8d40a0714bcb2c7859/starterFiles/logo_spamFilter.png
# --------------------------------------------------------------------------------