├── DataTransform_Training ├── DataTransformation.py └── __pycache__ │ ├── DataTransformation.cpython-36.pyc │ └── DataTransformation.cpython-37.pyc ├── DataTransformation_Prediction ├── DataTransformationPrediction.py └── __pycache__ │ ├── DataTransformationPrediction.cpython-36.pyc │ └── DataTransformationPrediction.cpython-37.pyc ├── DataTypeValidation_Insertion_Prediction ├── DataTypeValidationPrediction.py └── __pycache__ │ ├── DataTypeValidationPrediction.cpython-36.pyc │ └── DataTypeValidationPrediction.cpython-37.pyc ├── DataTypeValidation_Insertion_Training ├── DataTypeValidation.py └── __pycache__ │ ├── DataTypeValidation.cpython-36.pyc │ └── DataTypeValidation.cpython-37.pyc ├── EDA ├── Phising.ipynb └── phising.csv ├── EncoderPickle └── enc.pickle ├── PredictionArchivedBadData ├── BadData_2020-02-16_172225 │ ├── phising_080020_120.csv │ └── phising_08012020_120.csv └── BadData_2020-02-16_174456 │ ├── phising_080020_120.csv │ └── phising_08012020_120.csv ├── Prediction_Batch_files ├── phising_080020_120.csv ├── phising_08012020_120.csv └── phising_08012020_120000.csv ├── Prediction_Database └── Prediction.db ├── Prediction_FileFromDB └── InputFile.csv ├── Prediction_Logs ├── DataBaseConnectionLog.txt ├── DbInsertLog.txt ├── DbTableCreateLog.txt ├── ExportToCsv.txt ├── GeneralLog.txt ├── Prediction_Log.txt ├── columnValidationLog.txt ├── dataTransformLog.txt ├── missingValuesInColumn.txt └── nameValidationLog.txt ├── Prediction_Output_File └── Predictions.csv ├── Prediction_Raw_Data_Validation ├── __pycache__ │ ├── predictionDataValidation.cpython-36.pyc │ └── predictionDataValidation.cpython-37.pyc └── predictionDataValidation.py ├── Problem Statement.docx ├── Procfile ├── README.md ├── Training_Batch_Files └── phising_08012020_120000.csv ├── Training_FileFromDB └── InputFile.csv ├── Training_Logs ├── DataBaseConnectionLog.txt ├── DbInsertLog.txt ├── DbTableCreateLog.txt ├── ExportToCsv.txt ├── GeneralLog.txt ├── ModelTrainingLog.txt ├── 
Training_Main_Log.txt ├── addQuotesToStringValuesInColumn.txt ├── columnValidationLog.txt ├── missingValuesInColumn.txt ├── nameValidationLog.txt └── valuesfromSchemaValidationLog.txt ├── Training_Raw_data_validation ├── __pycache__ │ ├── rawValidation.cpython-36.pyc │ └── rawValidation.cpython-37.pyc └── rawValidation.py ├── __pycache__ ├── predictFromModel.cpython-37.pyc ├── prediction_Validation_Insertion.cpython-37.pyc ├── trainingModel.cpython-37.pyc └── training_Validation_Insertion.cpython-37.pyc ├── application_logging ├── __pycache__ │ ├── logger.cpython-36.pyc │ └── logger.cpython-37.pyc └── logger.py ├── best_model_finder ├── __pycache__ │ ├── tuner.cpython-36.pyc │ └── tuner.cpython-37.pyc └── tuner.py ├── data_ingestion ├── __pycache__ │ ├── data_loader.cpython-36.pyc │ ├── data_loader.cpython-37.pyc │ ├── data_loader_prediction.cpython-36.pyc │ └── data_loader_prediction.cpython-37.pyc ├── data_loader.py └── data_loader_prediction.py ├── data_preprocessing ├── __pycache__ │ ├── clustering.cpython-36.pyc │ ├── clustering.cpython-37.pyc │ ├── preprocessing.cpython-36.pyc │ ├── preprocessing.cpython-37.pyc │ └── preprocessing_pred.cpython-36.pyc ├── clustering.py └── preprocessing.py ├── file_operations ├── __pycache__ │ ├── file_methods.cpython-36.pyc │ └── file_methods.cpython-37.pyc └── file_methods.py ├── flask_monitoringdashboard.db ├── main.py ├── manifest.yml ├── models ├── KMeans │ └── KMeans.sav ├── XGBoost0 │ └── XGBoost0.sav ├── XGBoost1 │ └── XGBoost1.sav ├── XGBoost2 │ └── XGBoost2.sav └── XGBoost3 │ └── XGBoost3.sav ├── phising.csv ├── predictFromModel.py ├── prediction_Validation_Insertion.py ├── preprocessing_data └── K-Means_Elbow.PNG ├── requirements.txt ├── runtime.txt ├── schema_prediction.json ├── schema_training.json ├── trainingModel.py └── training_Validation_Insertion.py /DataTransform_Training/DataTransformation.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 
from os import listdir
from application_logging.logger import App_Logger
import pandas as pd


class dataTransform:

    """
    This class shall be used for transforming the Good Raw Training Data
    before loading it in the Database.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None
    """

    def __init__(self):
        # Folder containing the raw training files that passed validation.
        self.goodDataPath = "Training_Raw_files_validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
        Method Name: addQuotesToStringValuesInColumn
        Description: Wraps every '?' placeholder value in single quotes so the
                     value can be inserted into the database table as a
                     varchar without causing a SQL syntax error. Each file in
                     the Good_Raw folder is rewritten in place.
        Output: None
        On Failure: logs the exception (does not re-raise)

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        log_file = open("Training_Logs/addQuotesToStringValuesInColumn.txt", 'a+')
        try:
            for file in listdir(self.goodDataPath):
                data = pd.read_csv(self.goodDataPath + "/" + file)
                for column in data.columns:
                    # '?' marks a missing value in the raw files; quote it so
                    # it survives insertion into an integer/float column.
                    count = data[column][data[column] == '?'].count()
                    if count != 0:
                        data[column] = data[column].replace('?', "'?'")
                data.to_csv(self.goodDataPath + "/" + file, index=None, header=True)
                self.logger.log(log_file, " %s: Quotes added successfully!!" % file)
        except Exception as e:
            self.logger.log(log_file, "Data Transformation failed because:: %s" % e)
        finally:
            # Single close on every path (the original closed the handle twice
            # on the exception path).
            log_file.close()
from datetime import datetime
from os import listdir
import pandas
from application_logging.logger import App_Logger


class dataTransformPredict:

    """
    This class shall be used for transforming the Good Raw Prediction Data
    before loading it in the Database.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None
    """

    def __init__(self):
        # Folder containing the raw prediction files that passed validation.
        self.goodDataPath = "Prediction_Raw_Files_Validated/Good_Raw"
        self.logger = App_Logger()

    def addQuotesToStringValuesInColumn(self):
        """
        Method Name: addQuotesToStringValuesInColumn
        Description: Wraps every '?' placeholder value in single quotes so the
                     value can be inserted into the database table as a
                     varchar without causing a SQL syntax error. Each file in
                     the Good_Raw folder is rewritten in place.
                     (The original docstring described a NULL-substitution
                     step this method never performed.)
        Output: None
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        # Open the log once; the original opened it inside the try and opened
        # a second handle in the except, leaking the first.
        log_file = open("Prediction_Logs/dataTransformLog.txt", 'a+')
        try:
            for file in listdir(self.goodDataPath):
                data = pandas.read_csv(self.goodDataPath + "/" + file)
                for column in data.columns:
                    # '?' marks a missing value in the raw files; quote it so
                    # it survives insertion into an integer/float column.
                    count = data[column][data[column] == '?'].count()
                    if count != 0:
                        data[column] = data[column].replace('?', "'?'")
                data.to_csv(self.goodDataPath + "/" + file, index=None, header=True)
                self.logger.log(log_file, " %s: Quotes added successfully!!" % file)
        except Exception as e:
            self.logger.log(log_file, "Data Transformation failed because:: %s" % e)
            raise e
        finally:
            log_file.close()
import shutil
import sqlite3
from os import listdir
import os
import csv
from application_logging.logger import App_Logger


class dBOperation:
    """
    This class shall be used for handling all the SQL operations for the
    prediction pipeline.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None
    """

    def __init__(self):
        self.path = 'Prediction_Database/'  # folder holding the SQLite db file
        self.badFilePath = "Prediction_Raw_Files_Validated/Bad_Raw"
        self.goodFilePath = "Prediction_Raw_Files_Validated/Good_Raw"
        self.logger = App_Logger()

    def dataBaseConnection(self, DatabaseName):
        """
        Method Name: dataBaseConnection
        Description: This method creates the database with the given name and
                     if the Database already exists then opens the connection
                     to the DB.
        Output: Connection to the DB
        On Failure: Raise ConnectionError

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        try:
            conn = sqlite3.connect(self.path + DatabaseName + '.db')
            file = open("Prediction_Logs/DataBaseConnectionLog.txt", 'a+')
            self.logger.log(file, "Opened %s database successfully" % DatabaseName)
            file.close()
        except Exception as e:
            # sqlite3 raises sqlite3.Error, never ConnectionError, so the
            # original `except ConnectionError` could never fire; catch the
            # real failure and re-raise the ConnectionError callers expect.
            file = open("Prediction_Logs/DataBaseConnectionLog.txt", 'a+')
            self.logger.log(file, "Error while connecting to database: %s" % e)
            file.close()
            raise ConnectionError(e)
        return conn

    def createTableDb(self, DatabaseName, column_names):
        """
        Method Name: createTableDb
        Description: This method creates a table in the given database which
                     will be used to insert the Good data after raw data
                     validation.
        Output: None
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        try:
            conn = self.dataBaseConnection(DatabaseName)
            # Start from a clean table on every prediction run.
            conn.execute('DROP TABLE IF EXISTS Good_Raw_Data;')

            for column_name, data_type in column_names.items():
                # If the table already exists, extend it with the new column;
                # the very first ALTER fails (no table yet) and the CREATE
                # runs instead. Narrowed from a bare except to the specific
                # sqlite error this pattern relies on.
                try:
                    conn.execute('ALTER TABLE Good_Raw_Data ADD COLUMN "{column_name}" {dataType}'.format(column_name=column_name, dataType=data_type))
                except sqlite3.OperationalError:
                    conn.execute('CREATE TABLE Good_Raw_Data ({column_name} {dataType})'.format(column_name=column_name, dataType=data_type))

            conn.close()

            file = open("Prediction_Logs/DbTableCreateLog.txt", 'a+')
            self.logger.log(file, "Tables created successfully!!")
            file.close()

            file = open("Prediction_Logs/DataBaseConnectionLog.txt", 'a+')
            self.logger.log(file, "Closed %s database successfully" % DatabaseName)
            file.close()

        except Exception as e:
            file = open("Prediction_Logs/DbTableCreateLog.txt", 'a+')
            self.logger.log(file, "Error while creating table: %s " % e)
            file.close()
            conn.close()
            file = open("Prediction_Logs/DataBaseConnectionLog.txt", 'a+')
            self.logger.log(file, "Closed %s database successfully" % DatabaseName)
            file.close()
            raise e

    def insertIntoTableGoodData(self, Database):
        """
        Method Name: insertIntoTableGoodData
        Description: This method inserts the Good data files from the Good_Raw
                     folder into the above created table; a file that fails is
                     moved to the Bad_Raw folder.
        Output: None
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        conn = self.dataBaseConnection(Database)
        goodFilePath = self.goodFilePath
        badFilePath = self.badFilePath
        onlyfiles = [f for f in listdir(goodFilePath)]
        log_file = open("Prediction_Logs/DbInsertLog.txt", 'a+')

        for file in onlyfiles:
            try:
                with open(goodFilePath + '/' + file, "r") as f:
                    next(f)  # skip the header row
                    reader = csv.reader(f, delimiter="\n")
                    for line in enumerate(reader):
                        for list_ in (line[1]):
                            conn.execute('INSERT INTO Good_Raw_Data values ({values})'.format(values=(list_)))
                # Commit once per file (atomic with the rollback below) and
                # log success once per file instead of once per row.
                conn.commit()
                self.logger.log(log_file, " %s: File loaded successfully!!" % file)
            except Exception as e:
                conn.rollback()
                # The original logged "Error while creating table" here.
                self.logger.log(log_file, "Error while inserting data into table: %s " % e)
                shutil.move(goodFilePath + '/' + file, badFilePath)
                self.logger.log(log_file, "File Moved Successfully %s" % file)
                log_file.close()
                conn.close()
                raise e

        conn.close()
        log_file.close()

    def selectingDatafromtableintocsv(self, Database):
        """
        Method Name: selectingDatafromtableintocsv
        Description: This method exports the data in the Good_Raw_Data table
                     as a CSV file into the Prediction_FileFromDB location.
        Output: None
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        self.fileFromDb = 'Prediction_FileFromDB/'
        self.fileName = 'InputFile.csv'
        log_file = open("Prediction_Logs/ExportToCsv.txt", 'a+')
        try:
            conn = self.dataBaseConnection(Database)
            cursor = conn.cursor()
            cursor.execute("SELECT * FROM Good_Raw_Data")
            results = cursor.fetchall()

            # Column names become the CSV header row.
            headers = [i[0] for i in cursor.description]

            # Make the CSV output directory.
            if not os.path.isdir(self.fileFromDb):
                os.makedirs(self.fileFromDb)

            # `with` guarantees the output handle is closed; the original
            # opened it inline inside csv.writer(...) and leaked it.
            with open(self.fileFromDb + self.fileName, 'w', newline='') as out:
                csvFile = csv.writer(out, delimiter=',', lineterminator='\r\n', quoting=csv.QUOTE_ALL, escapechar='\\')
                csvFile.writerow(headers)
                csvFile.writerows(results)

            self.logger.log(log_file, "File exported successfully!!!")
        except Exception as e:
            self.logger.log(log_file, "File exporting failed. Error : %s" % e)
            raise e
        finally:
            # The original never closed this handle.
            log_file.close()
import shutil
import sqlite3
from datetime import datetime
from os import listdir
import os
import csv
from application_logging.logger import App_Logger


class dBOperation:
    """
    This class shall be used for handling all the SQL operations for the
    training pipeline.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None
    """

    def __init__(self):
        self.path = 'Training_Database/'  # folder holding the SQLite db file
        self.badFilePath = "Training_Raw_files_validated/Bad_Raw"
        self.goodFilePath = "Training_Raw_files_validated/Good_Raw"
        self.logger = App_Logger()

    def dataBaseConnection(self, DatabaseName):
        """
        Method Name: dataBaseConnection
        Description: This method creates the database with the given name and
                     if the Database already exists then opens the connection
                     to the DB.
        Output: Connection to the DB
        On Failure: Raise ConnectionError

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        try:
            conn = sqlite3.connect(self.path + DatabaseName + '.db')
            file = open("Training_Logs/DataBaseConnectionLog.txt", 'a+')
            self.logger.log(file, "Opened %s database successfully" % DatabaseName)
            file.close()
        except Exception as e:
            # sqlite3 raises sqlite3.Error, never ConnectionError, so the
            # original `except ConnectionError` could never fire; catch the
            # real failure and re-raise the ConnectionError callers expect.
            file = open("Training_Logs/DataBaseConnectionLog.txt", 'a+')
            self.logger.log(file, "Error while connecting to database: %s" % e)
            file.close()
            raise ConnectionError(e)
        return conn

    def createTableDb(self, DatabaseName, column_names):
        """
        Method Name: createTableDb
        Description: This method creates a table in the given database which
                     will be used to insert the Good data after raw data
                     validation. If the table already exists it is left as is.
        Output: None
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        try:
            conn = self.dataBaseConnection(DatabaseName)
            c = conn.cursor()
            c.execute("SELECT count(name) FROM sqlite_master WHERE type = 'table'AND name = 'Good_Raw_Data'")
            if c.fetchone()[0] == 1:
                # Table already exists from a previous run; nothing to build.
                conn.close()
                file = open("Training_Logs/DbTableCreateLog.txt", 'a+')
                self.logger.log(file, "Tables created successfully!!")
                file.close()

                file = open("Training_Logs/DataBaseConnectionLog.txt", 'a+')
                self.logger.log(file, "Closed %s database successfully" % DatabaseName)
                file.close()
            else:
                for column_name, data_type in column_names.items():
                    # Extend the table column by column; the very first ALTER
                    # fails (no table yet) and the CREATE runs instead.
                    # Narrowed from a bare except to the specific sqlite error
                    # this pattern relies on.
                    try:
                        conn.execute('ALTER TABLE Good_Raw_Data ADD COLUMN "{column_name}" {dataType}'.format(column_name=column_name, dataType=data_type))
                    except sqlite3.OperationalError:
                        conn.execute('CREATE TABLE Good_Raw_Data ({column_name} {dataType})'.format(column_name=column_name, dataType=data_type))

                conn.close()

                file = open("Training_Logs/DbTableCreateLog.txt", 'a+')
                self.logger.log(file, "Tables created successfully!!")
                file.close()

                file = open("Training_Logs/DataBaseConnectionLog.txt", 'a+')
                self.logger.log(file, "Closed %s database successfully" % DatabaseName)
                file.close()

        except Exception as e:
            file = open("Training_Logs/DbTableCreateLog.txt", 'a+')
            self.logger.log(file, "Error while creating table: %s " % e)
            file.close()
            conn.close()
            file = open("Training_Logs/DataBaseConnectionLog.txt", 'a+')
            self.logger.log(file, "Closed %s database successfully" % DatabaseName)
            file.close()
            raise e

    def insertIntoTableGoodData(self, Database):
        """
        Method Name: insertIntoTableGoodData
        Description: This method inserts the Good data files from the Good_Raw
                     folder into the above created table; a file that fails is
                     moved to Bad_Raw and the remaining files continue loading.
        Output: None
        On Failure: logs the exception and moves the offending file

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        conn = self.dataBaseConnection(Database)
        goodFilePath = self.goodFilePath
        badFilePath = self.badFilePath
        onlyfiles = [f for f in listdir(goodFilePath)]
        log_file = open("Training_Logs/DbInsertLog.txt", 'a+')

        for file in onlyfiles:
            try:
                with open(goodFilePath + '/' + file, "r") as f:
                    next(f)  # skip the header row
                    reader = csv.reader(f, delimiter="\n")
                    for line in enumerate(reader):
                        for list_ in (line[1]):
                            conn.execute('INSERT INTO Good_Raw_Data values ({values})'.format(values=(list_)))
                # Commit once per file (atomic with the rollback below) and
                # log success once per file instead of once per row.
                conn.commit()
                self.logger.log(log_file, " %s: File loaded successfully!!" % file)
            except Exception as e:
                conn.rollback()
                # The original logged "Error while creating table" here.
                self.logger.log(log_file, "Error while inserting data into table: %s " % e)
                shutil.move(goodFilePath + '/' + file, badFilePath)
                self.logger.log(log_file, "File Moved Successfully %s" % file)
                # Keep best-effort semantics (continue with the next file) but
                # do NOT close conn/log_file here: the original closed both and
                # then reused them on the next iteration, crashing the loop.

        conn.close()
        log_file.close()

    def selectingDatafromtableintocsv(self, Database):
        """
        Method Name: selectingDatafromtableintocsv
        Description: This method exports the data in the Good_Raw_Data table
                     as a CSV file into the Training_FileFromDB location.
        Output: None
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        self.fileFromDb = 'Training_FileFromDB/'
        self.fileName = 'InputFile.csv'
        log_file = open("Training_Logs/ExportToCsv.txt", 'a+')
        try:
            conn = self.dataBaseConnection(Database)
            cursor = conn.cursor()
            cursor.execute("SELECT * FROM Good_Raw_Data")
            results = cursor.fetchall()

            # Column names become the CSV header row.
            headers = [i[0] for i in cursor.description]

            # Make the CSV output directory.
            if not os.path.isdir(self.fileFromDb):
                os.makedirs(self.fileFromDb)

            # `with` guarantees the output handle is closed; the original
            # opened it inline inside csv.writer(...) and leaked it.
            with open(self.fileFromDb + self.fileName, 'w', newline='') as out:
                csvFile = csv.writer(out, delimiter=',', lineterminator='\r\n', quoting=csv.QUOTE_ALL, escapechar='\\')
                csvFile.writerow(headers)
                csvFile.writerows(results)

            self.logger.log(log_file, "File exported successfully!!!")
        except Exception as e:
            self.logger.log(log_file, "File exporting failed. Error : %s" % e)
            # The original silently swallowed export failures, letting training
            # proceed on a stale InputFile.csv; re-raise like the prediction
            # pipeline does.
            raise e
        finally:
            log_file.close()
Error : %s" %e) 214 | log_file.close() 215 | 216 | 217 | 218 | 219 | 220 | -------------------------------------------------------------------------------- /DataTypeValidation_Insertion_Training/__pycache__/DataTypeValidation.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Phising-Classifier/850f8a368eed5fa626ceafd2a4c5c61155ae0102/DataTypeValidation_Insertion_Training/__pycache__/DataTypeValidation.cpython-36.pyc -------------------------------------------------------------------------------- /DataTypeValidation_Insertion_Training/__pycache__/DataTypeValidation.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Phising-Classifier/850f8a368eed5fa626ceafd2a4c5c61155ae0102/DataTypeValidation_Insertion_Training/__pycache__/DataTypeValidation.cpython-37.pyc -------------------------------------------------------------------------------- /EncoderPickle/enc.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Phising-Classifier/850f8a368eed5fa626ceafd2a4c5c61155ae0102/EncoderPickle/enc.pickle -------------------------------------------------------------------------------- /Prediction_Database/Prediction.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Phising-Classifier/850f8a368eed5fa626ceafd2a4c5c61155ae0102/Prediction_Database/Prediction.db -------------------------------------------------------------------------------- /Prediction_Logs/DataBaseConnectionLog.txt: -------------------------------------------------------------------------------- 1 | 2020-02-14/15:01:08 Opened Prediction database successfully 2 | 2020-02-14/15:01:21 Closed Prediction database successfully 3 | 2020-02-14/15:01:23 Opened Prediction database 
successfully 4 | 2020-02-14/15:04:08 Opened Prediction database successfully 5 | 2020-02-14/15:04:19 Closed Prediction database successfully 6 | 2020-02-14/15:04:24 Opened Prediction database successfully 7 | 2020-02-14/15:05:21 Opened Prediction database successfully 8 | 2020-02-14/15:15:52 Opened Prediction database successfully 9 | 2020-02-14/15:15:57 Closed Prediction database successfully 10 | 2020-02-14/15:16:01 Opened Prediction database successfully 11 | 2020-02-14/15:19:09 Opened Prediction database successfully 12 | 2020-02-14/15:22:02 Opened Prediction database successfully 13 | 2020-02-14/15:22:04 Closed Prediction database successfully 14 | 2020-02-14/15:22:06 Opened Prediction database successfully 15 | 2020-02-14/15:25:06 Opened Prediction database successfully 16 | 2020-02-14/15:27:40 Opened Prediction database successfully 17 | 2020-02-14/15:27:42 Closed Prediction database successfully 18 | 2020-02-14/15:27:43 Opened Prediction database successfully 19 | 2020-02-14/15:35:25 Opened Prediction database successfully 20 | 2020-02-14/18:39:22 Opened Prediction database successfully 21 | 2020-02-14/18:39:24 Closed Prediction database successfully 22 | 2020-02-14/18:39:24 Opened Prediction database successfully 23 | 2020-02-14/18:46:40 Opened Prediction database successfully 24 | 2020-02-16/17:15:43 Opened Prediction database successfully 25 | 2020-02-16/17:15:46 Closed Prediction database successfully 26 | 2020-02-16/17:17:42 Opened Prediction database successfully 27 | 2020-02-16/17:17:46 Closed Prediction database successfully 28 | 2020-02-16/17:18:07 Opened Prediction database successfully 29 | 2020-02-16/17:22:42 Opened Prediction database successfully 30 | 2020-02-16/17:40:31 Opened Prediction database successfully 31 | 2020-02-16/17:40:33 Closed Prediction database successfully 32 | 2020-02-16/17:40:33 Opened Prediction database successfully 33 | 2020-02-16/17:44:56 Opened Prediction database successfully 34 | 
-------------------------------------------------------------------------------- /Prediction_Logs/DbTableCreateLog.txt: -------------------------------------------------------------------------------- 1 | 2020-02-14/15:01:21 Tables created successfully!! 2 | 2020-02-14/15:04:19 Tables created successfully!! 3 | 2020-02-14/15:15:57 Tables created successfully!! 4 | 2020-02-14/15:22:04 Tables created successfully!! 5 | 2020-02-14/15:27:42 Tables created successfully!! 6 | 2020-02-14/18:39:24 Tables created successfully!! 7 | 2020-02-16/17:15:46 Tables created successfully!! 8 | 2020-02-16/17:17:46 Tables created successfully!! 9 | 2020-02-16/17:40:33 Tables created successfully!! 10 | -------------------------------------------------------------------------------- /Prediction_Logs/ExportToCsv.txt: -------------------------------------------------------------------------------- 1 | 2020-02-14/15:05:21 File exported successfully!!! 2 | 2020-02-14/15:19:09 File exported successfully!!! 3 | 2020-02-14/15:25:06 File exported successfully!!! 4 | 2020-02-14/15:35:25 File exported successfully!!! 5 | 2020-02-14/18:46:40 File exported successfully!!! 6 | 2020-02-16/17:22:42 File exported successfully!!! 7 | 2020-02-16/17:44:56 File exported successfully!!! 8 | -------------------------------------------------------------------------------- /Prediction_Logs/GeneralLog.txt: -------------------------------------------------------------------------------- 1 | 2020-02-14/15:04:08 BadRaw directory deleted before starting validation!!! 2 | 2020-02-14/15:04:08 GoodRaw directory deleted successfully!!! 3 | 2020-02-14/15:05:12 GoodRaw directory deleted successfully!!! 4 | 2020-02-14/15:05:12 Bad files moved to archive 5 | 2020-02-14/15:05:12 Bad Raw Data Folder Deleted successfully!! 6 | 2020-02-14/15:19:03 GoodRaw directory deleted successfully!!! 7 | 2020-02-14/15:19:03 Bad files moved to archive 8 | 2020-02-14/15:19:03 Bad Raw Data Folder Deleted successfully!! 
9 | 2020-02-14/15:25:01 GoodRaw directory deleted successfully!!! 10 | 2020-02-14/15:25:01 Bad files moved to archive 11 | 2020-02-14/15:25:01 Bad Raw Data Folder Deleted successfully!! 12 | 2020-02-14/15:35:02 GoodRaw directory deleted successfully!!! 13 | 2020-02-14/15:35:02 Bad files moved to archive 14 | 2020-02-14/15:35:02 Bad Raw Data Folder Deleted successfully!! 15 | 2020-02-14/18:46:40 GoodRaw directory deleted successfully!!! 16 | 2020-02-14/18:46:40 Bad files moved to archive 17 | 2020-02-14/18:46:40 Bad Raw Data Folder Deleted successfully!! 18 | 2020-02-16/17:10:37 BadRaw directory deleted before starting validation!!! 19 | 2020-02-16/17:10:37 GoodRaw directory deleted successfully!!! 20 | 2020-02-16/17:14:56 BadRaw directory deleted before starting validation!!! 21 | 2020-02-16/17:14:56 GoodRaw directory deleted successfully!!! 22 | 2020-02-16/17:17:39 BadRaw directory deleted before starting validation!!! 23 | 2020-02-16/17:17:39 GoodRaw directory deleted successfully!!! 24 | 2020-02-16/17:22:25 GoodRaw directory deleted successfully!!! 25 | 2020-02-16/17:22:25 Bad files moved to archive 26 | 2020-02-16/17:22:25 Bad Raw Data Folder Deleted successfully!! 27 | 2020-02-16/17:44:56 GoodRaw directory deleted successfully!!! 28 | 2020-02-16/17:44:56 Bad files moved to archive 29 | 2020-02-16/17:44:56 Bad Raw Data Folder Deleted successfully!! 30 | -------------------------------------------------------------------------------- /Prediction_Logs/Prediction_Log.txt: -------------------------------------------------------------------------------- 1 | 2020-02-14/15:27:39 Start of Validation on files for prediction!! 2 | 2020-02-14/15:27:40 Raw Data Validation Complete!! 3 | 2020-02-14/15:27:40 Starting Data Transforamtion!! 4 | 2020-02-14/15:27:40 DataTransformation Completed!!! 5 | 2020-02-14/15:27:40 Creating Prediction_Database and tables on the basis of given schema!!! 6 | 2020-02-14/15:27:42 Table creation Completed!! 
7 | 2020-02-14/15:27:42 Insertion of Data into Table started!!!! 8 | 2020-02-14/15:35:02 Insertion in Table completed!!! 9 | 2020-02-14/15:35:02 Deleting Good Data Folder!!! 10 | 2020-02-14/15:35:02 Good_Data folder deleted!!! 11 | 2020-02-14/15:35:02 Moving bad files to Archive and deleting Bad_Data folder!!! 12 | 2020-02-14/15:35:02 Bad files moved to archive!! Bad folder Deleted!! 13 | 2020-02-14/15:35:02 Validation Operation completed!! 14 | 2020-02-14/15:35:02 Extracting csv file from table 15 | 2020-02-14/15:35:32 Start of Prediction 16 | 2020-02-14/15:35:33 Entered the get_data method of the Data_Getter class 17 | 2020-02-14/15:35:33 Data Load Successful.Exited the get_data method of the Data_Getter class 18 | 2020-02-14/15:35:37 Entered the is_null_present method of the Preprocessor class 19 | 2020-02-14/15:35:37 Finding missing values is a success.Data written to the null values file. Exited the is_null_present method of the Preprocessor class 20 | 2020-02-14/15:35:39 Entered the impute_missing_values method of the Preprocessor class 21 | 2020-02-14/15:35:39 Imputing missing values Successful. Exited the impute_missing_values method of the Preprocessor class 22 | 2020-02-14/15:35:53 Entered the load_model method of the File_Operation class 23 | 2020-02-14/15:35:53 Model File KMeans loaded. Exited the load_model method of the Model_Finder class 24 | 2020-02-14/15:36:01 Entered the find_correct_model_file method of the File_Operation class 25 | 2020-02-14/15:36:01 Exited the find_correct_model_file method of the Model_Finder class. 26 | 2020-02-14/15:36:02 Entered the load_model method of the File_Operation class 27 | 2020-02-14/15:36:02 Model File KNN0 loaded. Exited the load_model method of the Model_Finder class 28 | 2020-02-14/15:36:18 Entered the find_correct_model_file method of the File_Operation class 29 | 2020-02-14/15:36:18 Exited the find_correct_model_file method of the Model_Finder class. 
30 | 2020-02-14/15:36:18 Entered the load_model method of the File_Operation class 31 | 2020-02-14/15:36:18 Model File RandomForest3 loaded. Exited the load_model method of the Model_Finder class 32 | 2020-02-14/15:36:18 Entered the find_correct_model_file method of the File_Operation class 33 | 2020-02-14/15:36:18 Exited the find_correct_model_file method of the Model_Finder class. 34 | 2020-02-14/15:36:18 Entered the load_model method of the File_Operation class 35 | 2020-02-14/15:36:18 Model File RandomForest4 loaded. Exited the load_model method of the Model_Finder class 36 | 2020-02-14/15:36:18 Entered the find_correct_model_file method of the File_Operation class 37 | 2020-02-14/15:36:18 Exited the find_correct_model_file method of the Model_Finder class. 38 | 2020-02-14/15:36:18 Entered the load_model method of the File_Operation class 39 | 2020-02-14/15:36:18 Model File RandomForest1 loaded. Exited the load_model method of the Model_Finder class 40 | 2020-02-14/15:36:19 Entered the find_correct_model_file method of the File_Operation class 41 | 2020-02-14/15:36:19 Exited the find_correct_model_file method of the Model_Finder class. 42 | 2020-02-14/15:36:19 Entered the load_model method of the File_Operation class 43 | 2020-02-14/15:36:19 Model File KNN2 loaded. Exited the load_model method of the Model_Finder class 44 | 2020-02-14/15:36:23 End of Prediction 45 | 2020-02-14/18:39:21 Start of Validation on files for prediction!! 46 | 2020-02-14/18:39:22 Raw Data Validation Complete!! 47 | 2020-02-14/18:39:22 Starting Data Transforamtion!! 48 | 2020-02-14/18:39:22 DataTransformation Completed!!! 49 | 2020-02-14/18:39:22 Creating Prediction_Database and tables on the basis of given schema!!! 50 | 2020-02-14/18:39:24 Table creation Completed!! 51 | 2020-02-14/18:39:24 Insertion of Data into Table started!!!! 52 | 2020-02-14/18:46:40 Insertion in Table completed!!! 53 | 2020-02-14/18:46:40 Deleting Good Data Folder!!! 
54 | 2020-02-14/18:46:40 Good_Data folder deleted!!! 55 | 2020-02-14/18:46:40 Moving bad files to Archive and deleting Bad_Data folder!!! 56 | 2020-02-14/18:46:40 Bad files moved to archive!! Bad folder Deleted!! 57 | 2020-02-14/18:46:40 Validation Operation completed!! 58 | 2020-02-14/18:46:40 Extracting csv file from table 59 | 2020-02-14/18:46:40 Start of Prediction 60 | 2020-02-14/18:46:40 Entered the get_data method of the Data_Getter class 61 | 2020-02-14/18:46:40 Data Load Successful.Exited the get_data method of the Data_Getter class 62 | 2020-02-14/18:46:40 Entered the is_null_present method of the Preprocessor class 63 | 2020-02-14/18:46:40 Finding missing values is a success.Data written to the null values file. Exited the is_null_present method of the Preprocessor class 64 | 2020-02-14/18:46:40 Error occured while running the prediction!! Error:: impute_missing_values() missing 1 required positional argument: 'cols_with_missing_values' 65 | 2020-02-14/18:50:28 Start of Prediction 66 | 2020-02-14/18:50:28 Entered the get_data method of the Data_Getter class 67 | 2020-02-14/18:50:28 Data Load Successful.Exited the get_data method of the Data_Getter class 68 | 2020-02-14/18:50:28 Entered the is_null_present method of the Preprocessor class 69 | 2020-02-14/18:50:28 Finding missing values is a success.Data written to the null values file. Exited the is_null_present method of the Preprocessor class 70 | 2020-02-14/18:50:28 Entered the impute_missing_values method of the Preprocessor class 71 | 2020-02-14/18:50:28 Imputing missing values Successful. Exited the impute_missing_values method of the Preprocessor class 72 | 2020-02-14/18:50:28 Entered the load_model method of the File_Operation class 73 | 2020-02-14/18:50:28 Model File KMeans loaded. 
Exited the load_model method of the Model_Finder class 74 | 2020-02-14/18:50:28 Entered the find_correct_model_file method of the File_Operation class 75 | 2020-02-14/18:50:28 Exited the find_correct_model_file method of the Model_Finder class. 76 | 2020-02-14/18:50:28 Entered the load_model method of the File_Operation class 77 | 2020-02-14/18:50:28 Model File KNN0 loaded. Exited the load_model method of the Model_Finder class 78 | 2020-02-14/18:50:28 Entered the find_correct_model_file method of the File_Operation class 79 | 2020-02-14/18:50:28 Exited the find_correct_model_file method of the Model_Finder class. 80 | 2020-02-14/18:50:28 Entered the load_model method of the File_Operation class 81 | 2020-02-14/18:50:28 Model File RandomForest4 loaded. Exited the load_model method of the Model_Finder class 82 | 2020-02-14/18:50:28 Entered the find_correct_model_file method of the File_Operation class 83 | 2020-02-14/18:50:28 Exited the find_correct_model_file method of the Model_Finder class. 84 | 2020-02-14/18:50:28 Entered the load_model method of the File_Operation class 85 | 2020-02-14/18:50:28 Model File RandomForest2 loaded. Exited the load_model method of the Model_Finder class 86 | 2020-02-14/18:50:28 Entered the find_correct_model_file method of the File_Operation class 87 | 2020-02-14/18:50:28 Exited the find_correct_model_file method of the Model_Finder class. 88 | 2020-02-14/18:50:28 Entered the load_model method of the File_Operation class 89 | 2020-02-14/18:50:28 Model File RandomForest1 loaded. Exited the load_model method of the Model_Finder class 90 | 2020-02-14/18:50:28 Entered the find_correct_model_file method of the File_Operation class 91 | 2020-02-14/18:50:28 Exited the find_correct_model_file method of the Model_Finder class. 92 | 2020-02-14/18:50:28 Entered the load_model method of the File_Operation class 93 | 2020-02-14/18:50:28 Model File RandomForest3 loaded. 
Exited the load_model method of the Model_Finder class 94 | 2020-02-14/18:50:28 End of Prediction 95 | 2020-02-16/17:17:39 Start of Validation on files for prediction!! 96 | 2020-02-16/17:17:40 Raw Data Validation Complete!! 97 | 2020-02-16/17:17:40 Starting Data Transforamtion!! 98 | 2020-02-16/17:17:40 DataTransformation Completed!!! 99 | 2020-02-16/17:17:40 Creating Prediction_Database and tables on the basis of given schema!!! 100 | 2020-02-16/17:17:47 Table creation Completed!! 101 | 2020-02-16/17:17:47 Insertion of Data into Table started!!!! 102 | 2020-02-16/17:22:25 Insertion in Table completed!!! 103 | 2020-02-16/17:22:25 Deleting Good Data Folder!!! 104 | 2020-02-16/17:22:25 Good_Data folder deleted!!! 105 | 2020-02-16/17:22:25 Moving bad files to Archive and deleting Bad_Data folder!!! 106 | 2020-02-16/17:22:25 Bad files moved to archive!! Bad folder Deleted!! 107 | 2020-02-16/17:22:25 Validation Operation completed!! 108 | 2020-02-16/17:22:25 Extracting csv file from table 109 | 2020-02-16/17:23:14 Start of Prediction 110 | 2020-02-16/17:23:16 Entered the get_data method of the Data_Getter class 111 | 2020-02-16/17:23:16 Data Load Successful.Exited the get_data method of the Data_Getter class 112 | 2020-02-16/17:23:20 Entered the is_null_present method of the Preprocessor class 113 | 2020-02-16/17:23:20 Finding missing values is a success.Data written to the null values file. Exited the is_null_present method of the Preprocessor class 114 | 2020-02-16/17:23:22 Entered the load_model method of the File_Operation class 115 | 2020-02-16/17:23:22 Model File KMeans loaded. Exited the load_model method of the Model_Finder class 116 | 2020-02-16/17:23:31 Entered the find_correct_model_file method of the File_Operation class 117 | 2020-02-16/17:23:31 Exited the find_correct_model_file method of the Model_Finder class. 118 | 2020-02-16/17:23:32 Entered the load_model method of the File_Operation class 119 | 2020-02-16/17:23:32 Model File XGBoost1 loaded. 
Exited the load_model method of the Model_Finder class 120 | 2020-02-16/17:23:45 Entered the find_correct_model_file method of the File_Operation class 121 | 2020-02-16/17:23:45 Exited the find_correct_model_file method of the Model_Finder class. 122 | 2020-02-16/17:23:45 Entered the load_model method of the File_Operation class 123 | 2020-02-16/17:23:45 Model File XGBoost2 loaded. Exited the load_model method of the Model_Finder class 124 | 2020-02-16/17:23:45 Entered the find_correct_model_file method of the File_Operation class 125 | 2020-02-16/17:23:45 Exited the find_correct_model_file method of the Model_Finder class. 126 | 2020-02-16/17:23:45 Entered the load_model method of the File_Operation class 127 | 2020-02-16/17:23:45 Model File XGBoost0 loaded. Exited the load_model method of the Model_Finder class 128 | 2020-02-16/17:23:45 Entered the find_correct_model_file method of the File_Operation class 129 | 2020-02-16/17:23:45 Exited the find_correct_model_file method of the Model_Finder class. 130 | 2020-02-16/17:23:45 Entered the load_model method of the File_Operation class 131 | 2020-02-16/17:23:45 Model File XGBoost3 loaded. Exited the load_model method of the Model_Finder class 132 | 2020-02-16/17:23:48 End of Prediction 133 | 2020-02-16/17:40:30 Start of Validation on files for prediction!! 134 | 2020-02-16/17:40:31 Raw Data Validation Complete!! 135 | 2020-02-16/17:40:31 Starting Data Transforamtion!! 136 | 2020-02-16/17:40:31 DataTransformation Completed!!! 137 | 2020-02-16/17:40:31 Creating Prediction_Database and tables on the basis of given schema!!! 138 | 2020-02-16/17:40:33 Table creation Completed!! 139 | 2020-02-16/17:40:33 Insertion of Data into Table started!!!! 140 | 2020-02-16/17:44:56 Insertion in Table completed!!! 141 | 2020-02-16/17:44:56 Deleting Good Data Folder!!! 142 | 2020-02-16/17:44:56 Good_Data folder deleted!!! 143 | 2020-02-16/17:44:56 Moving bad files to Archive and deleting Bad_Data folder!!! 
144 | 2020-02-16/17:44:56 Bad files moved to archive!! Bad folder Deleted!! 145 | 2020-02-16/17:44:56 Validation Operation completed!! 146 | 2020-02-16/17:44:56 Extracting csv file from table 147 | 2020-02-16/17:44:56 Start of Prediction 148 | 2020-02-16/17:44:56 Entered the get_data method of the Data_Getter class 149 | 2020-02-16/17:44:56 Data Load Successful.Exited the get_data method of the Data_Getter class 150 | 2020-02-16/17:44:57 Entered the is_null_present method of the Preprocessor class 151 | 2020-02-16/17:44:57 Finding missing values is a success.Data written to the null values file. Exited the is_null_present method of the Preprocessor class 152 | 2020-02-16/17:44:57 Entered the load_model method of the File_Operation class 153 | 2020-02-16/17:44:57 Model File KMeans loaded. Exited the load_model method of the Model_Finder class 154 | 2020-02-16/17:44:57 Entered the find_correct_model_file method of the File_Operation class 155 | 2020-02-16/17:44:57 Exited the find_correct_model_file method of the Model_Finder class. 156 | 2020-02-16/17:44:57 Entered the load_model method of the File_Operation class 157 | 2020-02-16/17:44:57 Model File XGBoost1 loaded. Exited the load_model method of the Model_Finder class 158 | 2020-02-16/17:44:58 Entered the find_correct_model_file method of the File_Operation class 159 | 2020-02-16/17:44:58 Exited the find_correct_model_file method of the Model_Finder class. 160 | 2020-02-16/17:44:58 Entered the load_model method of the File_Operation class 161 | 2020-02-16/17:44:58 Model File XGBoost2 loaded. Exited the load_model method of the Model_Finder class 162 | 2020-02-16/17:44:58 Entered the find_correct_model_file method of the File_Operation class 163 | 2020-02-16/17:44:58 Exited the find_correct_model_file method of the Model_Finder class. 164 | 2020-02-16/17:44:58 Entered the load_model method of the File_Operation class 165 | 2020-02-16/17:44:58 Model File XGBoost0 loaded. 
Exited the load_model method of the Model_Finder class 166 | 2020-02-16/17:44:58 Entered the find_correct_model_file method of the File_Operation class 167 | 2020-02-16/17:44:58 Exited the find_correct_model_file method of the Model_Finder class. 168 | 2020-02-16/17:44:58 Entered the load_model method of the File_Operation class 169 | 2020-02-16/17:44:58 Model File XGBoost3 loaded. Exited the load_model method of the Model_Finder class 170 | 2020-02-16/17:44:58 End of Prediction 171 | -------------------------------------------------------------------------------- /Prediction_Logs/columnValidationLog.txt: -------------------------------------------------------------------------------- 1 | 2020-02-14/15:01:03 Column Length Validation Started!! 2 | 2020-02-14/15:01:03 Column Length Validation Completed!! 3 | 2020-02-14/15:04:08 Column Length Validation Started!! 4 | 2020-02-14/15:04:08 Column Length Validation Completed!! 5 | 2020-02-14/15:15:52 Column Length Validation Started!! 6 | 2020-02-14/15:15:52 Column Length Validation Completed!! 7 | 2020-02-14/15:22:02 Column Length Validation Started!! 8 | 2020-02-14/15:22:02 Column Length Validation Completed!! 9 | 2020-02-14/15:27:39 Column Length Validation Started!! 10 | 2020-02-14/15:27:39 Column Length Validation Completed!! 11 | 2020-02-14/18:39:21 Column Length Validation Started!! 12 | 2020-02-14/18:39:21 Invalid Column Length for the file!! File moved to Bad Raw Folder :: mushroom_08012018_120088.csv 13 | 2020-02-14/18:39:21 Column Length Validation Completed!! 14 | 2020-02-16/17:08:16 Column Length Validation Started!! 15 | 2020-02-16/17:08:24 Column Length Validation Completed!! 16 | 2020-02-16/17:12:36 Column Length Validation Started!! 17 | 2020-02-16/17:12:40 Column Length Validation Completed!! 18 | 2020-02-16/17:14:57 Column Length Validation Started!! 19 | 2020-02-16/17:14:57 Column Length Validation Completed!! 20 | 2020-02-16/17:17:39 Column Length Validation Started!! 
21 | 2020-02-16/17:17:39 Column Length Validation Completed!! 22 | 2020-02-16/17:40:30 Column Length Validation Started!! 23 | 2020-02-16/17:40:31 Column Length Validation Completed!! 24 | -------------------------------------------------------------------------------- /Prediction_Logs/dataTransformLog.txt: -------------------------------------------------------------------------------- 1 | 2020-02-14/15:04:08 mushroom_08012020_120000.csv: Quotes added successfully!! 2 | 2020-02-14/15:15:52 mushroom_08012020_120001.csv: Quotes added successfully!! 3 | 2020-02-14/15:22:02 mushroom_08012020_120001.csv: Quotes added successfully!! 4 | 2020-02-14/15:27:40 mushroom_08012020_120000.csv: Quotes added successfully!! 5 | 2020-02-14/18:39:22 mushroom_08012006_120010.csv: Quotes added successfully!! 6 | 2020-02-14/18:39:22 mushroom_08012007_120011.csv: Quotes added successfully!! 7 | 2020-02-14/18:39:22 mushroom_08012008_120012.csv: Quotes added successfully!! 8 | 2020-02-14/18:39:22 mushroom_08012009_120013.csv: Quotes added successfully!! 9 | 2020-02-14/18:39:22 mushroom_08012010_120014.csv: Quotes added successfully!! 10 | 2020-02-14/18:39:22 mushroom_08012011_120015.csv: Quotes added successfully!! 11 | 2020-02-14/18:39:22 mushroom_08012012_120016.csv: Quotes added successfully!! 12 | 2020-02-14/18:39:22 mushroom_08012013_120017.csv: Quotes added successfully!! 13 | 2020-02-14/18:39:22 mushroom_08012014_120018.csv: Quotes added successfully!! 14 | 2020-02-14/18:39:22 mushroom_08012015_120019.csv: Quotes added successfully!! 15 | 2020-02-14/18:39:22 mushroom_08012016_120020.csv: Quotes added successfully!! 16 | 2020-02-14/18:39:22 mushroom_08012017_120021.csv: Quotes added successfully!! 17 | 2020-02-14/18:39:22 mushroom_08012018_120022.csv: Quotes added successfully!! 18 | 2020-02-16/17:15:23 phising_08012020_120000.csv: Quotes added successfully!! 19 | 2020-02-16/17:17:40 phising_08012020_120000.csv: Quotes added successfully!! 
20 | 2020-02-16/17:40:31 phising_08012020_120000.csv: Quotes added successfully!! 21 | -------------------------------------------------------------------------------- /Prediction_Logs/missingValuesInColumn.txt: -------------------------------------------------------------------------------- 1 | 2020-02-14/15:01:04 Missing Values Validation Started!! 2 | 2020-02-14/15:04:08 Missing Values Validation Started!! 3 | 2020-02-14/15:15:52 Missing Values Validation Started!! 4 | 2020-02-14/15:22:02 Missing Values Validation Started!! 5 | 2020-02-14/15:27:39 Missing Values Validation Started!! 6 | 2020-02-14/18:39:21 Missing Values Validation Started!! 7 | 2020-02-16/17:08:32 Missing Values Validation Started!! 8 | 2020-02-16/17:12:46 Missing Values Validation Started!! 9 | 2020-02-16/17:14:57 Missing Values Validation Started!! 10 | 2020-02-16/17:17:39 Missing Values Validation Started!! 11 | 2020-02-16/17:40:31 Missing Values Validation Started!! 12 | -------------------------------------------------------------------------------- /Prediction_Logs/nameValidationLog.txt: -------------------------------------------------------------------------------- 1 | 2020-02-14/15:01:02 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012020_120000.csv 2 | 2020-02-14/15:04:08 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012020_120000.csv 3 | 2020-02-14/15:15:52 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012020_120001.csv 4 | 2020-02-14/15:22:02 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012020_120001.csv 5 | 2020-02-14/15:27:39 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012020_120000.csv 6 | 2020-02-14/18:39:21 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012006_120010.csv 7 | 2020-02-14/18:39:21 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012007_120011.csv 8 | 2020-02-14/18:39:21 Valid File name!! 
File moved to GoodRaw Folder :: mushroom_08012008_120012.csv 9 | 2020-02-14/18:39:21 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012009_120013.csv 10 | 2020-02-14/18:39:21 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012010_120014.csv 11 | 2020-02-14/18:39:21 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012011_120015.csv 12 | 2020-02-14/18:39:21 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012012_120016.csv 13 | 2020-02-14/18:39:21 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012013_120017.csv 14 | 2020-02-14/18:39:21 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012014_120018.csv 15 | 2020-02-14/18:39:21 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012015_120019.csv 16 | 2020-02-14/18:39:21 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012016_120.csv 17 | 2020-02-14/18:39:21 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012016_120020.csv 18 | 2020-02-14/18:39:21 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012017_120021.csv 19 | 2020-02-14/18:39:21 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012017_1201.csv 20 | 2020-02-14/18:39:21 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012018_120022.csv 21 | 2020-02-14/18:39:21 Valid File name!! File moved to GoodRaw Folder :: mushroom_08012018_120088.csv 22 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012006_120010.csv 23 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012007_120011.csv 24 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012008_120012.csv 25 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012009_120013.csv 26 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012010_120014.csv 27 | 2020-02-16/17:08:11 Invalid File Name!! 
File moved to Bad Raw Folder :: mushroom_08012011_120015.csv 28 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012012_120016.csv 29 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012013_120017.csv 30 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012014_120018.csv 31 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012015_120019.csv 32 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012016_120.csv 33 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012016_120020.csv 34 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012017_120021.csv 35 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012017_1201.csv 36 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012018_120022.csv 37 | 2020-02-16/17:08:11 Invalid File Name!! File moved to Bad Raw Folder :: mushroom_08012018_120088.csv 38 | 2020-02-16/17:10:52 Valid File name!! File moved to GoodRaw Folder :: phising_08012020_120000.csv 39 | 2020-02-16/17:14:56 Valid File name!! File moved to GoodRaw Folder :: phising_08012020_120000.csv 40 | 2020-02-16/17:17:39 Invalid File Name!! File moved to Bad Raw Folder :: phising_080020_120.csv 41 | 2020-02-16/17:17:39 Invalid File Name!! File moved to Bad Raw Folder :: phising_08012020_120.csv 42 | 2020-02-16/17:17:39 Valid File name!! File moved to GoodRaw Folder :: phising_08012020_120000.csv 43 | 2020-02-16/17:40:30 Invalid File Name!! File moved to Bad Raw Folder :: phising_080020_120.csv 44 | 2020-02-16/17:40:30 Invalid File Name!! File moved to Bad Raw Folder :: phising_08012020_120.csv 45 | 2020-02-16/17:40:30 Valid File name!! 
File moved to GoodRaw Folder :: phising_08012020_120000.csv 46 | -------------------------------------------------------------------------------- /Prediction_Output_File/Predictions.csv: -------------------------------------------------------------------------------- 1 | ,Predictions 2 | 0,-1 3 | 1,1 4 | 2,-1 5 | 3,-1 6 | 4,1 7 | 5,1 8 | 6,1 9 | 7,1 10 | 8,1 11 | 9,1 12 | 10,-1 13 | 11,-1 14 | 12,1 15 | 13,-1 16 | 14,-1 17 | 15,1 18 | 16,1 19 | 17,1 20 | 18,-1 21 | 19,-1 22 | 20,1 23 | 21,-1 24 | 22,1 25 | 23,1 26 | 24,-1 27 | 25,1 28 | 26,-1 29 | 27,1 30 | 28,1 31 | 29,1 32 | 30,1 33 | 31,-1 34 | 32,-1 35 | 33,1 36 | 34,1 37 | 35,1 38 | 36,1 39 | 37,1 40 | 38,1 41 | 39,1 42 | 40,-1 43 | 41,1 44 | 42,1 45 | 43,1 46 | 44,-1 47 | 45,-1 48 | 46,1 49 | 47,1 50 | 48,-1 51 | 49,1 52 | 50,-1 53 | 51,-1 54 | 52,1 55 | 53,1 56 | 54,1 57 | 55,-1 58 | 56,-1 59 | 57,-1 60 | 58,1 61 | 59,1 62 | 60,1 63 | 61,1 64 | 62,-1 65 | 63,1 66 | 64,1 67 | 65,1 68 | 66,1 69 | 67,1 70 | 68,1 71 | 69,1 72 | 70,1 73 | 71,1 74 | 72,-1 75 | 73,-1 76 | 74,1 77 | 75,-1 78 | 76,1 79 | 77,1 80 | 78,-1 81 | 79,1 82 | 80,1 83 | 81,1 84 | 82,1 85 | 83,1 86 | 84,1 87 | 85,1 88 | 86,1 89 | 87,1 90 | 88,1 91 | 89,1 92 | 90,1 93 | 91,1 94 | 92,1 95 | 93,1 96 | 94,1 97 | 95,1 98 | 96,1 99 | 97,1 100 | 98,1 101 | 99,1 102 | 100,1 103 | 101,1 104 | 102,1 105 | 103,1 106 | 104,1 107 | 105,1 108 | 106,-1 109 | 107,-1 110 | 108,-1 111 | 109,1 112 | 110,-1 113 | 111,-1 114 | 112,1 115 | 113,-1 116 | 114,1 117 | 115,1 118 | 116,1 119 | 117,1 120 | 118,1 121 | 119,1 122 | 120,1 123 | 121,-1 124 | 122,1 125 | 123,-1 126 | 124,-1 127 | 125,-1 128 | 126,1 129 | 127,1 130 | 128,1 131 | 129,1 132 | 130,1 133 | 131,1 134 | 132,1 135 | 133,-1 136 | 134,-1 137 | 135,1 138 | 136,1 139 | 137,-1 140 | 138,-1 141 | 139,1 142 | 140,1 143 | 141,1 144 | 142,-1 145 | 143,-1 146 | 144,1 147 | 145,-1 148 | 146,1 149 | 147,1 150 | 148,-1 151 | 149,1 152 | 150,-1 153 | 151,1 154 | 152,1 155 | 153,1 156 | 154,1 157 | 155,-1 158 | 
156,-1 159 | 157,1 160 | 158,1 161 | 159,1 162 | 160,1 163 | 161,1 164 | 162,1 165 | 163,1 166 | 164,-1 167 | 165,1 168 | 166,1 169 | 167,1 170 | 168,-1 171 | 169,-1 172 | 170,1 173 | 171,1 174 | 172,-1 175 | 173,1 176 | 174,-1 177 | 175,-1 178 | 176,1 179 | 177,1 180 | 178,1 181 | 179,-1 182 | 180,-1 183 | 181,-1 184 | 182,1 185 | 183,1 186 | 184,1 187 | 185,1 188 | 186,-1 189 | 187,1 190 | 188,1 191 | 189,1 192 | 190,1 193 | 191,1 194 | 192,1 195 | 193,1 196 | 194,1 197 | 195,1 198 | 196,-1 199 | 197,-1 200 | 198,1 201 | 199,-1 202 | 200,1 203 | 201,1 204 | 202,-1 205 | 203,1 206 | 204,1 207 | 205,1 208 | 206,1 209 | 207,1 210 | 208,1 211 | 209,1 212 | 210,1 213 | 211,1 214 | 212,1 215 | 213,1 216 | 214,1 217 | 215,1 218 | 216,1 219 | 217,1 220 | 218,1 221 | 219,1 222 | 220,1 223 | 221,1 224 | 222,1 225 | 223,1 226 | 224,1 227 | 225,1 228 | 226,1 229 | 227,1 230 | 228,1 231 | 229,1 232 | 230,-1 233 | 231,-1 234 | 232,-1 235 | 233,1 236 | 234,-1 237 | 235,-1 238 | 236,1 239 | 237,-1 240 | 238,1 241 | 239,1 242 | 240,-1 243 | 241,1 244 | 242,1 245 | 243,1 246 | 244,1 247 | 245,-1 248 | 246,-1 249 | 247,1 250 | 248,-1 251 | 249,1 252 | 250,1 253 | 251,1 254 | 252,1 255 | 253,1 256 | 254,1 257 | 255,1 258 | 256,1 259 | 257,1 260 | 258,-1 261 | 259,1 262 | 260,-1 263 | 261,1 264 | 262,1 265 | 263,1 266 | 264,-1 267 | 265,1 268 | 266,-1 269 | 267,-1 270 | 268,-1 271 | 269,1 272 | 270,-1 273 | 271,-1 274 | 272,1 275 | 273,-1 276 | 274,-1 277 | 275,1 278 | 276,-1 279 | 277,1 280 | 278,-1 281 | 279,-1 282 | 280,1 283 | 281,1 284 | 282,1 285 | 283,1 286 | 284,1 287 | 285,-1 288 | 286,1 289 | 287,1 290 | 288,1 291 | 289,1 292 | 290,1 293 | 291,1 294 | 292,1 295 | 293,1 296 | 294,-1 297 | 295,1 298 | 296,1 299 | 297,1 300 | 298,1 301 | 299,1 302 | 300,1 303 | 301,1 304 | 302,1 305 | 303,1 306 | 304,-1 307 | 305,1 308 | 306,1 309 | 307,1 310 | 308,1 311 | 309,-1 312 | 310,1 313 | 311,-1 314 | 312,1 315 | 313,1 316 | 314,1 317 | 315,1 318 | 316,-1 319 | 317,1 320 | 318,1 321 | 
319,1 322 | 320,1 323 | 321,-1 324 | 322,-1 325 | 323,-1 326 | 324,1 327 | 325,1 328 | 326,-1 329 | 327,-1 330 | 328,-1 331 | 329,1 332 | 330,1 333 | 331,-1 334 | 332,-1 335 | 333,1 336 | 334,1 337 | 335,1 338 | 336,1 339 | 337,1 340 | 338,1 341 | 339,1 342 | 340,1 343 | 341,1 344 | 342,1 345 | 343,-1 346 | 344,1 347 | 345,1 348 | 346,-1 349 | 347,1 350 | 348,-1 351 | 349,1 352 | 350,1 353 | 351,1 354 | 352,1 355 | 353,1 356 | 354,1 357 | 355,-1 358 | 356,1 359 | 357,-1 360 | 358,1 361 | 359,1 362 | 360,1 363 | 361,1 364 | 362,1 365 | 363,1 366 | 364,-1 367 | 365,1 368 | 366,-1 369 | 367,1 370 | 368,-1 371 | 369,-1 372 | 370,1 373 | 371,-1 374 | 372,1 375 | 373,-1 376 | 374,1 377 | 375,1 378 | 376,-1 379 | 377,1 380 | 378,1 381 | 379,1 382 | 380,1 383 | 381,1 384 | 382,1 385 | 383,-1 386 | 384,1 387 | 385,-1 388 | 386,-1 389 | 387,-1 390 | 388,-1 391 | 389,-1 392 | 390,1 393 | 391,1 394 | 392,1 395 | 393,1 396 | 394,1 397 | 395,1 398 | 396,-1 399 | 397,1 400 | 398,1 401 | 399,1 402 | 400,1 403 | 401,1 404 | 402,1 405 | 403,1 406 | 404,1 407 | 405,1 408 | 406,1 409 | 407,1 410 | 408,-1 411 | 409,1 412 | 410,1 413 | 411,1 414 | 412,-1 415 | 413,-1 416 | 414,1 417 | 415,-1 418 | 416,-1 419 | 417,1 420 | 418,1 421 | 419,1 422 | 420,1 423 | 421,1 424 | 422,-1 425 | 423,1 426 | 424,1 427 | 425,1 428 | 426,-1 429 | 427,1 430 | 428,-1 431 | 429,1 432 | 430,1 433 | 431,1 434 | 432,1 435 | 433,-1 436 | 434,-1 437 | 435,-1 438 | 436,1 439 | 437,1 440 | 438,1 441 | 439,1 442 | 440,-1 443 | 441,-1 444 | 442,1 445 | 443,1 446 | 444,-1 447 | 445,1 448 | 446,1 449 | 447,1 450 | 448,1 451 | 449,1 452 | 450,1 453 | 451,1 454 | 452,1 455 | 453,-1 456 | 454,1 457 | 455,1 458 | 456,1 459 | 457,1 460 | 458,-1 461 | 459,1 462 | 460,1 463 | 461,1 464 | 462,1 465 | 463,1 466 | 464,-1 467 | 465,1 468 | 466,-1 469 | 467,1 470 | 468,1 471 | 469,1 472 | 470,1 473 | 471,1 474 | 472,1 475 | 473,1 476 | 474,-1 477 | 475,1 478 | 476,1 479 | 477,1 480 | 478,1 481 | 479,1 482 | 480,-1 483 | 481,1 
484 | 482,1 485 | 483,1 486 | 484,1 487 | 485,1 488 | 486,1 489 | 487,1 490 | 488,1 491 | 489,1 492 | 490,1 493 | 491,1 494 | 492,1 495 | 493,1 496 | 494,1 497 | 495,1 498 | 496,1 499 | 497,1 500 | 498,1 501 | 499,1 502 | 500,1 503 | 501,1 504 | 502,-1 505 | 503,1 506 | 504,1 507 | 505,1 508 | 506,1 509 | 507,1 510 | 508,1 511 | 509,1 512 | 510,1 513 | 511,-1 514 | 512,1 515 | 513,1 516 | 514,1 517 | 515,1 518 | 516,1 519 | 517,1 520 | 518,1 521 | 519,1 522 | 520,1 523 | 521,1 524 | 522,1 525 | 523,1 526 | 524,-1 527 | 525,-1 528 | 526,1 529 | 527,1 530 | 528,1 531 | 529,-1 532 | 530,1 533 | 531,1 534 | 532,1 535 | 533,-1 536 | 534,1 537 | 535,1 538 | 536,-1 539 | 537,1 540 | 538,1 541 | 539,-1 542 | 540,-1 543 | 541,1 544 | 542,-1 545 | 543,1 546 | 544,-1 547 | 545,-1 548 | 546,1 549 | 547,-1 550 | 548,1 551 | 549,-1 552 | 550,-1 553 | 551,-1 554 | 552,1 555 | 553,-1 556 | 554,1 557 | 555,1 558 | 556,1 559 | 557,1 560 | 558,1 561 | 559,1 562 | 560,1 563 | 561,1 564 | 562,1 565 | 563,1 566 | 564,-1 567 | 565,1 568 | 566,1 569 | 567,-1 570 | 568,1 571 | 569,1 572 | 570,1 573 | 571,1 574 | 572,1 575 | 573,1 576 | 574,-1 577 | 575,1 578 | 576,1 579 | 577,1 580 | 578,-1 581 | 579,1 582 | 580,1 583 | 581,1 584 | 582,1 585 | 583,1 586 | 584,-1 587 | 585,1 588 | 586,1 589 | 587,1 590 | 588,-1 591 | 589,1 592 | 590,1 593 | 591,1 594 | 592,1 595 | 593,1 596 | 594,1 597 | 595,1 598 | 596,-1 599 | 597,-1 600 | 598,1 601 | 599,1 602 | 600,1 603 | 601,1 604 | 602,-1 605 | 603,1 606 | 604,1 607 | 605,1 608 | 606,1 609 | 607,1 610 | 608,-1 611 | 609,1 612 | 610,1 613 | 611,1 614 | 612,1 615 | 613,1 616 | 614,1 617 | 615,1 618 | 616,1 619 | 617,-1 620 | 618,1 621 | 619,1 622 | 620,-1 623 | 621,1 624 | 622,-1 625 | 623,-1 626 | 624,1 627 | 625,1 628 | 626,-1 629 | 627,-1 630 | 628,1 631 | 629,1 632 | 630,1 633 | 631,1 634 | 632,1 635 | 633,1 636 | 634,-1 637 | 635,-1 638 | 636,-1 639 | 637,1 640 | 638,1 641 | 639,1 642 | 640,1 643 | 641,1 644 | 642,1 645 | 643,-1 646 | 644,1 647 | 
645,1 648 | 646,1 649 | 647,-1 650 | 648,1 651 | 649,1 652 | 650,-1 653 | 651,1 654 | 652,1 655 | 653,1 656 | 654,1 657 | 655,1 658 | 656,-1 659 | 657,1 660 | 658,1 661 | 659,1 662 | 660,1 663 | 661,1 664 | 662,-1 665 | 663,-1 666 | 664,-1 667 | 665,1 668 | 666,1 669 | 667,1 670 | 668,1 671 | 669,1 672 | 670,-1 673 | 671,1 674 | 672,1 675 | 673,1 676 | 674,1 677 | 675,-1 678 | 676,-1 679 | 677,1 680 | 678,-1 681 | 679,1 682 | 680,1 683 | 681,1 684 | 682,-1 685 | 683,-1 686 | 684,-1 687 | 685,1 688 | 686,1 689 | 687,1 690 | 688,-1 691 | 689,1 692 | 690,1 693 | 691,1 694 | 692,1 695 | 693,1 696 | 694,-1 697 | 695,1 698 | 696,1 699 | 697,1 700 | 698,1 701 | 699,-1 702 | 700,1 703 | 701,-1 704 | 702,1 705 | 703,-1 706 | 704,1 707 | 705,1 708 | 706,1 709 | 707,1 710 | 708,1 711 | 709,-1 712 | 710,1 713 | 711,1 714 | 712,1 715 | 713,-1 716 | 714,1 717 | 715,-1 718 | 716,-1 719 | 717,-1 720 | 718,1 721 | 719,1 722 | 720,-1 723 | 721,1 724 | 722,-1 725 | 723,1 726 | 724,1 727 | 725,1 728 | 726,1 729 | 727,1 730 | 728,-1 731 | 729,1 732 | 730,1 733 | 731,1 734 | 732,-1 735 | 733,-1 736 | 734,-1 737 | 735,1 738 | 736,1 739 | 737,-1 740 | 738,1 741 | 739,-1 742 | 740,1 743 | 741,1 744 | 742,1 745 | 743,1 746 | 744,-1 747 | 745,1 748 | 746,1 749 | 747,1 750 | 748,-1 751 | 749,-1 752 | 750,1 753 | 751,-1 754 | 752,1 755 | 753,-1 756 | 754,1 757 | 755,1 758 | 756,1 759 | 757,1 760 | 758,-1 761 | 759,1 762 | 760,1 763 | 761,-1 764 | 762,1 765 | 763,-1 766 | 764,1 767 | 765,1 768 | 766,-1 769 | 767,1 770 | 768,-1 771 | 769,-1 772 | 770,1 773 | 771,-1 774 | 772,-1 775 | 773,-1 776 | 774,-1 777 | 775,1 778 | 776,-1 779 | 777,-1 780 | 778,1 781 | 779,1 782 | 780,1 783 | 781,1 784 | 782,1 785 | 783,-1 786 | 784,1 787 | 785,-1 788 | 786,-1 789 | 787,-1 790 | 788,1 791 | 789,-1 792 | 790,1 793 | 791,-1 794 | 792,-1 795 | 793,1 796 | 794,-1 797 | 795,1 798 | 796,1 799 | 797,1 800 | 798,1 801 | 799,1 802 | 800,1 803 | 801,1 804 | 802,1 805 | 803,1 806 | 804,1 807 | 805,-1 808 | 806,-1 809 
| 807,1 810 | 808,-1 811 | 809,1 812 | 810,1 813 | 811,1 814 | 812,-1 815 | 813,1 816 | 814,-1 817 | 815,-1 818 | 816,1 819 | 817,1 820 | 818,1 821 | 819,1 822 | 820,1 823 | 821,1 824 | 822,-1 825 | 823,-1 826 | 824,-1 827 | 825,-1 828 | 826,1 829 | 827,-1 830 | 828,-1 831 | 829,-1 832 | 830,1 833 | 831,1 834 | 832,1 835 | 833,-1 836 | 834,-1 837 | 835,1 838 | 836,1 839 | 837,1 840 | 838,-1 841 | 839,1 842 | 840,-1 843 | 841,1 844 | 842,1 845 | 843,-1 846 | 844,1 847 | 845,1 848 | 846,-1 849 | 847,1 850 | 848,-1 851 | 849,1 852 | 850,-1 853 | 851,1 854 | 852,1 855 | 853,1 856 | 854,1 857 | 855,1 858 | 856,1 859 | 857,-1 860 | 858,1 861 | 859,1 862 | 860,-1 863 | 861,-1 864 | 862,1 865 | 863,1 866 | 864,-1 867 | 865,1 868 | 866,-1 869 | 867,1 870 | 868,1 871 | 869,1 872 | 870,1 873 | 871,1 874 | 872,1 875 | 873,1 876 | 874,-1 877 | 875,1 878 | 876,1 879 | 877,-1 880 | 878,1 881 | 879,-1 882 | 880,1 883 | 881,1 884 | 882,1 885 | 883,1 886 | 884,1 887 | 885,1 888 | 886,-1 889 | 887,1 890 | 888,-1 891 | 889,1 892 | 890,1 893 | 891,1 894 | 892,1 895 | 893,1 896 | 894,-1 897 | 895,-1 898 | 896,1 899 | 897,1 900 | 898,-1 901 | 899,1 902 | 900,-1 903 | 901,1 904 | 902,-1 905 | 903,1 906 | 904,1 907 | 905,-1 908 | 906,1 909 | 907,1 910 | 908,1 911 | 909,1 912 | 910,1 913 | 911,1 914 | 912,-1 915 | 913,1 916 | 914,-1 917 | 915,-1 918 | 916,-1 919 | 917,-1 920 | 918,-1 921 | 919,1 922 | 920,1 923 | 921,1 924 | 922,1 925 | 923,1 926 | 924,1 927 | 925,-1 928 | 926,1 929 | 927,1 930 | 928,1 931 | 929,1 932 | 930,1 933 | 931,1 934 | 932,1 935 | 933,1 936 | 934,1 937 | 935,1 938 | 936,1 939 | 937,-1 940 | 938,1 941 | 939,1 942 | 940,1 943 | 941,1 944 | 942,-1 945 | 943,-1 946 | 944,1 947 | 945,-1 948 | 946,-1 949 | 947,1 950 | 948,1 951 | 949,1 952 | 950,1 953 | 951,1 954 | 952,1 955 | 953,1 956 | 954,1 957 | 955,-1 958 | 956,1 959 | 957,-1 960 | 958,1 961 | 959,1 962 | 960,1 963 | 961,1 964 | 962,1 965 | 963,-1 966 | 964,-1 967 | 965,1 968 | 966,1 969 | 967,1 970 | 968,1 971 | 
969,-1 972 | 970,-1 973 | 971,1 974 | 972,1 975 | 973,-1 976 | 974,1 977 | 975,1 978 | 976,1 979 | 977,1 980 | 978,1 981 | 979,1 982 | 980,1 983 | 981,1 984 | 982,-1 985 | 983,1 986 | 984,1 987 | 985,1 988 | 986,1 989 | 987,-1 990 | 988,1 991 | 989,1 992 | 990,1 993 | 991,1 994 | 992,1 995 | 993,-1 996 | 994,1 997 | 995,-1 998 | 996,1 999 | 997,1 1000 | 998,1 1001 | 999,1 1002 | 1000,1 1003 | 1001,1 1004 | 1002,1 1005 | 1003,-1 1006 | 1004,1 1007 | 1005,1 1008 | 1006,1 1009 | 1007,1 1010 | 1008,1 1011 | 1009,-1 1012 | 1010,1 1013 | 1011,1 1014 | 1012,1 1015 | 1013,1 1016 | 1014,1 1017 | 1015,1 1018 | 1016,1 1019 | 1017,1 1020 | 1018,1 1021 | 1019,1 1022 | 1020,1 1023 | 1021,1 1024 | 1022,1 1025 | 1023,1 1026 | 1024,1 1027 | 1025,1 1028 | 1026,1 1029 | 1027,1 1030 | 1028,-1 1031 | 1029,1 1032 | 1030,1 1033 | 1031,-1 1034 | 1032,1 1035 | 1033,1 1036 | 1034,1 1037 | 1035,1 1038 | 1036,1 1039 | 1037,1 1040 | 1038,1 1041 | 1039,1 1042 | 1040,-1 1043 | 1041,1 1044 | 1042,1 1045 | 1043,1 1046 | 1044,1 1047 | 1045,1 1048 | 1046,1 1049 | 1047,1 1050 | 1048,1 1051 | 1049,1 1052 | 1050,1 1053 | 1051,1 1054 | 1052,1 1055 | 1053,-1 1056 | 1054,-1 1057 | 1055,1 1058 | 1056,1 1059 | 1057,1 1060 | 1058,-1 1061 | 1059,1 1062 | 1060,1 1063 | 1061,1 1064 | 1062,-1 1065 | 1063,1 1066 | 1064,1 1067 | 1065,-1 1068 | 1066,1 1069 | 1067,1 1070 | 1068,-1 1071 | 1069,-1 1072 | 1070,1 1073 | 1071,-1 1074 | 1072,1 1075 | 1073,-1 1076 | 1074,-1 1077 | 1075,1 1078 | 1076,-1 1079 | 1077,1 1080 | 1078,-1 1081 | 1079,-1 1082 | 1080,-1 1083 | 1081,1 1084 | 1082,-1 1085 | 1083,1 1086 | 1084,1 1087 | 1085,1 1088 | 1086,1 1089 | 1087,1 1090 | 1088,1 1091 | 1089,1 1092 | 1090,1 1093 | 1091,1 1094 | 1092,1 1095 | 1093,-1 1096 | 1094,1 1097 | 1095,1 1098 | 1096,-1 1099 | 1097,1 1100 | 1098,1 1101 | 1099,1 1102 | 1100,-1 1103 | 1101,1 1104 | 1102,1 1105 | 1103,-1 1106 | 1104,1 1107 | 1105,1 1108 | 1106,1 1109 | 1107,-1 1110 | 1108,1 1111 | 1109,1 1112 | 1110,1 1113 | 1111,1 1114 | 1112,1 1115 | 1113,-1 
1116 | 1114,1 1117 | 1115,1 1118 | 1116,1 1119 | 1117,-1 1120 | 1118,1 1121 | 1119,1 1122 | 1120,1 1123 | 1121,1 1124 | 1122,1 1125 | 1123,1 1126 | 1124,1 1127 | 1125,-1 1128 | 1126,1 1129 | 1127,1 1130 | 1128,1 1131 | 1129,1 1132 | 1130,-1 1133 | 1131,1 1134 | 1132,1 1135 | 1133,1 1136 | 1134,1 1137 | 1135,1 1138 | 1136,-1 1139 | 1137,1 1140 | 1138,1 1141 | 1139,1 1142 | 1140,1 1143 | 1141,1 1144 | 1142,1 1145 | 1143,1 1146 | 1144,1 1147 | 1145,-1 1148 | 1146,1 1149 | 1147,1 1150 | 1148,-1 1151 | 1149,1 1152 | 1150,-1 1153 | 1151,-1 1154 | 1152,1 1155 | 1153,1 1156 | 1154,-1 1157 | 1155,-1 1158 | 1156,1 1159 | 1157,1 1160 | 1158,1 1161 | 1159,1 1162 | 1160,1 1163 | 1161,1 1164 | 1162,-1 1165 | 1163,-1 1166 | 1164,-1 1167 | 1165,1 1168 | 1166,1 1169 | 1167,1 1170 | 1168,1 1171 | 1169,1 1172 | 1170,1 1173 | 1171,-1 1174 | 1172,1 1175 | 1173,1 1176 | 1174,1 1177 | 1175,-1 1178 | 1176,1 1179 | 1177,1 1180 | 1178,-1 1181 | 1179,1 1182 | 1180,1 1183 | 1181,1 1184 | 1182,1 1185 | 1183,1 1186 | 1184,-1 1187 | 1185,1 1188 | 1186,1 1189 | 1187,1 1190 | 1188,-1 1191 | 1189,1 1192 | 1190,-1 1193 | 1191,-1 1194 | 1192,-1 1195 | 1193,1 1196 | 1194,1 1197 | 1195,1 1198 | 1196,1 1199 | 1197,1 1200 | 1198,-1 1201 | 1199,1 1202 | 1200,1 1203 | 1201,1 1204 | 1202,1 1205 | 1203,-1 1206 | 1204,-1 1207 | 1205,1 1208 | 1206,-1 1209 | 1207,1 1210 | 1208,1 1211 | 1209,1 1212 | 1210,-1 1213 | 1211,-1 1214 | 1212,-1 1215 | 1213,1 1216 | 1214,1 1217 | 1215,1 1218 | 1216,-1 1219 | 1217,1 1220 | 1218,1 1221 | 1219,1 1222 | 1220,1 1223 | 1221,1 1224 | 1222,-1 1225 | 1223,1 1226 | 1224,1 1227 | 1225,1 1228 | 1226,1 1229 | 1227,-1 1230 | 1228,1 1231 | 1229,-1 1232 | 1230,1 1233 | 1231,-1 1234 | 1232,1 1235 | 1233,1 1236 | 1234,1 1237 | 1235,1 1238 | 1236,1 1239 | 1237,-1 1240 | 1238,1 1241 | 1239,1 1242 | 1240,1 1243 | 1241,1 1244 | 1242,1 1245 | 1243,-1 1246 | 1244,-1 1247 | 1245,-1 1248 | 1246,1 1249 | 1247,1 1250 | 1248,-1 1251 | 1249,1 1252 | 1250,1 1253 | 1251,-1 1254 | 1252,1 1255 | 1253,1 
1256 | 1254,1 1257 | 1255,1 1258 | 1256,1 1259 | 1257,-1 1260 | 1258,1 1261 | 1259,1 1262 | 1260,1 1263 | 1261,-1 1264 | 1262,-1 1265 | 1263,-1 1266 | 1264,1 1267 | 1265,1 1268 | 1266,-1 1269 | 1267,1 1270 | 1268,-1 1271 | 1269,1 1272 | 1270,1 1273 | 1271,1 1274 | 1272,1 1275 | 1273,-1 1276 | 1274,1 1277 | 1275,1 1278 | 1276,1 1279 | 1277,-1 1280 | 1278,-1 1281 | 1279,1 1282 | 1280,-1 1283 | 1281,1 1284 | 1282,1 1285 | 1283,1 1286 | 1284,1 1287 | 1285,1 1288 | 1286,1 1289 | 1287,-1 1290 | 1288,1 1291 | 1289,1 1292 | 1290,-1 1293 | 1291,1 1294 | 1292,-1 1295 | 1293,1 1296 | 1294,1 1297 | 1295,-1 1298 | 1296,1 1299 | 1297,1 1300 | 1298,1 1301 | 1299,1 1302 | 1300,-1 1303 | 1301,1 1304 | 1302,-1 1305 | 1303,-1 1306 | 1304,1 1307 | 1305,-1 1308 | 1306,-1 1309 | 1307,1 1310 | 1308,1 1311 | 1309,1 1312 | 1310,1 1313 | 1311,1 1314 | 1312,-1 1315 | 1313,1 1316 | 1314,-1 1317 | 1315,-1 1318 | 1316,1 1319 | 1317,-1 1320 | 1318,1 1321 | 1319,-1 1322 | 1320,-1 1323 | 1321,1 1324 | 1322,-1 1325 | 1323,1 1326 | 1324,1 1327 | 1325,1 1328 | 1326,1 1329 | 1327,1 1330 | 1328,1 1331 | 1329,1 1332 | 1330,1 1333 | 1331,1 1334 | 1332,1 1335 | 1333,-1 1336 | 1334,-1 1337 | 1335,1 1338 | 1336,-1 1339 | 1337,-1 1340 | 1338,1 1341 | 1339,1 1342 | 1340,1 1343 | 1341,-1 1344 | 1342,1 1345 | 1343,-1 1346 | 1344,-1 1347 | 1345,1 1348 | 1346,1 1349 | 1347,1 1350 | 1348,1 1351 | 1349,1 1352 | 1350,1 1353 | 1351,-1 1354 | 1352,-1 1355 | 1353,-1 1356 | 1354,-1 1357 | 1355,1 1358 | 1356,-1 1359 | 1357,-1 1360 | 1358,-1 1361 | 1359,1 1362 | 1360,1 1363 | 1361,1 1364 | 1362,-1 1365 | 1363,-1 1366 | 1364,1 1367 | 1365,1 1368 | 1366,1 1369 | 1367,1 1370 | 1368,-1 1371 | 1369,1 1372 | 1370,-1 1373 | 1371,1 1374 | 1372,1 1375 | 1373,-1 1376 | 1374,1 1377 | 1375,1 1378 | 1376,-1 1379 | 1377,1 1380 | 1378,-1 1381 | 1379,1 1382 | 1380,-1 1383 | 1381,1 1384 | 1382,1 1385 | 1383,1 1386 | 1384,1 1387 | 1385,1 1388 | 1386,1 1389 | 1387,-1 1390 | 1388,1 1391 | 1389,1 1392 | 1390,1 1393 | 1391,1 1394 | 1392,1 1395 
| 1393,1 1396 | 1394,1 1397 | 1395,1 1398 | 1396,1 1399 | 1397,1 1400 | 1398,-1 1401 | 1399,1 1402 | 1400,-1 1403 | 1401,1 1404 | 1402,1 1405 | 1403,1 1406 | 1404,1 1407 | 1405,1 1408 | 1406,1 1409 | 1407,1 1410 | 1408,1 1411 | 1409,1 1412 | 1410,-1 1413 | 1411,1 1414 | 1412,1 1415 | 1413,1 1416 | 1414,-1 1417 | 1415,-1 1418 | 1416,1 1419 | 1417,1 1420 | 1418,1 1421 | 1419,1 1422 | 1420,1 1423 | 1421,1 1424 | 1422,1 1425 | 1423,-1 1426 | 1424,1 1427 | 1425,1 1428 | 1426,1 1429 | 1427,1 1430 | 1428,1 1431 | 1429,-1 1432 | 1430,1 1433 | 1431,1 1434 | 1432,1 1435 | 1433,1 1436 | 1434,1 1437 | 1435,1 1438 | 1436,1 1439 | 1437,1 1440 | 1438,1 1441 | 1439,1 1442 | 1440,1 1443 | 1441,1 1444 | 1442,1 1445 | 1443,1 1446 | 1444,1 1447 | 1445,1 1448 | 1446,1 1449 | 1447,-1 1450 | 1448,1 1451 | 1449,-1 1452 | 1450,1 1453 | 1451,1 1454 | 1452,1 1455 | 1453,1 1456 | 1454,1 1457 | 1455,1 1458 | 1456,1 1459 | 1457,1 1460 | 1458,1 1461 | 1459,1 1462 | 1460,1 1463 | 1461,1 1464 | 1462,1 1465 | 1463,-1 1466 | 1464,1 1467 | 1465,-1 1468 | 1466,1 1469 | 1467,1 1470 | 1468,1 1471 | 1469,1 1472 | 1470,1 1473 | 1471,1 1474 | 1472,-1 1475 | 1473,1 1476 | 1474,-1 1477 | 1475,1 1478 | 1476,1 1479 | 1477,1 1480 | 1478,1 1481 | 1479,1 1482 | 1480,1 1483 | 1481,1 1484 | 1482,1 1485 | 1483,1 1486 | 1484,1 1487 | 1485,1 1488 | 1486,-1 1489 | 1487,1 1490 | 1488,1 1491 | 1489,-1 1492 | 1490,1 1493 | 1491,1 1494 | 1492,1 1495 | 1493,1 1496 | 1494,1 1497 | 1495,1 1498 | 1496,1 1499 | 1497,1 1500 | 1498,1 1501 | 1499,1 1502 | 1500,1 1503 | 1501,1 1504 | 1502,-1 1505 | 1503,-1 1506 | 1504,1 1507 | 1505,1 1508 | 1506,1 1509 | 1507,1 1510 | 1508,1 1511 | 1509,1 1512 | 1510,1 1513 | 1511,-1 1514 | 1512,1 1515 | 1513,1 1516 | 1514,1 1517 | 1515,-1 1518 | 1516,1 1519 | 1517,1 1520 | 1518,1 1521 | 1519,1 1522 | 1520,1 1523 | 1521,1 1524 | 1522,1 1525 | 1523,1 1526 | 1524,1 1527 | 1525,-1 1528 | 1526,1 1529 | 1527,-1 1530 | 1528,-1 1531 | 1529,1 1532 | 1530,1 1533 | 1531,1 1534 | 1532,1 1535 | 1533,1 1536 | 
1534,1 1537 | 1535,1 1538 | 1536,-1 1539 | 1537,1 1540 | 1538,1 1541 | 1539,1 1542 | 1540,1 1543 | 1541,-1 1544 | 1542,-1 1545 | 1543,1 1546 | 1544,1 1547 | 1545,1 1548 | 1546,1 1549 | 1547,-1 1550 | 1548,1 1551 | 1549,1 1552 | 1550,1 1553 | 1551,1 1554 | 1552,1 1555 | 1553,1 1556 | 1554,-1 1557 | 1555,1 1558 | 1556,-1 1559 | 1557,-1 1560 | 1558,1 1561 | 1559,1 1562 | 1560,1 1563 | 1561,-1 1564 | 1562,1 1565 | 1563,1 1566 | 1564,1 1567 | 1565,1 1568 | 1566,1 1569 | 1567,1 1570 | 1568,-1 1571 | 1569,-1 1572 | 1570,1 1573 | 1571,1 1574 | 1572,1 1575 | 1573,-1 1576 | 1574,1 1577 | 1575,1 1578 | 1576,1 1579 | 1577,-1 1580 | 1578,-1 1581 | 1579,1 1582 | 1580,1 1583 | 1581,1 1584 | 1582,1 1585 | 1583,1 1586 | 1584,1 1587 | 1585,1 1588 | 1586,-1 1589 | 1587,1 1590 | 1588,1 1591 | 1589,-1 1592 | 1590,1 1593 | 1591,1 1594 | 1592,-1 1595 | 1593,1 1596 | 1594,1 1597 | 1595,1 1598 | 1596,-1 1599 | 1597,-1 1600 | 1598,-1 1601 | 1599,-1 1602 | 1600,1 1603 | 1601,-1 1604 | 1602,1 1605 | 1603,1 1606 | 1604,1 1607 | 1605,1 1608 | 1606,1 1609 | 1607,1 1610 | 1608,1 1611 | 1609,-1 1612 | 1610,1 1613 | 1611,1 1614 | 1612,-1 1615 | 1613,1 1616 | 1614,1 1617 | 1615,1 1618 | 1616,-1 1619 | 1617,1 1620 | 1618,1 1621 | 1619,1 1622 | 1620,1 1623 | 1621,-1 1624 | 1622,1 1625 | 1623,-1 1626 | 1624,1 1627 | 1625,-1 1628 | 1626,-1 1629 | 1627,-1 1630 | 1628,1 1631 | 1629,1 1632 | 1630,1 1633 | 1631,-1 1634 | 1632,1 1635 | 1633,-1 1636 | 1634,-1 1637 | 1635,-1 1638 | 1636,1 1639 | 1637,-1 1640 | 1638,-1 1641 | 1639,1 1642 | 1640,1 1643 | 1641,-1 1644 | 1642,-1 1645 | 1643,1 1646 | 1644,1 1647 | 1645,1 1648 | 1646,-1 1649 | 1647,1 1650 | 1648,-1 1651 | 1649,-1 1652 | 1650,-1 1653 | 1651,1 1654 | 1652,1 1655 | 1653,1 1656 | 1654,-1 1657 | 1655,-1 1658 | 1656,1 1659 | 1657,1 1660 | 1658,1 1661 | 1659,1 1662 | 1660,1 1663 | 1661,1 1664 | 1662,-1 1665 | 1663,-1 1666 | 1664,-1 1667 | 1665,-1 1668 | 1666,1 1669 | 1667,-1 1670 | 1668,1 1671 | 1669,1 1672 | 1670,-1 1673 | 1671,1 1674 | 1672,-1 1675 | 
1673,1 1676 | 1674,1 1677 | 1675,1 1678 | 1676,-1 1679 | 1677,-1 1680 | 1678,1 1681 | 1679,1 1682 | 1680,-1 1683 | 1681,1 1684 | 1682,-1 1685 | 1683,1 1686 | 1684,-1 1687 | 1685,1 1688 | 1686,1 1689 | 1687,1 1690 | 1688,-1 1691 | 1689,1 1692 | 1690,-1 1693 | 1691,1 1694 | 1692,-1 1695 | 1693,1 1696 | 1694,1 1697 | 1695,1 1698 | 1696,-1 1699 | 1697,-1 1700 | 1698,-1 1701 | 1699,1 1702 | 1700,1 1703 | 1701,1 1704 | 1702,1 1705 | 1703,-1 1706 | 1704,1 1707 | 1705,1 1708 | 1706,-1 1709 | 1707,1 1710 | 1708,1 1711 | 1709,1 1712 | 1710,-1 1713 | 1711,1 1714 | 1712,-1 1715 | 1713,1 1716 | 1714,1 1717 | 1715,-1 1718 | 1716,1 1719 | 1717,1 1720 | 1718,1 1721 | 1719,-1 1722 | 1720,1 1723 | 1721,1 1724 | 1722,1 1725 | 1723,-1 1726 | 1724,1 1727 | 1725,1 1728 | 1726,1 1729 | 1727,1 1730 | 1728,1 1731 | 1729,1 1732 | 1730,1 1733 | 1731,1 1734 | 1732,-1 1735 | 1733,1 1736 | 1734,-1 1737 | 1735,1 1738 | 1736,1 1739 | 1737,1 1740 | 1738,1 1741 | 1739,1 1742 | 1740,1 1743 | 1741,1 1744 | 1742,-1 1745 | 1743,-1 1746 | 1744,-1 1747 | 1745,1 1748 | 1746,-1 1749 | 1747,-1 1750 | 1748,-1 1751 | 1749,1 1752 | 1750,1 1753 | 1751,-1 1754 | 1752,1 1755 | 1753,-1 1756 | 1754,-1 1757 | 1755,1 1758 | 1756,-1 1759 | 1757,-1 1760 | 1758,-1 1761 | 1759,-1 1762 | 1760,1 1763 | 1761,-1 1764 | 1762,-1 1765 | 1763,-1 1766 | 1764,-1 1767 | 1765,-1 1768 | 1766,-1 1769 | 1767,-1 1770 | 1768,1 1771 | 1769,-1 1772 | 1770,-1 1773 | 1771,-1 1774 | 1772,-1 1775 | 1773,1 1776 | 1774,-1 1777 | 1775,1 1778 | 1776,1 1779 | 1777,-1 1780 | 1778,1 1781 | 1779,-1 1782 | 1780,-1 1783 | 1781,1 1784 | 1782,-1 1785 | 1783,-1 1786 | 1784,-1 1787 | 1785,-1 1788 | 1786,-1 1789 | 1787,-1 1790 | 1788,1 1791 | 1789,-1 1792 | 1790,-1 1793 | 1791,-1 1794 | 1792,1 1795 | 1793,-1 1796 | 1794,-1 1797 | 1795,-1 1798 | 1796,1 1799 | 1797,1 1800 | 1798,1 1801 | 1799,-1 1802 | 1800,-1 1803 | 1801,-1 1804 | 1802,1 1805 | 1803,-1 1806 | 1804,1 1807 | 1805,-1 1808 | 1806,1 1809 | 1807,1 1810 | 1808,1 1811 | 1809,1 1812 | 1810,-1 1813 | 
1811,-1 1814 | 1812,-1 1815 | 1813,-1 1816 | 1814,-1 1817 | 1815,-1 1818 | 1816,1 1819 | 1817,1 1820 | 1818,1 1821 | 1819,-1 1822 | 1820,-1 1823 | 1821,1 1824 | 1822,-1 1825 | 1823,-1 1826 | 1824,1 1827 | 1825,-1 1828 | 1826,1 1829 | 1827,1 1830 | 1828,-1 1831 | 1829,-1 1832 | 1830,-1 1833 | 1831,1 1834 | 1832,1 1835 | 1833,1 1836 | 1834,-1 1837 | 1835,1 1838 | 1836,-1 1839 | 1837,-1 1840 | 1838,-1 1841 | 1839,-1 1842 | 1840,-1 1843 | 1841,-1 1844 | 1842,-1 1845 | 1843,1 1846 | 1844,1 1847 | 1845,-1 1848 | 1846,1 1849 | 1847,-1 1850 | 1848,-1 1851 | 1849,1 1852 | 1850,1 1853 | 1851,-1 1854 | 1852,-1 1855 | 1853,-1 1856 | 1854,-1 1857 | 1855,-1 1858 | 1856,1 1859 | 1857,1 1860 | 1858,1 1861 | 1859,-1 1862 | 1860,-1 1863 | 1861,-1 1864 | 1862,1 1865 | 1863,-1 1866 | 1864,-1 1867 | 1865,1 1868 | 1866,-1 1869 | 1867,-1 1870 | 1868,-1 1871 | 1869,-1 1872 | 1870,-1 1873 | 1871,-1 1874 | 1872,1 1875 | 1873,-1 1876 | 1874,-1 1877 | 1875,-1 1878 | 1876,-1 1879 | 1877,-1 1880 | 1878,-1 1881 | 1879,1 1882 | 1880,1 1883 | 1881,1 1884 | 1882,1 1885 | 1883,1 1886 | 1884,-1 1887 | 1885,-1 1888 | 1886,1 1889 | 1887,-1 1890 | 1888,1 1891 | 1889,-1 1892 | 1890,-1 1893 | 1891,1 1894 | 1892,-1 1895 | 1893,1 1896 | 1894,-1 1897 | 1895,1 1898 | 1896,-1 1899 | 1897,-1 1900 | 1898,-1 1901 | 1899,1 1902 | 1900,-1 1903 | 1901,1 1904 | 1902,-1 1905 | 1903,-1 1906 | 1904,1 1907 | 1905,-1 1908 | 1906,1 1909 | 1907,-1 1910 | 1908,1 1911 | 1909,-1 1912 | 1910,1 1913 | 1911,-1 1914 | 1912,-1 1915 | 1913,-1 1916 | 1914,-1 1917 | 1915,1 1918 | 1916,1 1919 | 1917,-1 1920 | 1918,-1 1921 | 1919,-1 1922 | 1920,1 1923 | 1921,1 1924 | 1922,-1 1925 | 1923,-1 1926 | 1924,-1 1927 | 1925,1 1928 | 1926,-1 1929 | 1927,1 1930 | 1928,-1 1931 | 1929,-1 1932 | 1930,-1 1933 | 1931,-1 1934 | 1932,1 1935 | 1933,-1 1936 | 1934,1 1937 | 1935,-1 1938 | 1936,-1 1939 | 1937,-1 1940 | 1938,-1 1941 | 1939,-1 1942 | 1940,-1 1943 | 1941,1 1944 | 1942,-1 1945 | 1943,1 1946 | 1944,-1 1947 | 1945,-1 1948 | 1946,1 1949 | 1947,-1 
1950 | 1948,1 1951 | 1949,1 1952 | 1950,-1 1953 | 1951,1 1954 | 1952,1 1955 | 1953,-1 1956 | 1954,-1 1957 | 1955,-1 1958 | 1956,-1 1959 | 1957,-1 1960 | 1958,-1 1961 | 1959,1 1962 | 1960,-1 1963 | 1961,-1 1964 | 1962,-1 1965 | 1963,-1 1966 | 1964,1 1967 | 1965,-1 1968 | 1966,-1 1969 | 1967,1 1970 | 1968,-1 1971 | 1969,1 1972 | 1970,1 1973 | 1971,-1 1974 | 1972,1 1975 | 1973,-1 1976 | 1974,-1 1977 | 1975,1 1978 | 1976,-1 1979 | 1977,-1 1980 | 1978,-1 1981 | 1979,-1 1982 | 1980,-1 1983 | 1981,-1 1984 | 1982,-1 1985 | 1983,-1 1986 | 1984,-1 1987 | 1985,-1 1988 | 1986,-1 1989 | 1987,-1 1990 | 1988,1 1991 | 1989,-1 1992 | 1990,-1 1993 | 1991,-1 1994 | 1992,-1 1995 | 1993,-1 1996 | 1994,1 1997 | 1995,-1 1998 | 1996,-1 1999 | 1997,1 2000 | 1998,-1 2001 | 1999,-1 2002 | 2000,-1 2003 | 2001,1 2004 | 2002,-1 2005 | 2003,-1 2006 | 2004,-1 2007 | 2005,1 2008 | 2006,-1 2009 | 2007,1 2010 | 2008,-1 2011 | 2009,-1 2012 | 2010,1 2013 | 2011,-1 2014 | 2012,-1 2015 | 2013,1 2016 | 2014,-1 2017 | 2015,1 2018 | 2016,1 2019 | 2017,1 2020 | 2018,-1 2021 | 2019,-1 2022 | 2020,1 2023 | 2021,1 2024 | 2022,-1 2025 | 2023,-1 2026 | 2024,-1 2027 | 2025,-1 2028 | 2026,-1 2029 | 2027,-1 2030 | 2028,-1 2031 | 2029,-1 2032 | 2030,-1 2033 | 2031,-1 2034 | 2032,-1 2035 | 2033,-1 2036 | 2034,1 2037 | 2035,-1 2038 | 2036,-1 2039 | 2037,-1 2040 | 2038,-1 2041 | 2039,-1 2042 | 2040,1 2043 | 2041,-1 2044 | 2042,-1 2045 | 2043,-1 2046 | 2044,1 2047 | 2045,-1 2048 | 2046,-1 2049 | 2047,1 2050 | 2048,1 2051 | 2049,-1 2052 | 2050,1 2053 | 2051,1 2054 | 2052,-1 2055 | 2053,1 2056 | 2054,1 2057 | 2055,-1 2058 | 2056,-1 2059 | 2057,-1 2060 | 2058,-1 2061 | 2059,-1 2062 | 2060,-1 2063 | 2061,-1 2064 | 2062,-1 2065 | 2063,1 2066 | 2064,-1 2067 | 2065,-1 2068 | 2066,1 2069 | 2067,-1 2070 | 2068,-1 2071 | 2069,-1 2072 | 2070,-1 2073 | 2071,1 2074 | 2072,1 2075 | 2073,-1 2076 | 2074,1 2077 | 2075,-1 2078 | 2076,-1 2079 | 2077,-1 2080 | 2078,1 2081 | 2079,-1 2082 | 2080,-1 2083 | 2081,1 2084 | 2082,1 2085 | 2083,-1 
2086 | 2084,-1 2087 | 2085,-1 2088 | 2086,1 2089 | 2087,-1 2090 | 2088,-1 2091 | 2089,-1 2092 | 2090,-1 2093 | 2091,1 2094 | 2092,-1 2095 | 2093,-1 2096 | 2094,-1 2097 | 2095,-1 2098 | 2096,-1 2099 | 2097,-1 2100 | 2098,1 2101 | 2099,1 2102 | 2100,-1 2103 | 2101,-1 2104 | 2102,-1 2105 | 2103,-1 2106 | 2104,-1 2107 | 2105,-1 2108 | 2106,-1 2109 | 2107,1 2110 | 2108,-1 2111 | 2109,1 2112 | 2110,1 2113 | 2111,-1 2114 | 2112,-1 2115 | 2113,1 2116 | 2114,1 2117 | 2115,-1 2118 | 2116,1 2119 | 2117,-1 2120 | 2118,1 2121 | 2119,1 2122 | 2120,-1 2123 | 2121,-1 2124 | 2122,1 2125 | 2123,-1 2126 | 2124,-1 2127 | 2125,1 2128 | 2126,-1 2129 | 2127,1 2130 | 2128,-1 2131 | 2129,-1 2132 | 2130,-1 2133 | 2131,-1 2134 | 2132,1 2135 | 2133,1 2136 | 2134,-1 2137 | 2135,-1 2138 | 2136,-1 2139 | 2137,-1 2140 | 2138,-1 2141 | 2139,1 2142 | 2140,-1 2143 | 2141,-1 2144 | 2142,-1 2145 | 2143,1 2146 | 2144,1 2147 | 2145,-1 2148 | 2146,1 2149 | 2147,-1 2150 | 2148,-1 2151 | 2149,1 2152 | 2150,-1 2153 | 2151,-1 2154 | 2152,-1 2155 | 2153,-1 2156 | 2154,1 2157 | 2155,-1 2158 | 2156,-1 2159 | 2157,-1 2160 | 2158,-1 2161 | 2159,-1 2162 | 2160,-1 2163 | 2161,-1 2164 | 2162,1 2165 | 2163,-1 2166 | 2164,-1 2167 | 2165,-1 2168 | 2166,-1 2169 | 2167,-1 2170 | 2168,-1 2171 | 2169,1 2172 | 2170,1 2173 | 2171,-1 2174 | 2172,1 2175 | 2173,-1 2176 | 2174,-1 2177 | 2175,1 2178 | 2176,-1 2179 | 2177,-1 2180 | 2178,-1 2181 | 2179,-1 2182 | 2180,-1 2183 | 2181,-1 2184 | 2182,1 2185 | 2183,-1 2186 | 2184,-1 2187 | 2185,-1 2188 | 2186,1 2189 | 2187,-1 2190 | 2188,-1 2191 | 2189,-1 2192 | 2190,1 2193 | 2191,1 2194 | 2192,-1 2195 | 2193,-1 2196 | 2194,-1 2197 | 2195,1 2198 | 2196,-1 2199 | 2197,1 2200 | 2198,-1 2201 | 2199,1 2202 | 2200,1 2203 | 2201,1 2204 | 2202,-1 2205 | 2203,1 2206 | 2204,-1 2207 | 2205,-1 2208 | 2206,-1 2209 | 2207,-1 2210 | 2208,-1 2211 | 2209,-1 2212 | 2210,1 2213 | 2211,1 2214 | 2212,1 2215 | 2213,-1 2216 | 2214,-1 2217 | 2215,1 2218 | 2216,-1 2219 | 2217,-1 2220 | 2218,-1 2221 | 2219,-1 
2222 | 2220,1 2223 | 2221,1 2224 | 2222,-1 2225 | 2223,-1 2226 | 2224,-1 2227 | 2225,1 2228 | 2226,1 2229 | 2227,1 2230 | 2228,-1 2231 | 2229,1 2232 | 2230,-1 2233 | 2231,-1 2234 | 2232,-1 2235 | 2233,-1 2236 | 2234,-1 2237 | 2235,-1 2238 | 2236,-1 2239 | 2237,1 2240 | 2238,1 2241 | 2239,-1 2242 | 2240,1 2243 | 2241,-1 2244 | 2242,-1 2245 | 2243,1 2246 | 2244,1 2247 | 2245,-1 2248 | 2246,-1 2249 | 2247,-1 2250 | 2248,-1 2251 | 2249,-1 2252 | 2250,1 2253 | 2251,1 2254 | 2252,1 2255 | 2253,-1 2256 | 2254,-1 2257 | 2255,-1 2258 | 2256,1 2259 | 2257,-1 2260 | 2258,-1 2261 | 2259,1 2262 | 2260,-1 2263 | 2261,-1 2264 | 2262,-1 2265 | 2263,-1 2266 | 2264,-1 2267 | 2265,-1 2268 | 2266,1 2269 | 2267,-1 2270 | 2268,-1 2271 | 2269,-1 2272 | 2270,-1 2273 | 2271,-1 2274 | 2272,-1 2275 | 2273,1 2276 | 2274,1 2277 | 2275,1 2278 | 2276,1 2279 | 2277,1 2280 | 2278,-1 2281 | 2279,-1 2282 | 2280,1 2283 | 2281,-1 2284 | 2282,1 2285 | 2283,-1 2286 | 2284,-1 2287 | 2285,1 2288 | 2286,-1 2289 | 2287,1 2290 | 2288,-1 2291 | 2289,1 2292 | 2290,-1 2293 | 2291,-1 2294 | 2292,-1 2295 | 2293,1 2296 | 2294,-1 2297 | 2295,1 2298 | 2296,-1 2299 | 2297,-1 2300 | 2298,1 2301 | 2299,-1 2302 | 2300,1 2303 | 2301,-1 2304 | 2302,1 2305 | 2303,-1 2306 | 2304,1 2307 | 2305,-1 2308 | 2306,-1 2309 | 2307,-1 2310 | 2308,-1 2311 | 2309,1 2312 | 2310,1 2313 | 2311,-1 2314 | 2312,-1 2315 | 2313,-1 2316 | 2314,1 2317 | 2315,1 2318 | 2316,-1 2319 | 2317,-1 2320 | 2318,-1 2321 | 2319,-1 2322 | 2320,1 2323 | 2321,-1 2324 | 2322,1 2325 | 2323,-1 2326 | 2324,-1 2327 | 2325,-1 2328 | 2326,-1 2329 | 2327,1 2330 | 2328,-1 2331 | 2329,1 2332 | 2330,-1 2333 | 2331,-1 2334 | 2332,-1 2335 | 2333,-1 2336 | 2334,-1 2337 | 2335,-1 2338 | 2336,1 2339 | 2337,-1 2340 | 2338,1 2341 | 2339,-1 2342 | 2340,-1 2343 | 2341,1 2344 | 2342,-1 2345 | 2343,1 2346 | 2344,1 2347 | 2345,-1 2348 | 2346,1 2349 | 2347,1 2350 | 2348,-1 2351 | 2349,1 2352 | 2350,-1 2353 | 2351,-1 2354 | 2352,-1 2355 | 2353,-1 2356 | 2354,1 2357 | 2355,-1 2358 | 
2356,1 2359 | 2357,-1 2360 | 2358,-1 2361 | 2359,1 2362 | 2360,-1 2363 | 2361,-1 2364 | 2362,1 2365 | 2363,-1 2366 | 2364,1 2367 | 2365,1 2368 | 2366,-1 2369 | 2367,1 2370 | 2368,-1 2371 | 2369,-1 2372 | 2370,1 2373 | 2371,-1 2374 | 2372,-1 2375 | 2373,-1 2376 | 2374,-1 2377 | 2375,-1 2378 | 2376,-1 2379 | 2377,-1 2380 | 2378,-1 2381 | 2379,-1 2382 | 2380,-1 2383 | 2381,-1 2384 | 2382,-1 2385 | 2383,1 2386 | 2384,-1 2387 | 2385,-1 2388 | 2386,-1 2389 | 2387,-1 2390 | 2388,-1 2391 | 2389,1 2392 | 2390,-1 2393 | 2391,-1 2394 | 2392,1 2395 | 2393,-1 2396 | 2394,-1 2397 | 2395,-1 2398 | 2396,1 2399 | 2397,-1 2400 | 2398,-1 2401 | 2399,-1 2402 | 2400,1 2403 | 2401,-1 2404 | 2402,1 2405 | 2403,-1 2406 | 2404,-1 2407 | 2405,1 2408 | 2406,-1 2409 | 2407,-1 2410 | 2408,1 2411 | 2409,-1 2412 | 2410,1 2413 | 2411,1 2414 | 2412,1 2415 | 2413,-1 2416 | 2414,-1 2417 | 2415,1 2418 | 2416,-1 2419 | 2417,-1 2420 | 2418,-1 2421 | 2419,-1 2422 | 2420,-1 2423 | 2421,-1 2424 | 2422,-1 2425 | 2423,-1 2426 | 2424,-1 2427 | 2425,-1 2428 | 2426,-1 2429 | 2427,-1 2430 | 2428,1 2431 | 2429,-1 2432 | 2430,-1 2433 | 2431,-1 2434 | 2432,-1 2435 | 2433,-1 2436 | 2434,1 2437 | 2435,1 2438 | 2436,-1 2439 | 2437,-1 2440 | 2438,1 2441 | 2439,-1 2442 | 2440,-1 2443 | 2441,1 2444 | 2442,1 2445 | 2443,-1 2446 | 2444,1 2447 | 2445,1 2448 | 2446,-1 2449 | 2447,1 2450 | 2448,1 2451 | 2449,-1 2452 | 2450,-1 2453 | 2451,-1 2454 | 2452,-1 2455 | 2453,-1 2456 | 2454,-1 2457 | 2455,-1 2458 | 2456,-1 2459 | 2457,1 2460 | 2458,-1 2461 | 2459,-1 2462 | 2460,1 2463 | 2461,-1 2464 | 2462,-1 2465 | 2463,-1 2466 | 2464,-1 2467 | 2465,1 2468 | 2466,1 2469 | 2467,-1 2470 | 2468,-1 2471 | 2469,1 2472 | 2470,-1 2473 | 2471,-1 2474 | 2472,-1 2475 | 2473,1 2476 | 2474,-1 2477 | 2475,-1 2478 | 2476,1 2479 | 2477,1 2480 | 2478,-1 2481 | 2479,-1 2482 | 2480,-1 2483 | 2481,1 2484 | 2482,-1 2485 | 2483,-1 2486 | 2484,-1 2487 | 2485,-1 2488 | 2486,1 2489 | 2487,-1 2490 | 2488,-1 2491 | 2489,-1 2492 | 2490,-1 2493 | 2491,-1 2494 
| 2492,1 2495 | 2493,1 2496 | 2494,-1 2497 | 2495,-1 2498 | 2496,-1 2499 | 2497,-1 2500 | 2498,-1 2501 | 2499,-1 2502 | 2500,-1 2503 | 2501,1 2504 | 2502,-1 2505 | 2503,1 2506 | 2504,1 2507 | 2505,-1 2508 | 2506,-1 2509 | 2507,1 2510 | 2508,-1 2511 | 2509,1 2512 | 2510,-1 2513 | 2511,1 2514 | 2512,1 2515 | 2513,-1 2516 | 2514,-1 2517 | 2515,1 2518 | 2516,-1 2519 | 2517,-1 2520 | 2518,1 2521 | 2519,-1 2522 | 2520,1 2523 | 2521,-1 2524 | 2522,-1 2525 | 2523,-1 2526 | 2524,-1 2527 | 2525,1 2528 | 2526,1 2529 | 2527,-1 2530 | 2528,-1 2531 | 2529,-1 2532 | 2530,-1 2533 | 2531,-1 2534 | 2532,1 2535 | 2533,-1 2536 | 2534,-1 2537 | 2535,-1 2538 | 2536,-1 2539 | 2537,1 2540 | 2538,-1 2541 | 2539,-1 2542 | 2540,-1 2543 | 2541,1 2544 | 2542,1 2545 | 2543,-1 2546 | 2544,-1 2547 | 2545,-1 2548 | 2546,1 2549 | 2547,-1 2550 | 2548,1 2551 | 2549,1 2552 | 2550,-1 2553 | 2551,1 2554 | 2552,-1 2555 | 2553,1 2556 | 2554,-1 2557 | 2555,1 2558 | 2556,-1 2559 | 2557,1 2560 | 2558,-1 2561 | 2559,1 2562 | 2560,-1 2563 | 2561,-1 2564 | 2562,1 2565 | 2563,1 2566 | 2564,1 2567 | 2565,-1 2568 | 2566,-1 2569 | 2567,-1 2570 | 2568,1 2571 | 2569,-1 2572 | 2570,-1 2573 | 2571,1 2574 | 2572,-1 2575 | 2573,-1 2576 | 2574,-1 2577 | 2575,-1 2578 | 2576,-1 2579 | 2577,-1 2580 | 2578,1 2581 | 2579,-1 2582 | 2580,1 2583 | 2581,1 2584 | 2582,-1 2585 | 2583,-1 2586 | 2584,1 2587 | 2585,-1 2588 | 2586,-1 2589 | 2587,-1 2590 | 2588,-1 2591 | 2589,1 2592 | 2590,1 2593 | 2591,1 2594 | 2592,1 2595 | 2593,-1 2596 | 2594,-1 2597 | 2595,-1 2598 | 2596,-1 2599 | 2597,-1 2600 | 2598,-1 2601 | 2599,-1 2602 | 2600,1 2603 | 2601,-1 2604 | 2602,-1 2605 | 2603,-1 2606 | 2604,1 2607 | 2605,-1 2608 | 2606,-1 2609 | 2607,-1 2610 | 2608,-1 2611 | 2609,-1 2612 | 2610,1 2613 | 2611,1 2614 | 2612,1 2615 | 2613,1 2616 | 2614,-1 2617 | 2615,-1 2618 | 2616,-1 2619 | 2617,1 2620 | 2618,1 2621 | 2619,-1 2622 | 2620,-1 2623 | 2621,-1 2624 | 2622,-1 2625 | 2623,-1 2626 | 2624,1 2627 | 2625,-1 2628 | 2626,-1 2629 | 2627,-1 2630 | 
2628,1 2631 | 2629,-1 2632 | 2630,-1 2633 | 2631,1 2634 | 2632,1 2635 | 2633,1 2636 | 2634,-1 2637 | 2635,-1 2638 | 2636,-1 2639 | 2637,1 2640 | 2638,-1 2641 | 2639,-1 2642 | 2640,-1 2643 | 2641,-1 2644 | 2642,1 2645 | 2643,-1 2646 | 2644,-1 2647 | 2645,-1 2648 | 2646,-1 2649 | 2647,-1 2650 | 2648,-1 2651 | 2649,-1 2652 | 2650,-1 2653 | 2651,1 2654 | 2652,-1 2655 | 2653,-1 2656 | 2654,1 2657 | 2655,1 2658 | 2656,-1 2659 | 2657,-1 2660 | 2658,-1 2661 | 2659,-1 2662 | 2660,1 2663 | 2661,1 2664 | 2662,-1 2665 | 2663,-1 2666 | 2664,1 2667 | 2665,1 2668 | 2666,-1 2669 | 2667,-1 2670 | 2668,-1 2671 | 2669,-1 2672 | 2670,-1 2673 | 2671,-1 2674 | 2672,-1 2675 | 2673,-1 2676 | 2674,-1 2677 | 2675,1 2678 | 2676,-1 2679 | 2677,-1 2680 | 2678,-1 2681 | 2679,1 2682 | 2680,1 2683 | 2681,1 2684 | 2682,-1 2685 | 2683,1 2686 | 2684,-1 2687 | 2685,1 2688 | 2686,-1 2689 | 2687,-1 2690 | 2688,-1 2691 | 2689,-1 2692 | 2690,-1 2693 | 2691,-1 2694 | 2692,-1 2695 | 2693,1 2696 | 2694,-1 2697 | 2695,-1 2698 | 2696,1 2699 | 2697,1 2700 | 2698,-1 2701 | 2699,1 2702 | 2700,1 2703 | 2701,-1 2704 | 2702,-1 2705 | 2703,-1 2706 | 2704,1 2707 | 2705,-1 2708 | 2706,-1 2709 | 2707,-1 2710 | 2708,-1 2711 | 2709,-1 2712 | 2710,1 2713 | 2711,1 2714 | 2712,1 2715 | 2713,-1 2716 | 2714,-1 2717 | 2715,-1 2718 | 2716,-1 2719 | 2717,-1 2720 | 2718,1 2721 | 2719,-1 2722 | 2720,-1 2723 | 2721,-1 2724 | 2722,1 2725 | 2723,1 2726 | 2724,-1 2727 | 2725,1 2728 | 2726,-1 2729 | 2727,1 2730 | 2728,-1 2731 | 2729,-1 2732 | 2730,-1 2733 | 2731,-1 2734 | 2732,-1 2735 | 2733,-1 2736 | 2734,1 2737 | 2735,-1 2738 | 2736,-1 2739 | 2737,-1 2740 | 2738,1 2741 | 2739,-1 2742 | 2740,1 2743 | 2741,-1 2744 | 2742,-1 2745 | 2743,-1 2746 | 2744,-1 2747 | 2745,-1 2748 | 2746,-1 2749 | 2747,1 2750 | 2748,-1 2751 | 2749,-1 2752 | 2750,1 2753 | 2751,-1 2754 | 2752,1 2755 | 2753,-1 2756 | 2754,-1 2757 | 2755,-1 2758 | 2756,-1 2759 | 2757,-1 2760 | 2758,-1 2761 | 2759,-1 2762 | 2760,1 2763 | 2761,1 2764 | 2762,-1 2765 | 2763,-1 2766 | 
2764,-1 2767 | 2765,-1 2768 | 2766,-1 2769 | 2767,-1 2770 | 2768,-1 2771 | 2769,-1 2772 | 2770,1 2773 | 2771,1 2774 | 2772,-1 2775 | 2773,-1 2776 | 2774,-1 2777 | 2775,-1 2778 | 2776,-1 2779 | 2777,-1 2780 | 2778,-1 2781 | 2779,-1 2782 | 2780,-1 2783 | 2781,1 2784 | 2782,1 2785 | 2783,1 2786 | 2784,-1 2787 | 2785,-1 2788 | 2786,-1 2789 | 2787,1 2790 | 2788,1 2791 | 2789,1 2792 | 2790,1 2793 | 2791,-1 2794 | 2792,-1 2795 | 2793,1 2796 | 2794,1 2797 | 2795,1 2798 | 2796,-1 2799 | 2797,-1 2800 | 2798,-1 2801 | 2799,-1 2802 | 2800,-1 2803 | 2801,-1 2804 | 2802,1 2805 | 2803,1 2806 | 2804,-1 2807 | 2805,-1 2808 | 2806,-1 2809 | 2807,-1 2810 | 2808,-1 2811 | 2809,1 2812 | 2810,-1 2813 | 2811,1 2814 | 2812,1 2815 | 2813,1 2816 | 2814,-1 2817 | 2815,-1 2818 | 2816,-1 2819 | 2817,1 2820 | 2818,-1 2821 | 2819,-1 2822 | 2820,1 2823 | 2821,-1 2824 | 2822,-1 2825 | 2823,-1 2826 | 2824,-1 2827 | 2825,1 2828 | 2826,1 2829 | 2827,-1 2830 | 2828,-1 2831 | 2829,1 2832 | 2830,1 2833 | 2831,1 2834 | 2832,1 2835 | 2833,1 2836 | 2834,1 2837 | 2835,1 2838 | 2836,1 2839 | 2837,1 2840 | 2838,1 2841 | 2839,1 2842 | 2840,1 2843 | 2841,1 2844 | 2842,1 2845 | 2843,1 2846 | 2844,1 2847 | 2845,-1 2848 | 2846,-1 2849 | 2847,1 2850 | 2848,1 2851 | 2849,1 2852 | 2850,-1 2853 | 2851,-1 2854 | 2852,1 2855 | 2853,1 2856 | 2854,1 2857 | 2855,-1 2858 | 2856,1 2859 | 2857,-1 2860 | 2858,1 2861 | 2859,1 2862 | 2860,1 2863 | 2861,1 2864 | 2862,-1 2865 | 2863,-1 2866 | 2864,-1 2867 | 2865,1 2868 | 2866,1 2869 | 2867,1 2870 | 2868,1 2871 | 2869,-1 2872 | 2870,-1 2873 | 2871,1 2874 | 2872,1 2875 | 2873,1 2876 | 2874,1 2877 | 2875,-1 2878 | 2876,-1 2879 | 2877,-1 2880 | 2878,1 2881 | 2879,1 2882 | 2880,-1 2883 | 2881,1 2884 | 2882,-1 2885 | 2883,-1 2886 | 2884,-1 2887 | 2885,1 2888 | 2886,1 2889 | 2887,1 2890 | 2888,-1 2891 | 2889,1 2892 | 2890,-1 2893 | 2891,1 2894 | 2892,1 2895 | 2893,-1 2896 | 2894,1 2897 | 2895,-1 2898 | 2896,-1 2899 | 2897,-1 2900 | 2898,1 2901 | 2899,1 2902 | 2900,-1 2903 | 2901,-1 2904 
| 2902,1 2905 | 2903,-1 2906 | 2904,-1 2907 | 2905,1 2908 | 2906,1 2909 | 2907,-1 2910 | 2908,-1 2911 | 2909,1 2912 | 2910,1 2913 | 2911,1 2914 | 2912,1 2915 | 2913,-1 2916 | 2914,1 2917 | 2915,1 2918 | 2916,1 2919 | 2917,-1 2920 | 2918,1 2921 | 2919,-1 2922 | 2920,1 2923 | 2921,-1 2924 | 2922,-1 2925 | 2923,1 2926 | 2924,1 2927 | 2925,-1 2928 | 2926,-1 2929 | 2927,1 2930 | 2928,-1 2931 | 2929,1 2932 | 2930,1 2933 | 2931,-1 2934 | 2932,1 2935 | 2933,-1 2936 | 2934,-1 2937 | 2935,-1 2938 | 2936,1 2939 | 2937,1 2940 | 2938,-1 2941 | 2939,-1 2942 | 2940,-1 2943 | 2941,-1 2944 | 2942,1 2945 | 2943,1 2946 | 2944,1 2947 | 2945,1 2948 | 2946,-1 2949 | 2947,-1 2950 | 2948,-1 2951 | 2949,-1 2952 | 2950,1 2953 | 2951,1 2954 | 2952,1 2955 | 2953,-1 2956 | 2954,-1 2957 | 2955,-1 2958 | 2956,1 2959 | 2957,-1 2960 | 2958,1 2961 | 2959,1 2962 | 2960,-1 2963 | 2961,1 2964 | 2962,-1 2965 | 2963,-1 2966 | 2964,-1 2967 | 2965,1 2968 | 2966,-1 2969 | 2967,-1 2970 | 2968,1 2971 | 2969,1 2972 | 2970,-1 2973 | 2971,1 2974 | 2972,-1 2975 | 2973,1 2976 | 2974,1 2977 | 2975,1 2978 | 2976,-1 2979 | 2977,1 2980 | 2978,1 2981 | 2979,-1 2982 | 2980,-1 2983 | 2981,-1 2984 | 2982,1 2985 | 2983,1 2986 | 2984,-1 2987 | 2985,-1 2988 | 2986,1 2989 | 2987,-1 2990 | 2988,-1 2991 | 2989,-1 2992 | 2990,-1 2993 | 2991,1 2994 | 2992,1 2995 | 2993,1 2996 | 2994,-1 2997 | 2995,-1 2998 | 2996,-1 2999 | 2997,1 3000 | 2998,-1 3001 | 2999,-1 3002 | 3000,1 3003 | 3001,-1 3004 | 3002,-1 3005 | 3003,-1 3006 | 3004,-1 3007 | 3005,1 3008 | 3006,1 3009 | 3007,-1 3010 | 3008,1 3011 | 3009,-1 3012 | 3010,1 3013 | 3011,1 3014 | 3012,1 3015 | 3013,1 3016 | 3014,1 3017 | 3015,1 3018 | 3016,1 3019 | 3017,1 3020 | 3018,1 3021 | 3019,1 3022 | 3020,1 3023 | 3021,1 3024 | 3022,1 3025 | 3023,1 3026 | 3024,1 3027 | 3025,1 3028 | 3026,-1 3029 | 3027,-1 3030 | 3028,1 3031 | 3029,1 3032 | 3030,1 3033 | 3031,-1 3034 | 3032,-1 3035 | 3033,1 3036 | 3034,1 3037 | 3035,1 3038 | 3036,-1 3039 | 3037,1 3040 | 3038,-1 3041 | 3039,1 3042 | 
3040,1 3043 | 3041,1 3044 | 3042,1 3045 | 3043,-1 3046 | 3044,-1 3047 | 3045,-1 3048 | 3046,1 3049 | 3047,1 3050 | 3048,1 3051 | 3049,1 3052 | 3050,-1 3053 | 3051,-1 3054 | 3052,1 3055 | 3053,1 3056 | 3054,1 3057 | 3055,1 3058 | 3056,-1 3059 | 3057,-1 3060 | 3058,-1 3061 | 3059,1 3062 | 3060,1 3063 | 3061,-1 3064 | 3062,1 3065 | 3063,-1 3066 | 3064,-1 3067 | 3065,-1 3068 | 3066,1 3069 | 3067,1 3070 | 3068,1 3071 | 3069,-1 3072 | 3070,1 3073 | 3071,-1 3074 | 3072,1 3075 | 3073,1 3076 | 3074,-1 3077 | 3075,1 3078 | 3076,-1 3079 | 3077,-1 3080 | 3078,-1 3081 | 3079,1 3082 | 3080,1 3083 | 3081,-1 3084 | 3082,-1 3085 | 3083,1 3086 | 3084,-1 3087 | 3085,-1 3088 | 3086,1 3089 | 3087,1 3090 | 3088,-1 3091 | 3089,-1 3092 | 3090,1 3093 | 3091,1 3094 | 3092,1 3095 | 3093,1 3096 | 3094,-1 3097 | 3095,1 3098 | 3096,1 3099 | 3097,1 3100 | 3098,-1 3101 | 3099,1 3102 | 3100,-1 3103 | 3101,1 3104 | 3102,-1 3105 | 3103,-1 3106 | 3104,1 3107 | 3105,1 3108 | 3106,-1 3109 | 3107,-1 3110 | 3108,1 3111 | 3109,-1 3112 | 3110,1 3113 | 3111,1 3114 | 3112,-1 3115 | 3113,1 3116 | 3114,-1 3117 | 3115,-1 3118 | 3116,-1 3119 | 3117,1 3120 | 3118,1 3121 | 3119,-1 3122 | 3120,-1 3123 | 3121,-1 3124 | 3122,-1 3125 | 3123,1 3126 | 3124,1 3127 | 3125,1 3128 | 3126,1 3129 | 3127,-1 3130 | 3128,-1 3131 | 3129,-1 3132 | 3130,-1 3133 | 3131,1 3134 | 3132,1 3135 | 3133,1 3136 | 3134,-1 3137 | 3135,-1 3138 | 3136,-1 3139 | 3137,1 3140 | 3138,-1 3141 | 3139,1 3142 | 3140,1 3143 | 3141,-1 3144 | 3142,1 3145 | 3143,-1 3146 | 3144,-1 3147 | 3145,-1 3148 | 3146,1 3149 | 3147,-1 3150 | 3148,-1 3151 | 3149,1 3152 | 3150,1 3153 | 3151,-1 3154 | 3152,1 3155 | 3153,-1 3156 | 3154,1 3157 | 3155,1 3158 | 3156,1 3159 | 3157,-1 3160 | 3158,1 3161 | 3159,1 3162 | 3160,-1 3163 | 3161,-1 3164 | 3162,-1 3165 | 3163,1 3166 | 3164,1 3167 | 3165,-1 3168 | 3166,-1 3169 | 3167,1 3170 | 3168,-1 3171 | 3169,-1 3172 | 3170,-1 3173 | 3171,-1 3174 | 3172,-1 3175 | 3173,-1 3176 | 3174,1 3177 | 3175,1 3178 | 3176,1 3179 | 3177,-1 3180 
| 3178,1 3181 | 3179,1 3182 | 3180,1 3183 | 3181,1 3184 | 3182,1 3185 | 3183,-1 3186 | 3184,1 3187 | 3185,1 3188 | 3186,-1 3189 | 3187,-1 3190 | 3188,-1 3191 | 3189,1 3192 | 3190,-1 3193 | 3191,1 3194 | 3192,-1 3195 | 3193,-1 3196 | 3194,-1 3197 | 3195,1 3198 | 3196,-1 3199 | 3197,-1 3200 | 3198,1 3201 | 3199,1 3202 | 3200,-1 3203 | 3201,-1 3204 | 3202,1 3205 | 3203,1 3206 | 3204,1 3207 | 3205,1 3208 | 3206,1 3209 | 3207,-1 3210 | 3208,-1 3211 | 3209,1 3212 | 3210,-1 3213 | 3211,1 3214 | 3212,1 3215 | 3213,-1 3216 | 3214,1 3217 | 3215,1 3218 | 3216,1 3219 | 3217,1 3220 | 3218,-1 3221 | 3219,-1 3222 | 3220,1 3223 | 3221,1 3224 | 3222,1 3225 | 3223,1 3226 | 3224,-1 3227 | 3225,1 3228 | 3226,1 3229 | 3227,1 3230 | 3228,-1 3231 | 3229,1 3232 | 3230,1 3233 | 3231,1 3234 | 3232,1 3235 | 3233,-1 3236 | 3234,1 3237 | 3235,-1 3238 | 3236,1 3239 | 3237,1 3240 | 3238,-1 3241 | 3239,-1 3242 | 3240,1 3243 | 3241,1 3244 | 3242,-1 3245 | 3243,1 3246 | 3244,1 3247 | 3245,1 3248 | 3246,1 3249 | 3247,-1 3250 | 3248,-1 3251 | 3249,1 3252 | 3250,-1 3253 | 3251,1 3254 | 3252,1 3255 | 3253,1 3256 | 3254,1 3257 | 3255,-1 3258 | 3256,1 3259 | 3257,-1 3260 | 3258,1 3261 | 3259,-1 3262 | 3260,-1 3263 | 3261,1 3264 | 3262,1 3265 | 3263,-1 3266 | 3264,1 3267 | 3265,1 3268 | 3266,-1 3269 | 3267,1 3270 | 3268,-1 3271 | 3269,-1 3272 | 3270,-1 3273 | 3271,-1 3274 | 3272,-1 3275 | 3273,-1 3276 | 3274,-1 3277 | 3275,1 3278 | 3276,1 3279 | 3277,-1 3280 | 3278,1 3281 | 3279,1 3282 | 3280,-1 3283 | 3281,1 3284 | 3282,-1 3285 | 3283,1 3286 | 3284,-1 3287 | 3285,-1 3288 | 3286,1 3289 | 3287,1 3290 | 3288,-1 3291 | 3289,1 3292 | 3290,1 3293 | 3291,-1 3294 | 3292,-1 3295 | 3293,-1 3296 | 3294,-1 3297 | 3295,-1 3298 | 3296,1 3299 | 3297,-1 3300 | 3298,-1 3301 | 3299,1 3302 | 3300,-1 3303 | 3301,1 3304 | -------------------------------------------------------------------------------- /Prediction_Raw_Data_Validation/__pycache__/predictionDataValidation.cpython-36.pyc: 
class Prediction_Data_validation:
    """
    This class shall be used for handling all the validation done on the Raw Prediction Data!!.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None

    """

    def __init__(self, path):
        # Directory that holds the incoming raw prediction batch files.
        self.Batch_Directory = path
        # Pre-defined schema describing expected file-name format and columns.
        self.schema_path = 'schema_prediction.json'
        self.logger = App_Logger()

    def valuesFromSchema(self):
        """
        Method Name: valuesFromSchema
        Description: This method extracts all the relevant information from the pre-defined "Schema" file.
        Output: LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, Number of Columns
        On Failure: Raise ValueError, KeyError, Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        try:
            with open(self.schema_path, 'r') as f:
                dic = json.load(f)
            # Reading 'SampleFileName' is kept so a schema missing that key still
            # raises KeyError, as before (the value itself is unused here).
            pattern = dic['SampleFileName']
            LengthOfDateStampInFile = dic['LengthOfDateStampInFile']
            LengthOfTimeStampInFile = dic['LengthOfTimeStampInFile']
            column_names = dic['ColName']
            NumberofColumns = dic['NumberofColumns']

            message = ("LengthOfDateStampInFile:: %s" % LengthOfDateStampInFile + "\t" +
                       "LengthOfTimeStampInFile:: %s" % LengthOfTimeStampInFile + "\t " +
                       "NumberofColumns:: %s" % NumberofColumns + "\n")
            # BUGFIX: this success message previously went to Training_Logs/;
            # the prediction pipeline logs to Prediction_Logs/ (as every
            # failure handler below already did).
            with open("Prediction_Logs/valuesfromSchemaValidationLog.txt", 'a+') as file:
                self.logger.log(file, message)

        except ValueError:
            with open("Prediction_Logs/valuesfromSchemaValidationLog.txt", 'a+') as file:
                # BUGFIX: message used to name schema_training.json; this class
                # reads schema_prediction.json.
                self.logger.log(file, "ValueError:Value not found inside schema_prediction.json")
            raise

        except KeyError:
            with open("Prediction_Logs/valuesfromSchemaValidationLog.txt", 'a+') as file:
                self.logger.log(file, "KeyError:Key value error incorrect key passed")
            raise

        except Exception as e:
            with open("Prediction_Logs/valuesfromSchemaValidationLog.txt", 'a+') as file:
                self.logger.log(file, str(e))
            raise

        return LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, NumberofColumns

    def manualRegexCreation(self):
        """
        Method Name: manualRegexCreation
        Description: This method contains a manually defined regex based on the "FileName" given in "Schema" file.
                     This Regex is used to validate the filename of the prediction data.
        Output: Regex pattern
        On Failure: None

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        # BUGFIX: the previous pattern "['phising']+['\_'']+[\d_]+[\d]+\.csv"
        # used character classes, so it accepted any jumble of those letters
        # (e.g. "gnisihp_1_1.csv") rather than the literal prefix "phising".
        # This pattern requires the literal prefix and two underscore-separated
        # digit groups; the exact stamp lengths are still validated separately
        # in validationFileNameRaw, so valid file names are routed as before.
        regex = r"phising_\d+_\d+\.csv"
        return regex

    def createDirectoryForGoodBadRawData(self):
        """
        Method Name: createDirectoryForGoodBadRawData
        Description: This method creates directories to store the Good Data and Bad Data
                     after validating the prediction data.
        Output: None
        On Failure: OSError

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        try:
            for dirname in ("Good_Raw/", "Bad_Raw/"):
                path = os.path.join("Prediction_Raw_Files_Validated/", dirname)
                if not os.path.isdir(path):
                    os.makedirs(path)

        except OSError as ex:
            with open("Prediction_Logs/GeneralLog.txt", 'a+') as file:
                self.logger.log(file, "Error while creating Directory %s:" % ex)
            raise

    def deleteExistingGoodDataTrainingFolder(self):
        """
        Method Name: deleteExistingGoodDataTrainingFolder
        Description: This method deletes the directory made to store the Good Data
                     after loading the data in the table. Once the good files are
                     loaded in the DB, deleting the directory ensures space optimization.
        Output: None
        On Failure: OSError

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        try:
            path = 'Prediction_Raw_Files_Validated/'
            if os.path.isdir(path + 'Good_Raw/'):
                shutil.rmtree(path + 'Good_Raw/')
                with open("Prediction_Logs/GeneralLog.txt", 'a+') as file:
                    self.logger.log(file, "GoodRaw directory deleted successfully!!!")
        except OSError:
            with open("Prediction_Logs/GeneralLog.txt", 'a+') as file:
                self.logger.log(file, "Error while Deleting Directory : %s" % OSError)
            raise

    def deleteExistingBadDataTrainingFolder(self):
        """
        Method Name: deleteExistingBadDataTrainingFolder
        Description: This method deletes the directory made to store the bad Data.
        Output: None
        On Failure: OSError

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        try:
            path = 'Prediction_Raw_Files_Validated/'
            if os.path.isdir(path + 'Bad_Raw/'):
                shutil.rmtree(path + 'Bad_Raw/')
                with open("Prediction_Logs/GeneralLog.txt", 'a+') as file:
                    self.logger.log(file, "BadRaw directory deleted before starting validation!!!")
        except OSError:
            with open("Prediction_Logs/GeneralLog.txt", 'a+') as file:
                self.logger.log(file, "Error while Deleting Directory : %s" % OSError)
            raise

    def moveBadFilesToArchiveBad(self):
        """
        Method Name: moveBadFilesToArchiveBad
        Description: This method deletes the directory made to store the Bad Data
                     after moving the data in an archive folder. We archive the bad
                     files to send them back to the client for invalid data issue.
        Output: None
        On Failure: OSError

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        now = datetime.now()
        date = now.date()
        time = now.strftime("%H%M%S")
        try:
            path = "PredictionArchivedBadData"
            if not os.path.isdir(path):
                os.makedirs(path)
            source = 'Prediction_Raw_Files_Validated/Bad_Raw/'
            # Time-stamped archive folder so repeated runs never collide.
            dest = 'PredictionArchivedBadData/BadData_' + str(date) + "_" + str(time)
            if not os.path.isdir(dest):
                os.makedirs(dest)
            for f in os.listdir(source):
                if f not in os.listdir(dest):
                    shutil.move(source + f, dest)
            with open("Prediction_Logs/GeneralLog.txt", 'a+') as file:
                self.logger.log(file, "Bad files moved to archive")
                path = 'Prediction_Raw_Files_Validated/'
                if os.path.isdir(path + 'Bad_Raw/'):
                    shutil.rmtree(path + 'Bad_Raw/')
                self.logger.log(file, "Bad Raw Data Folder Deleted successfully!!")
        except OSError as e:
            with open("Prediction_Logs/GeneralLog.txt", 'a+') as file:
                self.logger.log(file, "Error while moving bad files to archive:: %s" % e)
            raise

    def validationFileNameRaw(self, regex, LengthOfDateStampInFile, LengthOfTimeStampInFile):
        """
        Method Name: validationFileNameRaw
        Description: This function validates the name of the prediction csv file as per given name in the schema!
                     Regex pattern is used to do the validation. If name format do not match the file is moved
                     to Bad Raw Data folder else in Good raw data.
        Output: None
        On Failure: Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        # Delete the directories for good and bad data in case last run was
        # unsuccessful and folders were not deleted, then recreate them fresh.
        self.deleteExistingBadDataTrainingFolder()
        self.deleteExistingGoodDataTrainingFolder()
        self.createDirectoryForGoodBadRawData()
        onlyfiles = [f for f in listdir(self.Batch_Directory)]
        try:
            with open("Prediction_Logs/nameValidationLog.txt", 'a+') as f:
                for filename in onlyfiles:
                    # BUGFIX: copy from the configured batch directory instead of
                    # the hard-coded "Prediction_Batch_files/" path, so a custom
                    # path passed to __init__ actually works.
                    src = os.path.join(self.Batch_Directory, filename)
                    if re.match(regex, filename):
                        # "phising_<date>_<time>.csv" -> ['phising', '<date>', '<time>']
                        splitAtDot = re.split('.csv', filename)
                        splitAtDot = re.split('_', splitAtDot[0])
                        if (len(splitAtDot[1]) == LengthOfDateStampInFile
                                and len(splitAtDot[2]) == LengthOfTimeStampInFile):
                            shutil.copy(src, "Prediction_Raw_Files_Validated/Good_Raw")
                            self.logger.log(f, "Valid File name!! File moved to GoodRaw Folder :: %s" % filename)
                        else:
                            shutil.copy(src, "Prediction_Raw_Files_Validated/Bad_Raw")
                            self.logger.log(f, "Invalid File Name!! File moved to Bad Raw Folder :: %s" % filename)
                    else:
                        shutil.copy(src, "Prediction_Raw_Files_Validated/Bad_Raw")
                        self.logger.log(f, "Invalid File Name!! File moved to Bad Raw Folder :: %s" % filename)

        except Exception as e:
            with open("Prediction_Logs/nameValidationLog.txt", 'a+') as f:
                self.logger.log(f, "Error occured while validating FileName %s" % e)
            raise

    def validateColumnLength(self, NumberofColumns):
        """
        Method Name: validateColumnLength
        Description: This function validates the number of columns in the csv files.
                     It should be same as given in the schema file.
                     If not same, file is not suitable for processing and thus is moved to Bad Raw Data folder.
                     If the column number matches, file is kept in Good Raw Data for processing.
        Output: None
        On Failure: Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        try:
            # `with` guarantees the log handle is closed even when the loop
            # raises (the original leaked it on the exception path).
            with open("Prediction_Logs/columnValidationLog.txt", 'a+') as f:
                self.logger.log(f, "Column Length Validation Started!!")
                for file in listdir('Prediction_Raw_Files_Validated/Good_Raw/'):
                    csv = pd.read_csv("Prediction_Raw_Files_Validated/Good_Raw/" + file)
                    if csv.shape[1] == NumberofColumns:
                        # Re-write to normalize the csv on disk, as before.
                        csv.to_csv("Prediction_Raw_Files_Validated/Good_Raw/" + file, index=None, header=True)
                    else:
                        shutil.move("Prediction_Raw_Files_Validated/Good_Raw/" + file,
                                    "Prediction_Raw_Files_Validated/Bad_Raw")
                        self.logger.log(f, "Invalid Column Length for the file!! File moved to Bad Raw Folder :: %s" % file)
                self.logger.log(f, "Column Length Validation Completed!!")
        except OSError:
            with open("Prediction_Logs/columnValidationLog.txt", 'a+') as f:
                self.logger.log(f, "Error Occured while moving the file :: %s" % OSError)
            raise
        except Exception as e:
            with open("Prediction_Logs/columnValidationLog.txt", 'a+') as f:
                self.logger.log(f, "Error Occured:: %s" % e)
            raise

    def deletePredictionFile(self):
        """Remove the previous prediction output file, if any, so a new run starts clean."""
        if os.path.exists('Prediction_Output_File/Predictions.csv'):
            os.remove('Prediction_Output_File/Predictions.csv')

    def validateMissingValuesInWholeColumn(self):
        """
        Method Name: validateMissingValuesInWholeColumn
        Description: This function validates if any column in the csv file has all values missing.
                     If all the values are missing, the file is not suitable for processing.
                     Such files are moved to bad raw data.
        Output: None
        On Failure: Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        try:
            with open("Prediction_Logs/missingValuesInColumn.txt", 'a+') as f:
                self.logger.log(f, "Missing Values Validation Started!!")

                for file in listdir('Prediction_Raw_Files_Validated/Good_Raw/'):
                    csv = pd.read_csv("Prediction_Raw_Files_Validated/Good_Raw/" + file)
                    count = 0
                    for columns in csv:
                        # A column whose non-null count is zero is entirely missing.
                        if (len(csv[columns]) - csv[columns].count()) == len(csv[columns]):
                            count += 1
                            shutil.move("Prediction_Raw_Files_Validated/Good_Raw/" + file,
                                        "Prediction_Raw_Files_Validated/Bad_Raw")
                            self.logger.log(f, "Invalid Column Length for the file!! File moved to Bad Raw Folder :: %s" % file)
                            break
                    if count == 0:
                        # NOTE(review): "Unnamed: 0" -> "Wafer" looks inherited from the
                        # wafer project; phishing csvs have no such column so it is a
                        # no-op here. Kept for behavioral parity — confirm before removal.
                        csv.rename(columns={"Unnamed: 0": "Wafer"}, inplace=True)
                        csv.to_csv("Prediction_Raw_Files_Validated/Good_Raw/" + file, index=None, header=True)
        except OSError:
            with open("Prediction_Logs/missingValuesInColumn.txt", 'a+') as f:
                self.logger.log(f, "Error Occured while moving the file :: %s" % OSError)
            raise
        except Exception as e:
            with open("Prediction_Logs/missingValuesInColumn.txt", 'a+') as f:
                self.logger.log(f, "Error Occured:: %s" % e)
            raise
| web: python main.py --master --processes 4 --threads 2 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Phising-Classifier -------------------------------------------------------------------------------- /Training_Logs/DataBaseConnectionLog.txt: -------------------------------------------------------------------------------- 1 | 2020-02-16/16:36:05 Opened Training database successfully 2 | 2020-02-16/16:36:36 Closed Training database successfully 3 | 2020-02-16/16:36:44 Opened Training database successfully 4 | 2020-02-16/16:44:53 Opened Training database successfully 5 | 2020-02-16/17:27:46 Opened Training database successfully 6 | 2020-02-16/17:27:48 Closed Training database successfully 7 | 2020-02-16/17:27:48 Opened Training database successfully 8 | 2020-02-16/17:36:58 Opened Training database successfully 9 | -------------------------------------------------------------------------------- /Training_Logs/DbTableCreateLog.txt: -------------------------------------------------------------------------------- 1 | 2020-02-16/16:36:34 Tables created successfully!! 2 | 2020-02-16/17:27:48 Tables created successfully!! 3 | -------------------------------------------------------------------------------- /Training_Logs/ExportToCsv.txt: -------------------------------------------------------------------------------- 1 | 2020-02-16/16:44:53 File exported successfully!!! 2 | 2020-02-16/17:36:58 File exported successfully!!! 3 | -------------------------------------------------------------------------------- /Training_Logs/GeneralLog.txt: -------------------------------------------------------------------------------- 1 | 2020-02-16/16:44:49 GoodRaw directory deleted successfully!!! 2 | 2020-02-16/16:44:50 Bad files moved to archive 3 | 2020-02-16/16:44:50 Bad Raw Data Folder Deleted successfully!! 
4 | 2020-02-16/17:36:58 GoodRaw directory deleted successfully!!! 5 | 2020-02-16/17:36:58 Bad files moved to archive 6 | 2020-02-16/17:36:58 Bad Raw Data Folder Deleted successfully!! 7 | -------------------------------------------------------------------------------- /Training_Logs/ModelTrainingLog.txt: -------------------------------------------------------------------------------- 1 | 2020-02-16/16:54:21 Start of Training 2 | 2020-02-16/16:54:24 Entered the get_data method of the Data_Getter class 3 | 2020-02-16/16:54:24 Data Load Successful.Exited the get_data method of the Data_Getter class 4 | 2020-02-16/16:55:11 Entered the is_null_present method of the Preprocessor class 5 | 2020-02-16/16:55:11 Finding missing values is a success.Data written to the null values file. Exited the is_null_present method of the Preprocessor class 6 | 2020-02-16/16:55:16 Entered the separate_label_feature method of the Preprocessor class 7 | 2020-02-16/16:55:16 Label Separation Successful. Exited the separate_label_feature method of the Preprocessor class 8 | 2020-02-16/16:55:19 Entered the elbow_plot method of the KMeansClustering class 9 | 2020-02-16/16:55:23 The optimum number of clusters is: 4 . Exited the elbow_plot method of the KMeansClustering class 10 | 2020-02-16/16:55:29 Entered the create_clusters method of the KMeansClustering class 11 | 2020-02-16/16:55:29 Entered the save_model method of the File_Operation class 12 | 2020-02-16/16:55:29 Model File KMeans saved. Exited the save_model method of the Model_Finder class 13 | 2020-02-16/16:55:29 succesfully created 4clusters. Exited the create_clusters method of the KMeansClustering class 14 | 2020-02-16/16:55:49 Entered the get_best_model method of the Model_Finder class 15 | 2020-02-16/16:55:56 Entered the get_best_params_for_xgboost method of the Model_Finder class 16 | 2020-02-16/16:56:51 XGBoost best params: {'criterion': 'gini', 'max_depth': 8, 'n_estimators': 130}. 
Exited the get_best_params_for_xgboost method of the Model_Finder class 17 | 2020-02-16/16:57:09 AUC for XGBoost:0.9892971611721613 18 | 2020-02-16/16:57:13 Entered the get_best_params_for_svm method of the Model_Finder class 19 | 2020-02-16/16:57:34 SVM best params: {'C': 1.0, 'kernel': 'rbf', 'random_state': 0}. Exited the get_best_params_for_svm method of the Model_Finder class 20 | 2020-02-16/16:57:41 AUC for SVM:0.9725274725274725 21 | 2020-02-16/16:57:47 Entered the save_model method of the File_Operation class 22 | 2020-02-16/16:57:47 Model File XGBoost1 saved. Exited the save_model method of the Model_Finder class 23 | 2020-02-16/16:57:51 Entered the get_best_model method of the Model_Finder class 24 | 2020-02-16/16:57:54 Entered the get_best_params_for_xgboost method of the Model_Finder class 25 | 2020-02-16/16:58:24 XGBoost best params: {'criterion': 'gini', 'max_depth': 8, 'n_estimators': 100}. Exited the get_best_params_for_xgboost method of the Model_Finder class 26 | 2020-02-16/16:58:24 AUC for XGBoost:0.9492019916968935 27 | 2020-02-16/16:58:24 Entered the get_best_params_for_svm method of the Model_Finder class 28 | 2020-02-16/16:58:38 SVM best params: {'C': 1.0, 'kernel': 'rbf', 'random_state': 0}. Exited the get_best_params_for_svm method of the Model_Finder class 29 | 2020-02-16/16:58:52 AUC for SVM:0.894062361310595 30 | 2020-02-16/16:58:59 Entered the save_model method of the File_Operation class 31 | 2020-02-16/16:58:59 Model File XGBoost2 saved. Exited the save_model method of the Model_Finder class 32 | 2020-02-16/16:58:59 Entered the get_best_model method of the Model_Finder class 33 | 2020-02-16/16:59:00 Entered the get_best_params_for_xgboost method of the Model_Finder class 34 | 2020-02-16/16:59:23 XGBoost best params: {'criterion': 'gini', 'max_depth': 9, 'n_estimators': 130}. 
Exited the get_best_params_for_xgboost method of the Model_Finder class 35 | 2020-02-16/16:59:23 AUC for XGBoost:0.9697356478329929 36 | 2020-02-16/16:59:23 Entered the get_best_params_for_svm method of the Model_Finder class 37 | 2020-02-16/16:59:31 SVM best params: {'C': 1.0, 'kernel': 'rbf', 'random_state': 0}. Exited the get_best_params_for_svm method of the Model_Finder class 38 | 2020-02-16/16:59:36 AUC for SVM:0.9144525045382346 39 | 2020-02-16/16:59:36 Entered the save_model method of the File_Operation class 40 | 2020-02-16/16:59:36 Model File XGBoost0 saved. Exited the save_model method of the Model_Finder class 41 | 2020-02-16/16:59:38 Entered the get_best_model method of the Model_Finder class 42 | 2020-02-16/16:59:41 Entered the get_best_params_for_xgboost method of the Model_Finder class 43 | 2020-02-16/16:59:50 XGBoost best params: {'criterion': 'gini', 'max_depth': 9, 'n_estimators': 130}. Exited the get_best_params_for_xgboost method of the Model_Finder class 44 | 2020-02-16/16:59:50 AUC for XGBoost:0.9857723577235773 45 | 2020-02-16/16:59:50 Entered the get_best_params_for_svm method of the Model_Finder class 46 | 2020-02-16/16:59:53 SVM best params: {'C': 1.0, 'kernel': 'rbf', 'random_state': 0}. Exited the get_best_params_for_svm method of the Model_Finder class 47 | 2020-02-16/16:59:56 AUC for SVM:0.9785315040650407 48 | 2020-02-16/16:59:56 Entered the save_model method of the File_Operation class 49 | 2020-02-16/16:59:56 Model File XGBoost3 saved. 
Exited the save_model method of the Model_Finder class 50 | 2020-02-16/16:59:56 Successful End of Training 51 | 2020-02-16/17:36:58 Start of Training 52 | 2020-02-16/17:36:58 Entered the get_data method of the Data_Getter class 53 | 2020-02-16/17:36:58 Data Load Successful.Exited the get_data method of the Data_Getter class 54 | 2020-02-16/17:36:58 Entered the is_null_present method of the Preprocessor class 55 | 2020-02-16/17:36:58 Finding missing values is a success.Data written to the null values file. Exited the is_null_present method of the Preprocessor class 56 | 2020-02-16/17:36:58 Entered the separate_label_feature method of the Preprocessor class 57 | 2020-02-16/17:36:58 Label Separation Successful. Exited the separate_label_feature method of the Preprocessor class 58 | 2020-02-16/17:36:58 Entered the elbow_plot method of the KMeansClustering class 59 | 2020-02-16/17:37:05 The optimum number of clusters is: 4 . Exited the elbow_plot method of the KMeansClustering class 60 | 2020-02-16/17:37:05 Entered the create_clusters method of the KMeansClustering class 61 | 2020-02-16/17:37:05 Entered the save_model method of the File_Operation class 62 | 2020-02-16/17:37:05 Model File KMeans saved. Exited the save_model method of the Model_Finder class 63 | 2020-02-16/17:37:05 succesfully created 4clusters. Exited the create_clusters method of the KMeansClustering class 64 | 2020-02-16/17:37:05 Entered the get_best_model method of the Model_Finder class 65 | 2020-02-16/17:37:05 Entered the get_best_params_for_xgboost method of the Model_Finder class 66 | 2020-02-16/17:37:17 XGBoost best params: {'criterion': 'gini', 'max_depth': 8, 'n_estimators': 130}. Exited the get_best_params_for_xgboost method of the Model_Finder class 67 | 2020-02-16/17:37:17 AUC for XGBoost:0.9892971611721613 68 | 2020-02-16/17:37:17 Entered the get_best_params_for_svm method of the Model_Finder class 69 | 2020-02-16/17:37:19 SVM best params: {'C': 1.0, 'kernel': 'rbf', 'random_state': 0}. 
Exited the get_best_params_for_svm method of the Model_Finder class 70 | 2020-02-16/17:37:19 AUC for SVM:0.9725274725274725 71 | 2020-02-16/17:37:19 Entered the save_model method of the File_Operation class 72 | 2020-02-16/17:37:19 Model File XGBoost1 saved. Exited the save_model method of the Model_Finder class 73 | 2020-02-16/17:37:19 Entered the get_best_model method of the Model_Finder class 74 | 2020-02-16/17:37:19 Entered the get_best_params_for_xgboost method of the Model_Finder class 75 | 2020-02-16/17:38:08 XGBoost best params: {'criterion': 'gini', 'max_depth': 8, 'n_estimators': 100}. Exited the get_best_params_for_xgboost method of the Model_Finder class 76 | 2020-02-16/17:38:08 AUC for XGBoost:0.9492019916968935 77 | 2020-02-16/17:38:08 Entered the get_best_params_for_svm method of the Model_Finder class 78 | 2020-02-16/17:38:33 SVM best params: {'C': 1.0, 'kernel': 'rbf', 'random_state': 0}. Exited the get_best_params_for_svm method of the Model_Finder class 79 | 2020-02-16/17:38:33 AUC for SVM:0.894062361310595 80 | 2020-02-16/17:38:34 Entered the save_model method of the File_Operation class 81 | 2020-02-16/17:38:34 Model File XGBoost2 saved. Exited the save_model method of the Model_Finder class 82 | 2020-02-16/17:38:34 Entered the get_best_model method of the Model_Finder class 83 | 2020-02-16/17:38:34 Entered the get_best_params_for_xgboost method of the Model_Finder class 84 | 2020-02-16/17:39:11 XGBoost best params: {'criterion': 'gini', 'max_depth': 9, 'n_estimators': 130}. Exited the get_best_params_for_xgboost method of the Model_Finder class 85 | 2020-02-16/17:39:11 AUC for XGBoost:0.9697356478329929 86 | 2020-02-16/17:39:11 Entered the get_best_params_for_svm method of the Model_Finder class 87 | 2020-02-16/17:39:25 SVM best params: {'C': 1.0, 'kernel': 'rbf', 'random_state': 0}. 
Exited the get_best_params_for_svm method of the Model_Finder class 88 | 2020-02-16/17:39:25 AUC for SVM:0.9144525045382346 89 | 2020-02-16/17:39:25 Entered the save_model method of the File_Operation class 90 | 2020-02-16/17:39:25 Model File XGBoost0 saved. Exited the save_model method of the Model_Finder class 91 | 2020-02-16/17:39:25 Entered the get_best_model method of the Model_Finder class 92 | 2020-02-16/17:39:25 Entered the get_best_params_for_xgboost method of the Model_Finder class 93 | 2020-02-16/17:39:40 XGBoost best params: {'criterion': 'gini', 'max_depth': 9, 'n_estimators': 130}. Exited the get_best_params_for_xgboost method of the Model_Finder class 94 | 2020-02-16/17:39:40 AUC for XGBoost:0.9857723577235773 95 | 2020-02-16/17:39:40 Entered the get_best_params_for_svm method of the Model_Finder class 96 | 2020-02-16/17:39:44 SVM best params: {'C': 1.0, 'kernel': 'rbf', 'random_state': 0}. Exited the get_best_params_for_svm method of the Model_Finder class 97 | 2020-02-16/17:39:44 AUC for SVM:0.9785315040650407 98 | 2020-02-16/17:39:44 Entered the save_model method of the File_Operation class 99 | 2020-02-16/17:39:44 Model File XGBoost3 saved. Exited the save_model method of the Model_Finder class 100 | 2020-02-16/17:39:44 Successful End of Training 101 | -------------------------------------------------------------------------------- /Training_Logs/Training_Main_Log.txt: -------------------------------------------------------------------------------- 1 | 2020-02-16/16:33:22 Start of Validation on files for prediction!! 2 | 2020-02-16/16:34:54 Raw Data Validation Complete!! 3 | 2020-02-16/16:34:54 Starting Data Transforamtion!! 4 | 2020-02-16/16:35:43 DataTransformation Completed!!! 5 | 2020-02-16/16:35:46 Creating Training_Database and tables on the basis of given schema!!! 6 | 2020-02-16/16:36:39 Table creation Completed!! 7 | 2020-02-16/16:36:40 Insertion of Data into Table started!!!! 8 | 2020-02-16/16:44:47 Insertion in Table completed!!! 
9 | 2020-02-16/16:44:48 Deleting Good Data Folder!!! 10 | 2020-02-16/16:44:49 Good_Data folder deleted!!! 11 | 2020-02-16/16:44:50 Moving bad files to Archive and deleting Bad_Data folder!!! 12 | 2020-02-16/16:44:51 Bad files moved to archive!! Bad folder Deleted!! 13 | 2020-02-16/16:44:51 Validation Operation completed!! 14 | 2020-02-16/16:44:52 Extracting csv file from table 15 | 2020-02-16/17:27:45 Start of Validation on files for prediction!! 16 | 2020-02-16/17:27:46 Raw Data Validation Complete!! 17 | 2020-02-16/17:27:46 Starting Data Transforamtion!! 18 | 2020-02-16/17:27:46 DataTransformation Completed!!! 19 | 2020-02-16/17:27:46 Creating Training_Database and tables on the basis of given schema!!! 20 | 2020-02-16/17:27:48 Table creation Completed!! 21 | 2020-02-16/17:27:48 Insertion of Data into Table started!!!! 22 | 2020-02-16/17:36:58 Insertion in Table completed!!! 23 | 2020-02-16/17:36:58 Deleting Good Data Folder!!! 24 | 2020-02-16/17:36:58 Good_Data folder deleted!!! 25 | 2020-02-16/17:36:58 Moving bad files to Archive and deleting Bad_Data folder!!! 26 | 2020-02-16/17:36:58 Bad files moved to archive!! Bad folder Deleted!! 27 | 2020-02-16/17:36:58 Validation Operation completed!! 28 | 2020-02-16/17:36:58 Extracting csv file from table 29 | -------------------------------------------------------------------------------- /Training_Logs/addQuotesToStringValuesInColumn.txt: -------------------------------------------------------------------------------- 1 | 2020-02-16/16:35:37 phising_08012020_120000.csv: Quotes added successfully!! 2 | 2020-02-16/17:27:46 phising_08012020_120000.csv: Quotes added successfully!! 3 | -------------------------------------------------------------------------------- /Training_Logs/columnValidationLog.txt: -------------------------------------------------------------------------------- 1 | 2020-02-16/16:33:52 Column Length Validation Started!! 2 | 2020-02-16/16:34:01 Column Length Validation Completed!! 
3 | 2020-02-16/17:27:45 Column Length Validation Started!! 4 | 2020-02-16/17:27:45 Column Length Validation Completed!! 5 | -------------------------------------------------------------------------------- /Training_Logs/missingValuesInColumn.txt: -------------------------------------------------------------------------------- 1 | 2020-02-16/16:34:09 Missing Values Validation Started!! 2 | 2020-02-16/17:27:45 Missing Values Validation Started!! 3 | -------------------------------------------------------------------------------- /Training_Logs/nameValidationLog.txt: -------------------------------------------------------------------------------- 1 | 2020-02-16/16:33:43 Valid File name!! File moved to GoodRaw Folder :: phising_08012020_120000.csv 2 | 2020-02-16/17:27:45 Valid File name!! File moved to GoodRaw Folder :: phising_08012020_120000.csv 3 | -------------------------------------------------------------------------------- /Training_Logs/valuesfromSchemaValidationLog.txt: -------------------------------------------------------------------------------- 1 | 2020-02-16/16:33:24 LengthOfDateStampInFile:: 8 LengthOfTimeStampInFile:: 6 NumberofColumns:: 31 2 | 3 | 2020-02-16/17:08:09 LengthOfDateStampInFile:: 8 LengthOfTimeStampInFile:: 6 NumberofColumns:: 30 4 | 5 | 2020-02-16/17:10:23 LengthOfDateStampInFile:: 8 LengthOfTimeStampInFile:: 6 NumberofColumns:: 30 6 | 7 | 2020-02-16/17:14:55 LengthOfDateStampInFile:: 8 LengthOfTimeStampInFile:: 6 NumberofColumns:: 30 8 | 9 | 2020-02-16/17:17:39 LengthOfDateStampInFile:: 8 LengthOfTimeStampInFile:: 6 NumberofColumns:: 30 10 | 11 | 2020-02-16/17:27:45 LengthOfDateStampInFile:: 8 LengthOfTimeStampInFile:: 6 NumberofColumns:: 31 12 | 13 | 2020-02-16/17:40:30 LengthOfDateStampInFile:: 8 LengthOfTimeStampInFile:: 6 NumberofColumns:: 30 14 | 15 | -------------------------------------------------------------------------------- /Training_Raw_data_validation/__pycache__/rawValidation.cpython-36.pyc: 
import sqlite3
from datetime import datetime
from os import listdir
import os
import re
import json
import shutil
import pandas as pd
from application_logging.logger import App_Logger


class Raw_Data_validation:
    """
    This class shall be used for handling all the validation done on the Raw Training Data!!.

    Written By: iNeuron Intelligence
    Version: 1.1
    Revisions: log files are context-managed (no handle leaks on error paths),
               filename regex tightened, source directory no longer hard-coded
               in validationFileNameRaw, original exceptions re-raised.
    """

    def __init__(self, path):
        self.Batch_Directory = path                # directory holding the incoming raw batch files
        self.schema_path = 'schema_training.json'  # pre-agreed schema describing valid training data
        self.logger = App_Logger()

    def valuesFromSchema(self):
        """
        Method Name: valuesFromSchema
        Description: This method extracts all the relevant information from the pre-defined "Schema" file.
        Output: LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, NumberofColumns
        On Failure: Raise ValueError, KeyError, Exception

        Written By: iNeuron Intelligence
        Version: 1.1
        Revisions: `with` used for all file handles; bare `raise` preserves the
                   original exception instance and traceback.
        """
        try:
            with open(self.schema_path, 'r') as f:
                dic = json.load(f)
            # Read every expected key so a malformed schema fails fast with KeyError.
            pattern = dic['SampleFileName']  # presence check only; value unused here
            LengthOfDateStampInFile = dic['LengthOfDateStampInFile']
            LengthOfTimeStampInFile = dic['LengthOfTimeStampInFile']
            column_names = dic['ColName']
            NumberofColumns = dic['NumberofColumns']

            message = ("LengthOfDateStampInFile:: %s" % LengthOfDateStampInFile + "\t" +
                       "LengthOfTimeStampInFile:: %s" % LengthOfTimeStampInFile + "\t " +
                       "NumberofColumns:: %s" % NumberofColumns + "\n")
            with open("Training_Logs/valuesfromSchemaValidationLog.txt", 'a+') as file:
                self.logger.log(file, message)

        except ValueError:
            with open("Training_Logs/valuesfromSchemaValidationLog.txt", 'a+') as file:
                self.logger.log(file, "ValueError:Value not found inside schema_training.json")
            raise  # re-raise the caught instance instead of a bare class
        except KeyError:
            with open("Training_Logs/valuesfromSchemaValidationLog.txt", 'a+') as file:
                self.logger.log(file, "KeyError:Key value error incorrect key passed")
            raise
        except Exception as e:
            with open("Training_Logs/valuesfromSchemaValidationLog.txt", 'a+') as file:
                self.logger.log(file, str(e))
            raise

        return LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, NumberofColumns

    def manualRegexCreation(self):
        """
        Method Name: manualRegexCreation
        Description: Returns the regex used to validate training data file names.
                     FIX: the previous pattern "['phising']+['\\_'']+[\\d_]+[\\d]+\\.csv"
                     used character classes, so it also accepted arbitrary shuffles of
                     those letters (e.g. "gnisihp_..."). This anchored literal accepts
                     exactly "phising_<digits>_<digits>.csv"; valid files still match.
        Output: Regex pattern
        On Failure: None

        Written By: iNeuron Intelligence
        Version: 1.1
        Revisions: regex corrected to a literal prefix with digit groups.
        """
        regex = r"phising_\d+_\d+\.csv"
        return regex

    def createDirectoryForGoodBadRawData(self):
        """
        Method Name: createDirectoryForGoodBadRawData
        Description: This method creates the directories used to store the Good Data
                     and Bad Data after validating the training data.
        Output: None
        On Failure: OSError

        Written By: iNeuron Intelligence
        Version: 1.1
        Revisions: loop over the two sub-directories; log handle context-managed.
        """
        try:
            for subdir in ("Good_Raw/", "Bad_Raw/"):
                path = os.path.join("Training_Raw_files_validated/", subdir)
                if not os.path.isdir(path):
                    os.makedirs(path)
        except OSError as ex:
            with open("Training_Logs/GeneralLog.txt", 'a+') as file:
                self.logger.log(file, "Error while creating Directory %s:" % ex)
            raise

    def deleteExistingGoodDataTrainingFolder(self):
        """
        Method Name: deleteExistingGoodDataTrainingFolder
        Description: This method deletes the directory made to store the Good Data
                     after loading the data in the table. Once the good files are
                     loaded in the DB, deleting the directory ensures space optimization.
        Output: None
        On Failure: OSError

        Written By: iNeuron Intelligence
        Version: 1.1
        Revisions: log handle context-managed; caught exception re-raised.
        """
        try:
            path = 'Training_Raw_files_validated/'
            if os.path.isdir(path + 'Good_Raw/'):
                shutil.rmtree(path + 'Good_Raw/')
                with open("Training_Logs/GeneralLog.txt", 'a+') as file:
                    self.logger.log(file, "GoodRaw directory deleted successfully!!!")
        except OSError:
            with open("Training_Logs/GeneralLog.txt", 'a+') as file:
                self.logger.log(file, "Error while Deleting Directory : %s" % OSError)
            raise

    def deleteExistingBadDataTrainingFolder(self):
        """
        Method Name: deleteExistingBadDataTrainingFolder
        Description: This method deletes the directory made to store the bad Data.
        Output: None
        On Failure: OSError

        Written By: iNeuron Intelligence
        Version: 1.1
        Revisions: log handle context-managed; caught exception re-raised.
        """
        try:
            path = 'Training_Raw_files_validated/'
            if os.path.isdir(path + 'Bad_Raw/'):
                shutil.rmtree(path + 'Bad_Raw/')
                with open("Training_Logs/GeneralLog.txt", 'a+') as file:
                    self.logger.log(file, "BadRaw directory deleted before starting validation!!!")
        except OSError:
            with open("Training_Logs/GeneralLog.txt", 'a+') as file:
                self.logger.log(file, "Error while Deleting Directory : %s" % OSError)
            raise

    def moveBadFilesToArchiveBad(self):
        """
        Method Name: moveBadFilesToArchiveBad
        Description: This method deletes the directory made to store the Bad Data
                     after moving the data in an archive folder. We archive the bad
                     files to send them back to the client for invalid data issue.
        Output: None
        On Failure: OSError

        Written By: iNeuron Intelligence
        Version: 1.1
        Revisions: destination listing hoisted out of the loop (was re-listed per
                   file); log handle context-managed.
        """
        now = datetime.now()
        date = now.date()
        time = now.strftime("%H%M%S")
        try:
            source = 'Training_Raw_files_validated/Bad_Raw/'
            if os.path.isdir(source):
                path = "TrainingArchiveBadData"
                if not os.path.isdir(path):
                    os.makedirs(path)
                dest = 'TrainingArchiveBadData/BadData_' + str(date) + "_" + str(time)
                if not os.path.isdir(dest):
                    os.makedirs(dest)
                # Build the set once: O(1) membership instead of O(n) listdir per file.
                already_archived = set(os.listdir(dest))
                for f in os.listdir(source):
                    if f not in already_archived:
                        shutil.move(source + f, dest)
                with open("Training_Logs/GeneralLog.txt", 'a+') as file:
                    self.logger.log(file, "Bad files moved to archive")
                    path = 'Training_Raw_files_validated/'
                    if os.path.isdir(path + 'Bad_Raw/'):
                        shutil.rmtree(path + 'Bad_Raw/')
                    self.logger.log(file, "Bad Raw Data Folder Deleted successfully!!")
        except Exception as e:
            with open("Training_Logs/GeneralLog.txt", 'a+') as file:
                self.logger.log(file, "Error while moving bad files to archive:: %s" % e)
            raise

    def validationFileNameRaw(self, regex, LengthOfDateStampInFile, LengthOfTimeStampInFile):
        """
        Method Name: validationFileNameRaw
        Description: This function validates the name of the training csv files as per
                     the schema. Files whose name matches the regex and whose date/time
                     stamp lengths agree with the schema go to Good_Raw; all others go
                     to Bad_Raw.
        Output: None
        On Failure: Exception

        Written By: iNeuron Intelligence
        Version: 1.1
        Revisions: FIX - files are copied from self.Batch_Directory (the directory
                   they were listed from) instead of a hard-coded
                   "Training_Batch_Files/"; log handle context-managed.
        """
        # Delete leftovers from a previous unsuccessful run, then recreate the dirs.
        self.deleteExistingBadDataTrainingFolder()
        self.deleteExistingGoodDataTrainingFolder()

        onlyfiles = [f for f in listdir(self.Batch_Directory)]
        try:
            self.createDirectoryForGoodBadRawData()
            with open("Training_Logs/nameValidationLog.txt", 'a+') as f:
                for filename in onlyfiles:
                    src = os.path.join(self.Batch_Directory, filename)
                    if re.match(regex, filename):
                        splitAtDot = re.split('.csv', filename)
                        splitAtDot = re.split('_', splitAtDot[0])
                        if (len(splitAtDot[1]) == LengthOfDateStampInFile
                                and len(splitAtDot[2]) == LengthOfTimeStampInFile):
                            shutil.copy(src, "Training_Raw_files_validated/Good_Raw")
                            self.logger.log(f, "Valid File name!! File moved to GoodRaw Folder :: %s" % filename)
                        else:
                            shutil.copy(src, "Training_Raw_files_validated/Bad_Raw")
                            self.logger.log(f, "Invalid File Name!! File moved to Bad Raw Folder :: %s" % filename)
                    else:
                        shutil.copy(src, "Training_Raw_files_validated/Bad_Raw")
                        self.logger.log(f, "Invalid File Name!! File moved to Bad Raw Folder :: %s" % filename)
        except Exception as e:
            with open("Training_Logs/nameValidationLog.txt", 'a+') as f:
                self.logger.log(f, "Error occured while validating FileName %s" % e)
            raise

    def validateColumnLength(self, NumberofColumns):
        """
        Method Name: validateColumnLength
        Description: This function validates the number of columns in the csv files.
                     It should be the same as given in the schema file. If not, the
                     file is moved to Bad_Raw; otherwise it stays in Good_Raw.
        Output: None
        On Failure: Exception

        Written By: iNeuron Intelligence
        Version: 1.1
        Revisions: removed stale "Wafer" docstring line inherited from another
                   project; log handle context-managed (was leaked on exceptions).
        """
        try:
            with open("Training_Logs/columnValidationLog.txt", 'a+') as f:
                self.logger.log(f, "Column Length Validation Started!!")
                for file in listdir('Training_Raw_files_validated/Good_Raw/'):
                    csv = pd.read_csv("Training_Raw_files_validated/Good_Raw/" + file)
                    if csv.shape[1] != NumberofColumns:
                        shutil.move("Training_Raw_files_validated/Good_Raw/" + file,
                                    "Training_Raw_files_validated/Bad_Raw")
                        self.logger.log(f, "Invalid Column Length for the file!! File moved to Bad Raw Folder :: %s" % file)
                self.logger.log(f, "Column Length Validation Completed!!")
        except OSError:
            with open("Training_Logs/columnValidationLog.txt", 'a+') as f:
                self.logger.log(f, "Error Occured while moving the file :: %s" % OSError)
            raise
        except Exception as e:
            with open("Training_Logs/columnValidationLog.txt", 'a+') as f:
                self.logger.log(f, "Error Occured:: %s" % e)
            raise

    def validateMissingValuesInWholeColumn(self):
        """
        Method Name: validateMissingValuesInWholeColumn
        Description: This function validates if any column in the csv file has all
                     values missing. Such files are not suitable for processing and
                     are moved to Bad_Raw.
        Output: None
        On Failure: Exception

        Written By: iNeuron Intelligence
        Version: 1.1
        Revisions: per-column Python loop replaced by a vectorised isnull().all()
                   check; FIX - log message now describes the actual failure
                   (was the copy-pasted "Invalid Column Length" text); log handle
                   context-managed.
        """
        try:
            with open("Training_Logs/missingValuesInColumn.txt", 'a+') as f:
                self.logger.log(f, "Missing Values Validation Started!!")
                for file in listdir('Training_Raw_files_validated/Good_Raw/'):
                    csv = pd.read_csv("Training_Raw_files_validated/Good_Raw/" + file)
                    if csv.isnull().all(axis=0).any():
                        # At least one column is entirely empty -> file is unusable.
                        shutil.move("Training_Raw_files_validated/Good_Raw/" + file,
                                    "Training_Raw_files_validated/Bad_Raw")
                        self.logger.log(f, "Column with all values missing!! File moved to Bad Raw Folder :: %s" % file)
                    else:
                        csv.to_csv("Training_Raw_files_validated/Good_Raw/" + file, index=None, header=True)
        except OSError:
            with open("Training_Logs/missingValuesInColumn.txt", 'a+') as f:
                self.logger.log(f, "Error Occured while moving the file :: %s" % OSError)
            raise
        except Exception as e:
            with open("Training_Logs/missingValuesInColumn.txt", 'a+') as f:
                self.logger.log(f, "Error Occured:: %s" % e)
            raise
from datetime import datetime


class App_Logger:
    """Minimal append-only logger.

    Each call to :meth:`log` writes a single line of the form
    ``<YYYY-MM-DD>/<HH:MM:SS>\\t\\t<message>\\n`` to the supplied file object.
    """

    def __init__(self):
        pass

    def log(self, file_object, log_message):
        # Capture the timestamp once and keep the pieces on the instance,
        # matching the original observable state (now / date / current_time).
        self.now = datetime.now()
        self.date = self.now.date()
        self.current_time = self.now.strftime("%H:%M:%S")
        file_object.write(f"{self.date}/{self.current_time}\t\t{log_message}\n")
class Model_Finder:
    """
    This class shall be used to find the model with best accuracy and AUC score.

    Written By: iNeuron Intelligence
    Version: 1.1
    Revisions: corrected copy-pasted docstring/comments (naive_bayes, Random
               Forest); removed the RandomForest-only 'criterion' parameter from
               the XGBoost grid - XGBClassifier ignores it, so it only doubled
               the number of GridSearch fits; exceptions chained for debugging.
    """

    def __init__(self, file_object, logger_object):
        self.file_object = file_object
        self.logger_object = logger_object
        self.sv_classifier = SVC()
        # n_jobs=-1 -> use every available CPU core during training.
        self.xgb = XGBClassifier(objective='binary:logistic', n_jobs=-1)

    def get_best_params_for_svm(self, train_x, train_y):
        """
        Method Name: get_best_params_for_svm
        Description: get the parameters for the SVM Algorithm which give the best accuracy.
                     Use Hyper Parameter Tuning.
        Output: The model with the best parameters
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.1
        Revisions: docstring method name corrected (was get_best_params_for_naive_bayes).
        """
        self.logger_object.log(self.file_object, 'Entered the get_best_params_for_svm method of the Model_Finder class')
        try:
            # Candidate hyper-parameter combinations for the search.
            self.param_grid = {"kernel": ['rbf', 'sigmoid'],
                               "C": [0.1, 0.5, 1.0],
                               "random_state": [0, 100, 200, 300]}

            # Exhaustive 5-fold cross-validated grid search.
            self.grid = GridSearchCV(estimator=self.sv_classifier, param_grid=self.param_grid, cv=5, verbose=3)
            self.grid.fit(train_x, train_y)

            # Extract the winning parameters.
            self.kernel = self.grid.best_params_['kernel']
            self.C = self.grid.best_params_['C']
            self.random_state = self.grid.best_params_['random_state']

            # Retrain on the full training set with the best parameters.
            self.sv_classifier = SVC(kernel=self.kernel, C=self.C, random_state=self.random_state)
            self.sv_classifier.fit(train_x, train_y)
            self.logger_object.log(self.file_object,
                                   'SVM best params: ' + str(self.grid.best_params_) + '. Exited the get_best_params_for_svm method of the Model_Finder class')

            return self.sv_classifier
        except Exception as e:
            self.logger_object.log(self.file_object,
                                   'Exception occured in get_best_params_for_svm method of the Model_Finder class. Exception message: ' + str(e))
            self.logger_object.log(self.file_object,
                                   'SVM training failed. Exited the get_best_params_for_svm method of the Model_Finder class')
            # Keep the Exception type callers expect, but chain the real cause.
            raise Exception() from e

    def get_best_params_for_xgboost(self, train_x, train_y):
        """
        Method Name: get_best_params_for_xgboost
        Description: get the parameters for XGBoost Algorithm which give the best accuracy.
                     Use Hyper Parameter Tuning.
        Output: The model with the best parameters
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.1
        Revisions: dropped 'criterion' from the grid - it is a RandomForest
                   parameter that XGBClassifier silently ignores, so searching it
                   doubled the fit count without affecting the chosen model.
        """
        self.logger_object.log(self.file_object,
                               'Entered the get_best_params_for_xgboost method of the Model_Finder class')
        try:
            # Candidate hyper-parameter combinations for the search.
            self.param_grid_xgboost = {
                "n_estimators": [100, 130],
                "max_depth": range(8, 10, 1)
            }
            # Exhaustive 5-fold cross-validated grid search.
            self.grid = GridSearchCV(XGBClassifier(objective='binary:logistic'), self.param_grid_xgboost, verbose=3, cv=5)
            self.grid.fit(train_x, train_y)

            # Extract the winning parameters.
            self.max_depth = self.grid.best_params_['max_depth']
            self.n_estimators = self.grid.best_params_['n_estimators']

            # Retrain on the full training set with the best parameters.
            self.xgb = XGBClassifier(max_depth=self.max_depth, n_estimators=self.n_estimators, n_jobs=-1)
            self.xgb.fit(train_x, train_y)
            self.logger_object.log(self.file_object,
                                   'XGBoost best params: ' + str(
                                       self.grid.best_params_) + '. Exited the get_best_params_for_xgboost method of the Model_Finder class')
            return self.xgb
        except Exception as e:
            self.logger_object.log(self.file_object,
                                   'Exception occured in get_best_params_for_xgboost method of the Model_Finder class. Exception message: ' + str(e))
            self.logger_object.log(self.file_object,
                                   'XGBoost Parameter tuning failed. Exited the get_best_params_for_xgboost method of the Model_Finder class')
            raise Exception() from e

    def get_best_model(self, train_x, train_y, test_x, test_y):
        """
        Method Name: get_best_model
        Description: Find out the Model which has the best AUC score.
        Output: The best model name and the model object
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.1
        Revisions: misleading "Random Forest" comments corrected to SVM; the SVM
                   branch now returns self.svm (the object actually scored) for
                   consistency with the XGBoost branch.
        """
        self.logger_object.log(self.file_object,
                               'Entered the get_best_model method of the Model_Finder class')
        try:
            # Tune and evaluate XGBoost.
            self.xgboost = self.get_best_params_for_xgboost(train_x, train_y)
            self.prediction_xgboost = self.xgboost.predict(test_x)

            if len(test_y.unique()) == 1:
                # roc_auc_score is undefined for a single class; fall back to accuracy.
                self.xgboost_score = accuracy_score(test_y, self.prediction_xgboost)
                self.logger_object.log(self.file_object, 'Accuracy for XGBoost:' + str(self.xgboost_score))
            else:
                self.xgboost_score = roc_auc_score(test_y, self.prediction_xgboost)
                self.logger_object.log(self.file_object, 'AUC for XGBoost:' + str(self.xgboost_score))

            # Tune and evaluate SVM.
            self.svm = self.get_best_params_for_svm(train_x, train_y)
            self.prediction_svm = self.svm.predict(test_x)

            if len(test_y.unique()) == 1:
                # roc_auc_score is undefined for a single class; fall back to accuracy.
                self.svm_score = accuracy_score(test_y, self.prediction_svm)
                self.logger_object.log(self.file_object, 'Accuracy for SVM:' + str(self.svm_score))
            else:
                self.svm_score = roc_auc_score(test_y, self.prediction_svm)
                self.logger_object.log(self.file_object, 'AUC for SVM:' + str(self.svm_score))

            # Return whichever model scored higher.
            if self.svm_score < self.xgboost_score:
                return 'XGBoost', self.xgboost
            else:
                return 'SVM', self.svm

        except Exception as e:
            self.logger_object.log(self.file_object,
                                   'Exception occured in get_best_model method of the Model_Finder class. Exception message: ' + str(e))
            self.logger_object.log(self.file_object,
                                   'Model Selection Failed. Exited the get_best_model method of the Model_Finder class')
            raise Exception() from e
import pandas as pd

class Data_Getter:
    """
    This class shall be used for obtaining the data from the source for training.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None
    """

    def __init__(self, file_object, logger_object):
        # Fixed location the training pipeline exports the DB contents to.
        self.training_file = 'Training_FileFromDB/InputFile.csv'
        self.file_object = file_object
        self.logger_object = logger_object

    def get_data(self):
        """
        Method Name: get_data
        Description: This method reads the data from source.
        Output: A pandas DataFrame.
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        log = self.logger_object.log  # local alias: every branch below logs
        log(self.file_object, 'Entered the get_data method of the Data_Getter class')
        try:
            # Load the exported CSV and keep it on the instance for later steps.
            self.data = pd.read_csv(self.training_file)
            log(self.file_object, 'Data Load Successful.Exited the get_data method of the Data_Getter class')
            return self.data
        except Exception as e:
            log(self.file_object, 'Exception occured in get_data method of the Data_Getter class. Exception message: ' + str(e))
            log(self.file_object, 'Data Load Unsuccessful.Exited the get_data method of the Data_Getter class')
            raise Exception()
import pandas as pd

class Data_Getter_Pred:
    """
    This class shall be used for obtaining the data from the source for prediction.

    Written By: iNeuron Intelligence
    Version: 1.1
    Revisions: log messages corrected to name Data_Getter_Pred (they were
               copy-pasted from Data_Getter and mis-attributed every log line).
    """

    def __init__(self, file_object, logger_object):
        # Fixed location the prediction pipeline exports the DB contents to.
        self.prediction_file = 'Prediction_FileFromDB/InputFile.csv'
        self.file_object = file_object
        self.logger_object = logger_object

    def get_data(self):
        """
        Method Name: get_data
        Description: This method reads the data from source.
        Output: A pandas DataFrame.
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.1
        Revisions: log messages now reference the correct class name.
        """
        self.logger_object.log(self.file_object, 'Entered the get_data method of the Data_Getter_Pred class')
        try:
            # Load the exported CSV and keep it on the instance for later steps.
            self.data = pd.read_csv(self.prediction_file)
            self.logger_object.log(self.file_object, 'Data Load Successful.Exited the get_data method of the Data_Getter_Pred class')
            return self.data
        except Exception as e:
            self.logger_object.log(self.file_object, 'Exception occured in get_data method of the Data_Getter_Pred class. Exception message: ' + str(e))
            self.logger_object.log(self.file_object,
                                   'Data Load Unsuccessful.Exited the get_data method of the Data_Getter_Pred class')
            raise Exception()
-------------------------------------------------------------------------------- /data_preprocessing/__pycache__/preprocessing.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Phising-Classifier/850f8a368eed5fa626ceafd2a4c5c61155ae0102/data_preprocessing/__pycache__/preprocessing.cpython-37.pyc -------------------------------------------------------------------------------- /data_preprocessing/__pycache__/preprocessing_pred.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Phising-Classifier/850f8a368eed5fa626ceafd2a4c5c61155ae0102/data_preprocessing/__pycache__/preprocessing_pred.cpython-36.pyc -------------------------------------------------------------------------------- /data_preprocessing/clustering.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from sklearn.cluster import KMeans 3 | from kneed import KneeLocator 4 | from file_operations import file_methods 5 | 6 | class KMeansClustering: 7 | """ 8 | This class shall be used to divide the data into clusters before training. 9 | 10 | Written By: iNeuron Intelligence 11 | Version: 1.0 12 | Revisions: None 13 | 14 | """ 15 | 16 | def __init__(self, file_object, logger_object): 17 | self.file_object = file_object 18 | self.logger_object = logger_object 19 | 20 | def elbow_plot(self,data): 21 | """ 22 | Method Name: elbow_plot 23 | Description: This method saves the plot to decide the optimum number of clusters to the file. 
24 | Output: A picture saved to the directory 25 | On Failure: Raise Exception 26 | 27 | Written By: iNeuron Intelligence 28 | Version: 1.0 29 | Revisions: None 30 | 31 | """ 32 | self.logger_object.log(self.file_object, 'Entered the elbow_plot method of the KMeansClustering class') 33 | wcss=[] # initializing an empty list 34 | try: 35 | for i in range (1,11): 36 | kmeans=KMeans(n_clusters=i,init='k-means++',random_state=42) # initializing the KMeans object 37 | kmeans.fit(data) # fitting the data to the KMeans Algorithm 38 | wcss.append(kmeans.inertia_) 39 | plt.plot(range(1,11),wcss) # creating the graph between WCSS and the number of clusters 40 | plt.title('The Elbow Method') 41 | plt.xlabel('Number of clusters') 42 | plt.ylabel('WCSS') 43 | #plt.show() 44 | plt.savefig('preprocessing_data/K-Means_Elbow.PNG') # saving the elbow plot locally 45 | # finding the value of the optimum cluster programmatically 46 | self.kn = KneeLocator(range(1, 11), wcss, curve='convex', direction='decreasing') 47 | self.logger_object.log(self.file_object, 'The optimum number of clusters is: '+str(self.kn.knee)+' . Exited the elbow_plot method of the KMeansClustering class') 48 | return self.kn.knee 49 | 50 | except Exception as e: 51 | self.logger_object.log(self.file_object,'Exception occured in elbow_plot method of the KMeansClustering class. Exception message: ' + str(e)) 52 | self.logger_object.log(self.file_object,'Finding the number of clusters failed. Exited the elbow_plot method of the KMeansClustering class') 53 | raise Exception() 54 | 55 | def create_clusters(self,data,number_of_clusters): 56 | """ 57 | Method Name: create_clusters 58 | Description: Create a new dataframe consisting of the cluster information. 
59 | Output: A datframe with cluster column 60 | On Failure: Raise Exception 61 | 62 | Written By: iNeuron Intelligence 63 | Version: 1.0 64 | Revisions: None 65 | 66 | """ 67 | self.logger_object.log(self.file_object, 'Entered the create_clusters method of the KMeansClustering class') 68 | self.data=data 69 | try: 70 | self.kmeans = KMeans(n_clusters=number_of_clusters, init='k-means++', random_state=42) 71 | #self.data = self.data[~self.data.isin([np.nan, np.inf, -np.inf]).any(1)] 72 | self.y_kmeans=self.kmeans.fit_predict(data) # divide data into clusters 73 | 74 | self.file_op = file_methods.File_Operation(self.file_object,self.logger_object) 75 | self.save_model = self.file_op.save_model(self.kmeans, 'KMeans') # saving the KMeans model to directory 76 | # passing 'Model' as the functions need three parameters 77 | 78 | self.data['Cluster']=self.y_kmeans # create a new column in dataset for storing the cluster information 79 | self.logger_object.log(self.file_object, 'succesfully created '+str(self.kn.knee)+ 'clusters. Exited the create_clusters method of the KMeansClustering class') 80 | return self.data 81 | except Exception as e: 82 | self.logger_object.log(self.file_object,'Exception occured in create_clusters method of the KMeansClustering class. Exception message: ' + str(e)) 83 | self.logger_object.log(self.file_object,'Fitting the data to clusters failed. Exited the create_clusters method of the KMeansClustering class') 84 | raise Exception() -------------------------------------------------------------------------------- /data_preprocessing/preprocessing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.impute import KNNImputer 4 | from sklearn_pandas import CategoricalImputer 5 | 6 | 7 | 8 | 9 | class Preprocessor: 10 | """ 11 | This class shall be used to clean and transform the data before training. 
import pandas as pd
import numpy as np


class Preprocessor:
    """
    This class shall be used to clean and transform the data before training.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None

    """

    def __init__(self, file_object, logger_object):
        self.file_object = file_object
        self.logger_object = logger_object

    def remove_columns(self, data, columns):
        """
        Method Name: remove_columns
        Description: This method removes the given columns from a pandas dataframe.
        Output: A pandas DataFrame after removing the specified columns.
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        self.logger_object.log(self.file_object, 'Entered the remove_columns method of the Preprocessor class')
        self.data = data
        self.columns = columns
        try:
            self.useful_data = self.data.drop(labels=self.columns, axis=1)  # drop the labels specified in the columns
            self.logger_object.log(self.file_object,
                                   'Column removal Successful.Exited the remove_columns method of the Preprocessor class')
            return self.useful_data
        except Exception as e:
            self.logger_object.log(self.file_object, 'Exception occured in remove_columns method of the Preprocessor class. Exception message: ' + str(e))
            self.logger_object.log(self.file_object,
                                   'Column removal Unsuccessful. Exited the remove_columns method of the Preprocessor class')
            raise Exception()

    def separate_label_feature(self, data, label_column_name):
        """
        Method Name: separate_label_feature
        Description: This method separates the features and the label column.
        Output: Returns two separate DataFrames, one containing features and the other containing the label.
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        self.logger_object.log(self.file_object, 'Entered the separate_label_feature method of the Preprocessor class')
        try:
            self.X = data.drop(labels=label_column_name, axis=1)  # everything except the label column
            self.Y = data[label_column_name]  # the label column only
            self.logger_object.log(self.file_object,
                                   'Label Separation Successful. Exited the separate_label_feature method of the Preprocessor class')
            return self.X, self.Y
        except Exception as e:
            self.logger_object.log(self.file_object, 'Exception occured in separate_label_feature method of the Preprocessor class. Exception message: ' + str(e))
            self.logger_object.log(self.file_object, 'Label Separation Unsuccessful. Exited the separate_label_feature method of the Preprocessor class')
            raise Exception()

    def dropUnnecessaryColumns(self, data, columnNameList):
        """
        Method Name: dropUnnecessaryColumns
        Description: This method drops the unwanted columns as discussed in EDA section.
        Output: A pandas DataFrame without the listed columns.

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        # NOTE: docstring previously misnamed this method "is_null_present" (copy-paste).
        data = data.drop(columnNameList, axis=1)
        return data

    def replaceInvalidValuesWithNull(self, data):
        """
        Method Name: replaceInvalidValuesWithNull
        Description: This method replaces invalid values i.e. '?' with null, as discussed in EDA.
        Output: The same DataFrame with every '?' replaced by NaN.

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        # NOTE: docstring previously misnamed this method "is_null_present" (copy-paste).
        for column in data.columns:
            count = data[column][data[column] == '?'].count()
            if count != 0:
                data[column] = data[column].replace('?', np.nan)
        return data

    def is_null_present(self, data):
        """
        Method Name: is_null_present
        Description: This method checks whether there are null values present in the pandas DataFrame or not.
        Output: Returns True if null values are present in the DataFrame, False if they are not present, and
                the list of columns for which null values are present.
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None

        """
        self.logger_object.log(self.file_object, 'Entered the is_null_present method of the Preprocessor class')
        self.null_present = False
        self.cols_with_missing_values = []
        self.cols = data.columns
        try:
            self.null_counts = data.isna().sum()  # count of null values per column
            for i in range(len(self.null_counts)):
                # .iloc makes the positional access explicit (plain [i] is label-based).
                if self.null_counts.iloc[i] > 0:
                    self.null_present = True
                    self.cols_with_missing_values.append(self.cols[i])
            if self.null_present:
                # Persist which columns had nulls so the run can be audited later.
                self.dataframe_with_null = pd.DataFrame()
                self.dataframe_with_null['columns'] = data.columns
                self.dataframe_with_null['missing values count'] = np.asarray(data.isna().sum())
                self.dataframe_with_null.to_csv('preprocessing_data/null_values.csv')
            self.logger_object.log(self.file_object, 'Finding missing values is a success.Data written to the null values file. Exited the is_null_present method of the Preprocessor class')
            return self.null_present, self.cols_with_missing_values
        except Exception as e:
            self.logger_object.log(self.file_object, 'Exception occured in is_null_present method of the Preprocessor class. Exception message: ' + str(e))
            self.logger_object.log(self.file_object, 'Finding missing values failed. Exited the is_null_present method of the Preprocessor class')
            raise Exception()

    def encodeCategoricalValues(self, data):
        """
        Method Name: encodeCategoricalValues
        Description: This method encodes all the categorical values in the training set.
        Output: A Dataframe which has all the categorical values encoded.
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        # NOTE(review): the {'p': 1, 'e': 2} mapping assumes a 'class' column with
        # those two labels (it looks carried over from another project) — confirm
        # it matches this dataset before relying on this method.
        data["class"] = data["class"].map({'p': 1, 'e': 2})

        # One-hot encode every feature column.
        for column in data.drop(['class'], axis=1).columns:
            data = pd.get_dummies(data, columns=[column])

        return data

    def encodeCategoricalValuesPrediction(self, data):
        """
        Method Name: encodeCategoricalValuesPrediction
        Description: This method encodes all the categorical values in the prediction set.
        Output: A Dataframe which has all the categorical values encoded.
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        # One-hot encode every column (no label column exists at prediction time).
        for column in data.columns:
            data = pd.get_dummies(data, columns=[column])

        return data

    def impute_missing_values(self, data, cols_with_missing_values):
        """
        Method Name: impute_missing_values
        Description: This method replaces all the missing values in the DataFrame using CategoricalImputer.
        Output: A Dataframe which has all the missing values imputed.
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        self.logger_object.log(self.file_object, 'Entered the impute_missing_values method of the Preprocessor class')
        self.data = data
        self.cols_with_missing_values = cols_with_missing_values
        try:
            # Imported lazily: sklearn_pandas is only needed when imputation
            # actually runs, and the rest of the class stays usable without it.
            from sklearn_pandas import CategoricalImputer
            self.imputer = CategoricalImputer()
            for col in self.cols_with_missing_values:
                self.data[col] = self.imputer.fit_transform(self.data[col])
            self.logger_object.log(self.file_object, 'Imputing missing values Successful. Exited the impute_missing_values method of the Preprocessor class')
            return self.data
        except Exception as e:
            self.logger_object.log(self.file_object, 'Exception occured in impute_missing_values method of the Preprocessor class. Exception message: ' + str(e))
            self.logger_object.log(self.file_object, 'Imputing missing values failed. Exited the impute_missing_values method of the Preprocessor class')
            raise Exception()

    def get_columns_with_zero_std_deviation(self, data):
        """
        Method Name: get_columns_with_zero_std_deviation
        Description: This method finds out the columns which have a standard deviation of zero.
        Output: List of the columns with standard deviation of zero
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        self.logger_object.log(self.file_object, 'Entered the get_columns_with_zero_std_deviation method of the Preprocessor class')
        self.data_n = data.describe()
        self.col_to_drop = []
        try:
            # BUGFIX: iterate only the columns describe() summarised (numeric ones);
            # iterating data.columns raised KeyError for any non-numeric column.
            for x in self.data_n.columns:
                if self.data_n[x]['std'] == 0:  # constant column carries no signal
                    self.col_to_drop.append(x)
            self.logger_object.log(self.file_object, 'Column search for Standard Deviation of Zero Successful. Exited the get_columns_with_zero_std_deviation method of the Preprocessor class')
            return self.col_to_drop
        except Exception as e:
            self.logger_object.log(self.file_object, 'Exception occured in get_columns_with_zero_std_deviation method of the Preprocessor class. Exception message: ' + str(e))
            self.logger_object.log(self.file_object, 'Column search for Standard Deviation of Zero Failed. Exited the get_columns_with_zero_std_deviation method of the Preprocessor class')
            raise Exception()
import pickle
import os
import shutil


class File_Operation:
    """
    This class shall be used to save the model after training
    and load the saved model for prediction.

    Written By: iNeuron Intelligence
    Version: 1.0
    Revisions: None

    """

    def __init__(self, file_object, logger_object):
        self.file_object = file_object
        self.logger_object = logger_object
        self.model_directory = 'models/'  # root folder: one sub-folder per saved model

    def save_model(self, model, filename):
        """
        Method Name: save_model
        Description: Save the model file to directory
        Outcome: File gets saved
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        self.logger_object.log(self.file_object, 'Entered the save_model method of the File_Operation class')
        try:
            path = os.path.join(self.model_directory, filename)  # separate directory for each cluster's model
            if os.path.isdir(path):
                # BUGFIX: previously removed the WHOLE model_directory here,
                # wiping every other saved model; only this model's folder
                # should be replaced.
                shutil.rmtree(path)
            os.makedirs(path)
            with open(path + '/' + filename + '.sav', 'wb') as f:
                pickle.dump(model, f)  # serialise the model to file
            self.logger_object.log(self.file_object,
                                   'Model File ' + filename + ' saved. Exited the save_model method of the Model_Finder class')
            return 'success'
        except Exception as e:
            self.logger_object.log(self.file_object, 'Exception occured in save_model method of the Model_Finder class. Exception message: ' + str(e))
            self.logger_object.log(self.file_object,
                                   'Model File ' + filename + ' could not be saved. Exited the save_model method of the Model_Finder class')
            raise Exception()

    def load_model(self, filename):
        """
        Method Name: load_model
        Description: load the model file to memory
        Output: The Model file loaded in memory
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        self.logger_object.log(self.file_object, 'Entered the load_model method of the File_Operation class')
        try:
            with open(self.model_directory + filename + '/' + filename + '.sav', 'rb') as f:
                self.logger_object.log(self.file_object,
                                       'Model File ' + filename + ' loaded. Exited the load_model method of the Model_Finder class')
                return pickle.load(f)
        except Exception as e:
            self.logger_object.log(self.file_object,
                                   'Exception occured in load_model method of the Model_Finder class. Exception message: ' + str(e))
            # BUGFIX: failure message previously said "could not be saved".
            self.logger_object.log(self.file_object,
                                   'Model File ' + filename + ' could not be loaded. Exited the load_model method of the Model_Finder class')
            raise Exception()

    def find_correct_model_file(self, cluster_number):
        """
        Method Name: find_correct_model_file
        Description: Select the correct model based on cluster number
        Output: The Model file
        On Failure: Raise Exception

        Written By: iNeuron Intelligence
        Version: 1.0
        Revisions: None
        """
        self.logger_object.log(self.file_object, 'Entered the find_correct_model_file method of the File_Operation class')
        try:
            self.cluster_number = cluster_number
            self.folder_name = self.model_directory
            self.list_of_files = os.listdir(self.folder_name)
            for self.file in self.list_of_files:
                # str.index() never returns -1 (it raises ValueError when absent),
                # so the old "!= -1" comparison was dead code; a membership test
                # expresses the intent directly with the same matching behaviour.
                if str(self.cluster_number) in self.file:
                    self.model_name = self.file
            self.model_name = self.model_name.split('.')[0]  # strip any extension
            self.logger_object.log(self.file_object,
                                   'Exited the find_correct_model_file method of the Model_Finder class.')
            return self.model_name
        except Exception as e:
            self.logger_object.log(self.file_object,
                                   'Exception occured in find_correct_model_file method of the Model_Finder class. Exception message: ' + str(e))
            self.logger_object.log(self.file_object,
                                   'Exited the find_correct_model_file method of the Model_Finder class with Failure')
            raise Exception()
from wsgiref import simple_server
from flask import Flask, request
from flask import Response
import os
from flask_cors import CORS, cross_origin
from prediction_Validation_Insertion import pred_validation
from trainingModel import trainModel
from training_Validation_Insertion import train_validation
import flask_monitoringdashboard as dashboard
from predictFromModel import prediction

os.putenv('LANG', 'en_US.UTF-8')
os.putenv('LC_ALL', 'en_US.UTF-8')

app = Flask(__name__)
dashboard.bind(app)
CORS(app)


@app.route("/predict", methods=['POST'])
@cross_origin()
def predictRouteClient():
    """Validate the batch files in the posted folder, run the model and return the output path."""
    try:
        if request.json['folderPath'] is not None:
            path = request.json['folderPath']

            pred_val = pred_validation(path)  # object initialization
            pred_val.prediction_validation()  # calling the prediction_validation function

            pred = prediction(path)  # object initialization
            # predicting for dataset present in database
            path = pred.predictionFromModel()
            return Response("Prediction File created at %s!!!" % path)
    except ValueError:
        return Response("Error Occurred! %s" % ValueError)
    except KeyError:
        return Response("Error Occurred! %s" % KeyError)
    except Exception as e:
        return Response("Error Occurred! %s" % e)


@app.route("/train", methods=['POST'])
@cross_origin()
def trainRouteClient():
    """Validate the batch files in the posted folder and retrain the clustered models."""
    try:
        if request.json['folderPath'] is not None:
            path = request.json['folderPath']
            train_valObj = train_validation(path)  # object initialization
            train_valObj.train_validation()  # calling the training_validation function

            trainModelObj = trainModel()  # object initialization
            trainModelObj.trainingModel()  # training the model for the files in the table
    except ValueError:
        return Response("Error Occurred! %s" % ValueError)
    except KeyError:
        return Response("Error Occurred! %s" % KeyError)
    except Exception as e:
        return Response("Error Occurred! %s" % e)
    return Response("Training successfull!!")


# BUGFIX: int(os.getenv("PORT")) raised TypeError at import time whenever the
# PORT environment variable was unset; default to 5000 for local runs (the
# platform sets PORT in deployment).
port = int(os.getenv("PORT", "5000"))
if __name__ == "__main__":
    host = '0.0.0.0'
    httpd = simple_server.make_server(host, port, app)
    print("Serving on %s %d" % (host, port))
    httpd.serve_forever()
import pandas
from file_operations import file_methods
from data_preprocessing import preprocessing
from data_ingestion import data_loader_prediction
from application_logging import logger
from Prediction_Raw_Data_Validation.predictionDataValidation import Prediction_Data_validation


class prediction:
    """Runs the end-to-end batch prediction: load validated input, preprocess,
    assign each row to its KMeans cluster, and predict with the per-cluster model."""

    def __init__(self, path):
        self.file_object = open("Prediction_Logs/Prediction_Log.txt", 'a+')
        self.log_writer = logger.App_Logger()
        self.pred_data_val = Prediction_Data_validation(path)

    def predictionFromModel(self):
        """
        Predict for the data exported from the prediction database.

        Returns the path of the CSV file the predictions were written to.
        Raises the original exception after logging it on any failure.
        """
        try:
            self.pred_data_val.deletePredictionFile()  # deletes the existing prediction file from last run!
            self.log_writer.log(self.file_object, 'Start of Prediction')
            data_getter = data_loader_prediction.Data_Getter_Pred(self.file_object, self.log_writer)
            data = data_getter.get_data()

            preprocessor = preprocessing.Preprocessor(self.file_object, self.log_writer)
            # replacing '?' values with np.nan as discussed in the EDA part
            data = preprocessor.replaceInvalidValuesWithNull(data)
            is_null_present, cols_with_missing_values = preprocessor.is_null_present(data)
            if is_null_present:
                data = preprocessor.impute_missing_values(data, cols_with_missing_values)

            file_loader = file_methods.File_Operation(self.file_object, self.log_writer)
            kmeans = file_loader.load_model('KMeans')
            clusters = kmeans.predict(data)  # assign every row to a cluster
            data['clusters'] = clusters
            clusters = data['clusters'].unique()

            result = []  # predictions accumulated across all clusters
            for i in clusters:
                cluster_data = data[data['clusters'] == i]
                cluster_data = cluster_data.drop(['clusters'], axis=1)
                # each cluster has its own trained model on disk
                model_name = file_loader.find_correct_model_file(i)
                model = file_loader.load_model(model_name)
                for val in model.predict(cluster_data):
                    result.append(val)
            result = pandas.DataFrame(result, columns=['Predictions'])
            # Single source of truth for the output location (the literal was
            # previously duplicated in the to_csv call below).
            path = "Prediction_Output_File/Predictions.csv"
            result.to_csv(path, header=True)  # overwrites the previous prediction file
            self.log_writer.log(self.file_object, 'End of Prediction')
        except Exception as ex:
            self.log_writer.log(self.file_object, 'Error occured while running the prediction!! Error:: %s' % ex)
            raise ex
        return path
self.log_writer.log(self.file_object,'Start of Validation on files for prediction!!') 19 | #extracting values from prediction schema 20 | LengthOfDateStampInFile,LengthOfTimeStampInFile,column_names,noofcolumns = self.raw_data.valuesFromSchema() 21 | #getting the regex defined to validate filename 22 | regex = self.raw_data.manualRegexCreation() 23 | #validating filename of prediction files 24 | self.raw_data.validationFileNameRaw(regex,LengthOfDateStampInFile,LengthOfTimeStampInFile) 25 | #validating column length in the file 26 | self.raw_data.validateColumnLength(noofcolumns) 27 | #validating if any column has all values missing 28 | self.raw_data.validateMissingValuesInWholeColumn() 29 | self.log_writer.log(self.file_object,"Raw Data Validation Complete!!") 30 | 31 | self.log_writer.log(self.file_object,("Starting Data Transforamtion!!")) 32 | #replacing blanks in the csv file with "Null" values to insert in table 33 | self.dataTransform.addQuotesToStringValuesInColumn() 34 | 35 | self.log_writer.log(self.file_object,"DataTransformation Completed!!!") 36 | 37 | self.log_writer.log(self.file_object,"Creating Prediction_Database and tables on the basis of given schema!!!") 38 | #create database with given name, if present open the connection! 
Create table with columns given in schema 39 | self.dBOperation.createTableDb('Prediction',column_names) 40 | self.log_writer.log(self.file_object,"Table creation Completed!!") 41 | self.log_writer.log(self.file_object,"Insertion of Data into Table started!!!!") 42 | #insert csv files in the table 43 | self.dBOperation.insertIntoTableGoodData('Prediction') 44 | self.log_writer.log(self.file_object,"Insertion in Table completed!!!") 45 | self.log_writer.log(self.file_object,"Deleting Good Data Folder!!!") 46 | #Delete the good data folder after loading files in table 47 | self.raw_data.deleteExistingGoodDataTrainingFolder() 48 | self.log_writer.log(self.file_object,"Good_Data folder deleted!!!") 49 | self.log_writer.log(self.file_object,"Moving bad files to Archive and deleting Bad_Data folder!!!") 50 | #Move the bad files to archive folder 51 | self.raw_data.moveBadFilesToArchiveBad() 52 | self.log_writer.log(self.file_object,"Bad files moved to archive!! Bad folder Deleted!!") 53 | self.log_writer.log(self.file_object,"Validation Operation completed!!") 54 | self.log_writer.log(self.file_object,"Extracting csv file from table") 55 | #export data in table to csvfile 56 | self.dBOperation.selectingDatafromtableintocsv('Prediction') 57 | 58 | except Exception as e: 59 | raise e 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /preprocessing_data/K-Means_Elbow.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/krishnaik06/Phising-Classifier/850f8a368eed5fa626ceafd2a4c5c61155ae0102/preprocessing_data/K-Means_Elbow.PNG -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | APScheduler==3.6.3 2 | attrs==19.3.0 3 | certifi==2019.11.28 4 | Click==7.0 5 | colorhash==1.0.2 6 | configparser==4.0.2 7 
| cycler==0.10.0 8 | Flask==1.1.1 9 | Flask-Cors==3.0.8 10 | Flask-MonitoringDashboard==3.0.6 11 | imbalanced-learn==0.6.1 12 | imblearn==0.0 13 | importlib-metadata==1.4.0 14 | itsdangerous==1.1.0 15 | Jinja2==2.11.0 16 | joblib==0.14.1 17 | jsonschema==3.2.0 18 | kiwisolver==1.1.0 19 | kneed==0.5.1 20 | MarkupSafe==1.1.1 21 | matplotlib==3.1.2 22 | more-itertools==8.1.0 23 | numpy==1.18.1 24 | pandas==0.25.3 25 | psutil==5.6.7 26 | pyparsing==2.4.6 27 | pyrsistent==0.15.7 28 | python-dateutil==2.8.1 29 | pytz==2019.3 30 | PyYAML==5.3 31 | regexp==0.1 32 | scikit-learn==0.22.1 33 | scipy==1.4.1 34 | six==1.14.0 35 | sklearn==0.0 36 | sklearn-pandas==1.8.0 37 | SQLAlchemy==1.3.13 38 | tzlocal==2.0.0 39 | Werkzeug==0.16.1 40 | wincertstore==0.2 41 | xgboost==0.90 42 | zipp==2.0.1 43 | -------------------------------------------------------------------------------- /runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.6.9 -------------------------------------------------------------------------------- /schema_prediction.json: -------------------------------------------------------------------------------- 1 | { "SampleFileName": "phising_08012020_120000.csv", 2 | "LengthOfDateStampInFile": 8, 3 | "LengthOfTimeStampInFile": 6, 4 | "NumberofColumns" : 30, 5 | "ColName": { 6 | "having_IP_Address": "INTEGER" , 7 | "URL_Length": "INTEGER" , 8 | "Shortining_Service": "INTEGER" , 9 | "having_At_Symbol": "INTEGER" , 10 | "double_slash_redirecting": "INTEGER" , 11 | "Prefix_Suffix": "INTEGER" , 12 | "having_Sub_Domain": "INTEGER" , 13 | "SSLfinal_State": "INTEGER" , 14 | "Domain_registeration_length": "INTEGER" , 15 | "Favicon": "INTEGER" , 16 | "port": "INTEGER" , 17 | "HTTPS_token": "INTEGER" , 18 | "Request_URL": "INTEGER" , 19 | "URL_of_Anchor": "INTEGER" , 20 | "Links_in_tags": "INTEGER" , 21 | "SFH": "INTEGER" , 22 | "Submitting_to_email": "INTEGER" , 23 | "Abnormal_URL": "INTEGER" , 24 | "Redirect": "INTEGER" , 25 | 
"on_mouseover": "INTEGER" , 26 | "RightClick": "INTEGER" , 27 | "popUpWidnow": "INTEGER" , 28 | "Iframe": "INTEGER" , 29 | "age_of_domain": "INTEGER" , 30 | "DNSRecord": "INTEGER" , 31 | "web_traffic": "INTEGER" , 32 | "Page_Rank": "INTEGER" , 33 | "Google_Index": "INTEGER" , 34 | "Links_pointing_to_page": "INTEGER" , 35 | "Statistical_report": "INTEGER" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /schema_training.json: -------------------------------------------------------------------------------- 1 | { "SampleFileName": "phising_08012020_120000.csv", 2 | "LengthOfDateStampInFile": 8, 3 | "LengthOfTimeStampInFile": 6, 4 | "NumberofColumns" : 31, 5 | "ColName": { 6 | "having_IP_Address": "INTEGER" , 7 | "URL_Length": "INTEGER" , 8 | "Shortining_Service": "INTEGER" , 9 | "having_At_Symbol": "INTEGER" , 10 | "double_slash_redirecting": "INTEGER" , 11 | "Prefix_Suffix": "INTEGER" , 12 | "having_Sub_Domain": "INTEGER" , 13 | "SSLfinal_State": "INTEGER" , 14 | "Domain_registeration_length": "INTEGER" , 15 | "Favicon": "INTEGER" , 16 | "port": "INTEGER" , 17 | "HTTPS_token": "INTEGER" , 18 | "Request_URL": "INTEGER" , 19 | "URL_of_Anchor": "INTEGER" , 20 | "Links_in_tags": "INTEGER" , 21 | "SFH": "INTEGER" , 22 | "Submitting_to_email": "INTEGER" , 23 | "Abnormal_URL": "INTEGER" , 24 | "Redirect": "INTEGER" , 25 | "on_mouseover": "INTEGER" , 26 | "RightClick": "INTEGER" , 27 | "popUpWidnow": "INTEGER" , 28 | "Iframe": "INTEGER" , 29 | "age_of_domain": "INTEGER" , 30 | "DNSRecord": "INTEGER" , 31 | "web_traffic": "INTEGER" , 32 | "Page_Rank": "INTEGER" , 33 | "Google_Index": "INTEGER" , 34 | "Links_pointing_to_page": "INTEGER" , 35 | "Statistical_report": "INTEGER" , 36 | "Result" : "INTEGER" 37 | 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /trainingModel.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is the 
Entry point for Training the Machine Learning Model. 3 | 4 | Written By: iNeuron Intelligence 5 | Version: 1.0 6 | Revisions: None 7 | 8 | """ 9 | 10 | 11 | # Doing the necessary imports 12 | from sklearn.model_selection import train_test_split 13 | from data_ingestion import data_loader 14 | from data_preprocessing import preprocessing 15 | from data_preprocessing import clustering 16 | from best_model_finder import tuner 17 | from file_operations import file_methods 18 | from application_logging import logger 19 | 20 | #Creating the common Logging object 21 | 22 | 23 | class trainModel: 24 | 25 | def __init__(self): 26 | self.log_writer = logger.App_Logger() 27 | self.file_object = open("Training_Logs/ModelTrainingLog.txt", 'a+') 28 | def trainingModel(self): 29 | # Logging the start of Training 30 | self.log_writer.log(self.file_object, 'Start of Training') 31 | try: 32 | # Getting the data from the source 33 | data_getter=data_loader.Data_Getter(self.file_object,self.log_writer) 34 | data=data_getter.get_data() 35 | 36 | 37 | """doing the data preprocessing""" 38 | 39 | preprocessor=preprocessing.Preprocessor(self.file_object,self.log_writer) 40 | #data=preprocessor.remove_columns(data,['Wafer']) # remove the unnamed column as it doesn't contribute to prediction. 41 | 42 | #removing unwanted columns as discussed in the EDA part in ipynb file 43 | #data = preprocessor.dropUnnecessaryColumns(data,['veiltype']) 44 | 45 | #repalcing '?' values with np.nan as discussed in the EDA part 46 | 47 | data = preprocessor.replaceInvalidValuesWithNull(data) 48 | 49 | 50 | 51 | # check if missing values are present in the dataset 52 | is_null_present,cols_with_missing_values=preprocessor.is_null_present(data) 53 | 54 | # if missing values are there, replace them appropriately. 
55 | if(is_null_present): 56 | data=preprocessor.impute_missing_values(data,cols_with_missing_values) # missing value imputation 57 | 58 | # get encoded values for categorical data 59 | 60 | #data = preprocessor.encodeCategoricalValues(data) 61 | 62 | # create separate features and labels 63 | X, Y = preprocessor.separate_label_feature(data, label_column_name='Result') 64 | 65 | # drop the columns obtained above 66 | #X=preprocessor.remove_columns(X,cols_to_drop) 67 | 68 | """ Applying the clustering approach""" 69 | 70 | kmeans=clustering.KMeansClustering(self.file_object,self.log_writer) # object initialization. 71 | number_of_clusters=kmeans.elbow_plot(X) # using the elbow plot to find the number of optimum clusters 72 | 73 | # Divide the data into clusters 74 | X=kmeans.create_clusters(X,number_of_clusters) 75 | 76 | #create a new column in the dataset consisting of the corresponding cluster assignments. 77 | X['Labels']=Y 78 | 79 | # getting the unique clusters from our dataset 80 | list_of_clusters=X['Cluster'].unique() 81 | 82 | """parsing all the clusters and looking for the best ML algorithm to fit on individual cluster""" 83 | 84 | for i in list_of_clusters: 85 | cluster_data=X[X['Cluster']==i] # filter the data for one cluster 86 | 87 | # Prepare the feature and Label columns 88 | cluster_features=cluster_data.drop(['Labels','Cluster'],axis=1) 89 | cluster_label= cluster_data['Labels'] 90 | 91 | # splitting the data into training and test set for each cluster one by one 92 | x_train, x_test, y_train, y_test = train_test_split(cluster_features, cluster_label, test_size=1 / 3, random_state=36) 93 | 94 | model_finder=tuner.Model_Finder(self.file_object,self.log_writer) # object initialization 95 | 96 | #getting the best model for each of the clusters 97 | best_model_name,best_model=model_finder.get_best_model(x_train,y_train,x_test,y_test) 98 | 99 | #saving the best model to the directory. 
100 | file_op = file_methods.File_Operation(self.file_object,self.log_writer) 101 | save_model=file_op.save_model(best_model,best_model_name+str(i)) 102 | 103 | # logging the successful Training 104 | self.log_writer.log(self.file_object, 'Successful End of Training') 105 | self.file_object.close() 106 | 107 | except Exception: 108 | # logging the unsuccessful Training 109 | self.log_writer.log(self.file_object, 'Unsuccessful End of Training') 110 | self.file_object.close() 111 | raise Exception -------------------------------------------------------------------------------- /training_Validation_Insertion.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from Training_Raw_data_validation.rawValidation import Raw_Data_validation 3 | from DataTypeValidation_Insertion_Training.DataTypeValidation import dBOperation 4 | from DataTransform_Training.DataTransformation import dataTransform 5 | from application_logging import logger 6 | 7 | class train_validation: 8 | def __init__(self,path): 9 | self.raw_data = Raw_Data_validation(path) 10 | self.dataTransform = dataTransform() 11 | self.dBOperation = dBOperation() 12 | self.file_object = open("Training_Logs/Training_Main_Log.txt", 'a+') 13 | self.log_writer = logger.App_Logger() 14 | 15 | def train_validation(self): 16 | try: 17 | self.log_writer.log(self.file_object, 'Start of Validation on files for prediction!!') 18 | # extracting values from prediction schema 19 | LengthOfDateStampInFile, LengthOfTimeStampInFile, column_names, noofcolumns = self.raw_data.valuesFromSchema() 20 | # getting the regex defined to validate filename 21 | regex = self.raw_data.manualRegexCreation() 22 | # validating filename of prediction files 23 | self.raw_data.validationFileNameRaw(regex, LengthOfDateStampInFile, LengthOfTimeStampInFile) 24 | # validating column length in the file 25 | self.raw_data.validateColumnLength(noofcolumns) 26 | # validating if any column has 
all values missing 27 | self.raw_data.validateMissingValuesInWholeColumn() 28 | self.log_writer.log(self.file_object, "Raw Data Validation Complete!!") 29 | 30 | self.log_writer.log(self.file_object, "Starting Data Transforamtion!!") 31 | # below function adds quotes to the '?' values in some columns. 32 | self.dataTransform.addQuotesToStringValuesInColumn() 33 | 34 | self.log_writer.log(self.file_object, "DataTransformation Completed!!!") 35 | 36 | self.log_writer.log(self.file_object, 37 | "Creating Training_Database and tables on the basis of given schema!!!") 38 | # create database with given name, if present open the connection! Create table with columns given in schema 39 | self.dBOperation.createTableDb('Training', column_names) 40 | self.log_writer.log(self.file_object, "Table creation Completed!!") 41 | self.log_writer.log(self.file_object, "Insertion of Data into Table started!!!!") 42 | # insert csv files in the table 43 | self.dBOperation.insertIntoTableGoodData('Training') 44 | self.log_writer.log(self.file_object, "Insertion in Table completed!!!") 45 | self.log_writer.log(self.file_object, "Deleting Good Data Folder!!!") 46 | # Delete the good data folder after loading files in table 47 | self.raw_data.deleteExistingGoodDataTrainingFolder() 48 | self.log_writer.log(self.file_object, "Good_Data folder deleted!!!") 49 | self.log_writer.log(self.file_object, "Moving bad files to Archive and deleting Bad_Data folder!!!") 50 | # Move the bad files to archive folder 51 | self.raw_data.moveBadFilesToArchiveBad() 52 | self.log_writer.log(self.file_object, "Bad files moved to archive!! 
Bad folder Deleted!!") 53 | self.log_writer.log(self.file_object, "Validation Operation completed!!") 54 | self.log_writer.log(self.file_object, "Extracting csv file from table") 55 | # export data in table to csvfile 56 | self.dBOperation.selectingDatafromtableintocsv('Training') 57 | self.file_object.close() 58 | 59 | except Exception as e: 60 | raise e 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | --------------------------------------------------------------------------------